summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore4
-rw-r--r--Makefile59
-rw-r--r--src/compiler/ast-tree.c226
-rw-r--r--src/compiler/ast-tree.h64
-rw-r--r--src/compiler/code-generator.c192
-rw-r--r--src/compiler/code-generator.h37
-rw-r--r--src/compiler/lexer.c232
-rw-r--r--src/compiler/lexer.h66
-rw-r--r--src/compiler/parser.c614
-rw-r--r--src/compiler/parser.h96
-rw-r--r--src/main.c123
-rw-r--r--src/utils/file.c27
-rw-r--r--src/utils/file.h3
-rw-r--r--src/utils/log.c15
-rw-r--r--src/utils/log.h5
-rw-r--r--src/utils/memory.c31
-rw-r--r--src/utils/memory.h7
-rw-r--r--src/utils/string.c14
-rw-r--r--src/utils/string.h6
-rw-r--r--test/goal.felan85
-rw-r--r--test/main.felan3
21 files changed, 1909 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f1aa812
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+build/
+compile_commands.json
+.cache/
+val.log*
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..0dc0bf7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,59 @@
+CC := gcc
+# CC := tcc
+
+BUILD_DIR := ./build
+SRC_DIR := ./src
+
+SRCS := $(shell find $(SRC_DIR) -name "*.c" -type f)
+HEADERS := $(shell find $(SRC_DIR) -name "*.h" -type f)
+OBJS := $(SRCS:%=$(BUILD_DIR)/%.o)
+
+RED := \033[0;31m
+GREEN := \033[0;32m
+NC := \033[0m
+
+# INC_DIRS := $(shell find $(SRC_DIRS) -type d)
+INC_DIRS := $(SRC_DIR)
+INC_FLAGS := $(addprefix -I,$(INC_DIRS))
+
+CFLAGS := $(INC_FLAGS) -Wall -Wextra -std=gnu23 -O3
+# CFLAGS := $(INC_FLAGS) -Wall -Wextra -std=gnu23 -Oz
+# CFLAGS := $(INC_FLAGS) -Wall -Wextra -std=gnu23 -g
+
+EXEC_FILE := $(BUILD_DIR)/felan
+
+all: $(EXEC_FILE)
+
+$(EXEC_FILE): $(OBJS)
+ $(CC) $(CFLAGS) $(OBJS) -o $@ $(LDFLAGS)
+
+$(BUILD_DIR)/%.c.o: %.c $(HEADERS) Makefile
+ mkdir -p $(dir $@)
+ $(CC) $(CFLAGS) -c $< -o $@
+
+lsp-files: clean
+ bear -- make all
+
+.PHONY: clean
+clean:
+ rm -rf $(BUILD_DIR)
+
+run: $(EXEC_FILE)
+ $(EXEC_FILE) $(args)
+
+val-run: $(EXEC_FILE)
+ valgrind --log-file="val.log" --leak-check=full --track-origins=yes --show-leak-kinds=all -s $(EXEC_FILE) $(args)
+
+gdb-run: $(EXEC_FILE)
+ gdb $(EXEC_FILE) $(args)
+
+.PHONY: test
+test: $(EXEC_FILE)
+ $(EXEC_FILE) test/main.felan
+
+val-test: $(EXEC_FILE)
+ valgrind --log-file="val.log" --leak-check=full --track-origins=yes --show-leak-kinds=all -s $(EXEC_FILE) test/main.felan
+
+# $@ = left hand of :
+# $< = right hand of : first one of them
+# $^ = right hand of : all
diff --git a/src/compiler/ast-tree.c b/src/compiler/ast-tree.c
new file mode 100644
index 0000000..f25fc51
--- /dev/null
+++ b/src/compiler/ast-tree.c
@@ -0,0 +1,226 @@
+#include "ast-tree.h"
+#include "compiler/parser.h"
+#include "utils/log.h"
+#include "utils/memory.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+const char *AST_TREE_TOKEN_STRINGS[] = {
+ "AST_TREE_TOKEN_FUNCTION",
+ "AST_TREE_TOKEN_KEYWORD_PRINT",
+ "AST_TREE_TOKEN_NONE",
+};
+
+void astTreePrint(const AstTree *tree, int indent) {
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ printf("{token=\"%s\"", AST_TREE_TOKEN_STRINGS[tree->token]);
+ switch (tree->token) {
+ case AST_TREE_TOKEN_FUNCTION:
+ AstTreeScope *metadata = tree->metadata;
+ printf(",\n");
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ printf("expressions=[\n");
+ for (size_t i = 0; i < metadata->expressions_size; ++i) {
+ astTreePrint(&metadata->expressions[i], indent + 1);
+ printf(",\n");
+ }
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ printf("]");
+ case AST_TREE_TOKEN_KEYWORD_PRINT:
+ goto RETURN_SUCCESS;
+ case AST_TREE_TOKEN_NONE:
+ }
+
+ printLog("Bad token '%d'", tree->token);
+ exit(1);
+
+RETURN_SUCCESS:
+ printf("}");
+}
+
+void astTreeRootPrint(const AstTreeRoot *root) {
+ for (size_t i = 0; i < root->variables.size; ++i) {
+ const AstTreeVariable *variable = root->variables.data[i];
+ printf("{\nname=\"%.*s\",\nvalue=\n",
+ (int)(variable->name_end - variable->name_begin),
+ variable->name_begin);
+ astTreePrint(variable->value, 1);
+ printf("\n},\n");
+ }
+}
+
+void astTreeDestroy(AstTree tree) {
+ switch (tree.token) {
+ case AST_TREE_TOKEN_FUNCTION:
+ AstTreeScope *metadata = tree.metadata;
+ for (size_t i = 0; i < metadata->expressions_size; ++i) {
+ astTreeDestroy(metadata->expressions[i]);
+ }
+ case AST_TREE_TOKEN_KEYWORD_PRINT:
+ return;
+ case AST_TREE_TOKEN_NONE:
+ }
+ printLog("Bad token '%d'", tree.token);
+ exit(1);
+}
+
+void astTreeDelete(AstTree *tree) {
+ astTreeDestroy(*tree);
+ free(tree);
+}
+
+void astTreeRootDelete(AstTreeRoot *root) {
+ for (size_t i = 0; i < root->variables.size; ++i) {
+ astTreeDelete(root->variables.data[i]->value);
+ }
+ free(root->variables.data);
+ free(root);
+}
+
+AstTree *newAstTree(AstTreeToken token, void *metadata) {
+ AstTree *result = a404m_malloc(sizeof(*result));
+ result->token = token;
+ result->metadata = metadata;
+ return result;
+}
+
+AstTreeRoot *makeAstTree(ParserNode *parsedRoot) {
+ if (parsedRoot->token != PARSER_TOKEN_ROOT) {
+ return NULL;
+ }
+
+ ParserNodeArray *nodes = parsedRoot->metadata;
+
+ AstTreeRoot *root = a404m_malloc(sizeof(*root));
+
+ size_t variables_size = nodes->size;
+ root->variables.data =
+ a404m_malloc(variables_size * sizeof(*root->variables.data));
+ root->variables.size = 0;
+
+ for (size_t i = 0; i < nodes->size; ++i) {
+ ParserNode *eol = nodes->data[i];
+ if (eol->token != PARSER_TOKEN_SYMBOL_EOL) {
+ printLog("Unexpected %s", PARSER_TOKEN_STRINGS[eol->token]);
+ goto RETURN_ERROR;
+ }
+ ParserNode *node = (ParserNodeEOLMetadata *)eol->metadata;
+ if (node->token != PARSER_TOKEN_CONSTANT) {
+ printLog("Unexpected %s", PARSER_TOKEN_STRINGS[node->token]);
+ goto RETURN_ERROR;
+ }
+ ParserNodeVariableMetadata *node_metadata = node->metadata;
+
+ AstTreeVariable *variable = a404m_malloc(sizeof(*variable));
+ variable->name_begin = node_metadata->name->str_begin;
+ variable->name_end = node_metadata->name->str_end;
+
+ pushVariable(&root->variables, &variables_size, variable);
+ }
+
+ for (size_t i = 0; i < nodes->size; ++i) {
+ ParserNode *node = (ParserNodeEOLMetadata *)nodes->data[i]->metadata;
+ ParserNodeVariableMetadata *node_metadata = node->metadata;
+
+ // TODO: check type
+ AstTree *tree = astTreeParse(node_metadata->value, &root->variables, 1);
+ if (tree == NULL) {
+ goto RETURN_ERROR;
+ }
+
+ root->variables.data[i]->value = tree;
+ }
+
+ return root;
+
+RETURN_ERROR:
+ free(root->variables.data);
+ free(root);
+ return NULL;
+}
+
+void pushVariable(AstTreeVariables *variables, size_t *variables_size,
+ AstTreeVariable *variable) {
+ if (*variables_size == variables->size) {
+ *variables_size += *variables_size / 2 + 1;
+ variables->data = a404m_realloc(variables->data,
+ *variables_size * sizeof(*variables->data));
+ }
+ variables->data[variables->size] = variable;
+ variables->size += 1;
+}
+
+AstTree *astTreeParse(ParserNode *parserNode, AstTreeVariables *variables,
+ size_t variables_size) {
+ switch (parserNode->token) {
+ case PARSER_TOKEN_KEYWORD_PRINT:
+ return newAstTree(AST_TREE_TOKEN_KEYWORD_PRINT, NULL);
+ case PARSER_TOKEN_FUNCTION_DEFINITION:
+ return astTreeParseFunction(parserNode, variables, variables_size);
+ case PARSER_TOKEN_SYMBOL_EOL:
+ case PARSER_TOKEN_SYMBOL_PARENTHESIS:
+ case PARSER_TOKEN_SYMBOL_CURLY_BRACKET:
+ case PARSER_TOKEN_CONSTANT:
+ case PARSER_TOKEN_IDENTIFIER:
+ case PARSER_TOKEN_TYPE_VOID:
+ case PARSER_TOKEN_TYPE_FUNCTION:
+ case PARSER_TOKEN_NONE:
+ case PARSER_TOKEN_ROOT:
+ }
+ printLog("Bad token %d", parserNode->token);
+ return NULL;
+}
+
+AstTree *astTreeParseFunction(ParserNode *parserNode,
+ AstTreeVariables *variables,
+ size_t variables_size) {
+ ParserNodeFunctionDefnitionMetadata *node_metadata = parserNode->metadata;
+ ParserNodeArray *body = node_metadata->body->metadata;
+
+ AstTreeScope *scope = a404m_malloc(sizeof(*scope));
+
+ size_t expressions_size = 0;
+ scope->expressions =
+ a404m_malloc(expressions_size * sizeof(*scope->expressions));
+ scope->expressions_size = 0;
+
+ for (size_t i = 0; i < body->size; ++i) {
+ ParserNode *eol = body->data[i];
+ if (eol->token != PARSER_TOKEN_SYMBOL_EOL) {
+ printLog("Unexpected %s", PARSER_TOKEN_STRINGS[eol->token]);
+ goto RETURN_ERROR;
+ }
+ ParserNode *node = (ParserNodeEOLMetadata *)eol->metadata;
+ AstTree *tree = astTreeParse(node, variables, variables_size);
+
+ if (tree == NULL) {
+ goto RETURN_ERROR;
+ }
+
+ if (expressions_size == scope->expressions_size) {
+ expressions_size += expressions_size / 2 + 1;
+ scope->expressions = a404m_realloc(
+ scope->expressions, expressions_size * sizeof(*scope->expressions));
+ }
+ scope->expressions[scope->expressions_size] = *tree;
+ scope->expressions_size += 1;
+
+ free(tree);
+ }
+
+ scope->expressions =
+ a404m_realloc(scope->expressions,
+ scope->expressions_size * sizeof(*scope->expressions));
+
+ AstTree *result = newAstTree(AST_TREE_TOKEN_FUNCTION, scope);
+
+ return result;
+
+RETURN_ERROR:
+ free(scope->expressions);
+ free(scope);
+ return NULL;
+}
diff --git a/src/compiler/ast-tree.h b/src/compiler/ast-tree.h
new file mode 100644
index 0000000..758d113
--- /dev/null
+++ b/src/compiler/ast-tree.h
@@ -0,0 +1,64 @@
+#pragma once
+
+#include "compiler/parser.h"
+#include <stddef.h>
+
+typedef enum AstTreeToken {
+ AST_TREE_TOKEN_FUNCTION,
+ AST_TREE_TOKEN_KEYWORD_PRINT,
+ AST_TREE_TOKEN_NONE,
+} AstTreeToken;
+
+typedef struct AstTreeType {
+} AstTreeType;
+
+typedef struct AstTree {
+ AstTreeToken token;
+ void *metadata;
+} AstTree;
+
+typedef struct AstTreeVariable {
+ char *name_begin;
+ char *name_end;
+ AstTreeType type;
+ AstTree *value;
+} AstTreeVariable;
+
+typedef struct AstTreeVariables {
+ AstTreeVariable **data;
+ size_t size;
+} AstTreeVariables;
+
+typedef struct AstTreeRoot {
+ AstTreeVariables variables;
+} AstTreeRoot;
+
+typedef struct AstTreeScope {
+ AstTreeVariable *variables;
+ size_t variables_size;
+ AstTree *expressions;
+ size_t expressions_size;
+} AstTreeScope;
+
+extern const char *AST_TREE_TOKEN_STRINGS[];
+
+void astTreePrint(const AstTree *tree,int indent);
+void astTreeRootPrint(const AstTreeRoot *root);
+
+void astTreeDestroy(AstTree tree);
+void astTreeDelete(AstTree *tree);
+void astTreeRootDelete(AstTreeRoot *root);
+
+AstTree *newAstTree(AstTreeToken token, void *metadata);
+
+AstTreeRoot *makeAstTree(ParserNode *parsedRoot);
+
+void pushVariable(AstTreeVariables *variables, size_t *variables_size,
+ AstTreeVariable *variable);
+
+AstTree *astTreeParse(ParserNode *parserNode, AstTreeVariables *variables,
+ size_t variables_size);
+
+AstTree *astTreeParseFunction(ParserNode *parserNode,
+ AstTreeVariables *variables,
+ size_t variables_size);
diff --git a/src/compiler/code-generator.c b/src/compiler/code-generator.c
new file mode 100644
index 0000000..0de098c
--- /dev/null
+++ b/src/compiler/code-generator.c
@@ -0,0 +1,192 @@
+#include "code-generator.h"
+#include "compiler/ast-tree.h"
+#include "utils/log.h"
+#include "utils/memory.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+CodeGeneratorCode createGenerateCode(char *label_begin, char *label_end,
+ CodeGeneratorInstruction instruction) {
+ CodeGeneratorCode code = {
+ .label_begin = label_begin,
+ .label_end = label_end,
+ .instruction = instruction,
+ };
+ return code;
+}
+
+CodeGeneratorCode *newGenerateCode(char *label_begin, char *label_end,
+ CodeGeneratorInstruction instruction) {
+ CodeGeneratorCode *result = a404m_malloc(sizeof(*result));
+ result->label_begin = label_begin;
+ result->label_end = label_end;
+ result->instruction = instruction;
+ return result;
+}
+
+void generateCodePushCode(CodeGeneratorCodes *codes, CodeGeneratorCode code) {
+ size_t codes_size =
+ a404m_malloc_usable_size(codes->codes) / sizeof(*codes->codes);
+ if (codes_size == codes->codes_size) {
+ codes_size += codes_size / 2 + 1;
+ codes->codes =
+ a404m_realloc(codes->codes, codes_size * sizeof(*codes->codes));
+ }
+ codes->codes[codes->codes_size] = code;
+ codes->codes_size += 1;
+}
+
+CodeGeneratorCodes *codeGenerator(AstTreeRoot *astTreeRoot) {
+ CodeGeneratorCodes *codes = a404m_malloc(sizeof(*codes));
+
+ codes->codes = a404m_malloc(0);
+ codes->codes_size = 0;
+
+ for (size_t i = 0; i < astTreeRoot->variables.size; ++i) {
+ AstTreeVariable *variable = astTreeRoot->variables.data[i];
+ switch (variable->value->token) {
+ case AST_TREE_TOKEN_FUNCTION:
+ codeGeneratorAstTreeFunction(variable->name_begin, variable->name_end,
+ *variable->value, codes);
+ continue;
+ case AST_TREE_TOKEN_KEYWORD_PRINT:
+ generateCodePushCode(
+ codes,
+ createGenerateCode(NULL, NULL, CODE_GENERATOR_INSTRUCTION_PRINT));
+ continue;
+ case AST_TREE_TOKEN_NONE:
+ }
+ printLog("Bad token %d", variable->value->token);
+ }
+
+ return codes;
+}
+
+bool codeGeneratorAstTreeFunction(char *label_begin, char *label_end,
+ AstTree astTree, CodeGeneratorCodes *codes) {
+ AstTreeScope *scope = astTree.metadata;
+
+ for (size_t i = 0; i < scope->expressions_size; ++i) {
+ AstTree tree = scope->expressions[i];
+ switch (tree.token) {
+ case AST_TREE_TOKEN_KEYWORD_PRINT:
+ generateCodePushCode(
+ codes, createGenerateCode(label_begin, label_end,
+ CODE_GENERATOR_INSTRUCTION_PRINT));
+ goto OK;
+ case AST_TREE_TOKEN_FUNCTION:
+ case AST_TREE_TOKEN_NONE:
+ }
+ printLog("Bad token %d", tree.token);
+ return false;
+ OK:
+ label_begin = NULL;
+ label_end = NULL;
+ }
+
+ generateCodePushCode(codes,
+ createGenerateCode(label_begin, label_end,
+ CODE_GENERATOR_INSTRUCTION_RET));
+
+ return true;
+}
+
+static const char TEMPLATE[] =
+ "format ELF64 executable 3\nSYS_exit = 60\nSYS_write = 1\nSTDOUT = "
+ "1\nsegment readable writable\nhello: db \"Hello, "
+ "World!\",0xa\nhello_len = $-hello\nsegment readable executable\nentry "
+ "_start\nprint:\nmov rax, SYS_write\nmov rdi, STDOUT\nmov rsi, "
+ "hello\nmov rdx, hello_len\nsyscall\nret\n_start:\ncall main\nmov rax, "
+ "SYS_exit\nxor "
+ "rdi,rdi\nsyscall\n";
+static const size_t TEMPLATE_LEN =
+ sizeof(TEMPLATE) / sizeof(*TEMPLATE) - sizeof(*TEMPLATE);
+
+static void codeGeneratorAppendFlatASMCommand(char **fasm, size_t *fasm_size,
+ size_t *fasm_inserted,
+ const char *str, size_t str_len) {
+ size_t inserted_last = *fasm_inserted;
+ *fasm_inserted += str_len;
+ if (*fasm_inserted + 1 >= *fasm_size) {
+ *fasm_size = *fasm_inserted + *fasm_inserted / 2 + 1;
+ *fasm = a404m_realloc(*fasm, *fasm_size * sizeof(*fasm));
+ }
+ strncpy(*fasm + inserted_last, str, str_len);
+}
+
+char *codeGeneratorToFlatASM(const CodeGeneratorCodes *codes) {
+ size_t fasm_size = TEMPLATE_LEN + 1;
+ char *fasm = a404m_malloc(fasm_size * sizeof(*fasm));
+ size_t fasm_inserted = 0;
+ fasm[0] = '\0';
+
+ codeGeneratorAppendFlatASMCommand(&fasm, &fasm_size, &fasm_inserted, TEMPLATE,
+ TEMPLATE_LEN);
+
+ for (size_t i = 0; i < codes->codes_size; ++i) {
+ const CodeGeneratorCode code = codes->codes[i];
+ if (code.label_begin != code.label_end) {
+ codeGeneratorAppendFlatASMCommand(&fasm, &fasm_size, &fasm_inserted,
+ code.label_begin,
+ code.label_end - code.label_begin);
+
+ codeGeneratorAppendFlatASMCommand(&fasm, &fasm_size, &fasm_inserted,
+ ":\n", 2);
+ }
+
+ switch (code.instruction) {
+ case CODE_GENERATOR_INSTRUCTION_PRINT: {
+ constexpr char INST[] = "call print\n";
+ codeGeneratorAppendFlatASMCommand(&fasm, &fasm_size, &fasm_inserted, INST,
+ strlen(INST));
+ }
+ continue;
+ case CODE_GENERATOR_INSTRUCTION_RET: {
+ constexpr char INST[] = "ret\n";
+ codeGeneratorAppendFlatASMCommand(&fasm, &fasm_size, &fasm_inserted, INST,
+ strlen(INST));
+ }
+ continue;
+ }
+ printLog("Bad instruction %d", code.instruction);
+ }
+
+ fasm[fasm_inserted] = '\0';
+ return fasm;
+}
+
+bool codeGeneratorFlatASMExec(const char *filePath, const char *fasm) {
+ const size_t filePathLen = strlen(filePath);
+ char *asmFilePath = a404m_malloc((filePathLen + 4 + 1) * sizeof(char));
+ strncpy(asmFilePath, filePath, filePathLen);
+ strcpy(asmFilePath + filePathLen, ".asm");
+
+ FILE *file = fopen(asmFilePath, "w");
+
+ if (file == NULL) {
+ return false;
+ }
+
+ fwrite(fasm, sizeof(*fasm), strlen(fasm), file);
+
+ fclose(file);
+
+ char *command;
+ asprintf(&command, "fasm -m 102400 \"%s\" \"%s\"", asmFilePath, filePath);
+
+ if (system(command) != 0) {
+ free(command);
+ return false;
+ }
+ free(command);
+
+ asprintf(&command, "chmod + \"%s\"", filePath);
+
+ if (system(command) != 0) {
+ free(command);
+ return false;
+ }
+
+ return true;
+}
diff --git a/src/compiler/code-generator.h b/src/compiler/code-generator.h
new file mode 100644
index 0000000..3232fb3
--- /dev/null
+++ b/src/compiler/code-generator.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "compiler/ast-tree.h"
+#include <stddef.h>
+#include <stdint.h>
+
+typedef enum CodeGeneratorInstruction : uint8_t {
+ CODE_GENERATOR_INSTRUCTION_PRINT,
+ CODE_GENERATOR_INSTRUCTION_RET,
+} CodeGeneratorInstruction;
+
+typedef struct CodeGeneratorCode {
+ char *label_begin;
+ char *label_end;
+ CodeGeneratorInstruction instruction;
+} CodeGeneratorCode;
+
+typedef struct CodeGeneratorCodes {
+ CodeGeneratorCode *codes;
+ size_t codes_size;
+} CodeGeneratorCodes;
+
+CodeGeneratorCode createGenerateCode(char *label_begin, char *label_end,
+ CodeGeneratorInstruction instruction);
+
+CodeGeneratorCode *newGenerateCode(char *label_begin, char *label_end,
+ CodeGeneratorInstruction instruction);
+
+void generateCodePushCode(CodeGeneratorCodes *codes, CodeGeneratorCode code);
+
+CodeGeneratorCodes *codeGenerator(AstTreeRoot *astTreeRoot);
+
+bool codeGeneratorAstTreeFunction(char *label_begin,char *label_end,AstTree astTree, CodeGeneratorCodes *codes);
+
+char *codeGeneratorToFlatASM(const CodeGeneratorCodes *codes);
+
+bool codeGeneratorFlatASMExec(const char *filePath,const char *fasm);
diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c
new file mode 100644
index 0000000..b919be0
--- /dev/null
+++ b/src/compiler/lexer.c
@@ -0,0 +1,232 @@
+#include "lexer.h"
+
+#include "utils/log.h"
+#include "utils/memory.h"
+#include "utils/string.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+const char *LEXER_TOKEN_STRINGS[] = {
+ "LEXER_TOKEN_IDENTIFIER",
+ "LEXER_TOKEN_KEYWORD_VOID",
+ "LEXER_TOKEN_KEYWORD_PRINT",
+
+ "LEXER_TOKEN_NUMBER",
+
+ "LEXER_TOKEN_SYMBOL",
+ "LEXER_TOKEN_SYMBOL_EOL",
+ "LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS",
+ "LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS",
+ "LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET",
+ "LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET",
+ "LEXER_TOKEN_SYMBOL_FUNCTION_ARROW",
+ "LEXER_TOKEN_SYMBOL_COLON",
+
+ "LEXER_TOKEN_NONE",
+};
+
+const char *LEXER_SYMBOL_STRINGS[] = {
+ ";", "(", ")", "{", "}", "->", ":",
+};
+const LexerToken LEXER_SYMBOL_TOKENS[] = {
+ LEXER_TOKEN_SYMBOL_EOL,
+ LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS,
+ LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS,
+ LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET,
+ LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET,
+ LEXER_TOKEN_SYMBOL_FUNCTION_ARROW,
+ LEXER_TOKEN_SYMBOL_COLON,
+};
+const size_t LEXER_SYMBOL_SIZE =
+ sizeof(LEXER_SYMBOL_TOKENS) / sizeof(*LEXER_SYMBOL_TOKENS);
+
+const char *LEXER_KEYWORD_STRINGS[] = {
+ "void",
+ "print",
+};
+const LexerToken LEXER_KEYWORD_TOKENS[] = {
+ LEXER_TOKEN_KEYWORD_VOID,
+ LEXER_TOKEN_KEYWORD_PRINT,
+};
+const size_t LEXER_KEYWORD_SIZE =
+ sizeof(LEXER_KEYWORD_TOKENS) / sizeof(*LEXER_KEYWORD_TOKENS);
+
+bool lexerNodeArrayIsError(LexerNodeArray array) {
+ return LEXER_NODE_ARRAY_ERROR.size == array.size;
+}
+
+void lexerNodeArrayPrint(LexerNodeArray array) {
+ for (size_t i = 0; i < array.size; ++i) {
+ LexerNode node = array.data[i];
+ printf("{str=\"%.*s\",token=%s}\n", (int)(node.str_end - node.str_begin),
+ node.str_begin, LEXER_TOKEN_STRINGS[node.token]);
+ }
+}
+
+void lexerNodeArrayDestroy(LexerNodeArray array) { free(array.data); }
+
+LexerNodeArray lexer(char *str) {
+ size_t result_size = 0;
+ LexerNodeArray result = {
+ .data = a404m_malloc(result_size),
+ .size = 0,
+ };
+
+ LexerToken node_token = LEXER_TOKEN_NONE;
+ char *node_str_begin = str;
+ char *iter = str;
+ for (; *iter != '\0'; ++iter) {
+ char c = *iter;
+ if (c == '/') {
+ ++iter;
+ c = *iter;
+ if (c == '/') {
+ lexerPushClear(&result, &result_size, iter - 1, &node_str_begin,
+ &node_token, LEXER_TOKEN_NONE);
+ for (; *iter != '\n'; ++iter) {
+ if (*iter == '\0') {
+ goto RETURN_SUCCESS;
+ }
+ }
+ continue;
+ } else if (c == '*') {
+ lexerPushClear(&result, &result_size, iter - 1, &node_str_begin,
+ &node_token, LEXER_TOKEN_NONE);
+ printLog("Not implemented"); // TODO: implement it
+ exit(1);
+ }
+ }
+ if (isSpace(c)) {
+ lexerPushClear(&result, &result_size, iter, &node_str_begin, &node_token,
+ LEXER_TOKEN_NONE);
+ } else if (isIdentifier(c)) {
+ if (node_token != LEXER_TOKEN_IDENTIFIER) {
+ lexerPushClear(&result, &result_size, iter, &node_str_begin,
+ &node_token, LEXER_TOKEN_IDENTIFIER);
+ }
+ } else if (isNumber(c)) {
+ if (node_token != LEXER_TOKEN_IDENTIFIER &&
+ node_token != LEXER_TOKEN_NUMBER) {
+ lexerPushClear(&result, &result_size, iter, &node_str_begin,
+ &node_token, LEXER_TOKEN_NUMBER);
+ }
+ } else if (isSymbol(c)) {
+ if (node_token != LEXER_TOKEN_SYMBOL) {
+ lexerPushClear(&result, &result_size, iter, &node_str_begin,
+ &node_token, LEXER_TOKEN_SYMBOL);
+ }
+ } else if (isSingleSymbol(c)) {
+ lexerPushClear(&result, &result_size, iter, &node_str_begin, &node_token,
+ LEXER_TOKEN_SYMBOL);
+ } else {
+ free(result.data);
+ printLog("Unexpected character '%c' at position %ld", c, iter - str);
+ return LEXER_NODE_ARRAY_ERROR;
+ }
+ }
+ lexerPushClear(&result, &result_size, iter, &node_str_begin, &node_token,
+ LEXER_TOKEN_NONE);
+
+RETURN_SUCCESS:
+ result.data = a404m_realloc(result.data, result.size * sizeof(*result.data));
+
+ return result;
+}
+
+void lexerPushClear(LexerNodeArray *array, size_t *array_size, char *iter,
+ char **node_str_begin, LexerToken *node_token,
+ LexerToken token) {
+ switch (*node_token) {
+ case LEXER_TOKEN_IDENTIFIER: {
+ const size_t index =
+ searchInStringArray(LEXER_KEYWORD_STRINGS, LEXER_KEYWORD_SIZE,
+ *node_str_begin, iter - *node_str_begin);
+ if (index != LEXER_KEYWORD_SIZE) {
+ *node_token = LEXER_KEYWORD_TOKENS[index];
+ }
+ }
+ goto PUSH;
+ case LEXER_TOKEN_SYMBOL: {
+ const size_t index =
+ searchInStringArray(LEXER_SYMBOL_STRINGS, LEXER_SYMBOL_SIZE,
+ *node_str_begin, iter - *node_str_begin);
+ if (index != LEXER_SYMBOL_SIZE) {
+ *node_token = LEXER_SYMBOL_TOKENS[index];
+ }
+ }
+ // goto PUSH;
+ PUSH:
+ case LEXER_TOKEN_KEYWORD_VOID:
+ case LEXER_TOKEN_KEYWORD_PRINT:
+ case LEXER_TOKEN_NUMBER:
+ case LEXER_TOKEN_SYMBOL_EOL:
+ case LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS:
+ case LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS:
+ case LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET:
+ case LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET:
+ case LEXER_TOKEN_SYMBOL_FUNCTION_ARROW:
+ case LEXER_TOKEN_SYMBOL_COLON:
+ if (*array_size == array->size) {
+ *array_size += 1 + *array_size / 2;
+ array->data =
+ a404m_realloc(array->data, *array_size * sizeof(*array->data));
+ }
+
+ array->data[array->size].token = *node_token;
+ array->data[array->size].str_begin = *node_str_begin;
+ array->data[array->size].str_end = iter;
+ array->data[array->size].parserNode = NULL;
+
+ array->size += 1;
+
+ // goto RETURN_SUCCESS;
+ case LEXER_TOKEN_NONE:
+ goto RETURN_SUCCESS;
+ }
+ printLog("Bad token '%d'", *node_token);
+ exit(1);
+RETURN_SUCCESS:
+ *node_str_begin = iter;
+ *node_token = token;
+}
+
+bool isIdentifier(char c) {
+ return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_';
+}
+
+bool isNumber(char c) { return '0' <= c && c <= '9'; }
+
+bool isSymbol(char c) {
+ switch (c) {
+ case '-':
+ case '>':
+ case '.':
+ case '+':
+ case '*':
+ case '/':
+ case '%':
+ case '=':
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool isSingleSymbol(char c) {
+ switch (c) {
+ case ';':
+ case ':':
+ case ',':
+ case '(':
+ case ')':
+ case '{':
+ case '}':
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool isSpace(char c) { return isspace(c); }
diff --git a/src/compiler/lexer.h b/src/compiler/lexer.h
new file mode 100644
index 0000000..3c3bc3f
--- /dev/null
+++ b/src/compiler/lexer.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include <stddef.h>
+
+typedef enum LexerToken {
+ LEXER_TOKEN_IDENTIFIER,
+ LEXER_TOKEN_KEYWORD_VOID,
+ LEXER_TOKEN_KEYWORD_PRINT,
+
+ LEXER_TOKEN_NUMBER,
+
+ LEXER_TOKEN_SYMBOL,
+ LEXER_TOKEN_SYMBOL_EOL,
+ LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS,
+ LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS,
+ LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET,
+ LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET,
+ LEXER_TOKEN_SYMBOL_FUNCTION_ARROW,
+ LEXER_TOKEN_SYMBOL_COLON,
+
+ LEXER_TOKEN_NONE,
+} LexerToken;
+
+extern const char *LEXER_TOKEN_STRINGS[];
+
+extern const char *LEXER_SYMBOL_STRINGS[];
+extern const LexerToken LEXER_SYMBOL_TOKENS[];
+extern const size_t LEXER_SYMBOL_SIZE;
+
+extern const char *LEXER_KEYWORD_STRINGS[];
+extern const LexerToken LEXER_KEYWORD_TOKENS[];
+extern const size_t LEXER_KEYWORD_SIZE;
+
+struct ParserNode;
+
+typedef struct LexerNode {
+ char *str_begin;
+ char *str_end;
+ LexerToken token;
+ struct ParserNode *parserNode;
+} LexerNode;
+
+typedef struct LexerNodeArray {
+ LexerNode *data;
+ size_t size;
+} LexerNodeArray;
+
+constexpr LexerNodeArray LEXER_NODE_ARRAY_ERROR = {
+ .size = -1ULL,
+};
+
+extern bool lexerNodeArrayIsError(LexerNodeArray array);
+extern void lexerNodeArrayPrint(LexerNodeArray array);
+extern void lexerNodeArrayDestroy(LexerNodeArray array);
+
+extern LexerNodeArray lexer(char *str);
+
+extern void lexerPushClear(LexerNodeArray *array, size_t *array_size,
+ char *iter, char **node_str_begin,
+ LexerToken *node_token, LexerToken token);
+
+extern bool isIdentifier(char c);
+extern bool isNumber(char c);
+extern bool isSymbol(char c);
+extern bool isSingleSymbol(char c);
+extern bool isSpace(char c);
diff --git a/src/compiler/parser.c b/src/compiler/parser.c
new file mode 100644
index 0000000..e9fec5f
--- /dev/null
+++ b/src/compiler/parser.c
@@ -0,0 +1,614 @@
+#include "parser.h"
+#include "compiler/lexer.h"
+#include "utils/log.h"
+#include "utils/memory.h"
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+const char *PARSER_TOKEN_STRINGS[] = {
+ "PARSER_TOKEN_ROOT",
+
+ "PARSER_TOKEN_IDENTIFIER",
+
+ "PARSER_TOKEN_TYPE_FUNCTION",
+ "PARSER_TOKEN_TYPE_VOID",
+
+ "PARSER_TOKEN_KEYWORD_PRINT",
+
+ "PARSER_TOKEN_CONSTANT",
+
+ "PARSER_TOKEN_SYMBOL_EOL",
+ "PARSER_TOKEN_SYMBOL_CURLY_BRACKET",
+ "PARSER_TOKEN_SYMBOL_PARENTHESIS",
+
+ "PARSER_TOKEN_FUNCTION_DEFINITION",
+
+ "PARSER_TOKEN_NONE",
+};
+
+static constexpr ParserOrder PARSER_ORDER[] = {
+ {
+ .ltr = true,
+ .size = 2,
+ .data =
+ {
+ LEXER_TOKEN_IDENTIFIER,
+ LEXER_TOKEN_KEYWORD_VOID,
+ },
+ },
+ {
+ .ltr = true,
+ .size = 2,
+ .data =
+ {
+ LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS,
+ LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET,
+ },
+ },
+ {
+ .ltr = false,
+ .size = 1,
+ .data =
+ {
+ LEXER_TOKEN_SYMBOL_FUNCTION_ARROW,
+ },
+ },
+ {
+ .ltr = true,
+ .size = 2,
+ .data =
+ {
+ LEXER_TOKEN_SYMBOL_COLON,
+ LEXER_TOKEN_KEYWORD_PRINT,
+ },
+ },
+ {
+ .ltr = true,
+ .size = 1,
+ .data =
+ {
+ LEXER_TOKEN_SYMBOL_EOL,
+ },
+ },
+};
+
+static constexpr size_t PARSER_ORDER_SIZE =
+ sizeof(PARSER_ORDER) / sizeof(*PARSER_ORDER);
+
+void parserNodePrint(const ParserNode *node, int indent) {
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+
+ if (node == NULL) {
+ printf("null");
+ return;
+ }
+
+ printf("{token=\"%s\"", PARSER_TOKEN_STRINGS[node->token]);
+
+ switch (node->token) {
+ case PARSER_TOKEN_ROOT:
+ case PARSER_TOKEN_SYMBOL_CURLY_BRACKET:
+ case PARSER_TOKEN_SYMBOL_PARENTHESIS: {
+ const ParserNodeArray *metadata = node->metadata;
+ printf(",children=[\n");
+ for (size_t i = 0; i < metadata->size; ++i) {
+ parserNodePrint(metadata->data[i], indent + 1);
+ printf(",\n");
+ }
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ printf("]");
+ }
+ goto RETURN_SUCCESS;
+ case PARSER_TOKEN_IDENTIFIER:
+ case PARSER_TOKEN_TYPE_VOID:
+ case PARSER_TOKEN_KEYWORD_PRINT:
+ goto RETURN_SUCCESS;
+ case PARSER_TOKEN_CONSTANT: {
+ const ParserNodeVariableMetadata *metadata = node->metadata;
+ printf(",\n");
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ printf("name=\n");
+ parserNodePrint(metadata->name, indent + 1);
+ printf(",\n");
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ printf("type=\n");
+ parserNodePrint(metadata->type, indent + 1);
+ printf(",\n");
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ printf("value=\n");
+ parserNodePrint(metadata->value, indent + 1);
+ printf("\n");
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ }
+ goto RETURN_SUCCESS;
+ case PARSER_TOKEN_SYMBOL_EOL: {
+ const ParserNodeEOLMetadata *metadata = node->metadata;
+ printf(",\n");
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ printf("child=\n");
+ parserNodePrint(metadata, indent + 1);
+ printf("\n");
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ }
+ goto RETURN_SUCCESS;
+ case PARSER_TOKEN_FUNCTION_DEFINITION: {
+ const ParserNodeFunctionDefnitionMetadata *metadata = node->metadata;
+ printf(",\n");
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ printf("arguments=\n");
+ parserNodePrint(metadata->arguments, indent + 1);
+ printf(",\n");
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ printf("returnType=\n");
+ parserNodePrint(metadata->returnType, indent + 1);
+ printf(",\n");
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ printf("body=\n");
+ parserNodePrint(metadata->body, indent + 1);
+ printf("\n");
+ for (int i = 0; i < indent; ++i)
+ printf(" ");
+ }
+ goto RETURN_SUCCESS;
+ case PARSER_TOKEN_TYPE_FUNCTION:
+ case PARSER_TOKEN_NONE:
+ }
+ printLog("Bad token '%d'", node->token);
+ exit(1);
+
+RETURN_SUCCESS:
+ printf("}");
+}
+
+void parserNodeDelete(ParserNode *node) {
+ if (node == NULL) {
+ return;
+ }
+ switch (node->token) {
+ case PARSER_TOKEN_ROOT:
+ case PARSER_TOKEN_SYMBOL_CURLY_BRACKET:
+ case PARSER_TOKEN_SYMBOL_PARENTHESIS: {
+ ParserNodeArray *metadata = node->metadata;
+ for (size_t i = 0; i < metadata->size; ++i) {
+ parserNodeDelete(metadata->data[i]);
+ }
+ free(metadata->data);
+ free(metadata);
+ }
+ goto RETURN_SUCCESS;
+ case PARSER_TOKEN_IDENTIFIER:
+ case PARSER_TOKEN_TYPE_VOID:
+ case PARSER_TOKEN_KEYWORD_PRINT:
+ goto RETURN_SUCCESS;
+ case PARSER_TOKEN_CONSTANT: {
+ ParserNodeVariableMetadata *metadata = node->metadata;
+ parserNodeDelete(metadata->name);
+ parserNodeDelete(metadata->type);
+ parserNodeDelete(metadata->value);
+ free(metadata);
+ }
+ goto RETURN_SUCCESS;
+ case PARSER_TOKEN_SYMBOL_EOL: {
+ ParserNodeEOLMetadata *metadata = node->metadata;
+ parserNodeDelete(metadata);
+ }
+ goto RETURN_SUCCESS;
+ case PARSER_TOKEN_FUNCTION_DEFINITION: {
+ ParserNodeFunctionDefnitionMetadata *metadata = node->metadata;
+ parserNodeDelete(metadata->arguments);
+ parserNodeDelete(metadata->returnType);
+ parserNodeDelete(metadata->body);
+ free(metadata);
+ }
+ goto RETURN_SUCCESS;
+ case PARSER_TOKEN_TYPE_FUNCTION:
+ case PARSER_TOKEN_NONE:
+ }
+ printLog("Bad token '%d'", node->token);
+ exit(1);
+
+RETURN_SUCCESS:
+ free(node);
+}
+
+ParserNode *newParserNode(ParserToken token, char *str_begin, char *str_end,
+ void *metadata, ParserNode *parent) {
+ ParserNode *parserNode = a404m_malloc(sizeof(*parserNode));
+ parserNode->token = token;
+ parserNode->str_begin = str_begin;
+ parserNode->str_end = str_end;
+ parserNode->metadata = metadata;
+ parserNode->parent = parent;
+ return parserNode;
+}
+
+ParserNode *parser(LexerNodeArray lexed) {
+ ParserNode *root = newParserNode(PARSER_TOKEN_ROOT, NULL, NULL, NULL, NULL);
+ if (parserNodeArray(lexed.data, lexed.data + lexed.size, root)) {
+ return root;
+ } else {
+ free(root);
+ return NULL;
+ }
+}
+
+bool parserNodeArray(LexerNode *begin, LexerNode *end, ParserNode *parent) {
+ size_t parsedNodes_size = 0;
+
+ ParserNodeArray *parsedNodes = a404m_malloc(sizeof(*parsedNodes));
+ parsedNodes->data =
+ a404m_malloc(parsedNodes_size * sizeof(*parsedNodes->data));
+ parsedNodes->size = 0;
+
+ for (size_t orders_index = 0; orders_index < PARSER_ORDER_SIZE;
+ ++orders_index) {
+ const ParserOrder orders = PARSER_ORDER[orders_index];
+
+ for (LexerNode *iter = orders.ltr ? begin : end - 1;
+ iter < end && iter >= begin; orders.ltr ? ++iter : --iter) {
+
+ if (iter->parserNode != NULL) {
+ continue;
+ }
+
+ for (size_t order_index = 0; order_index < orders.size; ++order_index) {
+ const LexerToken order = orders.data[order_index];
+ if (iter->token == order) {
+ ParserNode *parserNode = parseNode(iter, begin, end, parent);
+ if (parserNode == NULL) {
+ goto RETURN_ERROR;
+ }
+ if (parsedNodes->size == parsedNodes_size) {
+ parsedNodes_size += parsedNodes_size / 2 + 1;
+ parsedNodes->data =
+ a404m_realloc(parsedNodes->data,
+ sizeof(*parsedNodes->data) * parsedNodes_size);
+ }
+ parsedNodes->data[parsedNodes->size] = parserNode;
+ parsedNodes->size += 1;
+ }
+ }
+ }
+ }
+
+ size_t inserted = 0;
+ for (size_t i = 0; i < parsedNodes->size; ++i) {
+ ParserNode *parserNode = parsedNodes->data[i];
+ if (parserNode->parent == parent) {
+ // TODO: check here
+ if (inserted != i) {
+ parsedNodes->data[inserted] = parserNode;
+ }
+ ++inserted;
+ }
+ }
+
+ parsedNodes->data =
+ a404m_realloc(parsedNodes->data, inserted * sizeof(*parsedNodes->data));
+ parsedNodes->size = inserted;
+
+ parent->metadata = parsedNodes;
+
+ return true;
+
+RETURN_ERROR:
+ for (size_t i = 0; i < parsedNodes->size; ++i) {
+ free(parsedNodes->data[i]);
+ }
+ free(parsedNodes->data);
+ free(parsedNodes);
+ return false;
+}
+
+ParserNode *parseNode(LexerNode *node, LexerNode *begin, LexerNode *end,
+ ParserNode *parent) {
+ switch (node->token) {
+ case LEXER_TOKEN_IDENTIFIER:
+ return parserIdentifier(node, parent);
+ case LEXER_TOKEN_KEYWORD_VOID:
+ return parserVoid(node, parent);
+ case LEXER_TOKEN_KEYWORD_PRINT:
+ return parserPrint(node, parent);
+ case LEXER_TOKEN_SYMBOL_EOL:
+ return parserEol(node, begin, parent);
+ case LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS:
+ return parserParenthesis(node, begin, parent);
+ case LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET:
+ return parserCurlyBrackets(node, begin, parent);
+ case LEXER_TOKEN_SYMBOL_FUNCTION_ARROW:
+ return parserFunction(node, begin, end, parent);
+ case LEXER_TOKEN_SYMBOL_COLON:
+ return parserVariable(node, begin, end, parent);
+ case LEXER_TOKEN_NONE:
+ case LEXER_TOKEN_NUMBER:
+ case LEXER_TOKEN_SYMBOL:
+ case LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS:
+ case LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET:
+ }
+ printLog("Bad token '%d'", node->token);
+ return NULL;
+}
+
+ParserNode *getUntilCommonParent(ParserNode *node, ParserNode *parent) {
+ while (node != NULL && node->parent != parent) {
+ node = node->parent;
+ }
+ return node;
+}
+
+ParserNode *parserIdentifier(LexerNode *node, ParserNode *parent) {
+ ParserNode *parserNode = newParserNode(
+ PARSER_TOKEN_IDENTIFIER, node->str_begin, node->str_end, NULL, parent);
+ node->parserNode = parserNode;
+ return parserNode;
+}
+
+ParserNode *parserVoid(LexerNode *node, ParserNode *parent) {
+ ParserNode *parserNode = newParserNode(
+ PARSER_TOKEN_TYPE_VOID, node->str_begin, node->str_end, NULL, parent);
+ node->parserNode = parserNode;
+ return parserNode;
+}
+
+ParserNode *parserPrint(LexerNode *node, ParserNode *parent) {
+ ParserNode *parserNode = newParserNode(
+ PARSER_TOKEN_KEYWORD_PRINT, node->str_begin, node->str_end, NULL, parent);
+ node->parserNode = parserNode;
+ return parserNode;
+}
+
+ParserNode *parserEol(LexerNode *node, LexerNode *begin, ParserNode *parent) {
+ LexerNode *nodeBeore = node - 1;
+ ParserNode *parserNodeBefore;
+ if (nodeBeore < begin) {
+ parserNodeBefore = NULL;
+ } else if (nodeBeore->parserNode == NULL) {
+ printLog("Bad EOL after %s", LEXER_TOKEN_STRINGS[nodeBeore->token]);
+ return NULL;
+ } else {
+ parserNodeBefore = getUntilCommonParent(nodeBeore->parserNode, parent);
+ if (parserNodeBefore == NULL || !isExpression(parserNodeBefore)) {
+ printLog("Bad EOL");
+ return NULL;
+ }
+ }
+ ParserNode *parserNode =
+ newParserNode(PARSER_TOKEN_SYMBOL_EOL, node->str_begin, node->str_end,
+ (ParserNodeEOLMetadata *)parserNodeBefore, parent);
+ node->parserNode = parserNode;
+ if (parserNodeBefore != NULL) {
+ parserNodeBefore->parent = parserNode;
+ }
+ return parserNode;
+}
+
+ParserNode *parserParenthesis(LexerNode *closing, LexerNode *begin,
+ ParserNode *parent) {
+ LexerNode *opening = NULL;
+
+ for (LexerNode *iter = closing - 1; iter >= begin; --iter) {
+ if (iter->parserNode == NULL &&
+ iter->token == LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS) {
+ opening = iter;
+ break;
+ }
+ }
+
+ if (opening == NULL) {
+ printLog("No opening for parenthesis");
+ return NULL;
+ }
+
+ ParserNode *parserNode =
+ newParserNode(PARSER_TOKEN_SYMBOL_PARENTHESIS, opening->str_begin,
+ closing->str_end, NULL, parent);
+
+ opening->parserNode = parserNode;
+ closing->parserNode = parserNode;
+ if (parserNodeArray(opening + 1, closing, parserNode)) {
+ return parserNode;
+ } else {
+ free(parserNode);
+ return NULL;
+ }
+}
+
+ParserNode *parserCurlyBrackets(LexerNode *closing, LexerNode *begin,
+ ParserNode *parent) {
+ LexerNode *opening = NULL;
+
+ for (LexerNode *iter = closing - 1; iter >= begin; --iter) {
+ if (iter->parserNode == NULL &&
+ iter->token == LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET) {
+ opening = iter;
+ break;
+ }
+ }
+
+ if (opening == NULL) {
+ printLog("No opening for curly brackets");
+ return NULL;
+ }
+
+ ParserNode *parserNode =
+ newParserNode(PARSER_TOKEN_SYMBOL_CURLY_BRACKET, opening->str_begin,
+ closing->str_end, NULL, parent);
+
+ opening->parserNode = parserNode;
+ closing->parserNode = parserNode;
+ if (parserNodeArray(opening + 1, closing, parserNode)) {
+ return parserNode;
+ } else {
+ free(parserNode);
+ return NULL;
+ }
+}
+
+ParserNode *parserFunction(LexerNode *node, LexerNode *begin, LexerNode *end,
+ ParserNode *parent) {
+ LexerNode *paramsNode = node - 1;
+ LexerNode *retTypeNode = node + 1;
+ LexerNode *bodyNode = node + 2;
+ // TODO: if body is not there then it is a function type not a function
+ // definition
+ if (paramsNode < begin || paramsNode->parserNode == NULL) {
+ NO_PARAMS:
+ printLog("No params");
+ return NULL;
+ } else if (retTypeNode >= end || retTypeNode->parserNode == NULL) {
+ NO_RETURN_TYPE:
+ printLog("No return type");
+ return NULL;
+ }
+ ParserNode *params = getUntilCommonParent(paramsNode->parserNode, parent);
+ ParserNode *retType = getUntilCommonParent(retTypeNode->parserNode, parent);
+ ParserNode *body;
+
+ if (bodyNode >= end || bodyNode->parserNode == NULL) {
+ body = NULL;
+ // TODO: implement
+ printLog("Not implemented");
+ return NULL;
+ } else {
+ body = getUntilCommonParent(bodyNode->parserNode, parent);
+ }
+
+ // TODO: check parenthesis
+ if (params->token != PARSER_TOKEN_SYMBOL_PARENTHESIS) {
+ goto NO_PARAMS;
+ } else if (!isType(retType)) {
+ goto NO_RETURN_TYPE;
+ }
+
+ // TODO: check body
+
+ ParserNodeFunctionDefnitionMetadata *metadata =
+ a404m_malloc(sizeof(*metadata));
+
+ metadata->body = body;
+ metadata->arguments = params;
+ metadata->returnType = retType;
+
+ return params->parent = retType->parent = body->parent = node->parserNode =
+ newParserNode(PARSER_TOKEN_FUNCTION_DEFINITION, params->str_begin,
+ body->str_end, metadata, parent);
+}
+
+ParserNode *parserVariable(LexerNode *node, LexerNode *begin, LexerNode *end,
+ ParserNode *parent) {
+ LexerNode *nameNode = node - 1;
+ if (nameNode < begin || nameNode->parserNode == NULL) {
+ printLog("No name");
+ return NULL;
+ }
+
+ ParserNode *name = getUntilCommonParent(nameNode->parserNode, parent);
+
+ if (name->token != PARSER_TOKEN_IDENTIFIER) {
+ printLog("Name should be identifier");
+ return NULL;
+ }
+
+ LexerNode *node1 = node + 1;
+
+ if (node1 >= end || node1->token != LEXER_TOKEN_SYMBOL_COLON) {
+ printLog("Bad variable definition");
+ return NULL;
+ }
+
+ LexerNode *valueNode = node + 2;
+
+ if (valueNode >= end || valueNode->parserNode == NULL) {
+ printLog("No initialziation");
+ return NULL;
+ }
+
+ ParserNode *value = getUntilCommonParent(valueNode->parserNode, parent);
+
+ if (!isValue(value)) {
+ printLog("No value");
+ return NULL;
+ }
+
+ ParserNodeVariableMetadata *metadata = a404m_malloc(sizeof(*metadata));
+ metadata->value = value;
+ metadata->name = name;
+ metadata->type = NULL;
+
+ return name->parent = node1->parserNode = value->parent = node->parserNode =
+ newParserNode(PARSER_TOKEN_CONSTANT, name->str_begin,
+ value->str_end, metadata, parent);
+}
+
+bool isExpression(ParserNode *node) {
+ switch (node->token) {
+ case PARSER_TOKEN_IDENTIFIER:
+ case PARSER_TOKEN_CONSTANT:
+ case PARSER_TOKEN_SYMBOL_PARENTHESIS:
+ case PARSER_TOKEN_FUNCTION_DEFINITION:
+ case PARSER_TOKEN_KEYWORD_PRINT:
+ return true;
+ case PARSER_TOKEN_ROOT:
+ case PARSER_TOKEN_TYPE_FUNCTION:
+ case PARSER_TOKEN_TYPE_VOID:
+ case PARSER_TOKEN_SYMBOL_EOL:
+ case PARSER_TOKEN_SYMBOL_CURLY_BRACKET:
+ return false;
+ case PARSER_TOKEN_NONE:
+ }
+ printLog("Bad token '%d'", node->token);
+ exit(1);
+}
+
+bool isType(ParserNode *node) {
+ switch (node->token) {
+ case PARSER_TOKEN_TYPE_VOID:
+ return true;
+ case PARSER_TOKEN_IDENTIFIER:
+ case PARSER_TOKEN_CONSTANT:
+ case PARSER_TOKEN_SYMBOL_PARENTHESIS:
+ case PARSER_TOKEN_FUNCTION_DEFINITION:
+ case PARSER_TOKEN_KEYWORD_PRINT:
+ case PARSER_TOKEN_ROOT:
+ case PARSER_TOKEN_TYPE_FUNCTION:
+ case PARSER_TOKEN_SYMBOL_EOL:
+ case PARSER_TOKEN_SYMBOL_CURLY_BRACKET:
+ return false;
+ case PARSER_TOKEN_NONE:
+ }
+ printLog("Bad token '%d'", node->token);
+ exit(1);
+}
+
+bool isValue(ParserNode *node) {
+ switch (node->token) {
+ case PARSER_TOKEN_FUNCTION_DEFINITION:
+ return true;
+ case PARSER_TOKEN_TYPE_VOID:
+ case PARSER_TOKEN_IDENTIFIER:
+ case PARSER_TOKEN_CONSTANT:
+ case PARSER_TOKEN_SYMBOL_PARENTHESIS:
+ case PARSER_TOKEN_KEYWORD_PRINT:
+ case PARSER_TOKEN_ROOT:
+ case PARSER_TOKEN_TYPE_FUNCTION:
+ case PARSER_TOKEN_SYMBOL_EOL:
+ case PARSER_TOKEN_SYMBOL_CURLY_BRACKET:
+ return false;
+ case PARSER_TOKEN_NONE:
+ }
+ printLog("Bad token '%d'", node->token);
+ exit(1);
+}
diff --git a/src/compiler/parser.h b/src/compiler/parser.h
new file mode 100644
index 0000000..1e6f487
--- /dev/null
+++ b/src/compiler/parser.h
@@ -0,0 +1,96 @@
+#pragma once
+
+#include "compiler/lexer.h"
+#include <stddef.h>
+
+typedef enum ParserToken {
+ PARSER_TOKEN_ROOT,
+
+ PARSER_TOKEN_IDENTIFIER,
+
+ PARSER_TOKEN_TYPE_FUNCTION,
+ PARSER_TOKEN_TYPE_VOID,
+
+ PARSER_TOKEN_KEYWORD_PRINT,
+
+ PARSER_TOKEN_CONSTANT,
+
+ PARSER_TOKEN_SYMBOL_EOL,
+ PARSER_TOKEN_SYMBOL_CURLY_BRACKET,
+ PARSER_TOKEN_SYMBOL_PARENTHESIS,
+
+ PARSER_TOKEN_FUNCTION_DEFINITION,
+
+ PARSER_TOKEN_NONE,
+} ParserToken;
+
+extern const char *PARSER_TOKEN_STRINGS[];
+
+typedef struct ParserOrder {
+ bool ltr;
+ size_t size;
+ LexerToken data[8];
+} ParserOrder;
+
+typedef struct ParserNode {
+ ParserToken token;
+ char *str_begin;
+ char *str_end;
+ void *metadata;
+ struct ParserNode *parent;
+} ParserNode;
+
+typedef struct ParserNodeTypeFunctionMetadata {
+ ParserNode *arguments;
+ ParserNode *returnType;
+} ParserNodeTypeFunctionMetadata;
+
+typedef struct ParserNodeVariableMetadata {
+ ParserNode *name;
+ ParserNode *type;
+ ParserNode *value;
+} ParserNodeVariableMetadata;
+
+typedef struct ParserNodeFunctionDefnitionMetadata {
+ ParserNode *arguments;
+ ParserNode *returnType;
+ ParserNode *body;
+} ParserNodeFunctionDefnitionMetadata;
+
+typedef struct ParserNodeArray {
+ ParserNode **data;
+ size_t size;
+} ParserNodeArray;
+
+typedef ParserNode ParserNodeEOLMetadata;
+
+void parserNodePrint(const ParserNode *node,int indent);
+void parserNodeDelete(ParserNode *node);
+
+ParserNode *parser(LexerNodeArray lexed);
+bool parserNodeArray(LexerNode *begin, LexerNode *end, ParserNode *parent);
+
+ParserNode *newParserNode(ParserToken token, char *str_begin, char *str_end,
+ void *metadata, ParserNode *parent);
+
+ParserNode *parseNode(LexerNode *node, LexerNode *begin, LexerNode *end,
+ ParserNode *parent);
+
+ParserNode *getUntilCommonParent(ParserNode *node, ParserNode *parent);
+
+ParserNode *parserIdentifier(LexerNode *node, ParserNode *parent);
+ParserNode *parserVoid(LexerNode *node, ParserNode *parent);
+ParserNode *parserPrint(LexerNode *node, ParserNode *parent);
+ParserNode *parserEol(LexerNode *node, LexerNode *begin, ParserNode *parent);
+ParserNode *parserParenthesis(LexerNode *closing, LexerNode *begin,
+ ParserNode *parent);
+ParserNode *parserCurlyBrackets(LexerNode *closing, LexerNode *begin,
+ ParserNode *parent);
+ParserNode *parserFunction(LexerNode *node, LexerNode *begin, LexerNode *end,
+ ParserNode *parent);
+ParserNode *parserVariable(LexerNode *node, LexerNode *begin, LexerNode *end,
+ ParserNode *parent);
+
+bool isExpression(ParserNode *node);
+bool isType(ParserNode *node);
+bool isValue(ParserNode *node);
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..68da20e
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,123 @@
+#include "compiler/ast-tree.h"
+#include "compiler/code-generator.h"
+#include "compiler/lexer.h"
+#include "compiler/parser.h"
+#include "utils/file.h"
+#include "utils/log.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+int runWithPrint(const char *filePath) {
+ char *code = readWholeFile(filePath);
+ if (code == NULL) {
+ return 1;
+ }
+
+ LexerNodeArray lexed = lexer(code);
+ if (lexerNodeArrayIsError(lexed)) {
+ free(code);
+ return 1;
+ }
+ lexerNodeArrayPrint(lexed);
+
+ ParserNode *parsedRoot = parser(lexed);
+ if (parsedRoot == NULL) {
+ lexerNodeArrayDestroy(lexed);
+ free(code);
+ return 1;
+ }
+ parserNodePrint(parsedRoot, 0);
+ printf("\n");
+
+ AstTreeRoot *astTree = makeAstTree(parsedRoot);
+ if (astTree == NULL) {
+ parserNodeDelete(parsedRoot);
+ lexerNodeArrayDestroy(lexed);
+ free(code);
+ return 1;
+ }
+ astTreeRootPrint(astTree);
+
+ CodeGeneratorCodes *codes = codeGenerator(astTree);
+ if (codes == NULL) {
+ astTreeRootDelete(astTree);
+ parserNodeDelete(parsedRoot);
+ lexerNodeArrayDestroy(lexed);
+ free(code);
+ return 1;
+ }
+
+ char *fasm = codeGeneratorToFlatASM(codes);
+ printf("%s", fasm);
+ if (codeGeneratorFlatASMExec("build/test-file", fasm)) {
+ system("./build/test-file");
+ }
+
+ free(fasm);
+ astTreeRootDelete(astTree);
+ parserNodeDelete(parsedRoot);
+ lexerNodeArrayDestroy(lexed);
+ free(code);
+ return 0;
+}
+
+int run(const char *filePath) {
+ char *code = readWholeFile(filePath);
+ if (code == NULL) {
+ return 1;
+ }
+
+ LexerNodeArray lexed = lexer(code);
+ if (lexerNodeArrayIsError(lexed)) {
+ free(code);
+ return 1;
+ }
+
+ ParserNode *parsedRoot = parser(lexed);
+ if (parsedRoot == NULL) {
+ lexerNodeArrayDestroy(lexed);
+ free(code);
+ return 1;
+ }
+
+ AstTreeRoot *astTree = makeAstTree(parsedRoot);
+ if (astTree == NULL) {
+ parserNodeDelete(parsedRoot);
+ lexerNodeArrayDestroy(lexed);
+ free(code);
+ return 1;
+ }
+
+ CodeGeneratorCodes *codes = codeGenerator(astTree);
+ if (codes == NULL) {
+ astTreeRootDelete(astTree);
+ parserNodeDelete(parsedRoot);
+ lexerNodeArrayDestroy(lexed);
+ free(code);
+ return 1;
+ }
+
+ char *fasm = codeGeneratorToFlatASM(codes);
+ if (codeGeneratorFlatASMExec("build/test-file", fasm)) {
+ system("./build/test-file");
+ }
+
+ free(fasm);
+ astTreeRootDelete(astTree);
+ parserNodeDelete(parsedRoot);
+ lexerNodeArrayDestroy(lexed);
+ free(code);
+ return 0;
+}
+
+int main(int argc, char *argv[]) {
+ (void)argc;
+ (void)argv;
+
+ if (argc < 2) {
+ printLog("Too few args");
+ return 1;
+ }
+
+ return runWithPrint(argv[1]);
+}
diff --git a/src/utils/file.c b/src/utils/file.c
new file mode 100644
index 0000000..c2dd614
--- /dev/null
+++ b/src/utils/file.c
@@ -0,0 +1,27 @@
+#include "file.h"
+
+#include "utils/log.h"
+#include "utils/memory.h"
+#include <stddef.h>
+#include <stdio.h>
+
+char *readWholeFile(const char *filePath) {
+ FILE *file = fopen(filePath, "r");
+
+ if (!file) {
+ printLog("Can't open file '%s'", filePath);
+ return NULL;
+ }
+ fseek(file, 0, SEEK_END);
+ const size_t file_size = ftell(file);
+ fseek(file, 0, SEEK_SET);
+
+ char *str = a404m_malloc((file_size + 1) * sizeof(char));
+
+ fread(str, file_size, 1, file);
+ str[file_size] = '\0';
+
+ fclose(file);
+
+ return str;
+}
diff --git a/src/utils/file.h b/src/utils/file.h
new file mode 100644
index 0000000..6f9753f
--- /dev/null
+++ b/src/utils/file.h
@@ -0,0 +1,3 @@
+#pragma once
+
+char *readWholeFile(const char *filePath);
diff --git a/src/utils/log.c b/src/utils/log.c
new file mode 100644
index 0000000..f54394a
--- /dev/null
+++ b/src/utils/log.c
@@ -0,0 +1,15 @@
+#include "log.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void _printLogBack(const char *format, const char *file, int line, ...) {
+ va_list args;
+ va_start(args, end);
+ char *errorStr;
+ vasprintf(&errorStr, format, args);
+
+ printf("\e[0;31mError: %s at compiler %s:%d\e[0m\n", errorStr, file, line);
+ free(errorStr);
+}
diff --git a/src/utils/log.h b/src/utils/log.h
new file mode 100644
index 0000000..32f6a8f
--- /dev/null
+++ b/src/utils/log.h
@@ -0,0 +1,5 @@
+#pragma once
+
+#define printLog(format,...) _printLogBack(format, __FILE_NAME__, __LINE__, ## __VA_ARGS__)
+
+extern void _printLogBack(const char *format, const char *file, int line, ...);
diff --git a/src/utils/memory.c b/src/utils/memory.c
new file mode 100644
index 0000000..cde2995
--- /dev/null
+++ b/src/utils/memory.c
@@ -0,0 +1,31 @@
+#include "memory.h"
+
+#include <stdlib.h>
+#include <malloc.h>
+
+void *a404m_malloc(size_t size) {
+ if (size == 0) {
+ return NULL;
+ } else {
+ return malloc(size);
+ }
+}
+
+void *a404m_realloc(void *restrict pointer, size_t size) {
+ if (size == 0) {
+ free(pointer);
+ return NULL;
+ } else if (pointer != NULL) {
+ return realloc(pointer, size);
+ } else {
+ return malloc(size);
+ }
+}
+
+size_t a404m_malloc_usable_size(void *pointer) {
+ if (pointer == NULL) {
+ return 0;
+ } else {
+ return malloc_usable_size(pointer);
+ }
+}
diff --git a/src/utils/memory.h b/src/utils/memory.h
new file mode 100644
index 0000000..29bd3db
--- /dev/null
+++ b/src/utils/memory.h
@@ -0,0 +1,7 @@
+#pragma once
+
+#include <stddef.h>
+
+extern void *a404m_malloc(size_t size);
+extern void *a404m_realloc(void *restrict pointer, size_t size);
+extern size_t a404m_malloc_usable_size(void *pointer);
diff --git a/src/utils/string.c b/src/utils/string.c
new file mode 100644
index 0000000..81be415
--- /dev/null
+++ b/src/utils/string.c
@@ -0,0 +1,14 @@
+#include "string.h"
+
+#include <string.h>
+
+size_t searchInStringArray(const char *array[], size_t array_size,
+ const char *str, size_t str_size) {
+ for (size_t i = 0; i < array_size; ++i) {
+ const size_t el_size = strlen(array[i]);
+ if (el_size == str_size && strncmp(array[i], str, str_size) == 0) {
+ return i;
+ }
+ }
+ return array_size;
+}
diff --git a/src/utils/string.h b/src/utils/string.h
new file mode 100644
index 0000000..79af8a9
--- /dev/null
+++ b/src/utils/string.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#include <stddef.h>
+
+size_t searchInStringArray(const char *array[], size_t array_size, const char *str,
+ size_t str_size);
diff --git a/test/goal.felan b/test/goal.felan
new file mode 100644
index 0000000..868dac2
--- /dev/null
+++ b/test/goal.felan
@@ -0,0 +1,85 @@
+void :: struct {};
+
+u64 :: struct {
+ _ : value64;
+};
+
+Syscall :: enum {
+ READ,
+ WRITE,
+ OPEN,
+ CLOSE,
+ EXIT,
+};
+
+StdFile :: enum {
+ IN,
+ OUT,
+ ERR,
+};
+
+syscall :: fasm (
+ sysc : Syscall,
+ arg0 : u64 = undefined,
+ arg1 : u64 = undefined,
+ arg2 : u64 = undefined,
+ arg3 : u64 = undefined,
+ arg4 : u64 = undefined,
+ arg5 : u64 = undefined,
+ arg6 : u64 = undefined,
+) -> void {
+ syscall
+};
+
+operator ltr order2 infix + :: fasm (a : u64, b : u64) -> u64 {
+ add_i64
+};
+
+operator ltr order2 infix - :: fasm (a : u64, b : u64) -> u64 {
+ sub_i64
+};
+
+operator ltr order3 infix * :: fasm (a : u64, b : u64) -> u64 {
+ mul_u64
+};
+
+operator ltr order3 infix / :: fasm (a : u64, b : u64) -> u64 {
+ div_u64
+};
+
+operator ltr order3 infix % :: fasm (a : u64, b : u64) -> u64 {
+ rem_u64
+};
+
+operator ltr order4 prefix + :: fasm (a : u64) -> u64 {
+ // nothing
+};
+
+operator ltr order4 prefix - :: fasm (a : u64, b : u64) -> u64 {
+ negu64
+};
+
+operator ltr order5 prefix ** :: (a : u64, b : u64) -> u64 {
+ return pow(a,b);
+};
+
+assignToAddr64 :: fasm (addr : value64, b : value64) -> void {
+ pop64
+};
+
+// self assign can't be overloaded because of pass to function itself
+operator rtl order1 infix = :: (a : u64, b : value64) -> u64 {
+ assignToAddr64(@addressOf(a),b);
+ return a;
+};
+
+exit :: (code : u64) -> void {
+ syscall(Syscall.EXIT, code);
+};
+
+// a comment
+main :: () -> void {
+ a : u64 = 2; // some comment
+ exit(a + 4);
+};
+
diff --git a/test/main.felan b/test/main.felan
new file mode 100644
index 0000000..eef127e
--- /dev/null
+++ b/test/main.felan
@@ -0,0 +1,3 @@
+main :: () -> void {
+ print;
+};