From 43392fc66ab207e53a6924a2edbcd7ca0acecea8 Mon Sep 17 00:00:00 2001 From: A404M Date: Sat, 18 Jan 2025 20:42:54 +0330 Subject: initial commit --- src/compiler/ast-tree.c | 226 ++++++++++++++++ src/compiler/ast-tree.h | 64 +++++ src/compiler/code-generator.c | 192 +++++++++++++ src/compiler/code-generator.h | 37 +++ src/compiler/lexer.c | 232 ++++++++++++++++ src/compiler/lexer.h | 66 +++++ src/compiler/parser.c | 614 ++++++++++++++++++++++++++++++++++++++++++ src/compiler/parser.h | 96 +++++++ src/main.c | 123 +++++++++ src/utils/file.c | 27 ++ src/utils/file.h | 3 + src/utils/log.c | 15 ++ src/utils/log.h | 5 + src/utils/memory.c | 31 +++ src/utils/memory.h | 7 + src/utils/string.c | 14 + src/utils/string.h | 6 + 17 files changed, 1758 insertions(+) create mode 100644 src/compiler/ast-tree.c create mode 100644 src/compiler/ast-tree.h create mode 100644 src/compiler/code-generator.c create mode 100644 src/compiler/code-generator.h create mode 100644 src/compiler/lexer.c create mode 100644 src/compiler/lexer.h create mode 100644 src/compiler/parser.c create mode 100644 src/compiler/parser.h create mode 100644 src/main.c create mode 100644 src/utils/file.c create mode 100644 src/utils/file.h create mode 100644 src/utils/log.c create mode 100644 src/utils/log.h create mode 100644 src/utils/memory.c create mode 100644 src/utils/memory.h create mode 100644 src/utils/string.c create mode 100644 src/utils/string.h (limited to 'src') diff --git a/src/compiler/ast-tree.c b/src/compiler/ast-tree.c new file mode 100644 index 0000000..f25fc51 --- /dev/null +++ b/src/compiler/ast-tree.c @@ -0,0 +1,226 @@ +#include "ast-tree.h" +#include "compiler/parser.h" +#include "utils/log.h" +#include "utils/memory.h" +#include +#include + +const char *AST_TREE_TOKEN_STRINGS[] = { + "AST_TREE_TOKEN_FUNCTION", + "AST_TREE_TOKEN_KEYWORD_PRINT", + "AST_TREE_TOKEN_NONE", +}; + +void astTreePrint(const AstTree *tree, int indent) { + for (int i = 0; i < indent; ++i) + printf(" "); + printf("{token=\"%s\"", AST_TREE_TOKEN_STRINGS[tree->token]); + switch (tree->token) { + case AST_TREE_TOKEN_FUNCTION: + AstTreeScope *metadata = tree->metadata; + printf(",\n"); + for (int i = 0; i < indent; ++i) + printf(" "); + printf("expressions=[\n"); + for (size_t i = 0; i < metadata->expressions_size; ++i) { + astTreePrint(&metadata->expressions[i], indent + 1); + printf(",\n"); + } + for (int i = 0; i < indent; ++i) + printf(" "); + printf("]"); + case AST_TREE_TOKEN_KEYWORD_PRINT: + goto RETURN_SUCCESS; + case AST_TREE_TOKEN_NONE: + } + + printLog("Bad token '%d'", tree->token); + exit(1); + +RETURN_SUCCESS: + printf("}"); +} + +void astTreeRootPrint(const AstTreeRoot *root) { + for (size_t i = 0; i < root->variables.size; ++i) { + const AstTreeVariable *variable = root->variables.data[i]; + printf("{\nname=\"%.*s\",\nvalue=\n", + (int)(variable->name_end - variable->name_begin), + variable->name_begin); + astTreePrint(variable->value, 1); + printf("\n},\n"); + } +} + +void astTreeDestroy(AstTree tree) { + switch (tree.token) { + case AST_TREE_TOKEN_FUNCTION: + AstTreeScope *metadata = tree.metadata; + for (size_t i = 0; i < metadata->expressions_size; ++i) { + astTreeDestroy(metadata->expressions[i]); + } + case AST_TREE_TOKEN_KEYWORD_PRINT: + return; + case AST_TREE_TOKEN_NONE: + } + printLog("Bad token '%d'", tree.token); + exit(1); +} + +void astTreeDelete(AstTree *tree) { + astTreeDestroy(*tree); + free(tree); +} + +void astTreeRootDelete(AstTreeRoot *root) { + for (size_t i = 0; i < root->variables.size; ++i) { + astTreeDelete(root->variables.data[i]->value); + } + free(root->variables.data); + free(root); +} + +AstTree *newAstTree(AstTreeToken token, void *metadata) { + AstTree *result = a404m_malloc(sizeof(*result)); + result->token = token; + result->metadata = metadata; + return result; +} + +AstTreeRoot *makeAstTree(ParserNode *parsedRoot) { + if (parsedRoot->token != PARSER_TOKEN_ROOT) { + return NULL; + } + + ParserNodeArray *nodes = parsedRoot->metadata; + + AstTreeRoot *root = a404m_malloc(sizeof(*root)); + + size_t variables_size = nodes->size; + root->variables.data = + a404m_malloc(variables_size * sizeof(*root->variables.data)); + root->variables.size = 0; + + for (size_t i = 0; i < nodes->size; ++i) { + ParserNode *eol = nodes->data[i]; + if (eol->token != PARSER_TOKEN_SYMBOL_EOL) { + printLog("Unexpected %s", PARSER_TOKEN_STRINGS[eol->token]); + goto RETURN_ERROR; + } + ParserNode *node = (ParserNodeEOLMetadata *)eol->metadata; + if (node->token != PARSER_TOKEN_CONSTANT) { + printLog("Unexpected %s", PARSER_TOKEN_STRINGS[node->token]); + goto RETURN_ERROR; + } + ParserNodeVariableMetadata *node_metadata = node->metadata; + + AstTreeVariable *variable = a404m_malloc(sizeof(*variable)); + variable->name_begin = node_metadata->name->str_begin; + variable->name_end = node_metadata->name->str_end; + + pushVariable(&root->variables, &variables_size, variable); + } + + for (size_t i = 0; i < nodes->size; ++i) { + ParserNode *node = (ParserNodeEOLMetadata *)nodes->data[i]->metadata; + ParserNodeVariableMetadata *node_metadata = node->metadata; + + // TODO: check type + AstTree *tree = astTreeParse(node_metadata->value, &root->variables, 1); + if (tree == NULL) { + goto RETURN_ERROR; + } + + root->variables.data[i]->value = tree; + } + + return root; + +RETURN_ERROR: + free(root->variables.data); + free(root); + return NULL; +} + +void pushVariable(AstTreeVariables *variables, size_t *variables_size, + AstTreeVariable *variable) { + if (*variables_size == variables->size) { + *variables_size += *variables_size / 2 + 1; + variables->data = a404m_realloc(variables->data, + *variables_size * sizeof(*variables->data)); + } + variables->data[variables->size] = variable; + variables->size += 1; +} + +AstTree *astTreeParse(ParserNode *parserNode, AstTreeVariables *variables, + size_t variables_size) { + switch (parserNode->token) { + case PARSER_TOKEN_KEYWORD_PRINT: + return newAstTree(AST_TREE_TOKEN_KEYWORD_PRINT, NULL); + case PARSER_TOKEN_FUNCTION_DEFINITION: + return astTreeParseFunction(parserNode, variables, variables_size); + case PARSER_TOKEN_SYMBOL_EOL: + case PARSER_TOKEN_SYMBOL_PARENTHESIS: + case PARSER_TOKEN_SYMBOL_CURLY_BRACKET: + case PARSER_TOKEN_CONSTANT: + case PARSER_TOKEN_IDENTIFIER: + case PARSER_TOKEN_TYPE_VOID: + case PARSER_TOKEN_TYPE_FUNCTION: + case PARSER_TOKEN_NONE: + case PARSER_TOKEN_ROOT: + } + printLog("Bad token %d", parserNode->token); + return NULL; +} + +AstTree *astTreeParseFunction(ParserNode *parserNode, + AstTreeVariables *variables, + size_t variables_size) { + ParserNodeFunctionDefnitionMetadata *node_metadata = parserNode->metadata; + ParserNodeArray *body = node_metadata->body->metadata; + + AstTreeScope *scope = a404m_malloc(sizeof(*scope)); + + size_t expressions_size = 0; + scope->expressions = + a404m_malloc(expressions_size * sizeof(*scope->expressions)); + scope->expressions_size = 0; + + for (size_t i = 0; i < body->size; ++i) { + ParserNode *eol = body->data[i]; + if (eol->token != PARSER_TOKEN_SYMBOL_EOL) { + printLog("Unexpected %s", PARSER_TOKEN_STRINGS[eol->token]); + goto RETURN_ERROR; + } + ParserNode *node = (ParserNodeEOLMetadata *)eol->metadata; + AstTree *tree = astTreeParse(node, variables, variables_size); + + if (tree == NULL) { + goto RETURN_ERROR; + } + + if (expressions_size == scope->expressions_size) { + expressions_size += expressions_size / 2 + 1; + scope->expressions = a404m_realloc( + scope->expressions, expressions_size * sizeof(*scope->expressions)); + } + scope->expressions[scope->expressions_size] = *tree; + scope->expressions_size += 1; + + free(tree); + } + + scope->expressions = + a404m_realloc(scope->expressions, + scope->expressions_size * sizeof(*scope->expressions)); + + AstTree *result = newAstTree(AST_TREE_TOKEN_FUNCTION, scope); + + return result; + +RETURN_ERROR: + free(scope->expressions); + free(scope); + return NULL; +} diff --git a/src/compiler/ast-tree.h b/src/compiler/ast-tree.h new file mode 100644 index 0000000..758d113 --- /dev/null +++ b/src/compiler/ast-tree.h @@ -0,0 +1,64 @@ +#pragma once + +#include "compiler/parser.h" +#include + +typedef enum AstTreeToken { + AST_TREE_TOKEN_FUNCTION, + AST_TREE_TOKEN_KEYWORD_PRINT, + AST_TREE_TOKEN_NONE, +} AstTreeToken; + +typedef struct AstTreeType { +} AstTreeType; + +typedef struct AstTree { + AstTreeToken token; + void *metadata; +} AstTree; + +typedef struct AstTreeVariable { + char *name_begin; + char *name_end; + AstTreeType type; + AstTree *value; +} AstTreeVariable; + +typedef struct AstTreeVariables { + AstTreeVariable **data; + size_t size; +} AstTreeVariables; + +typedef struct AstTreeRoot { + AstTreeVariables variables; +} AstTreeRoot; + +typedef struct AstTreeScope { + AstTreeVariable *variables; + size_t variables_size; + AstTree *expressions; + size_t expressions_size; +} AstTreeScope; + +extern const char *AST_TREE_TOKEN_STRINGS[]; + +void astTreePrint(const AstTree *tree,int indent); +void astTreeRootPrint(const AstTreeRoot *root); + +void astTreeDestroy(AstTree tree); +void astTreeDelete(AstTree *tree); +void astTreeRootDelete(AstTreeRoot *root); + +AstTree *newAstTree(AstTreeToken token, void *metadata); + +AstTreeRoot *makeAstTree(ParserNode *parsedRoot); + +void pushVariable(AstTreeVariables *variables, size_t *variables_size, + AstTreeVariable *variable); + +AstTree *astTreeParse(ParserNode *parserNode, AstTreeVariables *variables, + size_t variables_size); + +AstTree *astTreeParseFunction(ParserNode *parserNode, + AstTreeVariables *variables, + size_t variables_size); diff --git a/src/compiler/code-generator.c b/src/compiler/code-generator.c new file mode 100644 index 0000000..0de098c --- /dev/null +++ b/src/compiler/code-generator.c @@ -0,0 +1,192 @@ +#include "code-generator.h" +#include "compiler/ast-tree.h" +#include "utils/log.h" +#include "utils/memory.h" +#include +#include +#include + +CodeGeneratorCode createGenerateCode(char *label_begin, char *label_end, + CodeGeneratorInstruction instruction) { + CodeGeneratorCode code = { + .label_begin = label_begin, + .label_end = label_end, + .instruction = instruction, + }; + return code; +} + +CodeGeneratorCode *newGenerateCode(char *label_begin, char *label_end, + CodeGeneratorInstruction instruction) { + CodeGeneratorCode *result = a404m_malloc(sizeof(*result)); + result->label_begin = label_begin; + result->label_end = label_end; + result->instruction = instruction; + return result; +} + +void generateCodePushCode(CodeGeneratorCodes *codes, CodeGeneratorCode code) { + size_t codes_size = + a404m_malloc_usable_size(codes->codes) / sizeof(*codes->codes); + if (codes_size == codes->codes_size) { + codes_size += codes_size / 2 + 1; + codes->codes = + a404m_realloc(codes->codes, codes_size * sizeof(*codes->codes)); + } + codes->codes[codes->codes_size] = code; + codes->codes_size += 1; +} + +CodeGeneratorCodes *codeGenerator(AstTreeRoot *astTreeRoot) { + CodeGeneratorCodes *codes = a404m_malloc(sizeof(*codes)); + + codes->codes = a404m_malloc(0); + codes->codes_size = 0; + + for (size_t i = 0; i < astTreeRoot->variables.size; ++i) { + AstTreeVariable *variable = astTreeRoot->variables.data[i]; + switch (variable->value->token) { + case AST_TREE_TOKEN_FUNCTION: + codeGeneratorAstTreeFunction(variable->name_begin, variable->name_end, + *variable->value, codes); + continue; + case AST_TREE_TOKEN_KEYWORD_PRINT: + generateCodePushCode( + codes, + createGenerateCode(NULL, NULL, CODE_GENERATOR_INSTRUCTION_PRINT)); + continue; + case AST_TREE_TOKEN_NONE: + } + printLog("Bad token %d", variable->value->token); + } + + return codes; +} + +bool codeGeneratorAstTreeFunction(char *label_begin, char *label_end, + AstTree astTree, CodeGeneratorCodes *codes) { + AstTreeScope *scope = astTree.metadata; + + for (size_t i = 0; i < scope->expressions_size; ++i) { + AstTree tree = scope->expressions[i]; + switch (tree.token) { + case AST_TREE_TOKEN_KEYWORD_PRINT: + generateCodePushCode( + codes, createGenerateCode(label_begin, label_end, + CODE_GENERATOR_INSTRUCTION_PRINT)); + goto OK; + case AST_TREE_TOKEN_FUNCTION: + case AST_TREE_TOKEN_NONE: + } + printLog("Bad token %d", tree.token); + return false; + OK: + label_begin = NULL; + label_end = NULL; + } + + generateCodePushCode(codes, + createGenerateCode(label_begin, label_end, + CODE_GENERATOR_INSTRUCTION_RET)); + + return true; +} + +static const char TEMPLATE[] = + "format ELF64 executable 3\nSYS_exit = 60\nSYS_write = 1\nSTDOUT = " + "1\nsegment readable writable\nhello: db \"Hello, " + "World!\",0xa\nhello_len = $-hello\nsegment readable executable\nentry " + "_start\nprint:\nmov rax, SYS_write\nmov rdi, STDOUT\nmov rsi, " + "hello\nmov rdx, hello_len\nsyscall\nret\n_start:\ncall main\nmov rax, " + "SYS_exit\nxor " + "rdi,rdi\nsyscall\n"; +static const size_t TEMPLATE_LEN = + sizeof(TEMPLATE) / sizeof(*TEMPLATE) - sizeof(*TEMPLATE); + +static void codeGeneratorAppendFlatASMCommand(char **fasm, size_t *fasm_size, + size_t *fasm_inserted, + const char *str, size_t str_len) { + size_t inserted_last = *fasm_inserted; + *fasm_inserted += str_len; + if (*fasm_inserted + 1 >= *fasm_size) { + *fasm_size = *fasm_inserted + *fasm_inserted / 2 + 1; + *fasm = a404m_realloc(*fasm, *fasm_size * sizeof(*fasm)); + } + strncpy(*fasm + inserted_last, str, str_len); +} + +char *codeGeneratorToFlatASM(const CodeGeneratorCodes *codes) { + size_t fasm_size = TEMPLATE_LEN + 1; + char *fasm = a404m_malloc(fasm_size * sizeof(*fasm)); + size_t fasm_inserted = 0; + fasm[0] = '\0'; + + codeGeneratorAppendFlatASMCommand(&fasm, &fasm_size, &fasm_inserted, TEMPLATE, + TEMPLATE_LEN); + + for (size_t i = 0; i < codes->codes_size; ++i) { + const CodeGeneratorCode code = codes->codes[i]; + if (code.label_begin != code.label_end) { + codeGeneratorAppendFlatASMCommand(&fasm, &fasm_size, &fasm_inserted, + code.label_begin, + code.label_end - code.label_begin); + + codeGeneratorAppendFlatASMCommand(&fasm, &fasm_size, &fasm_inserted, + ":\n", 2); + } + + switch (code.instruction) { + case CODE_GENERATOR_INSTRUCTION_PRINT: { + constexpr char INST[] = "call print\n"; + codeGeneratorAppendFlatASMCommand(&fasm, &fasm_size, &fasm_inserted, INST, + strlen(INST)); + } + continue; + case CODE_GENERATOR_INSTRUCTION_RET: { + constexpr char INST[] = "ret\n"; + codeGeneratorAppendFlatASMCommand(&fasm, &fasm_size, &fasm_inserted, INST, + strlen(INST)); + } + continue; + } + printLog("Bad instruction %d", code.instruction); + } + + fasm[fasm_inserted] = '\0'; + return fasm; +} + +bool codeGeneratorFlatASMExec(const char *filePath, const char *fasm) { + const size_t filePathLen = strlen(filePath); + char *asmFilePath = a404m_malloc((filePathLen + 4 + 1) * sizeof(char)); + strncpy(asmFilePath, filePath, filePathLen); + strcpy(asmFilePath + filePathLen, ".asm"); + + FILE *file = fopen(asmFilePath, "w"); + + if (file == NULL) { + return false; + } + + fwrite(fasm, sizeof(*fasm), strlen(fasm), file); + + fclose(file); + + char *command; + asprintf(&command, "fasm -m 102400 \"%s\" \"%s\"", asmFilePath, filePath); + + if (system(command) != 0) { + free(command); + return false; + } + free(command); + + asprintf(&command, "chmod + \"%s\"", filePath); + + if (system(command) != 0) { + free(command); + return false; + } + + return true; +} diff --git a/src/compiler/code-generator.h b/src/compiler/code-generator.h new file mode 100644 index 0000000..3232fb3 --- /dev/null +++ b/src/compiler/code-generator.h @@ -0,0 +1,37 @@ +#pragma once + +#include "compiler/ast-tree.h" +#include +#include + +typedef enum CodeGeneratorInstruction : uint8_t { + CODE_GENERATOR_INSTRUCTION_PRINT, + CODE_GENERATOR_INSTRUCTION_RET, +} CodeGeneratorInstruction; + +typedef struct CodeGeneratorCode { + char *label_begin; + char *label_end; + CodeGeneratorInstruction instruction; +} CodeGeneratorCode; + +typedef struct CodeGeneratorCodes { + CodeGeneratorCode *codes; + size_t codes_size; +} CodeGeneratorCodes; + +CodeGeneratorCode createGenerateCode(char *label_begin, char *label_end, + CodeGeneratorInstruction instruction); + +CodeGeneratorCode *newGenerateCode(char *label_begin, char *label_end, + CodeGeneratorInstruction instruction); + +void generateCodePushCode(CodeGeneratorCodes *codes, CodeGeneratorCode code); + +CodeGeneratorCodes *codeGenerator(AstTreeRoot *astTreeRoot); + +bool codeGeneratorAstTreeFunction(char *label_begin,char *label_end,AstTree astTree, CodeGeneratorCodes *codes); + +char *codeGeneratorToFlatASM(const CodeGeneratorCodes *codes); + +bool codeGeneratorFlatASMExec(const char *filePath,const char *fasm); diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c new file mode 100644 index 0000000..b919be0 --- /dev/null +++ b/src/compiler/lexer.c @@ -0,0 +1,232 @@ +#include "lexer.h" + +#include "utils/log.h" +#include "utils/memory.h" +#include "utils/string.h" + +#include +#include +#include + +const char *LEXER_TOKEN_STRINGS[] = { + "LEXER_TOKEN_IDENTIFIER", + "LEXER_TOKEN_KEYWORD_VOID", + "LEXER_TOKEN_KEYWORD_PRINT", + + "LEXER_TOKEN_NUMBER", + + "LEXER_TOKEN_SYMBOL", + "LEXER_TOKEN_SYMBOL_EOL", + "LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS", + "LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS", + "LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET", + "LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET", + "LEXER_TOKEN_SYMBOL_FUNCTION_ARROW", + "LEXER_TOKEN_SYMBOL_COLON", + + "LEXER_TOKEN_NONE", +}; + +const char *LEXER_SYMBOL_STRINGS[] = { + ";", "(", ")", "{", "}", "->", ":", +}; +const LexerToken LEXER_SYMBOL_TOKENS[] = { + LEXER_TOKEN_SYMBOL_EOL, + LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS, + LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS, + LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET, + LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET, + LEXER_TOKEN_SYMBOL_FUNCTION_ARROW, + LEXER_TOKEN_SYMBOL_COLON, +}; +const size_t LEXER_SYMBOL_SIZE = + sizeof(LEXER_SYMBOL_TOKENS) / sizeof(*LEXER_SYMBOL_TOKENS); + +const char *LEXER_KEYWORD_STRINGS[] = { + "void", + "print", +}; +const LexerToken LEXER_KEYWORD_TOKENS[] = { + LEXER_TOKEN_KEYWORD_VOID, + LEXER_TOKEN_KEYWORD_PRINT, +}; +const size_t LEXER_KEYWORD_SIZE = + sizeof(LEXER_KEYWORD_TOKENS) / sizeof(*LEXER_KEYWORD_TOKENS); + +bool lexerNodeArrayIsError(LexerNodeArray array) { + return LEXER_NODE_ARRAY_ERROR.size == array.size; +} + +void lexerNodeArrayPrint(LexerNodeArray array) { + for (size_t i = 0; i < array.size; ++i) { + LexerNode node = array.data[i]; + printf("{str=\"%.*s\",token=%s}\n", (int)(node.str_end - node.str_begin), + node.str_begin, LEXER_TOKEN_STRINGS[node.token]); + } +} + +void lexerNodeArrayDestroy(LexerNodeArray array) { free(array.data); } + +LexerNodeArray lexer(char *str) { + size_t result_size = 0; + LexerNodeArray result = { + .data = a404m_malloc(result_size), + .size = 0, + }; + + LexerToken node_token = LEXER_TOKEN_NONE; + char *node_str_begin = str; + char *iter = str; + for (; *iter != '\0'; ++iter) { + char c = *iter; + if (c == '/') { + ++iter; + c = *iter; + if (c == '/') { + lexerPushClear(&result, &result_size, iter - 1, &node_str_begin, + &node_token, LEXER_TOKEN_NONE); + for (; *iter != '\n'; ++iter) { + if (*iter == '\0') { + goto RETURN_SUCCESS; + } + } + continue; + } else if (c == '*') { + lexerPushClear(&result, &result_size, iter - 1, &node_str_begin, + &node_token, LEXER_TOKEN_NONE); + printLog("Not implemented"); // TODO: implement it + exit(1); + } + } + if (isSpace(c)) { + lexerPushClear(&result, &result_size, iter, &node_str_begin, &node_token, + LEXER_TOKEN_NONE); + } else if (isIdentifier(c)) { + if (node_token != LEXER_TOKEN_IDENTIFIER) { + lexerPushClear(&result, &result_size, iter, &node_str_begin, + &node_token, LEXER_TOKEN_IDENTIFIER); + } + } else if (isNumber(c)) { + if (node_token != LEXER_TOKEN_IDENTIFIER && + node_token != LEXER_TOKEN_NUMBER) { + lexerPushClear(&result, &result_size, iter, &node_str_begin, + &node_token, LEXER_TOKEN_NUMBER); + } + } else if (isSymbol(c)) { + if (node_token != LEXER_TOKEN_SYMBOL) { + lexerPushClear(&result, &result_size, iter, &node_str_begin, + &node_token, LEXER_TOKEN_SYMBOL); + } + } else if (isSingleSymbol(c)) { + lexerPushClear(&result, &result_size, iter, &node_str_begin, &node_token, + LEXER_TOKEN_SYMBOL); + } else { + free(result.data); + printLog("Unexpected character '%c' at position %ld", c, iter - str); + return LEXER_NODE_ARRAY_ERROR; + } + } + lexerPushClear(&result, &result_size, iter, &node_str_begin, &node_token, + LEXER_TOKEN_NONE); + +RETURN_SUCCESS: + result.data = a404m_realloc(result.data, result.size * sizeof(*result.data)); + + return result; +} + +void lexerPushClear(LexerNodeArray *array, size_t *array_size, char *iter, + char **node_str_begin, LexerToken *node_token, + LexerToken token) { + switch (*node_token) { + case LEXER_TOKEN_IDENTIFIER: { + const size_t index = + searchInStringArray(LEXER_KEYWORD_STRINGS, LEXER_KEYWORD_SIZE, + *node_str_begin, iter - *node_str_begin); + if (index != LEXER_KEYWORD_SIZE) { + *node_token = LEXER_KEYWORD_TOKENS[index]; + } + } + goto PUSH; + case LEXER_TOKEN_SYMBOL: { + const size_t index = + searchInStringArray(LEXER_SYMBOL_STRINGS, LEXER_SYMBOL_SIZE, + *node_str_begin, iter - *node_str_begin); + if (index != LEXER_SYMBOL_SIZE) { + *node_token = LEXER_SYMBOL_TOKENS[index]; + } + } + // goto PUSH; + PUSH: + case LEXER_TOKEN_KEYWORD_VOID: + case LEXER_TOKEN_KEYWORD_PRINT: + case LEXER_TOKEN_NUMBER: + case LEXER_TOKEN_SYMBOL_EOL: + case LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS: + case LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS: + case LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET: + case LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET: + case LEXER_TOKEN_SYMBOL_FUNCTION_ARROW: + case LEXER_TOKEN_SYMBOL_COLON: + if (*array_size == array->size) { + *array_size += 1 + *array_size / 2; + array->data = + a404m_realloc(array->data, *array_size * sizeof(*array->data)); + } + + array->data[array->size].token = *node_token; + array->data[array->size].str_begin = *node_str_begin; + array->data[array->size].str_end = iter; + array->data[array->size].parserNode = NULL; + + array->size += 1; + + // goto RETURN_SUCCESS; + case LEXER_TOKEN_NONE: + goto RETURN_SUCCESS; + } + printLog("Bad token '%d'", *node_token); + exit(1); +RETURN_SUCCESS: + *node_str_begin = iter; + *node_token = token; +} + +bool isIdentifier(char c) { + return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_'; +} + +bool isNumber(char c) { return '0' <= c && c <= '9'; } + +bool isSymbol(char c) { + switch (c) { + case '-': + case '>': + case '.': + case '+': + case '*': + case '/': + case '%': + case '=': + return true; + default: + return false; + } +} + +bool isSingleSymbol(char c) { + switch (c) { + case ';': + case ':': + case ',': + case '(': + case ')': + case '{': + case '}': + return true; + default: + return false; + } +} + +bool isSpace(char c) { return isspace(c); } diff --git a/src/compiler/lexer.h b/src/compiler/lexer.h new file mode 100644 index 0000000..3c3bc3f --- /dev/null +++ b/src/compiler/lexer.h @@ -0,0 +1,66 @@ +#pragma once + +#include + +typedef enum LexerToken { + LEXER_TOKEN_IDENTIFIER, + LEXER_TOKEN_KEYWORD_VOID, + LEXER_TOKEN_KEYWORD_PRINT, + + LEXER_TOKEN_NUMBER, + + LEXER_TOKEN_SYMBOL, + LEXER_TOKEN_SYMBOL_EOL, + LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS, + LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS, + LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET, + LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET, + LEXER_TOKEN_SYMBOL_FUNCTION_ARROW, + LEXER_TOKEN_SYMBOL_COLON, + + LEXER_TOKEN_NONE, +} LexerToken; + +extern const char *LEXER_TOKEN_STRINGS[]; + +extern const char *LEXER_SYMBOL_STRINGS[]; +extern const LexerToken LEXER_SYMBOL_TOKENS[]; +extern const size_t LEXER_SYMBOL_SIZE; + +extern const char *LEXER_KEYWORD_STRINGS[]; +extern const LexerToken LEXER_KEYWORD_TOKENS[]; +extern const size_t LEXER_KEYWORD_SIZE; + +struct ParserNode; + +typedef struct LexerNode { + char *str_begin; + char *str_end; + LexerToken token; + struct ParserNode *parserNode; +} LexerNode; + +typedef struct LexerNodeArray { + LexerNode *data; + size_t size; +} LexerNodeArray; + +constexpr LexerNodeArray LEXER_NODE_ARRAY_ERROR = { + .size = -1ULL, +}; + +extern bool lexerNodeArrayIsError(LexerNodeArray array); +extern void lexerNodeArrayPrint(LexerNodeArray array); +extern void lexerNodeArrayDestroy(LexerNodeArray array); + +extern LexerNodeArray lexer(char *str); + +extern void lexerPushClear(LexerNodeArray *array, size_t *array_size, + char *iter, char **node_str_begin, + LexerToken *node_token, LexerToken token); + +extern bool isIdentifier(char c); +extern bool isNumber(char c); +extern bool isSymbol(char c); +extern bool isSingleSymbol(char c); +extern bool isSpace(char c); diff --git a/src/compiler/parser.c b/src/compiler/parser.c new file mode 100644 index 0000000..e9fec5f --- /dev/null +++ b/src/compiler/parser.c @@ -0,0 +1,614 @@ +#include "parser.h" +#include "compiler/lexer.h" +#include "utils/log.h" +#include "utils/memory.h" +#include +#include +#include +#include + +const char *PARSER_TOKEN_STRINGS[] = { + "PARSER_TOKEN_ROOT", + + "PARSER_TOKEN_IDENTIFIER", + + "PARSER_TOKEN_TYPE_FUNCTION", + "PARSER_TOKEN_TYPE_VOID", + + "PARSER_TOKEN_KEYWORD_PRINT", + + "PARSER_TOKEN_CONSTANT", + + "PARSER_TOKEN_SYMBOL_EOL", + "PARSER_TOKEN_SYMBOL_CURLY_BRACKET", + "PARSER_TOKEN_SYMBOL_PARENTHESIS", + + "PARSER_TOKEN_FUNCTION_DEFINITION", + + "PARSER_TOKEN_NONE", +}; + +static constexpr ParserOrder PARSER_ORDER[] = { + { + .ltr = true, + .size = 2, + .data = + { + LEXER_TOKEN_IDENTIFIER, + LEXER_TOKEN_KEYWORD_VOID, + }, + }, + { + .ltr = true, + .size = 2, + .data = + { + LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS, + LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET, + }, + }, + { + .ltr = false, + .size = 1, + .data = + { + LEXER_TOKEN_SYMBOL_FUNCTION_ARROW, + }, + }, + { + .ltr = true, + .size = 2, + .data = + { + LEXER_TOKEN_SYMBOL_COLON, + LEXER_TOKEN_KEYWORD_PRINT, + }, + }, + { + .ltr = true, + .size = 1, + .data = + { + LEXER_TOKEN_SYMBOL_EOL, + }, + }, +}; + +static constexpr size_t PARSER_ORDER_SIZE = + sizeof(PARSER_ORDER) / sizeof(*PARSER_ORDER); + +void parserNodePrint(const ParserNode *node, int indent) { + for (int i = 0; i < indent; ++i) + printf(" "); + + if (node == NULL) { + printf("null"); + return; + } + + printf("{token=\"%s\"", PARSER_TOKEN_STRINGS[node->token]); + + switch (node->token) { + case PARSER_TOKEN_ROOT: + case PARSER_TOKEN_SYMBOL_CURLY_BRACKET: + case PARSER_TOKEN_SYMBOL_PARENTHESIS: { + const ParserNodeArray *metadata = node->metadata; + printf(",children=[\n"); + for (size_t i = 0; i < metadata->size; ++i) { + parserNodePrint(metadata->data[i], indent + 1); + printf(",\n"); + } + for (int i = 0; i < indent; ++i) + printf(" "); + printf("]"); + } + goto RETURN_SUCCESS; + case PARSER_TOKEN_IDENTIFIER: + case PARSER_TOKEN_TYPE_VOID: + case PARSER_TOKEN_KEYWORD_PRINT: + goto RETURN_SUCCESS; + case PARSER_TOKEN_CONSTANT: { + const ParserNodeVariableMetadata *metadata = node->metadata; + printf(",\n"); + for (int i = 0; i < indent; ++i) + printf(" "); + printf("name=\n"); + parserNodePrint(metadata->name, indent + 1); + printf(",\n"); + for (int i = 0; i < indent; ++i) + printf(" "); + printf("type=\n"); + parserNodePrint(metadata->type, indent + 1); + printf(",\n"); + for (int i = 0; i < indent; ++i) + printf(" "); + printf("value=\n"); + parserNodePrint(metadata->value, indent + 1); + printf("\n"); + for (int i = 0; i < indent; ++i) + printf(" "); + } + goto RETURN_SUCCESS; + case PARSER_TOKEN_SYMBOL_EOL: { + const ParserNodeEOLMetadata *metadata = node->metadata; + printf(",\n"); + for (int i = 0; i < indent; ++i) + printf(" "); + printf("child=\n"); + parserNodePrint(metadata, indent + 1); + printf("\n"); + for (int i = 0; i < indent; ++i) + printf(" "); + } + goto RETURN_SUCCESS; + case PARSER_TOKEN_FUNCTION_DEFINITION: { + const ParserNodeFunctionDefnitionMetadata *metadata = node->metadata; + printf(",\n"); + for (int i = 0; i < indent; ++i) + printf(" "); + printf("arguments=\n"); + parserNodePrint(metadata->arguments, indent + 1); + printf(",\n"); + for (int i = 0; i < indent; ++i) + printf(" "); + printf("returnType=\n"); + parserNodePrint(metadata->returnType, indent + 1); + printf(",\n"); + for (int i = 0; i < indent; ++i) + printf(" "); + printf("body=\n"); + parserNodePrint(metadata->body, indent + 1); + printf("\n"); + for (int i = 0; i < indent; ++i) + printf(" "); + } + goto RETURN_SUCCESS; + case PARSER_TOKEN_TYPE_FUNCTION: + case PARSER_TOKEN_NONE: + } + printLog("Bad token '%d'", node->token); + exit(1); + +RETURN_SUCCESS: + printf("}"); +} + +void parserNodeDelete(ParserNode *node) { + if (node == NULL) { + return; + } + switch (node->token) { + case PARSER_TOKEN_ROOT: + case PARSER_TOKEN_SYMBOL_CURLY_BRACKET: + case PARSER_TOKEN_SYMBOL_PARENTHESIS: { + ParserNodeArray *metadata = node->metadata; + for (size_t i = 0; i < metadata->size; ++i) { + parserNodeDelete(metadata->data[i]); + } + free(metadata->data); + free(metadata); + } + goto RETURN_SUCCESS; + case PARSER_TOKEN_IDENTIFIER: + case PARSER_TOKEN_TYPE_VOID: + case PARSER_TOKEN_KEYWORD_PRINT: + goto RETURN_SUCCESS; + case PARSER_TOKEN_CONSTANT: { + ParserNodeVariableMetadata *metadata = node->metadata; + parserNodeDelete(metadata->name); + parserNodeDelete(metadata->type); + parserNodeDelete(metadata->value); + free(metadata); + } + goto RETURN_SUCCESS; + case PARSER_TOKEN_SYMBOL_EOL: { + ParserNodeEOLMetadata *metadata = node->metadata; + parserNodeDelete(metadata); + } + goto RETURN_SUCCESS; + case PARSER_TOKEN_FUNCTION_DEFINITION: { + ParserNodeFunctionDefnitionMetadata *metadata = node->metadata; + parserNodeDelete(metadata->arguments); + parserNodeDelete(metadata->returnType); + parserNodeDelete(metadata->body); + free(metadata); + } + goto RETURN_SUCCESS; + case PARSER_TOKEN_TYPE_FUNCTION: + case PARSER_TOKEN_NONE: + } + printLog("Bad token '%d'", node->token); + exit(1); + +RETURN_SUCCESS: + free(node); +} + +ParserNode *newParserNode(ParserToken token, char *str_begin, char *str_end, + void *metadata, ParserNode *parent) { + ParserNode *parserNode = a404m_malloc(sizeof(*parserNode)); + parserNode->token = token; + parserNode->str_begin = str_begin; + parserNode->str_end = str_end; + parserNode->metadata = metadata; + parserNode->parent = parent; + return parserNode; +} + +ParserNode *parser(LexerNodeArray lexed) { + ParserNode *root = newParserNode(PARSER_TOKEN_ROOT, NULL, NULL, NULL, NULL); + if (parserNodeArray(lexed.data, lexed.data + lexed.size, root)) { + return root; + } else { + free(root); + return NULL; + } +} + +bool parserNodeArray(LexerNode *begin, LexerNode *end, ParserNode *parent) { + size_t parsedNodes_size = 0; + + ParserNodeArray *parsedNodes = a404m_malloc(sizeof(*parsedNodes)); + parsedNodes->data = + a404m_malloc(parsedNodes_size * sizeof(*parsedNodes->data)); + parsedNodes->size = 0; + + for (size_t orders_index = 0; orders_index < PARSER_ORDER_SIZE; + ++orders_index) { + const ParserOrder orders = PARSER_ORDER[orders_index]; + + for (LexerNode *iter = orders.ltr ? begin : end - 1; + iter < end && iter >= begin; orders.ltr ? ++iter : --iter) { + + if (iter->parserNode != NULL) { + continue; + } + + for (size_t order_index = 0; order_index < orders.size; ++order_index) { + const LexerToken order = orders.data[order_index]; + if (iter->token == order) { + ParserNode *parserNode = parseNode(iter, begin, end, parent); + if (parserNode == NULL) { + goto RETURN_ERROR; + } + if (parsedNodes->size == parsedNodes_size) { + parsedNodes_size += parsedNodes_size / 2 + 1; + parsedNodes->data = + a404m_realloc(parsedNodes->data, + sizeof(*parsedNodes->data) * parsedNodes_size); + } + parsedNodes->data[parsedNodes->size] = parserNode; + parsedNodes->size += 1; + } + } + } + } + + size_t inserted = 0; + for (size_t i = 0; i < parsedNodes->size; ++i) { + ParserNode *parserNode = parsedNodes->data[i]; + if (parserNode->parent == parent) { + // TODO: check here + if (inserted != i) { + parsedNodes->data[inserted] = parserNode; + } + ++inserted; + } + } + + parsedNodes->data = + a404m_realloc(parsedNodes->data, inserted * sizeof(*parsedNodes->data)); + parsedNodes->size = inserted; + + parent->metadata = parsedNodes; + + return true; + +RETURN_ERROR: + for (size_t i = 0; i < parsedNodes->size; ++i) { + free(parsedNodes->data[i]); + } + free(parsedNodes->data); + free(parsedNodes); + return false; +} + +ParserNode *parseNode(LexerNode *node, LexerNode *begin, LexerNode *end, + ParserNode *parent) { + switch (node->token) { + case LEXER_TOKEN_IDENTIFIER: + return parserIdentifier(node, parent); + case LEXER_TOKEN_KEYWORD_VOID: + return parserVoid(node, parent); + case LEXER_TOKEN_KEYWORD_PRINT: + return parserPrint(node, parent); + case LEXER_TOKEN_SYMBOL_EOL: + return parserEol(node, begin, parent); + case LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS: + return parserParenthesis(node, begin, parent); + case LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET: + return parserCurlyBrackets(node, begin, parent); + case LEXER_TOKEN_SYMBOL_FUNCTION_ARROW: + return parserFunction(node, begin, end, parent); + case LEXER_TOKEN_SYMBOL_COLON: + return parserVariable(node, begin, end, parent); + case LEXER_TOKEN_NONE: + case LEXER_TOKEN_NUMBER: + case LEXER_TOKEN_SYMBOL: + case LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS: + case LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET: + } + printLog("Bad token '%d'", node->token); + return NULL; +} + +ParserNode *getUntilCommonParent(ParserNode *node, ParserNode *parent) { + while (node != NULL && node->parent != parent) { + node = node->parent; + } + return node; +} + +ParserNode *parserIdentifier(LexerNode *node, ParserNode *parent) { + ParserNode *parserNode = newParserNode( + PARSER_TOKEN_IDENTIFIER, node->str_begin, node->str_end, NULL, parent); + node->parserNode = parserNode; + return parserNode; +} + +ParserNode *parserVoid(LexerNode *node, ParserNode *parent) { + ParserNode *parserNode = newParserNode( + PARSER_TOKEN_TYPE_VOID, node->str_begin, node->str_end, NULL, parent); + node->parserNode = parserNode; + return parserNode; +} + +ParserNode *parserPrint(LexerNode *node, ParserNode *parent) { + ParserNode *parserNode = newParserNode( + PARSER_TOKEN_KEYWORD_PRINT, node->str_begin, node->str_end, NULL, parent); + node->parserNode = parserNode; + return parserNode; +} + +ParserNode *parserEol(LexerNode *node, LexerNode *begin, ParserNode *parent) { + LexerNode *nodeBeore = node - 1; + ParserNode *parserNodeBefore; + if (nodeBeore < begin) { + parserNodeBefore = NULL; + } else if (nodeBeore->parserNode == NULL) { + printLog("Bad EOL after %s", LEXER_TOKEN_STRINGS[nodeBeore->token]); + return NULL; + } else { + parserNodeBefore = getUntilCommonParent(nodeBeore->parserNode, parent); + if (parserNodeBefore == NULL || !isExpression(parserNodeBefore)) { + printLog("Bad EOL"); + return NULL; + } + } + ParserNode *parserNode = + newParserNode(PARSER_TOKEN_SYMBOL_EOL, node->str_begin, node->str_end, + (ParserNodeEOLMetadata *)parserNodeBefore, parent); + node->parserNode = parserNode; + if (parserNodeBefore != NULL) { + parserNodeBefore->parent = parserNode; + } + return parserNode; +} + +ParserNode *parserParenthesis(LexerNode *closing, LexerNode *begin, + ParserNode *parent) { + LexerNode *opening = NULL; + + for (LexerNode *iter = closing - 1; iter >= begin; --iter) { + if (iter->parserNode == NULL && + iter->token == LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS) { + opening = iter; + break; + } + } + + if (opening == NULL) { + printLog("No opening for parenthesis"); + return NULL; + } + + ParserNode *parserNode = + newParserNode(PARSER_TOKEN_SYMBOL_PARENTHESIS, opening->str_begin, + closing->str_end, NULL, parent); + + opening->parserNode = parserNode; + closing->parserNode = parserNode; + if (parserNodeArray(opening + 1, closing, parserNode)) { + return parserNode; + } else { + free(parserNode); + return NULL; + } +} + +ParserNode *parserCurlyBrackets(LexerNode *closing, LexerNode *begin, + ParserNode *parent) { + LexerNode *opening = NULL; + + for (LexerNode *iter = closing - 1; iter >= begin; --iter) { + if (iter->parserNode == NULL && + iter->token == LEXER_TOKEN_SYMBOL_OPEN_CURLY_BRACKET) { + opening = iter; + break; + } + } + + if (opening == NULL) { + printLog("No opening for curly brackets"); + return NULL; + } + + ParserNode *parserNode = + newParserNode(PARSER_TOKEN_SYMBOL_CURLY_BRACKET, opening->str_begin, + closing->str_end, NULL, parent); + + opening->parserNode = parserNode; + closing->parserNode = parserNode; + if (parserNodeArray(opening + 1, closing, parserNode)) { + return parserNode; + } else { + free(parserNode); + return NULL; + } +} + +ParserNode *parserFunction(LexerNode *node, LexerNode *begin, LexerNode *end, + ParserNode *parent) { + LexerNode *paramsNode = node - 1; + LexerNode *retTypeNode = node + 1; + LexerNode *bodyNode = node + 2; + // TODO: if body is not there then it is a function type not a function + // definition + if (paramsNode < begin || paramsNode->parserNode == NULL) { + NO_PARAMS: + printLog("No params"); + return NULL; + } else if (retTypeNode >= end || retTypeNode->parserNode == NULL) { + NO_RETURN_TYPE: + printLog("No return type"); + return NULL; + } + ParserNode *params = getUntilCommonParent(paramsNode->parserNode, parent); + ParserNode *retType = getUntilCommonParent(retTypeNode->parserNode, parent); + ParserNode *body; + + if (bodyNode >= end || bodyNode->parserNode == NULL) { + body = NULL; + // TODO: implement + printLog("Not implemented"); + return NULL; + } else { + body = getUntilCommonParent(bodyNode->parserNode, parent); + } + + // TODO: check parenthesis + if (params->token != PARSER_TOKEN_SYMBOL_PARENTHESIS) { + goto NO_PARAMS; + } else if (!isType(retType)) { + goto NO_RETURN_TYPE; + } + + // TODO: check body + + ParserNodeFunctionDefnitionMetadata *metadata = + a404m_malloc(sizeof(*metadata)); + + metadata->body = body; + metadata->arguments = params; + metadata->returnType = retType; + + return params->parent = retType->parent = body->parent = node->parserNode = + newParserNode(PARSER_TOKEN_FUNCTION_DEFINITION, params->str_begin, + body->str_end, metadata, parent); +} + +ParserNode *parserVariable(LexerNode *node, LexerNode *begin, LexerNode *end, + ParserNode *parent) { + LexerNode *nameNode = node - 1; + if (nameNode < begin || nameNode->parserNode == NULL) { + printLog("No name"); + return NULL; + } + + ParserNode *name = getUntilCommonParent(nameNode->parserNode, parent); + + if (name->token != PARSER_TOKEN_IDENTIFIER) { + printLog("Name should be identifier"); + return NULL; + } + + LexerNode *node1 = node + 1; + + if (node1 >= end || node1->token != LEXER_TOKEN_SYMBOL_COLON) { + printLog("Bad variable definition"); + return NULL; + } + + LexerNode *valueNode = node + 2; + + if (valueNode >= end || valueNode->parserNode == NULL) { + printLog("No initialziation"); + return NULL; + } + + ParserNode *value = getUntilCommonParent(valueNode->parserNode, parent); + + if (!isValue(value)) { + printLog("No value"); + return NULL; + } + + ParserNodeVariableMetadata *metadata = a404m_malloc(sizeof(*metadata)); + metadata->value = value; + metadata->name = name; + metadata->type = NULL; + + return name->parent = node1->parserNode = value->parent = node->parserNode = + newParserNode(PARSER_TOKEN_CONSTANT, name->str_begin, + value->str_end, metadata, parent); +} + +bool isExpression(ParserNode *node) { + switch (node->token) { + case PARSER_TOKEN_IDENTIFIER: + case PARSER_TOKEN_CONSTANT: + case PARSER_TOKEN_SYMBOL_PARENTHESIS: + case PARSER_TOKEN_FUNCTION_DEFINITION: + case PARSER_TOKEN_KEYWORD_PRINT: + return true; + case PARSER_TOKEN_ROOT: + case PARSER_TOKEN_TYPE_FUNCTION: + case PARSER_TOKEN_TYPE_VOID: + case PARSER_TOKEN_SYMBOL_EOL: + case PARSER_TOKEN_SYMBOL_CURLY_BRACKET: + return false; + case PARSER_TOKEN_NONE: + } + printLog("Bad token '%d'", node->token); + exit(1); +} + +bool isType(ParserNode *node) { + switch (node->token) { + case PARSER_TOKEN_TYPE_VOID: + return true; + case PARSER_TOKEN_IDENTIFIER: + case PARSER_TOKEN_CONSTANT: + case PARSER_TOKEN_SYMBOL_PARENTHESIS: + case PARSER_TOKEN_FUNCTION_DEFINITION: + case PARSER_TOKEN_KEYWORD_PRINT: + case PARSER_TOKEN_ROOT: + case PARSER_TOKEN_TYPE_FUNCTION: + case PARSER_TOKEN_SYMBOL_EOL: + case PARSER_TOKEN_SYMBOL_CURLY_BRACKET: + return false; + case PARSER_TOKEN_NONE: + } + printLog("Bad token '%d'", node->token); + exit(1); +} + +bool isValue(ParserNode *node) { + switch (node->token) { + case PARSER_TOKEN_FUNCTION_DEFINITION: + return true; + case PARSER_TOKEN_TYPE_VOID: + case PARSER_TOKEN_IDENTIFIER: + case PARSER_TOKEN_CONSTANT: + case PARSER_TOKEN_SYMBOL_PARENTHESIS: + case PARSER_TOKEN_KEYWORD_PRINT: + case PARSER_TOKEN_ROOT: + case PARSER_TOKEN_TYPE_FUNCTION: + case PARSER_TOKEN_SYMBOL_EOL: + case PARSER_TOKEN_SYMBOL_CURLY_BRACKET: + return false; + case PARSER_TOKEN_NONE: + } + printLog("Bad token '%d'", node->token); + exit(1); +} diff --git a/src/compiler/parser.h b/src/compiler/parser.h new file mode 100644 index 0000000..1e6f487 --- /dev/null +++ b/src/compiler/parser.h @@ -0,0 +1,96 @@ +#pragma once + +#include "compiler/lexer.h" +#include + +typedef enum ParserToken { + PARSER_TOKEN_ROOT, + + PARSER_TOKEN_IDENTIFIER, + + PARSER_TOKEN_TYPE_FUNCTION, + PARSER_TOKEN_TYPE_VOID, + + PARSER_TOKEN_KEYWORD_PRINT, + + PARSER_TOKEN_CONSTANT, + + PARSER_TOKEN_SYMBOL_EOL, + PARSER_TOKEN_SYMBOL_CURLY_BRACKET, + PARSER_TOKEN_SYMBOL_PARENTHESIS, + + PARSER_TOKEN_FUNCTION_DEFINITION, + + PARSER_TOKEN_NONE, +} ParserToken; + +extern const char *PARSER_TOKEN_STRINGS[]; + +typedef struct ParserOrder { + bool ltr; + size_t size; + LexerToken data[8]; +} ParserOrder; + +typedef struct ParserNode { + ParserToken token; + char *str_begin; + char *str_end; + void *metadata; + struct ParserNode *parent; +} ParserNode; + +typedef struct ParserNodeTypeFunctionMetadata { + ParserNode *arguments; + ParserNode *returnType; +} ParserNodeTypeFunctionMetadata; + +typedef struct ParserNodeVariableMetadata { + ParserNode *name; + ParserNode *type; + ParserNode *value; +} ParserNodeVariableMetadata; + +typedef struct ParserNodeFunctionDefnitionMetadata { + ParserNode *arguments; + ParserNode *returnType; + ParserNode *body; +} ParserNodeFunctionDefnitionMetadata; + +typedef struct ParserNodeArray { + ParserNode **data; + size_t size; +} ParserNodeArray; + +typedef ParserNode ParserNodeEOLMetadata; + +void parserNodePrint(const ParserNode *node,int indent); +void parserNodeDelete(ParserNode *node); + +ParserNode *parser(LexerNodeArray lexed); +bool parserNodeArray(LexerNode *begin, LexerNode *end, ParserNode *parent); + +ParserNode *newParserNode(ParserToken token, char *str_begin, char *str_end, + void *metadata, ParserNode *parent); + +ParserNode *parseNode(LexerNode *node, LexerNode *begin, LexerNode *end, + ParserNode *parent); + +ParserNode *getUntilCommonParent(ParserNode *node, ParserNode *parent); + +ParserNode *parserIdentifier(LexerNode *node, ParserNode *parent); +ParserNode *parserVoid(LexerNode *node, ParserNode *parent); +ParserNode *parserPrint(LexerNode *node, ParserNode *parent); +ParserNode *parserEol(LexerNode *node, LexerNode *begin, ParserNode *parent); +ParserNode *parserParenthesis(LexerNode *closing, LexerNode *begin, + ParserNode *parent); +ParserNode *parserCurlyBrackets(LexerNode *closing, LexerNode *begin, + ParserNode *parent); +ParserNode *parserFunction(LexerNode *node, LexerNode *begin, LexerNode *end, + ParserNode *parent); +ParserNode *parserVariable(LexerNode *node, LexerNode *begin, LexerNode *end, + ParserNode *parent); + +bool isExpression(ParserNode *node); +bool isType(ParserNode *node); +bool isValue(ParserNode *node); diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..68da20e --- /dev/null +++ b/src/main.c @@ -0,0 +1,123 @@ +#include "compiler/ast-tree.h" +#include "compiler/code-generator.h" +#include "compiler/lexer.h" +#include "compiler/parser.h" +#include "utils/file.h" +#include "utils/log.h" +#include +#include + +int runWithPrint(const char *filePath) { + char *code = readWholeFile(filePath); + if (code == NULL) { + return 1; + } + + LexerNodeArray lexed = lexer(code); + if (lexerNodeArrayIsError(lexed)) { + free(code); + return 1; + } + lexerNodeArrayPrint(lexed); + + ParserNode *parsedRoot = parser(lexed); + if (parsedRoot == NULL) { + lexerNodeArrayDestroy(lexed); + free(code); + return 1; + } + parserNodePrint(parsedRoot, 0); + printf("\n"); + + AstTreeRoot *astTree = makeAstTree(parsedRoot); + if (astTree == NULL) { + parserNodeDelete(parsedRoot); + lexerNodeArrayDestroy(lexed); + free(code); + return 1; + } + astTreeRootPrint(astTree); + + CodeGeneratorCodes *codes = codeGenerator(astTree); + if (codes == NULL) { + astTreeRootDelete(astTree); + parserNodeDelete(parsedRoot); + lexerNodeArrayDestroy(lexed); + free(code); + return 1; + } + + char *fasm = codeGeneratorToFlatASM(codes); + printf("%s", fasm); + if (codeGeneratorFlatASMExec("build/test-file", fasm)) { + system("./build/test-file"); + } + + free(fasm); + astTreeRootDelete(astTree); + parserNodeDelete(parsedRoot); + lexerNodeArrayDestroy(lexed); + free(code); + return 0; +} + +int run(const char *filePath) { + char *code = readWholeFile(filePath); + if (code == NULL) { + return 1; + } + + LexerNodeArray lexed = lexer(code); + if (lexerNodeArrayIsError(lexed)) { + free(code); + return 1; + } + + ParserNode *parsedRoot = parser(lexed); + if (parsedRoot == NULL) { + lexerNodeArrayDestroy(lexed); + free(code); + return 1; + } + + AstTreeRoot *astTree = makeAstTree(parsedRoot); + if (astTree == NULL) { + parserNodeDelete(parsedRoot); + lexerNodeArrayDestroy(lexed); + free(code); + return 1; + } + + CodeGeneratorCodes *codes = codeGenerator(astTree); + if (codes == NULL) { + astTreeRootDelete(astTree); + parserNodeDelete(parsedRoot); + lexerNodeArrayDestroy(lexed); + free(code); + return 1; + } + + char *fasm = codeGeneratorToFlatASM(codes); + if (codeGeneratorFlatASMExec("build/test-file", fasm)) { + system("./build/test-file"); + } + + free(fasm); + astTreeRootDelete(astTree); + parserNodeDelete(parsedRoot); + lexerNodeArrayDestroy(lexed); + free(code); + return 0; +} + +int main(int argc, char *argv[]) { + (void)argc; + (void)argv; + + if (argc < 2) { + printLog("Too few args"); + return 1; + } + + return runWithPrint(argv[1]); +} diff --git a/src/utils/file.c b/src/utils/file.c new file mode 100644 index 0000000..c2dd614 --- /dev/null +++ b/src/utils/file.c @@ -0,0 +1,27 @@ +#include "file.h" + +#include "utils/log.h" +#include "utils/memory.h" +#include +#include + +char *readWholeFile(const char *filePath) { + FILE *file = fopen(filePath, "r"); + + if (!file) { + printLog("Can't open file '%s'", filePath); + return NULL; + } + fseek(file, 0, SEEK_END); + const size_t file_size = ftell(file); + fseek(file, 0, SEEK_SET); + + char *str = a404m_malloc((file_size + 1) * sizeof(char)); + + fread(str, file_size, 1, file); + str[file_size] = '\0'; + + fclose(file); + + return str; +} diff --git a/src/utils/file.h b/src/utils/file.h new file mode 100644 index 0000000..6f9753f --- /dev/null +++ b/src/utils/file.h @@ -0,0 +1,3 @@ +#pragma once + +char *readWholeFile(const char *filePath); diff --git a/src/utils/log.c b/src/utils/log.c new file mode 100644 index 0000000..f54394a --- /dev/null +++ b/src/utils/log.c @@ -0,0 +1,15 @@ +#include "log.h" + +#include +#include +#include + +void _printLogBack(const char *format, const char *file, int line, ...) { + va_list args; + va_start(args, end); + char *errorStr; + vasprintf(&errorStr, format, args); + + printf("\e[0;31mError: %s at compiler %s:%d\e[0m\n", errorStr, file, line); + free(errorStr); +} diff --git a/src/utils/log.h b/src/utils/log.h new file mode 100644 index 0000000..32f6a8f --- /dev/null +++ b/src/utils/log.h @@ -0,0 +1,5 @@ +#pragma once + +#define printLog(format,...) _printLogBack(format, __FILE_NAME__, __LINE__, ## __VA_ARGS__) + +extern void _printLogBack(const char *format, const char *file, int line, ...); diff --git a/src/utils/memory.c b/src/utils/memory.c new file mode 100644 index 0000000..cde2995 --- /dev/null +++ b/src/utils/memory.c @@ -0,0 +1,31 @@ +#include "memory.h" + +#include +#include + +void *a404m_malloc(size_t size) { + if (size == 0) { + return NULL; + } else { + return malloc(size); + } +} + +void *a404m_realloc(void *restrict pointer, size_t size) { + if (size == 0) { + free(pointer); + return NULL; + } else if (pointer != NULL) { + return realloc(pointer, size); + } else { + return malloc(size); + } +} + +size_t a404m_malloc_usable_size(void *pointer) { + if (pointer == NULL) { + return 0; + } else { + return malloc_usable_size(pointer); + } +} diff --git a/src/utils/memory.h b/src/utils/memory.h new file mode 100644 index 0000000..29bd3db --- /dev/null +++ b/src/utils/memory.h @@ -0,0 +1,7 @@ +#pragma once + +#include + +extern void *a404m_malloc(size_t size); +extern void *a404m_realloc(void *restrict pointer, size_t size); +extern size_t a404m_malloc_usable_size(void *pointer); diff --git a/src/utils/string.c b/src/utils/string.c new file mode 100644 index 0000000..81be415 --- /dev/null +++ b/src/utils/string.c @@ -0,0 +1,14 @@ +#include "string.h" + +#include + +size_t searchInStringArray(const char *array[], size_t array_size, + const char *str, size_t str_size) { + for (size_t i = 0; i < array_size; ++i) { + const size_t el_size = strlen(array[i]); + if (el_size == str_size && strncmp(array[i], str, str_size) == 0) { + return i; + } + } + return array_size; +} diff --git a/src/utils/string.h b/src/utils/string.h new file mode 100644 index 0000000..79af8a9 --- /dev/null +++ b/src/utils/string.h @@ -0,0 +1,6 @@ +#pragma once + +#include + +size_t searchInStringArray(const char *array[], size_t array_size, const char *str, + size_t str_size); -- cgit v1.2.3