diff options
author | A404M <ahmadmahmoudiprogrammer@gmail.com> | 2024-09-22 19:34:43 +0330 |
---|---|---|
committer | A404M <ahmadmahmoudiprogrammer@gmail.com> | 2024-09-22 19:34:43 +0330 |
commit | f79290084948f3cf140395c270c07cf29ca58e8d (patch) | |
tree | d716526678782153f3617bbf78984b4c4ebed380 | |
parent | d2ab53c625d386a4fbc6a9d5a5eb29faab1b3f0c (diff) |
Better errors
Added variables
-rw-r--r-- | README.md | 13 | ||||
-rwxr-xr-x | project | 2 | ||||
-rw-r--r-- | src/compiler/code_generator/code_generator.c | 213 | ||||
-rw-r--r-- | src/compiler/code_generator/code_generator.h | 19 | ||||
-rw-r--r-- | src/compiler/error_helper/error_helper.c | 85 | ||||
-rw-r--r-- | src/compiler/error_helper/error_helper.h | 6 | ||||
-rw-r--r-- | src/compiler/lexer/lexer.c | 145 | ||||
-rw-r--r-- | src/compiler/lexer/lexer.h | 12 | ||||
-rw-r--r-- | src/compiler/parser/parser.c | 740 | ||||
-rw-r--r-- | src/compiler/parser/parser.h | 92 | ||||
-rw-r--r-- | src/compiler/tree_parser/tree_parser.c | 695 | ||||
-rw-r--r-- | src/compiler/tree_parser/tree_parser.h | 113 | ||||
-rw-r--r-- | src/main.c | 108 | ||||
-rw-r--r-- | src/utils/types.h | 2 | ||||
-rw-r--r-- | src/vm/runner/runner.c | 115 | ||||
-rw-r--r-- | src/vm/runner/runner.h | 27 | ||||
-rw-r--r-- | stdlib/builtins.felan | 19 | ||||
-rw-r--r-- | test.felan | 2 |
18 files changed, 2075 insertions, 333 deletions
@@ -48,3 +48,16 @@ To comment a block you can use C style comments print("Hi comment"/* Here another one */); ``` +Identifier symbols are a way to use keywords as identifier +```felan +`print`("Hello"); +// This is the same as +print("Hello"); +``` + +To define variables you can use : operator +```felan +helloVar:String = "hello"; +print(helloVar); +``` + @@ -12,8 +12,10 @@ function compile(){ gcc -Wall -Wextra -std=gnu23 -I./src/ -O3 \ ./src/main.c \ + ./src/compiler/error_helper/error_helper.c \ ./src/compiler/lexer/lexer.c \ ./src/compiler/parser/parser.c \ + ./src/compiler/tree_parser/tree_parser.c \ ./src/compiler/code_generator/code_generator.c \ ./src/vm/runner/runner.c \ ./src/utils/memory/memory.c \ diff --git a/src/compiler/code_generator/code_generator.c b/src/compiler/code_generator/code_generator.c index 8560129..2b575d8 100644 --- a/src/compiler/code_generator/code_generator.c +++ b/src/compiler/code_generator/code_generator.c @@ -7,6 +7,8 @@ #include <utils/memory/memory.h> #include <utils/types.h> +#include "compiler/tree_parser/tree_parser.h" + const char *COMMAND_STRINGS[] = { "COMMAND_NONE", "COMMAND_CALL_FUNCTION", @@ -21,6 +23,8 @@ void printInstruction(Instruction instruction) { return; case COMMAND_CALL_FUNCTION: case COMMAND_PUSH_STRING: + case COMMAND_POP_IDENTIFIER: + case COMMAND_PUSH_IDENTIFIER: SizedString *sizedString = instruction.operand; printf(" '%.*s'\n", (int)sizedString->size, sizedString->str); return; @@ -40,6 +44,8 @@ void deleteInstruction(Instruction instruction) { return; case COMMAND_PUSH_STRING: case COMMAND_CALL_FUNCTION: + case COMMAND_PUSH_IDENTIFIER: + case COMMAND_POP_IDENTIFIER: SizedString *sizedString = instruction.operand; free(sizedString->str); free(sizedString); @@ -55,30 +61,47 @@ void deleteInstructions(Instructions instructions) { free(instructions.instructions); } -Instructions codeGenerator(ParsedNode *root) { - const ScopeMetadata *metadata = root->metadata; +Instructions codeGenerator(SourceCode code) { + ParsedTree *root = treeParser(code); + if (root != NULL) { + Instructions instructions = _codeGenerator(root, code); + + deleteParsedTree(root); + return instructions; + } + const Instructions error = { + .instructions = NULL, + .size = ERROR_SIZE, + }; + return error; +} + +Instructions _codeGenerator(ParsedTree *root, SourceCode code) { + const TreeScopeMetadata *metadata = root->metadata; size_t instructions_size = 10; Instruction *instructions = - a404m_malloc(instructions_size * sizeof(Instruction)); + a404m_malloc(instructions_size * sizeof(*instructions)); size_t instructions_inserted = 0; - for (size_t i = 0; i < metadata->operands_size; ++i) { - ParsedNode *node = metadata->operands[i]; + for (size_t i = 0; i < metadata->lines_size; ++i) { + ParsedTree *node = metadata->lines[i]; if (!nodeToInstruction(node, &instructions, &instructions_size, - &instructions_inserted)) { + &instructions_inserted, code)) { goto RETURN_ERROR; } } Instructions result = { .instructions = a404m_realloc( - instructions, instructions_inserted * sizeof(Instruction)), + instructions, instructions_inserted * sizeof(*instructions)), .size = instructions_inserted, }; return result; RETURN_ERROR: + free(instructions); + const Instructions error = { .instructions = NULL, .size = ERROR_SIZE, @@ -86,65 +109,99 @@ RETURN_ERROR: return error; } -bool nodeToInstruction(ParsedNode *node, Instruction **instructions, - size_t *instructions_size, - size_t *instructions_inserted) { - switch (node->token) { - case PARSED_TOKEN_FUNCTION_CALL: { - FunctionCallMetadata *metadata = node->metadata; - const ScopeMetadata *scope = metadata->scope; - for (size_t i = 0; i < scope->operands_size; ++i) { - if (!nodeToInstruction(scope->operands[i], instructions, - instructions_size, instructions_inserted)) { +bool nodeToInstruction(ParsedTree *tree, Instruction **instructions, + size_t *instructions_size, size_t *instructions_inserted, + SourceCode code) { + switch (tree->token) { + case TREE_TOKEN_FUNCTION_CALL: { + const TreeFunctionCallMetadata *tree_metadata = tree->metadata; + for (size_t i = 0; i < tree_metadata->values_size; ++i) { + if (!nodeToInstruction(tree_metadata->values[i], instructions, + instructions_size, instructions_inserted, + code)) { return false; } } - SizedString *string = a404m_malloc(sizeof(*string)); - string->size = metadata->functionNameEnd - metadata->functionNameBegin; - string->str = a404m_malloc(string->size); - strncpy(string->str, metadata->functionNameBegin, string->size); - if (string == NULL) { - return false; - } + CommandCallFunctionOperand *operand = a404m_malloc(sizeof(*operand)); + operand->size = + tree_metadata->function->nameEnd - tree_metadata->function->nameBegin; + operand->str = a404m_malloc((operand->size + 1) * sizeof(char)); + strncpy(operand->str, tree_metadata->function->nameBegin, operand->size); const Instruction instruction = { .command = COMMAND_CALL_FUNCTION, - .operand = string, + .operand = operand, }; insertInstruction(instruction, instructions, instructions_size, instructions_inserted); return true; } - case PARSED_TOKEN_PARENTHESIS: { - const ScopeMetadata *metadata = node->metadata; - for (size_t i = 0; i < metadata->operands_size; ++i) { - if (!nodeToInstruction(metadata->operands[i], instructions, - instructions_size, instructions_inserted)) { - return false; - } - } + case TREE_TOKEN_IDENTIFIER: { + const TreeIdentifierMetadata *tree_metadata = tree->metadata; + CommandPushIdentifierOperand *operand = a404m_malloc(sizeof(*operand)); + operand->size = + tree_metadata->variable->nameEnd - tree_metadata->variable->nameBegin; + operand->str = a404m_malloc((operand->size + 1) * sizeof(char)); + strncpy(operand->str, tree_metadata->variable->nameBegin, + operand->size * sizeof(char)); + + const Instruction instruction = { + .command = COMMAND_PUSH_IDENTIFIER, + .operand = operand, + }; + insertInstruction(instruction, instructions, instructions_size, + instructions_inserted); return true; } - case PARSED_TOKEN_EOL: - return nodeToInstruction(node->metadata, instructions, instructions_size, - instructions_inserted); - case PARSED_TOKEN_VALUE_STRING: { - SizedString *string = nodeToString(node); - if (string == NULL) { + case TREE_TOKEN_VALUE_STRING: { + const TreeStringValueMetadata *tree_metadata = tree->metadata; + CommandPushStringOperand *operand = a404m_malloc(sizeof(*operand)); + operand->size = tree_metadata->size; + operand->str = a404m_malloc((operand->size + 1) * sizeof(char)); + memcpy(operand->str, tree_metadata->str, + (operand->size + 1) * sizeof(char)); + + const Instruction instruction = { + .command = COMMAND_PUSH_STRING, + .operand = operand, + }; + insertInstruction(instruction, instructions, instructions_size, + instructions_inserted); + return true; + } + case TREE_TOKEN_DEFINE_VARIABLE: { + const TreeDefineVariableMetadata *tree_metadata = tree->metadata; + if (tree_metadata->value == NULL) { + return true; + } else if (!nodeToInstruction(tree_metadata->value, instructions, + instructions_size, instructions_inserted, + code)) { return false; } + CommandPopIdentifierOperand *operand = a404m_malloc(sizeof(*operand)); + + operand->size = tree_metadata->nameEnd - tree_metadata->nameBegin; + operand->str = a404m_malloc((operand->size + 1) * sizeof(char)); + strncpy(operand->str, tree_metadata->nameBegin, operand->size); + const Instruction instruction = { - .command = COMMAND_PUSH_STRING, - .operand = string, + .command = COMMAND_POP_IDENTIFIER, + .operand = operand, }; insertInstruction(instruction, instructions, instructions_size, instructions_inserted); return true; } - case PARSED_TOKEN_NONE: - case PARSED_TOKEN_ROOT: + case TREE_TOKEN_DEFINE_CONSTANT: + return true; + case TREE_TOKEN_GLOBAL_SCOPE: + case TREE_TOKEN_LOCAL_SCOPE: + case TREE_TOKEN_STRUCT: + case TREE_TOKEN_FUNCTION: + case TREE_TOKEN_NONE: } - fprintf(stderr, "unexpected token %s\n", PARSED_TOKEN_STRINGS[node->token]); + printError("Unhandled token %s\n", code, tree->strBegin, tree->strEnd, + TREE_TOKEN_STRINGS[tree->token]); return false; } @@ -160,71 +217,3 @@ void insertInstruction(const Instruction instruction, (*instructions)[*instructions_inserted] = instruction; ++*instructions_inserted; } - -SizedString *nodeToString(ParsedNode const *node) { - const char *strBegin = node->strBegin + 1; - const char *strEnd = node->strEnd - 1; - - char *str = a404m_malloc((strEnd - strBegin + 1) * sizeof(char)); - size_t inserted = 0; - - for (char const *iter = strBegin; iter < strEnd; ++iter) { - char c = *iter; - if (c == '\\') { - if (++iter < strEnd) { - switch (*iter) { - case '\'': - c = '\''; - break; - case '\"': - c = '\"'; - break; - case '\\': - c = '\\'; - break; - case 'a': - c = '\a'; - break; - case 'b': - c = '\b'; - break; - case 'f': - c = '\f'; - break; - case 'n': - c = '\n'; - break; - case 'r': - c = '\r'; - break; - case 't': - c = '\t'; - break; - case 'v': - c = '\v'; - break; - /*case 'u':*/ // TODO: do it - /* c = '';*/ - /* break;*/ - default: - fprintf(stderr, "bad string, bad '\\'\n"); - goto RETURN_ERROR; - } - } else { - fprintf(stderr, "bad string, bad '\\'\n"); - goto RETURN_ERROR; - } - } - str[inserted] = c; - ++inserted; - } - - str[inserted] = '\0'; - SizedString *string = a404m_malloc(sizeof(SizedString)); - string->str = a404m_realloc(str, (inserted + 1) * sizeof(char)); - string->size = inserted; - return string; -RETURN_ERROR: - free(str); - return NULL; -} diff --git a/src/compiler/code_generator/code_generator.h b/src/compiler/code_generator/code_generator.h index da5b686..0f8b69d 100644 --- a/src/compiler/code_generator/code_generator.h +++ b/src/compiler/code_generator/code_generator.h @@ -2,10 +2,15 @@ #include <compiler/parser/parser.h> +#include "compiler/tree_parser/tree_parser.h" +#include "utils/types.h" + typedef enum Command { COMMAND_NONE = 0, COMMAND_CALL_FUNCTION, COMMAND_PUSH_STRING, + COMMAND_PUSH_IDENTIFIER, + COMMAND_POP_IDENTIFIER, } Command; extern const char *COMMAND_STRINGS[]; @@ -20,21 +25,25 @@ typedef struct Instructions { size_t size; } Instructions; +typedef SizedString CommandCallFunctionOperand; +typedef SizedString CommandPushStringOperand; +typedef SizedString CommandPushIdentifierOperand; +typedef SizedString CommandPopIdentifierOperand; + extern void printInstruction(Instruction instruction); extern void printInstructions(Instructions instructions); extern void deleteInstruction(Instruction instruction); extern void deleteInstructions(Instructions instructions); -extern Instructions codeGenerator(ParsedNode *root); +extern Instructions codeGenerator(SourceCode code); +extern Instructions _codeGenerator(ParsedTree *root, SourceCode code); -extern bool nodeToInstruction(ParsedNode *node, Instruction **instructions, +extern bool nodeToInstruction(ParsedTree *tree, Instruction **instructions, size_t *instructions_size, - size_t *instructions_inserted); + size_t *instructions_inserted, SourceCode code); extern void insertInstruction(const Instruction instruction, Instruction **restrict instructions, size_t *restrict instructions_size, size_t *restrict instructions_inserted); - -extern SizedString *nodeToString(ParsedNode const *node); diff --git a/src/compiler/error_helper/error_helper.c b/src/compiler/error_helper/error_helper.c new file mode 100644 index 0000000..fd648e4 --- /dev/null +++ b/src/compiler/error_helper/error_helper.c @@ -0,0 +1,85 @@ +#include "error_helper.h" + +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> + +void printError(char const *error, SourceCode code, char const *begin, + char const *end, ...) { + va_list args; + va_start(args, end); + char *errorStr; + vasprintf(&errorStr, error, args); + + char const *lineBegin = code; + int line = 1; + + char const *iter = code; + for (; iter < begin; ++iter) { + const char c = *iter; + switch (c) { + case '\0': + fprintf(stderr, + "Compiler Internal Error: in printing errors at compiler %s:%d", + __FILE_NAME__, __LINE__); + exit(1); + case '\n': + lineBegin = iter + 1; + ++line; + break; + } + } + char const *lineEnd = end; + + for (;; ++iter) { + const char c = *iter; + switch (c) { + case '\0': + if (iter < end) { + fprintf( + stderr, + "Compiler Internal Error: in printing errors at compiler %s:%d", + __FILE_NAME__, __LINE__); + exit(1); + } + lineEnd = iter; + goto AFTER_LOOP; + case '\n': + lineEnd = iter; + if (iter >= end) { + goto AFTER_LOOP; + } + break; + } + } +AFTER_LOOP: + + fprintf(stderr, "Error: %s at line %d\n", errorStr, line); + + int printed = 0; + for (iter = lineBegin; iter < lineEnd; ++iter) { + fprintf(stderr, "%c", *iter); + if (*iter == '\n') { + PRINT_LINE: + for (int i = 0; i < printed; ++i) { + const char *ch = iter - printed + i; + if (begin <= ch && ch < end) { + fprintf(stderr, "^"); + } else { + fprintf(stderr, " "); + } + } + if (iter + 1 != lineEnd) { + fprintf(stderr, "\n"); + printed = 0; + } + } else if (iter + 1 == lineEnd) { + fprintf(stderr, "\n"); + goto PRINT_LINE; + } else { + ++printed; + } + } + fprintf(stderr, "\n"); + free(errorStr); +} diff --git a/src/compiler/error_helper/error_helper.h b/src/compiler/error_helper/error_helper.h new file mode 100644 index 0000000..3a78c7c --- /dev/null +++ b/src/compiler/error_helper/error_helper.h @@ -0,0 +1,6 @@ +#pragma once + +#include <utils/types.h> + +extern void printError(char const *error, SourceCode code, char const *begin, + char const *end, ...); diff --git a/src/compiler/lexer/lexer.c b/src/compiler/lexer/lexer.c index b48be3c..ebcccc8 100644 --- a/src/compiler/lexer/lexer.c +++ b/src/compiler/lexer/lexer.c @@ -5,6 +5,8 @@ #include <stdlib.h> #include <utils/memory/memory.h> +#include "utils/types.h" + const char *TOKEN_STRINGS[] = { "TOKEN_NONE", "TOKEN_IDENTIFIER", @@ -18,17 +20,30 @@ const char *TOKEN_STRINGS[] = { "TOKEN_OPERATOR_ASSIGN", "TOKEN_OPERATOR_EQUAL", "TOKEN_OPERATOR_COLON", + "TOKEN_OPERATOR_COMMA", "TOKEN_OPERATOR_EOL", + "TOKEN_OPERATOR_FUNCTION", "TOKEN_SYMBOL", + "TOKEN_KEYWORD_STRUCT", + "TOKEN_KEYWORD_EXTERNAL", + "TOKEN_KEYWORD_IMPORT", "TOKEN_PARSED", }; -static const char *KEYWORDS_STRINGS[] = {}; -static const Token KEYWORDS_TOKENS[] = {}; +static const char *KEYWORDS_STRINGS[] = { + "struct", + "external", + "import", +}; +static const Token KEYWORDS_TOKENS[] = { + TOKEN_KEYWORD_STRUCT, + TOKEN_KEYWORD_EXTERNAL, + TOKEN_KEYWORD_IMPORT, +}; static const size_t KEYWORDS_SIZE = sizeof(KEYWORDS_STRINGS) / sizeof(char *); static const char *OPERATORS_STRINGS[] = { - "(", ")", "{", "}", "=", "==", ":", ";", + "(", ")", "{", "}", "=", "==", ":", ",", ";", "->", }; static const Token OPERATORS_TOKENS[] = { TOKEN_OPERATOR_PARENTHESES_OPEN, @@ -38,7 +53,9 @@ static const Token OPERATORS_TOKENS[] = { TOKEN_OPERATOR_ASSIGN, TOKEN_OPERATOR_EQUAL, TOKEN_OPERATOR_COLON, + TOKEN_OPERATOR_COMMA, TOKEN_OPERATOR_EOL, + TOKEN_OPERATOR_FUNCTION, }; static const size_t OPERATORS_SIZE = sizeof(OPERATORS_STRINGS) / sizeof(char *); @@ -52,161 +69,156 @@ void printNodes(Nodes nodes) { void deleteNodes(Nodes nodes) { free(nodes.nodes); } -Nodes lexer(char const *restrict str) { +Nodes lexer(char const *const restrict code) { size_t nodes_size = 10; Node *nodes = a404m_malloc(nodes_size * sizeof(Node)); size_t nodes_inserted = 0; Node node = { - .strBegin = str, - .strEnd = str, + .strBegin = code, + .strEnd = code, .token = TOKEN_NONE, }; for (int i = 0;; ++i) { - const char c = str[i]; + const char c = code[i]; if (c == '\0') { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); break; } else if (c == '/') { - const char follow = str[i + 1]; + const char follow = code[i + 1]; if (follow == '/') { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); - for (i += 2; str[i] != '\0' && str[i] != '\n'; ++i); - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + for (i += 2; code[i] != '\0' && code[i] != '\n'; ++i); + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); - if (str[i] == '\0') { + if (code[i] == '\0') { goto RETURN_SUCCESS; } continue; } else if (follow == '*') { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); int in = 1; for (i += 2;; ++i) { - switch (str[i]) { + switch (code[i]) { case '\0': - fprintf(stderr, - "expected multi line comment to end at compiler line %d " - "and in=%d\n", - __LINE__, in); - exit(1); + printError("Expected multi line comment to end", code, + node.strBegin, code + i); + goto RETURN_ERROR; case '*': ++i; - if (str[i] == '/') { + if (code[i] == '/') { --in; if (in == 0) { goto END_OF_BLOCK_COMMENT_LOOP; } - } else if (str[i] == '\0') { - fprintf(stderr, - "expected multi line comment to end at compiler line " - "%d and in=%d\n", - __LINE__, in); - exit(1); + } else if (code[i] == '\0') { + printError("Expected multi line comment to end", code, + node.strBegin, code + i); + goto RETURN_ERROR; } break; case '/': ++i; - if (str[i] == '*') { + if (code[i] == '*') { ++in; - } else if (str[i] == '\0') { - fprintf(stderr, - "expected multi line comment to end at compiler line " - "%d and in=%d\n", - __LINE__, in); - exit(1); + } else if (code[i] == '\0') { + printError("Expected multi line comment to end", code, + node.strBegin, code + i); + goto RETURN_ERROR; } break; } } END_OF_BLOCK_COMMENT_LOOP: - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); - if (str[i] == '\0') { + if (code[i] == '\0') { goto RETURN_SUCCESS; } continue; } } if (isSpace(c)) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); } else if (isIdentifier(c)) { if (node.token != TOKEN_IDENTIFIER && node.token != TOKEN_SYMBOL) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_IDENTIFIER); } } else if (isIdentifierSymbol(c)) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_IDENTIFIER); for (++i;; ++i) { - const char current = str[i]; + const char current = code[i]; if (current == c) { break; } else if (current == '\0') { - fprintf(stderr, "expected %c to end\n", c); - exit(1); + printError("Expected %c to end", code, node.strBegin, code + i, c); + goto RETURN_ERROR; } } ++node.strBegin; - push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node, str, - i, TOKEN_NONE); + push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node, + code, i, TOKEN_NONE); } else if (isNumber(c)) { if (node.token != TOKEN_NUMBER && node.token != TOKEN_IDENTIFIER && node.token != TOKEN_SYMBOL) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NUMBER); } } else if (isString(c)) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_STRING); for (++i;; ++i) { - const char current = str[i]; + const char current = code[i]; if (current == c) { break; } else if (current == '\\') { ++i; } else if (current == '\0') { - fprintf(stderr, "expected %c to end\n", c); - exit(1); + printError("Expected %c to end", code, node.strBegin, code + i, c); + goto RETURN_ERROR; } } ++i; - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); --i; } else if (isOperator(c)) { if (node.token == TOKEN_OPERATOR) { - const Token token = getOperator(node.strBegin, str + i + 1); + const Token token = getOperator(node.strBegin, code + i + 1); if (token != TOKEN_NONE) { continue; } else { - node.token = getOperator(node.strBegin, str + i); + node.token = getOperator(node.strBegin, code + i); if (node.token == TOKEN_NONE) { - fprintf(stderr, "unknown operator '%.*s'\n", - (int)(str + i - node.strBegin), node.strBegin); - exit(1); + printError("Unknown operator '%.*s'", code, node.strBegin, + node.strEnd, (int)(code + i - node.strBegin), + node.strBegin); + goto RETURN_ERROR; } push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node, - str, i, TOKEN_OPERATOR); + code, i, TOKEN_OPERATOR); } } else { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_OPERATOR); } } else if (isSymbol(c)) { if (node.token != TOKEN_SYMBOL) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_SYMBOL); } } else { - fprintf(stderr, "unexpected char '%c'\n", c); - exit(1); + printError("Unexpected char '%c'", code, code + i, code + i + 1, c); + goto RETURN_ERROR; } } @@ -217,6 +229,14 @@ RETURN_SUCCESS: }; return result; +RETURN_ERROR: + free(nodes); + Nodes error = { + .nodes = NULL, + .size = ERROR_SIZE, + }; + + return error; } void push_if_not_empty(Node **restrict nodes, size_t *restrict nodes_size, @@ -298,9 +318,8 @@ bool isOperator(char c) { case ',': case ';': return true; - default: - return false; } + return false; } bool isSymbol(char c) { return c == '#'; } @@ -312,7 +331,7 @@ Token getTokenInStrings(char const *strBegin, char const *strEnd, for (size_t i = 0; i < size; ++i) { const char *search = strings[i]; - // faster than strlen+strncpy + // faster than strlen+strncmp for (size_t j = 0;; ++j) { const char searchChar = search[j]; if (j == strSize) { diff --git a/src/compiler/lexer/lexer.h b/src/compiler/lexer/lexer.h index 802b50c..8d88a90 100644 --- a/src/compiler/lexer/lexer.h +++ b/src/compiler/lexer/lexer.h @@ -1,5 +1,6 @@ #pragma once +#include <compiler/error_helper/error_helper.h> #include <stddef.h> #include <utils/memory/memory.h> #include <utils/types.h> @@ -17,18 +18,25 @@ typedef enum Token { TOKEN_OPERATOR_ASSIGN, TOKEN_OPERATOR_EQUAL, TOKEN_OPERATOR_COLON, + TOKEN_OPERATOR_COMMA, TOKEN_OPERATOR_EOL, + TOKEN_OPERATOR_FUNCTION, TOKEN_SYMBOL, + TOKEN_KEYWORD_STRUCT, + TOKEN_KEYWORD_EXTERNAL, + TOKEN_KEYWORD_IMPORT, TOKEN_PARSED, } Token; extern const char *TOKEN_STRINGS[]; +struct ParsedNode; + typedef struct Node { char const *strBegin; char const *strEnd; Token token; - void *parsedNode; + struct ParsedNode *parsedNode; } Node; typedef struct Nodes { @@ -39,7 +47,7 @@ typedef struct Nodes { extern void printNodes(Nodes nodes); extern void deleteNodes(Nodes nodes); -extern Nodes lexer(char const *restrict str); +extern Nodes lexer(char const *const restrict code); extern void push_if_not_empty(Node **restrict nodes, size_t *restrict nodes_size, diff --git a/src/compiler/parser/parser.c b/src/compiler/parser/parser.c index 36b2574..61a0cf9 100644 --- a/src/compiler/parser/parser.c +++ b/src/compiler/parser/parser.c @@ -1,14 +1,27 @@ #include "parser.h" +#include <compiler/error_helper/error_helper.h> #include <compiler/lexer/lexer.h> #include <stdio.h> #include <stdlib.h> #include <utils/memory/memory.h> +#include <utils/types.h> const char *PARSED_TOKEN_STRINGS[] = { - "PARSED_TOKEN_NONE", "PARSED_TOKEN_ROOT", - "PARSED_TOKEN_PARENTHESIS", "PARSED_TOKEN_FUNCTION_CALL", - "PARSED_TOKEN_VALUE_STRING", "PARSED_TOKEN_EOL", + "PARSED_TOKEN_NONE", + "PARSED_TOKEN_ROOT", + "PARSED_TOKEN_PARENTHESIS", + "PARSED_TOKEN_FUNCTION_CALL", + "PARSED_TOKEN_VALUE_STRING", + "PARSED_TOKEN_VALUE_IDENTIFIER", + "PARSED_TOKEN_DEFINE_VARIABLE", + "PARSED_TOKEN_DEFINE_CONSTANT", + "PARSED_TOKEN_EOL", + "PARSED_TOKEN_COMMA", + "PARSED_TOKEN_STRUCT", + "PARSED_TOKEN_FUNCTION", + "PARSED_TOKEN_FUNCTION_PARAMS", + "PARSED_TOKEN_CODE_BODY", }; static const ParseOrder PARSE_ORDER[] = { @@ -23,6 +36,31 @@ static const ParseOrder PARSE_ORDER[] = { }, { .ltr = true, + .size = 2, + .tokens = + { + TOKEN_STRING, + TOKEN_IDENTIFIER, + }, + }, + { + .ltr = true, + .size = 1, + .tokens = + { + TOKEN_OPERATOR_FUNCTION, + }, + }, + { + .ltr = true, + .size = 1, + .tokens = + { + TOKEN_KEYWORD_STRUCT, + }, + }, + { + .ltr = true, .size = 3, .tokens = { @@ -32,11 +70,11 @@ static const ParseOrder PARSE_ORDER[] = { }, }, { - .ltr = true, + .ltr = false, .size = 1, .tokens = { - TOKEN_STRING, + TOKEN_OPERATOR_COMMA, }, }, { @@ -63,15 +101,20 @@ ParsedNode *newParsedNode(char const *strBegin, char const *strEnd, } void _printParsedNode(const ParsedNode *parsedNode, int indent) { + if (parsedNode == NULL) { + for (int i = 0; i < indent; ++i) printf(" "); + printf("null\n"); + return; + } for (int i = 0; i < indent; ++i) printf(" "); printf("{token=%s", PARSED_TOKEN_STRINGS[parsedNode->token]); switch (parsedNode->token) { - case PARSED_TOKEN_NONE: - break; + case PARSED_TOKEN_FUNCTION_PARAMS: case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_CODE_BODY: case PARSED_TOKEN_ROOT: { ++indent; - const ScopeMetadata *metadata = parsedNode->metadata; + const ParserScopeMetadata *metadata = parsedNode->metadata; printf(",operands=[\n"); for (size_t i = 0; i < metadata->operands_size; ++i) { _printParsedNode(metadata->operands[i], indent + 1); @@ -79,20 +122,77 @@ void _printParsedNode(const ParsedNode *parsedNode, int indent) { for (int i = 0; i < indent; ++i) printf(" "); printf("]\n"); --indent; - } break; + goto END_SUCCESS; + } case PARSED_TOKEN_VALUE_STRING: - printf("\n"); - break; + case PARSED_TOKEN_IDENTIFIER: + printf(",str='%.*s'\n", (int)(parsedNode->strEnd - parsedNode->strBegin), + parsedNode->strBegin); + goto END_SUCCESS; + case PARSED_TOKEN_COMMA: case PARSED_TOKEN_EOL: { - EOLMetadata *metadata = parsedNode->metadata; + const ParserEOLMetadata *metadata = parsedNode->metadata; printf(",operand=\n"); _printParsedNode(metadata, indent + 1); - } break; - default: - fprintf(stderr, "bad parsed token %d at compiler line %d\n", - parsedNode->token, __LINE__); - exit(1); + goto END_SUCCESS; + } + case PARSED_TOKEN_FUNCTION_CALL: { + const ParserFunctionCallMetadata *metadata = parsedNode->metadata; + printf(",functionName=%.*s,operands=[\n", + (int)(metadata->functionNameEnd - metadata->functionNameBegin), + metadata->functionNameBegin); + ++indent; + const ParserScopeMetadata *scope = metadata->scope; + for (size_t i = 0; i < scope->operands_size; ++i) { + _printParsedNode(scope->operands[i], indent + 1); + } + for (int i = 0; i < indent; ++i) printf(" "); + printf("]\n"); + --indent; + goto END_SUCCESS; + } + case PARSED_TOKEN_DEFINE_CONSTANT: + case PARSED_TOKEN_DEFINE_VARIABLE: { + const ParserVariableDefineMetadata *metadata = parsedNode->metadata; + printf(",name=%.*s\n", + (int)(metadata->name->strEnd - metadata->name->strBegin), + metadata->name->strBegin); + if (metadata->type != NULL) { + for (int i = 0; i < indent; ++i) printf(" "); + printf(",type=\n"); + _printParsedNode(metadata->type, indent + 1); + } + if (metadata->value) { + for (int i = 0; i < indent; ++i) printf(" "); + printf(",value=\n"); + _printParsedNode(metadata->value, indent + 1); + } + goto END_SUCCESS; + } + case PARSED_TOKEN_STRUCT: { + const ParserStructMetadata *metadata = parsedNode->metadata; + printf(",body=\n"); + _printParsedNode(metadata->body, indent + 1); + goto END_SUCCESS; + } + case PARSED_TOKEN_FUNCTION: { + const ParserFunctionMetadata *metadata = parsedNode->metadata; + printf(",params=\n"); + _printParsedNode(metadata->params, indent + 1); + for (int i = 0; i < indent; ++i) printf(" "); + printf(",type=\n"); + _printParsedNode(metadata->type, indent + 1); + for (int i = 0; i < indent; ++i) printf(" "); + printf(",body=\n"); + _printParsedNode(metadata->body, indent + 1); + goto END_SUCCESS; + } + case PARSED_TOKEN_NONE: } + fprintf(stderr, "bad parsed token %d at compiler line %d\n", + parsedNode->token, __LINE__); + exit(1); +END_SUCCESS: for (int i = 0; i < indent; ++i) printf(" "); printf("}\n"); } @@ -109,13 +209,19 @@ ParsedNode *getUntilCommonFather(ParsedNode *parsedNode, ParsedNode *parent) { } void deleteParsedNode(ParsedNode *parsedNode) { + if (parsedNode == NULL) { + return; + } switch (parsedNode->token) { case PARSED_TOKEN_NONE: case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: goto FREE; + case PARSED_TOKEN_FUNCTION_PARAMS: + case PARSED_TOKEN_CODE_BODY: case PARSED_TOKEN_PARENTHESIS: case PARSED_TOKEN_ROOT: { - ScopeMetadata *metadata = parsedNode->metadata; + ParserScopeMetadata *metadata = parsedNode->metadata; for (size_t i = 0; i < metadata->operands_size; ++i) { deleteParsedNode(metadata->operands[i]); } @@ -123,14 +229,15 @@ void deleteParsedNode(ParsedNode *parsedNode) { free(metadata); goto FREE; } + case PARSED_TOKEN_COMMA: case PARSED_TOKEN_EOL: { - EOLMetadata *metadata = parsedNode->metadata; + ParserEOLMetadata *metadata = parsedNode->metadata; deleteParsedNode(metadata); goto FREE; } case PARSED_TOKEN_FUNCTION_CALL: { - FunctionCallMetadata *metadata = parsedNode->metadata; - ScopeMetadata *scope = metadata->scope; + ParserFunctionCallMetadata *metadata = parsedNode->metadata; + ParserScopeMetadata *scope = metadata->scope; for (size_t i = 0; i < scope->operands_size; ++i) { deleteParsedNode(scope->operands[i]); } @@ -139,6 +246,29 @@ void deleteParsedNode(ParsedNode *parsedNode) { free(metadata); goto FREE; } + case PARSED_TOKEN_DEFINE_CONSTANT: + case PARSED_TOKEN_DEFINE_VARIABLE: { + ParserVariableDefineMetadata *metadata = parsedNode->metadata; + deleteParsedNode(metadata->name); + deleteParsedNode(metadata->type); + deleteParsedNode(metadata->value); + free(metadata); + goto FREE; + } + case PARSED_TOKEN_STRUCT: { + ParserStructMetadata *metadata = parsedNode->metadata; + deleteParsedNode(metadata->body); + free(metadata); + goto FREE; + } + case PARSED_TOKEN_FUNCTION: { + ParserFunctionMetadata *metadata = parsedNode->metadata; + deleteParsedNode(metadata->params); + deleteParsedNode(metadata->type); + deleteParsedNode(metadata->body); + free(metadata); + goto FREE; + } } fprintf(stderr, "bad parsed token %d at compiler line %d\n", parsedNode->token, __LINE__); @@ -147,12 +277,23 @@ FREE: free(parsedNode); } -ParsedNode *parser(Nodes lexedNodes) { - ParsedNode *root = a404m_malloc(sizeof(ParsedNode)); +ParsedNode *parser(SourceCode code) { + Nodes nodes = lexer(code); + if (nodes.size == ERROR_SIZE) { + return NULL; + } + ParsedNode *root = _parser(nodes, code); + + deleteNodes(nodes); + return root; +} + +ParsedNode *_parser(Nodes lexedNodes, SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); root->token = PARSED_TOKEN_ROOT; root->parent = NULL; - root->metadata = parserScopeCode(lexedNodes.nodes, - lexedNodes.nodes + lexedNodes.size, root); + root->metadata = parserScopeCode( + lexedNodes.nodes, lexedNodes.nodes + lexedNodes.size, root, code); if (root->metadata == NULL) { free(root); return NULL; @@ -160,8 +301,9 @@ ParsedNode *parser(Nodes lexedNodes) { return root; } -ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, - bool (*isAllowed)(ParsedToken)) { +ParserScopeMetadata *parserScope( + Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, + bool (*isAllowed)(ParsedToken token, bool isLast), SourceCode code) { size_t nodes_size = 0; ParsedNode **nodes = a404m_malloc(nodes_size * sizeof(ParsedNode *)); size_t nodes_inserted = 0; @@ -175,10 +317,8 @@ ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, ++order_tokens_index) { if (node->token == order->tokens[order_tokens_index]) { ParsedNode *parsedNode = - parseNode(nodesBegin, nodesEnd, node, parent); + parseNode(nodesBegin, nodesEnd, node, parent, code); if (parsedNode == NULL) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); goto RETURN_ERROR; } if (nodes_size == nodes_inserted) { @@ -194,8 +334,8 @@ ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, for (Node *node = nodesBegin; node < nodesEnd; ++node) { if (node->token != TOKEN_PARSED) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); + printError("Unexpected node with token '%s'", code, node->strBegin, + node->strEnd, TOKEN_STRINGS[node->token]); goto RETURN_ERROR; } } @@ -207,11 +347,10 @@ ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, for (size_t i = 0; i < nodes_size; ++i) { ParsedNode *currentNode = nodes[i]; if (currentNode->parent == parent) { - if (!isAllowed(currentNode->token)) { - fprintf( - stderr, - "error in parsing token '%s' is not allowed at compiler line %d\n", - PARSED_TOKEN_STRINGS[currentNode->token], __LINE__); + if (!isAllowed(currentNode->token, i + 1 == nodes_size)) { + printError("Token '%s' is not allowed here", code, + currentNode->strBegin, currentNode->strEnd, + PARSED_TOKEN_STRINGS[currentNode->token]); goto RETURN_ERROR; } operands[nodes_inserted] = currentNode; @@ -220,7 +359,7 @@ ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, } free(nodes); - ScopeMetadata *metadata = a404m_malloc(sizeof(ScopeMetadata)); + ParserScopeMetadata *metadata = a404m_malloc(sizeof(*metadata)); metadata->operands = a404m_realloc(operands, nodes_inserted * sizeof(ParsedNode *)); metadata->operands_size = nodes_inserted; @@ -232,67 +371,197 @@ RETURN_ERROR: return NULL; } -static bool isAllowedCodeScope(ParsedToken token) { +static bool isAllowedCodeScope(ParsedToken token, bool) { switch (token) { case PARSED_TOKEN_NONE: case PARSED_TOKEN_ROOT: case PARSED_TOKEN_PARENTHESIS: case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: case PARSED_TOKEN_FUNCTION_CALL: + case PARSED_TOKEN_DEFINE_VARIABLE: + case PARSED_TOKEN_DEFINE_CONSTANT: + case PARSED_TOKEN_COMMA: + case PARSED_TOKEN_STRUCT: + case PARSED_TOKEN_FUNCTION: + case PARSED_TOKEN_FUNCTION_PARAMS: return false; case PARSED_TOKEN_EOL: + case PARSED_TOKEN_CODE_BODY: return true; } fprintf(stderr, "bad token '%d' at compiler line %d\n", token, __LINE__); exit(1); } -static bool isAllowedParenthesisScope(ParsedToken token) { +static bool isAllowedParenthesisScope(ParsedToken token, bool) { switch (token) { case PARSED_TOKEN_PARENTHESIS: case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: case PARSED_TOKEN_FUNCTION_CALL: return true; case PARSED_TOKEN_NONE: case PARSED_TOKEN_ROOT: case PARSED_TOKEN_EOL: + case PARSED_TOKEN_COMMA: + case PARSED_TOKEN_DEFINE_VARIABLE: + case PARSED_TOKEN_DEFINE_CONSTANT: + case PARSED_TOKEN_STRUCT: + case PARSED_TOKEN_FUNCTION: + case PARSED_TOKEN_CODE_BODY: + case PARSED_TOKEN_FUNCTION_PARAMS: return false; } fprintf(stderr, "bad token '%d' at compiler line %d\n", token, __LINE__); exit(1); } -ScopeMetadata *parserScopeCode(Node *nodesBegin, Node *nodesEnd, - ParsedNode *parent) { - return parserScope(nodesBegin, nodesEnd, parent, isAllowedCodeScope); +static bool isAllowedFunctionCallScope(ParsedToken token, bool isLast) { + switch (token) { + case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: + case PARSED_TOKEN_FUNCTION_CALL: + case PARSED_TOKEN_DEFINE_VARIABLE: + case PARSED_TOKEN_DEFINE_CONSTANT: + return isLast; + case PARSED_TOKEN_COMMA: + return true; + case PARSED_TOKEN_NONE: + case PARSED_TOKEN_ROOT: + case PARSED_TOKEN_EOL: + case PARSED_TOKEN_STRUCT: + case PARSED_TOKEN_FUNCTION: + case PARSED_TOKEN_FUNCTION_PARAMS: + case PARSED_TOKEN_CODE_BODY: + return false; + } + fprintf(stderr, "bad token '%d' at compiler line %d\n", token, __LINE__); + exit(1); +} + +static bool isAllowedFunctionParamScope(ParsedToken token, bool isLast) { + switch (token) { + case PARSED_TOKEN_DEFINE_VARIABLE: + case PARSED_TOKEN_DEFINE_CONSTANT: + return isLast; + case PARSED_TOKEN_COMMA: + return true; + case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: + case PARSED_TOKEN_FUNCTION_CALL: + case PARSED_TOKEN_NONE: + case PARSED_TOKEN_ROOT: + case PARSED_TOKEN_EOL: + case PARSED_TOKEN_STRUCT: + case PARSED_TOKEN_FUNCTION: + case PARSED_TOKEN_FUNCTION_PARAMS: + case PARSED_TOKEN_CODE_BODY: + return false; + } + fprintf(stderr, "bad token '%d' at compiler line %d\n", token, __LINE__); + exit(1); +} + +static bool isAllowedStructScope(ParsedToken token, bool) { + switch (token) { + case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: + case PARSED_TOKEN_FUNCTION_CALL: + case PARSED_TOKEN_DEFINE_VARIABLE: + case PARSED_TOKEN_COMMA: + case PARSED_TOKEN_EOL: + case PARSED_TOKEN_NONE: + case PARSED_TOKEN_ROOT: + case PARSED_TOKEN_FUNCTION: + case PARSED_TOKEN_STRUCT: + case PARSED_TOKEN_DEFINE_CONSTANT: + case PARSED_TOKEN_FUNCTION_PARAMS: + case PARSED_TOKEN_CODE_BODY: + return false; + } + fprintf(stderr, "bad token '%d' at compiler line %d\n", token, __LINE__); + exit(1); } -ScopeMetadata *parserScopeParenthesis(Node *nodesBegin, Node *nodesEnd, - ParsedNode *parent) { - return parserScope(nodesBegin, nodesEnd, parent, isAllowedParenthesisScope); +ParserScopeMetadata *parserScopeCode(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, SourceCode code) { + return parserScope(nodesBegin, nodesEnd, parent, isAllowedCodeScope, code); +} + +ParserScopeMetadata *parserScopeParenthesis(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, + SourceCode code) { + return parserScope(nodesBegin, nodesEnd, parent, isAllowedParenthesisScope, + code); +} + +ParserScopeMetadata *parserScopeFunctionCall(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, + SourceCode code) { + return parserScope(nodesBegin, nodesEnd, parent, isAllowedFunctionCallScope, + code); +} + +ParserScopeMetadata *parserScopeFunctionParam(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, + SourceCode code) { + return parserScope(nodesBegin, nodesEnd, parent, isAllowedFunctionParamScope, + code); +} + +ParserScopeMetadata *parserScopeStruct(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, SourceCode code) { + return parserScope(nodesBegin, nodesEnd, parent, isAllowedStructScope, code); } ParsedNode *parseNode(Node *nodesBegin, Node *nodesEnd, Node *node, - ParsedNode *parent) { + ParsedNode *parent, SourceCode code) { switch (node->token) { case TOKEN_OPERATOR_PARENTHESES_CLOSE: - return parseParenthesis(nodesBegin, nodesEnd, node, parent); + return parseParenthesis(nodesBegin, nodesEnd, node, parent, code); case TOKEN_STRING: return parseString(node, parent); + case TOKEN_IDENTIFIER: + return parseIdentifier(node, parent); case TOKEN_OPERATOR_EOL: - return parseEOL(nodesBegin, nodesEnd, node, parent); - default: - fprintf(stderr, "unexpected token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); - return NULL; + return parseEOL(nodesBegin, node, parent, code); + case TOKEN_OPERATOR_COLON: + return parseVariable(nodesBegin, nodesEnd, node, parent, code); + case TOKEN_OPERATOR_COMMA: + return parseComma(nodesBegin, nodesEnd, node, parent, code); + case TOKEN_KEYWORD_EXTERNAL: + return parseStruct(nodesEnd, node, parent, code); + case TOKEN_OPERATOR_CURLY_BRACKET_CLOSE: + return parseCurly(nodesBegin, node, parent, code); + case TOKEN_KEYWORD_STRUCT: + return parseStruct(nodesEnd, node, parent, code); + case TOKEN_OPERATOR_FUNCTION: + return parseFunction(nodesBegin, nodesEnd, node, parent, code); + case TOKEN_KEYWORD_IMPORT: + return parseImport(nodesEnd, node, parent, code); + case TOKEN_NONE: + case TOKEN_NUMBER: + case TOKEN_OPERATOR: + case TOKEN_OPERATOR_PARENTHESES_OPEN: + case TOKEN_OPERATOR_CURLY_BRACKET_OPEN: + case TOKEN_OPERATOR_ASSIGN: + case TOKEN_OPERATOR_EQUAL: + case TOKEN_SYMBOL: + case TOKEN_PARSED: } + printError("Unexpected token '%s' at compiler line %d", code, node->strBegin, + node->strEnd, TOKEN_STRINGS[node->token], __LINE__); + return NULL; } -ParsedNode *parseParenthesis(Node *nodesBegin, Node *, Node *node, - ParsedNode *parent) { +ParsedNode *parseParenthesis(Node *nodesBegin, Node *nodesEnd, Node *closing, + ParsedNode *parent, SourceCode code) { ParsedNode *root = a404m_malloc(sizeof(*root)); Node *opening = NULL; - Node *closing = node; for (Node *iter = closing - 1; iter >= nodesBegin; --iter) { if (iter->token == TOKEN_OPERATOR_PARENTHESES_OPEN) { opening = iter; @@ -300,12 +569,13 @@ ParsedNode *parseParenthesis(Node *nodesBegin, Node *, Node *node, } } if (opening == NULL) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); + printError("Found no opening for )", code, closing->strBegin, + closing->strEnd, __LINE__); goto RETURN_ERROR; } - Node *functionName = opening - 1; + Node *const functionName = opening - 1; + Node *const functionTailOperator = closing + 1; if (functionName >= nodesBegin && functionName->token == TOKEN_IDENTIFIER) { functionName->token = TOKEN_PARSED; functionName->parsedNode = root; @@ -315,30 +585,44 @@ ParsedNode *parseParenthesis(Node *nodesBegin, Node *, Node *node, root->strEnd = closing->strEnd; root->parent = parent; - FunctionCallMetadata *metadata = root->metadata = + ParserFunctionCallMetadata *metadata = root->metadata = a404m_malloc(sizeof(*metadata)); metadata->functionNameBegin = functionName->strBegin; metadata->functionNameEnd = functionName->strEnd; - if ((metadata->scope = - parserScopeParenthesis(opening + 1, closing, root)) == NULL) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); + if ((metadata->scope = parserScopeFunctionCall(opening + 1, closing, root, + code)) == NULL) { free(metadata); goto RETURN_ERROR; } + } else if (functionTailOperator < nodesEnd && + functionTailOperator->token == TOKEN_OPERATOR_FUNCTION) { + root->token = PARSED_TOKEN_FUNCTION_PARAMS; + root->strBegin = (opening - 1)->strBegin; + root->strEnd = closing->strEnd; + root->parent = parent; + + if ((root->metadata = parserScopeFunctionParam(opening + 1, closing, root, + code)) == NULL) { + goto RETURN_ERROR; + } } else { root->token = PARSED_TOKEN_PARENTHESIS; root->strBegin = opening->strBegin; root->strEnd = closing->strEnd; root->parent = parent; - if ((root->metadata = parserScopeParenthesis(opening + 1, closing, root)) == - NULL) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); + ParserScopeMetadata *const scope = + parserScopeParenthesis(opening + 1, closing, root, code); + if (scope == NULL) { + goto RETURN_ERROR; + } else if (scope->operands_size != 1) { + printError("Parenthesis should only contain one expression", code, + opening->strBegin, closing->strEnd); goto RETURN_ERROR; } + ParserParenthesisMetadata *const metadata = scope->operands[0]; + root->metadata = metadata; } closing->parsedNode = opening->parsedNode = root; @@ -350,6 +634,42 @@ RETURN_ERROR: return NULL; } +ParsedNode *parseCurly(Node *nodesBegin, Node *closing, ParsedNode *parent, + SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); + + Node *opening = NULL; + for (Node *iter = closing - 1; iter >= nodesBegin; --iter) { + if (iter->token == TOKEN_OPERATOR_CURLY_BRACKET_OPEN) { + opening = iter; + break; + } + } + if (opening == NULL) { + printError("Found no opening for }", code, closing->strBegin, + closing->strEnd, __LINE__); + goto RETURN_ERROR; + } + + if ((root->metadata = parserScopeCode(opening + 1, closing, parent, code)) == + NULL) { + goto RETURN_ERROR; + } + + root->strBegin = opening->strBegin; + root->strEnd = opening->strEnd; + root->parent = parent; + root->token = PARSED_TOKEN_CODE_BODY; + closing->parsedNode = opening->parsedNode = root; + opening->token = closing->token = TOKEN_PARSED; + + return root; + +RETURN_ERROR: + free(root); + return NULL; +} + ParsedNode *parseString(Node *node, ParsedNode *parent) { node->token = TOKEN_PARSED; return node->parsedNode = @@ -357,20 +677,34 @@ ParsedNode *parseString(Node *node, ParsedNode *parent) { PARSED_TOKEN_VALUE_STRING, NULL, parent); } -ParsedNode *parseEOL(Node *nodesBegin, Node *, Node *node, ParsedNode *parent) { +ParsedNode *parseIdentifier(Node *node, ParsedNode *parent) { + node->token = TOKEN_PARSED; + return node->parsedNode = + newParsedNode(node->strBegin, node->strEnd, + PARSED_TOKEN_IDENTIFIER, NULL, parent); +} + +ParsedNode *parseEOL(Node *nodesBegin, Node *node, ParsedNode *parent, + SourceCode code) { Node *before = node - 1; - if (before < nodesBegin || before->token != TOKEN_PARSED) { + if (before < nodesBegin) { + RETURN_EMPTY: + return newParsedNode(node->strBegin, node->strEnd, PARSED_TOKEN_EOL, NULL, + parent); + } else if (before->token != TOKEN_PARSED) { + printError("Unexpected EOL", code, node->strBegin, node->strEnd); return NULL; } ParsedNode *experession = getUntilCommonFather(before->parsedNode, parent); if (experession == NULL) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); + printError("Unexpected EOL", code, node->strBegin, node->strEnd); return NULL; + } else if (experession->token == PARSED_TOKEN_EOL) { + goto RETURN_EMPTY; } - ParsedNode *root = a404m_malloc(sizeof(ParsedNode)); + ParsedNode *root = a404m_malloc(sizeof(*root)); root->strBegin = node->strBegin; root->strEnd = node->strEnd; root->token = PARSED_TOKEN_EOL; @@ -383,3 +717,263 @@ ParsedNode *parseEOL(Node *nodesBegin, Node *, Node *node, ParsedNode *parent) { node->parsedNode = root; return root; } + +ParsedNode *parseVariable(Node *nodesBegin, Node *nodesEnd, Node *node, + ParsedNode *parent, SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); + ParserVariableDefineMetadata *metadata = root->metadata = + a404m_malloc(sizeof(*metadata)); + Node *variableName = node - 1; + if (variableName < nodesBegin) { + printError("Variable definition needs name", code, node->strBegin, + node->strEnd); + goto RETURN_ERROR; + } else if (variableName->token != TOKEN_PARSED) { + BAD_VAR_NAME: + printError("Variable name should be an identifier but got something else", + code, node->strBegin, node->strEnd); + goto RETURN_ERROR; + } + + ParsedNode *const variableNameParsed = + getUntilCommonFather(variableName->parsedNode, parent); + if (variableNameParsed == NULL || + variableNameParsed->token != PARSED_TOKEN_IDENTIFIER) { + goto BAD_VAR_NAME; + } + variableNameParsed->parent = root; + + metadata->name = variableNameParsed; + + Node *follow = node + 1; + ParsedNode *type; + ParsedNode *value = NULL; + if (follow == nodesEnd) { + printError("Variable definition needs type or assignment to a value", code, + node->strBegin, node->strEnd); + goto RETURN_ERROR; + } else if (follow->token == TOKEN_PARSED) { + type = getUntilCommonFather(follow->parsedNode, parent); + if (type == NULL) { + printError("Expected type but got something else", code, follow->strBegin, + follow->strEnd); + goto RETURN_ERROR; + } + ++follow; + while (follow->token == TOKEN_PARSED && + getUntilCommonFather(follow->parsedNode, type) != NULL) { + ++follow; + } + } else { + type = NULL; + } + + if (follow == nodesEnd || (follow->token != TOKEN_OPERATOR_ASSIGN && + follow->token != TOKEN_OPERATOR_COLON)) { + root->token = PARSED_TOKEN_DEFINE_VARIABLE; + --follow; + } else { + if (follow->token == TOKEN_OPERATOR_ASSIGN) + root->token = PARSED_TOKEN_DEFINE_VARIABLE; + else + root->token = PARSED_TOKEN_DEFINE_CONSTANT; + + follow->parsedNode = root; + follow->token = TOKEN_PARSED; + ++follow; + if (follow == nodesEnd) { + --follow; + printError("Expected value after assignment but got nothing", code, + follow->strBegin, follow->strEnd); + goto RETURN_ERROR; + } else if (follow->token == TOKEN_PARSED) { + value = getUntilCommonFather(follow->parsedNode, parent); + } else { + BAD_VALUE: + printError("Expected value after assignment but got something else", code, + follow->strBegin, follow->strEnd); + goto RETURN_ERROR; + } + } + + if (type == NULL && value == NULL) { + printError("Variable definition needs type or assignment to a value", code, + node->strBegin, node->strEnd); + goto RETURN_ERROR; + } + if (type != NULL) { + metadata->type = type; + type->parent = root; + } else { + metadata->type = NULL; + } + if (value != NULL) { + metadata->value = value = getUntilCommonFather(value, parent); + if (value == NULL) { + goto BAD_VALUE; + } + value->parent = root; + } else { + metadata->value = NULL; + } + + root->strBegin = variableName->strBegin; + root->strEnd = follow->strEnd; + root->parent = parent; + + node->token = TOKEN_PARSED; + node->parsedNode = root; + return root; + +RETURN_ERROR: + free(root); + free(metadata); + return NULL; +} + +ParsedNode *parseComma(Node *nodesBegin, Node *, Node *node, ParsedNode *parent, + SourceCode code) { + Node *before = node - 1; + if (before < nodesBegin || before->token != TOKEN_PARSED) { + UNEXPECTED: + printError("Unexpected comma", code, node->strBegin, node->strEnd); + return NULL; + } + + ParsedNode *const experession = + getUntilCommonFather(before->parsedNode, parent); + if (experession == NULL || experession->token == PARSED_TOKEN_COMMA) { + goto UNEXPECTED; + } + + ParsedNode *root = a404m_malloc(sizeof(*root)); + root->strBegin = node->strBegin; + root->strEnd = node->strEnd; + root->token = PARSED_TOKEN_COMMA; + root->parent = parent; + + root->metadata = experession; + experession->parent = root; + + node->token = TOKEN_PARSED; + node->parsedNode = root; + return root; +} + +ParsedNode *parseStruct(Node *nodesEnd, Node *node, ParsedNode *parent, + SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); + ParserStructMetadata *metadata = root->metadata = + a404m_malloc(sizeof(*metadata)); + + Node *const body = node + 1; + if (body >= nodesEnd) { + NO_BODY: + printError("'struct' needs a body", code, node->strBegin, node->strEnd); + goto RETURN_ERROR; + } + + root->strBegin = node->strBegin; + root->strBegin = body->strEnd; + root->token = PARSED_TOKEN_STRUCT; + root->parent = parent; + + node->token = TOKEN_PARSED; + node->parsedNode = root; + + if (body->token == TOKEN_KEYWORD_EXTERNAL) { + body->token = TOKEN_PARSED; + body->parsedNode = root; + metadata->body = NULL; + } else if (body->token == TOKEN_PARSED) { + if ((metadata->body = getUntilCommonFather(body->parsedNode, parent)) == + NULL) { + goto NO_BODY; + } else { + metadata->body->parent = root; + } + } else { + goto NO_BODY; + } + + return root; +RETURN_ERROR: + free(root); + free(metadata); + return NULL; +} + +ParsedNode *parseFunction(Node *nodesBegin, Node *nodesEnd, Node *node, + ParsedNode *parent, SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); + ParserFunctionMetadata *metadata = root->metadata = + a404m_malloc(sizeof(*metadata)); + + Node *const params = node - 1; + Node *const type = node + 1; + Node *const body = node + 2; + + if (params < nodesBegin || params->token != TOKEN_PARSED || + params->parsedNode->token != PARSED_TOKEN_FUNCTION_PARAMS) { + printError("Function definition needs a param list", code, node->strBegin, + node->strEnd); + goto RETURN_ERROR; + } else { + metadata->params = params->parsedNode; + metadata->params->parent = root; + } + if (type >= nodesEnd || type->token != TOKEN_PARSED || + type->parsedNode->token != PARSED_TOKEN_IDENTIFIER) { + printError("Function definition needs a type to be identifier (for now)", + code, node->strBegin, node->strEnd); + goto RETURN_ERROR; + } else { + metadata->type = type->parsedNode; + metadata->type->parent = root; + } + + if (body >= nodesEnd) { + NEED_BODY: + printError("Function definition needs a body or be set to external", code, + node->strBegin, node->strEnd); + goto RETURN_ERROR; + } else if (body->token == TOKEN_KEYWORD_EXTERNAL) { + body->token = TOKEN_PARSED; + body->parsedNode = root; + metadata->body = NULL; + } else if (type->token == TOKEN_PARSED && + type->parsedNode->token == PARSED_TOKEN_CODE_BODY) { + metadata->body = type->parsedNode; + metadata->body->parent = root; + } else { + goto NEED_BODY; + } + + node->token = TOKEN_PARSED; + node->parsedNode = root; + root->strBegin = params->strBegin; + root->strEnd = params->strEnd; + root->parent = parent; + root->token = PARSED_TOKEN_FUNCTION; + + return root; +RETURN_ERROR: + + free(metadata); + free(root); + return NULL; +} + +ParsedNode *parseImport(Node *nodesEnd, Node *node, ParsedNode *parent, + SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); + + // TODO: do it + + return root; + +RETURN_ERROR: + + free(root); + return NULL; +} diff --git a/src/compiler/parser/parser.h b/src/compiler/parser/parser.h index 520c5d1..24c6c5d 100644 --- a/src/compiler/parser/parser.h +++ b/src/compiler/parser/parser.h @@ -8,7 +8,15 @@ typedef enum ParsedToken { PARSED_TOKEN_PARENTHESIS, PARSED_TOKEN_FUNCTION_CALL, PARSED_TOKEN_VALUE_STRING, + PARSED_TOKEN_IDENTIFIER, + PARSED_TOKEN_DEFINE_VARIABLE, + PARSED_TOKEN_DEFINE_CONSTANT, PARSED_TOKEN_EOL, + PARSED_TOKEN_COMMA, + PARSED_TOKEN_STRUCT, + PARSED_TOKEN_FUNCTION, + PARSED_TOKEN_FUNCTION_PARAMS, + PARSED_TOKEN_CODE_BODY, } ParsedToken; extern const char *PARSED_TOKEN_STRINGS[]; @@ -27,17 +35,32 @@ typedef struct ParsedNode { struct ParsedNode *parent; } ParsedNode; -typedef struct ScopeMetadata { - struct ParsedNode **operands; +typedef struct ParserScopeMetadata { + ParsedNode **operands; size_t operands_size; -} ScopeMetadata; -typedef struct FunctionCallMetadata { +} ParserScopeMetadata; +typedef ParsedNode ParserParenthesisMetadata; +typedef struct ParserFunctionCallMetadata { char const *functionNameBegin; char const *functionNameEnd; - ScopeMetadata *scope; -} FunctionCallMetadata ; -typedef ParsedNode PrintMetadata; -typedef ParsedNode EOLMetadata; + ParserScopeMetadata *scope; +} ParserFunctionCallMetadata; +typedef ParsedNode ParserPrintMetadata; +typedef struct ParserVariableDefine { + ParsedNode *name; + ParsedNode *type; + ParsedNode *value; +} ParserVariableDefineMetadata; +typedef ParsedNode ParserEOLMetadata; +typedef ParsedNode ParserCommaMetadata; +typedef struct ParserStructMetadata { + ParsedNode *body; +} ParserStructMetadata; +typedef struct ParserFunctionMetadata { + ParsedNode *params; + ParsedNode *type; + ParsedNode *body; +} ParserFunctionMetadata; extern ParsedNode *newParsedNode(char const *strBegin, char const *strEnd, ParsedToken token, void *metadata, @@ -48,21 +71,50 @@ extern ParsedNode *getUntilCommonFather(ParsedNode *parsedNode, ParsedNode *parent); extern void deleteParsedNode(ParsedNode *parsedNode); -extern ParsedNode *parser(Nodes lexedNodes); +extern ParsedNode *parser(SourceCode code); +extern ParsedNode *_parser(Nodes lexedNodes, SourceCode code); -extern ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, - ParsedNode *parent, - bool (*isAllowed)(ParsedToken)); -extern ScopeMetadata *parserScopeCode(Node *nodesBegin, Node *nodesEnd, - ParsedNode *parent); -extern ScopeMetadata *parserScopeParenthesis(Node *nodesBegin, Node *nodesEnd, - ParsedNode *parent); +extern ParserScopeMetadata *parserScope( + Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, + bool (*isAllowed)(ParsedToken token, bool isLast), SourceCode code); +extern ParserScopeMetadata *parserScopeCode(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, + SourceCode code); +extern ParserScopeMetadata *parserScopeParenthesis(Node *nodesBegin, + Node *nodesEnd, + ParsedNode *parent, + SourceCode code); +extern ParserScopeMetadata *parserScopeFunctionCall(Node *nodesBegin, + Node *nodesEnd, + ParsedNode *parent, + SourceCode code); +extern ParserScopeMetadata *parserScopeFunctionParam(Node *nodesBegin, + Node *nodesEnd, + ParsedNode *parent, + SourceCode code); +extern ParserScopeMetadata *parserScopeStruct(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, + SourceCode code); extern ParsedNode *parseNode(Node *nodesBegin, Node *nodesEnd, Node *node, - ParsedNode *parent); + ParsedNode *parent, SourceCode code); extern ParsedNode *parseParenthesis(Node *nodesBegin, Node *nodesEnd, - Node *node, ParsedNode *parent); + Node *node, ParsedNode *parent, + SourceCode code); +extern ParsedNode *parseCurly(Node *nodesBegin, Node *node, ParsedNode *parent, + SourceCode code); extern ParsedNode *parseString(Node *node, ParsedNode *parent); -extern ParsedNode *parseEOL(Node *nodesBegin, Node *nodesEnd, Node *node, - ParsedNode *parent); +extern ParsedNode *parseIdentifier(Node *node, ParsedNode *parent); +extern ParsedNode *parseEOL(Node *nodesBegin, Node *node, ParsedNode *parent, + SourceCode code); +extern ParsedNode *parseVariable(Node *nodesBegin, Node *nodesEnd, Node *node, + ParsedNode *parent, SourceCode code); +extern ParsedNode *parseComma(Node *nodesBegin, Node *nodesEnd, Node *node, + ParsedNode *parent, SourceCode code); +extern ParsedNode *parseStruct(Node *nodesEnd, Node *node, ParsedNode *parent, + SourceCode code); +extern ParsedNode *parseFunction(Node *nodesBegin, Node *nodesEnd, Node *node, + ParsedNode *parent, SourceCode code); +extern ParsedNode *parseImport(Node *nodesEnd, Node *node, ParsedNode *parent, + SourceCode code); diff --git a/src/compiler/tree_parser/tree_parser.c b/src/compiler/tree_parser/tree_parser.c new file mode 100644 index 0000000..cb1ef4c --- /dev/null +++ b/src/compiler/tree_parser/tree_parser.c @@ -0,0 +1,695 @@ +#include "tree_parser.h" + +#include <compiler/error_helper/error_helper.h> +#include <compiler/lexer/lexer.h> +#include <compiler/parser/parser.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <utils/memory/memory.h> + +#include "utils/types.h" + +const char *TREE_TOKEN_STRINGS[] = { + "TREE_TOKEN_NONE", + "TREE_TOKEN_GLOBAL_SCOPE", + "TREE_TOKEN_LOCAL_SCOPE", + "TREE_TOKEN_FUNCTION_CALL", + "TREE_TOKEN_DEFINE_VARIABLE", + "TREE_TOKEN_DEFINE_CONSTANT", + "TREE_TOKEN_IDENTIFIER", + "TREE_TOKEN_VALUE_STRING", + "TREE_TOKEN_STRUCT", + "TREE_TOKEN_FUNCTION", +}; + +void _printParsedTreeNode(const ParsedTree *parsedTree, int indent) { + if (parsedTree == NULL) { + for (int i = 0; i < indent; ++i) printf(" "); + printf("null\n"); + } + printf("{token=%s", TREE_TOKEN_STRINGS[parsedTree->token]); + switch (parsedTree->token) { + case TREE_TOKEN_NONE: + goto RETURN_SUCCESS; + case TREE_TOKEN_GLOBAL_SCOPE: + case TREE_TOKEN_LOCAL_SCOPE: + case TREE_TOKEN_FUNCTION_CALL: + case TREE_TOKEN_DEFINE_VARIABLE: + case TREE_TOKEN_IDENTIFIER: + case TREE_TOKEN_VALUE_STRING: + case TREE_TOKEN_STRUCT: + case TREE_TOKEN_DEFINE_CONSTANT: + case TREE_TOKEN_FUNCTION: + } + fprintf(stderr, "bad parsed tree token %d at %s:%d", parsedTree->token, + __FILE_NAME__, __LINE__); + exit(1); +RETURN_SUCCESS: + printf("}\n"); +}; + +void printParsedTreeNode(const ParsedTree *parsedTree) { + _printParsedTreeNode(parsedTree, 0); +} + +void deleteParsedTree(ParsedTree *parsedTree) { + if (parsedTree == NULL) { + return; + } + switch (parsedTree->token) { + case TREE_TOKEN_NONE: + goto RETURN_SUCCESS; + case TREE_TOKEN_GLOBAL_SCOPE: + case TREE_TOKEN_LOCAL_SCOPE: { + TreeScopeMetadata *metadata = parsedTree->metadata; + for (size_t i = 0; i < metadata->lines_size; ++i) { + deleteParsedTree(metadata->lines[i]); + } + free(metadata->lines); + free(metadata->variables); + free(metadata); + goto RETURN_SUCCESS; + } + case TREE_TOKEN_FUNCTION_CALL: { + TreeFunctionCallMetadata *metadata = parsedTree->metadata; + for (size_t i = 0; i < metadata->values_size; ++i) { + deleteParsedTree(metadata->values[i]); + } + free(metadata->values); + free(metadata); + goto RETURN_SUCCESS; + } + case TREE_TOKEN_DEFINE_VARIABLE: + case TREE_TOKEN_DEFINE_CONSTANT: { + TreeDefineVariableMetadata *metadata = parsedTree->metadata; + deleteParsedTree(metadata->value); + free(metadata); + goto RETURN_SUCCESS; + } + case TREE_TOKEN_IDENTIFIER: { + TreeIdentifierMetadata *metadata = parsedTree->metadata; + free(metadata); + goto RETURN_SUCCESS; + } + case TREE_TOKEN_VALUE_STRING: { + TreeStringValueMetadata *metadata = parsedTree->metadata; + free(metadata->str); + free(metadata); + goto RETURN_SUCCESS; + } + case TREE_TOKEN_STRUCT: { + TreeStructMetadata *metadata = parsedTree->metadata; + free(metadata); + goto RETURN_SUCCESS; + } + case TREE_TOKEN_FUNCTION: { + TreeFunctionMetadata *metadata = parsedTree->metadata; + TreeScopeMetadata *scope = metadata->scope; + if (scope != NULL) { + for (size_t i = 0; i < scope->lines_size; ++i) { + deleteParsedTree(scope->lines[i]); + } + free(scope->lines); + free(scope->variables); + } + for (size_t i = 0; i < metadata->params_size; ++i) { + deleteParsedTree(metadata->params[i]->value); + free(metadata->params[i]); + } + free(metadata->params); + free(metadata); + goto RETURN_SUCCESS; + } + } + fprintf(stderr, "bad parsed tree token %d at %s:%d", parsedTree->token, + __FILE_NAME__, __LINE__); + exit(1); +RETURN_SUCCESS: + free(parsedTree); +} + +ParsedTree *treeParser(SourceCode code) { + ParsedNode *parsedNode = parser(code); + if (parsedNode == NULL) { + return NULL; + } + ParsedTree *tree= _treeParser(parsedNode, code); + deleteParsedNode(parsedNode); + return tree; +} + +ParsedTree *_treeParser(const ParsedNode *node, SourceCode code) { + if (node->token == PARSED_TOKEN_ROOT) { + return treeParseRoot(node, code); + } else { + return NULL; + } +} + +ParsedTree *treeParseNode(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size) { + switch (node->token) { + case PARSED_TOKEN_ROOT: + case PARSED_TOKEN_NONE: + fprintf(stderr, "parsed token not allowed %s", + PARSED_TOKEN_STRINGS[node->token]); + return NULL; + case PARSED_TOKEN_EOL: + return treeParseNode((ParserEOLMetadata *)node->metadata, code, scopes, + scopes_size); + case PARSED_TOKEN_IDENTIFIER: + return treeParseIdentifier(node, code, scopes, scopes_size); + case PARSED_TOKEN_FUNCTION_CALL: + return treeParseFunctionCall(node, code, scopes, scopes_size); + case PARSED_TOKEN_COMMA: + return treeParseNode((ParserCommaMetadata *)node->metadata, code, scopes, + scopes_size); + case PARSED_TOKEN_PARENTHESIS: + return treeParseNode((ParserParenthesisMetadata *)node->metadata, code, + scopes, scopes_size); + case PARSED_TOKEN_CODE_BODY: + return treeParseLocalScope(node, code, scopes, scopes_size); + case PARSED_TOKEN_VALUE_STRING: + return treeParseValueString(node, code); + case PARSED_TOKEN_DEFINE_VARIABLE: + return treeParseVariableDefinition(node, code, scopes, scopes_size, + TREE_TOKEN_DEFINE_VARIABLE); + case PARSED_TOKEN_DEFINE_CONSTANT: + return treeParseVariableDefinition(node, code, scopes, scopes_size, + TREE_TOKEN_DEFINE_CONSTANT); + case PARSED_TOKEN_STRUCT: + return treeParseStruct(node, code, scopes, scopes_size); + case PARSED_TOKEN_FUNCTION: + return treeParseFunction(node, code, scopes, scopes_size); + case PARSED_TOKEN_FUNCTION_PARAMS: + } + fprintf(stderr, "bad parsed token %d at %s:%d", node->token, __FILE_NAME__, + __LINE__); + return NULL; +} + +ParsedTree *treeParseExpr(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size) { + switch (node->token) { + case PARSED_TOKEN_ROOT: + case PARSED_TOKEN_NONE: + case PARSED_TOKEN_EOL: + case PARSED_TOKEN_COMMA: + case PARSED_TOKEN_DEFINE_VARIABLE: + case PARSED_TOKEN_DEFINE_CONSTANT: + case PARSED_TOKEN_CODE_BODY: + printError("Parsed token %s is not an expression", code, node->strBegin, + node->strEnd, PARSED_TOKEN_STRINGS[node->token]); + return NULL; + case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_FUNCTION_CALL: + case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: + case PARSED_TOKEN_STRUCT: + case PARSED_TOKEN_FUNCTION: + return treeParseNode(node, code, scopes, scopes_size); + case PARSED_TOKEN_FUNCTION_PARAMS: + } + fprintf(stderr, "bad parsed token %d", node->token); + return NULL; +} + +ParsedTree *treeParseRoot(const ParsedNode *node, SourceCode code) { + return treeParseLocalScope(node, code, NULL, 0); +} + +ParsedTree *treeParseLocalScope(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size) { + const ParserScopeMetadata *node_metadata = node->metadata; + ParsedNode **const operands = node_metadata->operands; + const size_t operands_size = node_metadata->operands_size; + + ParsedTree *tree = a404m_malloc(sizeof(*tree)); + tree->token = TREE_TOKEN_LOCAL_SCOPE; + tree->strBegin = node->strBegin; + tree->strEnd = node->strEnd; + + TreeScopeMetadata *metadata = tree->metadata = + a404m_malloc(sizeof(*metadata)); + + size_t metadata_variables_size = 0; + metadata->variables = a404m_malloc(metadata_variables_size * + sizeof(TreeDefineVariableMetadata *)); + metadata->variables_size = 0; + + size_t metadata_lines_size = 0; + metadata->lines = a404m_malloc(metadata_lines_size * sizeof(ParsedTree *)); + metadata->lines_size = 0; + + const size_t newScopes_size = scopes_size + 1; + const TreeScopeMetadata *newScopes[newScopes_size]; + + for (size_t i = 0; i < scopes_size; ++i) { + newScopes[i] = scopes[i]; + } + + newScopes[newScopes_size - 1] = metadata; + + for (size_t i = 0; i < operands_size; ++i) { + const ParsedNode *operand = operands[i]; + ParsedTree *const parsedTree = + treeParseNode(operand, code, newScopes, newScopes_size); + if (parsedTree == NULL) { + goto RETURN_ERROR; + } + switch (parsedTree->token) { + case TREE_TOKEN_DEFINE_CONSTANT: + case TREE_TOKEN_DEFINE_VARIABLE: { + TreeDefineVariableMetadata *const variableDefine = parsedTree->metadata; + if (variableDefine == NULL) { + goto RETURN_ERROR; + } else if (metadata->variables_size == metadata_variables_size) { + metadata_variables_size += metadata_variables_size / 2 + 1; + metadata->variables = a404m_realloc( + metadata->variables, + metadata_variables_size * sizeof(TreeDefineVariableMetadata *)); + } + metadata->variables[metadata->variables_size] = variableDefine; + metadata->variables_size += 1; + } + /* fall through */ + default: + if (metadata->lines_size == metadata_lines_size) { + metadata_lines_size += metadata_lines_size / 2 + 1; + metadata->lines = a404m_realloc( + metadata->lines, metadata_lines_size * sizeof(ParsedTree *)); + } + metadata->lines[metadata->lines_size] = parsedTree; + metadata->lines_size += 1; + continue; + } + printError("'%s' Is not allowed here", code, operand->strBegin, + operand->strEnd, PARSED_TOKEN_STRINGS[operand->token]); + goto RETURN_ERROR; + } + + metadata->variables = + a404m_realloc(metadata->variables, metadata->variables_size); + + return tree; + +RETURN_ERROR: + free(tree); + free(metadata); + return NULL; +} + +TreeDefineVariableMetadata *treeParseDefineVariable( + ParsedTree *tree, const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], size_t scopes_size) { + TreeDefineVariableMetadata *define = a404m_malloc(sizeof(*define)); + define->tree = tree; + + ParserVariableDefineMetadata *metadata = node->metadata; + + if (metadata->value == NULL) { + define->value = NULL; + } else if ((define->value = treeParseExpr(metadata->value, code, scopes, + scopes_size)) == NULL) { + goto RETURN_ERROR; + } + + if (metadata->name->token == PARSED_TOKEN_IDENTIFIER) { + define->nameBegin = metadata->name->strBegin; + define->nameEnd = metadata->name->strEnd; + } else { + printError("Names should be an identifier", code, metadata->name->strBegin, + metadata->name->strEnd); + goto RETURN_ERROR; + } + + if (metadata->type == NULL) { + define->type = getTreeExpressionType(define->value); + } else if (metadata->type->token == PARSED_TOKEN_IDENTIFIER) { + const TreeDefineVariableMetadata *variable = + getVariable(metadata->type->strBegin, metadata->type->strEnd, code, + scopes, scopes_size); + if (variable == NULL) { + goto RETURN_ERROR; + } + define->type = getType(variable); + } else { + printError("Types should be an identifier (for now)", code, + metadata->type->strBegin, metadata->type->strEnd); + goto RETURN_ERROR; + } + + if (define->type == NULL) { + printError("Can't specify type", code, node->strBegin, node->strEnd); + goto RETURN_ERROR; + } + + return define; + +RETURN_ERROR: + free(define); + return NULL; +} + +TreeDefineVariableMetadata *getVariable(const char *strBegin, + const char *strEnd, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size) { + const size_t size = strEnd - strBegin; + const char *str = strBegin; + + for (size_t i = 0; i < scopes_size; ++i) { + const TreeScopeMetadata *scope = scopes[i]; + for (size_t j = scope->variables_size - 1; j != (typeof(j))-1; --j) { + TreeDefineVariableMetadata *variable = scope->variables[j]; + const size_t variable_str_size = variable->nameEnd - variable->nameBegin; + if (size == variable_str_size && + strncmp(str, variable->nameBegin, size) == 0) { + return variable; + } + } + } + + printError("Identifier is not defined", code, strBegin, strEnd); + return NULL; +} + +ParsedTree *treeParseIdentifier(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size) { + TreeDefineVariableMetadata *variable = + getVariable(node->strBegin, node->strEnd, code, scopes, scopes_size); + if (variable != NULL) { + ParsedTree *tree = a404m_malloc(sizeof(*tree)); + tree->token = TREE_TOKEN_IDENTIFIER; + tree->strBegin = node->strBegin; + tree->strEnd = node->strEnd; + TreeIdentifierMetadata *metadata = tree->metadata = + a404m_malloc(sizeof(TreeIdentifierMetadata)); + metadata->variable = variable; + return tree; + } + return NULL; +} + +ParsedTree *treeParseFunctionCall(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size) { + ParserFunctionCallMetadata *node_metadata = node->metadata; + + ParsedTree *tree = a404m_malloc(sizeof(*tree)); + tree->token = TREE_TOKEN_FUNCTION_CALL; + tree->strBegin = node->strBegin; + tree->strEnd = node->strEnd; + + TreeFunctionCallMetadata *metadata = tree->metadata = + a404m_malloc(sizeof(*metadata)); + + size_t metadata_values_size = 0; + metadata->values = a404m_malloc(metadata_values_size * sizeof(ParsedTree *)); + metadata->values_size = 0; + + TreeDefineVariableMetadata *variable = + getVariable(node_metadata->functionNameBegin, + node_metadata->functionNameEnd, code, scopes, scopes_size); + + if (variable == NULL) { + goto RETURN_ERROR; + } + metadata->function = variable; + + for (size_t i = 0; i < node_metadata->scope->operands_size; ++i) { + ParsedNode *const operand = node_metadata->scope->operands[i]; + ParsedTree *const operandTree = + treeParseNode(operand, code, scopes, scopes_size); + if (operandTree == NULL) { + goto RETURN_ERROR; + } + // TODO: check types + if (metadata->values_size == metadata_values_size) { + metadata_values_size += metadata_values_size / 2 + 1; + metadata->values = + a404m_malloc(metadata_values_size * sizeof(ParsedTree *)); + } + metadata->values[metadata->values_size] = operandTree; + metadata->values_size += 1; + } + + metadata->values = a404m_realloc( + metadata->values, metadata->values_size * sizeof(ParsedTree *)); + return tree; + +RETURN_ERROR: + free(metadata); + free(tree); + return NULL; +} + +ParsedTree *treeParseValueString(const ParsedNode *node, SourceCode code) { + ParsedTree *const tree = a404m_malloc(sizeof(*tree)); + tree->token = TREE_TOKEN_VALUE_STRING; + tree->strBegin = node->strBegin; + tree->strEnd = node->strEnd; + if ((tree->metadata = nodeToString(node, code)) == NULL) { + free(tree); + return NULL; + } + return tree; +} + +ParsedTree *treeParseVariableDefinition(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size, TreeToken token) { + ParsedTree *const tree = a404m_malloc(sizeof(*tree)); + tree->token = token; + tree->strBegin = node->strBegin; + tree->strEnd = node->strEnd; + if ((tree->metadata = treeParseDefineVariable(tree, node, code, scopes, + scopes_size)) == NULL) { + free(tree); + return NULL; + } + return tree; +} + +ParsedTree *treeParseStruct(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size) { + const ParserStructMetadata *node_metadata = node->metadata; + + ParsedTree *const tree = a404m_malloc(sizeof(*tree)); + tree->token = TREE_TOKEN_STRUCT; + tree->strBegin = node->strBegin; + tree->strEnd = node->strEnd; + TreeStructMetadata *const metadata = tree->metadata = + a404m_malloc(sizeof(*metadata)); + + if (node_metadata->body != NULL) { + const ParserScopeMetadata *node_body = node_metadata->body->metadata; + for (size_t i = 0; i < node_body->operands_size; ++i) { + // TODO: implement + printError("Not implemented", code, node->strBegin, node->strEnd); + + free(metadata); + free(tree); + return NULL; + } + } + + return tree; +} + +ParsedTree *treeParseFunction(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size) { + const ParserFunctionMetadata *node_metadata = node->metadata; + + ParsedTree *const tree = a404m_malloc(sizeof(*tree)); + tree->token = TREE_TOKEN_FUNCTION; + tree->strBegin = node->strBegin; + tree->strEnd = node->strEnd; + + TreeFunctionMetadata *const metadata = tree->metadata = + a404m_malloc(sizeof(*metadata)); + + ParsedTree *parsedTree = + treeParseNode(node_metadata->type, code, scopes, scopes_size); + + if (parsedTree == NULL) { + goto RETURN_ERROR; + } + metadata->returnType = getTreeExpressionType(parsedTree); + // TODO: this is not right + deleteParsedTree(parsedTree); + + const ParserScopeMetadata *params = node_metadata->params->metadata; + + size_t metadata_params_size = 0; + metadata->params = + a404m_malloc(metadata_params_size * sizeof(TreeDefineVariableMetadata *)); + metadata->params_size = 0; + + for (size_t i = 0; i < params->operands_size; ++i) { + const ParsedNode *operand = params->operands[i]; + TreeDefineVariableMetadata *define = + treeParseDefineVariable(tree, operand, code, scopes, scopes_size); + if (define == NULL) { + goto RETURN_ERROR; + } + if (metadata->params_size == metadata_params_size) { + metadata_params_size += metadata_params_size / 2 + 1; + metadata->params = a404m_realloc( + metadata->params, + metadata_params_size * sizeof(TreeDefineVariableMetadata *)); + } + metadata->params[metadata->params_size] = define; + metadata->params_size += 1; + } + metadata->params = + a404m_realloc(metadata->params, metadata->params_size * + sizeof(TreeDefineVariableMetadata *)); + + if (node_metadata->body != NULL) { + printError("Not implemented", code, node->strBegin, node->strEnd); + + free(metadata); + free(tree); + return NULL; + } else { + metadata->scope = NULL; + } + + return tree; +RETURN_ERROR: + + free(metadata); + free(tree); + return NULL; +} + +TypeId getTreeExpressionType(ParsedTree *tree) { + switch (tree->token) { + case TREE_TOKEN_FUNCTION_CALL: + return ((TreeFunctionMetadata *)((TreeFunctionCallMetadata *) + tree->metadata) + ->function->type->metadata) + ->returnType; + case TREE_TOKEN_DEFINE_VARIABLE: + case TREE_TOKEN_DEFINE_CONSTANT: + return ((TreeDefineVariableMetadata *)tree->metadata)->type; + case TREE_TOKEN_IDENTIFIER: + return getType(((TreeIdentifierMetadata *)tree->metadata)->variable); + case TREE_TOKEN_VALUE_STRING: + case TREE_TOKEN_STRUCT: + case TREE_TOKEN_GLOBAL_SCOPE: + case TREE_TOKEN_LOCAL_SCOPE: + case TREE_TOKEN_FUNCTION: // TODO: find a better way for function + return tree; + case TREE_TOKEN_NONE: + } + fprintf(stderr, "bad parsed tree token %d at %d:%s", tree->token, __LINE__, + __FILE_NAME__); + exit(1); +} + +TypeId getType(const TreeDefineVariableMetadata *define) { + if (define->value == NULL || !isType(define->value)) { + return define->type; + } else { + return define->tree; + } +} + +bool isType(ParsedTree *const tree) { + switch (tree->token) { + case TREE_TOKEN_FUNCTION_CALL: + case TREE_TOKEN_DEFINE_VARIABLE: + case TREE_TOKEN_DEFINE_CONSTANT: + case TREE_TOKEN_IDENTIFIER: + case TREE_TOKEN_VALUE_STRING: + case TREE_TOKEN_GLOBAL_SCOPE: + case TREE_TOKEN_LOCAL_SCOPE: + return false; + case TREE_TOKEN_STRUCT: + case TREE_TOKEN_FUNCTION: // TODO: find a better way for function + return true; + case TREE_TOKEN_NONE: + } + fprintf(stderr, "bad parsed tree token %d at %d:%s", tree->token, __LINE__, + __FILE_NAME__); + exit(1); +} + +SizedString *nodeToString(ParsedNode const *tree, SourceCode code) { + const char *strBegin = tree->strBegin + 1; + const char *strEnd = tree->strEnd - 1; + + char *str = a404m_malloc((strEnd - strBegin + 1) * sizeof(char)); + size_t inserted = 0; + + for (char const *iter = strBegin; iter < strEnd; ++iter) { + char c = *iter; + if (c == '\\') { + if (++iter < strEnd) { + switch (*iter) { + case '\'': + c = '\''; + break; + case '\"': + c = '\"'; + break; + case '\\': + c = '\\'; + break; + case 'a': + c = '\a'; + break; + case 'b': + c = '\b'; + break; + case 'f': + c = '\f'; + break; + case 'n': + c = '\n'; + break; + case 'r': + c = '\r'; + break; + case 't': + c = '\t'; + break; + case 'v': + c = '\v'; + break; + /*case 'u':*/ // TODO: do it + /* c = '';*/ + /* break;*/ + default: + printError("Bad escape code '\\%s'", code, tree->strBegin, + tree->strEnd, *iter); + goto RETURN_ERROR; + } + } else { + printError("Expected character after '\\'", code, tree->strBegin, + tree->strEnd); + goto RETURN_ERROR; + } + } + str[inserted] = c; + ++inserted; + } + + str[inserted] = '\0'; + SizedString *const string = a404m_malloc(sizeof(SizedString)); + string->str = a404m_realloc(str, (inserted + 1) * sizeof(char)); + string->size = inserted; + return string; +RETURN_ERROR: + free(str); + return NULL; +} diff --git a/src/compiler/tree_parser/tree_parser.h b/src/compiler/tree_parser/tree_parser.h new file mode 100644 index 0000000..0ee783f --- /dev/null +++ b/src/compiler/tree_parser/tree_parser.h @@ -0,0 +1,113 @@ +#pragma once + +#include <compiler/parser/parser.h> + +#include "utils/types.h" + +typedef enum TreeToken { + TREE_TOKEN_NONE = 0, + TREE_TOKEN_GLOBAL_SCOPE, + TREE_TOKEN_LOCAL_SCOPE, + TREE_TOKEN_FUNCTION_CALL, + TREE_TOKEN_DEFINE_VARIABLE, + TREE_TOKEN_DEFINE_CONSTANT, + TREE_TOKEN_IDENTIFIER, + TREE_TOKEN_VALUE_STRING, + TREE_TOKEN_STRUCT, + TREE_TOKEN_FUNCTION, +} TreeToken; + +extern const char *TREE_TOKEN_STRINGS[]; + +typedef struct ParsedTree { + char const *strBegin; + char const *strEnd; + TreeToken token; + void *metadata; +} ParsedTree; + +typedef struct TreeStructMetadata { +} TreeStructMetadata; + +typedef ParsedTree *TypeId; + +typedef struct TreeDefineVariableMetadata { + char const *nameBegin; + char const *nameEnd; + TypeId type; + ParsedTree *value; + ParsedTree *tree; +} TreeDefineVariableMetadata; + +typedef struct TreeFunctionCallMetadata { + TreeDefineVariableMetadata *function; + ParsedTree **values; + size_t values_size; +} TreeFunctionCallMetadata; + +typedef struct TreeScopeMetadata { + ParsedTree **lines; + size_t lines_size; + TreeDefineVariableMetadata **variables; + size_t variables_size; +} TreeScopeMetadata; + +typedef struct TreeIdentifierMetadata { + TreeDefineVariableMetadata *variable; +} TreeIdentifierMetadata; + +typedef struct TreeFunctionMetadata { + TreeDefineVariableMetadata **params; + size_t params_size; + TreeScopeMetadata *scope; + TypeId returnType; +} TreeFunctionMetadata; + +typedef SizedString TreeStringValueMetadata; + +extern void _printParsedTreeNode(const ParsedTree *parsedTree,int indent); +extern void printParsedTreeNode(const ParsedTree *parsedTree); + +extern void deleteParsedTree(ParsedTree *parsedTree); + +extern ParsedTree *treeParser(SourceCode code); +extern ParsedTree *_treeParser(const ParsedNode *node, SourceCode code); + +extern ParsedTree *treeParseNode(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size); + +extern ParsedTree *treeParseRoot(const ParsedNode *root, SourceCode code); +extern ParsedTree *treeParseLocalScope(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size); + +extern TreeDefineVariableMetadata *treeParseDefineVariable( + ParsedTree *tree, const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], size_t scopes_size); +extern TreeDefineVariableMetadata *getVariable( + const char *strBegin, const char *strEnd, SourceCode code, + const TreeScopeMetadata *scopes[], size_t scopes_size); +extern ParsedTree *treeParseIdentifier(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size); +extern ParsedTree *treeParseFunctionCall(const ParsedNode *node, + SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size); +extern ParsedTree *treeParseValueString(const ParsedNode *node, + SourceCode code); +extern ParsedTree *treeParseVariableDefinition( + const ParsedNode *node, SourceCode code, const TreeScopeMetadata *scopes[], + size_t scopes_size, TreeToken token); +extern ParsedTree *treeParseStruct(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size); +extern ParsedTree *treeParseFunction(const ParsedNode *node, SourceCode code, + const TreeScopeMetadata *scopes[], + size_t scopes_size); + +extern TypeId getTreeExpressionType(ParsedTree *const tree); +extern TypeId getType(const TreeDefineVariableMetadata *define); +extern bool isType(ParsedTree *const tree); +extern SizedString *nodeToString(ParsedNode const *tree, SourceCode code); @@ -1,62 +1,70 @@ #include <stdlib.h> +#include <string.h> #include <utils/file.h> #include <utils/time.h> #include <vm/runner/runner.h> +#include "compiler/tree_parser/tree_parser.h" +#include "utils/types.h" + static const char *codes[] = { - "print(\"\");", + "print(\"Hello\\n\");", }; -bool run(char const *restrict code) { - bool ranSuccess = false; - const Nodes nodes = lexer(code); - ParsedNode *parsedNode = parser(nodes); - if (parsedNode != NULL) { - Instructions instructions = codeGenerator(parsedNode); - if (instructions.size != ERROR_SIZE) { - ranSuccess = runner(instructions); - deleteInstructions(instructions); - } - deleteParsedNode(parsedNode); - } - deleteNodes(nodes); - return ranSuccess; -} - Clock runWithPrint(char const *restrict code) { Clock sum = 0; Clock diff = 0; - printf("----code:\n%s\n----\n", code); + fprintf(stderr, "----code:\n%s\n----\n", code); Clock start = getTimeInNano(); const Nodes nodes = lexer(code); diff += getTimeInNano() - start; sum += diff; - printNodes(nodes); - printf("----lexing in %ldns\n", diff); - start = getTimeInNano(); - ParsedNode *parsedNode = parser(nodes); - diff = getTimeInNano() - start; - sum += diff; - printf("----parsing in %ldns\n", diff); - if (parsedNode != NULL) { - printParsedNode(parsedNode); + if (nodes.size != ERROR_SIZE) { + printNodes(nodes); + fprintf(stderr, "----lexing in %ldns\n", diff); start = getTimeInNano(); - Instructions instructions = codeGenerator(parsedNode); - if (instructions.size != ERROR_SIZE) { - diff = getTimeInNano() - start; - sum += diff; - printf("----code_generator in %ldns\n", diff); - printInstructions(instructions); + ParsedNode *parsedNode = _parser(nodes, code); + diff = getTimeInNano() - start; + sum += diff; + if (parsedNode != NULL) { + printParsedNode(parsedNode); + fprintf(stderr, "----node parsing in %ldns\n", diff); start = getTimeInNano(); - bool ranSuccess = runner(instructions); + ParsedTree *parsedTree = _treeParser(parsedNode, code); diff = getTimeInNano() - start; sum += diff; - printf("----runner in %ldns\n", diff); - printf("ran sucessfully = %s\n", ranSuccess ? "true" : "false"); - printf("----sum %ldns\n", sum); - deleteInstructions(instructions); + if (parsedTree != NULL) { + printParsedTreeNode(parsedTree); + fprintf(stderr, "----tree parsing in %ldns\n", diff); + start = getTimeInNano(); + Instructions instructions = _codeGenerator(parsedTree, code); + diff = getTimeInNano() - start; + sum += diff; + if (instructions.size != ERROR_SIZE) { + printInstructions(instructions); + fprintf(stderr, "----code_generator in %ldns\n", diff); + start = getTimeInNano(); + bool ranSuccess = _runner(instructions); + diff = getTimeInNano() - start; + sum += diff; + fprintf(stderr, "----runner in %ldns\n", diff); + fprintf(stderr, "ran sucessfully = %s\n", + ranSuccess ? "true" : "false"); + fprintf(stderr, "----sum %ldns\n", sum); + deleteInstructions(instructions); + } else { + fprintf(stderr, "----returned error"); + } + deleteParsedTree(parsedTree); + } else { + fprintf(stderr, "----returned error"); + } + deleteParsedNode(parsedNode); + } else { + fprintf(stderr, "----returned error"); } - deleteParsedNode(parsedNode); + } else { + fprintf(stderr, "----returned error"); } deleteNodes(nodes); return sum; @@ -66,7 +74,7 @@ Clock process() { Clock sumAll = 0; for (size_t i = 0; i < sizeof(codes) / sizeof(char *); ++i) { Clock start = getTimeInNano(); - run(codes[i]); + runWithPrint(codes[i]); sumAll += getTimeInNano() - start; } return sumAll; @@ -74,7 +82,7 @@ Clock process() { void runBenchmark() { Clock sum = 0; - const int times = 10000; + const int times = 100; for (int i = 0; i < times; ++i) { sum += process(); } @@ -88,7 +96,23 @@ int main(int argc, char *argv[]) { return 1; } char *code = read_whole_file(argv[1]); - const bool ret = run(code); + if (code == NULL) { + return 1; + } + char *import = read_whole_file("stdlib/builtins.felan"); + if (import == NULL) { + return 1; + } + char *lastCode = code; + code = a404m_malloc(strlen(code) + strlen(import) + 2); + strcpy(code, import); + strcat(code, "\n"); + strcat(code, lastCode); + free(lastCode); + free(import); + + bool ret = runner(code); + free(code); if (ret) { return 0; diff --git a/src/utils/types.h b/src/utils/types.h index df4d8b3..e2d8053 100644 --- a/src/utils/types.h +++ b/src/utils/types.h @@ -10,6 +10,8 @@ typedef enum bool : uint8_t { false = 0, true = 1 } bool; #endif #endif +typedef const char *const SourceCode; + typedef struct SizedString { char *str; size_t size; diff --git a/src/vm/runner/runner.c b/src/vm/runner/runner.c index de1a478..4fcea52 100644 --- a/src/vm/runner/runner.c +++ b/src/vm/runner/runner.c @@ -3,6 +3,7 @@ #include <compiler/code_generator/code_generator.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include <utils/memory/memory.h> #include <utils/types.h> @@ -15,18 +16,39 @@ const char *BUILTIN_FUNCTION_NAMES[] = { const size_t BUILTIN_FUNCTIONS_SIZE = sizeof(BUILTIN_FUNCTIONS) / sizeof(BuiltinFunction); -bool runner(Instructions instructions) { +bool runner(SourceCode code) { + Instructions instructions = codeGenerator(code); + if (instructions.size != ERROR_SIZE) { + bool ranSuccess = _runner(instructions); + deleteInstructions(instructions); + return ranSuccess; + } + return false; +} + +bool _runner(Instructions instructions) { size_t stack_size = 0; void **stack = a404m_malloc(stack_size * sizeof(void *)); size_t stack_inserted = 0; + size_t variables_size = 0; + RunnerVariable **variables = + a404m_malloc(variables_size * sizeof(RunnerVariable *)); + size_t variables_inserted = 0; + for (size_t i = 0; i < instructions.size; ++i) { if (!runInstruction(instructions.instructions[i], &stack, &stack_size, - &stack_inserted)) { + &stack_inserted, &variables, &variables_size, + &variables_inserted)) { goto RETURN_ERROR; } } + for(size_t i = 0;i < variables_size;++i){ + free(variables[i]); + } + + free(variables); free(stack); return true; @@ -38,7 +60,7 @@ RETURN_ERROR: BuiltinFunction getBuiltinFunction(SizedString string) { for (size_t i = 0; i < BUILTIN_FUNCTIONS_SIZE; ++i) { const char *search = BUILTIN_FUNCTION_NAMES[i]; - // faster than strlen+strncpy + // faster than strlen+strncmp for (size_t j = 0;; ++j) { const char searchChar = search[j]; if (j == string.size) { @@ -59,21 +81,25 @@ BuiltinFunction getBuiltinFunction(SizedString string) { bool runInstruction(Instruction instruction, void ***restrict stack, size_t *restrict stack_size, - size_t *restrict stack_inserted) { + size_t *restrict stack_inserted, + RunnerVariable ***restrict variables, + size_t *restrict variables_size, + size_t *restrict variables_inserted) { switch (instruction.command) { + case COMMAND_PUSH_IDENTIFIER: { + const CommandPushIdentifierOperand *operand = instruction.operand; + pushToStack(getRunnerVariable(operand, variables, variables_inserted)->value, + stack, stack_size, stack_inserted); + return true; + } case COMMAND_PUSH_STRING: { - SizedString *string = instruction.operand; - if (*stack_inserted == *stack_size) { - *stack_size += *stack_size / 2 + 1; - *stack = a404m_realloc(*stack, *stack_size * sizeof(void *)); - } - (*stack)[*stack_inserted] = string; - ++*stack_inserted; + CommandPushStringOperand *operand = instruction.operand; + pushToStack(operand, stack, stack_size, stack_inserted); return true; } case COMMAND_CALL_FUNCTION: { SizedString *functionName = instruction.operand; - BuiltinFunction function = getBuiltinFunction(*functionName); + const BuiltinFunction function = getBuiltinFunction(*functionName); if (function == NULL) { fprintf(stderr, "function '%.*s' not found\n", (int)functionName->size, functionName->str); @@ -82,13 +108,74 @@ bool runInstruction(Instruction instruction, void ***restrict stack, function(stack, stack_inserted); return true; } + case COMMAND_POP_IDENTIFIER: { + const CommandPopIdentifierOperand *operand = instruction.operand; + setRunnerVariable(popFromStack(stack, stack_inserted), operand, variables, + variables_size, variables_inserted); + return true; + } case COMMAND_NONE: } - fprintf(stderr, "unknown command '%d'\n", instruction.command); + fprintf(stderr, "bad command '%d'\n", instruction.command); return false; } +RunnerVariable *getRunnerVariable(const SizedString *varName, + RunnerVariable ***restrict variables, + size_t *restrict variables_inserted) { + for (size_t i = *variables_inserted - 1; i != (typeof(i))-1; ++i) { + RunnerVariable *variable = (*variables)[i]; + if (variable->name->size == varName->size && + strncmp(varName->str, variable->name->str, varName->size) == 0) { + return variable; + } + } + return NULL; +} + +void setRunnerVariable(void *value, const SizedString *varName, + RunnerVariable ***restrict variables, + size_t *restrict variables_size, + size_t *restrict variables_inserted) { + RunnerVariable *variable = + getRunnerVariable(varName, variables, variables_inserted); + if (variable != NULL) { + variable->value = value; + return; + } + + variable = a404m_malloc(sizeof(*variable)); + variable->name = varName; + variable->value = value; + + if (*variables_inserted == *variables_size) { + *variables_size += *variables_size / 2 + 1; + *variables = + a404m_realloc(*variables, *variables_size * sizeof(RunnerVariable *)); + } + (*variables)[*variables_inserted] = variable; + *variables_inserted += 1; +} + +void *popFromStack(void ***restrict stack, size_t *restrict stack_inserted) { + if (*stack_inserted == 0) { + fprintf(stderr, "stack underflow\n"); + exit(1); + } + return (*stack)[--*stack_inserted]; +} + +void pushToStack(void *value, void ***restrict stack, + size_t *restrict stack_size, size_t *restrict stack_inserted) { + if (*stack_inserted == *stack_size) { + *stack_size += *stack_size / 2 + 1; + *stack = a404m_realloc(*stack, *stack_size * sizeof(void *)); + } + (*stack)[*stack_inserted] = value; + ++*stack_inserted; +} + void print(void ***restrict stack, size_t *restrict stack_inserted) { - const SizedString *string = (*stack)[--*stack_inserted]; + const SizedString *string = popFromStack(stack, stack_inserted); printf("%.*s", (int)string->size, string->str); } diff --git a/src/vm/runner/runner.h b/src/vm/runner/runner.h index e81aed7..d2df5d7 100644 --- a/src/vm/runner/runner.h +++ b/src/vm/runner/runner.h @@ -2,6 +2,11 @@ #include <compiler/code_generator/code_generator.h> +typedef struct RunnerVariable { + SizedString const *name; + void *value; +} RunnerVariable; + typedef void (*BuiltinFunction)(void ***restrict stack, size_t *restrict stack_inserted); @@ -9,10 +14,28 @@ extern const BuiltinFunction BUILTIN_FUNCTIONS[]; extern const char *BUILTIN_FUNCTION_NAMES[]; extern const size_t BUILTIN_FUNCTIONS_SIZE; -extern bool runner(Instructions instructions); +extern bool runner(SourceCode code); +extern bool _runner(Instructions instructions); extern bool runInstruction(Instruction instruction, void ***restrict stack, size_t *restrict stack_size, - size_t *restrict stack_inserted); + size_t *restrict stack_inserted, + RunnerVariable ***restrict variables, + size_t *restrict variables_size, + size_t *restrict variables_inserted); + +extern RunnerVariable *getRunnerVariable(const SizedString *varName, + RunnerVariable ***restrict variables, + size_t *restrict variables_inserted); +extern void setRunnerVariable(void *value, const SizedString *varName, + RunnerVariable ***restrict variables, + size_t *restrict variables_size, + size_t *restrict variables_inserted); + +extern void *popFromStack(void ***restrict stack, + size_t *restrict stack_inserted); +extern void pushToStack(void *value, void ***restrict stack, + size_t *restrict stack_size, + size_t *restrict stack_inserted); extern void print(void ***restrict stack, size_t *restrict stack_inserted); diff --git a/stdlib/builtins.felan b/stdlib/builtins.felan new file mode 100644 index 0000000..96f7335 --- /dev/null +++ b/stdlib/builtins.felan @@ -0,0 +1,19 @@ +void :: struct {}; + +i8 :: struct external; +i16 :: struct external; +i32 :: struct external; +i64 :: struct external; + +u8 :: struct external; +u16 :: struct external; +u32 :: struct external; +u64 :: struct external; + +f32 :: struct external; +f64 :: struct external; + +String :: struct external; + +print :: (str:String)->void external; + diff --git a/test.felan b/test.felan new file mode 100644 index 0000000..8d747a1 --- /dev/null +++ b/test.felan @@ -0,0 +1,2 @@ +"Hello"; +"new hello |