diff options
Diffstat (limited to 'src/compiler/lexer/lexer.c')
-rw-r--r-- | src/compiler/lexer/lexer.c | 145 |
1 files changed, 82 insertions, 63 deletions
diff --git a/src/compiler/lexer/lexer.c b/src/compiler/lexer/lexer.c index b48be3c..ebcccc8 100644 --- a/src/compiler/lexer/lexer.c +++ b/src/compiler/lexer/lexer.c @@ -5,6 +5,8 @@ #include <stdlib.h> #include <utils/memory/memory.h> +#include "utils/types.h" + const char *TOKEN_STRINGS[] = { "TOKEN_NONE", "TOKEN_IDENTIFIER", @@ -18,17 +20,30 @@ const char *TOKEN_STRINGS[] = { "TOKEN_OPERATOR_ASSIGN", "TOKEN_OPERATOR_EQUAL", "TOKEN_OPERATOR_COLON", + "TOKEN_OPERATOR_COMMA", "TOKEN_OPERATOR_EOL", + "TOKEN_OPERATOR_FUNCTION", "TOKEN_SYMBOL", + "TOKEN_KEYWORD_STRUCT", + "TOKEN_KEYWORD_EXTERNAL", + "TOKEN_KEYWORD_IMPORT", "TOKEN_PARSED", }; -static const char *KEYWORDS_STRINGS[] = {}; -static const Token KEYWORDS_TOKENS[] = {}; +static const char *KEYWORDS_STRINGS[] = { + "struct", + "external", + "import", +}; +static const Token KEYWORDS_TOKENS[] = { + TOKEN_KEYWORD_STRUCT, + TOKEN_KEYWORD_EXTERNAL, + TOKEN_KEYWORD_IMPORT, +}; static const size_t KEYWORDS_SIZE = sizeof(KEYWORDS_STRINGS) / sizeof(char *); static const char *OPERATORS_STRINGS[] = { - "(", ")", "{", "}", "=", "==", ":", ";", + "(", ")", "{", "}", "=", "==", ":", ",", ";", "->", }; static const Token OPERATORS_TOKENS[] = { TOKEN_OPERATOR_PARENTHESES_OPEN, @@ -38,7 +53,9 @@ static const Token OPERATORS_TOKENS[] = { TOKEN_OPERATOR_ASSIGN, TOKEN_OPERATOR_EQUAL, TOKEN_OPERATOR_COLON, + TOKEN_OPERATOR_COMMA, TOKEN_OPERATOR_EOL, + TOKEN_OPERATOR_FUNCTION, }; static const size_t OPERATORS_SIZE = sizeof(OPERATORS_STRINGS) / sizeof(char *); @@ -52,161 +69,156 @@ void printNodes(Nodes nodes) { void deleteNodes(Nodes nodes) { free(nodes.nodes); } -Nodes lexer(char const *restrict str) { +Nodes lexer(char const *const restrict code) { size_t nodes_size = 10; Node *nodes = a404m_malloc(nodes_size * sizeof(Node)); size_t nodes_inserted = 0; Node node = { - .strBegin = str, - .strEnd = str, + .strBegin = code, + .strEnd = code, .token = TOKEN_NONE, }; for (int i = 0;; ++i) { - const char c = str[i]; + const char c = code[i]; if (c == '\0') { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); break; } else if (c == '/') { - const char follow = str[i + 1]; + const char follow = code[i + 1]; if (follow == '/') { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); - for (i += 2; str[i] != '\0' && str[i] != '\n'; ++i); - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + for (i += 2; code[i] != '\0' && code[i] != '\n'; ++i); + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); - if (str[i] == '\0') { + if (code[i] == '\0') { goto RETURN_SUCCESS; } continue; } else if (follow == '*') { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); int in = 1; for (i += 2;; ++i) { - switch (str[i]) { + switch (code[i]) { case '\0': - fprintf(stderr, - "expected multi line comment to end at compiler line %d " - "and in=%d\n", - __LINE__, in); - exit(1); + printError("Expected multi line comment to end", code, + node.strBegin, code + i); + goto RETURN_ERROR; case '*': ++i; - if (str[i] == '/') { + if (code[i] == '/') { --in; if (in == 0) { goto END_OF_BLOCK_COMMENT_LOOP; } - } else if (str[i] == '\0') { - fprintf(stderr, - "expected multi line comment to end at compiler line " - "%d and in=%d\n", - __LINE__, in); - exit(1); + } else if (code[i] == '\0') { + printError("Expected multi line comment to end", code, + node.strBegin, code + i); + goto RETURN_ERROR; } break; case '/': ++i; - if (str[i] == '*') { + if (code[i] == '*') { ++in; - } else if (str[i] == '\0') { - fprintf(stderr, - "expected multi line comment to end at compiler line " - "%d and in=%d\n", - __LINE__, in); - exit(1); + } else if (code[i] == '\0') { + printError("Expected multi line comment to end", code, + node.strBegin, code + i); + goto RETURN_ERROR; } break; } } END_OF_BLOCK_COMMENT_LOOP: - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); - if (str[i] == '\0') { + if (code[i] == '\0') { goto RETURN_SUCCESS; } continue; } } if (isSpace(c)) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); } else if (isIdentifier(c)) { if (node.token != TOKEN_IDENTIFIER && node.token != TOKEN_SYMBOL) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_IDENTIFIER); } } else if (isIdentifierSymbol(c)) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_IDENTIFIER); for (++i;; ++i) { - const char current = str[i]; + const char current = code[i]; if (current == c) { break; } else if (current == '\0') { - fprintf(stderr, "expected %c to end\n", c); - exit(1); + printError("Expected %c to end", code, node.strBegin, code + i, c); + goto RETURN_ERROR; } } ++node.strBegin; - push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node, str, - i, TOKEN_NONE); + push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node, + code, i, TOKEN_NONE); } else if (isNumber(c)) { if (node.token != TOKEN_NUMBER && node.token != TOKEN_IDENTIFIER && node.token != TOKEN_SYMBOL) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NUMBER); } } else if (isString(c)) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_STRING); for (++i;; ++i) { - const char current = str[i]; + const char current = code[i]; if (current == c) { break; } else if (current == '\\') { ++i; } else if (current == '\0') { - fprintf(stderr, "expected %c to end\n", c); - exit(1); + printError("Expected %c to end", code, node.strBegin, code + i, c); + goto RETURN_ERROR; } } ++i; - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_NONE); --i; } else if (isOperator(c)) { if (node.token == TOKEN_OPERATOR) { - const Token token = getOperator(node.strBegin, str + i + 1); + const Token token = getOperator(node.strBegin, code + i + 1); if (token != TOKEN_NONE) { continue; } else { - node.token = getOperator(node.strBegin, str + i); + node.token = getOperator(node.strBegin, code + i); if (node.token == TOKEN_NONE) { - fprintf(stderr, "unknown operator '%.*s'\n", - (int)(str + i - node.strBegin), node.strBegin); - exit(1); + printError("Unknown operator '%.*s'", code, node.strBegin, + node.strEnd, (int)(code + i - node.strBegin), + node.strBegin); + goto RETURN_ERROR; } push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node, - str, i, TOKEN_OPERATOR); + code, i, TOKEN_OPERATOR); } } else { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_OPERATOR); } } else if (isSymbol(c)) { if (node.token != TOKEN_SYMBOL) { - push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i, TOKEN_SYMBOL); } } else { - fprintf(stderr, "unexpected char '%c'\n", c); - exit(1); + printError("Unexpected char '%c'", code, code + i, code + i + 1, c); + goto RETURN_ERROR; } } @@ -217,6 +229,14 @@ RETURN_SUCCESS: }; return result; +RETURN_ERROR: + free(nodes); + Nodes error = { + .nodes = NULL, + .size = ERROR_SIZE, + }; + + return error; } void push_if_not_empty(Node **restrict nodes, size_t *restrict nodes_size, @@ -298,9 +318,8 @@ bool isOperator(char c) { case ',': case ';': return true; - default: - return false; } + return false; } bool isSymbol(char c) { return c == '#'; } @@ -312,7 +331,7 @@ Token getTokenInStrings(char const *strBegin, char const *strEnd, for (size_t i = 0; i < size; ++i) { const char *search = strings[i]; - // faster than strlen+strncpy + // faster than strlen+strncmp for (size_t j = 0;; ++j) { const char searchChar = search[j]; if (j == strSize) { |