about summary refs log tree commit diff
path: root/src/compiler/lexer/lexer.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/compiler/lexer/lexer.c')
-rw-r--r-- src/compiler/lexer/lexer.c 145
1 files changed, 82 insertions, 63 deletions
diff --git a/src/compiler/lexer/lexer.c b/src/compiler/lexer/lexer.c
index b48be3c..ebcccc8 100644
--- a/src/compiler/lexer/lexer.c
+++ b/src/compiler/lexer/lexer.c
@@ -5,6 +5,8 @@
#include <stdlib.h>
#include <utils/memory/memory.h>
+#include "utils/types.h"
+
const char *TOKEN_STRINGS[] = {
"TOKEN_NONE",
"TOKEN_IDENTIFIER",
@@ -18,17 +20,30 @@ const char *TOKEN_STRINGS[] = {
"TOKEN_OPERATOR_ASSIGN",
"TOKEN_OPERATOR_EQUAL",
"TOKEN_OPERATOR_COLON",
+ "TOKEN_OPERATOR_COMMA",
"TOKEN_OPERATOR_EOL",
+ "TOKEN_OPERATOR_FUNCTION",
"TOKEN_SYMBOL",
+ "TOKEN_KEYWORD_STRUCT",
+ "TOKEN_KEYWORD_EXTERNAL",
+ "TOKEN_KEYWORD_IMPORT",
"TOKEN_PARSED",
};
-static const char *KEYWORDS_STRINGS[] = {};
-static const Token KEYWORDS_TOKENS[] = {};
+static const char *KEYWORDS_STRINGS[] = {
+ "struct",
+ "external",
+ "import",
+};
+static const Token KEYWORDS_TOKENS[] = {
+ TOKEN_KEYWORD_STRUCT,
+ TOKEN_KEYWORD_EXTERNAL,
+ TOKEN_KEYWORD_IMPORT,
+};
static const size_t KEYWORDS_SIZE = sizeof(KEYWORDS_STRINGS) / sizeof(char *);
static const char *OPERATORS_STRINGS[] = {
- "(", ")", "{", "}", "=", "==", ":", ";",
+ "(", ")", "{", "}", "=", "==", ":", ",", ";", "->",
};
static const Token OPERATORS_TOKENS[] = {
TOKEN_OPERATOR_PARENTHESES_OPEN,
@@ -38,7 +53,9 @@ static const Token OPERATORS_TOKENS[] = {
TOKEN_OPERATOR_ASSIGN,
TOKEN_OPERATOR_EQUAL,
TOKEN_OPERATOR_COLON,
+ TOKEN_OPERATOR_COMMA,
TOKEN_OPERATOR_EOL,
+ TOKEN_OPERATOR_FUNCTION,
};
static const size_t OPERATORS_SIZE = sizeof(OPERATORS_STRINGS) / sizeof(char *);
@@ -52,161 +69,156 @@ void printNodes(Nodes nodes) {
void deleteNodes(Nodes nodes) { free(nodes.nodes); }
-Nodes lexer(char const *restrict str) {
+Nodes lexer(char const *const restrict code) {
size_t nodes_size = 10;
Node *nodes = a404m_malloc(nodes_size * sizeof(Node));
size_t nodes_inserted = 0;
Node node = {
- .strBegin = str,
- .strEnd = str,
+ .strBegin = code,
+ .strEnd = code,
.token = TOKEN_NONE,
};
for (int i = 0;; ++i) {
- const char c = str[i];
+ const char c = code[i];
if (c == '\0') {
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_NONE);
break;
} else if (c == '/') {
- const char follow = str[i + 1];
+ const char follow = code[i + 1];
if (follow == '/') {
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_NONE);
- for (i += 2; str[i] != '\0' && str[i] != '\n'; ++i);
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ for (i += 2; code[i] != '\0' && code[i] != '\n'; ++i);
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_NONE);
- if (str[i] == '\0') {
+ if (code[i] == '\0') {
goto RETURN_SUCCESS;
}
continue;
} else if (follow == '*') {
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_NONE);
int in = 1;
for (i += 2;; ++i) {
- switch (str[i]) {
+ switch (code[i]) {
case '\0':
- fprintf(stderr,
- "expected multi line comment to end at compiler line %d "
- "and in=%d\n",
- __LINE__, in);
- exit(1);
+ printError("Expected multi line comment to end", code,
+ node.strBegin, code + i);
+ goto RETURN_ERROR;
case '*':
++i;
- if (str[i] == '/') {
+ if (code[i] == '/') {
--in;
if (in == 0) {
goto END_OF_BLOCK_COMMENT_LOOP;
}
- } else if (str[i] == '\0') {
- fprintf(stderr,
- "expected multi line comment to end at compiler line "
- "%d and in=%d\n",
- __LINE__, in);
- exit(1);
+ } else if (code[i] == '\0') {
+ printError("Expected multi line comment to end", code,
+ node.strBegin, code + i);
+ goto RETURN_ERROR;
}
break;
case '/':
++i;
- if (str[i] == '*') {
+ if (code[i] == '*') {
++in;
- } else if (str[i] == '\0') {
- fprintf(stderr,
- "expected multi line comment to end at compiler line "
- "%d and in=%d\n",
- __LINE__, in);
- exit(1);
+ } else if (code[i] == '\0') {
+ printError("Expected multi line comment to end", code,
+ node.strBegin, code + i);
+ goto RETURN_ERROR;
}
break;
}
}
END_OF_BLOCK_COMMENT_LOOP:
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_NONE);
- if (str[i] == '\0') {
+ if (code[i] == '\0') {
goto RETURN_SUCCESS;
}
continue;
}
}
if (isSpace(c)) {
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_NONE);
} else if (isIdentifier(c)) {
if (node.token != TOKEN_IDENTIFIER && node.token != TOKEN_SYMBOL) {
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_IDENTIFIER);
}
} else if (isIdentifierSymbol(c)) {
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_IDENTIFIER);
for (++i;; ++i) {
- const char current = str[i];
+ const char current = code[i];
if (current == c) {
break;
} else if (current == '\0') {
- fprintf(stderr, "expected %c to end\n", c);
- exit(1);
+ printError("Expected %c to end", code, node.strBegin, code + i, c);
+ goto RETURN_ERROR;
}
}
++node.strBegin;
- push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node, str,
- i, TOKEN_NONE);
+ push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node,
+ code, i, TOKEN_NONE);
} else if (isNumber(c)) {
if (node.token != TOKEN_NUMBER && node.token != TOKEN_IDENTIFIER &&
node.token != TOKEN_SYMBOL) {
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_NUMBER);
}
} else if (isString(c)) {
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_STRING);
for (++i;; ++i) {
- const char current = str[i];
+ const char current = code[i];
if (current == c) {
break;
} else if (current == '\\') {
++i;
} else if (current == '\0') {
- fprintf(stderr, "expected %c to end\n", c);
- exit(1);
+ printError("Expected %c to end", code, node.strBegin, code + i, c);
+ goto RETURN_ERROR;
}
}
++i;
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_NONE);
--i;
} else if (isOperator(c)) {
if (node.token == TOKEN_OPERATOR) {
- const Token token = getOperator(node.strBegin, str + i + 1);
+ const Token token = getOperator(node.strBegin, code + i + 1);
if (token != TOKEN_NONE) {
continue;
} else {
- node.token = getOperator(node.strBegin, str + i);
+ node.token = getOperator(node.strBegin, code + i);
if (node.token == TOKEN_NONE) {
- fprintf(stderr, "unknown operator '%.*s'\n",
- (int)(str + i - node.strBegin), node.strBegin);
- exit(1);
+ printError("Unknown operator '%.*s'", code, node.strBegin,
+ node.strEnd, (int)(code + i - node.strBegin),
+ node.strBegin);
+ goto RETURN_ERROR;
}
push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node,
- str, i, TOKEN_OPERATOR);
+ code, i, TOKEN_OPERATOR);
}
} else {
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_OPERATOR);
}
} else if (isSymbol(c)) {
if (node.token != TOKEN_SYMBOL) {
- push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, code, i,
TOKEN_SYMBOL);
}
} else {
- fprintf(stderr, "unexpected char '%c'\n", c);
- exit(1);
+ printError("Unexpected char '%c'", code, code + i, code + i + 1, c);
+ goto RETURN_ERROR;
}
}
@@ -217,6 +229,14 @@ RETURN_SUCCESS:
};
return result;
+RETURN_ERROR:
+ free(nodes);
+ Nodes error = {
+ .nodes = NULL,
+ .size = ERROR_SIZE,
+ };
+
+ return error;
}
void push_if_not_empty(Node **restrict nodes, size_t *restrict nodes_size,
@@ -298,9 +318,8 @@ bool isOperator(char c) {
case ',':
case ';':
return true;
- default:
- return false;
}
+ return false;
}
bool isSymbol(char c) { return c == '#'; }
@@ -312,7 +331,7 @@ Token getTokenInStrings(char const *strBegin, char const *strEnd,
for (size_t i = 0; i < size; ++i) {
const char *search = strings[i];
- // faster than strlen+strncpy
+ // faster than strlen+strncmp
for (size_t j = 0;; ++j) {
const char searchChar = search[j];
if (j == strSize) {