aboutsummaryrefslogtreecommitdiff
path: root/src/compiler
diff options
context:
space:
mode:
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/code_generator/code_generator.c199
-rw-r--r--src/compiler/code_generator/code_generator.h41
-rw-r--r--src/compiler/lexer/lexer.c307
-rw-r--r--src/compiler/lexer/lexer.h66
-rw-r--r--src/compiler/parser/parser.c351
-rw-r--r--src/compiler/parser/parser.h62
6 files changed, 1026 insertions, 0 deletions
diff --git a/src/compiler/code_generator/code_generator.c b/src/compiler/code_generator/code_generator.c
new file mode 100644
index 0000000..0beca33
--- /dev/null
+++ b/src/compiler/code_generator/code_generator.c
@@ -0,0 +1,199 @@
+#include "code_generator.h"
+
+#include <compiler/parser/parser.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "utils/memory/memory.h"
+#include "utils/types.h"
+
+const char *COMMAND_STRINGS[] = {
+ "COMMAND_NONE",
+ "COMMAND_PRINT",
+ "COMMAND_PUSH_STRING",
+};
+
+void printInstruction(Instruction instruction) {
+ printf("%s", COMMAND_STRINGS[instruction.command]);
+ switch (instruction.command) {
+ case COMMAND_NONE:
+ case COMMAND_PRINT:
+ break;
+ case COMMAND_PUSH_STRING:
+ SizedString *sizedString = instruction.operand;
+ printf(" '%.*s'", (int)sizedString->size, sizedString->str);
+ break;
+ default:
+ fprintf(stderr, "bad instruction %d\n", instruction.command);
+ }
+ printf("\n");
+}
+
+void printInstructions(Instructions instructions) {
+ for (size_t i = 0; i < instructions.size; ++i) {
+ printInstruction(instructions.instructions[i]);
+ }
+}
+
+void deleteInstruction(Instruction instruction) {
+ switch (instruction.command) {
+ case COMMAND_NONE:
+ case COMMAND_PRINT:
+ break;
+ case COMMAND_PUSH_STRING:
+ SizedString *sizedString = instruction.operand;
+ free(sizedString->str);
+ free(sizedString);
+ break;
+ default:
+ fprintf(stderr, "bad instruction %d\n", instruction.command);
+ }
+}
+
+void deleteInstructions(Instructions instructions) {
+ for (size_t i = 0; i < instructions.size; ++i) {
+ deleteInstruction(instructions.instructions[i]);
+ }
+ free(instructions.instructions);
+}
+
+Instructions codeGenerator(ParsedNode *root) {
+ const ScopeMetadata *metadata = root->metadata;
+
+ size_t instructions_size = 10;
+ Instruction *instructions =
+ a404m_malloc(instructions_size * sizeof(Instruction));
+ size_t instructions_inserted = 0;
+
+ for (size_t i = 0; i < metadata->operands_size; ++i) {
+ ParsedNode *node = metadata->operands[i];
+ if (!nodeToInstruction(node, &instructions, &instructions_size,
+ &instructions_inserted)) {
+ goto RETURN_ERROR;
+ }
+ }
+
+ Instructions result = {
+ .instructions = a404m_realloc(
+ instructions, instructions_inserted * sizeof(Instruction)),
+ .size = instructions_inserted,
+ };
+ return result;
+
+RETURN_ERROR:
+ const Instructions error = {
+ .instructions = NULL,
+ .size = ERROR_SIZE,
+ };
+ return error;
+}
+
+bool nodeToInstruction(ParsedNode *node, Instruction **instructions,
+ size_t *instructions_size,
+ size_t *instructions_inserted) {
+ switch (node->token) {
+ // TODO: this is wrong when you want functions
+ case PARSED_TOKEN_PARENTHESIS: {
+ const ScopeMetadata *metadata = node->metadata;
+ for (size_t i = 0; i < metadata->operands_size; ++i) {
+ if (!nodeToInstruction(metadata->operands[i], instructions,
+ instructions_size, instructions_inserted)) {
+ return false;
+ }
+ }
+ return true;
+ }
+ case PARSED_TOKEN_EOL:
+ return nodeToInstruction(node->metadata, instructions, instructions_size,
+ instructions_inserted);
+ break;
+ case PARSED_TOKEN_PRINT:
+ if (nodeToInstruction(node->metadata, instructions, instructions_size,
+ instructions_inserted)) {
+ const Instruction instruction = {
+ .command = COMMAND_PRINT,
+ .operand = NULL,
+ };
+ insertInstruction(instruction, instructions, instructions_size,
+ instructions_inserted);
+ return true;
+ } else {
+ return false;
+ }
+ break;
+ case PARSED_TOKEN_VALUE_STRING: {
+ SizedString *string = nodeToString(node);
+ if (string == NULL) {
+ return false;
+ }
+ const Instruction instruction = {
+ .command = COMMAND_PUSH_STRING,
+ .operand = string,
+ };
+ insertInstruction(instruction, instructions, instructions_size,
+ instructions_inserted);
+ return true;
+ }
+ default:
+ fprintf(stderr, "unexpected token %s\n",
+ PARSED_TOKEN_STRINGS[node->token]);
+ return false;
+ }
+}
+
+void insertInstruction(const Instruction instruction,
+ Instruction **restrict instructions,
+ size_t *restrict instructions_size,
+ size_t *restrict instructions_inserted) {
+ if (*instructions_inserted == *instructions_size) {
+ *instructions_size += *instructions_size / 2 + 1;
+ *instructions =
+ a404m_realloc(*instructions, *instructions_size * sizeof(Instruction));
+ }
+ (*instructions)[*instructions_inserted] = instruction;
+ ++*instructions_inserted;
+}
+
+SizedString *nodeToString(ParsedNode const *node) {
+ const char *strBegin = node->strBegin + 1;
+ const char *strEnd = node->strEnd - 1;
+
+ char *str = a404m_malloc((strEnd - strBegin + 1) * sizeof(char));
+ size_t inserted = 0;
+
+ for (char const *iter = strBegin; iter < strEnd; ++iter) {
+ char c = *iter;
+ if (c == '\\') {
+ if (++iter < strEnd) {
+ switch (*iter) {
+ case '\'':
+ c = '\'';
+ break;
+ case '\"':
+ c = '\"';
+ break;
+ case 'n':
+ c = '\n';
+ break;
+ default:
+ fprintf(stderr, "bad string, bad '\\'\n");
+ goto RETURN_ERROR;
+ }
+ } else {
+ fprintf(stderr, "bad string, bad '\\'\n");
+ goto RETURN_ERROR;
+ }
+ }
+ str[inserted] = c;
+ ++inserted;
+ }
+
+ str[inserted] = '\0';
+ SizedString *string = a404m_malloc(sizeof(SizedString));
+ string->str = a404m_realloc(str, (inserted + 1) * sizeof(char));
+ string->size = inserted;
+ return string;
+RETURN_ERROR:
+ free(str);
+ return NULL;
+}
diff --git a/src/compiler/code_generator/code_generator.h b/src/compiler/code_generator/code_generator.h
new file mode 100644
index 0000000..1a9b4ff
--- /dev/null
+++ b/src/compiler/code_generator/code_generator.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include <stdio.h>
+
+#include "compiler/parser/parser.h"
+typedef enum Command {
+ COMMAND_NONE = 0,
+ COMMAND_PRINT,
+ COMMAND_PUSH_STRING,
+} Command;
+
+extern const char *COMMAND_STRINGS[];
+
+typedef struct Instruction {
+ Command command;
+ void *operand;
+} Instruction;
+
+typedef struct Instructions {
+ Instruction *restrict instructions;
+ size_t size;
+} Instructions;
+
+extern void printInstruction(Instruction instruction);
+extern void printInstructions(Instructions instructions);
+
+extern void deleteInstruction(Instruction instruction);
+extern void deleteInstructions(Instructions instructions);
+
+extern Instructions codeGenerator(ParsedNode *root);
+
+extern bool nodeToInstruction(ParsedNode *node, Instruction **instructions,
+ size_t *instructions_size,
+ size_t *instructions_inserted);
+
+extern void insertInstruction(const Instruction instruction,
+ Instruction **restrict instructions,
+ size_t *restrict instructions_size,
+ size_t *restrict instructions_inserted);
+
+extern SizedString *nodeToString(ParsedNode const *node);
diff --git a/src/compiler/lexer/lexer.c b/src/compiler/lexer/lexer.c
new file mode 100644
index 0000000..35631f5
--- /dev/null
+++ b/src/compiler/lexer/lexer.c
@@ -0,0 +1,307 @@
+#include "lexer.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <utils/memory/memory.h>
+
+const char *TOKEN_STRINGS[] = {
+ "TOKEN_NONE",
+ "TOKEN_IDENTIFIER",
+ "TOKEN_KEYWORD_PRINT",
+ "TOKEN_NUMBER",
+ "TOKEN_STRING",
+ "TOKEN_OPERATOR",
+ "TOKEN_OPERATOR_PARENTHESES_OPEN",
+ "TOKEN_OPERATOR_PARENTHESES_CLOSE",
+ "TOKEN_OPERATOR_CURLY_BRACKET_OPEN",
+ "TOKEN_OPERATOR_CURLY_BRACKET_CLOSE",
+ "TOKEN_OPERATOR_ASSIGN",
+ "TOKEN_OPERATOR_EQUAL",
+ "TOKEN_OPERATOR_COLON",
+ "TOKEN_OPERATOR_EOL",
+ "TOKEN_SYMBOL",
+ "TOKEN_PARSED",
+};
+
+static const char *KEYWORDS_STRINGS[] = {
+ "print",
+};
+static const Token KEYWORDS_TOKENS[] = {
+ TOKEN_KEYWORD_PRINT,
+};
+static const size_t KEYWORDS_SIZE = sizeof(KEYWORDS_STRINGS) / sizeof(char *);
+
+static const char *OPERATORS_STRINGS[] = {
+ "(", ")", "{", "}", "=", "==", ":", ";",
+};
+static const Token OPERATORS_TOKENS[] = {
+ TOKEN_OPERATOR_PARENTHESES_OPEN,
+ TOKEN_OPERATOR_PARENTHESES_CLOSE,
+ TOKEN_OPERATOR_CURLY_BRACKET_OPEN,
+ TOKEN_OPERATOR_CURLY_BRACKET_CLOSE,
+ TOKEN_OPERATOR_ASSIGN,
+ TOKEN_OPERATOR_EQUAL,
+ TOKEN_OPERATOR_COLON,
+ TOKEN_OPERATOR_EOL,
+};
+static const size_t OPERATOR_SIZE = sizeof(OPERATORS_STRINGS) / sizeof(char *);
+
+void printNodes(Nodes nodes) {
+ for (size_t i = 0; i < nodes.size; ++i) {
+ const Node node = nodes.nodes[i];
+ printf("{'%.*s',%s}\n", (int)(node.strEnd - node.strBegin), node.strBegin,
+ TOKEN_STRINGS[node.token]);
+ }
+}
+
+void deleteNodes(Nodes nodes) { free(nodes.nodes); }
+
+Nodes lexer(char const *restrict str) {
+ size_t nodes_size = 10;
+ Node *nodes = a404m_malloc(nodes_size * sizeof(Node));
+ size_t nodes_inserted = 0;
+
+ Node node = {
+ .strBegin = str,
+ .strEnd = str,
+ .token = TOKEN_NONE,
+ };
+
+ for (int i = 0;; ++i) {
+ const char c = str[i];
+ if (c == '\0') {
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_NONE);
+ break;
+ }
+
+ if (c == '/') {
+ const char follow = str[i + 1];
+ if (follow == '/') {
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_NONE);
+ for (i += 2; str[i] != '\0' && str[i] != '\n'; ++i);
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_NONE);
+ if (str[i] == '\0') {
+ goto RETURN_SUCCESS;
+ }
+ continue;
+ } else if (follow == '*') {
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_NONE);
+ for (i += 2; str[i] != '\0' && str[i + 1] != '\0' &&
+ (str[i] != '*' || str[i + 1] != '/');
+ ++i);
+ if (str[i] == '\0' || str[i + 1] == '\0') {
+ perror("expected multi line comment to end\n");
+ exit(1);
+ }
+ i += 1;
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_NONE);
+ if (str[i] == '\0') {
+ goto RETURN_SUCCESS;
+ }
+ continue;
+ }
+ }
+ if (isSpace(c)) {
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_NONE);
+ } else if (isIdentifier(c)) {
+ if (node.token != TOKEN_IDENTIFIER && node.token != TOKEN_SYMBOL) {
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_IDENTIFIER);
+ }
+ } else if (isIdentifierSymbol(c)) {
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_IDENTIFIER);
+ for (++i;; ++i) {
+ const char current = str[i];
+ if (current == c) {
+ break;
+ } else if (current == '\0') {
+ fprintf(stderr, "expected %c to end\n", c);
+ exit(1);
+ }
+ }
+ ++node.strBegin;
+ push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node, str,
+ i, TOKEN_NONE);
+ } else if (isNumber(c)) {
+ if (node.token != TOKEN_NUMBER && node.token != TOKEN_IDENTIFIER &&
+ node.token != TOKEN_SYMBOL) {
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_NUMBER);
+ }
+ } else if (isString(c)) {
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_STRING);
+
+ for (++i;; ++i) {
+ const char current = str[i];
+ if (current == c) {
+ break;
+ } else if (current == '\0') {
+ fprintf(stderr, "expected %c to end\n", c);
+ exit(1);
+ }
+ }
+
+ ++i;
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_NONE);
+ --i;
+ } else if (isOperator(c)) {
+ if (node.token == TOKEN_OPERATOR) {
+ const Token token = getOperator(node.strBegin, str + i + 1);
+ if (token != TOKEN_NONE) {
+ continue;
+ } else {
+ node.token = getOperator(node.strBegin, str + i);
+ if (node.token == TOKEN_NONE) {
+ fprintf(stderr, "unknown operator '%.*s'\n",
+ (int)(str + i - node.strBegin), node.strBegin);
+ exit(1);
+ }
+ push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node,
+ str, i, TOKEN_OPERATOR);
+ }
+ } else {
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_OPERATOR);
+ }
+ } else if (isSymbol(c)) {
+ if (node.token != TOKEN_SYMBOL) {
+ push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
+ TOKEN_SYMBOL);
+ }
+ } else {
+ fprintf(stderr, "unexpected char '%c'\n", c);
+ exit(1);
+ }
+ }
+
+RETURN_SUCCESS:
+ Nodes result = {
+ .nodes = a404m_realloc(nodes, nodes_inserted * sizeof(Node)),
+ .size = nodes_inserted,
+ };
+
+ return result;
+}
+
+void push_if_not_empty(Node **restrict nodes, size_t *restrict nodes_size,
+ size_t *restrict nodes_inserted, Node *restrict node,
+ char const *restrict str, int i, Token token) {
+ if (node->token != TOKEN_NONE) {
+ if (*nodes_size == *nodes_inserted) {
+ *nodes_size += *nodes_size / 2 + 1;
+ *nodes = a404m_realloc(*nodes, *nodes_size * sizeof(Node));
+ }
+ node->strEnd = str + i;
+ if (node->token == TOKEN_IDENTIFIER) {
+ const Token foundToken = getKeyword(node->strBegin, node->strEnd);
+ if (foundToken != TOKEN_NONE) {
+ node->token = foundToken;
+ }
+ } else if (node->token == TOKEN_OPERATOR) {
+ const Token foundToken = getOperator(node->strBegin, node->strEnd);
+ if (foundToken != TOKEN_NONE) {
+ node->token = foundToken;
+ }
+ }
+
+ (*nodes)[*nodes_inserted] = *node;
+ ++*nodes_inserted;
+ }
+ node->strBegin = str + i;
+ node->token = token;
+}
+
+void push_clear_without_check(Node **restrict nodes,
+ size_t *restrict nodes_size,
+ size_t *restrict nodes_inserted, Node *node,
+ char const *restrict str, int i, Token token) {
+ if (*nodes_size == *nodes_inserted) {
+ *nodes_size += *nodes_size / 2 + 1;
+ *nodes = a404m_realloc(*nodes, *nodes_size * sizeof(Node));
+ }
+ node->strEnd = str + i;
+ (*nodes)[*nodes_inserted] = *node;
+ ++*nodes_inserted;
+ node->strBegin = str + i;
+ node->token = token;
+}
+
+bool isSpace(char c) { return isspace(c); }
+
+bool isIdentifier(char c) {
+ return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_';
+}
+
+bool isIdentifierSymbol(char c) { return c == '`'; }
+
+bool isNumber(char c) { return '0' <= c && c <= '9'; }
+
+bool isString(char c) { return c == '"' || c == '\''; }
+
+bool isOperator(char c) {
+ switch (c) {
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '*':
+ case '/':
+ case '%':
+ case '+':
+ case '-':
+ case ':':
+ case '!':
+ case '=':
+ case '<':
+ case '>':
+ case '&':
+ case '|':
+ case '.':
+ case ',':
+ case ';':
+ return true;
+ default:
+ return false;
+ }
+}
+
+bool isSymbol(char c) { return c == '#'; }
+
+
+Token getKeyword(char const *strBegin, char const *strEnd) {
+ const size_t strSize = strEnd - strBegin;
+
+ for (size_t i = 0; i < KEYWORDS_SIZE; ++i) {
+ const char *search = KEYWORDS_STRINGS[i];
+ if (strlen(search) == strSize && strncmp(search, strBegin, strSize) == 0) {
+ return KEYWORDS_TOKENS[i];
+ }
+ }
+
+ return TOKEN_NONE;
+}
+Token getOperator(char const *strBegin, char const *strEnd) {
+ const size_t strSize = strEnd - strBegin;
+
+ for (size_t i = 0; i < OPERATOR_SIZE; ++i) {
+ const char *search = OPERATORS_STRINGS[i];
+ if (strlen(search) == strSize && strncmp(search, strBegin, strSize) == 0) {
+ return OPERATORS_TOKENS[i];
+ }
+ }
+
+ return TOKEN_NONE;
+}
diff --git a/src/compiler/lexer/lexer.h b/src/compiler/lexer/lexer.h
new file mode 100644
index 0000000..7d60101
--- /dev/null
+++ b/src/compiler/lexer/lexer.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include <stddef.h>
+#include <utils/memory/memory.h>
+#include <utils/types.h>
+
+typedef enum Token {
+ TOKEN_NONE = 0,
+ TOKEN_IDENTIFIER,
+ TOKEN_KEYWORD_PRINT,
+ TOKEN_NUMBER,
+ TOKEN_STRING,
+ TOKEN_OPERATOR,
+ TOKEN_OPERATOR_PARENTHESES_OPEN,
+ TOKEN_OPERATOR_PARENTHESES_CLOSE,
+ TOKEN_OPERATOR_CURLY_BRACKET_OPEN,
+ TOKEN_OPERATOR_CURLY_BRACKET_CLOSE,
+ TOKEN_OPERATOR_ASSIGN,
+ TOKEN_OPERATOR_EQUAL,
+ TOKEN_OPERATOR_COLON,
+ TOKEN_OPERATOR_EOL,
+ TOKEN_SYMBOL,
+ TOKEN_PARSED,
+} Token;
+
+extern const char *TOKEN_STRINGS[];
+
+typedef struct Node {
+ char const *strBegin;
+ char const *strEnd;
+ Token token;
+ void *parsedNode;
+} Node;
+
+typedef struct Nodes {
+ Node *nodes;
+ size_t size;
+} Nodes;
+
+extern void printNodes(Nodes nodes);
+
+extern void deleteNodes(Nodes nodes);
+
+extern Nodes lexer(char const *restrict str);
+
+extern void push_if_not_empty(Node **restrict nodes,
+ size_t *restrict nodes_size,
+ size_t *restrict nodes_inserted,
+ Node *restrict node, char const *restrict str,
+ int i, Token token);
+extern void push_clear_without_check(Node **restrict nodes,
+ size_t *restrict nodes_size,
+ size_t *restrict nodes_inserted,
+ Node *node, char const *restrict str,
+ int i, Token token);
+
+extern bool isSpace(char c);
+extern bool isIdentifier(char c);
+extern bool isIdentifierSymbol(char c);
+extern bool isNumber(char c);
+extern bool isString(char c);
+extern bool isOperator(char c);
+extern bool isSymbol(char c);
+
+extern Token getKeyword(char const *strBegin, char const *strEnd);
+extern Token getOperator(char const *strBegin, char const *strEnd);
diff --git a/src/compiler/parser/parser.c b/src/compiler/parser/parser.c
new file mode 100644
index 0000000..c679d39
--- /dev/null
+++ b/src/compiler/parser/parser.c
@@ -0,0 +1,351 @@
+#include "parser.h"
+
+#include <compiler/lexer/lexer.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <utils/memory/memory.h>
+
+const char *PARSED_TOKEN_STRINGS[] = {
+ "PARSED_TOKEN_NONE", "PARSED_TOKEN_ROOT",
+ "PARSED_TOKEN_PRINT", "PARSED_TOKEN_PARENTHESIS",
+ "PARSED_TOKEN_VALUE_STRING", "PARSED_TOKEN_EOL",
+};
+
+static const ParseOrder PARSE_ORDER[] = {
+ {
+ .ltr = false,
+ .size = 2,
+ .tokens =
+ {
+ TOKEN_OPERATOR_PARENTHESES_CLOSE,
+ TOKEN_OPERATOR_CURLY_BRACKET_CLOSE,
+ },
+ },
+ {
+ .ltr = true,
+ .size = 3,
+ .tokens =
+ {
+ TOKEN_OPERATOR_ASSIGN,
+ TOKEN_OPERATOR_EQUAL,
+ TOKEN_OPERATOR_COLON,
+ },
+ },
+ {
+ .ltr = true,
+ .size = 1,
+ .tokens =
+ {
+ TOKEN_KEYWORD_PRINT,
+ },
+ },
+ {
+ .ltr = true,
+ .size = 1,
+ .tokens =
+ {
+ TOKEN_STRING,
+ },
+ },
+ {
+ .ltr = true,
+ .size = 1,
+ .tokens =
+ {
+ TOKEN_OPERATOR_EOL,
+ },
+ },
+};
+static size_t PARSE_ORDER_SIZE = sizeof(PARSE_ORDER) / sizeof(ParseOrder);
+
+ParsedNode *newParsedNode(char const *strBegin, char const *strEnd,
+ ParsedToken token, void *metadata,
+ ParsedNode *parent) {
+ ParsedNode *parsedNode = a404m_malloc(sizeof(*parsedNode));
+ parsedNode->strBegin = strBegin;
+ parsedNode->strEnd = strEnd;
+ parsedNode->token = token;
+ parsedNode->metadata = metadata;
+ parsedNode->parent = parent;
+ return parsedNode;
+}
+
+void _printParsedNode(const ParsedNode *parsedNode, int indent) {
+ for (int i = 0; i < indent; ++i) printf(" ");
+ printf("{token=%s", PARSED_TOKEN_STRINGS[parsedNode->token]);
+ switch (parsedNode->token) {
+ case PARSED_TOKEN_NONE:
+ break;
+ case PARSED_TOKEN_PARENTHESIS:
+ case PARSED_TOKEN_ROOT: {
+ ++indent;
+ const ScopeMetadata *metadata = parsedNode->metadata;
+ printf(",operands=[\n");
+ for (size_t i = 0; i < metadata->operands_size; ++i) {
+ _printParsedNode(metadata->operands[i], indent + 1);
+ }
+ for (int i = 0; i < indent; ++i) printf(" ");
+ printf("]\n");
+ --indent;
+ } break;
+ case PARSED_TOKEN_PRINT: {
+ const PrintMetadata *metadata = parsedNode->metadata;
+ printf(",operand=\n");
+ _printParsedNode(metadata, indent + 1);
+ } break;
+ case PARSED_TOKEN_VALUE_STRING:
+ printf("\n");
+ break;
+ case PARSED_TOKEN_EOL:
+ EOLMetadata *metadata = parsedNode->metadata;
+ printf(",operand=\n");
+ _printParsedNode(metadata, indent + 1);
+ break;
+ default:
+ fprintf(stderr, "bad parsed token %d at compiler line %d\n",
+ parsedNode->token, __LINE__);
+ exit(1);
+ }
+ for (int i = 0; i < indent; ++i) printf(" ");
+ printf("}\n");
+}
+
+void printParsedNode(const ParsedNode *parsedNode) {
+ _printParsedNode(parsedNode, 0);
+}
+
+ParsedNode *getUntilCommonFather(ParsedNode *parsedNode, ParsedNode *parent) {
+ while (parsedNode != NULL && parsedNode->parent != parent) {
+ parsedNode = parsedNode->parent;
+ }
+ return parsedNode;
+}
+
+void deleteParsedNode(ParsedNode *parsedNode) {
+ switch (parsedNode->token) {
+ case PARSED_TOKEN_NONE:
+ break;
+ case PARSED_TOKEN_PARENTHESIS:
+ case PARSED_TOKEN_ROOT: {
+ ScopeMetadata *metadata = parsedNode->metadata;
+ for (size_t i = 0; i < metadata->operands_size; ++i) {
+ deleteParsedNode(metadata->operands[i]);
+ }
+ free(metadata->operands);
+ free(metadata);
+ } break;
+ case PARSED_TOKEN_PRINT: {
+ PrintMetadata *metadata = parsedNode->metadata;
+ deleteParsedNode(metadata);
+ } break;
+ case PARSED_TOKEN_VALUE_STRING:
+ break;
+ case PARSED_TOKEN_EOL:
+ EOLMetadata *metadata = parsedNode->metadata;
+ deleteParsedNode(metadata);
+ break;
+ default:
+ fprintf(stderr, "bad parsed token %d at compiler line %d\n",
+ parsedNode->token, __LINE__);
+ exit(1);
+ }
+ free(parsedNode);
+}
+
+ParsedNode *parser(Nodes lexedNodes) {
+ ParsedNode *root = a404m_malloc(sizeof(ParsedNode));
+ root->token = PARSED_TOKEN_ROOT;
+ root->parent = NULL;
+ root->metadata =
+ parserScope(lexedNodes.nodes, lexedNodes.nodes + lexedNodes.size, root);
+ if (root->metadata == NULL) {
+ free(root);
+ return NULL;
+ }
+ return root;
+}
+
+ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd,
+ ParsedNode *parent) {
+ if (nodesBegin >= nodesEnd) {
+ return NULL;
+ }
+
+ size_t nodes_size = 0;
+ ParsedNode **nodes = a404m_malloc(nodes_size * sizeof(ParsedNode *));
+ size_t nodes_inserted = 0;
+
+ for (size_t i = 0; i < PARSE_ORDER_SIZE; ++i) {
+ const ParseOrder order = PARSE_ORDER[i];
+
+ for (Node *node = order.ltr ? nodesBegin : (nodesEnd - 1);
+ nodesBegin <= node && node < nodesEnd; order.ltr ? ++node : --node) {
+ for (size_t j = 0; j < order.size; ++j) {
+ const Token token = order.tokens[j];
+ if (node->token == token) {
+ ParsedNode *parsedNode =
+ parseNode(nodesBegin, nodesEnd, node, parent);
+ if (parsedNode == NULL) {
+ fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
+ TOKEN_STRINGS[node->token], __LINE__);
+ goto RETURN_ERROR;
+ }
+ if (nodes_size == nodes_inserted) {
+ nodes_size += nodes_size / 2 + 1;
+ nodes = a404m_realloc(nodes, nodes_size * sizeof(ParsedNode *));
+ }
+ nodes[nodes_inserted] = parsedNode;
+ ++nodes_inserted;
+ }
+ }
+ }
+ }
+
+ for (Node *node = nodesBegin; node < nodesEnd; ++node) {
+ if (node->token != TOKEN_PARSED) {
+ fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
+ TOKEN_STRINGS[node->token], __LINE__);
+ goto RETURN_ERROR;
+ }
+ }
+
+ ScopeMetadata *metadata = a404m_malloc(sizeof(ScopeMetadata));
+ metadata->operands = a404m_malloc(nodes_inserted * sizeof(ParsedNode *));
+
+ nodes_size = nodes_inserted;
+ nodes_inserted = 0;
+
+ for (size_t i = 0; i < nodes_size; ++i) {
+ ParsedNode *currentNode = nodes[i];
+ if (currentNode->parent == parent) {
+ metadata->operands[nodes_inserted] = currentNode;
+ ++nodes_inserted;
+ }
+ }
+ free(nodes);
+
+ metadata->operands =
+ a404m_realloc(metadata->operands, nodes_inserted * sizeof(ParsedNode *));
+ metadata->operands_size = nodes_inserted;
+
+ return metadata;
+
+RETURN_ERROR:
+ free(nodes);
+ return NULL;
+}
+
+ParsedNode *parseNode(Node *nodesBegin, Node *nodesEnd, Node *node,
+ ParsedNode *parent) {
+ switch (node->token) {
+ case TOKEN_KEYWORD_PRINT:
+ return parserPrint(nodesBegin, nodesEnd, node, parent);
+ case TOKEN_OPERATOR_PARENTHESES_CLOSE:
+ return parseParenthesis(nodesBegin, nodesEnd, node, parent);
+ case TOKEN_STRING:
+ return parseString(node, parent);
+ case TOKEN_OPERATOR_EOL:
+ return parseEOL(nodesBegin, nodesEnd, node, parent);
+ default:
+ fprintf(stderr, "unexpected token '%s' at compiler line %d\n",
+ TOKEN_STRINGS[node->token], __LINE__);
+ return NULL;
+ }
+}
+
+ParsedNode *parserPrint(Node *, Node *nodesEnd, Node *node,
+ ParsedNode *parent) {
+ Node *follow = node + 1;
+
+ if (follow < nodesEnd && follow->token == TOKEN_PARSED) {
+ ParsedNode *root = a404m_malloc(sizeof(ParsedNode));
+ root->strBegin = node->strBegin;
+ root->strEnd = node->strEnd;
+ root->token = PARSED_TOKEN_PRINT;
+ root->parent = parent;
+
+ ParsedNode *operand = root->metadata =
+ getUntilCommonFather(follow->parsedNode, parent);
+ if (operand == NULL || operand->token != PARSED_TOKEN_PARENTHESIS) {
+ fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
+ TOKEN_STRINGS[node->token], __LINE__);
+ free(root);
+ return NULL;
+ }
+ operand->parent = root;
+
+ node->parsedNode = root;
+ node->token = TOKEN_PARSED;
+ return root;
+ }
+ return NULL;
+}
+
+ParsedNode *parseParenthesis(Node *nodesBegin, Node *, Node *node,
+ ParsedNode *parent) {
+ Node *opening = NULL;
+ Node *closing = node;
+ for (Node *iter = closing - 1; iter >= nodesBegin; --iter) {
+ if (iter->token == TOKEN_OPERATOR_PARENTHESES_OPEN) {
+ opening = iter;
+ break;
+ }
+ }
+ if (opening == NULL) {
+ fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
+ TOKEN_STRINGS[node->token], __LINE__);
+ return NULL;
+ }
+
+ ParsedNode *parsedNode = closing->parsedNode = opening->parsedNode =
+ a404m_malloc(sizeof(*parsedNode));
+ parsedNode->strBegin = opening->strBegin;
+ parsedNode->strEnd = closing->strEnd;
+ parsedNode->token = PARSED_TOKEN_PARENTHESIS;
+ parsedNode->parent = parent;
+ parsedNode->metadata = parserScope(opening + 1, closing, parsedNode);
+
+ if (parsedNode->metadata == NULL) {
+ fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
+ TOKEN_STRINGS[node->token], __LINE__);
+ free(parsedNode);
+ return NULL;
+ }
+
+ opening->token = closing->token = TOKEN_PARSED;
+ return parsedNode;
+}
+
+ParsedNode *parseString(Node *node, ParsedNode *parent) {
+ node->token = TOKEN_PARSED;
+ return node->parsedNode =
+ newParsedNode(node->strBegin, node->strEnd,
+ PARSED_TOKEN_VALUE_STRING, NULL, parent);
+}
+
+ParsedNode *parseEOL(Node *nodesBegin, Node *, Node *node, ParsedNode *parent) {
+ Node *before = node - 1;
+ if (before < nodesBegin && before->token != TOKEN_PARSED) {
+ return NULL;
+ }
+
+ ParsedNode *experession = getUntilCommonFather(before->parsedNode, parent);
+ if (experession == NULL) {
+ fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
+ TOKEN_STRINGS[node->token], __LINE__);
+ return NULL;
+ }
+
+ ParsedNode *root = a404m_malloc(sizeof(ParsedNode));
+ root->strBegin = node->strBegin;
+ root->strEnd = node->strEnd;
+ root->token = PARSED_TOKEN_EOL;
+ root->parent = parent;
+
+ root->metadata = experession;
+ experession->parent = root;
+
+ node->token = TOKEN_PARSED;
+ node->parsedNode = root;
+ return root;
+}
diff --git a/src/compiler/parser/parser.h b/src/compiler/parser/parser.h
new file mode 100644
index 0000000..510e4e9
--- /dev/null
+++ b/src/compiler/parser/parser.h
@@ -0,0 +1,62 @@
+#pragma once
+
+#include <compiler/lexer/lexer.h>
+
+typedef enum ParsedToken {
+ PARSED_TOKEN_NONE = 0,
+ PARSED_TOKEN_ROOT,
+ PARSED_TOKEN_PRINT,
+ PARSED_TOKEN_PARENTHESIS,
+ PARSED_TOKEN_VALUE_STRING,
+ PARSED_TOKEN_EOL,
+} ParsedToken;
+
+extern const char *PARSED_TOKEN_STRINGS[];
+
+typedef struct ParseOrder {
+ bool ltr;
+ size_t size;
+ Token tokens[10];
+} ParseOrder;
+
+typedef struct ScopeMetadata {
+ struct ParsedNode **operands;
+ size_t operands_size;
+} ScopeMetadata;
+
+typedef struct ParsedNode {
+ char const *strBegin;
+ char const *strEnd;
+ ParsedToken token;
+ void *metadata;
+ struct ParsedNode *parent;
+} ParsedNode;
+
+typedef ParsedNode PrintMetadata;
+typedef ParsedNode EOLMetadata;
+
+extern ParsedNode *newParsedNode(char const *strBegin, char const *strEnd,
+ ParsedToken token, void *metadata,
+ ParsedNode *parent);
+extern void _printParsedNode(const ParsedNode *parsedNode,int indent);
+extern void printParsedNode(const ParsedNode *parsedNode);
+extern ParsedNode *getUntilCommonFather(ParsedNode *parsedNode,ParsedNode *parent);
+extern void deleteParsedNode(ParsedNode *parsedNode);
+
+extern ParsedNode *parser(Nodes lexedNodes);
+
+extern ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd,
+ ParsedNode *parent);
+
+extern ParsedNode *parseNode(Node *nodesBegin, Node *nodesEnd, Node *node,
+ ParsedNode *parent);
+
+extern ParsedNode *parserPrint(Node *nodesBegin, Node *nodesEnd, Node *node,
+ ParsedNode *parent);
+
+extern ParsedNode *parseParenthesis(Node *nodesBegin, Node *nodesEnd,
+ Node *node, ParsedNode *parent);
+
+extern ParsedNode *parseString(Node *node, ParsedNode *parent);
+
+extern ParsedNode *parseEOL(Node *nodesBegin, Node *nodesEnd,Node *node, ParsedNode *parent);