diff options
author | A404M <ahmadmahmoudiprogrammer@gmail.com> | 2024-10-08 04:16:27 +0330 |
---|---|---|
committer | A404M <ahmadmahmoudiprogrammer@gmail.com> | 2024-10-08 04:17:08 +0330 |
commit | addd54dc31603dc204773d3108dba4e000cd7657 (patch) | |
tree | 621620c4ca5634680d7655e3474cf0b0bcec8e01 | |
parent | bf84010e01bb11874689ce53ea4df853b2e41c2b (diff) |
added fasm support
added compiler options
tried to compile to fasm first
-rwxr-xr-x | project | 11 | ||||
-rw-r--r-- | src/compiler/code_generator/code_generator.c | 19 | ||||
-rw-r--r-- | src/compiler/code_generator/code_generator.h | 1 | ||||
-rw-r--r-- | src/compiler/fasm_generator/fasm_generator.c | 208 | ||||
-rw-r--r-- | src/compiler/fasm_generator/fasm_generator.h | 31 | ||||
-rw-r--r-- | src/compiler/lexer/lexer.h | 1 | ||||
-rw-r--r-- | src/compiler/parser/parser.c | 87 | ||||
-rw-r--r-- | src/compiler/parser/parser.h | 1 | ||||
-rw-r--r-- | src/compiler/tree_parser/tree_parser.c | 262 | ||||
-rw-r--r-- | src/compiler/tree_parser/tree_parser.h | 2 | ||||
-rw-r--r-- | src/fasm/code_generator/code_generator.c | 42 | ||||
-rw-r--r-- | src/fasm/code_generator/code_generator.h | 17 | ||||
-rw-r--r-- | src/fasm/lexer/lexer.c | 643 | ||||
-rw-r--r-- | src/fasm/lexer/lexer.h | 363 | ||||
-rw-r--r-- | src/fasm/linker/linker.c | 870 | ||||
-rw-r--r-- | src/fasm/linker/linker.h | 78 | ||||
-rw-r--r-- | src/fasm/runner/runner.c | 646 | ||||
-rw-r--r-- | src/fasm/runner/runner.h | 36 | ||||
-rw-r--r-- | src/main.c | 240 | ||||
-rw-r--r-- | src/utils/file.c | 3 | ||||
-rw-r--r-- | src/vm/runner/runner.c | 16 | ||||
-rw-r--r-- | src/vm/runner/runner.h | 3 | ||||
-rw-r--r-- | std/builtins-main.felan | 23 |
23 files changed, 3421 insertions, 182 deletions
@@ -10,7 +10,7 @@ function compile(){ fi fi - gcc -Wall -Wextra -std=gnu23 -I./src/ -O3 \ + gcc -Wall -Wextra -std=gnu23 -I./src/ -g \ ./src/main.c \ ./src/compiler/source_code/source_code.c \ ./src/compiler/error_helper/error_helper.c \ @@ -18,6 +18,11 @@ function compile(){ ./src/compiler/parser/parser.c \ ./src/compiler/tree_parser/tree_parser.c \ ./src/compiler/code_generator/code_generator.c \ + ./src/compiler/fasm_generator/fasm_generator.c \ + ./src/fasm/lexer/lexer.c \ + ./src/fasm/linker/linker.c \ + ./src/fasm/code_generator/code_generator.c \ + ./src/fasm/runner/runner.c \ ./src/vm/runner/runner.c \ ./src/utils/memory/memory.c \ ./src/utils/file.c \ @@ -56,7 +61,7 @@ function test(){ for file_path in ./test/input/*; do local file_name=$(basename "$file_path") local start=`date +%s.%N` - local ret=$(eval "./build/$project_name $file_path > ./test/generated_output/$file_name") + local ret=$(eval "./build/$project_name run-felan $file_path > ./test/generated_output/$file_name") local end=`date +%s.%N` runtime="$(jq -n $end-$start)" $ret && \ @@ -78,7 +83,7 @@ function val_test(){ for file_path in ./test/input/*; do local file_name=$(basename "$file_path") - valgrind --leak-check=full --track-origins=yes --show-leak-kinds=all -s "./build/$project_name" "$file_path" ./test/generated_output/ && \ + valgrind --leak-check=full --track-origins=yes --show-leak-kinds=all -s "./build/$project_name" run-felan "$file_path" ./test/generated_output/ && \ cmp --silent "./test/generated_output/$file_name" "./test/output/$file_name" && \ printf "${GREEN}PASSED${NC} $file_path\n" || \ printf "${RED}FAILED${NC} $file_path\n" diff --git a/src/compiler/code_generator/code_generator.c b/src/compiler/code_generator/code_generator.c index 75b89c6..bf77419 100644 --- a/src/compiler/code_generator/code_generator.c +++ b/src/compiler/code_generator/code_generator.c @@ -76,6 +76,24 @@ Instructions codeGenerator(SourceCode *code) { return error; } +Instructions 
codeGeneratorWithPrint(SourceCode *code) { + ParsedTree *root = treeParserWithPrint(code); + if (root != NULL) { + printf("----tree parsed:\n"); + printParsedTreeNode(root); + Instructions instructions = _codeGenerator(root, code); + + deleteParsedTree(root); + return instructions; + } + fprintf(stderr, "error in tree parser\n"); + const Instructions error = { + .instructions = NULL, + .size = ERROR_SIZE, + }; + return error; +} + Instructions _codeGenerator(ParsedTree *root, SourceCode *code) { const TreeScopeMetadata *metadata = root->metadata; @@ -112,6 +130,7 @@ RETURN_ERROR: bool nodeToInstruction(ParsedTree *tree, Instruction **instructions, size_t *instructions_size, size_t *instructions_inserted, SourceCode *code) { + /*printf("Parsing token = %s\n", TREE_TOKEN_STRINGS[tree->token]);*/ switch (tree->token) { case TREE_TOKEN_FUNCTION_CALL: { const TreeFunctionCallMetadata *tree_metadata = tree->metadata; diff --git a/src/compiler/code_generator/code_generator.h b/src/compiler/code_generator/code_generator.h index 0ae3219..902d76b 100644 --- a/src/compiler/code_generator/code_generator.h +++ b/src/compiler/code_generator/code_generator.h @@ -37,6 +37,7 @@ extern void deleteInstruction(Instruction instruction); extern void deleteInstructions(Instructions instructions); extern Instructions codeGenerator(SourceCode *code); +extern Instructions codeGeneratorWithPrint(SourceCode *code); extern Instructions _codeGenerator(ParsedTree *root, SourceCode *code); extern bool nodeToInstruction(ParsedTree *tree, Instruction **instructions, diff --git a/src/compiler/fasm_generator/fasm_generator.c b/src/compiler/fasm_generator/fasm_generator.c new file mode 100644 index 0000000..b83f3d2 --- /dev/null +++ b/src/compiler/fasm_generator/fasm_generator.c @@ -0,0 +1,208 @@ +#include "fasm_generator.h" + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> + +#include "compiler/tree_parser/tree_parser.h" +#include "utils/memory/memory.h" + +FasmLines 
generateFasm(SourceCode *code) { + ParsedTree *root = treeParser(code); + if (root != NULL) { + FasmLines lines = _generateFasm(root, code); + + deleteParsedTree(root); + return lines; + } + const FasmLines error = { + .lines = NULL, + /*.size = ERROR_SIZE,*/ + }; + return error; +} + +FasmLines generateFasmWithPrint(SourceCode *code) { + ParsedTree *root = treeParser(code); + if (root != NULL) { + printf("----tree parsed:\n"); + printParsedTreeNode(root); + FasmLines lines = _generateFasm(root, code); + + deleteParsedTree(root); + return lines; + } + fprintf(stderr, "error in tree parser\n"); + const FasmLines error = { + .lines = NULL, + /*.size = ERROR_SIZE,*/ + }; + return error; +} + +FasmLines _generateFasm(const ParsedTree *root, SourceCode *code) { + const TreeScopeMetadata *metadata = root->metadata; + + FasmVariables thisVariables = { + .variables = a404m_malloc(metadata->variables_size * + sizeof(*thisVariables.variables)), + .size = metadata->variables_size, + }; + + size_t size = 0; + + for (size_t i = 0; i < metadata->variables_size; ++i) { + const TreeDefineVariableMetadata *variable = metadata->variables[i]; + FasmVariable keyValue = { + .variable = variable, + .value = size, + }; + thisVariables.variables[i] = keyValue; + size += 8; + } + + const size_t variables_size = 1; + FasmVariables *variables[1] = { + &thisVariables, + }; + + FasmLines lines = { + .lines = a404m_malloc(0), + /*.size = 0,*/ + }; + + for (size_t i = 0; i < metadata->lines_size; ++i) { + const ParsedTree *node = metadata->lines[i]; + if (!nodeToFasmLine(node, &lines, variables, variables_size, code)) { + goto RETURN_ERROR; + } + } + + return lines; + +RETURN_ERROR: + free(lines.lines); + + const FasmLines error = { + .lines = NULL, + /*.size = ERROR_SIZE,*/ + }; + return error; +} + +bool nodeToFasmLine(const ParsedTree *node, FasmLines *lines, + FasmVariables *variables[], size_t variables_size, + SourceCode *code) { + switch (node->token) { + case TREE_TOKEN_NONE: + case 
TREE_TOKEN_ROOT: + printError("Is not allowed in compiler line %s:%d", code, node->strBegin, + node->strBegin, __FILE_NAME__, __LINE__); + exit(1); + case TREE_TOKEN_GLOBAL_SCOPE: + case TREE_TOKEN_LOCAL_SCOPE: { + const TreeScopeMetadata *metadata = node->metadata; + + FasmVariables thisVariables = { + .variables = a404m_malloc(metadata->variables_size * + sizeof(*thisVariables.variables)), + .size = metadata->variables_size, + }; + + size_t size = 0; + + for (size_t i = 0; i < metadata->variables_size; ++i) { + const TreeDefineVariableMetadata *variable = metadata->variables[i]; + FasmVariable keyValue = { + .variable = variable, + .value = size, + .isGlobal = node->token == TREE_TOKEN_GLOBAL_SCOPE, + }; + thisVariables.variables[i] = keyValue; + size += getSizeOfVariable(variable); + } + + size_t newVariables_size = variables_size + 1; + FasmVariables *newVariables[newVariables_size]; + + for (size_t i = 0; i < variables_size; ++i) { + newVariables[i] = variables[i]; + } + newVariables[variables_size] = &thisVariables; + + for (size_t i = 0; i < metadata->lines_size; ++i) { + if (!nodeToFasmLine(metadata->lines[i], lines, newVariables, + newVariables_size, code)) { + return false; + } + } + return true; + } + case TREE_TOKEN_FUNCTION_CALL: { + const TreeFunctionCallMetadata *metadata = node->metadata; + return true; + } + case TREE_TOKEN_FUNCTION: { + const TreeFunctionMetadata *metadata = node->metadata; + return true; + } + case TREE_TOKEN_IDENTIFIER: { + const TreeIdentifierMetadata *metadata = node->metadata; + + const FasmVariable fasmVariable = getVariableFasmKeyValue( + metadata->variable, variables, variables_size); + if (fasmVariable.variable == NULL) { + return false; + } + + FasmLine line = { + /*.label = NULL,*/ + // TODO: do it + }; + + pushFasmLine(lines, line); + + return true; + } + case TREE_TOKEN_DEFINE_VARIABLE: + case TREE_TOKEN_DEFINE_CONSTANT: + case TREE_TOKEN_VALUE_STRING: + case TREE_TOKEN_STRUCT: + } + fprintf(stderr, "Bad parsed token 
'%d' %s:%d", node->token, __FILE_NAME__, + __LINE__); + exit(1); +} + +void pushFasmLine(FasmLines *lines, FasmLine line) { + const size_t size = + a404m_malloc_usable_size(lines->lines) / sizeof(*lines->lines); + /*if (size == lines->size) { + lines->lines = + a404m_realloc(lines->lines, (size * 2 + 1) * sizeof(*lines->lines)); + } + lines->lines[lines->size] = line; + lines->size += 1;*/ +} + +size_t getSizeOfVariable(const TreeDefineVariableMetadata *variable) { + return 8; // TODO: do it; +} + +FasmVariable getVariableFasmKeyValue(const TreeDefineVariableMetadata *variable, + FasmVariables *variables[], + size_t variables_size) { + for (size_t i = 0; i < variables_size; ++i) { + for (size_t j = 0; j < variables[i]->size; ++j) { + if (variables[i]->variables[j].variable == variable) { + return variables[i]->variables[j]; + } + } + } + const FasmVariable error = { + .variable = NULL, + .value = 0, + .isGlobal = false, + }; + return error; +} diff --git a/src/compiler/fasm_generator/fasm_generator.h b/src/compiler/fasm_generator/fasm_generator.h new file mode 100644 index 0000000..c6925ad --- /dev/null +++ b/src/compiler/fasm_generator/fasm_generator.h @@ -0,0 +1,31 @@ +#pragma once + +#include <compiler/tree_parser/tree_parser.h> +#include <fasm/lexer/lexer.h> +#include <stdint.h> + +typedef struct FasmVariable { + TreeDefineVariableMetadata const *variable; + uint64_t value; + bool isGlobal; +} FasmVariable; + +typedef struct FasmVariables { + FasmVariable *variables; + size_t size; +} FasmVariables; + +extern FasmLines generateFasm(SourceCode *code); +extern FasmLines generateFasmWithPrint(SourceCode *code); +extern FasmLines _generateFasm(const ParsedTree *root, SourceCode *code); + +extern bool nodeToFasmLine(const ParsedTree *node, FasmLines *lines, + FasmVariables *variables[], size_t variables_size, + SourceCode *code); + +extern void pushFasmLine(FasmLines *lines, FasmLine line); + +extern size_t getSizeOfVariable(const TreeDefineVariableMetadata 
*variable); +extern FasmVariable getVariableFasmKeyValue( + const TreeDefineVariableMetadata *variable, FasmVariables *variables[], + size_t variables_size); diff --git a/src/compiler/lexer/lexer.h b/src/compiler/lexer/lexer.h index 66db1cf..30a5d0f 100644 --- a/src/compiler/lexer/lexer.h +++ b/src/compiler/lexer/lexer.h @@ -1,7 +1,6 @@ #pragma once #include <compiler/error_helper/error_helper.h> -#include <stddef.h> #include <utils/memory/memory.h> #include <utils/types.h> diff --git a/src/compiler/parser/parser.c b/src/compiler/parser/parser.c index ee9e69b..b60dd60 100644 --- a/src/compiler/parser/parser.c +++ b/src/compiler/parser/parser.c @@ -287,6 +287,20 @@ ParsedNode *parser(SourceCode *code, size_t sourceIndex) { return root; } +ParsedNode *parserWithPrint(SourceCode *code, size_t sourceIndex) { + Nodes nodes = lexer(code, sourceIndex); + if (nodes.size == ERROR_SIZE) { + fprintf(stderr, "Error in lexer"); + return NULL; + } + printf("----lexed '%s'\n", code->codes[sourceIndex]->filePath); + printNodes(nodes); + ParsedNode *root = _parser(nodes, code); + + deleteNodes(nodes); + return root; +} + ParsedNode *_parser(Nodes lexedNodes, SourceCode *code) { ParsedNode *root = a404m_malloc(sizeof(*root)); root->token = PARSED_TOKEN_ROOT; @@ -303,6 +317,12 @@ ParsedNode *_parser(Nodes lexedNodes, SourceCode *code) { ParserScopeMetadata *parserScope( Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, bool (*isAllowed)(ParsedToken token, bool isLast), SourceCode *code) { + if (nodesBegin == nodesEnd) { + ParserScopeMetadata *metadata = a404m_malloc(sizeof(*metadata)); + metadata->operands = a404m_malloc(0); + metadata->operands_size = 0; + return metadata; + } ParsedNode **nodes = a404m_malloc(0); size_t nodes_inserted = 0; @@ -314,7 +334,7 @@ ParserScopeMetadata *parserScope( for (size_t order_tokens_index = 0; order_tokens_index < order->size; ++order_tokens_index) { if (node->token == order->tokens[order_tokens_index]) { - ParsedNode *parsedNode = + 
ParsedNode *const parsedNode = parseNode(nodesBegin, nodesEnd, node, parent, code); if (parsedNode == NULL) { goto RETURN_ERROR; @@ -344,26 +364,25 @@ ParserScopeMetadata *parserScope( size_t nodes_size = nodes_inserted; nodes_inserted = 0; - ParsedNode *last = NULL; - - for (size_t i = nodes_size - 1; i != (typeof(i))-1; ++i) { - ParsedNode *currentNode = nodes[i]; - if (currentNode->parent == parent) { - last = currentNode; - break; - } + const Node *lastNode = (nodesEnd - 1); + ParsedNode *last = getUntilCommonFather(lastNode->parsedNode, parent); + if (last == NULL) { + printError("Unexpected token '%s'", code, lastNode->strBegin, + lastNode->strEnd, + PARSED_TOKEN_STRINGS[lastNode->parsedNode->token]); + goto RETURN_ERROR; } for (size_t i = 0; i < nodes_size; ++i) { ParsedNode *currentNode = nodes[i]; if (currentNode->parent == parent) { if (!isAllowed(currentNode->token, currentNode == last)) { - printError("Token '%s' is not allowed here", code, - currentNode->strBegin, currentNode->strEnd, - PARSED_TOKEN_STRINGS[currentNode->token]); + printError( + "Token '%s' is not allowed here at %ld current = %p, last = %p", + code, currentNode->strBegin, currentNode->strEnd, + PARSED_TOKEN_STRINGS[currentNode->token], i, currentNode, last); goto RETURN_ERROR; - } - if (nodes_inserted != i) { + } else if (nodes_inserted != i) { nodes[nodes_inserted] = currentNode; } ++nodes_inserted; @@ -378,6 +397,11 @@ ParserScopeMetadata *parserScope( return metadata; RETURN_ERROR: + for (size_t i = 0; i < nodes_inserted; ++i) { + if (nodes[i]->parent == parent) { + deleteParsedNode(nodes[i]); + } + } free(nodes); return NULL; } @@ -641,8 +665,8 @@ ParsedNode *parseParenthesis(Node *nodesBegin, Node *nodesEnd, Node *closing, root->metadata = metadata; } - closing->parsedNode = opening->parsedNode = root; opening->token = closing->token = TOKEN_PARSED; + closing->parsedNode = opening->parsedNode = root; return root; RETURN_ERROR: @@ -676,8 +700,8 @@ ParsedNode *parseCurly(Node 
*nodesBegin, Node *closing, ParsedNode *parent, root->strEnd = opening->strEnd; root->parent = parent; root->token = PARSED_TOKEN_CODE_BODY; - closing->parsedNode = opening->parsedNode = root; opening->token = closing->token = TOKEN_PARSED; + closing->parsedNode = opening->parsedNode = root; return root; @@ -775,6 +799,7 @@ ParsedNode *parseVariable(Node *nodesBegin, Node *nodesEnd, Node *node, follow->strEnd); goto RETURN_ERROR; } + type->parent = root; ++follow; while (follow->token == TOKEN_PARSED && getUntilCommonFather(follow->parsedNode, type) != NULL) { @@ -794,8 +819,8 @@ ParsedNode *parseVariable(Node *nodesBegin, Node *nodesEnd, Node *node, else root->token = PARSED_TOKEN_DEFINE_CONSTANT; - follow->parsedNode = root; follow->token = TOKEN_PARSED; + follow->parsedNode = root; ++follow; if (follow == nodesEnd) { --follow; @@ -804,8 +829,8 @@ ParsedNode *parseVariable(Node *nodesBegin, Node *nodesEnd, Node *node, goto RETURN_ERROR; } else if (follow->token == TOKEN_PARSED) { value = getUntilCommonFather(follow->parsedNode, parent); + value->parent = root; } else { - BAD_VALUE: printError("Expected value after assignment but got something else", code, follow->strBegin, follow->strEnd); goto RETURN_ERROR; @@ -817,21 +842,8 @@ ParsedNode *parseVariable(Node *nodesBegin, Node *nodesEnd, Node *node, node->strBegin, node->strEnd); goto RETURN_ERROR; } - if (type != NULL) { - metadata->type = type; - type->parent = root; - } else { - metadata->type = NULL; - } - if (value != NULL) { - metadata->value = value = getUntilCommonFather(value, parent); - if (value == NULL) { - goto BAD_VALUE; - } - value->parent = root; - } else { - metadata->value = NULL; - } + metadata->type = type; + metadata->value = value; root->strBegin = variableName->strBegin; root->strEnd = follow->strEnd; @@ -938,6 +950,7 @@ ParsedNode *parseFunction(Node *nodesBegin, Node *nodesEnd, Node *node, metadata->params = params->parsedNode; metadata->params->parent = root; } + if (type >= nodesEnd || 
type->token != TOKEN_PARSED || type->parsedNode->token != PARSED_TOKEN_IDENTIFIER) { printError("Function definition needs a type to be identifier (for now)", @@ -957,9 +970,9 @@ ParsedNode *parseFunction(Node *nodesBegin, Node *nodesEnd, Node *node, body->token = TOKEN_PARSED; body->parsedNode = root; metadata->body = NULL; - } else if (type->token == TOKEN_PARSED && - type->parsedNode->token == PARSED_TOKEN_CODE_BODY) { - metadata->body = type->parsedNode; + } else if (body->token == TOKEN_PARSED && + body->parsedNode->token == PARSED_TOKEN_CODE_BODY) { + metadata->body = body->parsedNode; metadata->body->parent = root; } else { goto NEED_BODY; @@ -974,7 +987,7 @@ ParsedNode *parseFunction(Node *nodesBegin, Node *nodesEnd, Node *node, return root; RETURN_ERROR: - + // TODO: doesn't delete all free(metadata); free(root); return NULL; diff --git a/src/compiler/parser/parser.h b/src/compiler/parser/parser.h index 234264b..81a5040 100644 --- a/src/compiler/parser/parser.h +++ b/src/compiler/parser/parser.h @@ -74,6 +74,7 @@ extern ParsedNode *getUntilCommonFather(ParsedNode *parsedNode, extern void deleteParsedNode(ParsedNode *parsedNode); extern ParsedNode *parser(SourceCode *code,size_t sourceIndex); +extern ParsedNode *parserWithPrint(SourceCode *code,size_t sourceIndex); extern ParsedNode *_parser(Nodes lexedNodes, SourceCode *code); extern ParserScopeMetadata *parserScope( diff --git a/src/compiler/tree_parser/tree_parser.c b/src/compiler/tree_parser/tree_parser.c index f657f97..07bad0a 100644 --- a/src/compiler/tree_parser/tree_parser.c +++ b/src/compiler/tree_parser/tree_parser.c @@ -1,15 +1,9 @@ #include "tree_parser.h" -#include <compiler/error_helper/error_helper.h> -#include <compiler/lexer/lexer.h> -#include <compiler/parser/parser.h> -#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <utils/file.h> -#include <utils/memory/memory.h> -#include <utils/types.h> const char *TREE_TOKEN_STRINGS[] = { "TREE_TOKEN_NONE", @@ 
-25,10 +19,27 @@ const char *TREE_TOKEN_STRINGS[] = { "TREE_TOKEN_FUNCTION", }; +static void _printParsedTreeVariable(const TreeDefineVariableMetadata *variable, + int indent) { + if (variable == NULL) { + for (int i = 0; i < indent; ++i) printf(" "); + printf("null\n"); + return; + } + for (int i = 0; i < indent; ++i) printf(" "); + printf("{name='%.*s',type=%p,value=\n", + (int)(variable->nameEnd - variable->nameBegin), variable->nameBegin, + variable->type); + _printParsedTreeNode(variable->value, indent + 1); + for (int i = 0; i < indent; ++i) printf(" "); + printf("}\n"); +} + void _printParsedTreeNode(const ParsedTree *parsedTree, int indent) { + for (int i = 0; i < indent; ++i) printf(" "); if (parsedTree == NULL) { - for (int i = 0; i < indent; ++i) printf(" "); printf("null\n"); + return; } printf("{token=%s", TREE_TOKEN_STRINGS[parsedTree->token]); switch (parsedTree->token) { @@ -36,19 +47,83 @@ void _printParsedTreeNode(const ParsedTree *parsedTree, int indent) { goto RETURN_SUCCESS; case TREE_TOKEN_ROOT: case TREE_TOKEN_GLOBAL_SCOPE: - case TREE_TOKEN_LOCAL_SCOPE: - case TREE_TOKEN_FUNCTION_CALL: + case TREE_TOKEN_LOCAL_SCOPE: { + const TreeScopeMetadata *metadata = parsedTree->metadata; + for (int i = 0; i < indent; ++i) printf(" "); + printf(",lines=\n"); + for (size_t i = 0; i < metadata->lines_size; ++i) { + _printParsedTreeNode(metadata->lines[i], indent + 1); + } + for (int i = 0; i < indent; ++i) printf(" "); + printf(",variables=\n"); + for (size_t i = 0; i < metadata->variables_size; ++i) { + _printParsedTreeVariable(metadata->variables[i], indent + 1); + } + goto RETURN_SUCCESS; + } + case TREE_TOKEN_FUNCTION_CALL: { + const TreeFunctionCallMetadata *metadata = parsedTree->metadata; + printf(",lines=\n"); + for (size_t i = 0; i < metadata->values_size; ++i) { + _printParsedTreeNode(metadata->values[i], indent + 1); + } + goto RETURN_SUCCESS; + } case TREE_TOKEN_DEFINE_VARIABLE: - case TREE_TOKEN_IDENTIFIER: - case TREE_TOKEN_VALUE_STRING: - 
case TREE_TOKEN_STRUCT: - case TREE_TOKEN_DEFINE_CONSTANT: - case TREE_TOKEN_FUNCTION: + case TREE_TOKEN_DEFINE_CONSTANT: { + const TreeDefineVariableMetadata *metadata = parsedTree->metadata; + printf(",define=\n"); + _printParsedTreeVariable(metadata, indent + 1); + goto RETURN_SUCCESS; + } + case TREE_TOKEN_IDENTIFIER: { + const TreeIdentifierMetadata *metadata = parsedTree->metadata; + printf(",variable=\n"); + _printParsedTreeVariable(metadata->variable, indent + 1); + goto RETURN_SUCCESS; + } + case TREE_TOKEN_VALUE_STRING: { + const TreeStringValueMetadata *metadata = parsedTree->metadata; + printf(",str='%.*s'\n", (int)metadata->size, metadata->str); + goto RETURN_SUCCESS; + } + case TREE_TOKEN_STRUCT: { + const TreeStructMetadata *metadata = parsedTree->metadata; + printf(",metadata=%p\n", metadata); + goto RETURN_SUCCESS; + } + case TREE_TOKEN_FUNCTION: { + const TreeFunctionMetadata *metadata = parsedTree->metadata; + const TreeScopeMetadata *scope = metadata->scope; + printf(",scope=\n"); + if (scope != NULL) { + for (int i = 0; i < indent; ++i) printf(" "); + printf(",lines=\n"); + for (size_t i = 0; i < scope->lines_size; ++i) { + _printParsedTreeNode(scope->lines[i], indent + 1); + } + for (int i = 0; i < indent; ++i) printf(" "); + printf(",variables=\n"); + for (size_t i = 0; i < scope->variables_size; ++i) { + _printParsedTreeVariable(scope->variables[i], indent + 1); + } + } else { + for (int i = 0; i < indent; ++i) printf(" "); + printf("null\n"); + } + for (int i = 0; i < indent; ++i) printf(" "); + printf(",params=\n"); + for (size_t i = 0; i < metadata->params_size; ++i) { + _printParsedTreeVariable(metadata->params[i], indent + 1); + } + goto RETURN_SUCCESS; + } } fprintf(stderr, "bad parsed tree token %d at %s:%d", parsedTree->token, __FILE_NAME__, __LINE__); exit(1); RETURN_SUCCESS: + for (int i = 0; i < indent; ++i) printf(" "); printf("}\n"); }; @@ -143,6 +218,9 @@ ParsedTree *treeParser(SourceCode *code) { for (size_t i = 0; i < 
code->size; ++i) { ParsedNode *const nParsedNode = parser(code, i); + if (nParsedNode == NULL) { + goto RETURN_ERROR; + } ParserScopeMetadata *const nscope = nParsedNode->metadata; for (size_t j = 0; j < nscope->operands_size; ++j) { size_t scopeSize = @@ -160,12 +238,52 @@ ParsedTree *treeParser(SourceCode *code) { nscope->operands_size = 0; deleteParsedNode(nParsedNode); } - if (parsedNode == NULL) { - return NULL; + ParsedTree *tree = _treeParser(parsedNode, code); + deleteParsedNode(parsedNode); + return tree; +RETURN_ERROR: + deleteParsedNode(parsedNode); + return NULL; +} + +ParsedTree *treeParserWithPrint(SourceCode *code) { + ParserScopeMetadata *scope = a404m_malloc(sizeof(*scope)); + scope->operands_size = 0; + scope->operands = a404m_malloc(scope->operands_size * sizeof(ParsedNode *)); + ParsedNode *const parsedNode = + newParsedNode(NULL, NULL, PARSED_TOKEN_ROOT, scope, NULL); + + for (size_t i = 0; i < code->size; ++i) { + ParsedNode *const nParsedNode = parserWithPrint(code, i); + if (nParsedNode == NULL) { + fprintf(stderr, "Error in parser"); + goto RETURN_ERROR; + } + printf("----parsed '%s'\n", code->codes[i]->filePath); + printParsedNode(parsedNode); + ParserScopeMetadata *const nscope = nParsedNode->metadata; + for (size_t j = 0; j < nscope->operands_size; ++j) { + size_t scopeSize = + a404m_malloc_usable_size(scope->operands) / sizeof(ParsedNode *); + if (scopeSize == scope->operands_size) { + scopeSize += scopeSize / 2 + 1; + scope->operands = + a404m_realloc(scope->operands, scopeSize * sizeof(ParsedNode *)); + } + scope->operands[scope->operands_size] = nscope->operands[j]; + scope->operands_size += 1; + } + free(nscope->operands); + nscope->operands = NULL; + nscope->operands_size = 0; + deleteParsedNode(nParsedNode); } ParsedTree *tree = _treeParser(parsedNode, code); deleteParsedNode(parsedNode); return tree; +RETURN_ERROR: + deleteParsedNode(parsedNode); + return NULL; } ParsedTree *_treeParser(const ParsedNode *node, SourceCode *code) { 
@@ -275,7 +393,7 @@ ParsedTree *treeParseLocalScope(const ParsedNode *node, SourceCode *code, const size_t newScopes_size = scopes_size + 1; TreeScopeMetadata *newScopes[newScopes_size]; - memcpy(newScopes, scopes, scopes_size); + memcpy(newScopes, scopes, scopes_size * sizeof(TreeScopeMetadata *)); newScopes[newScopes_size - 1] = metadata; for (size_t i = 0; i < operands_size; ++i) { @@ -285,31 +403,18 @@ ParsedTree *treeParseLocalScope(const ParsedNode *node, SourceCode *code, if (parsedTree == NULL) { goto RETURN_ERROR; } - switch (parsedTree->token) { - case TREE_TOKEN_ROOT: - printError("It is not allowed here", code, operand->strBegin, - operand->strEnd); - goto RETURN_ERROR; - case TREE_TOKEN_DEFINE_CONSTANT: - case TREE_TOKEN_DEFINE_VARIABLE: { - TreeDefineVariableMetadata *const variableDefine = parsedTree->metadata; - if (variableDefine == NULL) { - goto RETURN_ERROR; - } - pushVariableToScope(metadata, variableDefine); - } - /* fall through */ - default: - pushLineToScope(metadata, parsedTree); - continue; + if (parsedTree->token != TREE_TOKEN_ROOT) { + pushLineToScope(metadata, parsedTree); + } else { + printError("'%s' Is not allowed here", code, operand->strBegin, + operand->strEnd, PARSED_TOKEN_STRINGS[operand->token]); + goto RETURN_ERROR; } - printError("'%s' Is not allowed here", code, operand->strBegin, - operand->strEnd, PARSED_TOKEN_STRINGS[operand->token]); - goto RETURN_ERROR; } - metadata->variables = - a404m_realloc(metadata->variables, metadata->variables_size); + metadata->variables = a404m_realloc( + metadata->variables, + metadata->variables_size * sizeof(TreeDefineVariableMetadata *)); return tree; @@ -327,37 +432,37 @@ TreeDefineVariableMetadata *treeParseDefineVariable(ParsedTree *tree, TreeDefineVariableMetadata *define = a404m_malloc(sizeof(*define)); define->tree = tree; - ParserVariableDefineMetadata *metadata = node->metadata; + const ParserVariableDefineMetadata *node_metadata = node->metadata; - if (metadata->value == NULL) { 
+ if (node_metadata->value == NULL) { define->value = NULL; - } else if ((define->value = treeParseExpr(metadata->value, code, scopes, + } else if ((define->value = treeParseExpr(node_metadata->value, code, scopes, scopes_size)) == NULL) { goto RETURN_ERROR; } - if (metadata->name->token == PARSED_TOKEN_IDENTIFIER) { - define->nameBegin = metadata->name->strBegin; - define->nameEnd = metadata->name->strEnd; + if (node_metadata->name->token == PARSED_TOKEN_IDENTIFIER) { + define->nameBegin = node_metadata->name->strBegin; + define->nameEnd = node_metadata->name->strEnd; } else { - printError("Names should be an identifier", code, metadata->name->strBegin, - metadata->name->strEnd); + printError("Names should be an identifier", code, + node_metadata->name->strBegin, node_metadata->name->strEnd); goto RETURN_ERROR; } - if (metadata->type == NULL) { + if (node_metadata->type == NULL) { define->type = getTreeExpressionType(define->value); - } else if (metadata->type->token == PARSED_TOKEN_IDENTIFIER) { + } else if (node_metadata->type->token == PARSED_TOKEN_IDENTIFIER) { const TreeDefineVariableMetadata *variable = - getVariable(metadata->type->strBegin, metadata->type->strEnd, code, - scopes, scopes_size); + getVariable(node_metadata->type->strBegin, node_metadata->type->strEnd, + code, scopes, scopes_size); if (variable == NULL) { goto RETURN_ERROR; } define->type = getType(variable); } else { printError("Types should be an identifier (for now)", code, - metadata->type->strBegin, metadata->type->strEnd); + node_metadata->type->strBegin, node_metadata->type->strEnd); goto RETURN_ERROR; } @@ -366,6 +471,8 @@ TreeDefineVariableMetadata *treeParseDefineVariable(ParsedTree *tree, goto RETURN_ERROR; } + pushVariableToScope(scopes[scopes_size - 1], define); + return define; RETURN_ERROR: @@ -536,14 +643,8 @@ ParsedTree *treeParseFunction(const ParsedNode *node, SourceCode *code, ParsedTree *parsedTree = treeParseNode(node_metadata->type, code, scopes, scopes_size); - if 
(parsedTree == NULL) { - goto RETURN_ERROR; - } - metadata->returnType = getTreeExpressionType(parsedTree); - // TODO: this is not right - deleteParsedTree(parsedTree); - - const ParserScopeMetadata *params = node_metadata->params->metadata; + const size_t newScopes_size = scopes_size + 1; + TreeScopeMetadata *newScopes[newScopes_size]; metadata->params = a404m_malloc(0); metadata->params_size = 0; @@ -555,10 +656,30 @@ ParsedTree *treeParseFunction(const ParsedNode *node, SourceCode *code, metadata->scope->lines = a404m_malloc(0); metadata->scope->lines_size = 0; + memcpy(newScopes, scopes, scopes_size * sizeof(TreeScopeMetadata *)); + newScopes[newScopes_size - 1] = metadata->scope; + + if (parsedTree == NULL) { + goto RETURN_ERROR; + } + metadata->returnType = getTreeExpressionType(parsedTree); + // TODO: this is not right + deleteParsedTree(parsedTree); + + const ParserScopeMetadata *params = node_metadata->params->metadata; + for (size_t i = 0; i < params->operands_size; ++i) { const ParsedNode *operand = params->operands[i]; + if (operand->token == PARSED_TOKEN_COMMA) { + operand = (ParserCommaMetadata *)operand->metadata; + } + if (operand->token != PARSED_TOKEN_DEFINE_VARIABLE) { + printError( + "Only variable definition is allowed in function parameter list", + code, operand->strBegin, operand->strEnd); + } TreeDefineVariableMetadata *define = - treeParseDefineVariable(tree, operand, code, scopes, scopes_size); + treeParseDefineVariable(tree, operand, code, newScopes, newScopes_size); if (define == NULL) { goto RETURN_ERROR; } @@ -578,10 +699,15 @@ ParsedTree *treeParseFunction(const ParsedNode *node, SourceCode *code, sizeof(TreeDefineVariableMetadata *)); if (node_metadata->body != NULL) { - printError("Not implemented", code, node->strBegin, node->strEnd); - - goto RETURN_ERROR; - return NULL; + const ParserScopeMetadata *body = node_metadata->body->metadata; + for (size_t i = 0; i < body->operands_size; ++i) { + ParsedTree *parsedTree = + 
treeParseNode(body->operands[i], code, newScopes, newScopes_size); + if (parsedTree == NULL) { + goto RETURN_ERROR; + } + pushLineToScope(metadata->scope, parsedTree); + } } else { free(metadata->scope->lines); metadata->scope->lines_size = 0; @@ -589,7 +715,7 @@ ParsedTree *treeParseFunction(const ParsedNode *node, SourceCode *code, return tree; RETURN_ERROR: - + // TODO: doesn't delete all of them free(metadata); free(tree); return NULL; @@ -598,12 +724,12 @@ RETURN_ERROR: ParsedTree *treeParseImport(const ParsedNode *node, SourceCode *code, TreeScopeMetadata *[], size_t) { const ParserImportMetadata *node_metadata = node->metadata; - SizedString *path = nodeToString(node_metadata, code); + SizedString *const path = nodeToString(node_metadata, code); if (path == NULL) { return NULL; } - Code *fileCode = read_whole_file(path->str); + Code *const fileCode = read_whole_file(path->str); if (fileCode == NULL) { goto RETURN_ERROR; } diff --git a/src/compiler/tree_parser/tree_parser.h b/src/compiler/tree_parser/tree_parser.h index 8a9f7b5..b598348 100644 --- a/src/compiler/tree_parser/tree_parser.h +++ b/src/compiler/tree_parser/tree_parser.h @@ -1,7 +1,6 @@ #pragma once #include <compiler/parser/parser.h> -#include <stdint.h> typedef enum TreeToken { TREE_TOKEN_NONE = 0, @@ -71,6 +70,7 @@ extern void printParsedTreeNode(const ParsedTree *parsedTree); extern void deleteParsedTree(ParsedTree *parsedTree); extern ParsedTree *treeParser(SourceCode *code); +extern ParsedTree *treeParserWithPrint(SourceCode *code); extern ParsedTree *_treeParser(const ParsedNode *node, SourceCode *code); extern ParsedTree *treeParseNode(const ParsedNode *node, SourceCode *code, diff --git a/src/fasm/code_generator/code_generator.c b/src/fasm/code_generator/code_generator.c new file mode 100644 index 0000000..0df70f5 --- /dev/null +++ b/src/fasm/code_generator/code_generator.c @@ -0,0 +1,42 @@ +#include "code_generator.h" + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> 
+#include <utils/memory/memory.h>
+
+// Frees the three heap buffers held by a ByteCode (code, data and labels).
+// The struct is passed by value, so the caller's copy keeps dangling pointers.
+void deleteByteCodeInners(ByteCode bytecode) {
+  free(bytecode.code);
+  free(bytecode.data);
+  free(bytecode.labels);
+}
+
+// Builds a ByteCode from linked fasm lines.
+// The data and variables buffers are moved out of `lines` into the result
+// (`lines` is left with fresh empty allocations), and every linked line is
+// appended to the code buffer as its instruction followed by its operand
+// bytes.
+ByteCode fasmCodeGenerator(FasmLinkedLines *lines) {
+  ByteCode bytecode = {
+      .code = a404m_malloc(0),
+      .code_size = 0,
+      .data = lines->data,
+      .data_size = lines->data_size,
+      .labels = lines->variables,
+      .labels_size = lines->variables_size,
+  };
+
+  // ownership of data/variables went to `bytecode`; reset them in `lines`
+  lines->data = a404m_malloc(0);
+  lines->data_size = 0;
+  lines->variables = a404m_malloc(0);
+  lines->variables_size = 0;
+
+  for (size_t i = 0; i < lines->lines_size; ++i) {
+    const FasmLinkedLine line = lines->lines[i];
+    // bytes this line adds: the instruction itself plus its operands
+    const size_t size = sizeof(line.instruction) + line.operands_size;
+    bytecode.code = a404m_realloc(bytecode.code, bytecode.code_size + size);
+    memcpy(bytecode.code + bytecode.code_size, &line.instruction,
+           sizeof(line.instruction));
+    bytecode.code_size += sizeof(line.instruction);
+    memcpy(bytecode.code + bytecode.code_size, line.operands,
+           line.operands_size);
+    bytecode.code_size += line.operands_size;
+  }
+
+  return bytecode;
+}
diff --git a/src/fasm/code_generator/code_generator.h b/src/fasm/code_generator/code_generator.h
new file mode 100644
index 0000000..85715e4
--- /dev/null
+++ b/src/fasm/code_generator/code_generator.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <fasm/linker/linker.h>
+#include <stdint.h>
+
+// Output of fasmCodeGenerator: an instruction byte stream plus the data and
+// label tables taken over from the linked lines.
+typedef struct ByteCode {
+  uint8_t *code;
+  size_t code_size;
+  uint8_t *data;
+  size_t data_size;
+  FasmVariable *labels;
+  size_t labels_size;
+}ByteCode;
+
+extern void deleteByteCodeInners(ByteCode bytecode);
+
+extern ByteCode fasmCodeGenerator(FasmLinkedLines *lines);
diff --git a/src/fasm/lexer/lexer.c b/src/fasm/lexer/lexer.c
new file mode 100644
index 0000000..e3e9610
--- /dev/null
+++ b/src/fasm/lexer/lexer.c
@@ -0,0 +1,643 @@
+#include "lexer.h"
+
+#include <compiler/error_helper/error_helper.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include
<utils/memory/memory.h>
+#include <utils/types.h>
+
+// Upper-case mnemonic of every fasm token.
+// The table is indexed by the numeric token value: fasmLinePrint looks up
+// FASM_TOKEN_STRINGS[line.instruction] and fasmLexerTokenFromIdentifier casts
+// the index of a matching entry back to FasmToken, so the order of the
+// entries has to stay in sync with the FasmToken enum.
+const char *FASM_TOKEN_STRINGS[] = {
+    "NOOP",
+    "PUSH8",
+    "PUSH16",
+    "PUSH32",
+    "PUSH64",
+    "LOAD8",
+    "LOAD16",
+    "LOAD32",
+    "LOAD64",
+    "POP8",
+    "POP16",
+    "POP32",
+    "POP64",
+    "DUP8",
+    "DUP16",
+    "DUP32",
+    "DUP64",
+    "SWAP8",
+    "SWAP16",
+    "SWAP32",
+    "SWAP64",
+    "DROP8",
+    "DROP16",
+    "DROP32",
+    "DROP64",
+    "ADD_I8",
+    "ADD_I16",
+    "ADD_I32",
+    "ADD_I64",
+    "ADD_F32",
+    "ADD_F64",
+    "SUB_I8",
+    "SUB_I16",
+    "SUB_I32",
+    "SUB_I64",
+    "SUB_F32",
+    "SUB_F64",
+    "NEG_I8",
+    "NEG_I16",
+    "NEG_I32",
+    "NEG_I64",
+    "NEG_F32",
+    "NEG_F64",
+    "MUL_I8",
+    "MUL_I16",
+    "MUL_I32",
+    "MUL_I64",
+    "MUL_U8",
+    "MUL_U16",
+    "MUL_U32",
+    "MUL_U64",
+    "MUL_F32",
+    "MUL_F64",
+    "DIV_I8",
+    "DIV_I16",
+    "DIV_I32",
+    "DIV_I64",
+    "DIV_U8",
+    "DIV_U16",
+    "DIV_U32",
+    "DIV_U64",
+    "DIV_F32",
+    "DIV_F64",
+    "REM_I8",
+    "REM_I16",
+    "REM_I32",
+    "REM_I64",
+    "REM_U8",
+    "REM_U16",
+    "REM_U32",
+    "REM_U64",
+    "CAST_I8_I64",
+    "CAST_I16_I64",
+    "CAST_I32_I64",
+    "CAST_I64_I8",
+    "CAST_I64_I16",
+    "CAST_I64_I32",
+    "CAST_F64_I64",
+    "CAST_I64_F64",
+    "CAST_U8_U64",
+    "CAST_U16_U64",
+    "CAST_U32_U64",
+    "CAST_U64_U8",
+    "CAST_U64_U16",
+    "CAST_U64_U32",
+    "CAST_F64_U64",
+    "CAST_U64_F64",
+    "CAST_F32_F64",
+    "CAST_F64_F32",
+    "JUMP",
+    "JZ_I8",
+    "JNZ_I8",
+    "JN_I8",
+    "JNN_I8",
+    "JP_I8",
+    "JNP_I8",
+    "JZ_I16",
+    "JNZ_I16",
+    "JN_I16",
+    "JNN_I16",
+    "JP_I16",
+    "JNP_I16",
+    "JZ_I32",
+    "JNZ_I32",
+    "JN_I32",
+    "JNN_I32",
+    "JP_I32",
+    "JNP_I32",
+    "JZ_I64",
+    "JNZ_I64",
+    "JN_I64",
+    "JNN_I64",
+    "JP_I64",
+    "JNP_I64",
+    "JZ_F32",
+    "JNZ_F32",
+    "JN_F32",
+    "JNN_F32",
+    "JP_F32",
+    "JNP_F32",
+    "JZ_F64",
+    "JNZ_F64",
+    "JN_F64",
+    "JNN_F64",
+    "JP_F64",
+    "JNP_F64",
+    "ALLOC_HEAP",
+    "ALLOC_STACK",
+    "FREE_HEAP",
+    "GET_STACK_ADDRESS",
+    "GET_GLOBAL_ADDRESS",
+    "CALL",
+    "RET",
+    "SYSCALL",
+    "DEFINE_BYTE",
+    "DEFINE_WORD",
+    "DEFINE_DWORD",
+    "DEFINE_QWORD",
+    "NONE",
+};
+
+const char *FASM_LINE_LOOKING_FOR_STRINGS[] = {
+ "FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION", + "FASM_LINE_LOOKING_FOR_INSTRUCTION", + "FASM_LINE_LOOKING_FOR_OPERAND", + "FASM_LINE_LOOKING_FOR_OPERAND_OR_END", + "FASM_LINE_LOOKING_FOR_COMMA_OR_END", +}; + +const size_t FASM_TOKEN_STRINGS_SIZE = + sizeof(FASM_TOKEN_STRINGS) / sizeof(char *); + +void fasmLinePrint(FasmLine line) { + printf("{label='%.*s',instruction='%s',operands=[\n", + (int)(line.labelEnd - line.labelBegin), line.labelBegin, + FASM_TOKEN_STRINGS[line.instruction]); + for (size_t i = 0; i < line.operands_size; ++i) { + const FasmOperand operand = line.operands[i]; + printf(" {'%.*s'},\n", (int)(operand.end - operand.begin), operand.begin); + } + printf("]}\n"); +} + +void fasmLinesPrint(FasmLines lines) { + printf("section code\n"); + for (size_t i = 0; i < lines.lines_size; ++i) { + fasmLinePrint(lines.lines[i]); + } + printf("section data\n"); + for (size_t i = 0; i < lines.data_size; ++i) { + fasmLinePrint(lines.data[i]); + } +} + +void fasmLineDeleteInner(FasmLine line) { free(line.operands); } + +void fasmLinesDeleteInner(FasmLines lines) { + for (size_t i = 0; i < lines.lines_size; ++i) { + fasmLineDeleteInner(lines.lines[i]); + } + for (size_t i = 0; i < lines.data_size; ++i) { + fasmLineDeleteInner(lines.data[i]); + } + free(lines.lines); + free(lines.data); +} + +FasmLines *fasmLexer(SourceCode *sourceCode) { + FasmLines *lines = a404m_malloc(sourceCode->size * sizeof(FasmLines)); + + for (size_t i = 0; i < sourceCode->size; ++i) { + if ((lines[i] = fasmLexerCode(sourceCode->codes[i], sourceCode)) + .lines_size == ERROR_SIZE) { + goto RETURN_ERROR; + } + } + + return lines; +RETURN_ERROR: + free(lines); + return NULL; +} + +FasmLines fasmLexerCode(Code *code, SourceCode *sourceCode) { + FasmLineLookingFor lookingFor = FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION; + FasmLine line = { + .begin = code->code, + .end = code->code, + .labelBegin = NULL, + .labelEnd = NULL, + .instruction = FASM_TOKEN_NONE, + .operands = a404m_malloc(0), + 
.operands_size = 0, + }; + + FasmLines lines = { + .lines = a404m_malloc(0), + .lines_size = 0, + .data = a404m_malloc(0), + .data_size = 0, + }; + + FasmSection section = FASM_SECTION_NONE; + + for (char *iter = code->code;; ++iter) { + LOOP_BEGIN: + const char c = *iter; + if (c == '\0') { + switch (lookingFor) { + case FASM_LINE_LOOKING_FOR_INSTRUCTION: + case FASM_LINE_LOOKING_FOR_OPERAND: + printError("Expected instruction", sourceCode, line.begin, iter); + goto RETURN_ERROR; + case FASM_LINE_LOOKING_FOR_OPERAND_OR_END: + case FASM_LINE_LOOKING_FOR_COMMA_OR_END: + fasmLexerPushLine(&lines, &line, iter, section, sourceCode); + /* pass through */ + case FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION: + goto RETURN_SUCCESS; + } + } else if (fasmLexerIsSpace(c)) { + continue; + } + /*fprintf(stderr, "a404m: Char '%c' at %ld and looking for '%s'\n", c,*/ + /* iter - code->code, FASM_LINE_LOOKING_FOR_STRINGS[lookingFor]);*/ + switch (lookingFor) { + case FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION: + if (fasmLexerIsLineSeparator(c)) { + continue; + } else if (fasmLexerIsSectionIndicator(c)) { + static const char *const SECTIONS_STRINGS[] = { + "code", + "data", + }; + static const FasmSection SECTIONS[] = { + FASM_SECTION_CODE, + FASM_SECTION_DATA, + }; + static const size_t SECTIONS_SIZE = + sizeof(SECTIONS_STRINGS) / sizeof(*SECTIONS_STRINGS); + + ++iter; + + for (size_t i = 0; i < SECTIONS_SIZE; ++i) { + const char *const sectionStr = SECTIONS_STRINGS[i]; + for (size_t j = 0;; ++j) { + const char c0 = sectionStr[j]; + const char c1 = iter[j]; + if (c0 == '\0') { + if (c1 == '\0' || isspace(c1)) { + iter += j; + section = SECTIONS[i]; + /*fprintf(stderr, "section changed to '%s'\n", sectionStr);*/ + goto LOOP_BEGIN; + } else { + break; + } + } else if (c0 != c1) { // no need for c1 == '\0' + break; + } + } + } + printError("Invalid section", sourceCode, iter - 1, iter); + goto RETURN_ERROR; + } else if (fasmLexerIsWord(c)) { + char *begin = iter; + char *end = iter = 
fasmLexerGetNextWord(iter); + line.begin = begin; + line.end = end; + if (fasmLexerIsLabel(*iter)) { + ++iter; + line.labelBegin = begin; + line.labelEnd = end; + lookingFor = FASM_LINE_LOOKING_FOR_INSTRUCTION; + } else { + if ((line.instruction = fasmLexerTokenFromIdentifier(begin, end)) == + FASM_TOKEN_NONE) { + printError("Unknown instruction", sourceCode, begin, end); + goto RETURN_ERROR; + } + + lookingFor = FASM_LINE_LOOKING_FOR_OPERAND_OR_END; + } + goto LOOP_BEGIN; + } else { + UNEXPECTED: + fasmLinePrint(line); + printError("Unexpected character", sourceCode, iter, iter + 1); + goto RETURN_ERROR; + } + break; + case FASM_LINE_LOOKING_FOR_INSTRUCTION: + if (fasmLexerIsWord(c)) { + char *begin = iter; + char *end = iter = fasmLexerGetNextWord(iter); + line.end = end; + + if ((line.instruction = fasmLexerTokenFromIdentifier(begin, end)) == + FASM_TOKEN_NONE) { + printError("Unknown instruction", sourceCode, begin, end); + goto RETURN_ERROR; + } + + lookingFor = FASM_LINE_LOOKING_FOR_OPERAND_OR_END; + goto LOOP_BEGIN; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + break; + case FASM_LINE_LOOKING_FOR_OPERAND: + if (fasmLexerIsWord(c)) { + LEX_OPERAND: + char *begin = iter; + char *end = iter = fasmLexerGetNextWord(iter); + const size_t size = + a404m_malloc_usable_size(line.operands) / sizeof(*line.operands); + if (line.operands_size == size) { + line.operands = a404m_realloc( + line.operands, + (line.operands_size + line.operands_size / 2 + 1) * + sizeof(*line.operands)); + } + line.operands[line.operands_size].begin = begin; + line.operands[line.operands_size].end = end; + line.operands_size += 1; + line.end = end; + lookingFor = FASM_LINE_LOOKING_FOR_COMMA_OR_END; + goto LOOP_BEGIN; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + break; + case FASM_LINE_LOOKING_FOR_OPERAND_OR_END: + if (fasmLexerIsWord(c)) { + goto LEX_OPERAND; + } else if 
(fasmLexerIsLineSeparator(c)) { + goto LEX_END; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + break; + case FASM_LINE_LOOKING_FOR_COMMA_OR_END: + if (fasmLexerIsLineSeparator(c)) { + LEX_END: + fasmLexerPushLine(&lines, &line, iter, section, sourceCode); + lookingFor = FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION; + } else if (fasmLexerIsOperandSeparator(c)) { + lookingFor = FASM_LINE_LOOKING_FOR_OPERAND; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + } + } + +RETURN_SUCCESS: + return lines; + +RETURN_ERROR: + free(lines.lines); + const FasmLines error = { + .lines_size = ERROR_SIZE, + }; + return error; +} + +bool fasmLexerPushLine(FasmLines *lines, FasmLine *line, char const *iter, + FasmSection section, SourceCode *sourceCode) { + if (!fasmLexerIsAllowed(*line, section)) { + printError("Instruction is not allowed here", sourceCode, line->begin, + line->end); + return false; + } + line->operands = a404m_realloc(line->operands, + line->operands_size * sizeof(*line->operands)); + switch (section) { + case FASM_SECTION_NONE: + printError("Instruction is in no section", sourceCode, line->begin, + line->end); + return true; + case FASM_SECTION_CODE: + _fasmLexerPushLine(&lines->lines, &lines->lines_size, line, iter); + return true; + case FASM_SECTION_DATA: + _fasmLexerPushLine(&lines->data, &lines->data_size, line, iter); + return true; + } + fprintf(stderr, "Bad section '%d'\n", section); + return false; +} + +void _fasmLexerPushLine(FasmLine **lines, size_t *lines_size, FasmLine *line, + char const *) { + const size_t size = a404m_malloc_usable_size(*lines) / sizeof(**lines); + if (size == *lines_size) { + *lines = a404m_realloc(*lines, (size * 2 + 1) * sizeof(**lines)); + } + // no need + /*line->end = iter;*/ + (*lines)[*lines_size] = *line; + *lines_size += 1; + + line->operands = a404m_malloc(0); + line->operands_size = 0; + + /*line->begin = iter;*/ + 
line->labelBegin = NULL;
+  line->labelEnd = NULL;
+  /*line->instruction = FASM_TOKEN_NONE;*/
+}
+
+// Tells whether the instruction of `line` may appear in `section`:
+// the DEFINE_* tokens only in the data section, every other real instruction
+// only in the code section, and FASM_TOKEN_NONE nowhere.
+// Exits the process on a token value that is not part of the enum.
+bool fasmLexerIsAllowed(FasmLine line, FasmSection section) {
+  switch (line.instruction) {
+  case FASM_TOKEN_NOOP:
+  case FASM_TOKEN_PUSH8:
+  case FASM_TOKEN_PUSH16:
+  case FASM_TOKEN_PUSH32:
+  case FASM_TOKEN_PUSH64:
+  case FASM_TOKEN_LOAD8:
+  case FASM_TOKEN_LOAD16:
+  case FASM_TOKEN_LOAD32:
+  case FASM_TOKEN_LOAD64:
+  case FASM_TOKEN_POP8:
+  case FASM_TOKEN_POP16:
+  case FASM_TOKEN_POP32:
+  case FASM_TOKEN_POP64:
+  case FASM_TOKEN_DUP8:
+  case FASM_TOKEN_DUP16:
+  case FASM_TOKEN_DUP32:
+  case FASM_TOKEN_DUP64:
+  case FASM_TOKEN_SWAP8:
+  case FASM_TOKEN_SWAP16:
+  case FASM_TOKEN_SWAP32:
+  case FASM_TOKEN_SWAP64:
+  case FASM_TOKEN_DROP8:
+  case FASM_TOKEN_DROP16:
+  case FASM_TOKEN_DROP32:
+  case FASM_TOKEN_DROP64:
+  case FASM_TOKEN_ADD_I8:
+  case FASM_TOKEN_ADD_I16:
+  case FASM_TOKEN_ADD_I32:
+  case FASM_TOKEN_ADD_I64:
+  case FASM_TOKEN_ADD_F32:
+  case FASM_TOKEN_ADD_F64:
+  case FASM_TOKEN_SUB_I8:
+  case FASM_TOKEN_SUB_I16:
+  case FASM_TOKEN_SUB_I32:
+  case FASM_TOKEN_SUB_I64:
+  case FASM_TOKEN_SUB_F32:
+  case FASM_TOKEN_SUB_F64:
+  case FASM_TOKEN_NEG_I8:
+  case FASM_TOKEN_NEG_I16:
+  case FASM_TOKEN_NEG_I32:
+  case FASM_TOKEN_NEG_I64:
+  case FASM_TOKEN_NEG_F32:
+  case FASM_TOKEN_NEG_F64:
+  case FASM_TOKEN_MUL_I8:
+  case FASM_TOKEN_MUL_I16:
+  case FASM_TOKEN_MUL_I32:
+  case FASM_TOKEN_MUL_I64:
+  case FASM_TOKEN_MUL_U8:
+  case FASM_TOKEN_MUL_U16:
+  case FASM_TOKEN_MUL_U32:
+  case FASM_TOKEN_MUL_U64:
+  case FASM_TOKEN_MUL_F32:
+  case FASM_TOKEN_MUL_F64:
+  case FASM_TOKEN_DIV_I8:
+  case FASM_TOKEN_DIV_I16:
+  case FASM_TOKEN_DIV_I32:
+  case FASM_TOKEN_DIV_I64:
+  case FASM_TOKEN_DIV_U8:
+  case FASM_TOKEN_DIV_U16:
+  case FASM_TOKEN_DIV_U32:
+  case FASM_TOKEN_DIV_U64:
+  case FASM_TOKEN_DIV_F32:
+  case FASM_TOKEN_DIV_F64:
+  case FASM_TOKEN_REM_I8:
+  case FASM_TOKEN_REM_I16:
+  case FASM_TOKEN_REM_I32:
+  case FASM_TOKEN_REM_I64:
+  case FASM_TOKEN_REM_U8:
+  case FASM_TOKEN_REM_U16:
+  case FASM_TOKEN_REM_U32:
+  case FASM_TOKEN_REM_U64:
+  case FASM_TOKEN_CAST_I8_I64:
+  case FASM_TOKEN_CAST_I16_I64:
+  case FASM_TOKEN_CAST_I32_I64:
+  case FASM_TOKEN_CAST_I64_I8:
+  case FASM_TOKEN_CAST_I64_I16:
+  case FASM_TOKEN_CAST_I64_I32:
+  case FASM_TOKEN_CAST_F64_I64:
+  case FASM_TOKEN_CAST_I64_F64:
+  case FASM_TOKEN_CAST_U8_U64:
+  case FASM_TOKEN_CAST_U16_U64:
+  case FASM_TOKEN_CAST_U32_U64:
+  case FASM_TOKEN_CAST_U64_U8:
+  case FASM_TOKEN_CAST_U64_U16:
+  case FASM_TOKEN_CAST_U64_U32:
+  case FASM_TOKEN_CAST_F64_U64:
+  case FASM_TOKEN_CAST_U64_F64:
+  case FASM_TOKEN_CAST_F32_F64:
+  case FASM_TOKEN_CAST_F64_F32:
+  case FASM_TOKEN_JUMP:
+  case FASM_TOKEN_JZ_I8:
+  case FASM_TOKEN_JNZ_I8:
+  case FASM_TOKEN_JN_I8:
+  case FASM_TOKEN_JNN_I8:
+  case FASM_TOKEN_JP_I8:
+  case FASM_TOKEN_JNP_I8:
+  case FASM_TOKEN_JZ_I16:
+  case FASM_TOKEN_JNZ_I16:
+  case FASM_TOKEN_JN_I16:
+  case FASM_TOKEN_JNN_I16:
+  case FASM_TOKEN_JP_I16:
+  case FASM_TOKEN_JNP_I16:
+  case FASM_TOKEN_JZ_I32:
+  case FASM_TOKEN_JNZ_I32:
+  case FASM_TOKEN_JN_I32:
+  case FASM_TOKEN_JNN_I32:
+  case FASM_TOKEN_JP_I32:
+  case FASM_TOKEN_JNP_I32:
+  case FASM_TOKEN_JZ_I64:
+  case FASM_TOKEN_JNZ_I64:
+  case FASM_TOKEN_JN_I64:
+  case FASM_TOKEN_JNN_I64:
+  case FASM_TOKEN_JP_I64:
+  case FASM_TOKEN_JNP_I64:
+  case FASM_TOKEN_JZ_F32:
+  case FASM_TOKEN_JNZ_F32:
+  case FASM_TOKEN_JN_F32:
+  case FASM_TOKEN_JNN_F32:
+  case FASM_TOKEN_JP_F32:
+  case FASM_TOKEN_JNP_F32:
+  case FASM_TOKEN_JZ_F64:
+  case FASM_TOKEN_JNZ_F64:
+  case FASM_TOKEN_JN_F64:
+  case FASM_TOKEN_JNN_F64:
+  case FASM_TOKEN_JP_F64:
+  case FASM_TOKEN_JNP_F64:
+  case FASM_TOKEN_ALLOC_HEAP:
+  case FASM_TOKEN_ALLOC_STACK:
+  case FASM_TOKEN_FREE_HEAP:
+  case FASM_TOKEN_GET_STACK_ADDRESS:
+  case FASM_TOKEN_GET_GLOBAL_ADDRESS:
+  case FASM_TOKEN_CALL:
+  case FASM_TOKEN_RET:
+  case FASM_TOKEN_SYSCALL:
+    return section == FASM_SECTION_CODE;
+  case FASM_TOKEN_DEFINE_BYTE:
+  case FASM_TOKEN_DEFINE_WORD:
+  case FASM_TOKEN_DEFINE_DWORD:
+  case FASM_TOKEN_DEFINE_QWORD:
+    return section == FASM_SECTION_DATA;
+  case FASM_TOKEN_NONE:
+    return false;
+  }
+  fprintf(stderr, "Bad token %d at %s:%d\n", line.instruction, __FILE_NAME__,
+          __LINE__);
+  exit(1);
+}
+
+// Returns a pointer just past the word that starts at `iter`.
+// A word that starts with a quote character runs up to and including the
+// matching closing quote (the process exits when there is none); any other
+// word runs while fasmLexerIsWord holds.
+char *fasmLexerGetNextWord(char *iter) {
+  if (fasmLexerIsString(*iter)) {
+    const char begin = *iter;
+    for (++iter; *iter != begin; ++iter) {
+      if (*iter == '\0') {
+        fprintf(stderr, "No ending for string at %s:%d\n", __FILE_NAME__,
+                __LINE__);
+        exit(1);
+      }
+    }
+    ++iter;
+  } else {
+    for (++iter; *iter != '\0' && fasmLexerIsWord(*iter); ++iter);
+  }
+  return iter;
+}
+
+// Maps the identifier [begin, end) to its token, ignoring letter case.
+// Returns FASM_TOKEN_NONE when no mnemonic in FASM_TOKEN_STRINGS matches.
+FasmToken fasmLexerTokenFromIdentifier(char *begin, char *end) {
+  const size_t size = end - begin;
+  char *uppered = a404m_malloc((size + 1) * sizeof(char));
+  for (char *iter = begin; iter < end; ++iter) {
+    // <ctype.h> functions are only defined for values representable as
+    // unsigned char, and plain char may be negative
+    uppered[iter - begin] = (char)toupper((unsigned char)*iter);
+  }
+  uppered[size] = '\0';
+
+  for (size_t i = 0; i < FASM_TOKEN_STRINGS_SIZE; ++i) {
+    const char *str = FASM_TOKEN_STRINGS[i];
+    if (strcmp(uppered, str) == 0) {
+      free(uppered);
+      return (FasmToken)i;
+    }
+  }
+
+  free(uppered);
+  return FASM_TOKEN_NONE;
+}
+
+// Character classes used by the lexer. A newline is a line separator and is
+// therefore deliberately not treated as skippable space.
+bool fasmLexerIsSpace(char c) {
+  return c != '\n' && isspace((unsigned char)c);
+}
+bool fasmLexerIsSectionIndicator(char c) { return c == '.'; }
+bool fasmLexerIsLabel(char c) { return c == ':'; }
+bool fasmLexerIsWord(char c) {
+  return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') ||
+         ('0' <= c && c <= '9') || c == '_' || fasmLexerIsString(c);
+}
+extern bool fasmLexerIsIdentifierSymbol(char c) { return c == '`'; }
+bool fasmLexerIsString(char c) { return c == '\'' || c == '\"'; }
+bool fasmLexerIsOperandSeparator(char c) { return c == ','; }
+bool fasmLexerIsLineSeparator(char c) { return c == '\n'; }
diff --git a/src/fasm/lexer/lexer.h b/src/fasm/lexer/lexer.h
new file mode 100644
index 0000000..2e5f227
--- /dev/null
+++ b/src/fasm/lexer/lexer.h
@@ -0,0 +1,363 @@
+#pragma once
+
+#include <stdint.h>
+#include <utils/types.h>
+
+#include "compiler/source_code/source_code.h"
+
+typedef
enum FasmToken : uint8_t { + // no operation (does nothing) + FASM_TOKEN_NOOP = 0, + + // pushes operand to stack + FASM_TOKEN_PUSH8, + FASM_TOKEN_PUSH16, + FASM_TOKEN_PUSH32, + FASM_TOKEN_PUSH64, + + // dereferences stack.top and pushes its value to stack + FASM_TOKEN_LOAD8, + FASM_TOKEN_LOAD16, + FASM_TOKEN_LOAD32, + FASM_TOKEN_LOAD64, + + // pops value stack.(top-1) to address which is stack.top + FASM_TOKEN_POP8, + FASM_TOKEN_POP16, + FASM_TOKEN_POP32, + FASM_TOKEN_POP64, + + // duplicates stack.top + FASM_TOKEN_DUP8, + FASM_TOKEN_DUP16, + FASM_TOKEN_DUP32, + FASM_TOKEN_DUP64, + + // swaps stack.top with stack.(top-1) + FASM_TOKEN_SWAP8, + FASM_TOKEN_SWAP16, + FASM_TOKEN_SWAP32, + FASM_TOKEN_SWAP64, + + // drops stack.top + FASM_TOKEN_DROP8, + FASM_TOKEN_DROP16, + FASM_TOKEN_DROP32, + FASM_TOKEN_DROP64, + + // adds two stack top integers + FASM_TOKEN_ADD_I8, + FASM_TOKEN_ADD_I16, + FASM_TOKEN_ADD_I32, + FASM_TOKEN_ADD_I64, + + // adds two stack top floats + FASM_TOKEN_ADD_F32, + FASM_TOKEN_ADD_F64, + + // subtracts two stack top integers + FASM_TOKEN_SUB_I8, + FASM_TOKEN_SUB_I16, + FASM_TOKEN_SUB_I32, + FASM_TOKEN_SUB_I64, + + // subtracts two stack top floats + FASM_TOKEN_SUB_F32, + FASM_TOKEN_SUB_F64, + + // negates stack top integer and pushes it back + FASM_TOKEN_NEG_I8, + FASM_TOKEN_NEG_I16, + FASM_TOKEN_NEG_I32, + FASM_TOKEN_NEG_I64, + + // negates stack top float and pushes it back + FASM_TOKEN_NEG_F32, + FASM_TOKEN_NEG_F64, + + // multiplies two stack top signed integers + FASM_TOKEN_MUL_I8, + FASM_TOKEN_MUL_I16, + FASM_TOKEN_MUL_I32, + FASM_TOKEN_MUL_I64, + + // multiplies two stack top unsigned integers + FASM_TOKEN_MUL_U8, + FASM_TOKEN_MUL_U16, + FASM_TOKEN_MUL_U32, + FASM_TOKEN_MUL_U64, + + // multiplies two stack top floats + FASM_TOKEN_MUL_F32, + FASM_TOKEN_MUL_F64, + + // divides two stack top signed integers + FASM_TOKEN_DIV_I8, + FASM_TOKEN_DIV_I16, + FASM_TOKEN_DIV_I32, + FASM_TOKEN_DIV_I64, + + // divides two stack top unsigned integers +
FASM_TOKEN_DIV_U8, + FASM_TOKEN_DIV_U16, + FASM_TOKEN_DIV_U32, + FASM_TOKEN_DIV_U64, + + // divides two stack top floats + FASM_TOKEN_DIV_F32, + FASM_TOKEN_DIV_F64, + + // computes the remainder of two stack top signed integers + FASM_TOKEN_REM_I8, + FASM_TOKEN_REM_I16, + FASM_TOKEN_REM_I32, + FASM_TOKEN_REM_I64, + + // computes the remainder of two stack top unsigned integers + FASM_TOKEN_REM_U8, + FASM_TOKEN_REM_U16, + FASM_TOKEN_REM_U32, + FASM_TOKEN_REM_U64, + + // NOTE(review): the original comments called the _I casts "unsigned" and the + // _U casts "signed", the opposite of the I = signed / U = unsigned wording of + // the MUL/DIV/REM groups; reworded to follow the names - confirm against the runner + // signed casts 8 bit to 64 bit + FASM_TOKEN_CAST_I8_I64, + // signed casts 16 bit to 64 bit + FASM_TOKEN_CAST_I16_I64, + // signed casts 32 bit to 64 bit + FASM_TOKEN_CAST_I32_I64, + + // signed casts 64 bit to 8 bit + FASM_TOKEN_CAST_I64_I8, + // signed casts 64 bit to 16 bit + FASM_TOKEN_CAST_I64_I16, + // signed casts 64 bit to 32 bit + FASM_TOKEN_CAST_I64_I32, + + // converts between float 64 bit and signed int 64 bit; NOTE(review): the name reads float -> int, the original comment said int -> float - confirm direction + FASM_TOKEN_CAST_F64_I64, + // converts between signed int 64 bit and float 64 bit; NOTE(review): the name reads int -> float, the original comment said float -> int - confirm direction + FASM_TOKEN_CAST_I64_F64, + + // unsigned casts 8 bit to 64 bit + FASM_TOKEN_CAST_U8_U64, + // unsigned casts 16 bit to 64 bit + FASM_TOKEN_CAST_U16_U64, + // unsigned casts 32 bit to 64 bit + FASM_TOKEN_CAST_U32_U64, + + // unsigned casts 64 bit to 8 bit + FASM_TOKEN_CAST_U64_U8, + // unsigned casts 64 bit to 16 bit + FASM_TOKEN_CAST_U64_U16, + // unsigned casts 64 bit to 32 bit + FASM_TOKEN_CAST_U64_U32, + + // converts between float 64 bit and unsigned int 64 bit; NOTE(review): the name reads float -> int, the original comment said int -> float - confirm direction + FASM_TOKEN_CAST_F64_U64, + // converts between unsigned int 64 bit and float 64 bit; NOTE(review): the name reads int -> float, the original comment said float -> int - confirm direction + FASM_TOKEN_CAST_U64_F64, + + // casts float 32 bit to float 64 bit + FASM_TOKEN_CAST_F32_F64, + // casts float 64 bit to float 32 bit + FASM_TOKEN_CAST_F64_F32, + + // unconditional jump to instruction (sets IP to stack.top) + FASM_TOKEN_JUMP, + + // conditionally jumps to stack.top if stack.(top-1) as 8 bit is + // zero + FASM_TOKEN_JZ_I8, + // not zero + FASM_TOKEN_JNZ_I8, + // negative + FASM_TOKEN_JN_I8, + // not negative + FASM_TOKEN_JNN_I8, + // positive + FASM_TOKEN_JP_I8, + // not positive + FASM_TOKEN_JNP_I8, + + //
conditionally jumps to stack.top if stack.(top-1) as 16 bit is + // zero + FASM_TOKEN_JZ_I16, + // not zero + FASM_TOKEN_JNZ_I16, + // negative + FASM_TOKEN_JN_I16, + // not negative + FASM_TOKEN_JNN_I16, + // positive + FASM_TOKEN_JP_I16, + // not positive + FASM_TOKEN_JNP_I16, + + // conditionally jumps to stack.top if stack.(top-1) as 32 bit is + // zero + FASM_TOKEN_JZ_I32, + // not zero + FASM_TOKEN_JNZ_I32, + // negative + FASM_TOKEN_JN_I32, + // not negative + FASM_TOKEN_JNN_I32, + // positive + FASM_TOKEN_JP_I32, + // not positive + FASM_TOKEN_JNP_I32, + + // conditionally jumps to stack.top if stack.(top-1) as 64 bit is + // zero + FASM_TOKEN_JZ_I64, + // not zero + FASM_TOKEN_JNZ_I64, + // negative + FASM_TOKEN_JN_I64, + // not negative + FASM_TOKEN_JNN_I64, + // positive + FASM_TOKEN_JP_I64, + // not positive + FASM_TOKEN_JNP_I64, + + // conditionally jumps to stack.top if stack.(top-1) as 32 bit float is + // zero + FASM_TOKEN_JZ_F32, + // not zero + FASM_TOKEN_JNZ_F32, + // negative + FASM_TOKEN_JN_F32, + // not negative + FASM_TOKEN_JNN_F32, + // positive + FASM_TOKEN_JP_F32, + // not positive + FASM_TOKEN_JNP_F32, + + // conditionally jumps to stack.top if stack.(top-1) as 64 bit float is + // zero + FASM_TOKEN_JZ_F64, + // not zero + FASM_TOKEN_JNZ_F64, + // negative + FASM_TOKEN_JN_F64, + // not negative + FASM_TOKEN_JNN_F64, + // positive + FASM_TOKEN_JP_F64, + // not positive + FASM_TOKEN_JNP_F64, + + // allocates n bytes on the heap and pushes their address to the stack (n = stack.top) + FASM_TOKEN_ALLOC_HEAP, + // allocates n bytes on the stack (n = stack.top) + FASM_TOKEN_ALLOC_STACK, + // frees the address held in stack.top + FASM_TOKEN_FREE_HEAP, + // gives stack root address + FASM_TOKEN_GET_STACK_ADDRESS, + // gives global root address + FASM_TOKEN_GET_GLOBAL_ADDRESS, + + // calls function (stores the current IP (instruction pointer) in the call stack) + FASM_TOKEN_CALL, + // pops call stack to IP (instruction pointer) + FASM_TOKEN_RET, + + // stack.top as
u8 is id of syscall + FASM_TOKEN_SYSCALL, + + FASM_TOKEN_DEFINE_BYTE, + FASM_TOKEN_DEFINE_WORD, + FASM_TOKEN_DEFINE_DWORD, + FASM_TOKEN_DEFINE_QWORD, + + FASM_TOKEN_NONE, +} FasmToken; + +extern const char *FASM_TOKEN_STRINGS[]; +extern const size_t FASM_TOKEN_STRINGS_SIZE; + +typedef enum FasmSyscall : uint8_t { + // stack = [...,u64 count,i8* buf,u32 fd] -> stack = [...] + FASM_SYSCALL_READ = 0, + // stack = [...,u64 count,i8* buf,u32 fd] -> stack = [...] + FASM_SYSCALL_WRITE, + // stack = [...,i32 mode,i32 flags,const i8* filename] -> stack = [...,u32 fd] + FASM_SYSCALL_OPEN, + // stack = [...,u32 fd] -> stack = [...] + FASM_SYSCALL_CLOSE, + // stack = [...,u32 status] -> stack = [...] + FASM_SYSCALL_EXIT, +} FasmSyscall; + +typedef enum FasmLineLookingFor { + FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION = 0, + FASM_LINE_LOOKING_FOR_INSTRUCTION, + FASM_LINE_LOOKING_FOR_OPERAND, + FASM_LINE_LOOKING_FOR_OPERAND_OR_END, + FASM_LINE_LOOKING_FOR_COMMA_OR_END, +} FasmLineLookingFor; + +extern const char *FASM_LINE_LOOKING_FOR_STRINGS[]; + +typedef enum FasmSection { + FASM_SECTION_NONE, + FASM_SECTION_CODE, + FASM_SECTION_DATA, +} FasmSection; + +typedef struct FasmOperand { + char *begin; + char *end; +} FasmOperand; + +typedef struct FasmLine { + char const *begin; + char const *end; + + char const *labelBegin; + char const *labelEnd; + + FasmToken instruction; + + FasmOperand *operands; + size_t operands_size; +} FasmLine; + +typedef struct FasmLines { + FasmLine *lines; + size_t lines_size; + FasmLine *data; + size_t data_size; +} FasmLines; + +extern void fasmLinePrint(FasmLine line); +extern void fasmLinesPrint(FasmLines lines); + +extern void fasmLineDeleteInner(FasmLine line); +extern void fasmLinesDeleteInner(FasmLines lines); + +extern FasmLines *fasmLexer(SourceCode *sourceCode); +extern FasmLines fasmLexerCode(Code *code, SourceCode *sourceCode); + +extern bool fasmLexerPushLine(FasmLines *lines, FasmLine *line, + char const *iter, FasmSection section, + 
SourceCode *sourceCode); +extern void _fasmLexerPushLine(FasmLine **lines, size_t *lines_size, + FasmLine *line, char const *iter); + +extern bool fasmLexerIsAllowed(FasmLine line, FasmSection section); + +extern char *fasmLexerGetNextWord(char *iter); +extern FasmToken fasmLexerTokenFromIdentifier(char *begin, char *end); + +extern bool fasmLexerIsSpace(char c); +extern bool fasmLexerIsSectionIndicator(char c); +extern bool fasmLexerIsLabel(char c); +extern bool fasmLexerIsWord(char c); +extern bool fasmLexerIsIdentifierSymbol(char c); +extern bool fasmLexerIsString(char c); +extern bool fasmLexerIsOperandSeparator(char c); +extern bool fasmLexerIsLineSeparator(char c); diff --git a/src/fasm/linker/linker.c b/src/fasm/linker/linker.c new file mode 100644 index 0000000..faa3cd8 --- /dev/null +++ b/src/fasm/linker/linker.c @@ -0,0 +1,870 @@ +#include "linker.h" + +#include <compiler/error_helper/error_helper.h> +#include <compiler/source_code/source_code.h> +#include <fasm/lexer/lexer.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <utils/memory/memory.h> + +void fasmVariablePrint(FasmVariable variable) { + printf(" {name='%.*s',value=0x%ld}\n", (int)(variable.end - variable.begin), + variable.begin, variable.value); +} + +void fasmLinkedLinePrint(FasmLinkedLine line) { + printf(" {instruction='%s',operands=[\n", + FASM_TOKEN_STRINGS[line.instruction]); + for (size_t i = 0; i < line.operands_size; ++i) { + printf(" 0x%x,\n", line.operands[i]); + } + printf(" ]}\n"); +} + +void fasmLinkedLinesPrint(FasmLinkedLines lines) { + printf("{lines=[\n"); + for (size_t i = 0; i < lines.lines_size; ++i) { + fasmLinkedLinePrint(lines.lines[i]); + } + printf("],\ndata=[\n"); + for (size_t i = 0; i < lines.data_size; ++i) { + printf(" 0x%.2x,\n", lines.data[i]); + } + printf("],\nvariable=[\n"); + for (size_t i = 0; i < lines.variables_size; ++i) { + fasmVariablePrint(lines.variables[i]); + } + printf("]}\n"); +} + +void 
fasmLinkedLineDeleteInner(FasmLinkedLine line) { free(line.operands); } + +void fasmLinkedLinesDeleteInner(FasmLinkedLines lines) { + for (size_t i = 0; i < lines.lines_size; ++i) { + fasmLinkedLineDeleteInner(lines.lines[i]); + } + free(lines.lines); + free(lines.variables); + free(lines.data); +} + +FasmLinkedLines fasmLinker(const FasmLines *lines, SourceCode *sourceCode) { + FasmLinkedLines linkedLines = { + .lines = a404m_malloc(0), + .lines_size = 0, + .variables = a404m_malloc(0), + .variables_size = 0, + .data = a404m_malloc(0), + .data_size = 0, + }; + + fasmLinesSetVariables(&linkedLines, lines, sourceCode); + fasmLinesSetLines(&linkedLines, lines, sourceCode); + fasmLinesSetData(&linkedLines, lines, sourceCode); + + return linkedLines; + + // RETURN_ERROR: + const FasmLinkedLines ERROR = { + .lines = NULL, + .lines_size = ERROR_SIZE, + .variables = NULL, + .variables_size = ERROR_SIZE, + .data = NULL, + .data_size = ERROR_SIZE, + }; + + return ERROR; +} + +void fasmLinesSetVariables(FasmLinkedLines *linkedLines, const FasmLines *lines, + SourceCode *sourceCode) { + size_t inserted = 0; + + for (size_t i = 0; i < sourceCode->size; ++i) { + const FasmLines linesHolder = lines[i]; + for (size_t j = 0; j < linesHolder.lines_size; ++j) { + const FasmLine line = lines->lines[j]; + if (line.labelBegin != line.labelEnd) { + const FasmVariable variable = { + .begin = line.labelBegin, + .end = line.labelEnd, + .value = inserted, + }; + fasmLinesPushVariable(linkedLines, variable); + } + inserted += getSizeOfLine(line); + } + } + + inserted = 0; + + for (size_t i = 0; i < sourceCode->size; ++i) { + const FasmLines linesHolder = lines[i]; + for (size_t j = 0; j < linesHolder.data_size; ++j) { + const FasmLine line = lines->data[j]; + if (line.labelBegin != line.labelEnd) { + const FasmVariable variable = { + .begin = line.labelBegin, + .end = line.labelEnd, + .value = inserted, + }; + fasmLinesPushVariable(linkedLines, variable); + } + inserted += 
getSizeOfLine(line); + } + } +} + +void fasmLinesSetLines(FasmLinkedLines *linkedLines, const FasmLines *lines, + SourceCode *sourceCode) { + for (size_t i = 0; i < sourceCode->size; ++i) { + const FasmLines linesHolder = lines[i]; + for (size_t j = 0; j < linesHolder.lines_size; ++j) { + const FasmLine line = lines->lines[j]; + fasmLinesPushLine(linkedLines, + fasmLinesParseLine(linkedLines, line, sourceCode)); + } + } +} + +extern void fasmLinesSetData(FasmLinkedLines *linkedLines, + const FasmLines *lines, SourceCode *sourceCode) { + for (size_t i = 0; i < sourceCode->size; ++i) { + const FasmLines linesHolder = lines[i]; + for (size_t j = 0; j < linesHolder.data_size; ++j) { + const FasmLine line = lines->data[j]; + FasmLinkedLine linkedLine = + fasmLinesParseLine(linkedLines, line, sourceCode); + fasmLinesPushData(linkedLines, linkedLine.operands, + linkedLine.operands_size); + fasmLinkedLineDeleteInner(linkedLine); + } + } +} + +FasmLinkedLine fasmLinesParseLine(FasmLinkedLines *linkedLines, FasmLine line, + SourceCode *sourceCode) { + const FasmLinkedLine linkedLine = { + .begin = line.begin, + .end = line.end, + .instruction = line.instruction, + .operands = a404m_malloc(getSizeOfLineOperands(line)), + .operands_size = getSizeOfLineOperands(line), + }; + + const size_t elementSize = getSizeOfLineOperandElementSize(line); + + size_t inserted = 0; + + for (size_t i = 0; i < line.operands_size; ++i) { + const FasmOperand operand = line.operands[i]; + if (isOperandString(operand)) { + for (char *iter = operand.begin + 1; iter + 1 < operand.end; ++iter) { + switch (elementSize) { + case 1: + ((uint8_t *)linkedLine.operands)[inserted] = *iter; + break; + case 2: + ((uint16_t *)linkedLine.operands)[inserted] = *iter; + break; + case 4: + ((uint32_t *)linkedLine.operands)[inserted] = *iter; + break; + case 8: + ((uint64_t *)linkedLine.operands)[inserted] = *iter; + break; + } + inserted += 1; + } + } else { + switch (elementSize) { + case 1: + ((uint8_t 
*)linkedLine.operands)[inserted] = + getOperandValue(linkedLines, operand, sourceCode); + break; + case 2: + ((uint16_t *)linkedLine.operands)[inserted] = + getOperandValue(linkedLines, operand, sourceCode); + break; + case 4: + ((uint32_t *)linkedLine.operands)[inserted] = + getOperandValue(linkedLines, operand, sourceCode); + break; + case 8: + ((uint64_t *)linkedLine.operands)[inserted] = + getOperandValue(linkedLines, operand, sourceCode); + break; + } + inserted += 1; + } + } + + return linkedLine; +} + +bool fasmLinkerOperandSizeCorrect(FasmToken token, int size) { + switch (token) { + case FASM_TOKEN_NOOP: + return size == 0; + case FASM_TOKEN_PUSH8: + case FASM_TOKEN_PUSH16: + case FASM_TOKEN_PUSH32: + case FASM_TOKEN_PUSH64: + return size == 1; + case FASM_TOKEN_LOAD8: + case FASM_TOKEN_LOAD16: + case FASM_TOKEN_LOAD32: + case FASM_TOKEN_LOAD64: + case FASM_TOKEN_POP8: + case FASM_TOKEN_POP16: + case FASM_TOKEN_POP32: + case FASM_TOKEN_POP64: + case FASM_TOKEN_DUP8: + case FASM_TOKEN_DUP16: + case FASM_TOKEN_DUP32: + case FASM_TOKEN_DUP64: + case FASM_TOKEN_SWAP8: + case FASM_TOKEN_SWAP16: + case FASM_TOKEN_SWAP32: + case FASM_TOKEN_SWAP64: + case FASM_TOKEN_DROP8: + case FASM_TOKEN_DROP16: + case FASM_TOKEN_DROP32: + case FASM_TOKEN_DROP64: + case FASM_TOKEN_ADD_I8: + case FASM_TOKEN_ADD_I16: + case FASM_TOKEN_ADD_I32: + case FASM_TOKEN_ADD_I64: + case FASM_TOKEN_ADD_F32: + case FASM_TOKEN_ADD_F64: + case FASM_TOKEN_SUB_I8: + case FASM_TOKEN_SUB_I16: + case FASM_TOKEN_SUB_I32: + case FASM_TOKEN_SUB_I64: + case FASM_TOKEN_SUB_F32: + case FASM_TOKEN_SUB_F64: + case FASM_TOKEN_NEG_I8: + case FASM_TOKEN_NEG_I16: + case FASM_TOKEN_NEG_I32: + case FASM_TOKEN_NEG_I64: + case FASM_TOKEN_NEG_F32: + case FASM_TOKEN_NEG_F64: + case FASM_TOKEN_MUL_I8: + case FASM_TOKEN_MUL_I16: + case FASM_TOKEN_MUL_I32: + case FASM_TOKEN_MUL_I64: + case FASM_TOKEN_MUL_U8: + case FASM_TOKEN_MUL_U16: + case FASM_TOKEN_MUL_U32: + case FASM_TOKEN_MUL_U64: + case FASM_TOKEN_MUL_F32: + 
case FASM_TOKEN_MUL_F64: + case FASM_TOKEN_DIV_I8: + case FASM_TOKEN_DIV_I16: + case FASM_TOKEN_DIV_I32: + case FASM_TOKEN_DIV_I64: + case FASM_TOKEN_DIV_U8: + case FASM_TOKEN_DIV_U16: + case FASM_TOKEN_DIV_U32: + case FASM_TOKEN_DIV_U64: + case FASM_TOKEN_DIV_F32: + case FASM_TOKEN_DIV_F64: + case FASM_TOKEN_REM_I8: + case FASM_TOKEN_REM_I16: + case FASM_TOKEN_REM_I32: + case FASM_TOKEN_REM_I64: + case FASM_TOKEN_REM_U8: + case FASM_TOKEN_REM_U16: + case FASM_TOKEN_REM_U32: + case FASM_TOKEN_REM_U64: + case FASM_TOKEN_CAST_I8_I64: + case FASM_TOKEN_CAST_I16_I64: + case FASM_TOKEN_CAST_I32_I64: + case FASM_TOKEN_CAST_I64_I8: + case FASM_TOKEN_CAST_I64_I16: + case FASM_TOKEN_CAST_I64_I32: + case FASM_TOKEN_CAST_F64_I64: + case FASM_TOKEN_CAST_I64_F64: + case FASM_TOKEN_CAST_U8_U64: + case FASM_TOKEN_CAST_U16_U64: + case FASM_TOKEN_CAST_U32_U64: + case FASM_TOKEN_CAST_U64_U8: + case FASM_TOKEN_CAST_U64_U16: + case FASM_TOKEN_CAST_U64_U32: + case FASM_TOKEN_CAST_F64_U64: + case FASM_TOKEN_CAST_U64_F64: + case FASM_TOKEN_CAST_F32_F64: + case FASM_TOKEN_CAST_F64_F32: + case FASM_TOKEN_JUMP: + case FASM_TOKEN_JZ_I8: + case FASM_TOKEN_JNZ_I8: + case FASM_TOKEN_JN_I8: + case FASM_TOKEN_JNN_I8: + case FASM_TOKEN_JP_I8: + case FASM_TOKEN_JNP_I8: + case FASM_TOKEN_JZ_I16: + case FASM_TOKEN_JNZ_I16: + case FASM_TOKEN_JN_I16: + case FASM_TOKEN_JNN_I16: + case FASM_TOKEN_JP_I16: + case FASM_TOKEN_JNP_I16: + case FASM_TOKEN_JZ_I32: + case FASM_TOKEN_JNZ_I32: + case FASM_TOKEN_JN_I32: + case FASM_TOKEN_JNN_I32: + case FASM_TOKEN_JP_I32: + case FASM_TOKEN_JNP_I32: + case FASM_TOKEN_JZ_I64: + case FASM_TOKEN_JNZ_I64: + case FASM_TOKEN_JN_I64: + case FASM_TOKEN_JNN_I64: + case FASM_TOKEN_JP_I64: + case FASM_TOKEN_JNP_I64: + case FASM_TOKEN_JZ_F32: + case FASM_TOKEN_JNZ_F32: + case FASM_TOKEN_JN_F32: + case FASM_TOKEN_JNN_F32: + case FASM_TOKEN_JP_F32: + case FASM_TOKEN_JNP_F32: + case FASM_TOKEN_JZ_F64: + case FASM_TOKEN_JNZ_F64: + case FASM_TOKEN_JN_F64: + case FASM_TOKEN_JNN_F64: + 
case FASM_TOKEN_JP_F64: + case FASM_TOKEN_JNP_F64: + case FASM_TOKEN_ALLOC_HEAP: + case FASM_TOKEN_ALLOC_STACK: + case FASM_TOKEN_FREE_HEAP: + case FASM_TOKEN_GET_STACK_ADDRESS: + case FASM_TOKEN_GET_GLOBAL_ADDRESS: + case FASM_TOKEN_CALL: + case FASM_TOKEN_RET: + return size == 0; + case FASM_TOKEN_SYSCALL: + return size == 1; + case FASM_TOKEN_DEFINE_BYTE: + case FASM_TOKEN_DEFINE_WORD: + case FASM_TOKEN_DEFINE_DWORD: + case FASM_TOKEN_DEFINE_QWORD: + return size > 1; + case FASM_TOKEN_NONE: + return false; + } + fprintf(stderr, "Bad fasm token '%d' at %s:%d", token, __FILE_NAME__, + __LINE__); + exit(1); +} + +size_t getSizeOfLine(const FasmLine line) { + return sizeof(line.instruction) + getSizeOfLineOperands(line); +} + +size_t getSizeOfLineOperands(const FasmLine line) { + switch (line.instruction) { + case FASM_TOKEN_PUSH8: + return 1; + case FASM_TOKEN_PUSH16: + return 2; + case FASM_TOKEN_PUSH32: + return 4; + case FASM_TOKEN_PUSH64: + return 8; + case FASM_TOKEN_LOAD8: + case FASM_TOKEN_LOAD16: + case FASM_TOKEN_LOAD32: + case FASM_TOKEN_LOAD64: + case FASM_TOKEN_POP8: + case FASM_TOKEN_POP16: + case FASM_TOKEN_POP32: + case FASM_TOKEN_POP64: + case FASM_TOKEN_DUP8: + case FASM_TOKEN_DUP16: + case FASM_TOKEN_DUP32: + case FASM_TOKEN_DUP64: + case FASM_TOKEN_SWAP8: + case FASM_TOKEN_SWAP16: + case FASM_TOKEN_SWAP32: + case FASM_TOKEN_SWAP64: + case FASM_TOKEN_DROP8: + case FASM_TOKEN_DROP16: + case FASM_TOKEN_DROP32: + case FASM_TOKEN_DROP64: + case FASM_TOKEN_ADD_I8: + case FASM_TOKEN_ADD_I16: + case FASM_TOKEN_ADD_I32: + case FASM_TOKEN_ADD_I64: + case FASM_TOKEN_ADD_F32: + case FASM_TOKEN_ADD_F64: + case FASM_TOKEN_SUB_I8: + case FASM_TOKEN_SUB_I16: + case FASM_TOKEN_SUB_I32: + case FASM_TOKEN_SUB_I64: + case FASM_TOKEN_SUB_F32: + case FASM_TOKEN_SUB_F64: + case FASM_TOKEN_NEG_I8: + case FASM_TOKEN_NEG_I16: + case FASM_TOKEN_NEG_I32: + case FASM_TOKEN_NEG_I64: + case FASM_TOKEN_NEG_F32: + case FASM_TOKEN_NEG_F64: + case FASM_TOKEN_MUL_I8: + case 
FASM_TOKEN_MUL_I16: + case FASM_TOKEN_MUL_I32: + case FASM_TOKEN_MUL_I64: + case FASM_TOKEN_MUL_U8: + case FASM_TOKEN_MUL_U16: + case FASM_TOKEN_MUL_U32: + case FASM_TOKEN_MUL_U64: + case FASM_TOKEN_MUL_F32: + case FASM_TOKEN_MUL_F64: + case FASM_TOKEN_DIV_I8: + case FASM_TOKEN_DIV_I16: + case FASM_TOKEN_DIV_I32: + case FASM_TOKEN_DIV_I64: + case FASM_TOKEN_DIV_U8: + case FASM_TOKEN_DIV_U16: + case FASM_TOKEN_DIV_U32: + case FASM_TOKEN_DIV_U64: + case FASM_TOKEN_DIV_F32: + case FASM_TOKEN_DIV_F64: + case FASM_TOKEN_REM_I8: + case FASM_TOKEN_REM_I16: + case FASM_TOKEN_REM_I32: + case FASM_TOKEN_REM_I64: + case FASM_TOKEN_REM_U8: + case FASM_TOKEN_REM_U16: + case FASM_TOKEN_REM_U32: + case FASM_TOKEN_REM_U64: + case FASM_TOKEN_CAST_I8_I64: + case FASM_TOKEN_CAST_I16_I64: + case FASM_TOKEN_CAST_I32_I64: + case FASM_TOKEN_CAST_I64_I8: + case FASM_TOKEN_CAST_I64_I16: + case FASM_TOKEN_CAST_I64_I32: + case FASM_TOKEN_CAST_F64_I64: + case FASM_TOKEN_CAST_I64_F64: + case FASM_TOKEN_CAST_U8_U64: + case FASM_TOKEN_CAST_U16_U64: + case FASM_TOKEN_CAST_U32_U64: + case FASM_TOKEN_CAST_U64_U8: + case FASM_TOKEN_CAST_U64_U16: + case FASM_TOKEN_CAST_U64_U32: + case FASM_TOKEN_CAST_F64_U64: + case FASM_TOKEN_CAST_U64_F64: + case FASM_TOKEN_CAST_F32_F64: + case FASM_TOKEN_CAST_F64_F32: + case FASM_TOKEN_JUMP: + case FASM_TOKEN_JZ_I8: + case FASM_TOKEN_JNZ_I8: + case FASM_TOKEN_JN_I8: + case FASM_TOKEN_JNN_I8: + case FASM_TOKEN_JP_I8: + case FASM_TOKEN_JNP_I8: + case FASM_TOKEN_JZ_I16: + case FASM_TOKEN_JNZ_I16: + case FASM_TOKEN_JN_I16: + case FASM_TOKEN_JNN_I16: + case FASM_TOKEN_JP_I16: + case FASM_TOKEN_JNP_I16: + case FASM_TOKEN_JZ_I32: + case FASM_TOKEN_JNZ_I32: + case FASM_TOKEN_JN_I32: + case FASM_TOKEN_JNN_I32: + case FASM_TOKEN_JP_I32: + case FASM_TOKEN_JNP_I32: + case FASM_TOKEN_JZ_I64: + case FASM_TOKEN_JNZ_I64: + case FASM_TOKEN_JN_I64: + case FASM_TOKEN_JNN_I64: + case FASM_TOKEN_JP_I64: + case FASM_TOKEN_JNP_I64: + case FASM_TOKEN_JZ_F32: + case FASM_TOKEN_JNZ_F32: + 
case FASM_TOKEN_JN_F32: + case FASM_TOKEN_JNN_F32: + case FASM_TOKEN_JP_F32: + case FASM_TOKEN_JNP_F32: + case FASM_TOKEN_JZ_F64: + case FASM_TOKEN_JNZ_F64: + case FASM_TOKEN_JN_F64: + case FASM_TOKEN_JNN_F64: + case FASM_TOKEN_JP_F64: + case FASM_TOKEN_JNP_F64: + case FASM_TOKEN_ALLOC_HEAP: + case FASM_TOKEN_ALLOC_STACK: + case FASM_TOKEN_FREE_HEAP: + case FASM_TOKEN_GET_STACK_ADDRESS: + case FASM_TOKEN_GET_GLOBAL_ADDRESS: + case FASM_TOKEN_CALL: + case FASM_TOKEN_RET: + case FASM_TOKEN_NOOP: + case FASM_TOKEN_SYSCALL: + return 0; + case FASM_TOKEN_DEFINE_BYTE: + case FASM_TOKEN_DEFINE_WORD: + case FASM_TOKEN_DEFINE_DWORD: + case FASM_TOKEN_DEFINE_QWORD: { + size_t elementSize; + + switch (line.instruction) { + case FASM_TOKEN_DEFINE_BYTE: + elementSize = 1; + break; + case FASM_TOKEN_DEFINE_WORD: + elementSize = 2; + break; + case FASM_TOKEN_DEFINE_DWORD: + elementSize = 4; + break; + case FASM_TOKEN_DEFINE_QWORD: + elementSize = 8; + break; + default: + } + + size_t size = 0; + + for (size_t i = 0; i < line.operands_size; ++i) { + const FasmOperand operand = line.operands[i]; + if (isOperandString(operand)) { + size += (operand.end - operand.begin - 2) * elementSize; + } else { + size += elementSize; + } + } + + return size; + } + case FASM_TOKEN_NONE: + } + fprintf(stderr, "Bad fasm token '%d' at %s:%d", line.instruction, + __FILE_NAME__, __LINE__); + exit(1); +} + +size_t getSizeOfLineOperandElementSize(const FasmLine line) { + switch (line.instruction) { + case FASM_TOKEN_NOOP: + return 0; + case FASM_TOKEN_PUSH8: + return 1; + case FASM_TOKEN_PUSH16: + return 2; + case FASM_TOKEN_PUSH32: + return 4; + case FASM_TOKEN_PUSH64: + return 8; + case FASM_TOKEN_LOAD8: + case FASM_TOKEN_LOAD16: + case FASM_TOKEN_LOAD32: + case FASM_TOKEN_LOAD64: + case FASM_TOKEN_POP8: + case FASM_TOKEN_POP16: + case FASM_TOKEN_POP32: + case FASM_TOKEN_POP64: + case FASM_TOKEN_DUP8: + case FASM_TOKEN_DUP16: + case FASM_TOKEN_DUP32: + case FASM_TOKEN_DUP64: + case FASM_TOKEN_SWAP8: + 
case FASM_TOKEN_SWAP16: + case FASM_TOKEN_SWAP32: + case FASM_TOKEN_SWAP64: + case FASM_TOKEN_DROP8: + case FASM_TOKEN_DROP16: + case FASM_TOKEN_DROP32: + case FASM_TOKEN_DROP64: + case FASM_TOKEN_ADD_I8: + case FASM_TOKEN_ADD_I16: + case FASM_TOKEN_ADD_I32: + case FASM_TOKEN_ADD_I64: + case FASM_TOKEN_ADD_F32: + case FASM_TOKEN_ADD_F64: + case FASM_TOKEN_SUB_I8: + case FASM_TOKEN_SUB_I16: + case FASM_TOKEN_SUB_I32: + case FASM_TOKEN_SUB_I64: + case FASM_TOKEN_SUB_F32: + case FASM_TOKEN_SUB_F64: + case FASM_TOKEN_NEG_I8: + case FASM_TOKEN_NEG_I16: + case FASM_TOKEN_NEG_I32: + case FASM_TOKEN_NEG_I64: + case FASM_TOKEN_NEG_F32: + case FASM_TOKEN_NEG_F64: + case FASM_TOKEN_MUL_I8: + case FASM_TOKEN_MUL_I16: + case FASM_TOKEN_MUL_I32: + case FASM_TOKEN_MUL_I64: + case FASM_TOKEN_MUL_U8: + case FASM_TOKEN_MUL_U16: + case FASM_TOKEN_MUL_U32: + case FASM_TOKEN_MUL_U64: + case FASM_TOKEN_MUL_F32: + case FASM_TOKEN_MUL_F64: + case FASM_TOKEN_DIV_I8: + case FASM_TOKEN_DIV_I16: + case FASM_TOKEN_DIV_I32: + case FASM_TOKEN_DIV_I64: + case FASM_TOKEN_DIV_U8: + case FASM_TOKEN_DIV_U16: + case FASM_TOKEN_DIV_U32: + case FASM_TOKEN_DIV_U64: + case FASM_TOKEN_DIV_F32: + case FASM_TOKEN_DIV_F64: + case FASM_TOKEN_REM_I8: + case FASM_TOKEN_REM_I16: + case FASM_TOKEN_REM_I32: + case FASM_TOKEN_REM_I64: + case FASM_TOKEN_REM_U8: + case FASM_TOKEN_REM_U16: + case FASM_TOKEN_REM_U32: + case FASM_TOKEN_REM_U64: + case FASM_TOKEN_CAST_I8_I64: + case FASM_TOKEN_CAST_I16_I64: + case FASM_TOKEN_CAST_I32_I64: + case FASM_TOKEN_CAST_I64_I8: + case FASM_TOKEN_CAST_I64_I16: + case FASM_TOKEN_CAST_I64_I32: + case FASM_TOKEN_CAST_F64_I64: + case FASM_TOKEN_CAST_I64_F64: + case FASM_TOKEN_CAST_U8_U64: + case FASM_TOKEN_CAST_U16_U64: + case FASM_TOKEN_CAST_U32_U64: + case FASM_TOKEN_CAST_U64_U8: + case FASM_TOKEN_CAST_U64_U16: + case FASM_TOKEN_CAST_U64_U32: + case FASM_TOKEN_CAST_F64_U64: + case FASM_TOKEN_CAST_U64_F64: + case FASM_TOKEN_CAST_F32_F64: + case FASM_TOKEN_CAST_F64_F32: + case 
FASM_TOKEN_JUMP: + case FASM_TOKEN_JZ_I8: + case FASM_TOKEN_JNZ_I8: + case FASM_TOKEN_JN_I8: + case FASM_TOKEN_JNN_I8: + case FASM_TOKEN_JP_I8: + case FASM_TOKEN_JNP_I8: + case FASM_TOKEN_JZ_I16: + case FASM_TOKEN_JNZ_I16: + case FASM_TOKEN_JN_I16: + case FASM_TOKEN_JNN_I16: + case FASM_TOKEN_JP_I16: + case FASM_TOKEN_JNP_I16: + case FASM_TOKEN_JZ_I32: + case FASM_TOKEN_JNZ_I32: + case FASM_TOKEN_JN_I32: + case FASM_TOKEN_JNN_I32: + case FASM_TOKEN_JP_I32: + case FASM_TOKEN_JNP_I32: + case FASM_TOKEN_JZ_I64: + case FASM_TOKEN_JNZ_I64: + case FASM_TOKEN_JN_I64: + case FASM_TOKEN_JNN_I64: + case FASM_TOKEN_JP_I64: + case FASM_TOKEN_JNP_I64: + case FASM_TOKEN_JZ_F32: + case FASM_TOKEN_JNZ_F32: + case FASM_TOKEN_JN_F32: + case FASM_TOKEN_JNN_F32: + case FASM_TOKEN_JP_F32: + case FASM_TOKEN_JNP_F32: + case FASM_TOKEN_JZ_F64: + case FASM_TOKEN_JNZ_F64: + case FASM_TOKEN_JN_F64: + case FASM_TOKEN_JNN_F64: + case FASM_TOKEN_JP_F64: + case FASM_TOKEN_JNP_F64: + case FASM_TOKEN_ALLOC_HEAP: + case FASM_TOKEN_ALLOC_STACK: + case FASM_TOKEN_FREE_HEAP: + case FASM_TOKEN_GET_STACK_ADDRESS: + case FASM_TOKEN_GET_GLOBAL_ADDRESS: + case FASM_TOKEN_CALL: + case FASM_TOKEN_RET: + case FASM_TOKEN_SYSCALL: + return 0; + case FASM_TOKEN_DEFINE_BYTE: + return 1; + case FASM_TOKEN_DEFINE_WORD: + return 2; + case FASM_TOKEN_DEFINE_DWORD: + return 4; + case FASM_TOKEN_DEFINE_QWORD: + return 8; + case FASM_TOKEN_NONE: + } + + fprintf(stderr, "Bad fasm token '%d' at %s:%d", line.instruction, + __FILE_NAME__, __LINE__); + exit(1); +} + +void fasmLinesPushVariable(FasmLinkedLines *linkedLines, + FasmVariable variable) { + const size_t size = a404m_malloc_usable_size(linkedLines->variables) / + sizeof(*linkedLines->variables); + if (size == linkedLines->variables_size) { + linkedLines->variables = + a404m_realloc(linkedLines->variables, + (size + size / 2 + 1) * sizeof(*linkedLines->variables)); + } + linkedLines->variables[linkedLines->variables_size] = variable; + linkedLines->variables_size += 
1; +} + +void fasmLinesPushLine(FasmLinkedLines *linkedLines, FasmLinkedLine line) { + const size_t size = a404m_malloc_usable_size(linkedLines->lines) / + sizeof(*linkedLines->lines); + if (size == linkedLines->lines_size) { + linkedLines->lines = + a404m_realloc(linkedLines->lines, + (size + size / 2 + 1) * sizeof(*linkedLines->lines)); + } + linkedLines->lines[linkedLines->lines_size] = line; + linkedLines->lines_size += 1; +} + +void fasmLinesPushData(FasmLinkedLines *linkedLines, uint8_t *data, + size_t size) { + linkedLines->data = + a404m_realloc(linkedLines->data, (linkedLines->data_size + size) * + sizeof(*linkedLines->data)); + memcpy(linkedLines->data + linkedLines->data_size, data, size); + linkedLines->data_size += size; +} + +FasmVariable fasmLinesGetVariable(const FasmLinkedLines *linkedLines, + char const *nameBegin, char const *nameEnd) { + const size_t size = nameEnd - nameBegin; + for (size_t i = 0; i < linkedLines->variables_size; ++i) { + const FasmVariable variable = linkedLines->variables[i]; + const size_t variable_str_size = variable.end - variable.begin; + if (size == variable_str_size && + strncmp(variable.begin, nameBegin, size) == 0) { + return variable; + } + } + + FasmVariable ERROR = { + .begin = NULL, + .end = NULL, + .value = 0, + }; + + return ERROR; +} + +bool isOperandString(FasmOperand operand) { return *(operand.begin) == '"'; } + +uint64_t getOperandValue(FasmLinkedLines *linkedLines, FasmOperand operand, + SourceCode *sourceCode) { + char c = *(operand.begin); + + if (c == '"') { + fprintf(stderr, "It shoulden't be here"); + exit(1); + } else if (c == '0') { + ++(operand.begin); + if (operand.begin == operand.end) { + return 0; + } + c = *(operand.begin); + if (c == 'x') { + return hexStrToInt(operand.begin + 1, operand.end, sourceCode); + } else if (c == 'b') { + return binStrToInt(operand.begin + 1, operand.end, sourceCode); + } else { + return strToInt(operand.begin + 1, operand.end, sourceCode); + } + } else if (c == 
'-') { + ++(operand.begin); + if (operand.begin == operand.end) { + printError("Expected value after -", sourceCode, operand.begin, + operand.end); + exit(1); + } + c = *(operand.begin); + if (c == '0') { + ++(operand.begin); + if (operand.begin == operand.end) { + return 0; + } + c = *(operand.begin); + if (c == 'x') { + return -hexStrToInt(operand.begin + 1, operand.end, sourceCode); + } else if (c == 'b') { + return -binStrToInt(operand.begin + 1, operand.end, sourceCode); + } else { + return -strToInt(operand.begin + 1, operand.end, sourceCode); + } + } + } else if ('0' <= c && c <= '9') { + return strToInt(operand.begin, operand.end, sourceCode); + } else { + const FasmVariable variable = + fasmLinesGetVariable(linkedLines, operand.begin, operand.end); + if (variable.begin == NULL) { + printError("Label '%.*s' not found", sourceCode, operand.begin, + operand.end, (int)(operand.end - operand.begin), + operand.begin); + exit(1); + } + return variable.value; + } + printError("Should not come here %s:%d", sourceCode, operand.begin, + operand.end, __FILE_NAME__, __LINE__); + exit(1); +} + +uint64_t strToInt(const char *begin, const char *end, SourceCode *sourceCode) { + uint64_t result = 0; + for (char const *iter = begin; iter < end; ++iter) { + const char c = *iter; + result *= 10; + if ('0' <= c && c <= '9') { + result += c - '0'; + } else { + printError("Unexpected character", sourceCode, iter, iter + 1); + exit(1); + } + } + return result; +} +uint64_t hexStrToInt(const char *begin, const char *end, + SourceCode *sourceCode) { + uint64_t result = 0; + for (char const *iter = begin; iter < end; ++iter) { + const char c = *iter; + result *= 16; + if ('0' <= c && c <= '9') { + result += c - '0'; + } else if ('A' <= c && c < 'F') { + result += c - 'A' + 10; + } else if ('a' <= c && c < 'f') { + result += c - 'a' + 10; + } else { + printError("Unexpected character", sourceCode, iter, iter + 1); + exit(1); + } + } + return result; +} +uint64_t binStrToInt(const char 
*begin, const char *end, + SourceCode *sourceCode) { + uint64_t result = 0; + for (char const *iter = begin; iter < end; ++iter) { + const char c = *iter; + result *= 2; + if ('0' <= c && c <= '1') { + result += c - '0'; + } else { + printError("Unexpected character", sourceCode, iter, iter + 1); + exit(1); + } + } + return result; +} diff --git a/src/fasm/linker/linker.h b/src/fasm/linker/linker.h new file mode 100644 index 0000000..fd7813b --- /dev/null +++ b/src/fasm/linker/linker.h @@ -0,0 +1,78 @@ +#pragma once + +#include <fasm/lexer/lexer.h> +#include <stdint.h> + +typedef struct FasmLinkedLine { + char const *begin; + char const *end; + FasmToken instruction; + uint8_t *operands; + size_t operands_size; +} FasmLinkedLine; + +typedef struct FasmVariable { + char const *begin; + char const *end; + uint64_t value; +} FasmVariable; + +typedef struct FasmLinkedLines { + FasmLinkedLine *lines; + size_t lines_size; + + FasmVariable *variables; + size_t variables_size; + + uint8_t *data; + size_t data_size; +} FasmLinkedLines; + +extern void fasmVariablePrint(FasmVariable variable); +extern void fasmLinkedLinePrint(FasmLinkedLine line); +extern void fasmLinkedLinesPrint(FasmLinkedLines lines); + +extern void fasmLinkedLineDeleteInner(FasmLinkedLine line); +extern void fasmLinkedLinesDeleteInner(FasmLinkedLines lines); + +extern FasmLinkedLines fasmLinker(const FasmLines *lines, + SourceCode *sourceCode); + +extern void fasmLinesSetVariables(FasmLinkedLines *linkedLines, + const FasmLines *lines, + SourceCode *sourceCode); +extern void fasmLinesSetLines(FasmLinkedLines *linkedLines, + const FasmLines *lines, SourceCode *sourceCode); +extern void fasmLinesSetData(FasmLinkedLines *linkedLines, + const FasmLines *lines, SourceCode *sourceCode); + +extern FasmLinkedLine fasmLinesParseLine(FasmLinkedLines *linkedLines, + FasmLine line, SourceCode *sourceCode); + +extern bool fasmLinkerOperandSizeCorrect(FasmToken token, int size); + +extern size_t getSizeOfLine(const 
FasmLine line); +extern size_t getSizeOfLineOperands(const FasmLine line); +extern size_t getSizeOfLineOperandElementSize(const FasmLine line); + +extern void fasmLinesPushVariable(FasmLinkedLines *linkedLines, + FasmVariable variable); +extern void fasmLinesPushLine(FasmLinkedLines *linkedLines, + FasmLinkedLine line); +extern void fasmLinesPushData(FasmLinkedLines *linkedLines, uint8_t *data, + size_t size); +extern FasmVariable fasmLinesGetVariable(const FasmLinkedLines *linkedLines, + char const *nameBegin, + char const *nameEnd); + +extern bool isOperandString(FasmOperand operand); + +extern uint64_t getOperandValue(FasmLinkedLines *linkedLines, + FasmOperand operand, SourceCode *sourceCode); + +extern uint64_t strToInt(const char *begin, const char *end, + SourceCode *sourceCode); +extern uint64_t hexStrToInt(const char *begin, const char *end, + SourceCode *sourceCode); +extern uint64_t binStrToInt(const char *begin, const char *end, + SourceCode *sourceCode); diff --git a/src/fasm/runner/runner.c b/src/fasm/runner/runner.c new file mode 100644 index 0000000..c01db18 --- /dev/null +++ b/src/fasm/runner/runner.c @@ -0,0 +1,646 @@ +#include "runner.h" + +#include <fasm/lexer/lexer.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <unistd.h> +#include <utils/memory/memory.h> + +#define PUSHN(bits) \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, \ + getNext##bits##Bits(&ip)); \ + ip += bits / 8 + +#define LOADN(bits) \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, \ + *((uint##bits##_t *)popFromStack64Bits( \ + &stack, &stack_size, &stack_filled))) + +#define POPN(bits) \ + { \ + uint##bits##_t *pointer = (uint##bits##_t *)popFromStack64Bits( \ + &stack, &stack_size, &stack_filled); \ + *pointer = popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + } + +#define DUPN(bits) \ + { \ + const uint##bits##_t a = \ + popFromStack##bits##Bits(&stack, 
&stack_size, &stack_filled); \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, a); \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, a); \ + } + +#define SWAPN(bits) \ + { \ + const uint##bits##_t a = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + const uint##bits##_t b = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, a); \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, b); \ + } + +#define DROPN(bits) \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); + +#define OPERATION(type, bits, op) \ + { \ + const type a = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + const type b = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + const type result = a op b; \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, \ + *(uint##bits##_t *)&result); \ + } + +#define ADD(type, bits) OPERATION(type, bits, +) + +#define SUB(type, bits) OPERATION(type, bits, -) + +#define NEG(type, bits) \ + { \ + const type a = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + const type result = -a; \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, \ + *(uint##bits##_t *)&result); \ + } + +#define MUL(type, bits) OPERATION(type, bits, *) + +#define DIV(type, bits) OPERATION(type, bits, /) + +#define REM(type, bits) OPERATION(type, bits, %) + +#define CAST(from, from_bits, to, to_bits) \ + { \ + const from a = \ + popFromStack##from_bits##Bits(&stack, &stack_size, &stack_filled); \ + const to result = a; \ + pushToStack##to_bits##Bits(&stack, &stack_size, &stack_filled, \ + *(uint##to_bits##_t *)&result); \ + } + +#define COND_JUMP(type, bits, op) \ + { \ + const uint##bits##_t a = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + \ + if (*((type *)&a)op 0) { \ + ip = (uint8_t *)popFromStack64Bits(&stack, &stack_size, 
&stack_filled); \ + } \ + } + +int fasmRunner(ByteCode bytecode) { + uint8_t *data = a404m_malloc(bytecode.data_size); + size_t data_size = bytecode.data_size; + memcpy(data, bytecode.data, data_size); + + size_t stack_size = 0; + size_t stack_filled = 0; + uint8_t *stack = a404m_malloc(stack_size); + + size_t functions_size = 0; + size_t functions_index = -1; + FasmFunction *functions = a404m_malloc(functions_size * sizeof(*functions)); + + uint8_t *ip = bytecode.code; + + while (true) { + const FasmToken instruction = *ip; + ++ip; + switch (instruction) { + case FASM_TOKEN_NOOP: + break; + case FASM_TOKEN_PUSH8: + PUSHN(8); + break; + case FASM_TOKEN_PUSH16: + PUSHN(16); + break; + case FASM_TOKEN_PUSH32: + PUSHN(32); + break; + case FASM_TOKEN_PUSH64: + PUSHN(64); + break; + case FASM_TOKEN_LOAD8: + LOADN(8); + break; + case FASM_TOKEN_LOAD16: + LOADN(16); + break; + case FASM_TOKEN_LOAD32: + LOADN(32); + break; + case FASM_TOKEN_LOAD64: + LOADN(64); + break; + case FASM_TOKEN_POP8: + POPN(8); + break; + case FASM_TOKEN_POP16: + POPN(16); + break; + case FASM_TOKEN_POP32: + POPN(32); + break; + case FASM_TOKEN_POP64: + POPN(64); + break; + case FASM_TOKEN_DUP8: + DUPN(8); + break; + case FASM_TOKEN_DUP16: + DUPN(16); + break; + case FASM_TOKEN_DUP32: + DUPN(32); + break; + case FASM_TOKEN_DUP64: + DUPN(64); + break; + case FASM_TOKEN_SWAP8: + SWAPN(8); + break; + case FASM_TOKEN_SWAP16: + SWAPN(16); + break; + case FASM_TOKEN_SWAP32: + SWAPN(32); + break; + case FASM_TOKEN_SWAP64: + SWAPN(64); + break; + case FASM_TOKEN_DROP8: + DROPN(8); + break; + case FASM_TOKEN_DROP16: + DROPN(16); + break; + case FASM_TOKEN_DROP32: + DROPN(32); + break; + case FASM_TOKEN_DROP64: + DROPN(64); + break; + case FASM_TOKEN_ADD_I8: + ADD(uint8_t, 8); + break; + case FASM_TOKEN_ADD_I16: + ADD(uint16_t, 16); + break; + case FASM_TOKEN_ADD_I32: + ADD(uint32_t, 32); + break; + case FASM_TOKEN_ADD_I64: + ADD(uint64_t, 64); + break; + case FASM_TOKEN_ADD_F32: + ADD(float, 32); + break; 
+ case FASM_TOKEN_ADD_F64: + ADD(double, 64); + break; + case FASM_TOKEN_SUB_I8: + SUB(uint8_t, 8); + break; + case FASM_TOKEN_SUB_I16: + SUB(uint16_t, 16); + break; + case FASM_TOKEN_SUB_I32: + SUB(uint32_t, 32); + break; + case FASM_TOKEN_SUB_I64: + SUB(uint64_t, 64); + break; + case FASM_TOKEN_SUB_F32: + SUB(float, 32); + break; + case FASM_TOKEN_SUB_F64: + SUB(double, 64); + break; + case FASM_TOKEN_NEG_I8: + NEG(int8_t, 8); + break; + case FASM_TOKEN_NEG_I16: + NEG(int16_t, 16); + break; + case FASM_TOKEN_NEG_I32: + NEG(int32_t, 32); + break; + case FASM_TOKEN_NEG_I64: + NEG(int64_t, 64); + break; + case FASM_TOKEN_NEG_F32: + NEG(float, 32); + break; + case FASM_TOKEN_NEG_F64: + NEG(double, 64); + break; + case FASM_TOKEN_MUL_I8: + MUL(int8_t, 8); + break; + case FASM_TOKEN_MUL_I16: + MUL(int16_t, 16); + break; + case FASM_TOKEN_MUL_I32: + MUL(int32_t, 32); + break; + case FASM_TOKEN_MUL_I64: + MUL(int64_t, 64); + break; + case FASM_TOKEN_MUL_U8: + MUL(uint8_t, 8); + break; + case FASM_TOKEN_MUL_U16: + MUL(uint16_t, 16); + break; + case FASM_TOKEN_MUL_U32: + MUL(uint32_t, 32); + break; + case FASM_TOKEN_MUL_U64: + MUL(uint64_t, 64); + break; + case FASM_TOKEN_MUL_F32: + MUL(float, 32); + break; + case FASM_TOKEN_MUL_F64: + MUL(double, 64); + break; + case FASM_TOKEN_DIV_I8: + DIV(int8_t, 8); + break; + case FASM_TOKEN_DIV_I16: + DIV(int16_t, 16); + break; + case FASM_TOKEN_DIV_I32: + DIV(int32_t, 32); + break; + case FASM_TOKEN_DIV_I64: + DIV(int64_t, 64); + break; + case FASM_TOKEN_DIV_U8: + DIV(uint8_t, 8); + break; + case FASM_TOKEN_DIV_U16: + DIV(uint16_t, 16); + break; + case FASM_TOKEN_DIV_U32: + DIV(uint32_t, 32); + break; + case FASM_TOKEN_DIV_U64: + DIV(uint64_t, 64); + break; + case FASM_TOKEN_DIV_F32: + DIV(float, 32); + break; + case FASM_TOKEN_DIV_F64: + DIV(double, 64); + break; + case FASM_TOKEN_REM_I8: + REM(int8_t, 8); + break; + case FASM_TOKEN_REM_I16: + REM(int16_t, 16); + break; + case FASM_TOKEN_REM_I32: + REM(int32_t, 32); + break; + 
case FASM_TOKEN_REM_I64: + REM(int64_t, 64); + break; + case FASM_TOKEN_REM_U8: + REM(uint8_t, 8); + break; + case FASM_TOKEN_REM_U16: + REM(uint16_t, 16); + break; + case FASM_TOKEN_REM_U32: + REM(uint32_t, 32); + break; + case FASM_TOKEN_REM_U64: + REM(uint64_t, 64); + break; + case FASM_TOKEN_CAST_I8_I64: + CAST(int8_t, 8, int64_t, 64); + break; + case FASM_TOKEN_CAST_I16_I64: + CAST(int16_t, 16, int64_t, 64); + break; + case FASM_TOKEN_CAST_I32_I64: + CAST(int32_t, 32, int64_t, 64); + break; + case FASM_TOKEN_CAST_I64_I8: + CAST(int64_t, 64, int8_t, 8); + break; + case FASM_TOKEN_CAST_I64_I16: + CAST(int64_t, 64, int16_t, 16); + break; + case FASM_TOKEN_CAST_I64_I32: + CAST(int64_t, 64, int32_t, 32); + break; + case FASM_TOKEN_CAST_F64_I64: + CAST(double, 64, int64_t, 64); + break; + case FASM_TOKEN_CAST_I64_F64: + CAST(int64_t, 64, double, 64); + break; + case FASM_TOKEN_CAST_U8_U64: + CAST(uint8_t, 8, uint64_t, 64); + break; + case FASM_TOKEN_CAST_U16_U64: + CAST(uint16_t, 16, uint64_t, 64); + break; + case FASM_TOKEN_CAST_U32_U64: + CAST(uint32_t, 32, uint64_t, 64); + break; + case FASM_TOKEN_CAST_U64_U8: + CAST(uint64_t, 64, uint8_t, 8); + break; + case FASM_TOKEN_CAST_U64_U16: + CAST(uint64_t, 64, uint16_t, 16); + break; + case FASM_TOKEN_CAST_U64_U32: + CAST(uint64_t, 64, uint32_t, 32); + break; + case FASM_TOKEN_CAST_F64_U64: + CAST(double, 64, uint64_t, 64); + break; + case FASM_TOKEN_CAST_U64_F64: + CAST(uint64_t, 64, double, 64); + break; + case FASM_TOKEN_CAST_F32_F64: + CAST(float, 32, double, 64); + break; + case FASM_TOKEN_CAST_F64_F32: + CAST(double, 64, float, 32); + break; + case FASM_TOKEN_JUMP: + ip = (uint8_t *)popFromStack64Bits(&stack, &stack_size, &stack_filled); + break; + case FASM_TOKEN_JZ_I8: + COND_JUMP(int8_t, 8, ==); + break; + case FASM_TOKEN_JNZ_I8: + COND_JUMP(int8_t, 8, !=); + break; + case FASM_TOKEN_JN_I8: + COND_JUMP(int8_t, 8, <); + break; + case FASM_TOKEN_JNN_I8: + COND_JUMP(int8_t, 8, >=); + break; + case 
FASM_TOKEN_JP_I8: + COND_JUMP(int8_t, 8, >); + break; + case FASM_TOKEN_JNP_I8: + COND_JUMP(int8_t, 8, <=); + break; + case FASM_TOKEN_JZ_I16: + COND_JUMP(int16_t, 16, ==); + break; + case FASM_TOKEN_JNZ_I16: + COND_JUMP(int16_t, 16, !=); + break; + case FASM_TOKEN_JN_I16: + COND_JUMP(int16_t, 16, <); + break; + case FASM_TOKEN_JNN_I16: + COND_JUMP(int16_t, 16, >=); + break; + case FASM_TOKEN_JP_I16: + COND_JUMP(int16_t, 16, >); + break; + case FASM_TOKEN_JNP_I16: + COND_JUMP(int16_t, 16, <=); + break; + case FASM_TOKEN_JZ_I32: + COND_JUMP(int32_t, 32, ==); + break; + case FASM_TOKEN_JNZ_I32: + COND_JUMP(int32_t, 32, !=); + break; + case FASM_TOKEN_JN_I32: + COND_JUMP(int32_t, 32, <); + break; + case FASM_TOKEN_JNN_I32: + COND_JUMP(int32_t, 32, >=); + break; + case FASM_TOKEN_JP_I32: + COND_JUMP(int32_t, 32, >); + break; + case FASM_TOKEN_JNP_I32: + COND_JUMP(int32_t, 32, <=); + break; + case FASM_TOKEN_JZ_I64: + COND_JUMP(int64_t, 64, ==); + break; + case FASM_TOKEN_JNZ_I64: + COND_JUMP(int64_t, 64, !=); + break; + case FASM_TOKEN_JN_I64: + COND_JUMP(int64_t, 64, <); + break; + case FASM_TOKEN_JNN_I64: + COND_JUMP(int64_t, 64, >=); + break; + case FASM_TOKEN_JP_I64: + COND_JUMP(int64_t, 64, >); + break; + case FASM_TOKEN_JNP_I64: + COND_JUMP(int64_t, 64, <=); + break; + case FASM_TOKEN_JZ_F32: + COND_JUMP(float, 32, ==); + break; + case FASM_TOKEN_JNZ_F32: + COND_JUMP(float, 32, !=); + break; + case FASM_TOKEN_JN_F32: + COND_JUMP(float, 32, <); + break; + case FASM_TOKEN_JNN_F32: + COND_JUMP(float, 32, >=); + break; + case FASM_TOKEN_JP_F32: + COND_JUMP(float, 32, >); + break; + case FASM_TOKEN_JNP_F32: + COND_JUMP(float, 32, <=); + break; + case FASM_TOKEN_JZ_F64: + COND_JUMP(double, 64, ==); + break; + case FASM_TOKEN_JNZ_F64: + COND_JUMP(double, 64, !=); + break; + case FASM_TOKEN_JN_F64: + COND_JUMP(double, 64, <); + break; + case FASM_TOKEN_JNN_F64: + COND_JUMP(double, 64, >=); + break; + case FASM_TOKEN_JP_F64: + COND_JUMP(double, 64, >); + break; + case 
FASM_TOKEN_JNP_F64: + COND_JUMP(double, 64, <=); + break; + case FASM_TOKEN_ALLOC_HEAP: + pushToStack64Bits(&stack, &stack_size, &stack_filled, + (uint64_t)a404m_malloc(popFromStack64Bits( + &stack, &stack_size, &stack_filled))); + break; + case FASM_TOKEN_ALLOC_STACK: + functions[functions_index].stack_size = + popFromStack64Bits(&stack, &stack_size, &stack_filled); + functions[functions_index].stack = + a404m_malloc(functions[functions_index].stack_size); + break; + case FASM_TOKEN_FREE_HEAP: + free((void *)popFromStack64Bits(&stack, &stack_size, &stack_filled)); + break; + case FASM_TOKEN_GET_STACK_ADDRESS: + pushToStack64Bits(&stack, &stack_size, &stack_filled, + (uint64_t)functions[functions_index].stack); + break; + case FASM_TOKEN_GET_GLOBAL_ADDRESS: + pushToStack64Bits(&stack, &stack_size, &stack_filled, (uint64_t)data); + break; + case FASM_TOKEN_CALL: { + uint8_t *const newIp = + (uint8_t *)popFromStack64Bits(&stack, &stack_size, &stack_filled); + ++functions_index; + if (functions_index == functions_size) { + functions_size += functions_size / 2 + 1; + functions = + a404m_realloc(functions, functions_size * sizeof(*functions)); + } + FasmFunction function = { + .returnTo = ip, + .stack = a404m_malloc(0), + .stack_size = 0, + }; + functions[functions_index] = function; + ip = newIp; + } break; + case FASM_TOKEN_RET: { + FasmFunction function = functions[functions_index]; + free(function.stack); + ip = function.returnTo; + --functions_index; + if (functions_index + sizeof(*functions) < functions_size / 2) { + functions_size = functions_size / 2; + functions = + a404m_realloc(functions, functions_size * sizeof(*functions)); + } + } break; + case FASM_TOKEN_SYSCALL: { + switch ((FasmSyscall)popFromStack8Bits(&stack, &stack_size, + &stack_filled)) { + case FASM_SYSCALL_READ: { + uint32_t fd = + popFromStack32Bits(&stack, &stack_size, &stack_filled); + int8_t *buf = (int8_t *)popFromStack64Bits(&stack, &stack_size, + &stack_filled); + uint64_t count = + 
popFromStack64Bits(&stack, &stack_size, &stack_filled); + syscall(SYS_read, fd, buf, count); + } break; + case FASM_SYSCALL_WRITE: { + uint32_t fd = + popFromStack32Bits(&stack, &stack_size, &stack_filled); + int8_t *buf = (int8_t *)popFromStack64Bits(&stack, &stack_size, + &stack_filled); + uint64_t count = + popFromStack64Bits(&stack, &stack_size, &stack_filled); + syscall(SYS_write, fd, buf, count); + } break; + case FASM_SYSCALL_OPEN: { + int8_t *filename = (int8_t *)popFromStack64Bits(&stack, &stack_size, + &stack_filled); + uint32_t flags = + popFromStack32Bits(&stack, &stack_size, &stack_filled); + uint32_t mode = + popFromStack32Bits(&stack, &stack_size, &stack_filled); + syscall(SYS_open, filename, flags, mode); + } break; + case FASM_SYSCALL_CLOSE: { + uint32_t fd = + popFromStack32Bits(&stack, &stack_size, &stack_filled); + syscall(SYS_close, fd); + } break; + case FASM_SYSCALL_EXIT: + for (size_t i = functions_index; i != (size_t)-1; ++i) { + free(functions[i].stack); + } + const uint32_t status = + popFromStack32Bits(&stack, &stack_size, &stack_filled); + free(functions); + free(stack); + free(data); + return status; + } + } break; + case FASM_TOKEN_DEFINE_BYTE: + case FASM_TOKEN_DEFINE_WORD: + case FASM_TOKEN_DEFINE_DWORD: + case FASM_TOKEN_DEFINE_QWORD: + case FASM_TOKEN_NONE: + default: + fprintf(stderr, "Bad fasm instruction %d", instruction); + exit(1); + } + } +} + +#define getNextNBits(bits) \ + uint##bits##_t getNext##bits##Bits(uint8_t **pos) { \ + uint##bits##_t *p = *((uint##bits##_t **)pos); \ + pos += sizeof(*p); \ + return *p; \ + } + +getNextNBits(8); +getNextNBits(16); +getNextNBits(32); +getNextNBits(64); + +#define pushToStackNBits(bits) \ + void pushToStack##bits##Bits(uint8_t **stack, size_t *stack_size, \ + size_t *stack_filled, uint##bits##_t value) { \ + const size_t new_stack_size = *stack_filled + sizeof(value); \ + if (new_stack_size >= *stack_size) { \ + *stack_size = new_stack_size + new_stack_size / 2 + 1; \ + *stack = 
a404m_realloc(*stack, *stack_size); \ + } \ + *((uint##bits##_t *)(*stack + *stack_filled)) = value; \ + *stack_filled = new_stack_size; \ + } +pushToStackNBits(8); +pushToStackNBits(16); +pushToStackNBits(32); +pushToStackNBits(64); + +#define popFromStackNBit(bits) \ + uint##bits##_t popFromStack##bits##Bits(uint8_t **stack, size_t *stack_size, \ + size_t *stack_filled) { \ + uint##bits##_t value; \ + *stack_filled -= sizeof(value); \ + value = *((uint##bits##_t *)(*stack + *stack_filled)); \ + if (*stack_filled < *stack_size / 2) { \ + *stack_size = *stack_filled; \ + *stack = a404m_realloc(*stack, *stack_size); \ + } \ + return value; \ + } +popFromStackNBit(8); +popFromStackNBit(16); +popFromStackNBit(32); +popFromStackNBit(64); diff --git a/src/fasm/runner/runner.h b/src/fasm/runner/runner.h new file mode 100644 index 0000000..0dfa338 --- /dev/null +++ b/src/fasm/runner/runner.h @@ -0,0 +1,36 @@ +#pragma once + +#include <fasm/code_generator/code_generator.h> +#include <stdint.h> + +typedef struct FasmFunction { + uint8_t *returnTo; + uint8_t *stack; + size_t stack_size; +}FasmFunction; + +extern int fasmRunner(ByteCode bytecode); + +#define getNextNBitsHeader(bits) \ + extern uint##bits##_t getNext##bits##Bits(uint8_t **pos) +getNextNBitsHeader(8); +getNextNBitsHeader(16); +getNextNBitsHeader(32); +getNextNBitsHeader(64); + +#define pushToStackNBitsHeader(bits) \ + extern void pushToStack##bits##Bits(uint8_t **stack, size_t *stack_size, \ + size_t *stack_filled, \ + uint##bits##_t value) +pushToStackNBitsHeader(8); +pushToStackNBitsHeader(16); +pushToStackNBitsHeader(32); +pushToStackNBitsHeader(64); + +#define popFromStackNBitsHeader(bits) \ + extern uint##bits##_t popFromStack##bits##Bits( \ + uint8_t **stack, size_t *stack_size, size_t *stack_filled) +popFromStackNBitsHeader(8); +popFromStackNBitsHeader(16); +popFromStackNBitsHeader(32); +popFromStackNBitsHeader(64); @@ -1,79 +1,26 @@ -#include <compiler/tree_parser/tree_parser.h> +#include <stdint.h> 
#include <stdlib.h> #include <string.h> #include <utils/file.h> -#include <utils/time.h> -#include <utils/types.h> #include <vm/runner/runner.h> #include "compiler/source_code/source_code.h" +#include "fasm/code_generator/code_generator.h" +#include "fasm/lexer/lexer.h" +#include "fasm/linker/linker.h" +#include "fasm/runner/runner.h" +#include "utils/types.h" -Clock runWithPrint(SourceCode *code) { - Clock sum = 0; - Clock diff = 0; - fprintf(stderr, "----code:\n%s\n----\n", code->codes[0]->code); - Clock start = getTimeInNano(); - const Nodes nodes = lexer(code, 0); - diff += getTimeInNano() - start; - sum += diff; - if (nodes.size != ERROR_SIZE) { - printNodes(nodes); - fprintf(stderr, "----lexing in %ldns\n", diff); - start = getTimeInNano(); - ParsedNode *parsedNode = _parser(nodes, code); - diff = getTimeInNano() - start; - sum += diff; - if (parsedNode != NULL) { - printParsedNode(parsedNode); - fprintf(stderr, "----node parsing in %ldns\n", diff); - start = getTimeInNano(); - ParsedTree *parsedTree = _treeParser(parsedNode, code); - diff = getTimeInNano() - start; - sum += diff; - if (parsedTree != NULL) { - printParsedTreeNode(parsedTree); - fprintf(stderr, "----tree parsing in %ldns\n", diff); - start = getTimeInNano(); - Instructions instructions = _codeGenerator(parsedTree, code); - diff = getTimeInNano() - start; - sum += diff; - if (instructions.size != ERROR_SIZE) { - printInstructions(instructions); - fprintf(stderr, "----code_generator in %ldns\n", diff); - start = getTimeInNano(); - bool ranSuccess = _runner(instructions); - diff = getTimeInNano() - start; - sum += diff; - fprintf(stderr, "----runner in %ldns\n", diff); - fprintf(stderr, "ran sucessfully = %s\n", - ranSuccess ? 
"true" : "false"); - fprintf(stderr, "----sum %ldns\n", sum); - deleteInstructions(instructions); - } else { - fprintf(stderr, "----returned error"); - } - deleteParsedTree(parsedTree); - } else { - fprintf(stderr, "----returned error"); - } - deleteParsedNode(parsedNode); - } else { - fprintf(stderr, "----returned error"); - } - } else { - fprintf(stderr, "----returned error"); - } - deleteNodes(nodes); - return sum; -} +typedef enum FelanMode { + FELAN_MODE_NONE, + FELAN_MODE_COMPILE_FASM, + FELAN_MODE_COMPILE_FELAN, + FELAN_MODE_RUN_FASM, + FELAN_MODE_RUN_FELAN, +} FelanMode; -int main(int argc, char *argv[]) { - if (argc < 2) { - fprintf(stderr, "no file"); - return 1; - } +int runFelan(const char *const filePath) { SourceCode sourceCode = makeSourceCode(); - const char *const filePath = argv[1]; Code *code; code = read_whole_file("std/builtins.felan"); @@ -99,3 +46,162 @@ RETURN_ERROR: deleteSourceCodeInners(sourceCode); return 1; } + +int compileFasm(const char *const filePath) { + SourceCode sourceCode = makeSourceCode(); + + Code *code = read_whole_file(filePath); + if (code == NULL) { + goto RETURN_ERROR; + } + pushToSourceCode(&sourceCode, code); + + printf("----lexing:\n"); + FasmLines *lines; + if ((lines = fasmLexer(&sourceCode)) == NULL) { + goto RETURN_ERROR; + } + + for (size_t i = 0; i < sourceCode.size; ++i) { + fasmLinesPrint(lines[i]); + } + + printf("----linking:\n"); + + FasmLinkedLines linkedLines = fasmLinker(lines, &sourceCode); + + if (linkedLines.lines_size == ERROR_SIZE) { + goto RETURN_LINKED_ERROR; + } + + fasmLinkedLinesPrint(linkedLines); + + ByteCode bytecode = fasmCodeGenerator(&linkedLines); + + deleteByteCodeInners(bytecode); + + fasmLinkedLinesDeleteInner(linkedLines); + + for (size_t i = 0; i < sourceCode.size; ++i) { + fasmLinesDeleteInner(lines[i]); + } + + free(lines); + deleteSourceCodeInners(sourceCode); + return 0; + +RETURN_LINKED_ERROR: + for (size_t i = 0; i < sourceCode.size; ++i) { + fasmLinesDeleteInner(lines[i]); + 
} +RETURN_ERROR: + deleteSourceCodeInners(sourceCode); + return 1; +} + +int runFasm(const char *const filePath) { + SourceCode sourceCode = makeSourceCode(); + + Code *code = read_whole_file(filePath); + if (code == NULL) { + goto RETURN_ERROR; + } + pushToSourceCode(&sourceCode, code); + + FasmLines *lines; + if ((lines = fasmLexer(&sourceCode)) == NULL) { + goto RETURN_ERROR; + } + + FasmLinkedLines linkedLines = fasmLinker(lines, &sourceCode); + + if (linkedLines.lines_size == ERROR_SIZE) { + goto RETURN_LINKED_ERROR; + } + + ByteCode bytecode = fasmCodeGenerator(&linkedLines); + + uint32_t result = fasmRunner(bytecode); + + deleteByteCodeInners(bytecode); + + fasmLinkedLinesDeleteInner(linkedLines); + + for (size_t i = 0; i < sourceCode.size; ++i) { + fasmLinesDeleteInner(lines[i]); + } + + free(lines); + deleteSourceCodeInners(sourceCode); + return result; + +RETURN_LINKED_ERROR: + for (size_t i = 0; i < sourceCode.size; ++i) { + fasmLinesDeleteInner(lines[i]); + } +RETURN_ERROR: + deleteSourceCodeInners(sourceCode); + return 1; +} + +int main(int argc, char *argv[]) { + FelanMode compileMode = FELAN_MODE_NONE; + + char const *filePath = NULL; + + for (int i = 1; i < argc; ++i) { + const char *const arg = argv[i]; + if (strcmp(arg, "compile-fasm") == 0) { + if (compileMode == FELAN_MODE_NONE) { + compileMode = FELAN_MODE_COMPILE_FASM; + } else { + fprintf(stderr, "'%s' is not expected\n", arg); + return 1; + } + } else if (strcmp(arg, "compile-felan") == 0) { + if (compileMode == FELAN_MODE_NONE) { + compileMode = FELAN_MODE_COMPILE_FELAN; + } else { + fprintf(stderr, "'%s' is not expected\n", arg); + return 1; + } + fprintf(stderr, "'%s' is not yet supported\n", arg); + return 1; + } else if (strcmp(arg, "run-fasm") == 0) { + if (compileMode == FELAN_MODE_NONE) { + compileMode = FELAN_MODE_RUN_FASM; + } else { + fprintf(stderr, "'%s' is not expected\n", arg); + return 1; + } + } else if (strcmp(arg, "run-felan") == 0) { + if (compileMode == FELAN_MODE_NONE) 
{ + compileMode = FELAN_MODE_RUN_FELAN; + } else { + fprintf(stderr, "'%s' is not expected\n", arg); + return 1; + } + } else if (filePath == NULL) { + filePath = arg; + } else { + fprintf(stderr, "'%s' is not expected\n", arg); + return 1; + } + } + + if (filePath == NULL) { + fprintf(stderr, "Need a file path to operate\n"); + } + + switch (compileMode) { + case FELAN_MODE_COMPILE_FASM: + return compileFasm(filePath); + case FELAN_MODE_COMPILE_FELAN: + return 1; + case FELAN_MODE_RUN_FASM: + return runFasm(filePath); + case FELAN_MODE_NONE: + case FELAN_MODE_RUN_FELAN: + return runFelan(filePath); + } +} diff --git a/src/utils/file.c b/src/utils/file.c index cdb2dfe..7620257 100644 --- a/src/utils/file.c +++ b/src/utils/file.c @@ -1,6 +1,5 @@ #include "file.h" -#include <stdio.h> #include <string.h> #include <utils/memory/memory.h> @@ -24,7 +23,7 @@ Code *read_whole_file(const char *path) { size_t pathLen = strlen(path); code->code = str; - code->filePath = a404m_malloc(pathLen+1); + code->filePath = a404m_malloc((pathLen+1)*sizeof(char)); memcpy(code->filePath, path, pathLen+1); diff --git a/src/vm/runner/runner.c b/src/vm/runner/runner.c index 9836ad0..742ec16 100644 --- a/src/vm/runner/runner.c +++ b/src/vm/runner/runner.c @@ -4,8 +4,6 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> -#include <utils/memory/memory.h> -#include <utils/types.h> const BuiltinFunction BUILTIN_FUNCTIONS[] = { print, @@ -26,6 +24,20 @@ bool runner(SourceCode *sourceCode) { return false; } +bool runnerWithPrint(SourceCode *sourceCode) { + Instructions instructions = codeGeneratorWithPrint(sourceCode); + if (instructions.size != ERROR_SIZE) { + printf("----generated code:\n"); + printInstructions(instructions); + printf("----running:\n"); + bool ranSuccess = _runner(instructions); + deleteInstructions(instructions); + return ranSuccess; + } + fprintf(stderr,"error in code generator\n"); + return false; +} + bool _runner(Instructions instructions) { size_t stack_size = 0; 
void **stack = a404m_malloc(stack_size * sizeof(void *)); diff --git a/src/vm/runner/runner.h b/src/vm/runner/runner.h index 80ef571..71fde6d 100644 --- a/src/vm/runner/runner.h +++ b/src/vm/runner/runner.h @@ -14,7 +14,8 @@ extern const BuiltinFunction BUILTIN_FUNCTIONS[]; extern const char *BUILTIN_FUNCTION_NAMES[]; extern const size_t BUILTIN_FUNCTIONS_SIZE; -extern bool runner(SourceCode *code); +extern bool runner(SourceCode *souceCode); +extern bool runnerWithPrint(SourceCode *souceCode); extern bool _runner(Instructions instructions); extern bool runInstruction(Instruction instruction, void ***restrict stack, diff --git a/std/builtins-main.felan b/std/builtins-main.felan new file mode 100644 index 0000000..0045b45 --- /dev/null +++ b/std/builtins-main.felan @@ -0,0 +1,23 @@ +void :: struct(0){}; + +i8 :: struct(1){}; +i16 :: struct(2){}; +i32 :: struct(4){}; +i64 :: struct(8){}; + +u8 :: struct(1){}; +u16 :: struct(2){}; +u32 :: struct(4){}; +u64 :: struct(8){}; + +f32 :: struct(4){}; +f64 :: struct(8){}; + +String :: struct(8*3){}; + +print :: (str:String)->void{ + felan asm () { + + }; +}; + |