diff options
author | A404M <ahmadmahmoudiprogrammer@gmail.com> | 2024-10-08 04:16:27 +0330 |
---|---|---|
committer | A404M <ahmadmahmoudiprogrammer@gmail.com> | 2024-10-08 04:17:08 +0330 |
commit | addd54dc31603dc204773d3108dba4e000cd7657 (patch) | |
tree | 621620c4ca5634680d7655e3474cf0b0bcec8e01 /src/fasm | |
parent | bf84010e01bb11874689ce53ea4df853b2e41c2b (diff) |
added fasm support
added compiler options
tried to compile to fasm first
Diffstat (limited to 'src/fasm')
-rw-r--r-- | src/fasm/code_generator/code_generator.c | 42 | ||||
-rw-r--r-- | src/fasm/code_generator/code_generator.h | 17 | ||||
-rw-r--r-- | src/fasm/lexer/lexer.c | 643 | ||||
-rw-r--r-- | src/fasm/lexer/lexer.h | 363 | ||||
-rw-r--r-- | src/fasm/linker/linker.c | 870 | ||||
-rw-r--r-- | src/fasm/linker/linker.h | 78 | ||||
-rw-r--r-- | src/fasm/runner/runner.c | 646 | ||||
-rw-r--r-- | src/fasm/runner/runner.h | 36 |
8 files changed, 2695 insertions, 0 deletions
diff --git a/src/fasm/code_generator/code_generator.c b/src/fasm/code_generator/code_generator.c new file mode 100644 index 0000000..0df70f5 --- /dev/null +++ b/src/fasm/code_generator/code_generator.c @@ -0,0 +1,42 @@ +#include "code_generator.h" + +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <utils/memory/memory.h> + +void deleteByteCodeInners(ByteCode bytecode) { + free(bytecode.code); + free(bytecode.data); + free(bytecode.labels); +} + +ByteCode fasmCodeGenerator(FasmLinkedLines *lines) { + ByteCode bytecode = { + .code = a404m_malloc(0), + .code_size = 0, + .data = lines->data, + .data_size = lines->data_size, + .labels = lines->variables, + .labels_size = lines->variables_size, + }; + + lines->data = a404m_malloc(0); + lines->data_size = 0; + lines->variables = a404m_malloc(0); + lines->variables_size = 0; + + for (size_t i = 0; i < lines->lines_size; ++i) { + const FasmLinkedLine line = lines->lines[i]; + const size_t size = sizeof(line.instruction) + line.operands_size; + bytecode.code = a404m_realloc(bytecode.code, bytecode.code_size + size); + memcpy(bytecode.code + bytecode.code_size, &line.instruction, + sizeof(line.instruction)); + bytecode.code_size += sizeof(line.instruction); + memcpy(bytecode.code + bytecode.code_size, line.operands, + line.operands_size); + bytecode.code_size += line.operands_size; + } + + return bytecode; +} diff --git a/src/fasm/code_generator/code_generator.h b/src/fasm/code_generator/code_generator.h new file mode 100644 index 0000000..85715e4 --- /dev/null +++ b/src/fasm/code_generator/code_generator.h @@ -0,0 +1,17 @@ +#pragma once + +#include <fasm/linker/linker.h> +#include <stdint.h> + +typedef struct ByteCode { + uint8_t *code; + size_t code_size; + uint8_t *data; + size_t data_size; + FasmVariable *labels; + size_t labels_size; +}ByteCode; + +extern void deleteByteCodeInners(ByteCode bytecode); + +extern ByteCode fasmCodeGenerator(FasmLinkedLines *lines); diff --git a/src/fasm/lexer/lexer.c 
b/src/fasm/lexer/lexer.c new file mode 100644 index 0000000..e3e9610 --- /dev/null +++ b/src/fasm/lexer/lexer.c @@ -0,0 +1,643 @@ +#include "lexer.h" + +#include <compiler/error_helper/error_helper.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <utils/memory/memory.h> +#include <utils/types.h> + +const char *FASM_TOKEN_STRINGS[] = { + "NOOP", + "PUSH8", + "PUSH16", + "PUSH32", + "PUSH64", + "LOAD8", + "LOAD16", + "LOAD32", + "LOAD64", + "POP8", + "POP16", + "POP32", + "POP64", + "DUP8", + "DUP16", + "DUP32", + "DUP64", + "SWAP8", + "SWAP16", + "SWAP32", + "SWAP64", + "DROP8", + "DROP16", + "DROP32", + "DROP64", + "ADD_I8", + "ADD_I16", + "ADD_I32", + "ADD_I64", + "ADD_F32", + "ADD_F64", + "SUB_I8", + "SUB_I16", + "SUB_I32", + "SUB_I64", + "SUB_F32", + "SUB_F64", + "NEG_I8", + "NEG_I16", + "NEG_I32", + "NEG_I64", + "NEG_F32", + "NEG_F64", + "MUL_I8", + "MUL_I16", + "MUL_I32", + "MUL_I64", + "MUL_U8", + "MUL_U16", + "MUL_U32", + "MUL_U64", + "MUL_F32", + "MUL_F64", + "DIV_I8", + "DIV_I16", + "DIV_I32", + "DIV_I64", + "DIV_U8", + "DIV_U16", + "DIV_U32", + "DIV_U64", + "DIV_F32", + "DIV_F64", + "REM_I8", + "REM_I16", + "REM_I32", + "REM_I64", + "REM_U8", + "REM_U16", + "REM_U32", + "REM_U64", + "CAST_I8_I64", + "CAST_I16_I64", + "CAST_I32_I64", + "CAST_I64_I8", + "CAST_I64_I16", + "CAST_I64_I32", + "CAST_F64_I64", + "CAST_I64_F64", + "CAST_U8_U64", + "CAST_U16_U64", + "CAST_U32_U64", + "CAST_U64_U8", + "CAST_U64_U16", + "CAST_U64_U32", + "CAST_F64_U64", + "CAST_U64_F64", + "CAST_F32_F64", + "CAST_F64_F32", + "JUMP", + "JZ_I8", + "JNZ_I8", + "JN_I8", + "JNN_I8", + "JP_I8", + "JNP_I8", + "JZ_I16", + "JNZ_I16", + "JN_I16", + "JNN_I16", + "JP_I16", + "JNP_I16", + "JZ_I32", + "JNZ_I32", + "JN_I32", + "JNN_I32", + "JP_I32", + "JNP_I32", + "JZ_I64", + "JNZ_I64", + "JN_I64", + "JNN_I64", + "JP_I64", + "JNP_I64", + "JZ_F32", + "JNZ_F32", + "JN_F32", + "JNN_F32", + "JP_F32", + "JNP_F32", + "JZ_F64", + "JNZ_F64", + "JN_F64", + 
"JNN_F64", + "JP_F64", + "JNP_F64", + "ALLOC_HEAP", + "ALLOC_STACK", + "FREE_HEAP", + "GET_STACK_ADDRESS", + "GET_GLOBAL_ADDRESS", + "CALL", + "RET", + "SYSCALL", + "DEFINE_BYTE", + "DEFINE_WORD", + "DEFINE_DWORD", + "DEFINE_QWORD", + "NONE", +}; + +const char *FASM_LINE_LOOKING_FOR_STRINGS[] = { + "FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION", + "FASM_LINE_LOOKING_FOR_INSTRUCTION", + "FASM_LINE_LOOKING_FOR_OPERAND", + "FASM_LINE_LOOKING_FOR_OPERAND_OR_END", + "FASM_LINE_LOOKING_FOR_COMMA_OR_END", +}; + +const size_t FASM_TOKEN_STRINGS_SIZE = + sizeof(FASM_TOKEN_STRINGS) / sizeof(char *); + +void fasmLinePrint(FasmLine line) { + printf("{label='%.*s',instruction='%s',operands=[\n", + (int)(line.labelEnd - line.labelBegin), line.labelBegin, + FASM_TOKEN_STRINGS[line.instruction]); + for (size_t i = 0; i < line.operands_size; ++i) { + const FasmOperand operand = line.operands[i]; + printf(" {'%.*s'},\n", (int)(operand.end - operand.begin), operand.begin); + } + printf("]}\n"); +} + +void fasmLinesPrint(FasmLines lines) { + printf("section code\n"); + for (size_t i = 0; i < lines.lines_size; ++i) { + fasmLinePrint(lines.lines[i]); + } + printf("section data\n"); + for (size_t i = 0; i < lines.data_size; ++i) { + fasmLinePrint(lines.data[i]); + } +} + +void fasmLineDeleteInner(FasmLine line) { free(line.operands); } + +void fasmLinesDeleteInner(FasmLines lines) { + for (size_t i = 0; i < lines.lines_size; ++i) { + fasmLineDeleteInner(lines.lines[i]); + } + for (size_t i = 0; i < lines.data_size; ++i) { + fasmLineDeleteInner(lines.data[i]); + } + free(lines.lines); + free(lines.data); +} + +FasmLines *fasmLexer(SourceCode *sourceCode) { + FasmLines *lines = a404m_malloc(sourceCode->size * sizeof(FasmLines)); + + for (size_t i = 0; i < sourceCode->size; ++i) { + if ((lines[i] = fasmLexerCode(sourceCode->codes[i], sourceCode)) + .lines_size == ERROR_SIZE) { + goto RETURN_ERROR; + } + } + + return lines; +RETURN_ERROR: + free(lines); + return NULL; +} + +FasmLines 
fasmLexerCode(Code *code, SourceCode *sourceCode) { + FasmLineLookingFor lookingFor = FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION; + FasmLine line = { + .begin = code->code, + .end = code->code, + .labelBegin = NULL, + .labelEnd = NULL, + .instruction = FASM_TOKEN_NONE, + .operands = a404m_malloc(0), + .operands_size = 0, + }; + + FasmLines lines = { + .lines = a404m_malloc(0), + .lines_size = 0, + .data = a404m_malloc(0), + .data_size = 0, + }; + + FasmSection section = FASM_SECTION_NONE; + + for (char *iter = code->code;; ++iter) { + LOOP_BEGIN: + const char c = *iter; + if (c == '\0') { + switch (lookingFor) { + case FASM_LINE_LOOKING_FOR_INSTRUCTION: + case FASM_LINE_LOOKING_FOR_OPERAND: + printError("Expected instruction", sourceCode, line.begin, iter); + goto RETURN_ERROR; + case FASM_LINE_LOOKING_FOR_OPERAND_OR_END: + case FASM_LINE_LOOKING_FOR_COMMA_OR_END: + fasmLexerPushLine(&lines, &line, iter, section, sourceCode); + /* pass through */ + case FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION: + goto RETURN_SUCCESS; + } + } else if (fasmLexerIsSpace(c)) { + continue; + } + /*fprintf(stderr, "a404m: Char '%c' at %ld and looking for '%s'\n", c,*/ + /* iter - code->code, FASM_LINE_LOOKING_FOR_STRINGS[lookingFor]);*/ + switch (lookingFor) { + case FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION: + if (fasmLexerIsLineSeparator(c)) { + continue; + } else if (fasmLexerIsSectionIndicator(c)) { + static const char *const SECTIONS_STRINGS[] = { + "code", + "data", + }; + static const FasmSection SECTIONS[] = { + FASM_SECTION_CODE, + FASM_SECTION_DATA, + }; + static const size_t SECTIONS_SIZE = + sizeof(SECTIONS_STRINGS) / sizeof(*SECTIONS_STRINGS); + + ++iter; + + for (size_t i = 0; i < SECTIONS_SIZE; ++i) { + const char *const sectionStr = SECTIONS_STRINGS[i]; + for (size_t j = 0;; ++j) { + const char c0 = sectionStr[j]; + const char c1 = iter[j]; + if (c0 == '\0') { + if (c1 == '\0' || isspace(c1)) { + iter += j; + section = SECTIONS[i]; + /*fprintf(stderr, "section changed 
to '%s'\n", sectionStr);*/ + goto LOOP_BEGIN; + } else { + break; + } + } else if (c0 != c1) { // no need for c1 == '\0' + break; + } + } + } + printError("Invalid section", sourceCode, iter - 1, iter); + goto RETURN_ERROR; + } else if (fasmLexerIsWord(c)) { + char *begin = iter; + char *end = iter = fasmLexerGetNextWord(iter); + line.begin = begin; + line.end = end; + if (fasmLexerIsLabel(*iter)) { + ++iter; + line.labelBegin = begin; + line.labelEnd = end; + lookingFor = FASM_LINE_LOOKING_FOR_INSTRUCTION; + } else { + if ((line.instruction = fasmLexerTokenFromIdentifier(begin, end)) == + FASM_TOKEN_NONE) { + printError("Unknown instruction", sourceCode, begin, end); + goto RETURN_ERROR; + } + + lookingFor = FASM_LINE_LOOKING_FOR_OPERAND_OR_END; + } + goto LOOP_BEGIN; + } else { + UNEXPECTED: + fasmLinePrint(line); + printError("Unexpected character", sourceCode, iter, iter + 1); + goto RETURN_ERROR; + } + break; + case FASM_LINE_LOOKING_FOR_INSTRUCTION: + if (fasmLexerIsWord(c)) { + char *begin = iter; + char *end = iter = fasmLexerGetNextWord(iter); + line.end = end; + + if ((line.instruction = fasmLexerTokenFromIdentifier(begin, end)) == + FASM_TOKEN_NONE) { + printError("Unknown instruction", sourceCode, begin, end); + goto RETURN_ERROR; + } + + lookingFor = FASM_LINE_LOOKING_FOR_OPERAND_OR_END; + goto LOOP_BEGIN; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + break; + case FASM_LINE_LOOKING_FOR_OPERAND: + if (fasmLexerIsWord(c)) { + LEX_OPERAND: + char *begin = iter; + char *end = iter = fasmLexerGetNextWord(iter); + const size_t size = + a404m_malloc_usable_size(line.operands) / sizeof(*line.operands); + if (line.operands_size == size) { + line.operands = a404m_realloc( + line.operands, + (line.operands_size + line.operands_size / 2 + 1) * + sizeof(*line.operands)); + } + line.operands[line.operands_size].begin = begin; + line.operands[line.operands_size].end = end; + line.operands_size += 1; + line.end = 
end; + lookingFor = FASM_LINE_LOOKING_FOR_COMMA_OR_END; + goto LOOP_BEGIN; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + break; + case FASM_LINE_LOOKING_FOR_OPERAND_OR_END: + if (fasmLexerIsWord(c)) { + goto LEX_OPERAND; + } else if (fasmLexerIsLineSeparator(c)) { + goto LEX_END; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + break; + case FASM_LINE_LOOKING_FOR_COMMA_OR_END: + if (fasmLexerIsLineSeparator(c)) { + LEX_END: + fasmLexerPushLine(&lines, &line, iter, section, sourceCode); + lookingFor = FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION; + } else if (fasmLexerIsOperandSeparator(c)) { + lookingFor = FASM_LINE_LOOKING_FOR_OPERAND; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + } + } + +RETURN_SUCCESS: + return lines; + +RETURN_ERROR: + free(lines.lines); + const FasmLines error = { + .lines_size = ERROR_SIZE, + }; + return error; +} + +bool fasmLexerPushLine(FasmLines *lines, FasmLine *line, char const *iter, + FasmSection section, SourceCode *sourceCode) { + if (!fasmLexerIsAllowed(*line, section)) { + printError("Instruction is not allowed here", sourceCode, line->begin, + line->end); + return false; + } + line->operands = a404m_realloc(line->operands, + line->operands_size * sizeof(*line->operands)); + switch (section) { + case FASM_SECTION_NONE: + printError("Instruction is in no section", sourceCode, line->begin, + line->end); + return true; + case FASM_SECTION_CODE: + _fasmLexerPushLine(&lines->lines, &lines->lines_size, line, iter); + return true; + case FASM_SECTION_DATA: + _fasmLexerPushLine(&lines->data, &lines->data_size, line, iter); + return true; + } + fprintf(stderr, "Bad section '%d'\n", section); + return false; +} + +void _fasmLexerPushLine(FasmLine **lines, size_t *lines_size, FasmLine *line, + char const *) { + const size_t size = a404m_malloc_usable_size(*lines) / sizeof(**lines); + if (size == 
*lines_size) { + *lines = a404m_realloc(*lines, (size * 2 + 1) * sizeof(**lines)); + } + // no need + /*line->end = iter;*/ + (*lines)[*lines_size] = *line; + *lines_size += 1; + + line->operands = a404m_malloc(0); + line->operands_size = 0; + + /*line->begin = iter;*/ + line->labelBegin = NULL; + line->labelEnd = NULL; + /*line->instruction = FASM_TOKEN_NONE;*/ +} + +bool fasmLexerIsAllowed(FasmLine line, FasmSection section) { + switch (line.instruction) { + case FASM_TOKEN_NOOP: + case FASM_TOKEN_PUSH8: + case FASM_TOKEN_PUSH16: + case FASM_TOKEN_PUSH32: + case FASM_TOKEN_PUSH64: + case FASM_TOKEN_LOAD8: + case FASM_TOKEN_LOAD16: + case FASM_TOKEN_LOAD32: + case FASM_TOKEN_LOAD64: + case FASM_TOKEN_POP8: + case FASM_TOKEN_POP16: + case FASM_TOKEN_POP32: + case FASM_TOKEN_POP64: + case FASM_TOKEN_DUP8: + case FASM_TOKEN_DUP16: + case FASM_TOKEN_DUP32: + case FASM_TOKEN_DUP64: + case FASM_TOKEN_SWAP8: + case FASM_TOKEN_SWAP16: + case FASM_TOKEN_SWAP32: + case FASM_TOKEN_SWAP64: + case FASM_TOKEN_DROP8: + case FASM_TOKEN_DROP16: + case FASM_TOKEN_DROP32: + case FASM_TOKEN_DROP64: + case FASM_TOKEN_ADD_I8: + case FASM_TOKEN_ADD_I16: + case FASM_TOKEN_ADD_I32: + case FASM_TOKEN_ADD_I64: + case FASM_TOKEN_ADD_F32: + case FASM_TOKEN_ADD_F64: + case FASM_TOKEN_SUB_I8: + case FASM_TOKEN_SUB_I16: + case FASM_TOKEN_SUB_I32: + case FASM_TOKEN_SUB_I64: + case FASM_TOKEN_SUB_F32: + case FASM_TOKEN_SUB_F64: + case FASM_TOKEN_NEG_I8: + case FASM_TOKEN_NEG_I16: + case FASM_TOKEN_NEG_I32: + case FASM_TOKEN_NEG_I64: + case FASM_TOKEN_NEG_F32: + case FASM_TOKEN_NEG_F64: + case FASM_TOKEN_MUL_I8: + case FASM_TOKEN_MUL_I16: + case FASM_TOKEN_MUL_I32: + case FASM_TOKEN_MUL_I64: + case FASM_TOKEN_MUL_U8: + case FASM_TOKEN_MUL_U16: + case FASM_TOKEN_MUL_U32: + case FASM_TOKEN_MUL_U64: + case FASM_TOKEN_MUL_F32: + case FASM_TOKEN_MUL_F64: + case FASM_TOKEN_DIV_I8: + case FASM_TOKEN_DIV_I16: + case FASM_TOKEN_DIV_I32: + case FASM_TOKEN_DIV_I64: + case FASM_TOKEN_DIV_U8: + case 
FASM_TOKEN_DIV_U16: + case FASM_TOKEN_DIV_U32: + case FASM_TOKEN_DIV_U64: + case FASM_TOKEN_DIV_F32: + case FASM_TOKEN_DIV_F64: + case FASM_TOKEN_REM_I8: + case FASM_TOKEN_REM_I16: + case FASM_TOKEN_REM_I32: + case FASM_TOKEN_REM_I64: + case FASM_TOKEN_REM_U8: + case FASM_TOKEN_REM_U16: + case FASM_TOKEN_REM_U32: + case FASM_TOKEN_REM_U64: + case FASM_TOKEN_CAST_I8_I64: + case FASM_TOKEN_CAST_I16_I64: + case FASM_TOKEN_CAST_I32_I64: + case FASM_TOKEN_CAST_I64_I8: + case FASM_TOKEN_CAST_I64_I16: + case FASM_TOKEN_CAST_I64_I32: + case FASM_TOKEN_CAST_F64_I64: + case FASM_TOKEN_CAST_I64_F64: + case FASM_TOKEN_CAST_U8_U64: + case FASM_TOKEN_CAST_U16_U64: + case FASM_TOKEN_CAST_U32_U64: + case FASM_TOKEN_CAST_U64_U8: + case FASM_TOKEN_CAST_U64_U16: + case FASM_TOKEN_CAST_U64_U32: + case FASM_TOKEN_CAST_F64_U64: + case FASM_TOKEN_CAST_U64_F64: + case FASM_TOKEN_CAST_F32_F64: + case FASM_TOKEN_CAST_F64_F32: + case FASM_TOKEN_JUMP: + case FASM_TOKEN_JZ_I8: + case FASM_TOKEN_JNZ_I8: + case FASM_TOKEN_JN_I8: + case FASM_TOKEN_JNN_I8: + case FASM_TOKEN_JP_I8: + case FASM_TOKEN_JNP_I8: + case FASM_TOKEN_JZ_I16: + case FASM_TOKEN_JNZ_I16: + case FASM_TOKEN_JN_I16: + case FASM_TOKEN_JNN_I16: + case FASM_TOKEN_JP_I16: + case FASM_TOKEN_JNP_I16: + case FASM_TOKEN_JZ_I32: + case FASM_TOKEN_JNZ_I32: + case FASM_TOKEN_JN_I32: + case FASM_TOKEN_JNN_I32: + case FASM_TOKEN_JP_I32: + case FASM_TOKEN_JNP_I32: + case FASM_TOKEN_JZ_I64: + case FASM_TOKEN_JNZ_I64: + case FASM_TOKEN_JN_I64: + case FASM_TOKEN_JNN_I64: + case FASM_TOKEN_JP_I64: + case FASM_TOKEN_JNP_I64: + case FASM_TOKEN_JZ_F32: + case FASM_TOKEN_JNZ_F32: + case FASM_TOKEN_JN_F32: + case FASM_TOKEN_JNN_F32: + case FASM_TOKEN_JP_F32: + case FASM_TOKEN_JNP_F32: + case FASM_TOKEN_JZ_F64: + case FASM_TOKEN_JNZ_F64: + case FASM_TOKEN_JN_F64: + case FASM_TOKEN_JNN_F64: + case FASM_TOKEN_JP_F64: + case FASM_TOKEN_JNP_F64: + case FASM_TOKEN_ALLOC_HEAP: + case FASM_TOKEN_ALLOC_STACK: + case FASM_TOKEN_FREE_HEAP: + case 
FASM_TOKEN_GET_STACK_ADDRESS: + case FASM_TOKEN_GET_GLOBAL_ADDRESS: + case FASM_TOKEN_CALL: + case FASM_TOKEN_RET: + case FASM_TOKEN_SYSCALL: + return section == FASM_SECTION_CODE; + case FASM_TOKEN_DEFINE_BYTE: + case FASM_TOKEN_DEFINE_WORD: + case FASM_TOKEN_DEFINE_DWORD: + case FASM_TOKEN_DEFINE_QWORD: + return section == FASM_SECTION_DATA; + case FASM_TOKEN_NONE: + return false; + } + fprintf(stderr, "Bad token %d at %s:%d\n", line.instruction, __FILE_NAME__, + __LINE__); + exit(1); +} + +char *fasmLexerGetNextWord(char *iter) { + if (fasmLexerIsString(*iter)) { + const char begin = *iter; + for (++iter; *iter != begin; ++iter) { + if (*iter == '\0') { + fprintf(stderr, "No ending for string at %s:%d\n", __FILE_NAME__, + __LINE__); + exit(1); + } + } + ++iter; + } else { + for (++iter; *iter != '\0' && fasmLexerIsWord(*iter); ++iter); + } + return iter; +} + +FasmToken fasmLexerTokenFromIdentifier(char *begin, char *end) { + const size_t size = end - begin; + char *uppered = a404m_malloc((size + 1) * sizeof(char)); + for (char *iter = begin; iter < end; ++iter) { + uppered[iter - begin] = toupper(*iter); + } + uppered[size] = '\0'; + + for (size_t i = 0; i < FASM_TOKEN_STRINGS_SIZE; ++i) { + const char *str = FASM_TOKEN_STRINGS[i]; + if (strcmp(uppered, str) == 0) { + free(uppered); + return (FasmToken)i; + } + } + + free(uppered); + return FASM_TOKEN_NONE; +} + +bool fasmLexerIsSpace(char c) { return c != '\n' && isspace(c); } +bool fasmLexerIsSectionIndicator(char c) { return c == '.'; } +bool fasmLexerIsLabel(char c) { return c == ':'; } +bool fasmLexerIsWord(char c) { + return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || + ('0' <= c && c <= '9') || c == '_' || fasmLexerIsString(c); +} +extern bool fasmLexerIsIdentifierSymbol(char c) { return c == '`'; } +bool fasmLexerIsString(char c) { return c == '\'' || c == '\"'; } +bool fasmLexerIsOperandSeparator(char c) { return c == ','; } +bool fasmLexerIsLineSeparator(char c) { return c == '\n'; } diff 
--git a/src/fasm/lexer/lexer.h b/src/fasm/lexer/lexer.h new file mode 100644 index 0000000..2e5f227 --- /dev/null +++ b/src/fasm/lexer/lexer.h @@ -0,0 +1,363 @@ +#pragma once + +#include <stdint.h> +#include <utils/types.h> + +#include "compiler/source_code/source_code.h" + +typedef enum FasmToken : uint8_t { + // no operation (does nothing) + FASM_TOKEN_NOOP = 0, + + // pushes operand to stack + FASM_TOKEN_PUSH8, + FASM_TOKEN_PUSH16, + FASM_TOKEN_PUSH32, + FASM_TOKEN_PUSH64, + + // dereferences stack.top and pushes its' value to stack + FASM_TOKEN_LOAD8, + FASM_TOKEN_LOAD16, + FASM_TOKEN_LOAD32, + FASM_TOKEN_LOAD64, + + // pops value stack.(top-1) to address which is stack.top + FASM_TOKEN_POP8, + FASM_TOKEN_POP16, + FASM_TOKEN_POP32, + FASM_TOKEN_POP64, + + // duplicates stack.top + FASM_TOKEN_DUP8, + FASM_TOKEN_DUP16, + FASM_TOKEN_DUP32, + FASM_TOKEN_DUP64, + + // swaps stack.top with stack.(top-1) + FASM_TOKEN_SWAP8, + FASM_TOKEN_SWAP16, + FASM_TOKEN_SWAP32, + FASM_TOKEN_SWAP64, + + // drops stack.top + FASM_TOKEN_DROP8, + FASM_TOKEN_DROP16, + FASM_TOKEN_DROP32, + FASM_TOKEN_DROP64, + + // adds two stack top integers + FASM_TOKEN_ADD_I8, + FASM_TOKEN_ADD_I16, + FASM_TOKEN_ADD_I32, + FASM_TOKEN_ADD_I64, + + // adds two stack top floats + FASM_TOKEN_ADD_F32, + FASM_TOKEN_ADD_F64, + + // subtracts two stack top integers + FASM_TOKEN_SUB_I8, + FASM_TOKEN_SUB_I16, + FASM_TOKEN_SUB_I32, + FASM_TOKEN_SUB_I64, + + // subtracts two stack top floats + FASM_TOKEN_SUB_F32, + FASM_TOKEN_SUB_F64, + + // negates stack top integer and pushes it back + FASM_TOKEN_NEG_I8, + FASM_TOKEN_NEG_I16, + FASM_TOKEN_NEG_I32, + FASM_TOKEN_NEG_I64, + + // negates stack top float and pushes it back + FASM_TOKEN_NEG_F32, + FASM_TOKEN_NEG_F64, + + // multiplies two stack top singed integers + FASM_TOKEN_MUL_I8, + FASM_TOKEN_MUL_I16, + FASM_TOKEN_MUL_I32, + FASM_TOKEN_MUL_I64, + + // multiplies two stack top unsinged integers + FASM_TOKEN_MUL_U8, + FASM_TOKEN_MUL_U16, + FASM_TOKEN_MUL_U32, + 
FASM_TOKEN_MUL_U64, + + // multiplies two stack top floats + FASM_TOKEN_MUL_F32, + FASM_TOKEN_MUL_F64, + + // divides two stack top singed integers + FASM_TOKEN_DIV_I8, + FASM_TOKEN_DIV_I16, + FASM_TOKEN_DIV_I32, + FASM_TOKEN_DIV_I64, + + // divides two stack top unsinged integers + FASM_TOKEN_DIV_U8, + FASM_TOKEN_DIV_U16, + FASM_TOKEN_DIV_U32, + FASM_TOKEN_DIV_U64, + + // divides two stack top floats + FASM_TOKEN_DIV_F32, + FASM_TOKEN_DIV_F64, + + // reminders two stack top singed integers + FASM_TOKEN_REM_I8, + FASM_TOKEN_REM_I16, + FASM_TOKEN_REM_I32, + FASM_TOKEN_REM_I64, + + // reminders two stack top unsinged integers + FASM_TOKEN_REM_U8, + FASM_TOKEN_REM_U16, + FASM_TOKEN_REM_U32, + FASM_TOKEN_REM_U64, + + // unsigned casts 8 bit to 64 bit + FASM_TOKEN_CAST_I8_I64, + // unsigned casts 16 bit to 64 bit + FASM_TOKEN_CAST_I16_I64, + // unsigned casts 32 bit to 64 bit + FASM_TOKEN_CAST_I32_I64, + + // unsigned casts 64 bit to 8 bit + FASM_TOKEN_CAST_I64_I8, + // unsigned casts 64 bit to 16 bit + FASM_TOKEN_CAST_I64_I16, + // unsigned casts 64 bit to 32 bit + FASM_TOKEN_CAST_I64_I32, + + // casts unsigned int 64 bit to float 64 bit + FASM_TOKEN_CAST_F64_I64, + // casts float 64 bit to unsigned int 64 bit + FASM_TOKEN_CAST_I64_F64, + + // signed casts 8 bit to 64 bit + FASM_TOKEN_CAST_U8_U64, + // signed casts 16 bit to 64 bit + FASM_TOKEN_CAST_U16_U64, + // signed casts 32 bit to 64 bit + FASM_TOKEN_CAST_U32_U64, + + // signed casts 64 bit to 8 bit + FASM_TOKEN_CAST_U64_U8, + // signed casts 64 bit to 16 bit + FASM_TOKEN_CAST_U64_U16, + // signed casts 64 bit to 32 bit + FASM_TOKEN_CAST_U64_U32, + + // casts signed int 64 bit to float 64 bit + FASM_TOKEN_CAST_F64_U64, + // casts float 64 bit to signed int 64 bit + FASM_TOKEN_CAST_U64_F64, + + // casts float 32 bit to float 64 bit + FASM_TOKEN_CAST_F32_F64, + // casts float 64 bit to float 32 bit + FASM_TOKEN_CAST_F64_F32, + + // unconditional jump to instruction (sets IP to stack.top) + FASM_TOKEN_JUMP, + + // 
conditionally jumps to stack.top if stack.(top-1) as 8 bit is + // zero + FASM_TOKEN_JZ_I8, + // not zero + FASM_TOKEN_JNZ_I8, + // negative + FASM_TOKEN_JN_I8, + // not negative + FASM_TOKEN_JNN_I8, + // positive + FASM_TOKEN_JP_I8, + // not positive + FASM_TOKEN_JNP_I8, + + // conditionally jumps to stack.top if stack.(top-1) as 16 bit is + // zero + FASM_TOKEN_JZ_I16, + // not zero + FASM_TOKEN_JNZ_I16, + // negative + FASM_TOKEN_JN_I16, + // not negative + FASM_TOKEN_JNN_I16, + // positive + FASM_TOKEN_JP_I16, + // not positive + FASM_TOKEN_JNP_I16, + + // conditionally jumps to stack.top if stack.(top-1) as 32 bit is + // zero + FASM_TOKEN_JZ_I32, + // not zero + FASM_TOKEN_JNZ_I32, + // negative + FASM_TOKEN_JN_I32, + // not negative + FASM_TOKEN_JNN_I32, + // positive + FASM_TOKEN_JP_I32, + // not positive + FASM_TOKEN_JNP_I32, + + // conditionally jumps to stack.top if stack.(top-1) as 64 bit is + // zero + FASM_TOKEN_JZ_I64, + // not zero + FASM_TOKEN_JNZ_I64, + // negative + FASM_TOKEN_JN_I64, + // not negative + FASM_TOKEN_JNN_I64, + // positive + FASM_TOKEN_JP_I64, + // not positive + FASM_TOKEN_JNP_I64, + + // conditionally jumps to stack.top if stack.(top-1) as 32 bit float is + // zero + FASM_TOKEN_JZ_F32, + // not zero + FASM_TOKEN_JNZ_F32, + // negative + FASM_TOKEN_JN_F32, + // not negative + FASM_TOKEN_JNN_F32, + // positive + FASM_TOKEN_JP_F32, + // not positive + FASM_TOKEN_JNP_F32, + + // conditionally jumps to stack.top if stack.(top-1) as 64 bit float is + // zero + FASM_TOKEN_JZ_F64, + // not zero + FASM_TOKEN_JNZ_F64, + // negative + FASM_TOKEN_JN_F64, + // not negative + FASM_TOKEN_JNN_F64, + // positive + FASM_TOKEN_JP_F64, + // not positive + FASM_TOKEN_JNP_F64, + + // allocates n bytes to heap and pushes its' address to stack (n = stack.top) + FASM_TOKEN_ALLOC_HEAP, + // allocates n bytes to stack (n = stack.top) + FASM_TOKEN_ALLOC_STACK, + // frees what address is in stack.top + FASM_TOKEN_FREE_HEAP, + // gives stack root address + 
FASM_TOKEN_GET_STACK_ADDRESS, + // gives global root address + FASM_TOKEN_GET_GLOBAL_ADDRESS, + + // calls function (stores to current IP (instruction pointer) into call stack) + FASM_TOKEN_CALL, + // pops call stack to IP (instruction pointer) + FASM_TOKEN_RET, + + // stack.top as u8 is id of syscall + FASM_TOKEN_SYSCALL, + + FASM_TOKEN_DEFINE_BYTE, + FASM_TOKEN_DEFINE_WORD, + FASM_TOKEN_DEFINE_DWORD, + FASM_TOKEN_DEFINE_QWORD, + + FASM_TOKEN_NONE, +} FasmToken; + +extern const char *FASM_TOKEN_STRINGS[]; +extern const size_t FASM_TOKEN_STRINGS_SIZE; + +typedef enum FasmSyscall : uint8_t { + // stack = [...,u64 count,i8* buf,u32 fd] -> stack = [...] + FASM_SYSCALL_READ = 0, + // stack = [...,u64 count,i8* buf,u32 fd] -> stack = [...] + FASM_SYSCALL_WRITE, + // stack = [...,i32 mode,i32 flags,const i8* filename] -> stack = [...,u32 fd] + FASM_SYSCALL_OPEN, + // stack = [...,u32 fd] -> stack = [...] + FASM_SYSCALL_CLOSE, + // stack = [...,u32 status] -> stack = [...] + FASM_SYSCALL_EXIT, +} FasmSyscall; + +typedef enum FasmLineLookingFor { + FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION = 0, + FASM_LINE_LOOKING_FOR_INSTRUCTION, + FASM_LINE_LOOKING_FOR_OPERAND, + FASM_LINE_LOOKING_FOR_OPERAND_OR_END, + FASM_LINE_LOOKING_FOR_COMMA_OR_END, +} FasmLineLookingFor; + +extern const char *FASM_LINE_LOOKING_FOR_STRINGS[]; + +typedef enum FasmSection { + FASM_SECTION_NONE, + FASM_SECTION_CODE, + FASM_SECTION_DATA, +} FasmSection; + +typedef struct FasmOperand { + char *begin; + char *end; +} FasmOperand; + +typedef struct FasmLine { + char const *begin; + char const *end; + + char const *labelBegin; + char const *labelEnd; + + FasmToken instruction; + + FasmOperand *operands; + size_t operands_size; +} FasmLine; + +typedef struct FasmLines { + FasmLine *lines; + size_t lines_size; + FasmLine *data; + size_t data_size; +} FasmLines; + +extern void fasmLinePrint(FasmLine line); +extern void fasmLinesPrint(FasmLines lines); + +extern void fasmLineDeleteInner(FasmLine line); +extern 
void fasmLinesDeleteInner(FasmLines lines); + +extern FasmLines *fasmLexer(SourceCode *sourceCode); +extern FasmLines fasmLexerCode(Code *code, SourceCode *sourceCode); + +extern bool fasmLexerPushLine(FasmLines *lines, FasmLine *line, + char const *iter, FasmSection section, + SourceCode *sourceCode); +extern void _fasmLexerPushLine(FasmLine **lines, size_t *lines_size, + FasmLine *line, char const *iter); + +extern bool fasmLexerIsAllowed(FasmLine line, FasmSection section); + +extern char *fasmLexerGetNextWord(char *iter); +extern FasmToken fasmLexerTokenFromIdentifier(char *begin, char *end); + +extern bool fasmLexerIsSpace(char c); +extern bool fasmLexerIsSectionIndicator(char c); +extern bool fasmLexerIsLabel(char c); +extern bool fasmLexerIsWord(char c); +extern bool fasmLexerIsIdentifierSymbol(char c); +extern bool fasmLexerIsString(char c); +extern bool fasmLexerIsOperandSeparator(char c); +extern bool fasmLexerIsLineSeparator(char c); diff --git a/src/fasm/linker/linker.c b/src/fasm/linker/linker.c new file mode 100644 index 0000000..faa3cd8 --- /dev/null +++ b/src/fasm/linker/linker.c @@ -0,0 +1,870 @@ +#include "linker.h" + +#include <compiler/error_helper/error_helper.h> +#include <compiler/source_code/source_code.h> +#include <fasm/lexer/lexer.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <utils/memory/memory.h> + +void fasmVariablePrint(FasmVariable variable) { + printf(" {name='%.*s',value=0x%ld}\n", (int)(variable.end - variable.begin), + variable.begin, variable.value); +} + +void fasmLinkedLinePrint(FasmLinkedLine line) { + printf(" {instruction='%s',operands=[\n", + FASM_TOKEN_STRINGS[line.instruction]); + for (size_t i = 0; i < line.operands_size; ++i) { + printf(" 0x%x,\n", line.operands[i]); + } + printf(" ]}\n"); +} + +void fasmLinkedLinesPrint(FasmLinkedLines lines) { + printf("{lines=[\n"); + for (size_t i = 0; i < lines.lines_size; ++i) { + fasmLinkedLinePrint(lines.lines[i]); + } + 
printf("],\ndata=[\n"); + for (size_t i = 0; i < lines.data_size; ++i) { + printf(" 0x%.2x,\n", lines.data[i]); + } + printf("],\nvariable=[\n"); + for (size_t i = 0; i < lines.variables_size; ++i) { + fasmVariablePrint(lines.variables[i]); + } + printf("]}\n"); +} + +void fasmLinkedLineDeleteInner(FasmLinkedLine line) { free(line.operands); } + +void fasmLinkedLinesDeleteInner(FasmLinkedLines lines) { + for (size_t i = 0; i < lines.lines_size; ++i) { + fasmLinkedLineDeleteInner(lines.lines[i]); + } + free(lines.lines); + free(lines.variables); + free(lines.data); +} + +FasmLinkedLines fasmLinker(const FasmLines *lines, SourceCode *sourceCode) { + FasmLinkedLines linkedLines = { + .lines = a404m_malloc(0), + .lines_size = 0, + .variables = a404m_malloc(0), + .variables_size = 0, + .data = a404m_malloc(0), + .data_size = 0, + }; + + fasmLinesSetVariables(&linkedLines, lines, sourceCode); + fasmLinesSetLines(&linkedLines, lines, sourceCode); + fasmLinesSetData(&linkedLines, lines, sourceCode); + + return linkedLines; + + // RETURN_ERROR: + const FasmLinkedLines ERROR = { + .lines = NULL, + .lines_size = ERROR_SIZE, + .variables = NULL, + .variables_size = ERROR_SIZE, + .data = NULL, + .data_size = ERROR_SIZE, + }; + + return ERROR; +} + +void fasmLinesSetVariables(FasmLinkedLines *linkedLines, const FasmLines *lines, + SourceCode *sourceCode) { + size_t inserted = 0; + + for (size_t i = 0; i < sourceCode->size; ++i) { + const FasmLines linesHolder = lines[i]; + for (size_t j = 0; j < linesHolder.lines_size; ++j) { + const FasmLine line = lines->lines[j]; + if (line.labelBegin != line.labelEnd) { + const FasmVariable variable = { + .begin = line.labelBegin, + .end = line.labelEnd, + .value = inserted, + }; + fasmLinesPushVariable(linkedLines, variable); + } + inserted += getSizeOfLine(line); + } + } + + inserted = 0; + + for (size_t i = 0; i < sourceCode->size; ++i) { + const FasmLines linesHolder = lines[i]; + for (size_t j = 0; j < linesHolder.data_size; ++j) { + 
const FasmLine line = lines->data[j]; + if (line.labelBegin != line.labelEnd) { + const FasmVariable variable = { + .begin = line.labelBegin, + .end = line.labelEnd, + .value = inserted, + }; + fasmLinesPushVariable(linkedLines, variable); + } + inserted += getSizeOfLine(line); + } + } +} + +void fasmLinesSetLines(FasmLinkedLines *linkedLines, const FasmLines *lines, + SourceCode *sourceCode) { + for (size_t i = 0; i < sourceCode->size; ++i) { + const FasmLines linesHolder = lines[i]; + for (size_t j = 0; j < linesHolder.lines_size; ++j) { + const FasmLine line = lines->lines[j]; + fasmLinesPushLine(linkedLines, + fasmLinesParseLine(linkedLines, line, sourceCode)); + } + } +} + +extern void fasmLinesSetData(FasmLinkedLines *linkedLines, + const FasmLines *lines, SourceCode *sourceCode) { + for (size_t i = 0; i < sourceCode->size; ++i) { + const FasmLines linesHolder = lines[i]; + for (size_t j = 0; j < linesHolder.data_size; ++j) { + const FasmLine line = lines->data[j]; + FasmLinkedLine linkedLine = + fasmLinesParseLine(linkedLines, line, sourceCode); + fasmLinesPushData(linkedLines, linkedLine.operands, + linkedLine.operands_size); + fasmLinkedLineDeleteInner(linkedLine); + } + } +} + +FasmLinkedLine fasmLinesParseLine(FasmLinkedLines *linkedLines, FasmLine line, + SourceCode *sourceCode) { + const FasmLinkedLine linkedLine = { + .begin = line.begin, + .end = line.end, + .instruction = line.instruction, + .operands = a404m_malloc(getSizeOfLineOperands(line)), + .operands_size = getSizeOfLineOperands(line), + }; + + const size_t elementSize = getSizeOfLineOperandElementSize(line); + + size_t inserted = 0; + + for (size_t i = 0; i < line.operands_size; ++i) { + const FasmOperand operand = line.operands[i]; + if (isOperandString(operand)) { + for (char *iter = operand.begin + 1; iter + 1 < operand.end; ++iter) { + switch (elementSize) { + case 1: + ((uint8_t *)linkedLine.operands)[inserted] = *iter; + break; + case 2: + ((uint16_t *)linkedLine.operands)[inserted] = 
*iter; + break; + case 4: + ((uint32_t *)linkedLine.operands)[inserted] = *iter; + break; + case 8: + ((uint64_t *)linkedLine.operands)[inserted] = *iter; + break; + } + inserted += 1; + } + } else { + switch (elementSize) { + case 1: + ((uint8_t *)linkedLine.operands)[inserted] = + getOperandValue(linkedLines, operand, sourceCode); + break; + case 2: + ((uint16_t *)linkedLine.operands)[inserted] = + getOperandValue(linkedLines, operand, sourceCode); + break; + case 4: + ((uint32_t *)linkedLine.operands)[inserted] = + getOperandValue(linkedLines, operand, sourceCode); + break; + case 8: + ((uint64_t *)linkedLine.operands)[inserted] = + getOperandValue(linkedLines, operand, sourceCode); + break; + } + inserted += 1; + } + } + + return linkedLine; +} + +bool fasmLinkerOperandSizeCorrect(FasmToken token, int size) { + switch (token) { + case FASM_TOKEN_NOOP: + return size == 0; + case FASM_TOKEN_PUSH8: + case FASM_TOKEN_PUSH16: + case FASM_TOKEN_PUSH32: + case FASM_TOKEN_PUSH64: + return size == 1; + case FASM_TOKEN_LOAD8: + case FASM_TOKEN_LOAD16: + case FASM_TOKEN_LOAD32: + case FASM_TOKEN_LOAD64: + case FASM_TOKEN_POP8: + case FASM_TOKEN_POP16: + case FASM_TOKEN_POP32: + case FASM_TOKEN_POP64: + case FASM_TOKEN_DUP8: + case FASM_TOKEN_DUP16: + case FASM_TOKEN_DUP32: + case FASM_TOKEN_DUP64: + case FASM_TOKEN_SWAP8: + case FASM_TOKEN_SWAP16: + case FASM_TOKEN_SWAP32: + case FASM_TOKEN_SWAP64: + case FASM_TOKEN_DROP8: + case FASM_TOKEN_DROP16: + case FASM_TOKEN_DROP32: + case FASM_TOKEN_DROP64: + case FASM_TOKEN_ADD_I8: + case FASM_TOKEN_ADD_I16: + case FASM_TOKEN_ADD_I32: + case FASM_TOKEN_ADD_I64: + case FASM_TOKEN_ADD_F32: + case FASM_TOKEN_ADD_F64: + case FASM_TOKEN_SUB_I8: + case FASM_TOKEN_SUB_I16: + case FASM_TOKEN_SUB_I32: + case FASM_TOKEN_SUB_I64: + case FASM_TOKEN_SUB_F32: + case FASM_TOKEN_SUB_F64: + case FASM_TOKEN_NEG_I8: + case FASM_TOKEN_NEG_I16: + case FASM_TOKEN_NEG_I32: + case FASM_TOKEN_NEG_I64: + case FASM_TOKEN_NEG_F32: + case 
FASM_TOKEN_NEG_F64: + case FASM_TOKEN_MUL_I8: + case FASM_TOKEN_MUL_I16: + case FASM_TOKEN_MUL_I32: + case FASM_TOKEN_MUL_I64: + case FASM_TOKEN_MUL_U8: + case FASM_TOKEN_MUL_U16: + case FASM_TOKEN_MUL_U32: + case FASM_TOKEN_MUL_U64: + case FASM_TOKEN_MUL_F32: + case FASM_TOKEN_MUL_F64: + case FASM_TOKEN_DIV_I8: + case FASM_TOKEN_DIV_I16: + case FASM_TOKEN_DIV_I32: + case FASM_TOKEN_DIV_I64: + case FASM_TOKEN_DIV_U8: + case FASM_TOKEN_DIV_U16: + case FASM_TOKEN_DIV_U32: + case FASM_TOKEN_DIV_U64: + case FASM_TOKEN_DIV_F32: + case FASM_TOKEN_DIV_F64: + case FASM_TOKEN_REM_I8: + case FASM_TOKEN_REM_I16: + case FASM_TOKEN_REM_I32: + case FASM_TOKEN_REM_I64: + case FASM_TOKEN_REM_U8: + case FASM_TOKEN_REM_U16: + case FASM_TOKEN_REM_U32: + case FASM_TOKEN_REM_U64: + case FASM_TOKEN_CAST_I8_I64: + case FASM_TOKEN_CAST_I16_I64: + case FASM_TOKEN_CAST_I32_I64: + case FASM_TOKEN_CAST_I64_I8: + case FASM_TOKEN_CAST_I64_I16: + case FASM_TOKEN_CAST_I64_I32: + case FASM_TOKEN_CAST_F64_I64: + case FASM_TOKEN_CAST_I64_F64: + case FASM_TOKEN_CAST_U8_U64: + case FASM_TOKEN_CAST_U16_U64: + case FASM_TOKEN_CAST_U32_U64: + case FASM_TOKEN_CAST_U64_U8: + case FASM_TOKEN_CAST_U64_U16: + case FASM_TOKEN_CAST_U64_U32: + case FASM_TOKEN_CAST_F64_U64: + case FASM_TOKEN_CAST_U64_F64: + case FASM_TOKEN_CAST_F32_F64: + case FASM_TOKEN_CAST_F64_F32: + case FASM_TOKEN_JUMP: + case FASM_TOKEN_JZ_I8: + case FASM_TOKEN_JNZ_I8: + case FASM_TOKEN_JN_I8: + case FASM_TOKEN_JNN_I8: + case FASM_TOKEN_JP_I8: + case FASM_TOKEN_JNP_I8: + case FASM_TOKEN_JZ_I16: + case FASM_TOKEN_JNZ_I16: + case FASM_TOKEN_JN_I16: + case FASM_TOKEN_JNN_I16: + case FASM_TOKEN_JP_I16: + case FASM_TOKEN_JNP_I16: + case FASM_TOKEN_JZ_I32: + case FASM_TOKEN_JNZ_I32: + case FASM_TOKEN_JN_I32: + case FASM_TOKEN_JNN_I32: + case FASM_TOKEN_JP_I32: + case FASM_TOKEN_JNP_I32: + case FASM_TOKEN_JZ_I64: + case FASM_TOKEN_JNZ_I64: + case FASM_TOKEN_JN_I64: + case FASM_TOKEN_JNN_I64: + case FASM_TOKEN_JP_I64: + case FASM_TOKEN_JNP_I64: + 
case FASM_TOKEN_JZ_F32: + case FASM_TOKEN_JNZ_F32: + case FASM_TOKEN_JN_F32: + case FASM_TOKEN_JNN_F32: + case FASM_TOKEN_JP_F32: + case FASM_TOKEN_JNP_F32: + case FASM_TOKEN_JZ_F64: + case FASM_TOKEN_JNZ_F64: + case FASM_TOKEN_JN_F64: + case FASM_TOKEN_JNN_F64: + case FASM_TOKEN_JP_F64: + case FASM_TOKEN_JNP_F64: + case FASM_TOKEN_ALLOC_HEAP: + case FASM_TOKEN_ALLOC_STACK: + case FASM_TOKEN_FREE_HEAP: + case FASM_TOKEN_GET_STACK_ADDRESS: + case FASM_TOKEN_GET_GLOBAL_ADDRESS: + case FASM_TOKEN_CALL: + case FASM_TOKEN_RET: + return size == 0; + case FASM_TOKEN_SYSCALL: + return size == 1; + case FASM_TOKEN_DEFINE_BYTE: + case FASM_TOKEN_DEFINE_WORD: + case FASM_TOKEN_DEFINE_DWORD: + case FASM_TOKEN_DEFINE_QWORD: + return size > 1; + case FASM_TOKEN_NONE: + return false; + } + fprintf(stderr, "Bad fasm token '%d' at %s:%d", token, __FILE_NAME__, + __LINE__); + exit(1); +} + +size_t getSizeOfLine(const FasmLine line) { + return sizeof(line.instruction) + getSizeOfLineOperands(line); +} + +size_t getSizeOfLineOperands(const FasmLine line) { + switch (line.instruction) { + case FASM_TOKEN_PUSH8: + return 1; + case FASM_TOKEN_PUSH16: + return 2; + case FASM_TOKEN_PUSH32: + return 4; + case FASM_TOKEN_PUSH64: + return 8; + case FASM_TOKEN_LOAD8: + case FASM_TOKEN_LOAD16: + case FASM_TOKEN_LOAD32: + case FASM_TOKEN_LOAD64: + case FASM_TOKEN_POP8: + case FASM_TOKEN_POP16: + case FASM_TOKEN_POP32: + case FASM_TOKEN_POP64: + case FASM_TOKEN_DUP8: + case FASM_TOKEN_DUP16: + case FASM_TOKEN_DUP32: + case FASM_TOKEN_DUP64: + case FASM_TOKEN_SWAP8: + case FASM_TOKEN_SWAP16: + case FASM_TOKEN_SWAP32: + case FASM_TOKEN_SWAP64: + case FASM_TOKEN_DROP8: + case FASM_TOKEN_DROP16: + case FASM_TOKEN_DROP32: + case FASM_TOKEN_DROP64: + case FASM_TOKEN_ADD_I8: + case FASM_TOKEN_ADD_I16: + case FASM_TOKEN_ADD_I32: + case FASM_TOKEN_ADD_I64: + case FASM_TOKEN_ADD_F32: + case FASM_TOKEN_ADD_F64: + case FASM_TOKEN_SUB_I8: + case FASM_TOKEN_SUB_I16: + case FASM_TOKEN_SUB_I32: + case 
FASM_TOKEN_SUB_I64: + case FASM_TOKEN_SUB_F32: + case FASM_TOKEN_SUB_F64: + case FASM_TOKEN_NEG_I8: + case FASM_TOKEN_NEG_I16: + case FASM_TOKEN_NEG_I32: + case FASM_TOKEN_NEG_I64: + case FASM_TOKEN_NEG_F32: + case FASM_TOKEN_NEG_F64: + case FASM_TOKEN_MUL_I8: + case FASM_TOKEN_MUL_I16: + case FASM_TOKEN_MUL_I32: + case FASM_TOKEN_MUL_I64: + case FASM_TOKEN_MUL_U8: + case FASM_TOKEN_MUL_U16: + case FASM_TOKEN_MUL_U32: + case FASM_TOKEN_MUL_U64: + case FASM_TOKEN_MUL_F32: + case FASM_TOKEN_MUL_F64: + case FASM_TOKEN_DIV_I8: + case FASM_TOKEN_DIV_I16: + case FASM_TOKEN_DIV_I32: + case FASM_TOKEN_DIV_I64: + case FASM_TOKEN_DIV_U8: + case FASM_TOKEN_DIV_U16: + case FASM_TOKEN_DIV_U32: + case FASM_TOKEN_DIV_U64: + case FASM_TOKEN_DIV_F32: + case FASM_TOKEN_DIV_F64: + case FASM_TOKEN_REM_I8: + case FASM_TOKEN_REM_I16: + case FASM_TOKEN_REM_I32: + case FASM_TOKEN_REM_I64: + case FASM_TOKEN_REM_U8: + case FASM_TOKEN_REM_U16: + case FASM_TOKEN_REM_U32: + case FASM_TOKEN_REM_U64: + case FASM_TOKEN_CAST_I8_I64: + case FASM_TOKEN_CAST_I16_I64: + case FASM_TOKEN_CAST_I32_I64: + case FASM_TOKEN_CAST_I64_I8: + case FASM_TOKEN_CAST_I64_I16: + case FASM_TOKEN_CAST_I64_I32: + case FASM_TOKEN_CAST_F64_I64: + case FASM_TOKEN_CAST_I64_F64: + case FASM_TOKEN_CAST_U8_U64: + case FASM_TOKEN_CAST_U16_U64: + case FASM_TOKEN_CAST_U32_U64: + case FASM_TOKEN_CAST_U64_U8: + case FASM_TOKEN_CAST_U64_U16: + case FASM_TOKEN_CAST_U64_U32: + case FASM_TOKEN_CAST_F64_U64: + case FASM_TOKEN_CAST_U64_F64: + case FASM_TOKEN_CAST_F32_F64: + case FASM_TOKEN_CAST_F64_F32: + case FASM_TOKEN_JUMP: + case FASM_TOKEN_JZ_I8: + case FASM_TOKEN_JNZ_I8: + case FASM_TOKEN_JN_I8: + case FASM_TOKEN_JNN_I8: + case FASM_TOKEN_JP_I8: + case FASM_TOKEN_JNP_I8: + case FASM_TOKEN_JZ_I16: + case FASM_TOKEN_JNZ_I16: + case FASM_TOKEN_JN_I16: + case FASM_TOKEN_JNN_I16: + case FASM_TOKEN_JP_I16: + case FASM_TOKEN_JNP_I16: + case FASM_TOKEN_JZ_I32: + case FASM_TOKEN_JNZ_I32: + case FASM_TOKEN_JN_I32: + case FASM_TOKEN_JNN_I32: 
+ case FASM_TOKEN_JP_I32: + case FASM_TOKEN_JNP_I32: + case FASM_TOKEN_JZ_I64: + case FASM_TOKEN_JNZ_I64: + case FASM_TOKEN_JN_I64: + case FASM_TOKEN_JNN_I64: + case FASM_TOKEN_JP_I64: + case FASM_TOKEN_JNP_I64: + case FASM_TOKEN_JZ_F32: + case FASM_TOKEN_JNZ_F32: + case FASM_TOKEN_JN_F32: + case FASM_TOKEN_JNN_F32: + case FASM_TOKEN_JP_F32: + case FASM_TOKEN_JNP_F32: + case FASM_TOKEN_JZ_F64: + case FASM_TOKEN_JNZ_F64: + case FASM_TOKEN_JN_F64: + case FASM_TOKEN_JNN_F64: + case FASM_TOKEN_JP_F64: + case FASM_TOKEN_JNP_F64: + case FASM_TOKEN_ALLOC_HEAP: + case FASM_TOKEN_ALLOC_STACK: + case FASM_TOKEN_FREE_HEAP: + case FASM_TOKEN_GET_STACK_ADDRESS: + case FASM_TOKEN_GET_GLOBAL_ADDRESS: + case FASM_TOKEN_CALL: + case FASM_TOKEN_RET: + case FASM_TOKEN_NOOP: + case FASM_TOKEN_SYSCALL: + return 0; + case FASM_TOKEN_DEFINE_BYTE: + case FASM_TOKEN_DEFINE_WORD: + case FASM_TOKEN_DEFINE_DWORD: + case FASM_TOKEN_DEFINE_QWORD: { + size_t elementSize; + + switch (line.instruction) { + case FASM_TOKEN_DEFINE_BYTE: + elementSize = 1; + break; + case FASM_TOKEN_DEFINE_WORD: + elementSize = 2; + break; + case FASM_TOKEN_DEFINE_DWORD: + elementSize = 4; + break; + case FASM_TOKEN_DEFINE_QWORD: + elementSize = 8; + break; + default: + } + + size_t size = 0; + + for (size_t i = 0; i < line.operands_size; ++i) { + const FasmOperand operand = line.operands[i]; + if (isOperandString(operand)) { + size += (operand.end - operand.begin - 2) * elementSize; + } else { + size += elementSize; + } + } + + return size; + } + case FASM_TOKEN_NONE: + } + fprintf(stderr, "Bad fasm token '%d' at %s:%d", line.instruction, + __FILE_NAME__, __LINE__); + exit(1); +} + +size_t getSizeOfLineOperandElementSize(const FasmLine line) { + switch (line.instruction) { + case FASM_TOKEN_NOOP: + return 0; + case FASM_TOKEN_PUSH8: + return 1; + case FASM_TOKEN_PUSH16: + return 2; + case FASM_TOKEN_PUSH32: + return 4; + case FASM_TOKEN_PUSH64: + return 8; + case FASM_TOKEN_LOAD8: + case FASM_TOKEN_LOAD16: + case 
FASM_TOKEN_LOAD32: + case FASM_TOKEN_LOAD64: + case FASM_TOKEN_POP8: + case FASM_TOKEN_POP16: + case FASM_TOKEN_POP32: + case FASM_TOKEN_POP64: + case FASM_TOKEN_DUP8: + case FASM_TOKEN_DUP16: + case FASM_TOKEN_DUP32: + case FASM_TOKEN_DUP64: + case FASM_TOKEN_SWAP8: + case FASM_TOKEN_SWAP16: + case FASM_TOKEN_SWAP32: + case FASM_TOKEN_SWAP64: + case FASM_TOKEN_DROP8: + case FASM_TOKEN_DROP16: + case FASM_TOKEN_DROP32: + case FASM_TOKEN_DROP64: + case FASM_TOKEN_ADD_I8: + case FASM_TOKEN_ADD_I16: + case FASM_TOKEN_ADD_I32: + case FASM_TOKEN_ADD_I64: + case FASM_TOKEN_ADD_F32: + case FASM_TOKEN_ADD_F64: + case FASM_TOKEN_SUB_I8: + case FASM_TOKEN_SUB_I16: + case FASM_TOKEN_SUB_I32: + case FASM_TOKEN_SUB_I64: + case FASM_TOKEN_SUB_F32: + case FASM_TOKEN_SUB_F64: + case FASM_TOKEN_NEG_I8: + case FASM_TOKEN_NEG_I16: + case FASM_TOKEN_NEG_I32: + case FASM_TOKEN_NEG_I64: + case FASM_TOKEN_NEG_F32: + case FASM_TOKEN_NEG_F64: + case FASM_TOKEN_MUL_I8: + case FASM_TOKEN_MUL_I16: + case FASM_TOKEN_MUL_I32: + case FASM_TOKEN_MUL_I64: + case FASM_TOKEN_MUL_U8: + case FASM_TOKEN_MUL_U16: + case FASM_TOKEN_MUL_U32: + case FASM_TOKEN_MUL_U64: + case FASM_TOKEN_MUL_F32: + case FASM_TOKEN_MUL_F64: + case FASM_TOKEN_DIV_I8: + case FASM_TOKEN_DIV_I16: + case FASM_TOKEN_DIV_I32: + case FASM_TOKEN_DIV_I64: + case FASM_TOKEN_DIV_U8: + case FASM_TOKEN_DIV_U16: + case FASM_TOKEN_DIV_U32: + case FASM_TOKEN_DIV_U64: + case FASM_TOKEN_DIV_F32: + case FASM_TOKEN_DIV_F64: + case FASM_TOKEN_REM_I8: + case FASM_TOKEN_REM_I16: + case FASM_TOKEN_REM_I32: + case FASM_TOKEN_REM_I64: + case FASM_TOKEN_REM_U8: + case FASM_TOKEN_REM_U16: + case FASM_TOKEN_REM_U32: + case FASM_TOKEN_REM_U64: + case FASM_TOKEN_CAST_I8_I64: + case FASM_TOKEN_CAST_I16_I64: + case FASM_TOKEN_CAST_I32_I64: + case FASM_TOKEN_CAST_I64_I8: + case FASM_TOKEN_CAST_I64_I16: + case FASM_TOKEN_CAST_I64_I32: + case FASM_TOKEN_CAST_F64_I64: + case FASM_TOKEN_CAST_I64_F64: + case FASM_TOKEN_CAST_U8_U64: + case FASM_TOKEN_CAST_U16_U64: 
+ case FASM_TOKEN_CAST_U32_U64: + case FASM_TOKEN_CAST_U64_U8: + case FASM_TOKEN_CAST_U64_U16: + case FASM_TOKEN_CAST_U64_U32: + case FASM_TOKEN_CAST_F64_U64: + case FASM_TOKEN_CAST_U64_F64: + case FASM_TOKEN_CAST_F32_F64: + case FASM_TOKEN_CAST_F64_F32: + case FASM_TOKEN_JUMP: + case FASM_TOKEN_JZ_I8: + case FASM_TOKEN_JNZ_I8: + case FASM_TOKEN_JN_I8: + case FASM_TOKEN_JNN_I8: + case FASM_TOKEN_JP_I8: + case FASM_TOKEN_JNP_I8: + case FASM_TOKEN_JZ_I16: + case FASM_TOKEN_JNZ_I16: + case FASM_TOKEN_JN_I16: + case FASM_TOKEN_JNN_I16: + case FASM_TOKEN_JP_I16: + case FASM_TOKEN_JNP_I16: + case FASM_TOKEN_JZ_I32: + case FASM_TOKEN_JNZ_I32: + case FASM_TOKEN_JN_I32: + case FASM_TOKEN_JNN_I32: + case FASM_TOKEN_JP_I32: + case FASM_TOKEN_JNP_I32: + case FASM_TOKEN_JZ_I64: + case FASM_TOKEN_JNZ_I64: + case FASM_TOKEN_JN_I64: + case FASM_TOKEN_JNN_I64: + case FASM_TOKEN_JP_I64: + case FASM_TOKEN_JNP_I64: + case FASM_TOKEN_JZ_F32: + case FASM_TOKEN_JNZ_F32: + case FASM_TOKEN_JN_F32: + case FASM_TOKEN_JNN_F32: + case FASM_TOKEN_JP_F32: + case FASM_TOKEN_JNP_F32: + case FASM_TOKEN_JZ_F64: + case FASM_TOKEN_JNZ_F64: + case FASM_TOKEN_JN_F64: + case FASM_TOKEN_JNN_F64: + case FASM_TOKEN_JP_F64: + case FASM_TOKEN_JNP_F64: + case FASM_TOKEN_ALLOC_HEAP: + case FASM_TOKEN_ALLOC_STACK: + case FASM_TOKEN_FREE_HEAP: + case FASM_TOKEN_GET_STACK_ADDRESS: + case FASM_TOKEN_GET_GLOBAL_ADDRESS: + case FASM_TOKEN_CALL: + case FASM_TOKEN_RET: + case FASM_TOKEN_SYSCALL: + return 0; + case FASM_TOKEN_DEFINE_BYTE: + return 1; + case FASM_TOKEN_DEFINE_WORD: + return 2; + case FASM_TOKEN_DEFINE_DWORD: + return 4; + case FASM_TOKEN_DEFINE_QWORD: + return 8; + case FASM_TOKEN_NONE: + } + + fprintf(stderr, "Bad fasm token '%d' at %s:%d", line.instruction, + __FILE_NAME__, __LINE__); + exit(1); +} + +void fasmLinesPushVariable(FasmLinkedLines *linkedLines, + FasmVariable variable) { + const size_t size = a404m_malloc_usable_size(linkedLines->variables) / + sizeof(*linkedLines->variables); + if (size 
== linkedLines->variables_size) { + linkedLines->variables = + a404m_realloc(linkedLines->variables, + (size + size / 2 + 1) * sizeof(*linkedLines->variables)); + } + linkedLines->variables[linkedLines->variables_size] = variable; + linkedLines->variables_size += 1; +} + +void fasmLinesPushLine(FasmLinkedLines *linkedLines, FasmLinkedLine line) { + const size_t size = a404m_malloc_usable_size(linkedLines->lines) / + sizeof(*linkedLines->lines); + if (size == linkedLines->lines_size) { + linkedLines->lines = + a404m_realloc(linkedLines->lines, + (size + size / 2 + 1) * sizeof(*linkedLines->lines)); + } + linkedLines->lines[linkedLines->lines_size] = line; + linkedLines->lines_size += 1; +} + +void fasmLinesPushData(FasmLinkedLines *linkedLines, uint8_t *data, + size_t size) { + linkedLines->data = + a404m_realloc(linkedLines->data, (linkedLines->data_size + size) * + sizeof(*linkedLines->data)); + memcpy(linkedLines->data + linkedLines->data_size, data, size); + linkedLines->data_size += size; +} + +FasmVariable fasmLinesGetVariable(const FasmLinkedLines *linkedLines, + char const *nameBegin, char const *nameEnd) { + const size_t size = nameEnd - nameBegin; + for (size_t i = 0; i < linkedLines->variables_size; ++i) { + const FasmVariable variable = linkedLines->variables[i]; + const size_t variable_str_size = variable.end - variable.begin; + if (size == variable_str_size && + strncmp(variable.begin, nameBegin, size) == 0) { + return variable; + } + } + + FasmVariable ERROR = { + .begin = NULL, + .end = NULL, + .value = 0, + }; + + return ERROR; +} + +bool isOperandString(FasmOperand operand) { return *(operand.begin) == '"'; } + +uint64_t getOperandValue(FasmLinkedLines *linkedLines, FasmOperand operand, + SourceCode *sourceCode) { + char c = *(operand.begin); + + if (c == '"') { + fprintf(stderr, "It shoulden't be here"); + exit(1); + } else if (c == '0') { + ++(operand.begin); + if (operand.begin == operand.end) { + return 0; + } + c = *(operand.begin); + if (c == 
'x') { + return hexStrToInt(operand.begin + 1, operand.end, sourceCode); + } else if (c == 'b') { + return binStrToInt(operand.begin + 1, operand.end, sourceCode); + } else { + return strToInt(operand.begin + 1, operand.end, sourceCode); + } + } else if (c == '-') { + ++(operand.begin); + if (operand.begin == operand.end) { + printError("Expected value after -", sourceCode, operand.begin, + operand.end); + exit(1); + } + c = *(operand.begin); + if (c == '0') { + ++(operand.begin); + if (operand.begin == operand.end) { + return 0; + } + c = *(operand.begin); + if (c == 'x') { + return -hexStrToInt(operand.begin + 1, operand.end, sourceCode); + } else if (c == 'b') { + return -binStrToInt(operand.begin + 1, operand.end, sourceCode); + } else { + return -strToInt(operand.begin + 1, operand.end, sourceCode); + } + } + } else if ('0' <= c && c <= '9') { + return strToInt(operand.begin, operand.end, sourceCode); + } else { + const FasmVariable variable = + fasmLinesGetVariable(linkedLines, operand.begin, operand.end); + if (variable.begin == NULL) { + printError("Label '%.*s' not found", sourceCode, operand.begin, + operand.end, (int)(operand.end - operand.begin), + operand.begin); + exit(1); + } + return variable.value; + } + printError("Should not come here %s:%d", sourceCode, operand.begin, + operand.end, __FILE_NAME__, __LINE__); + exit(1); +} + +uint64_t strToInt(const char *begin, const char *end, SourceCode *sourceCode) { + uint64_t result = 0; + for (char const *iter = begin; iter < end; ++iter) { + const char c = *iter; + result *= 10; + if ('0' <= c && c <= '9') { + result += c - '0'; + } else { + printError("Unexpected character", sourceCode, iter, iter + 1); + exit(1); + } + } + return result; +} +uint64_t hexStrToInt(const char *begin, const char *end, + SourceCode *sourceCode) { + uint64_t result = 0; + for (char const *iter = begin; iter < end; ++iter) { + const char c = *iter; + result *= 16; + if ('0' <= c && c <= '9') { + result += c - '0'; + } else if 
('A' <= c && c < 'F') { + result += c - 'A' + 10; + } else if ('a' <= c && c < 'f') { + result += c - 'a' + 10; + } else { + printError("Unexpected character", sourceCode, iter, iter + 1); + exit(1); + } + } + return result; +} +uint64_t binStrToInt(const char *begin, const char *end, + SourceCode *sourceCode) { + uint64_t result = 0; + for (char const *iter = begin; iter < end; ++iter) { + const char c = *iter; + result *= 2; + if ('0' <= c && c <= '1') { + result += c - '0'; + } else { + printError("Unexpected character", sourceCode, iter, iter + 1); + exit(1); + } + } + return result; +} diff --git a/src/fasm/linker/linker.h b/src/fasm/linker/linker.h new file mode 100644 index 0000000..fd7813b --- /dev/null +++ b/src/fasm/linker/linker.h @@ -0,0 +1,78 @@ +#pragma once + +#include <fasm/lexer/lexer.h> +#include <stdint.h> + +typedef struct FasmLinkedLine { + char const *begin; + char const *end; + FasmToken instruction; + uint8_t *operands; + size_t operands_size; +} FasmLinkedLine; + +typedef struct FasmVariable { + char const *begin; + char const *end; + uint64_t value; +} FasmVariable; + +typedef struct FasmLinkedLines { + FasmLinkedLine *lines; + size_t lines_size; + + FasmVariable *variables; + size_t variables_size; + + uint8_t *data; + size_t data_size; +} FasmLinkedLines; + +extern void fasmVariablePrint(FasmVariable variable); +extern void fasmLinkedLinePrint(FasmLinkedLine line); +extern void fasmLinkedLinesPrint(FasmLinkedLines lines); + +extern void fasmLinkedLineDeleteInner(FasmLinkedLine line); +extern void fasmLinkedLinesDeleteInner(FasmLinkedLines lines); + +extern FasmLinkedLines fasmLinker(const FasmLines *lines, + SourceCode *sourceCode); + +extern void fasmLinesSetVariables(FasmLinkedLines *linkedLines, + const FasmLines *lines, + SourceCode *sourceCode); +extern void fasmLinesSetLines(FasmLinkedLines *linkedLines, + const FasmLines *lines, SourceCode *sourceCode); +extern void fasmLinesSetData(FasmLinkedLines *linkedLines, + const FasmLines 
*lines, SourceCode *sourceCode); + +extern FasmLinkedLine fasmLinesParseLine(FasmLinkedLines *linkedLines, + FasmLine line, SourceCode *sourceCode); + +extern bool fasmLinkerOperandSizeCorrect(FasmToken token, int size); + +extern size_t getSizeOfLine(const FasmLine line); +extern size_t getSizeOfLineOperands(const FasmLine line); +extern size_t getSizeOfLineOperandElementSize(const FasmLine line); + +extern void fasmLinesPushVariable(FasmLinkedLines *linkedLines, + FasmVariable variable); +extern void fasmLinesPushLine(FasmLinkedLines *linkedLines, + FasmLinkedLine line); +extern void fasmLinesPushData(FasmLinkedLines *linkedLines, uint8_t *data, + size_t size); +extern FasmVariable fasmLinesGetVariable(const FasmLinkedLines *linkedLines, + char const *nameBegin, + char const *nameEnd); + +extern bool isOperandString(FasmOperand operand); + +extern uint64_t getOperandValue(FasmLinkedLines *linkedLines, + FasmOperand operand, SourceCode *sourceCode); + +extern uint64_t strToInt(const char *begin, const char *end, + SourceCode *sourceCode); +extern uint64_t hexStrToInt(const char *begin, const char *end, + SourceCode *sourceCode); +extern uint64_t binStrToInt(const char *begin, const char *end, + SourceCode *sourceCode); diff --git a/src/fasm/runner/runner.c b/src/fasm/runner/runner.c new file mode 100644 index 0000000..c01db18 --- /dev/null +++ b/src/fasm/runner/runner.c @@ -0,0 +1,646 @@ +#include "runner.h" + +#include <fasm/lexer/lexer.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <syscall.h> +#include <unistd.h> +#include <utils/memory/memory.h> + +#define PUSHN(bits) \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, \ + getNext##bits##Bits(&ip)); \ + ip += bits / 8 + +#define LOADN(bits) \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, \ + *((uint##bits##_t *)popFromStack64Bits( \ + &stack, &stack_size, &stack_filled))) + +#define POPN(bits) \ + { \ + uint##bits##_t *pointer = 
(uint##bits##_t *)popFromStack64Bits( \ + &stack, &stack_size, &stack_filled); \ + *pointer = popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + } + +#define DUPN(bits) \ + { \ + const uint##bits##_t a = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, a); \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, a); \ + } + +#define SWAPN(bits) \ + { \ + const uint##bits##_t a = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + const uint##bits##_t b = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, a); \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, b); \ + } + +#define DROPN(bits) \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); + +#define OPERATION(type, bits, op) \ + { \ + const type a = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + const type b = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + const type result = a op b; \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, \ + *(uint##bits##_t *)&result); \ + } + +#define ADD(type, bits) OPERATION(type, bits, +) + +#define SUB(type, bits) OPERATION(type, bits, -) + +#define NEG(type, bits) \ + { \ + const type a = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + const type result = -a; \ + pushToStack##bits##Bits(&stack, &stack_size, &stack_filled, \ + *(uint##bits##_t *)&result); \ + } + +#define MUL(type, bits) OPERATION(type, bits, *) + +#define DIV(type, bits) OPERATION(type, bits, /) + +#define REM(type, bits) OPERATION(type, bits, %) + +#define CAST(from, from_bits, to, to_bits) \ + { \ + const from a = \ + popFromStack##from_bits##Bits(&stack, &stack_size, &stack_filled); \ + const to result = a; \ + pushToStack##to_bits##Bits(&stack, &stack_size, &stack_filled, \ + *(uint##to_bits##_t 
*)&result); \ + } + +#define COND_JUMP(type, bits, op) \ + { \ + const uint##bits##_t a = \ + popFromStack##bits##Bits(&stack, &stack_size, &stack_filled); \ + \ + if (*((type *)&a)op 0) { \ + ip = (uint8_t *)popFromStack64Bits(&stack, &stack_size, &stack_filled); \ + } \ + } + +int fasmRunner(ByteCode bytecode) { + uint8_t *data = a404m_malloc(bytecode.data_size); + size_t data_size = bytecode.data_size; + memcpy(data, bytecode.data, data_size); + + size_t stack_size = 0; + size_t stack_filled = 0; + uint8_t *stack = a404m_malloc(stack_size); + + size_t functions_size = 0; + size_t functions_index = -1; + FasmFunction *functions = a404m_malloc(functions_size * sizeof(*functions)); + + uint8_t *ip = bytecode.code; + + while (true) { + const FasmToken instruction = *ip; + ++ip; + switch (instruction) { + case FASM_TOKEN_NOOP: + break; + case FASM_TOKEN_PUSH8: + PUSHN(8); + break; + case FASM_TOKEN_PUSH16: + PUSHN(16); + break; + case FASM_TOKEN_PUSH32: + PUSHN(32); + break; + case FASM_TOKEN_PUSH64: + PUSHN(64); + break; + case FASM_TOKEN_LOAD8: + LOADN(8); + break; + case FASM_TOKEN_LOAD16: + LOADN(16); + break; + case FASM_TOKEN_LOAD32: + LOADN(32); + break; + case FASM_TOKEN_LOAD64: + LOADN(64); + break; + case FASM_TOKEN_POP8: + POPN(8); + break; + case FASM_TOKEN_POP16: + POPN(16); + break; + case FASM_TOKEN_POP32: + POPN(32); + break; + case FASM_TOKEN_POP64: + POPN(64); + break; + case FASM_TOKEN_DUP8: + DUPN(8); + break; + case FASM_TOKEN_DUP16: + DUPN(16); + break; + case FASM_TOKEN_DUP32: + DUPN(32); + break; + case FASM_TOKEN_DUP64: + DUPN(64); + break; + case FASM_TOKEN_SWAP8: + SWAPN(8); + break; + case FASM_TOKEN_SWAP16: + SWAPN(16); + break; + case FASM_TOKEN_SWAP32: + SWAPN(32); + break; + case FASM_TOKEN_SWAP64: + SWAPN(64); + break; + case FASM_TOKEN_DROP8: + DROPN(8); + break; + case FASM_TOKEN_DROP16: + DROPN(16); + break; + case FASM_TOKEN_DROP32: + DROPN(32); + break; + case FASM_TOKEN_DROP64: + DROPN(64); + break; + case FASM_TOKEN_ADD_I8: + 
ADD(uint8_t, 8); + break; + case FASM_TOKEN_ADD_I16: + ADD(uint16_t, 16); + break; + case FASM_TOKEN_ADD_I32: + ADD(uint32_t, 32); + break; + case FASM_TOKEN_ADD_I64: + ADD(uint64_t, 64); + break; + case FASM_TOKEN_ADD_F32: + ADD(float, 32); + break; + case FASM_TOKEN_ADD_F64: + ADD(double, 64); + break; + case FASM_TOKEN_SUB_I8: + SUB(uint8_t, 8); + break; + case FASM_TOKEN_SUB_I16: + SUB(uint16_t, 16); + break; + case FASM_TOKEN_SUB_I32: + SUB(uint32_t, 32); + break; + case FASM_TOKEN_SUB_I64: + SUB(uint64_t, 64); + break; + case FASM_TOKEN_SUB_F32: + SUB(float, 32); + break; + case FASM_TOKEN_SUB_F64: + SUB(double, 64); + break; + case FASM_TOKEN_NEG_I8: + NEG(int8_t, 8); + break; + case FASM_TOKEN_NEG_I16: + NEG(int16_t, 16); + break; + case FASM_TOKEN_NEG_I32: + NEG(int32_t, 32); + break; + case FASM_TOKEN_NEG_I64: + NEG(int64_t, 64); + break; + case FASM_TOKEN_NEG_F32: + NEG(float, 32); + break; + case FASM_TOKEN_NEG_F64: + NEG(double, 64); + break; + case FASM_TOKEN_MUL_I8: + MUL(int8_t, 8); + break; + case FASM_TOKEN_MUL_I16: + MUL(int16_t, 16); + break; + case FASM_TOKEN_MUL_I32: + MUL(int32_t, 32); + break; + case FASM_TOKEN_MUL_I64: + MUL(int64_t, 64); + break; + case FASM_TOKEN_MUL_U8: + MUL(uint8_t, 8); + break; + case FASM_TOKEN_MUL_U16: + MUL(uint16_t, 16); + break; + case FASM_TOKEN_MUL_U32: + MUL(uint32_t, 32); + break; + case FASM_TOKEN_MUL_U64: + MUL(uint64_t, 64); + break; + case FASM_TOKEN_MUL_F32: + MUL(float, 32); + break; + case FASM_TOKEN_MUL_F64: + MUL(double, 64); + break; + case FASM_TOKEN_DIV_I8: + DIV(int8_t, 8); + break; + case FASM_TOKEN_DIV_I16: + DIV(int16_t, 16); + break; + case FASM_TOKEN_DIV_I32: + DIV(int32_t, 32); + break; + case FASM_TOKEN_DIV_I64: + DIV(int64_t, 64); + break; + case FASM_TOKEN_DIV_U8: + DIV(uint8_t, 8); + break; + case FASM_TOKEN_DIV_U16: + DIV(uint16_t, 16); + break; + case FASM_TOKEN_DIV_U32: + DIV(uint32_t, 32); + break; + case FASM_TOKEN_DIV_U64: + DIV(uint64_t, 64); + break; + case FASM_TOKEN_DIV_F32: + 
DIV(float, 32); + break; + case FASM_TOKEN_DIV_F64: + DIV(double, 64); + break; + case FASM_TOKEN_REM_I8: + REM(int8_t, 8); + break; + case FASM_TOKEN_REM_I16: + REM(int16_t, 16); + break; + case FASM_TOKEN_REM_I32: + REM(int32_t, 32); + break; + case FASM_TOKEN_REM_I64: + REM(int64_t, 64); + break; + case FASM_TOKEN_REM_U8: + REM(uint8_t, 8); + break; + case FASM_TOKEN_REM_U16: + REM(uint16_t, 16); + break; + case FASM_TOKEN_REM_U32: + REM(uint32_t, 32); + break; + case FASM_TOKEN_REM_U64: + REM(uint64_t, 64); + break; + case FASM_TOKEN_CAST_I8_I64: + CAST(int8_t, 8, int64_t, 64); + break; + case FASM_TOKEN_CAST_I16_I64: + CAST(int16_t, 16, int64_t, 64); + break; + case FASM_TOKEN_CAST_I32_I64: + CAST(int32_t, 32, int64_t, 64); + break; + case FASM_TOKEN_CAST_I64_I8: + CAST(int64_t, 64, int8_t, 8); + break; + case FASM_TOKEN_CAST_I64_I16: + CAST(int64_t, 64, int16_t, 16); + break; + case FASM_TOKEN_CAST_I64_I32: + CAST(int64_t, 64, int32_t, 32); + break; + case FASM_TOKEN_CAST_F64_I64: + CAST(double, 64, int64_t, 64); + break; + case FASM_TOKEN_CAST_I64_F64: + CAST(int64_t, 64, double, 64); + break; + case FASM_TOKEN_CAST_U8_U64: + CAST(uint8_t, 8, uint64_t, 64); + break; + case FASM_TOKEN_CAST_U16_U64: + CAST(uint16_t, 16, uint64_t, 64); + break; + case FASM_TOKEN_CAST_U32_U64: + CAST(uint32_t, 32, uint64_t, 64); + break; + case FASM_TOKEN_CAST_U64_U8: + CAST(uint64_t, 64, uint8_t, 8); + break; + case FASM_TOKEN_CAST_U64_U16: + CAST(uint64_t, 64, uint16_t, 16); + break; + case FASM_TOKEN_CAST_U64_U32: + CAST(uint64_t, 64, uint32_t, 32); + break; + case FASM_TOKEN_CAST_F64_U64: + CAST(double, 64, uint64_t, 64); + break; + case FASM_TOKEN_CAST_U64_F64: + CAST(uint64_t, 64, double, 64); + break; + case FASM_TOKEN_CAST_F32_F64: + CAST(float, 32, double, 64); + break; + case FASM_TOKEN_CAST_F64_F32: + CAST(double, 64, float, 32); + break; + case FASM_TOKEN_JUMP: + ip = (uint8_t *)popFromStack64Bits(&stack, &stack_size, &stack_filled); + break; + case FASM_TOKEN_JZ_I8: 
+ COND_JUMP(int8_t, 8, ==); + break; + case FASM_TOKEN_JNZ_I8: + COND_JUMP(int8_t, 8, !=); + break; + case FASM_TOKEN_JN_I8: + COND_JUMP(int8_t, 8, <); + break; + case FASM_TOKEN_JNN_I8: + COND_JUMP(int8_t, 8, >=); + break; + case FASM_TOKEN_JP_I8: + COND_JUMP(int8_t, 8, >); + break; + case FASM_TOKEN_JNP_I8: + COND_JUMP(int8_t, 8, <=); + break; + case FASM_TOKEN_JZ_I16: + COND_JUMP(int16_t, 16, ==); + break; + case FASM_TOKEN_JNZ_I16: + COND_JUMP(int16_t, 16, !=); + break; + case FASM_TOKEN_JN_I16: + COND_JUMP(int16_t, 16, <); + break; + case FASM_TOKEN_JNN_I16: + COND_JUMP(int16_t, 16, >=); + break; + case FASM_TOKEN_JP_I16: + COND_JUMP(int16_t, 16, >); + break; + case FASM_TOKEN_JNP_I16: + COND_JUMP(int16_t, 16, <=); + break; + case FASM_TOKEN_JZ_I32: + COND_JUMP(int32_t, 32, ==); + break; + case FASM_TOKEN_JNZ_I32: + COND_JUMP(int32_t, 32, !=); + break; + case FASM_TOKEN_JN_I32: + COND_JUMP(int32_t, 32, <); + break; + case FASM_TOKEN_JNN_I32: + COND_JUMP(int32_t, 32, >=); + break; + case FASM_TOKEN_JP_I32: + COND_JUMP(int32_t, 32, >); + break; + case FASM_TOKEN_JNP_I32: + COND_JUMP(int32_t, 32, <=); + break; + case FASM_TOKEN_JZ_I64: + COND_JUMP(int64_t, 64, ==); + break; + case FASM_TOKEN_JNZ_I64: + COND_JUMP(int64_t, 64, !=); + break; + case FASM_TOKEN_JN_I64: + COND_JUMP(int64_t, 64, <); + break; + case FASM_TOKEN_JNN_I64: + COND_JUMP(int64_t, 64, >=); + break; + case FASM_TOKEN_JP_I64: + COND_JUMP(int64_t, 64, >); + break; + case FASM_TOKEN_JNP_I64: + COND_JUMP(int64_t, 64, <=); + break; + case FASM_TOKEN_JZ_F32: + COND_JUMP(float, 32, ==); + break; + case FASM_TOKEN_JNZ_F32: + COND_JUMP(float, 32, !=); + break; + case FASM_TOKEN_JN_F32: + COND_JUMP(float, 32, <); + break; + case FASM_TOKEN_JNN_F32: + COND_JUMP(float, 32, >=); + break; + case FASM_TOKEN_JP_F32: + COND_JUMP(float, 32, >); + break; + case FASM_TOKEN_JNP_F32: + COND_JUMP(float, 32, <=); + break; + case FASM_TOKEN_JZ_F64: + COND_JUMP(double, 64, ==); + break; + case FASM_TOKEN_JNZ_F64: + 
COND_JUMP(double, 64, !=); + break; + case FASM_TOKEN_JN_F64: + COND_JUMP(double, 64, <); + break; + case FASM_TOKEN_JNN_F64: + COND_JUMP(double, 64, >=); + break; + case FASM_TOKEN_JP_F64: + COND_JUMP(double, 64, >); + break; + case FASM_TOKEN_JNP_F64: + COND_JUMP(double, 64, <=); + break; + case FASM_TOKEN_ALLOC_HEAP: + pushToStack64Bits(&stack, &stack_size, &stack_filled, + (uint64_t)a404m_malloc(popFromStack64Bits( + &stack, &stack_size, &stack_filled))); + break; + case FASM_TOKEN_ALLOC_STACK: + functions[functions_index].stack_size = + popFromStack64Bits(&stack, &stack_size, &stack_filled); + functions[functions_index].stack = + a404m_malloc(functions[functions_index].stack_size); + break; + case FASM_TOKEN_FREE_HEAP: + free((void *)popFromStack64Bits(&stack, &stack_size, &stack_filled)); + break; + case FASM_TOKEN_GET_STACK_ADDRESS: + pushToStack64Bits(&stack, &stack_size, &stack_filled, + (uint64_t)functions[functions_index].stack); + break; + case FASM_TOKEN_GET_GLOBAL_ADDRESS: + pushToStack64Bits(&stack, &stack_size, &stack_filled, (uint64_t)data); + break; + case FASM_TOKEN_CALL: { + uint8_t *const newIp = + (uint8_t *)popFromStack64Bits(&stack, &stack_size, &stack_filled); + ++functions_index; + if (functions_index == functions_size) { + functions_size += functions_size / 2 + 1; + functions = + a404m_realloc(functions, functions_size * sizeof(*functions)); + } + FasmFunction function = { + .returnTo = ip, + .stack = a404m_malloc(0), + .stack_size = 0, + }; + functions[functions_index] = function; + ip = newIp; + } break; + case FASM_TOKEN_RET: { + FasmFunction function = functions[functions_index]; + free(function.stack); + ip = function.returnTo; + --functions_index; + if (functions_index + sizeof(*functions) < functions_size / 2) { + functions_size = functions_size / 2; + functions = + a404m_realloc(functions, functions_size * sizeof(*functions)); + } + } break; + case FASM_TOKEN_SYSCALL: { + switch ((FasmSyscall)popFromStack8Bits(&stack, &stack_size, 
/*
 * Defines getNext<bits>Bits(): reads one unsigned <bits>-bit value from the
 * byte stream at *pos, in host byte order, and advances *pos past it.
 *
 * pos: address of the caller's read cursor. The cursor must have at least
 *      bits/8 readable bytes; on return it points just after the value.
 * Returns the value that was stored at the cursor before the call.
 */
#define getNextNBits(bits)                                                     \
  uint##bits##_t getNext##bits##Bits(uint8_t **pos) {                          \
    uint##bits##_t value;                                                      \
    uint8_t *const out = (uint8_t *)&value;                                    \
    /* Copy byte by byte: the cursor is not guaranteed to be aligned. */       \
    for (size_t i = 0; i < sizeof(value); ++i) {                               \
      out[i] = (*pos)[i];                                                      \
    }                                                                          \
    /* Move the caller's cursor, not the local pointer to it. */               \
    *pos += sizeof(value);                                                     \
    return value;                                                              \
  }

/* No trailing ';': each expansion is already a complete function definition. */
getNextNBits(8)
getNextNBits(16)
getNextNBits(32)
getNextNBits(64)
/*
 * Defines pushToStack<bits>Bits(): appends `value` (host byte order) on top
 * of the byte-addressed operand stack.
 *
 * stack:        address of the heap buffer pointer; rewritten whenever the
 *               buffer is reallocated.
 * stack_size:   capacity of *stack in bytes; updated on growth.
 * stack_filled: number of bytes in use; increased by bits/8.
 * value:        the value to store.
 */
#define pushToStackNBits(bits)                                                 \
  void pushToStack##bits##Bits(uint8_t **stack, size_t *stack_size,            \
                               size_t *stack_filled, uint##bits##_t value) {   \
    const size_t new_stack_filled = *stack_filled + sizeof(value);             \
    if (new_stack_filled >= *stack_size) {                                     \
      /* Grow by 1.5x (+1) so repeated pushes do not realloc every time. */    \
      *stack_size = new_stack_filled + new_stack_filled / 2 + 1;               \
      *stack = a404m_realloc(*stack, *stack_size);                             \
    }                                                                          \
    /* Byte-wise store: the top of the stack can be at any byte offset, */     \
    /* so writing through a uintN_t pointer could be misaligned. */            \
    const uint8_t *const src = (const uint8_t *)&value;                        \
    uint8_t *const dst = *stack + *stack_filled;                               \
    for (size_t i = 0; i < sizeof(value); ++i) {                               \
      dst[i] = src[i];                                                         \
    }                                                                          \
    *stack_filled = new_stack_filled;                                          \
  }

/* No trailing ';': each expansion is already a complete function definition. */
pushToStackNBits(8)
pushToStackNBits(16)
pushToStackNBits(32)
pushToStackNBits(64)

/*
 * Defines popFromStack<bits>Bits(): removes the top bits/8 bytes from the
 * operand stack and returns them as an unsigned <bits>-bit value (host byte
 * order).
 *
 * stack:        address of the heap buffer pointer; rewritten whenever the
 *               buffer is reallocated.
 * stack_size:   capacity of *stack in bytes; updated when the buffer shrinks.
 * stack_filled: number of bytes in use; decreased by bits/8.
 *
 * If fewer than bits/8 bytes are on the stack, an error is printed to stderr
 * and the process exits with status 1 instead of reading out of bounds.
 */
#define popFromStackNBit(bits)                                                 \
  uint##bits##_t popFromStack##bits##Bits(uint8_t **stack, size_t *stack_size, \
                                          size_t *stack_filled) {              \
    uint##bits##_t value;                                                      \
    if (*stack_filled < sizeof(value)) {                                       \
      /* Without this check *stack_filled would wrap around. */                \
      fprintf(stderr, "Fasm stack underflow\n");                               \
      exit(1);                                                                 \
    }                                                                          \
    *stack_filled -= sizeof(value);                                            \
    /* Byte-wise load for the same alignment reason as in push. */             \
    const uint8_t *const src = *stack + *stack_filled;                         \
    uint8_t *const dst = (uint8_t *)&value;                                    \
    for (size_t i = 0; i < sizeof(value); ++i) {                               \
      dst[i] = src[i];                                                         \
    }                                                                          \
    if (*stack_filled < *stack_size / 2) {                                     \
      /* Shrink, but keep the same 1.5x (+1) headroom that push uses: */       \
      /* this never asks for a zero-sized block and leaves room for the */     \
      /* next push. */                                                         \
      *stack_size = *stack_filled + *stack_filled / 2 + 1;                     \
      *stack = a404m_realloc(*stack, *stack_size);                             \
    }                                                                          \
    return value;                                                              \
  }

popFromStackNBit(8)
popFromStackNBit(16)
popFromStackNBit(32)
popFromStackNBit(64)
#pragma once

#include <fasm/code_generator/code_generator.h>
#include <stdint.h>

/*
 * One entry of the runner's call-frame array.
 *
 * returnTo:   instruction pointer saved by the CALL instruction and restored
 *             by RET.
 * stack:      heap buffer for the frame's local storage; allocated by the
 *             ALLOC_STACK instruction and freed by RET.
 * stack_size: size in bytes of `stack`, as set by ALLOC_STACK.
 */
typedef struct FasmFunction {
  uint8_t *returnTo;
  uint8_t *stack;
  size_t stack_size;
}FasmFunction;

/*
 * Interprets `bytecode`. When the program issues the exit syscall, the
 * runner frees its own buffers and returns the 32-bit status popped from the
 * operand stack. An unknown instruction prints a message to stderr and
 * terminates the process with exit(1).
 * NOTE(review): the runner appears to free the data buffer on exit; confirm
 * whether that is bytecode.data and who owns it afterwards.
 */
extern int fasmRunner(ByteCode bytecode);

/*
 * Declares getNext<bits>Bits(): reads an unsigned <bits>-bit value from the
 * location *pos points at.
 * NOTE(review): the name suggests the cursor is advanced past the value;
 * verify against the definition in runner.c.
 */
#define getNextNBitsHeader(bits) \
  extern uint##bits##_t getNext##bits##Bits(uint8_t **pos)
getNextNBitsHeader(8);
getNextNBitsHeader(16);
getNextNBitsHeader(32);
getNextNBitsHeader(64);

/*
 * Declares pushToStack<bits>Bits(): stores `value` at offset *stack_filled
 * of the buffer *stack, growing the buffer (and updating *stack and
 * *stack_size) when needed, then adds bits/8 to *stack_filled.
 */
#define pushToStackNBitsHeader(bits)                                       \
  extern void pushToStack##bits##Bits(uint8_t **stack, size_t *stack_size, \
                                      size_t *stack_filled,                \
                                      uint##bits##_t value)
pushToStackNBitsHeader(8);
pushToStackNBitsHeader(16);
pushToStackNBitsHeader(32);
pushToStackNBitsHeader(64);

/*
 * Declares popFromStack<bits>Bits(): subtracts bits/8 from *stack_filled and
 * returns the value stored at that offset of *stack; the buffer may be
 * reallocated to a smaller size (updating *stack and *stack_size).
 */
#define popFromStackNBitsHeader(bits)             \
  extern uint##bits##_t popFromStack##bits##Bits( \
      uint8_t **stack, size_t *stack_size, size_t *stack_filled)
popFromStackNBitsHeader(8);
popFromStackNBitsHeader(16);
popFromStackNBitsHeader(32);
popFromStackNBitsHeader(64);