From addd54dc31603dc204773d3108dba4e000cd7657 Mon Sep 17 00:00:00 2001 From: A404M Date: Tue, 8 Oct 2024 04:16:27 +0330 Subject: added fasm support added compiler options tried to compile to fasm first --- src/fasm/lexer/lexer.c | 643 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 643 insertions(+) create mode 100644 src/fasm/lexer/lexer.c (limited to 'src/fasm/lexer/lexer.c') diff --git a/src/fasm/lexer/lexer.c b/src/fasm/lexer/lexer.c new file mode 100644 index 0000000..e3e9610 --- /dev/null +++ b/src/fasm/lexer/lexer.c @@ -0,0 +1,643 @@ +#include "lexer.h" + +#include +#include +#include +#include +#include +#include +#include + +const char *FASM_TOKEN_STRINGS[] = { + "NOOP", + "PUSH8", + "PUSH16", + "PUSH32", + "PUSH64", + "LOAD8", + "LOAD16", + "LOAD32", + "LOAD64", + "POP8", + "POP16", + "POP32", + "POP64", + "DUP8", + "DUP16", + "DUP32", + "DUP64", + "SWAP8", + "SWAP16", + "SWAP32", + "SWAP64", + "DROP8", + "DROP16", + "DROP32", + "DROP64", + "ADD_I8", + "ADD_I16", + "ADD_I32", + "ADD_I64", + "ADD_F32", + "ADD_F64", + "SUB_I8", + "SUB_I16", + "SUB_I32", + "SUB_I64", + "SUB_F32", + "SUB_F64", + "NEG_I8", + "NEG_I16", + "NEG_I32", + "NEG_I64", + "NEG_F32", + "NEG_F64", + "MUL_I8", + "MUL_I16", + "MUL_I32", + "MUL_I64", + "MUL_U8", + "MUL_U16", + "MUL_U32", + "MUL_U64", + "MUL_F32", + "MUL_F64", + "DIV_I8", + "DIV_I16", + "DIV_I32", + "DIV_I64", + "DIV_U8", + "DIV_U16", + "DIV_U32", + "DIV_U64", + "DIV_F32", + "DIV_F64", + "REM_I8", + "REM_I16", + "REM_I32", + "REM_I64", + "REM_U8", + "REM_U16", + "REM_U32", + "REM_U64", + "CAST_I8_I64", + "CAST_I16_I64", + "CAST_I32_I64", + "CAST_I64_I8", + "CAST_I64_I16", + "CAST_I64_I32", + "CAST_F64_I64", + "CAST_I64_F64", + "CAST_U8_U64", + "CAST_U16_U64", + "CAST_U32_U64", + "CAST_U64_U8", + "CAST_U64_U16", + "CAST_U64_U32", + "CAST_F64_U64", + "CAST_U64_F64", + "CAST_F32_F64", + "CAST_F64_F32", + "JUMP", + "JZ_I8", + "JNZ_I8", + "JN_I8", + "JNN_I8", + "JP_I8", + "JNP_I8", + "JZ_I16", + "JNZ_I16", + "JN_I16", + "JNN_I16", + "JP_I16", + "JNP_I16", + "JZ_I32", + "JNZ_I32", + "JN_I32", + "JNN_I32", + "JP_I32", + "JNP_I32", + "JZ_I64", + "JNZ_I64", + "JN_I64", + "JNN_I64", + "JP_I64", + "JNP_I64", + "JZ_F32", + "JNZ_F32", + "JN_F32", + "JNN_F32", + "JP_F32", + "JNP_F32", + "JZ_F64", + "JNZ_F64", + "JN_F64", + "JNN_F64", + "JP_F64", + "JNP_F64", + "ALLOC_HEAP", + "ALLOC_STACK", + "FREE_HEAP", + "GET_STACK_ADDRESS", + "GET_GLOBAL_ADDRESS", + "CALL", + "RET", + "SYSCALL", + "DEFINE_BYTE", + "DEFINE_WORD", + "DEFINE_DWORD", + "DEFINE_QWORD", + "NONE", +}; + +const char *FASM_LINE_LOOKING_FOR_STRINGS[] = { + "FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION", + "FASM_LINE_LOOKING_FOR_INSTRUCTION", + "FASM_LINE_LOOKING_FOR_OPERAND", + "FASM_LINE_LOOKING_FOR_OPERAND_OR_END", + "FASM_LINE_LOOKING_FOR_COMMA_OR_END", +}; + +const size_t FASM_TOKEN_STRINGS_SIZE = + sizeof(FASM_TOKEN_STRINGS) / sizeof(char *); + +void fasmLinePrint(FasmLine line) { + printf("{label='%.*s',instruction='%s',operands=[\n", + (int)(line.labelEnd - line.labelBegin), line.labelBegin, + FASM_TOKEN_STRINGS[line.instruction]); + for (size_t i = 0; i < line.operands_size; ++i) { + const FasmOperand operand = line.operands[i]; + printf(" {'%.*s'},\n", (int)(operand.end - operand.begin), operand.begin); + } + printf("]}\n"); +} + +void fasmLinesPrint(FasmLines lines) { + printf("section code\n"); + for (size_t i = 0; i < lines.lines_size; ++i) { + fasmLinePrint(lines.lines[i]); + } + printf("section data\n"); + for (size_t i = 0; i < lines.data_size; ++i) { + fasmLinePrint(lines.data[i]); + } +} + +void fasmLineDeleteInner(FasmLine line) { free(line.operands); } + +void fasmLinesDeleteInner(FasmLines lines) { + for (size_t i = 0; i < lines.lines_size; ++i) { + fasmLineDeleteInner(lines.lines[i]); + } + for (size_t i = 0; i < lines.data_size; ++i) { + fasmLineDeleteInner(lines.data[i]); + } + free(lines.lines); + free(lines.data); +} + +FasmLines *fasmLexer(SourceCode *sourceCode) { + FasmLines *lines = a404m_malloc(sourceCode->size * sizeof(FasmLines)); + + for (size_t i = 0; i < sourceCode->size; ++i) { + if ((lines[i] = fasmLexerCode(sourceCode->codes[i], sourceCode)) + .lines_size == ERROR_SIZE) { + goto RETURN_ERROR; + } + } + + return lines; +RETURN_ERROR: + free(lines); + return NULL; +} + +FasmLines fasmLexerCode(Code *code, SourceCode *sourceCode) { + FasmLineLookingFor lookingFor = FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION; + FasmLine line = { + .begin = code->code, + .end = code->code, + .labelBegin = NULL, + .labelEnd = NULL, + .instruction = FASM_TOKEN_NONE, + .operands = a404m_malloc(0), + .operands_size = 0, + }; + + FasmLines lines = { + .lines = a404m_malloc(0), + .lines_size = 0, + .data = a404m_malloc(0), + .data_size = 0, + }; + + FasmSection section = FASM_SECTION_NONE; + + for (char *iter = code->code;; ++iter) { + LOOP_BEGIN: + const char c = *iter; + if (c == '\0') { + switch (lookingFor) { + case FASM_LINE_LOOKING_FOR_INSTRUCTION: + case FASM_LINE_LOOKING_FOR_OPERAND: + printError("Expected instruction", sourceCode, line.begin, iter); + goto RETURN_ERROR; + case FASM_LINE_LOOKING_FOR_OPERAND_OR_END: + case FASM_LINE_LOOKING_FOR_COMMA_OR_END: + fasmLexerPushLine(&lines, &line, iter, section, sourceCode); + /* pass through */ + case FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION: + goto RETURN_SUCCESS; + } + } else if (fasmLexerIsSpace(c)) { + continue; + } + /*fprintf(stderr, "a404m: Char '%c' at %ld and looking for '%s'\n", c,*/ + /* iter - code->code, FASM_LINE_LOOKING_FOR_STRINGS[lookingFor]);*/ + switch (lookingFor) { + case FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION: + if (fasmLexerIsLineSeparator(c)) { + continue; + } else if (fasmLexerIsSectionIndicator(c)) { + static const char *const SECTIONS_STRINGS[] = { + "code", + "data", + }; + static const FasmSection SECTIONS[] = { + FASM_SECTION_CODE, + FASM_SECTION_DATA, + }; + static const size_t SECTIONS_SIZE = + sizeof(SECTIONS_STRINGS) / sizeof(*SECTIONS_STRINGS); + + ++iter; + + for (size_t i = 0; i < SECTIONS_SIZE; ++i) { + const char *const sectionStr = SECTIONS_STRINGS[i]; + for (size_t j = 0;; ++j) { + const char c0 = sectionStr[j]; + const char c1 = iter[j]; + if (c0 == '\0') { + if (c1 == '\0' || isspace(c1)) { + iter += j; + section = SECTIONS[i]; + /*fprintf(stderr, "section changed to '%s'\n", sectionStr);*/ + goto LOOP_BEGIN; + } else { + break; + } + } else if (c0 != c1) { // no need for c1 == '\0' + break; + } + } + } + printError("Invalid section", sourceCode, iter - 1, iter); + goto RETURN_ERROR; + } else if (fasmLexerIsWord(c)) { + char *begin = iter; + char *end = iter = fasmLexerGetNextWord(iter); + line.begin = begin; + line.end = end; + if (fasmLexerIsLabel(*iter)) { + ++iter; + line.labelBegin = begin; + line.labelEnd = end; + lookingFor = FASM_LINE_LOOKING_FOR_INSTRUCTION; + } else { + if ((line.instruction = fasmLexerTokenFromIdentifier(begin, end)) == + FASM_TOKEN_NONE) { + printError("Unknown instruction", sourceCode, begin, end); + goto RETURN_ERROR; + } + + lookingFor = FASM_LINE_LOOKING_FOR_OPERAND_OR_END; + } + goto LOOP_BEGIN; + } else { + UNEXPECTED: + fasmLinePrint(line); + printError("Unexpected character", sourceCode, iter, iter + 1); + goto RETURN_ERROR; + } + break; + case FASM_LINE_LOOKING_FOR_INSTRUCTION: + if (fasmLexerIsWord(c)) { + char *begin = iter; + char *end = iter = fasmLexerGetNextWord(iter); + line.end = end; + + if ((line.instruction = fasmLexerTokenFromIdentifier(begin, end)) == + FASM_TOKEN_NONE) { + printError("Unknown instruction", sourceCode, begin, end); + goto RETURN_ERROR; + } + + lookingFor = FASM_LINE_LOOKING_FOR_OPERAND_OR_END; + goto LOOP_BEGIN; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + break; + case FASM_LINE_LOOKING_FOR_OPERAND: + if (fasmLexerIsWord(c)) { + LEX_OPERAND: + char *begin = iter; + char *end = iter = fasmLexerGetNextWord(iter); + const size_t size = + a404m_malloc_usable_size(line.operands) / sizeof(*line.operands); + if (line.operands_size == size) { + line.operands = a404m_realloc( + line.operands, + (line.operands_size + line.operands_size / 2 + 1) * + sizeof(*line.operands)); + } + line.operands[line.operands_size].begin = begin; + line.operands[line.operands_size].end = end; + line.operands_size += 1; + line.end = end; + lookingFor = FASM_LINE_LOOKING_FOR_COMMA_OR_END; + goto LOOP_BEGIN; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + break; + case FASM_LINE_LOOKING_FOR_OPERAND_OR_END: + if (fasmLexerIsWord(c)) { + goto LEX_OPERAND; + } else if (fasmLexerIsLineSeparator(c)) { + goto LEX_END; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + break; + case FASM_LINE_LOOKING_FOR_COMMA_OR_END: + if (fasmLexerIsLineSeparator(c)) { + LEX_END: + fasmLexerPushLine(&lines, &line, iter, section, sourceCode); + lookingFor = FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION; + } else if (fasmLexerIsOperandSeparator(c)) { + lookingFor = FASM_LINE_LOOKING_FOR_OPERAND; + } else { + fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__); + goto UNEXPECTED; + } + } + } + +RETURN_SUCCESS: + return lines; + +RETURN_ERROR: + free(lines.lines); + const FasmLines error = { + .lines_size = ERROR_SIZE, + }; + return error; +} + +bool fasmLexerPushLine(FasmLines *lines, FasmLine *line, char const *iter, + FasmSection section, SourceCode *sourceCode) { + if (!fasmLexerIsAllowed(*line, section)) { + printError("Instruction is not allowed here", sourceCode, line->begin, + line->end); + return false; + } + line->operands = a404m_realloc(line->operands, + line->operands_size * sizeof(*line->operands)); + switch (section) { + case FASM_SECTION_NONE: + printError("Instruction is in no section", sourceCode, line->begin, + line->end); + return true; + case FASM_SECTION_CODE: + _fasmLexerPushLine(&lines->lines, &lines->lines_size, line, iter); + return true; + case FASM_SECTION_DATA: + _fasmLexerPushLine(&lines->data, &lines->data_size, line, iter); + return true; + } + fprintf(stderr, "Bad section '%d'\n", section); + return false; +} + +void _fasmLexerPushLine(FasmLine **lines, size_t *lines_size, FasmLine *line, + char const *) { + const size_t size = a404m_malloc_usable_size(*lines) / sizeof(**lines); + if (size == *lines_size) { + *lines = a404m_realloc(*lines, (size * 2 + 1) * sizeof(**lines)); + } + // no need + /*line->end = iter;*/ + (*lines)[*lines_size] = *line; + *lines_size += 1; + + line->operands = a404m_malloc(0); + line->operands_size = 0; + + /*line->begin = iter;*/ + line->labelBegin = NULL; + line->labelEnd = NULL; + /*line->instruction = FASM_TOKEN_NONE;*/ +} + +bool fasmLexerIsAllowed(FasmLine line, FasmSection section) { + switch (line.instruction) { + case FASM_TOKEN_NOOP: + case FASM_TOKEN_PUSH8: + case FASM_TOKEN_PUSH16: + case FASM_TOKEN_PUSH32: + case FASM_TOKEN_PUSH64: + case FASM_TOKEN_LOAD8: + case FASM_TOKEN_LOAD16: + case FASM_TOKEN_LOAD32: + case FASM_TOKEN_LOAD64: + case FASM_TOKEN_POP8: + case FASM_TOKEN_POP16: + case FASM_TOKEN_POP32: + case FASM_TOKEN_POP64: + case FASM_TOKEN_DUP8: + case FASM_TOKEN_DUP16: + case FASM_TOKEN_DUP32: + case FASM_TOKEN_DUP64: + case FASM_TOKEN_SWAP8: + case FASM_TOKEN_SWAP16: + case FASM_TOKEN_SWAP32: + case FASM_TOKEN_SWAP64: + case FASM_TOKEN_DROP8: + case FASM_TOKEN_DROP16: + case FASM_TOKEN_DROP32: + case FASM_TOKEN_DROP64: + case FASM_TOKEN_ADD_I8: + case FASM_TOKEN_ADD_I16: + case FASM_TOKEN_ADD_I32: + case FASM_TOKEN_ADD_I64: + case FASM_TOKEN_ADD_F32: + case FASM_TOKEN_ADD_F64: + case FASM_TOKEN_SUB_I8: + case FASM_TOKEN_SUB_I16: + case FASM_TOKEN_SUB_I32: + case FASM_TOKEN_SUB_I64: + case FASM_TOKEN_SUB_F32: + case FASM_TOKEN_SUB_F64: + case FASM_TOKEN_NEG_I8: + case FASM_TOKEN_NEG_I16: + case FASM_TOKEN_NEG_I32: + case FASM_TOKEN_NEG_I64: + case FASM_TOKEN_NEG_F32: + case FASM_TOKEN_NEG_F64: + case FASM_TOKEN_MUL_I8: + case FASM_TOKEN_MUL_I16: + case FASM_TOKEN_MUL_I32: + case FASM_TOKEN_MUL_I64: + case FASM_TOKEN_MUL_U8: + case FASM_TOKEN_MUL_U16: + case FASM_TOKEN_MUL_U32: + case FASM_TOKEN_MUL_U64: + case FASM_TOKEN_MUL_F32: + case FASM_TOKEN_MUL_F64: + case FASM_TOKEN_DIV_I8: + case FASM_TOKEN_DIV_I16: + case FASM_TOKEN_DIV_I32: + case FASM_TOKEN_DIV_I64: + case FASM_TOKEN_DIV_U8: + case FASM_TOKEN_DIV_U16: + case FASM_TOKEN_DIV_U32: + case FASM_TOKEN_DIV_U64: + case FASM_TOKEN_DIV_F32: + case FASM_TOKEN_DIV_F64: + case FASM_TOKEN_REM_I8: + case FASM_TOKEN_REM_I16: + case FASM_TOKEN_REM_I32: + case FASM_TOKEN_REM_I64: + case FASM_TOKEN_REM_U8: + case FASM_TOKEN_REM_U16: + case FASM_TOKEN_REM_U32: + case FASM_TOKEN_REM_U64: + case FASM_TOKEN_CAST_I8_I64: + case FASM_TOKEN_CAST_I16_I64: + case FASM_TOKEN_CAST_I32_I64: + case FASM_TOKEN_CAST_I64_I8: + case FASM_TOKEN_CAST_I64_I16: + case FASM_TOKEN_CAST_I64_I32: + case FASM_TOKEN_CAST_F64_I64: + case FASM_TOKEN_CAST_I64_F64: + case FASM_TOKEN_CAST_U8_U64: + case FASM_TOKEN_CAST_U16_U64: + case FASM_TOKEN_CAST_U32_U64: + case FASM_TOKEN_CAST_U64_U8: + case FASM_TOKEN_CAST_U64_U16: + case FASM_TOKEN_CAST_U64_U32: + case FASM_TOKEN_CAST_F64_U64: + case FASM_TOKEN_CAST_U64_F64: + case FASM_TOKEN_CAST_F32_F64: + case FASM_TOKEN_CAST_F64_F32: + case FASM_TOKEN_JUMP: + case FASM_TOKEN_JZ_I8: + case FASM_TOKEN_JNZ_I8: + case FASM_TOKEN_JN_I8: + case FASM_TOKEN_JNN_I8: + case FASM_TOKEN_JP_I8: + case FASM_TOKEN_JNP_I8: + case FASM_TOKEN_JZ_I16: + case FASM_TOKEN_JNZ_I16: + case FASM_TOKEN_JN_I16: + case FASM_TOKEN_JNN_I16: + case FASM_TOKEN_JP_I16: + case FASM_TOKEN_JNP_I16: + case FASM_TOKEN_JZ_I32: + case FASM_TOKEN_JNZ_I32: + case FASM_TOKEN_JN_I32: + case FASM_TOKEN_JNN_I32: + case FASM_TOKEN_JP_I32: + case FASM_TOKEN_JNP_I32: + case FASM_TOKEN_JZ_I64: + case FASM_TOKEN_JNZ_I64: + case FASM_TOKEN_JN_I64: + case FASM_TOKEN_JNN_I64: + case FASM_TOKEN_JP_I64: + case FASM_TOKEN_JNP_I64: + case FASM_TOKEN_JZ_F32: + case FASM_TOKEN_JNZ_F32: + case FASM_TOKEN_JN_F32: + case FASM_TOKEN_JNN_F32: + case FASM_TOKEN_JP_F32: + case FASM_TOKEN_JNP_F32: + case FASM_TOKEN_JZ_F64: + case FASM_TOKEN_JNZ_F64: + case FASM_TOKEN_JN_F64: + case FASM_TOKEN_JNN_F64: + case FASM_TOKEN_JP_F64: + case FASM_TOKEN_JNP_F64: + case FASM_TOKEN_ALLOC_HEAP: + case FASM_TOKEN_ALLOC_STACK: + case FASM_TOKEN_FREE_HEAP: + case FASM_TOKEN_GET_STACK_ADDRESS: + case FASM_TOKEN_GET_GLOBAL_ADDRESS: + case FASM_TOKEN_CALL: + case FASM_TOKEN_RET: + case FASM_TOKEN_SYSCALL: + return section == FASM_SECTION_CODE; + case FASM_TOKEN_DEFINE_BYTE: + case FASM_TOKEN_DEFINE_WORD: + case FASM_TOKEN_DEFINE_DWORD: + case FASM_TOKEN_DEFINE_QWORD: + return section == FASM_SECTION_DATA; + case FASM_TOKEN_NONE: + return false; + } + fprintf(stderr, "Bad token %d at %s:%d\n", line.instruction, __FILE_NAME__, + __LINE__); + exit(1); +} + +char *fasmLexerGetNextWord(char *iter) { + if (fasmLexerIsString(*iter)) { + const char begin = *iter; + for (++iter; *iter != begin; ++iter) { + if (*iter == '\0') { + fprintf(stderr, "No ending for string at %s:%d\n", __FILE_NAME__, + __LINE__); + exit(1); + } + } + ++iter; + } else { + for (++iter; *iter != '\0' && fasmLexerIsWord(*iter); ++iter); + } + return iter; +} + +FasmToken fasmLexerTokenFromIdentifier(char *begin, char *end) { + const size_t size = end - begin; + char *uppered = a404m_malloc((size + 1) * sizeof(char)); + for (char *iter = begin; iter < end; ++iter) { + uppered[iter - begin] = toupper(*iter); + } + uppered[size] = '\0'; + + for (size_t i = 0; i < FASM_TOKEN_STRINGS_SIZE; ++i) { + const char *str = FASM_TOKEN_STRINGS[i]; + if (strcmp(uppered, str) == 0) { + free(uppered); + return (FasmToken)i; + } + } + + free(uppered); + return FASM_TOKEN_NONE; +} + +bool fasmLexerIsSpace(char c) { return c != '\n' && isspace(c); } +bool fasmLexerIsSectionIndicator(char c) { return c == '.'; } +bool fasmLexerIsLabel(char c) { return c == ':'; } +bool fasmLexerIsWord(char c) { + return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || + ('0' <= c && c <= '9') || c == '_' || fasmLexerIsString(c); +} +extern bool fasmLexerIsIdentifierSymbol(char c) { return c == '`'; } +bool fasmLexerIsString(char c) { return c == '\'' || c == '\"'; } +bool fasmLexerIsOperandSeparator(char c) { return c == ','; } +bool fasmLexerIsLineSeparator(char c) { return c == '\n'; } -- cgit v1.2.3