aboutsummaryrefslogtreecommitdiff
path: root/src/fasm/lexer
diff options
context:
space:
mode:
authorA404M <ahmadmahmoudiprogrammer@gmail.com>2024-10-08 04:16:27 +0330
committerA404M <ahmadmahmoudiprogrammer@gmail.com>2024-10-08 04:17:08 +0330
commitaddd54dc31603dc204773d3108dba4e000cd7657 (patch)
tree621620c4ca5634680d7655e3474cf0b0bcec8e01 /src/fasm/lexer
parentbf84010e01bb11874689ce53ea4df853b2e41c2b (diff)
added fasm support
added compiler options tried to compile to fasm first
Diffstat (limited to 'src/fasm/lexer')
-rw-r--r--src/fasm/lexer/lexer.c643
-rw-r--r--src/fasm/lexer/lexer.h363
2 files changed, 1006 insertions, 0 deletions
diff --git a/src/fasm/lexer/lexer.c b/src/fasm/lexer/lexer.c
new file mode 100644
index 0000000..e3e9610
--- /dev/null
+++ b/src/fasm/lexer/lexer.c
@@ -0,0 +1,643 @@
+#include "lexer.h"
+
+#include <compiler/error_helper/error_helper.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <utils/memory/memory.h>
+#include <utils/types.h>
+
+const char *FASM_TOKEN_STRINGS[] = { // upper-case mnemonic of every FasmToken, indexed by the token's numeric value; keep the order identical to enum FasmToken
+ "NOOP",
+ "PUSH8",
+ "PUSH16",
+ "PUSH32",
+ "PUSH64",
+ "LOAD8",
+ "LOAD16",
+ "LOAD32",
+ "LOAD64",
+ "POP8",
+ "POP16",
+ "POP32",
+ "POP64",
+ "DUP8",
+ "DUP16",
+ "DUP32",
+ "DUP64",
+ "SWAP8",
+ "SWAP16",
+ "SWAP32",
+ "SWAP64",
+ "DROP8",
+ "DROP16",
+ "DROP32",
+ "DROP64",
+ "ADD_I8",
+ "ADD_I16",
+ "ADD_I32",
+ "ADD_I64",
+ "ADD_F32",
+ "ADD_F64",
+ "SUB_I8",
+ "SUB_I16",
+ "SUB_I32",
+ "SUB_I64",
+ "SUB_F32",
+ "SUB_F64",
+ "NEG_I8",
+ "NEG_I16",
+ "NEG_I32",
+ "NEG_I64",
+ "NEG_F32",
+ "NEG_F64",
+ "MUL_I8",
+ "MUL_I16",
+ "MUL_I32",
+ "MUL_I64",
+ "MUL_U8",
+ "MUL_U16",
+ "MUL_U32",
+ "MUL_U64",
+ "MUL_F32",
+ "MUL_F64",
+ "DIV_I8",
+ "DIV_I16",
+ "DIV_I32",
+ "DIV_I64",
+ "DIV_U8",
+ "DIV_U16",
+ "DIV_U32",
+ "DIV_U64",
+ "DIV_F32",
+ "DIV_F64",
+ "REM_I8",
+ "REM_I16",
+ "REM_I32",
+ "REM_I64",
+ "REM_U8",
+ "REM_U16",
+ "REM_U32",
+ "REM_U64",
+ "CAST_I8_I64",
+ "CAST_I16_I64",
+ "CAST_I32_I64",
+ "CAST_I64_I8",
+ "CAST_I64_I16",
+ "CAST_I64_I32",
+ "CAST_F64_I64",
+ "CAST_I64_F64",
+ "CAST_U8_U64",
+ "CAST_U16_U64",
+ "CAST_U32_U64",
+ "CAST_U64_U8",
+ "CAST_U64_U16",
+ "CAST_U64_U32",
+ "CAST_F64_U64",
+ "CAST_U64_F64",
+ "CAST_F32_F64",
+ "CAST_F64_F32",
+ "JUMP",
+ "JZ_I8",
+ "JNZ_I8",
+ "JN_I8",
+ "JNN_I8",
+ "JP_I8",
+ "JNP_I8",
+ "JZ_I16",
+ "JNZ_I16",
+ "JN_I16",
+ "JNN_I16",
+ "JP_I16",
+ "JNP_I16",
+ "JZ_I32",
+ "JNZ_I32",
+ "JN_I32",
+ "JNN_I32",
+ "JP_I32",
+ "JNP_I32",
+ "JZ_I64",
+ "JNZ_I64",
+ "JN_I64",
+ "JNN_I64",
+ "JP_I64",
+ "JNP_I64",
+ "JZ_F32",
+ "JNZ_F32",
+ "JN_F32",
+ "JNN_F32",
+ "JP_F32",
+ "JNP_F32",
+ "JZ_F64",
+ "JNZ_F64",
+ "JN_F64",
+ "JNN_F64",
+ "JP_F64",
+ "JNP_F64",
+ "ALLOC_HEAP",
+ "ALLOC_STACK",
+ "FREE_HEAP",
+ "GET_STACK_ADDRESS",
+ "GET_GLOBAL_ADDRESS",
+ "CALL",
+ "RET",
+ "SYSCALL",
+ "DEFINE_BYTE",
+ "DEFINE_WORD",
+ "DEFINE_DWORD",
+ "DEFINE_QWORD",
+ "NONE", // entry for FASM_TOKEN_NONE, the value fasmLexerTokenFromIdentifier also returns for unknown identifiers
+};
+
+/*
+ * Printable name of every FasmLineLookingFor state, indexed by the state's
+ * enum value.
+ */
+const char *FASM_LINE_LOOKING_FOR_STRINGS[] = {
+    [FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION] =
+        "FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION",
+    [FASM_LINE_LOOKING_FOR_INSTRUCTION] = "FASM_LINE_LOOKING_FOR_INSTRUCTION",
+    [FASM_LINE_LOOKING_FOR_OPERAND] = "FASM_LINE_LOOKING_FOR_OPERAND",
+    [FASM_LINE_LOOKING_FOR_OPERAND_OR_END] =
+        "FASM_LINE_LOOKING_FOR_OPERAND_OR_END",
+    [FASM_LINE_LOOKING_FOR_COMMA_OR_END] =
+        "FASM_LINE_LOOKING_FOR_COMMA_OR_END",
+};
+
+/* Number of entries in FASM_TOKEN_STRINGS. */
+const size_t FASM_TOKEN_STRINGS_SIZE =
+    sizeof(FASM_TOKEN_STRINGS) / sizeof(*FASM_TOKEN_STRINGS);
+
+/*
+ * The table is indexed by FasmToken values, so it needs exactly one entry per
+ * enumerator (FASM_TOKEN_NONE is the last one). Fail the build instead of
+ * reading out of bounds when the enum and the table drift apart.
+ */
+_Static_assert(sizeof(FASM_TOKEN_STRINGS) / sizeof(*FASM_TOKEN_STRINGS) ==
+                   (size_t)FASM_TOKEN_NONE + 1,
+               "FASM_TOKEN_STRINGS is out of sync with enum FasmToken");
+
+/**
+ * Dumps one lexed line to stdout: its label (empty when the line has none),
+ * the mnemonic of its instruction and the source text of every operand.
+ *
+ * @param line line to print; label/operand ranges point into the source text
+ */
+void fasmLinePrint(FasmLine line) {
+  /*
+   * Lines without a label carry labelBegin == NULL. "%.*s" still requires a
+   * valid string pointer even with a precision of 0, so print "" instead.
+   */
+  const bool hasLabel = line.labelBegin != NULL;
+  const char *label = hasLabel ? line.labelBegin : "";
+  const int labelLength = hasLabel ? (int)(line.labelEnd - line.labelBegin) : 0;
+
+  printf("{label='%.*s',instruction='%s',operands=[\n", labelLength, label,
+         FASM_TOKEN_STRINGS[line.instruction]);
+  for (size_t i = 0; i < line.operands_size; ++i) {
+    const FasmOperand operand = line.operands[i];
+    printf(" {'%.*s'},\n", (int)(operand.end - operand.begin), operand.begin);
+  }
+  printf("]}\n");
+}
+
+/**
+ * Prints the lexer result to stdout: a "section code" heading followed by
+ * every code line, then a "section data" heading followed by every data line.
+ */
+void fasmLinesPrint(FasmLines lines) {
+  size_t index = 0;
+
+  printf("section code\n");
+  while (index < lines.lines_size) {
+    fasmLinePrint(lines.lines[index]);
+    index += 1;
+  }
+
+  printf("section data\n");
+  index = 0;
+  while (index < lines.data_size) {
+    fasmLinePrint(lines.data[index]);
+    index += 1;
+  }
+}
+
+/*
+ * Releases the operand array owned by `line`. Nothing else is freed: the
+ * remaining members only point into the source text.
+ */
+void fasmLineDeleteInner(FasmLine line) {
+  FasmOperand *const ownedOperands = line.operands;
+  free(ownedOperands);
+}
+
+/**
+ * Frees everything a FasmLines value owns: the operands of each code and data
+ * line, then the two line arrays themselves.
+ */
+void fasmLinesDeleteInner(FasmLines lines) {
+  size_t index = 0;
+  while (index < lines.lines_size) {
+    fasmLineDeleteInner(lines.lines[index]);
+    index += 1;
+  }
+
+  index = 0;
+  while (index < lines.data_size) {
+    fasmLineDeleteInner(lines.data[index]);
+    index += 1;
+  }
+
+  free(lines.lines);
+  free(lines.data);
+}
+
+/**
+ * Lexes every code unit of `sourceCode`.
+ *
+ * @param sourceCode source files to lex (sourceCode->codes[0 .. size))
+ * @return heap array with one FasmLines per code unit, in the same order, or
+ *         NULL when any unit fails to lex. On success the caller owns the
+ *         array and the contents of each element (fasmLinesDeleteInner, then
+ *         free); on failure everything allocated here has been released.
+ */
+FasmLines *fasmLexer(SourceCode *sourceCode) {
+  FasmLines *lines = a404m_malloc(sourceCode->size * sizeof(*lines));
+
+  for (size_t i = 0; i < sourceCode->size; ++i) {
+    lines[i] = fasmLexerCode(sourceCode->codes[i], sourceCode);
+    if (lines[i].lines_size == ERROR_SIZE) {
+      /*
+       * The failing entry owns nothing, but entries [0, i) were lexed
+       * successfully and would leak if only the outer array were freed.
+       */
+      for (size_t j = 0; j < i; ++j) {
+        fasmLinesDeleteInner(lines[j]);
+      }
+      free(lines);
+      return NULL;
+    }
+  }
+
+  return lines;
+}
+
+/**
+ * Lexes one source file into assembler lines, split into a code and a data
+ * section.
+ *
+ * The text is scanned once by a small state machine (`lookingFor`). A line has
+ * the shape `[label:] instruction [operand {, operand}]` and is finished by
+ * '\n' or by the terminating '\0'. `.code` / `.data` at the start of a line
+ * select the section the following lines are pushed to.
+ *
+ * @param code       file to lex; code->code must be NUL-terminated. Labels and
+ *                   operands of the result point into this buffer.
+ * @param sourceCode forwarded to printError for diagnostics
+ * @return the lexed lines (release with fasmLinesDeleteInner), or a value
+ *         whose lines_size is ERROR_SIZE once an error has been printed; in
+ *         the error case everything allocated here has already been freed.
+ */
+FasmLines fasmLexerCode(Code *code, SourceCode *sourceCode) {
+  FasmLineLookingFor lookingFor = FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION;
+  /* Scratch line that is filled token by token and copied out when complete. */
+  FasmLine line = {
+      .begin = code->code,
+      .end = code->code,
+      .labelBegin = NULL,
+      .labelEnd = NULL,
+      .instruction = FASM_TOKEN_NONE,
+      .operands = a404m_malloc(0),
+      .operands_size = 0,
+  };
+
+  FasmLines lines = {
+      .lines = a404m_malloc(0),
+      .lines_size = 0,
+      .data = a404m_malloc(0),
+      .data_size = 0,
+  };
+
+  FasmSection section = FASM_SECTION_NONE;
+
+  for (char *iter = code->code;; ++iter) {
+  LOOP_BEGIN:
+    const char c = *iter;
+    if (c == '\0') {
+      /* End of input: acceptable only where a line may legally end. */
+      switch (lookingFor) {
+        case FASM_LINE_LOOKING_FOR_INSTRUCTION:
+          printError("Expected instruction", sourceCode, line.begin, iter);
+          goto RETURN_ERROR;
+        case FASM_LINE_LOOKING_FOR_OPERAND:
+          /* Input ended right after an operand separator. */
+          printError("Expected operand", sourceCode, line.begin, iter);
+          goto RETURN_ERROR;
+        case FASM_LINE_LOOKING_FOR_OPERAND_OR_END:
+        case FASM_LINE_LOOKING_FOR_COMMA_OR_END:
+          /* The last line has no trailing '\n'; flush it. */
+          if (!fasmLexerPushLine(&lines, &line, iter, section, sourceCode)) {
+            goto RETURN_ERROR;
+          }
+          goto RETURN_SUCCESS;
+        case FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION:
+          goto RETURN_SUCCESS;
+      }
+    } else if (fasmLexerIsSpace(c)) {
+      continue;
+    }
+    /*fprintf(stderr, "a404m: Char '%c' at %ld and looking for '%s'\n", c,*/
+    /*        iter - code->code, FASM_LINE_LOOKING_FOR_STRINGS[lookingFor]);*/
+    switch (lookingFor) {
+      case FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION:
+        if (fasmLexerIsLineSeparator(c)) {
+          continue; /* empty line */
+        } else if (fasmLexerIsSectionIndicator(c)) {
+          static const char *const SECTIONS_STRINGS[] = {
+              "code",
+              "data",
+          };
+          static const FasmSection SECTIONS[] = {
+              FASM_SECTION_CODE,
+              FASM_SECTION_DATA,
+          };
+          static const size_t SECTIONS_SIZE =
+              sizeof(SECTIONS_STRINGS) / sizeof(*SECTIONS_STRINGS);
+
+          ++iter; /* skip the section indicator itself */
+
+          for (size_t i = 0; i < SECTIONS_SIZE; ++i) {
+            const char *const sectionStr = SECTIONS_STRINGS[i];
+            for (size_t j = 0;; ++j) {
+              const char c0 = sectionStr[j];
+              const char c1 = iter[j];
+              if (c0 == '\0') {
+                /* Whole name matched; it must not be a mere prefix. */
+                /* <ctype.h> needs an unsigned char value, not a plain char. */
+                if (c1 == '\0' || isspace((unsigned char)c1)) {
+                  iter += j;
+                  section = SECTIONS[i];
+                  goto LOOP_BEGIN;
+                } else {
+                  break;
+                }
+              } else if (c0 != c1) { /* also stops at c1 == '\0' */
+                break;
+              }
+            }
+          }
+          printError("Invalid section", sourceCode, iter - 1, iter);
+          goto RETURN_ERROR;
+        } else if (fasmLexerIsWord(c)) {
+          char *begin = iter;
+          char *end = iter = fasmLexerGetNextWord(iter);
+          line.begin = begin;
+          line.end = end;
+          if (fasmLexerIsLabel(*iter)) {
+            ++iter; /* consume the label terminator */
+            line.labelBegin = begin;
+            line.labelEnd = end;
+            lookingFor = FASM_LINE_LOOKING_FOR_INSTRUCTION;
+          } else {
+            if ((line.instruction = fasmLexerTokenFromIdentifier(
+                     begin, end)) == FASM_TOKEN_NONE) {
+              printError("Unknown instruction", sourceCode, begin, end);
+              goto RETURN_ERROR;
+            }
+
+            lookingFor = FASM_LINE_LOOKING_FOR_OPERAND_OR_END;
+          }
+          /* iter already points at the next unread character. */
+          goto LOOP_BEGIN;
+        } else {
+        UNEXPECTED:
+          fasmLinePrint(line);
+          printError("Unexpected character", sourceCode, iter, iter + 1);
+          goto RETURN_ERROR;
+        }
+        break;
+      case FASM_LINE_LOOKING_FOR_INSTRUCTION:
+        if (fasmLexerIsWord(c)) {
+          char *begin = iter;
+          char *end = iter = fasmLexerGetNextWord(iter);
+          line.end = end;
+
+          if ((line.instruction = fasmLexerTokenFromIdentifier(begin, end)) ==
+              FASM_TOKEN_NONE) {
+            printError("Unknown instruction", sourceCode, begin, end);
+            goto RETURN_ERROR;
+          }
+
+          lookingFor = FASM_LINE_LOOKING_FOR_OPERAND_OR_END;
+          goto LOOP_BEGIN;
+        } else {
+          fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__);
+          goto UNEXPECTED;
+        }
+        break;
+      case FASM_LINE_LOOKING_FOR_OPERAND:
+        if (fasmLexerIsWord(c)) {
+        LEX_OPERAND:
+          char *begin = iter;
+          char *end = iter = fasmLexerGetNextWord(iter);
+          /* Capacity is derived from the allocation size; grow by ~1.5x. */
+          const size_t size =
+              a404m_malloc_usable_size(line.operands) / sizeof(*line.operands);
+          if (line.operands_size == size) {
+            line.operands = a404m_realloc(
+                line.operands,
+                (line.operands_size + line.operands_size / 2 + 1) *
+                    sizeof(*line.operands));
+          }
+          line.operands[line.operands_size].begin = begin;
+          line.operands[line.operands_size].end = end;
+          line.operands_size += 1;
+          line.end = end;
+          lookingFor = FASM_LINE_LOOKING_FOR_COMMA_OR_END;
+          goto LOOP_BEGIN;
+        } else {
+          fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__);
+          goto UNEXPECTED;
+        }
+        break;
+      case FASM_LINE_LOOKING_FOR_OPERAND_OR_END:
+        if (fasmLexerIsWord(c)) {
+          goto LEX_OPERAND;
+        } else if (fasmLexerIsLineSeparator(c)) {
+          goto LEX_END;
+        } else {
+          fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__);
+          goto UNEXPECTED;
+        }
+        break;
+      case FASM_LINE_LOOKING_FOR_COMMA_OR_END:
+        if (fasmLexerIsLineSeparator(c)) {
+        LEX_END:
+          /*
+           * A rejected line must abort lexing: carrying on would keep its
+           * operands in the scratch line and prepend them to the next one.
+           */
+          if (!fasmLexerPushLine(&lines, &line, iter, section, sourceCode)) {
+            goto RETURN_ERROR;
+          }
+          lookingFor = FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION;
+        } else if (fasmLexerIsOperandSeparator(c)) {
+          lookingFor = FASM_LINE_LOOKING_FOR_OPERAND;
+        } else {
+          fprintf(stderr, "a404m %s:%d\n", __FILE_NAME__, __LINE__);
+          goto UNEXPECTED;
+        }
+    }
+  }
+
+RETURN_SUCCESS:
+  /* The scratch line's operand buffer was never handed to `lines`. */
+  free(line.operands);
+  return lines;
+
+RETURN_ERROR:
+  /* Release the scratch buffer and every line pushed so far (both sections). */
+  free(line.operands);
+  fasmLinesDeleteInner(lines);
+  const FasmLines error = {
+      .lines_size = ERROR_SIZE,
+  };
+  return error;
+}
+
+/**
+ * Validates a finished line and appends it to the array of the active
+ * section.
+ *
+ * @param lines      destination; the line goes to lines->lines (code section)
+ *                   or lines->data (data section)
+ * @param line       finished scratch line; on success it is copied out and
+ *                   reset for the next line
+ * @param iter       current lexer position (only forwarded)
+ * @param section    section the line belongs to
+ * @param sourceCode forwarded to printError for diagnostics
+ * @return true when the line was appended; false after an error was reported
+ *         (instruction not allowed in this section, or no section selected)
+ */
+bool fasmLexerPushLine(FasmLines *lines, FasmLine *line, char const *iter,
+                       FasmSection section, SourceCode *sourceCode) {
+  if (!fasmLexerIsAllowed(*line, section)) {
+    printError("Instruction is not allowed here", sourceCode, line->begin,
+               line->end);
+    return false;
+  }
+  /*
+   * Shrink the operand buffer to the exact size before it is stored. Skipped
+   * for lines without operands: resizing to 0 bytes is not portable with the
+   * standard realloc (NOTE(review): a404m_realloc may define it; confirm).
+   */
+  if (line->operands_size != 0) {
+    line->operands = a404m_realloc(
+        line->operands, line->operands_size * sizeof(*line->operands));
+  }
+  switch (section) {
+    case FASM_SECTION_NONE:
+      printError("Instruction is in no section", sourceCode, line->begin,
+                 line->end);
+      /* Nothing was appended, so this is a failure like any other error. */
+      return false;
+    case FASM_SECTION_CODE:
+      _fasmLexerPushLine(&lines->lines, &lines->lines_size, line, iter);
+      return true;
+    case FASM_SECTION_DATA:
+      _fasmLexerPushLine(&lines->data, &lines->data_size, line, iter);
+      return true;
+  }
+  fprintf(stderr, "Bad section '%d'\n", section);
+  return false;
+}
+
+/**
+ * Appends a copy of `*line` to the growable array `*lines` and prepares the
+ * scratch line for reuse.
+ *
+ * The array's capacity is derived from its allocation size; when it is full
+ * it grows to 2 * capacity + 1 elements. After the copy the scratch line gets
+ * a fresh, empty operand buffer and no label. Its begin/end/instruction are
+ * left untouched because the lexer overwrites them for the next line. The
+ * trailing (unnamed) parameter is not used.
+ */
+void _fasmLexerPushLine(FasmLine **lines, size_t *lines_size, FasmLine *line,
+                        char const *) {
+  FasmLine *array = *lines;
+  const size_t count = *lines_size;
+  const size_t capacity = a404m_malloc_usable_size(array) / sizeof(*array);
+
+  if (capacity == count) {
+    array = a404m_realloc(array, (capacity * 2 + 1) * sizeof(*array));
+    *lines = array;
+  }
+
+  /* The stored copy takes over ownership of the operand buffer. */
+  array[count] = *line;
+  *lines_size = count + 1;
+
+  line->operands = a404m_malloc(0);
+  line->operands_size = 0;
+  line->labelBegin = NULL;
+  line->labelEnd = NULL;
+}
+
+/**
+ * Tells whether the instruction of `line` may appear in `section`.
+ *
+ * Executable instructions (FASM_TOKEN_NOOP .. FASM_TOKEN_SYSCALL) are only
+ * valid in the code section, the DEFINE_* directives only in the data section
+ * and FASM_TOKEN_NONE nowhere. Any other value is a corrupted token: it is
+ * reported on stderr and the process exits with status 1.
+ */
+bool fasmLexerIsAllowed(FasmLine line, FasmSection section) {
+  /*
+   * The range checks below rely on the declaration order of FasmToken:
+   * code instructions first, then the data directives, then NONE.
+   */
+  _Static_assert(FASM_TOKEN_NOOP == 0, "code instructions must start at 0");
+  _Static_assert(FASM_TOKEN_SYSCALL + 1 == FASM_TOKEN_DEFINE_BYTE,
+                 "data directives must directly follow the code instructions");
+  _Static_assert(FASM_TOKEN_DEFINE_BYTE + 3 == FASM_TOKEN_DEFINE_QWORD,
+                 "DEFINE_* directives must be contiguous");
+  _Static_assert(FASM_TOKEN_DEFINE_QWORD + 1 == FASM_TOKEN_NONE,
+                 "FASM_TOKEN_NONE must directly follow the data directives");
+
+  const FasmToken token = line.instruction;
+
+  if (token <= FASM_TOKEN_SYSCALL) {
+    return section == FASM_SECTION_CODE;
+  }
+  if (token <= FASM_TOKEN_DEFINE_QWORD) {
+    return section == FASM_SECTION_DATA;
+  }
+  if (token == FASM_TOKEN_NONE) {
+    return false;
+  }
+
+  fprintf(stderr, "Bad token %d at %s:%d\n", line.instruction, __FILE_NAME__,
+          __LINE__);
+  exit(1);
+}
+
+/**
+ * Returns the position just past the token that starts at `iter`.
+ *
+ * A token opened by a quote character runs up to and including the next
+ * occurrence of that same quote; if the text ends first, a message is written
+ * to stderr and the process exits with status 1. Any other token extends over
+ * the following run of word characters.
+ */
+char *fasmLexerGetNextWord(char *iter) {
+  const char first = *iter;
+  char *cursor = iter + 1;
+
+  if (!fasmLexerIsString(first)) {
+    while (*cursor != '\0' && fasmLexerIsWord(*cursor)) {
+      ++cursor;
+    }
+    return cursor;
+  }
+
+  /* Quoted token: look for the matching closing quote. */
+  while (*cursor != first) {
+    if (*cursor == '\0') {
+      fprintf(stderr, "No ending for string at %s:%d\n", __FILE_NAME__,
+              __LINE__);
+      exit(1);
+    }
+    ++cursor;
+  }
+  return cursor + 1; /* step over the closing quote */
+}
+
+/**
+ * Maps the identifier [begin, end) to its token, ignoring letter case.
+ *
+ * The identifier is compared in place against every entry of
+ * FASM_TOKEN_STRINGS (which holds upper-case mnemonics); no temporary copy is
+ * allocated.
+ *
+ * @param begin first character of the identifier
+ * @param end   one past its last character
+ * @return the matching token, or FASM_TOKEN_NONE when nothing matches
+ */
+FasmToken fasmLexerTokenFromIdentifier(char *begin, char *end) {
+  const size_t size = (size_t)(end - begin);
+
+  for (size_t i = 0; i < FASM_TOKEN_STRINGS_SIZE; ++i) {
+    const char *str = FASM_TOKEN_STRINGS[i];
+    if (strlen(str) != size) {
+      continue;
+    }
+
+    size_t matched = 0;
+    /*
+     * toupper() takes an unsigned char value; passing a negative plain char
+     * (possible for bytes inside a quoted token) is undefined behaviour.
+     */
+    while (matched < size && toupper((unsigned char)begin[matched]) ==
+                                 (unsigned char)str[matched]) {
+      ++matched;
+    }
+    if (matched == size) {
+      return (FasmToken)i;
+    }
+  }
+
+  return FASM_TOKEN_NONE;
+}
+
+/* Whitespace inside a line: every isspace() character except '\n'. */
+bool fasmLexerIsSpace(char c) {
+  /* <ctype.h> needs an unsigned char value; a negative char is undefined. */
+  return c != '\n' && isspace((unsigned char)c) != 0;
+}
+
+/* '.' introduces a section name. */
+bool fasmLexerIsSectionIndicator(char c) { return c == '.'; }
+
+/* ':' directly after a word marks that word as a label. */
+bool fasmLexerIsLabel(char c) { return c == ':'; }
+
+/* Single or double quote: opens and closes a quoted token. */
+bool fasmLexerIsString(char c) { return c == '\'' || c == '\"'; }
+
+/* ASCII letters, digits, '_' and the quote characters may appear in a word. */
+bool fasmLexerIsWord(char c) {
+  const bool isUpper = 'A' <= c && c <= 'Z';
+  const bool isLower = 'a' <= c && c <= 'z';
+  const bool isDigit = '0' <= c && c <= '9';
+  return isUpper || isLower || isDigit || c == '_' || fasmLexerIsString(c);
+}
+
+/* Backtick. */
+bool fasmLexerIsIdentifierSymbol(char c) { return c == '`'; }
+
+/* ',' separates the operands of one instruction. */
+bool fasmLexerIsOperandSeparator(char c) { return c == ','; }
+
+/* '\n' ends a line. */
+bool fasmLexerIsLineSeparator(char c) { return c == '\n'; }
diff --git a/src/fasm/lexer/lexer.h b/src/fasm/lexer/lexer.h
new file mode 100644
index 0000000..2e5f227
--- /dev/null
+++ b/src/fasm/lexer/lexer.h
@@ -0,0 +1,363 @@
+#pragma once
+
+#include <stdint.h>
+#include <utils/types.h>
+
+#include "compiler/source_code/source_code.h"
+
+typedef enum FasmToken : uint8_t { // instruction and directive identifiers; FASM_TOKEN_STRINGS in lexer.c is indexed by these values, so both lists must keep the same order
+ // no operation (does nothing)
+ FASM_TOKEN_NOOP = 0,
+
+ // pushes operand to stack
+ FASM_TOKEN_PUSH8,
+ FASM_TOKEN_PUSH16,
+ FASM_TOKEN_PUSH32,
+ FASM_TOKEN_PUSH64,
+
+ // dereferences stack.top and pushes its value to stack
+ FASM_TOKEN_LOAD8,
+ FASM_TOKEN_LOAD16,
+ FASM_TOKEN_LOAD32,
+ FASM_TOKEN_LOAD64,
+
+ // pops value stack.(top-1) to the address which is stack.top
+ FASM_TOKEN_POP8,
+ FASM_TOKEN_POP16,
+ FASM_TOKEN_POP32,
+ FASM_TOKEN_POP64,
+
+ // duplicates stack.top
+ FASM_TOKEN_DUP8,
+ FASM_TOKEN_DUP16,
+ FASM_TOKEN_DUP32,
+ FASM_TOKEN_DUP64,
+
+ // swaps stack.top with stack.(top-1)
+ FASM_TOKEN_SWAP8,
+ FASM_TOKEN_SWAP16,
+ FASM_TOKEN_SWAP32,
+ FASM_TOKEN_SWAP64,
+
+ // drops stack.top
+ FASM_TOKEN_DROP8,
+ FASM_TOKEN_DROP16,
+ FASM_TOKEN_DROP32,
+ FASM_TOKEN_DROP64,
+
+ // adds two stack top integers
+ FASM_TOKEN_ADD_I8,
+ FASM_TOKEN_ADD_I16,
+ FASM_TOKEN_ADD_I32,
+ FASM_TOKEN_ADD_I64,
+
+ // adds two stack top floats
+ FASM_TOKEN_ADD_F32,
+ FASM_TOKEN_ADD_F64,
+
+ // subtracts two stack top integers
+ FASM_TOKEN_SUB_I8,
+ FASM_TOKEN_SUB_I16,
+ FASM_TOKEN_SUB_I32,
+ FASM_TOKEN_SUB_I64,
+
+ // subtracts two stack top floats
+ FASM_TOKEN_SUB_F32,
+ FASM_TOKEN_SUB_F64,
+
+ // negates stack top integer and pushes it back
+ FASM_TOKEN_NEG_I8,
+ FASM_TOKEN_NEG_I16,
+ FASM_TOKEN_NEG_I32,
+ FASM_TOKEN_NEG_I64,
+
+ // negates stack top float and pushes it back
+ FASM_TOKEN_NEG_F32,
+ FASM_TOKEN_NEG_F64,
+
+ // multiplies two stack top signed integers
+ FASM_TOKEN_MUL_I8,
+ FASM_TOKEN_MUL_I16,
+ FASM_TOKEN_MUL_I32,
+ FASM_TOKEN_MUL_I64,
+
+ // multiplies two stack top unsigned integers
+ FASM_TOKEN_MUL_U8,
+ FASM_TOKEN_MUL_U16,
+ FASM_TOKEN_MUL_U32,
+ FASM_TOKEN_MUL_U64,
+
+ // multiplies two stack top floats
+ FASM_TOKEN_MUL_F32,
+ FASM_TOKEN_MUL_F64,
+
+ // divides two stack top signed integers
+ FASM_TOKEN_DIV_I8,
+ FASM_TOKEN_DIV_I16,
+ FASM_TOKEN_DIV_I32,
+ FASM_TOKEN_DIV_I64,
+
+ // divides two stack top unsigned integers
+ FASM_TOKEN_DIV_U8,
+ FASM_TOKEN_DIV_U16,
+ FASM_TOKEN_DIV_U32,
+ FASM_TOKEN_DIV_U64,
+
+ // divides two stack top floats
+ FASM_TOKEN_DIV_F32,
+ FASM_TOKEN_DIV_F64,
+
+ // computes the remainder of two stack top signed integers
+ FASM_TOKEN_REM_I8,
+ FASM_TOKEN_REM_I16,
+ FASM_TOKEN_REM_I32,
+ FASM_TOKEN_REM_I64,
+
+ // computes the remainder of two stack top unsigned integers
+ FASM_TOKEN_REM_U8,
+ FASM_TOKEN_REM_U16,
+ FASM_TOKEN_REM_U32,
+ FASM_TOKEN_REM_U64,
+
+ // signed cast 8 bit to 64 bit (NOTE(review): the CAST_I*/CAST_U* comments originally said "unsigned"/"signed" the other way round; changed to agree with MUL_I* = signed, MUL_U* = unsigned above -- confirm against the VM)
+ FASM_TOKEN_CAST_I8_I64,
+ // signed cast 16 bit to 64 bit
+ FASM_TOKEN_CAST_I16_I64,
+ // signed cast 32 bit to 64 bit
+ FASM_TOKEN_CAST_I32_I64,
+
+ // signed cast 64 bit to 8 bit
+ FASM_TOKEN_CAST_I64_I8,
+ // signed cast 64 bit to 16 bit
+ FASM_TOKEN_CAST_I64_I16,
+ // signed cast 64 bit to 32 bit
+ FASM_TOKEN_CAST_I64_I32,
+
+ // casts between float 64 bit and signed int 64 bit (NOTE(review): the original comment gave the direction int -> float, while the CAST_<from>_<to> naming of the neighbouring tokens suggests float -> int -- confirm)
+ FASM_TOKEN_CAST_F64_I64,
+ // the opposite direction of the cast above (same caveat)
+ FASM_TOKEN_CAST_I64_F64,
+
+ // unsigned cast 8 bit to 64 bit
+ FASM_TOKEN_CAST_U8_U64,
+ // unsigned cast 16 bit to 64 bit
+ FASM_TOKEN_CAST_U16_U64,
+ // unsigned cast 32 bit to 64 bit
+ FASM_TOKEN_CAST_U32_U64,
+
+ // unsigned cast 64 bit to 8 bit
+ FASM_TOKEN_CAST_U64_U8,
+ // unsigned cast 64 bit to 16 bit
+ FASM_TOKEN_CAST_U64_U16,
+ // unsigned cast 64 bit to 32 bit
+ FASM_TOKEN_CAST_U64_U32,
+
+ // casts between float 64 bit and unsigned int 64 bit (NOTE(review): direction unclear, see CAST_F64_I64 -- confirm)
+ FASM_TOKEN_CAST_F64_U64,
+ // the opposite direction of the cast above (same caveat)
+ FASM_TOKEN_CAST_U64_F64,
+
+ // casts float 32 bit to float 64 bit
+ FASM_TOKEN_CAST_F32_F64,
+ // casts float 64 bit to float 32 bit
+ FASM_TOKEN_CAST_F64_F32,
+
+ // unconditional jump to instruction (sets IP to stack.top)
+ FASM_TOKEN_JUMP,
+
+ // conditionally jumps to stack.top if stack.(top-1) as 8 bit is
+ // zero
+ FASM_TOKEN_JZ_I8,
+ // not zero
+ FASM_TOKEN_JNZ_I8,
+ // negative
+ FASM_TOKEN_JN_I8,
+ // not negative
+ FASM_TOKEN_JNN_I8,
+ // positive
+ FASM_TOKEN_JP_I8,
+ // not positive
+ FASM_TOKEN_JNP_I8,
+
+ // conditionally jumps to stack.top if stack.(top-1) as 16 bit is
+ // zero
+ FASM_TOKEN_JZ_I16,
+ // not zero
+ FASM_TOKEN_JNZ_I16,
+ // negative
+ FASM_TOKEN_JN_I16,
+ // not negative
+ FASM_TOKEN_JNN_I16,
+ // positive
+ FASM_TOKEN_JP_I16,
+ // not positive
+ FASM_TOKEN_JNP_I16,
+
+ // conditionally jumps to stack.top if stack.(top-1) as 32 bit is
+ // zero
+ FASM_TOKEN_JZ_I32,
+ // not zero
+ FASM_TOKEN_JNZ_I32,
+ // negative
+ FASM_TOKEN_JN_I32,
+ // not negative
+ FASM_TOKEN_JNN_I32,
+ // positive
+ FASM_TOKEN_JP_I32,
+ // not positive
+ FASM_TOKEN_JNP_I32,
+
+ // conditionally jumps to stack.top if stack.(top-1) as 64 bit is
+ // zero
+ FASM_TOKEN_JZ_I64,
+ // not zero
+ FASM_TOKEN_JNZ_I64,
+ // negative
+ FASM_TOKEN_JN_I64,
+ // not negative
+ FASM_TOKEN_JNN_I64,
+ // positive
+ FASM_TOKEN_JP_I64,
+ // not positive
+ FASM_TOKEN_JNP_I64,
+
+ // conditionally jumps to stack.top if stack.(top-1) as 32 bit float is
+ // zero
+ FASM_TOKEN_JZ_F32,
+ // not zero
+ FASM_TOKEN_JNZ_F32,
+ // negative
+ FASM_TOKEN_JN_F32,
+ // not negative
+ FASM_TOKEN_JNN_F32,
+ // positive
+ FASM_TOKEN_JP_F32,
+ // not positive
+ FASM_TOKEN_JNP_F32,
+
+ // conditionally jumps to stack.top if stack.(top-1) as 64 bit float is
+ // zero
+ FASM_TOKEN_JZ_F64,
+ // not zero
+ FASM_TOKEN_JNZ_F64,
+ // negative
+ FASM_TOKEN_JN_F64,
+ // not negative
+ FASM_TOKEN_JNN_F64,
+ // positive
+ FASM_TOKEN_JP_F64,
+ // not positive
+ FASM_TOKEN_JNP_F64,
+
+ // allocates n bytes on the heap and pushes its address to stack (n = stack.top)
+ FASM_TOKEN_ALLOC_HEAP,
+ // allocates n bytes on the stack (n = stack.top)
+ FASM_TOKEN_ALLOC_STACK,
+ // frees the address that is in stack.top
+ FASM_TOKEN_FREE_HEAP,
+ // gives stack root address
+ FASM_TOKEN_GET_STACK_ADDRESS,
+ // gives global root address
+ FASM_TOKEN_GET_GLOBAL_ADDRESS,
+
+ // calls function (stores the current IP (instruction pointer) into the call stack)
+ FASM_TOKEN_CALL,
+ // pops call stack to IP (instruction pointer)
+ FASM_TOKEN_RET,
+
+ // stack.top as u8 is id of syscall
+ FASM_TOKEN_SYSCALL,
+
+ FASM_TOKEN_DEFINE_BYTE, // the DEFINE_* directives are accepted only in the data section (see fasmLexerIsAllowed)
+ FASM_TOKEN_DEFINE_WORD,
+ FASM_TOKEN_DEFINE_DWORD,
+ FASM_TOKEN_DEFINE_QWORD,
+
+ FASM_TOKEN_NONE, // "no instruction": initial value of a line and the result for unknown identifiers; corresponds to the last entry of FASM_TOKEN_STRINGS
+} FasmToken;
+
+extern const char *FASM_TOKEN_STRINGS[]; // mnemonic of each FasmToken, indexed by token value (defined in lexer.c)
+extern const size_t FASM_TOKEN_STRINGS_SIZE; // number of entries in FASM_TOKEN_STRINGS
+
+typedef enum FasmSyscall : uint8_t { // syscall ids; each comment shows the stack before -> after the call
+ // stack = [...,u64 count,i8* buf,u32 fd] -> stack = [...]
+ FASM_SYSCALL_READ = 0,
+ // stack = [...,u64 count,i8* buf,u32 fd] -> stack = [...]
+ FASM_SYSCALL_WRITE,
+ // stack = [...,i32 mode,i32 flags,const i8* filename] -> stack = [...,u32 fd]
+ FASM_SYSCALL_OPEN,
+ // stack = [...,u32 fd] -> stack = [...]
+ FASM_SYSCALL_CLOSE,
+ // stack = [...,u32 status] -> stack = [...]
+ FASM_SYSCALL_EXIT,
+} FasmSyscall;
+
+typedef enum FasmLineLookingFor { // states of the line lexer in fasmLexerCode: what the next token is expected to be
+ FASM_LINE_LOOKING_FOR_LABEL_OR_INSTRUCTION = 0, // start of a line (initial state); also accepts empty lines and section names
+ FASM_LINE_LOOKING_FOR_INSTRUCTION, // a label was read, the instruction must follow
+ FASM_LINE_LOOKING_FOR_OPERAND, // an operand separator was read, an operand must follow
+ FASM_LINE_LOOKING_FOR_OPERAND_OR_END, // the instruction was read; a first operand or the end of the line may follow
+ FASM_LINE_LOOKING_FOR_COMMA_OR_END, // an operand was read; an operand separator or the end of the line may follow
+} FasmLineLookingFor;
+
+extern const char *FASM_LINE_LOOKING_FOR_STRINGS[]; // name of each FasmLineLookingFor state, in enum order (defined in lexer.c)
+
+typedef enum FasmSection { // section a lexed line belongs to
+ FASM_SECTION_NONE, // no section selected yet (initial state of fasmLexerCode); no instruction is allowed here
+ FASM_SECTION_CODE, // lines are stored in FasmLines.lines
+ FASM_SECTION_DATA, // lines are stored in FasmLines.data
+} FasmSection;
+
+typedef struct FasmOperand { // source range [begin, end) of one operand; points into the lexed text, not owned
+ char *begin; // first character of the operand (the opening quote for quoted operands)
+ char *end; // one past the last character (past the closing quote for quoted operands)
+} FasmOperand;
+
+typedef struct FasmLine { // one lexed line: optional label, instruction and operands
+ char const *begin; // start of the line's first word (label or instruction) in the source text
+ char const *end; // end of the last token lexed for this line (instruction or last operand)
+
+ char const *labelBegin; // label range [labelBegin, labelEnd) in the source text, or NULL when the line has no label
+ char const *labelEnd;
+
+ FasmToken instruction;
+
+ FasmOperand *operands; // heap array owned by the line; released by fasmLineDeleteInner
+ size_t operands_size; // number of operands stored in the array
+} FasmLine;
+
+typedef struct FasmLines { // lexer result for one source file; release with fasmLinesDeleteInner
+ FasmLine *lines; // lines of the code section
+ size_t lines_size; // number of code lines; fasmLexerCode sets it to ERROR_SIZE to signal failure
+ FasmLine *data; // lines of the data section
+ size_t data_size; // number of data lines
+} FasmLines;
+
+extern void fasmLinePrint(FasmLine line); // prints one line (label, instruction, operands) to stdout
+extern void fasmLinesPrint(FasmLines lines); // prints all code lines, then all data lines, to stdout
+
+extern void fasmLineDeleteInner(FasmLine line); // frees the line's operand array
+extern void fasmLinesDeleteInner(FasmLines lines); // frees every line's operands and both line arrays
+
+extern FasmLines *fasmLexer(SourceCode *sourceCode); // lexes every code unit; returns a heap array with one FasmLines per unit, or NULL on error
+extern FasmLines fasmLexerCode(Code *code, SourceCode *sourceCode); // lexes one code unit; lines_size == ERROR_SIZE signals an error
+
+extern bool fasmLexerPushLine(FasmLines *lines, FasmLine *line,
+ char const *iter, FasmSection section,
+ SourceCode *sourceCode); // checks the line against the section and appends it to the matching array; false when the instruction is not allowed there
+extern void _fasmLexerPushLine(FasmLine **lines, size_t *lines_size,
+ FasmLine *line, char const *iter); // appends a copy of *line to the array and resets *line for reuse; the implementation does not use iter
+
+extern bool fasmLexerIsAllowed(FasmLine line, FasmSection section); // true when the line's instruction may appear in the given section
+
+extern char *fasmLexerGetNextWord(char *iter); // returns the position just past the word or quoted token starting at iter
+extern FasmToken fasmLexerTokenFromIdentifier(char *begin, char *end); // case-insensitive lookup of [begin, end); FASM_TOKEN_NONE when unknown
+
+extern bool fasmLexerIsSpace(char c); // isspace() character other than '\n'
+extern bool fasmLexerIsSectionIndicator(char c); // '.'
+extern bool fasmLexerIsLabel(char c); // ':'
+extern bool fasmLexerIsWord(char c); // ASCII letter, digit, '_' or a quote character
+extern bool fasmLexerIsIdentifierSymbol(char c); // '`'
+extern bool fasmLexerIsString(char c); // single or double quote
+extern bool fasmLexerIsOperandSeparator(char c); // ','
+extern bool fasmLexerIsLineSeparator(char c); // '\n'