diff options
-rwxr-xr-x | .clang-format | 4 | ||||
-rw-r--r-- | .gitignore | 4 | ||||
-rw-r--r-- | lang.felan | 159 | ||||
-rwxr-xr-x | project | 87 | ||||
-rw-r--r-- | src/compiler/code_generator/code_generator.c | 199 | ||||
-rw-r--r-- | src/compiler/code_generator/code_generator.h | 41 | ||||
-rw-r--r-- | src/compiler/lexer/lexer.c | 307 | ||||
-rw-r--r-- | src/compiler/lexer/lexer.h | 66 | ||||
-rw-r--r-- | src/compiler/parser/parser.c | 351 | ||||
-rw-r--r-- | src/compiler/parser/parser.h | 62 | ||||
-rw-r--r-- | src/main.c | 104 | ||||
-rw-r--r-- | src/utils/file.c | 22 | ||||
-rw-r--r-- | src/utils/file.h | 3 | ||||
-rw-r--r-- | src/utils/memory/memory.c | 22 | ||||
-rw-r--r-- | src/utils/memory/memory.h | 6 | ||||
-rw-r--r-- | src/utils/time.c | 9 | ||||
-rw-r--r-- | src/utils/time.h | 7 | ||||
-rw-r--r-- | src/utils/types.h | 18 | ||||
-rw-r--r-- | src/vm/runner/runner.c | 50 | ||||
-rw-r--r-- | src/vm/runner/runner.h | 9 |
20 files changed, 1530 insertions, 0 deletions
diff --git a/.clang-format b/.clang-format new file mode 100755 index 0000000..5af5a7a --- /dev/null +++ b/.clang-format @@ -0,0 +1,4 @@ +# https://clang.llvm.org/docs/ClangFormatStyleOptions.html +# To disable for a line use `// clang-format off` +BasedOnStyle: Google # https://google.github.io/styleguide/cppguide.html +IndentPPDirectives: BeforeHash diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..58709fe --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +build/ +.cache/ +test/ +compile_commands.json diff --git a/lang.felan b/lang.felan new file mode 100644 index 0000000..b18785e --- /dev/null +++ b/lang.felan @@ -0,0 +1,159 @@ +// variable +// variable definition (full version) +variableName : Type = value; +constantName : Type : value; + +// i.e. +myInt : Int = 23; +myConstInt : Int : 23; +myFloat : Float = 23.0; + +// multiple variable definition (full version) +variableName0, ... : TypeForAllOfThem = value0, ...; + +// i.e. +myInt, yourInt : Int = 23 , 54; + +// variable definition (compiler knows types) +variableName := value; +constantName :: value; + +// i.e. +myInt := 23; +myConstInt :: 23; +myFloat := 23.0; + +// multiple variable definition (compiler knows types) +variableName0, ... := value0, ...; + +// i.e. +myInt, yourInt := 23, 54; // all values must have the same type + +// variable definition (without initialization) constants can't be defined this way +variableName : Type; + +// i.e. +myInt : Int; +myFloat : Float; + +// multiple variable definition (without initialization) +variableName0, ... : TypeForAllOfThem; + +// i.e. +myInt, yourInt : Int; + +// pointers +// pointers will be initialized to undefined + +// definition of a pointer which can't be null +pointerName : *Type = value; + +// i.e. +myPointer : *Int = new Int; + +// definition of a pointer which can be null +myPointer : nullable*Type = value; + +// i.e. +myPointer : nullable*Int = null; + +// arrays +arrayName : [size]Type = [size]Type{value,...}; + +// i.e. +myArray : [4]Int = [4]Int{11,2,23,4}; +myArray2 := [4]Int{11,2,23,4}; // identical to the last one +myArray3 : [4]Int; + +// function +// function definition +functionName :: (paramName:ParamType, ...) -> ReturnType { + // body +} + +// i.e. +foo :: () { + // no param no returning (void) +} +bar :: (myInt: Int,myFloat:Float) -> Int { + // 2 param Int , Float and Int return Type +} +boo :: (myInt: Int) -> Int, Int { + // this is the way to return multiple values + return myInt, myInt; +} + +// function calling +functionName(paramters, ...); + +// i.e +foo(); +retValue := bar(myInt,myFloat); +retValue0,retValue2 := boo(myInt); + +// lambdas +// lambdas are just functions that have been assigned to variables + +// structs (classes) +// definition +StructName :: struct { + // field + fieldName : Type; + + // constant field + constantFiledName : Type : value; + + // method (has this pointer) + methodName :: (paramName:ParamType, ...) -> ReturnType { + // body + } + + #static + staticFieldName : Type = value; + + // static method (has no this pointer) + #static + staticMethodName :: (paramName:ParamType, ...) -> ReturnType { + // body + } +} + +// instancing +instanceName : StructName = StructName { + .fieldName = value, + ... +}; + +// instancing as a pointer +pointerInstance := new StructName{ + .fieldName = value, + ... +} + +// with undefined value +pointerInstance := new StructName; + +// if +if (condition) { + // body +} + +// while +while (condition) { + // body +} + +// for +for ( definition ; condition ; step ) { + // body +} + +// i.e. +for (i := 0;i < 10;++i){ + // body +} + +// for each +for ( variableName in iteratorName ){ + // body +} @@ -0,0 +1,87 @@ +#!/bin/sh + +project_name="felan" + +function compile(){ + if [ ! -d build ]; then + if [ $(mkdir build) ]; then # if error + echo "cannot make 'build' dir" + exit + fi + fi + + gcc -Wall -Wextra -std=gnu23 -I./src/ -O3 \ + ./src/main.c \ + ./src/compiler/lexer/lexer.c \ + ./src/compiler/parser/parser.c \ + ./src/compiler/code_generator/code_generator.c \ + ./src/vm/runner/runner.c \ + ./src/utils/memory/memory.c \ + ./src/utils/file.c \ + ./src/utils/time.c \ + -o "build/$project_name" +} + +function run(){ + compile && "./build/$project_name" "$@" + printf "\n$?\n" +} + +function make_lsp_files(){ + bear -- ./project compile +} + +function clean(){ + if [ -d ./build ]; then + rm -r ./build/ + fi + if [ "$(ls ./test/generated_output/)" ]; then + rm -r ./test/generated_output/* + fi +} + +function test(){ + clean && compile + if [ $? -ne 0 ]; then + echo "compile error" + exit + fi + + for file_path in ./test/input/*; do + local file_name=$(basename "$file_path") + local start=`date +%s.%N` + local ret=$(eval "./build/$project_name $file_path > ./test/generated_output/$file_name") + local end=`date +%s.%N` + echo "$(jq -n $end-$start)s" + $ret && \ + cmp --silent "./test/generated_output/$file_name" "./test/output/$file_name" && \ + echo "PASSED $file_path" || \ + echo "FAILED $file_path" + done +} + +function val_test(){ + clean && compile + if [ $? -ne 0 ]; then + echo "compile error" + exit + fi + + for file_path in ./test/input/*; do + local file_name=$(basename "$file_path") + valgrind --leak-check=full --track-origins=yes --show-leak-kinds=all -s "./build/$project_name" "$file_path" ./test/generated_output/ && \ + cmp --silent "./test/generated_output/$file_name" "./test/output/$file_name" && \ + echo "PASSED $file_path" || \ + echo "FAILED $file_path" + done +} + +function val_run(){ + compile && valgrind --leak-check=full --track-origins=yes --show-leak-kinds=all -s "./build/$project_name" "$@" + printf "\n$?\n" +} + +function_name="$1" +shift + +$function_name "$@" diff --git a/src/compiler/code_generator/code_generator.c b/src/compiler/code_generator/code_generator.c new file mode 100644 index 0000000..0beca33 --- /dev/null +++ b/src/compiler/code_generator/code_generator.c @@ -0,0 +1,199 @@ +#include "code_generator.h" + +#include <compiler/parser/parser.h> +#include <stdio.h> +#include <stdlib.h> + +#include "utils/memory/memory.h" +#include "utils/types.h" + +const char *COMMAND_STRINGS[] = { + "COMMAND_NONE", + "COMMAND_PRINT", + "COMMAND_PUSH_STRING", +}; + +void printInstruction(Instruction instruction) { + printf("%s", COMMAND_STRINGS[instruction.command]); + switch (instruction.command) { + case COMMAND_NONE: + case COMMAND_PRINT: + break; + case COMMAND_PUSH_STRING: + SizedString *sizedString = instruction.operand; + printf(" '%.*s'", (int)sizedString->size, sizedString->str); + break; + default: + fprintf(stderr, "bad instruction %d\n", instruction.command); + } + printf("\n"); +} + +void printInstructions(Instructions instructions) { + for (size_t i = 0; i < instructions.size; ++i) { + printInstruction(instructions.instructions[i]); + } +} + +void deleteInstruction(Instruction instruction) { + switch (instruction.command) { + case COMMAND_NONE: + case COMMAND_PRINT: + break; + case COMMAND_PUSH_STRING: + SizedString *sizedString = instruction.operand; + free(sizedString->str); + free(sizedString); + break; + default: + fprintf(stderr, "bad instruction %d\n", instruction.command); + } +} + +void deleteInstructions(Instructions instructions) { + for (size_t i = 0; i < instructions.size; ++i) { + deleteInstruction(instructions.instructions[i]); + } + free(instructions.instructions); +} + +Instructions codeGenerator(ParsedNode *root) { + const ScopeMetadata *metadata = root->metadata; + + size_t instructions_size = 10; + Instruction *instructions = + a404m_malloc(instructions_size * sizeof(Instruction)); + size_t instructions_inserted = 0; + + for (size_t i = 0; i < metadata->operands_size; ++i) { + ParsedNode *node = metadata->operands[i]; + if (!nodeToInstruction(node, &instructions, &instructions_size, + &instructions_inserted)) { + goto RETURN_ERROR; + } + } + + Instructions result = { + .instructions = a404m_realloc( + instructions, instructions_inserted * sizeof(Instruction)), + .size = instructions_inserted, + }; + return result; + +RETURN_ERROR: + const Instructions error = { + .instructions = NULL, + .size = ERROR_SIZE, + }; + return error; +} + +bool nodeToInstruction(ParsedNode *node, Instruction **instructions, + size_t *instructions_size, + size_t *instructions_inserted) { + switch (node->token) { + // TODO: this is wrong when you want functions + case PARSED_TOKEN_PARENTHESIS: { + const ScopeMetadata *metadata = node->metadata; + for (size_t i = 0; i < metadata->operands_size; ++i) { + if (!nodeToInstruction(metadata->operands[i], instructions, + instructions_size, instructions_inserted)) { + return false; + } + } + return true; + } + case PARSED_TOKEN_EOL: + return nodeToInstruction(node->metadata, instructions, instructions_size, + instructions_inserted); + break; + case PARSED_TOKEN_PRINT: + if (nodeToInstruction(node->metadata, instructions, instructions_size, + instructions_inserted)) { + const Instruction instruction = { + .command = COMMAND_PRINT, + .operand = NULL, + }; + insertInstruction(instruction, instructions, instructions_size, + instructions_inserted); + return true; + } else { + return false; + } + break; + case PARSED_TOKEN_VALUE_STRING: { + SizedString *string = nodeToString(node); + if (string == NULL) { + return false; + } + const Instruction instruction = { + .command = COMMAND_PUSH_STRING, + .operand = string, + }; + insertInstruction(instruction, instructions, instructions_size, + instructions_inserted); + return true; + } + default: + fprintf(stderr, "unexpected token %s\n", + PARSED_TOKEN_STRINGS[node->token]); + return false; + } +} + +void insertInstruction(const Instruction instruction, + Instruction **restrict instructions, + size_t *restrict instructions_size, + size_t *restrict instructions_inserted) { + if (*instructions_inserted == *instructions_size) { + *instructions_size += *instructions_size / 2 + 1; + *instructions = + a404m_realloc(*instructions, *instructions_size * sizeof(Instruction)); + } + (*instructions)[*instructions_inserted] = instruction; + ++*instructions_inserted; +} + +SizedString *nodeToString(ParsedNode const *node) { + const char *strBegin = node->strBegin + 1; + const char *strEnd = node->strEnd - 1; + + char *str = a404m_malloc((strEnd - strBegin + 1) * sizeof(char)); + size_t inserted = 0; + + for (char const *iter = strBegin; iter < strEnd; ++iter) { + char c = *iter; + if (c == '\\') { + if (++iter < strEnd) { + switch (*iter) { + case '\'': + c = '\''; + break; + case '\"': + c = '\"'; + break; + case 'n': + c = '\n'; + break; + default: + fprintf(stderr, "bad string, bad '\\'\n"); + goto RETURN_ERROR; + } + } else { + fprintf(stderr, "bad string, bad '\\'\n"); + goto RETURN_ERROR; + } + } + str[inserted] = c; + ++inserted; + } + + str[inserted] = '\0'; + SizedString *string = a404m_malloc(sizeof(SizedString)); + string->str = a404m_realloc(str, (inserted + 1) * sizeof(char)); + string->size = inserted; + return string; +RETURN_ERROR: + free(str); + return NULL; +} diff --git a/src/compiler/code_generator/code_generator.h b/src/compiler/code_generator/code_generator.h new file mode 100644 index 0000000..1a9b4ff --- /dev/null +++ b/src/compiler/code_generator/code_generator.h @@ -0,0 +1,41 @@ +#pragma once + +#include <stdio.h> + +#include "compiler/parser/parser.h" +typedef enum Command { + COMMAND_NONE = 0, + COMMAND_PRINT, + COMMAND_PUSH_STRING, +} Command; + +extern const char *COMMAND_STRINGS[]; + +typedef struct Instruction { + Command command; + void *operand; +} Instruction; + +typedef struct Instructions { + Instruction *restrict instructions; + size_t size; +} Instructions; + +extern void printInstruction(Instruction instruction); +extern void printInstructions(Instructions instructions); + +extern void deleteInstruction(Instruction instruction); +extern void deleteInstructions(Instructions instructions); + +extern Instructions codeGenerator(ParsedNode *root); + +extern bool nodeToInstruction(ParsedNode *node, Instruction **instructions, + size_t *instructions_size, + size_t *instructions_inserted); + +extern void insertInstruction(const Instruction instruction, + Instruction **restrict instructions, + size_t *restrict instructions_size, + size_t *restrict instructions_inserted); + +extern SizedString *nodeToString(ParsedNode const *node); diff --git a/src/compiler/lexer/lexer.c b/src/compiler/lexer/lexer.c new file mode 100644 index 0000000..35631f5 --- /dev/null +++ b/src/compiler/lexer/lexer.c @@ -0,0 +1,307 @@ +#include "lexer.h" + +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <utils/memory/memory.h> + +const char *TOKEN_STRINGS[] = { + "TOKEN_NONE", + "TOKEN_IDENTIFIER", + "TOKEN_KEYWORD_PRINT", + "TOKEN_NUMBER", + "TOKEN_STRING", + "TOKEN_OPERATOR", + "TOKEN_OPERATOR_PARENTHESES_OPEN", + "TOKEN_OPERATOR_PARENTHESES_CLOSE", + "TOKEN_OPERATOR_CURLY_BRACKET_OPEN", + "TOKEN_OPERATOR_CURLY_BRACKET_CLOSE", + "TOKEN_OPERATOR_ASSIGN", + "TOKEN_OPERATOR_EQUAL", + "TOKEN_OPERATOR_COLON", + "TOKEN_OPERATOR_EOL", + "TOKEN_SYMBOL", + "TOKEN_PARSED", +}; + +static const char *KEYWORDS_STRINGS[] = { + "print", +}; +static const Token KEYWORDS_TOKENS[] = { + TOKEN_KEYWORD_PRINT, +}; +static const size_t KEYWORDS_SIZE = sizeof(KEYWORDS_STRINGS) / sizeof(char *); + +static const char *OPERATORS_STRINGS[] = { + "(", ")", "{", "}", "=", "==", ":", ";", +}; +static const Token OPERATORS_TOKENS[] = { + TOKEN_OPERATOR_PARENTHESES_OPEN, + TOKEN_OPERATOR_PARENTHESES_CLOSE, + TOKEN_OPERATOR_CURLY_BRACKET_OPEN, + TOKEN_OPERATOR_CURLY_BRACKET_CLOSE, + TOKEN_OPERATOR_ASSIGN, + TOKEN_OPERATOR_EQUAL, + TOKEN_OPERATOR_COLON, + TOKEN_OPERATOR_EOL, +}; +static const size_t OPERATOR_SIZE = sizeof(OPERATORS_STRINGS) / sizeof(char *); + +void printNodes(Nodes nodes) { + for (size_t i = 0; i < nodes.size; ++i) { + const Node node = nodes.nodes[i]; + printf("{'%.*s',%s}\n", (int)(node.strEnd - node.strBegin), node.strBegin, + TOKEN_STRINGS[node.token]); + } +} + +void deleteNodes(Nodes nodes) { free(nodes.nodes); } + +Nodes lexer(char const *restrict str) { + size_t nodes_size = 10; + Node *nodes = a404m_malloc(nodes_size * sizeof(Node)); + size_t nodes_inserted = 0; + + Node node = { + .strBegin = str, + .strEnd = str, + .token = TOKEN_NONE, + }; + + for (int i = 0;; ++i) { + const char c = str[i]; + if (c == '\0') { + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_NONE); + break; + } + + if (c == '/') { + const char follow = str[i + 1]; + if (follow == '/') { + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_NONE); + for (i += 2; str[i] != '\0' && str[i] != '\n'; ++i); + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_NONE); + if (str[i] == '\0') { + goto RETURN_SUCCESS; + } + continue; + } else if (follow == '*') { + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_NONE); + for (i += 2; str[i] != '\0' && str[i + 1] != '\0' && + (str[i] != '*' || str[i + 1] != '/'); + ++i); + if (str[i] == '\0' || str[i + 1] == '\0') { + perror("expected multi line comment to end\n"); + exit(1); + } + i += 1; + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_NONE); + if (str[i] == '\0') { + goto RETURN_SUCCESS; + } + continue; + } + } + if (isSpace(c)) { + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_NONE); + } else if (isIdentifier(c)) { + if (node.token != TOKEN_IDENTIFIER && node.token != TOKEN_SYMBOL) { + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_IDENTIFIER); + } + } else if (isIdentifierSymbol(c)) { + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_IDENTIFIER); + for (++i;; ++i) { + const char current = str[i]; + if (current == c) { + break; + } else if (current == '\0') { + fprintf(stderr, "expected %c to end\n", c); + exit(1); + } + } + ++node.strBegin; + push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node, str, + i, TOKEN_NONE); + } else if (isNumber(c)) { + if (node.token != TOKEN_NUMBER && node.token != TOKEN_IDENTIFIER && + node.token != TOKEN_SYMBOL) { + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_NUMBER); + } + } else if (isString(c)) { + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_STRING); + + for (++i;; ++i) { + const char current = str[i]; + if (current == c) { + break; + } else if (current == '\0') { + fprintf(stderr, "expected %c to end\n", c); + exit(1); + } + } + + ++i; + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_NONE); + --i; + } else if (isOperator(c)) { + if (node.token == TOKEN_OPERATOR) { + const Token token = getOperator(node.strBegin, str + i + 1); + if (token != TOKEN_NONE) { + continue; + } else { + node.token = getOperator(node.strBegin, str + i); + if (node.token == TOKEN_NONE) { + fprintf(stderr, "unknown operator '%.*s'\n", + (int)(str + i - node.strBegin), node.strBegin); + exit(1); + } + push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node, + str, i, TOKEN_OPERATOR); + } + } else { + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_OPERATOR); + } + } else if (isSymbol(c)) { + if (node.token != TOKEN_SYMBOL) { + push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i, + TOKEN_SYMBOL); + } + } else { + fprintf(stderr, "unexpected char '%c'\n", c); + exit(1); + } + } + +RETURN_SUCCESS: + Nodes result = { + .nodes = a404m_realloc(nodes, nodes_inserted * sizeof(Node)), + .size = nodes_inserted, + }; + + return result; +} + +void push_if_not_empty(Node **restrict nodes, size_t *restrict nodes_size, + size_t *restrict nodes_inserted, Node *restrict node, + char const *restrict str, int i, Token token) { + if (node->token != TOKEN_NONE) { + if (*nodes_size == *nodes_inserted) { + *nodes_size += *nodes_size / 2 + 1; + *nodes = a404m_realloc(*nodes, *nodes_size * sizeof(Node)); + } + node->strEnd = str + i; + if (node->token == TOKEN_IDENTIFIER) { + const Token foundToken = getKeyword(node->strBegin, node->strEnd); + if (foundToken != TOKEN_NONE) { + node->token = foundToken; + } + } else if (node->token == TOKEN_OPERATOR) { + const Token foundToken = getOperator(node->strBegin, node->strEnd); + if (foundToken != TOKEN_NONE) { + node->token = foundToken; + } + } + + (*nodes)[*nodes_inserted] = *node; + ++*nodes_inserted; + } + node->strBegin = str + i; + node->token = token; +} + +void push_clear_without_check(Node **restrict nodes, + size_t *restrict nodes_size, + size_t *restrict nodes_inserted, Node *node, + char const *restrict str, int i, Token token) { + if (*nodes_size == *nodes_inserted) { + *nodes_size += *nodes_size / 2 + 1; + *nodes = a404m_realloc(*nodes, *nodes_size * sizeof(Node)); + } + node->strEnd = str + i; + (*nodes)[*nodes_inserted] = *node; + ++*nodes_inserted; + node->strBegin = str + i; + node->token = token; +} + +bool isSpace(char c) { return isspace(c); } + +bool isIdentifier(char c) { + return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_'; +} + +bool isIdentifierSymbol(char c) { return c == '`'; } + +bool isNumber(char c) { return '0' <= c && c <= '9'; } + +bool isString(char c) { return c == '"' || c == '\''; } + +bool isOperator(char c) { + switch (c) { + case '(': + case ')': + case '[': + case ']': + case '{': + case '}': + case '*': + case '/': + case '%': + case '+': + case '-': + case ':': + case '!': + case '=': + case '<': + case '>': + case '&': + case '|': + case '.': + case ',': + case ';': + return true; + default: + return false; + } +} + +bool isSymbol(char c) { return c == '#'; } + + +Token getKeyword(char const *strBegin, char const *strEnd) { + const size_t strSize = strEnd - strBegin; + + for (size_t i = 0; i < KEYWORDS_SIZE; ++i) { + const char *search = KEYWORDS_STRINGS[i]; + if (strlen(search) == strSize && strncmp(search, strBegin, strSize) == 0) { + return KEYWORDS_TOKENS[i]; + } + } + + return TOKEN_NONE; +} +Token getOperator(char const *strBegin, char const *strEnd) { + const size_t strSize = strEnd - strBegin; + + for (size_t i = 0; i < OPERATOR_SIZE; ++i) { + const char *search = OPERATORS_STRINGS[i]; + if (strlen(search) == strSize && strncmp(search, strBegin, strSize) == 0) { + return OPERATORS_TOKENS[i]; + } + } + + return TOKEN_NONE; +} diff --git a/src/compiler/lexer/lexer.h b/src/compiler/lexer/lexer.h new file mode 100644 index 0000000..7d60101 --- /dev/null +++ b/src/compiler/lexer/lexer.h @@ -0,0 +1,66 @@ +#pragma once + +#include <stddef.h> +#include <utils/memory/memory.h> +#include <utils/types.h> + +typedef enum Token { + TOKEN_NONE = 0, + TOKEN_IDENTIFIER, + TOKEN_KEYWORD_PRINT, + TOKEN_NUMBER, + TOKEN_STRING, + TOKEN_OPERATOR, + TOKEN_OPERATOR_PARENTHESES_OPEN, + TOKEN_OPERATOR_PARENTHESES_CLOSE, + TOKEN_OPERATOR_CURLY_BRACKET_OPEN, + TOKEN_OPERATOR_CURLY_BRACKET_CLOSE, + TOKEN_OPERATOR_ASSIGN, + TOKEN_OPERATOR_EQUAL, + TOKEN_OPERATOR_COLON, + TOKEN_OPERATOR_EOL, + TOKEN_SYMBOL, + TOKEN_PARSED, +} Token; + +extern const char *TOKEN_STRINGS[]; + +typedef struct Node { + char const *strBegin; + char const *strEnd; + Token token; + void *parsedNode; +} Node; + +typedef struct Nodes { + Node *nodes; + size_t size; +} Nodes; + +extern void printNodes(Nodes nodes); + +extern void deleteNodes(Nodes nodes); + +extern Nodes lexer(char const *restrict str); + +extern void push_if_not_empty(Node **restrict nodes, + size_t *restrict nodes_size, + size_t *restrict nodes_inserted, + Node *restrict node, char const *restrict str, + int i, Token token); +extern void push_clear_without_check(Node **restrict nodes, + size_t *restrict nodes_size, + size_t *restrict nodes_inserted, + Node *node, char const *restrict str, + int i, Token token); + +extern bool isSpace(char c); +extern bool isIdentifier(char c); +extern bool isIdentifierSymbol(char c); +extern bool isNumber(char c); +extern bool isString(char c); +extern bool isOperator(char c); +extern bool isSymbol(char c); + +extern Token getKeyword(char const *strBegin, char const *strEnd); +extern Token getOperator(char const *strBegin, char const *strEnd); diff --git a/src/compiler/parser/parser.c b/src/compiler/parser/parser.c new file mode 100644 index 0000000..c679d39 --- /dev/null +++ b/src/compiler/parser/parser.c @@ -0,0 +1,351 @@ +#include "parser.h" + +#include <compiler/lexer/lexer.h> +#include <stdio.h> +#include <stdlib.h> +#include <utils/memory/memory.h> + +const char *PARSED_TOKEN_STRINGS[] = { + "PARSED_TOKEN_NONE", "PARSED_TOKEN_ROOT", + "PARSED_TOKEN_PRINT", "PARSED_TOKEN_PARENTHESIS", + "PARSED_TOKEN_VALUE_STRING", "PARSED_TOKEN_EOL", +}; + +static const ParseOrder PARSE_ORDER[] = { + { + .ltr = false, + .size = 2, + .tokens = + { + TOKEN_OPERATOR_PARENTHESES_CLOSE, + TOKEN_OPERATOR_CURLY_BRACKET_CLOSE, + }, + }, + { + .ltr = true, + .size = 3, + .tokens = + { + TOKEN_OPERATOR_ASSIGN, + TOKEN_OPERATOR_EQUAL, + TOKEN_OPERATOR_COLON, + }, + }, + { + .ltr = true, + .size = 1, + .tokens = + { + TOKEN_KEYWORD_PRINT, + }, + }, + { + .ltr = true, + .size = 1, + .tokens = + { + TOKEN_STRING, + }, + }, + { + .ltr = true, + .size = 1, + .tokens = + { + TOKEN_OPERATOR_EOL, + }, + }, +}; +static size_t PARSE_ORDER_SIZE = sizeof(PARSE_ORDER) / sizeof(ParseOrder); + +ParsedNode *newParsedNode(char const *strBegin, char const *strEnd, + ParsedToken token, void *metadata, + ParsedNode *parent) { + ParsedNode *parsedNode = a404m_malloc(sizeof(*parsedNode)); + parsedNode->strBegin = strBegin; + parsedNode->strEnd = strEnd; + parsedNode->token = token; + parsedNode->metadata = metadata; + parsedNode->parent = parent; + return parsedNode; +} + +void _printParsedNode(const ParsedNode *parsedNode, int indent) { + for (int i = 0; i < indent; ++i) printf(" "); + printf("{token=%s", PARSED_TOKEN_STRINGS[parsedNode->token]); + switch (parsedNode->token) { + case PARSED_TOKEN_NONE: + break; + case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_ROOT: { + ++indent; + const ScopeMetadata *metadata = parsedNode->metadata; + printf(",operands=[\n"); + for (size_t i = 0; i < metadata->operands_size; ++i) { + _printParsedNode(metadata->operands[i], indent + 1); + } + for (int i = 0; i < indent; ++i) printf(" "); + printf("]\n"); + --indent; + } break; + case PARSED_TOKEN_PRINT: { + const PrintMetadata *metadata = parsedNode->metadata; + printf(",operand=\n"); + _printParsedNode(metadata, indent + 1); + } break; + case PARSED_TOKEN_VALUE_STRING: + printf("\n"); + break; + case PARSED_TOKEN_EOL: + EOLMetadata *metadata = parsedNode->metadata; + printf(",operand=\n"); + _printParsedNode(metadata, indent + 1); + break; + default: + fprintf(stderr, "bad parsed token %d at compiler line %d\n", + parsedNode->token, __LINE__); + exit(1); + } + for (int i = 0; i < indent; ++i) printf(" "); + printf("}\n"); +} + +void printParsedNode(const ParsedNode *parsedNode) { + _printParsedNode(parsedNode, 0); +} + +ParsedNode *getUntilCommonFather(ParsedNode *parsedNode, ParsedNode *parent) { + while (parsedNode != NULL && parsedNode->parent != parent) { + parsedNode = parsedNode->parent; + } + return parsedNode; +} + +void deleteParsedNode(ParsedNode *parsedNode) { + switch (parsedNode->token) { + case PARSED_TOKEN_NONE: + break; + case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_ROOT: { + ScopeMetadata *metadata = parsedNode->metadata; + for (size_t i = 0; i < metadata->operands_size; ++i) { + deleteParsedNode(metadata->operands[i]); + } + free(metadata->operands); + free(metadata); + } break; + case PARSED_TOKEN_PRINT: { + PrintMetadata *metadata = parsedNode->metadata; + deleteParsedNode(metadata); + } break; + case PARSED_TOKEN_VALUE_STRING: + break; + case PARSED_TOKEN_EOL: + EOLMetadata *metadata = parsedNode->metadata; + deleteParsedNode(metadata); + break; + default: + fprintf(stderr, "bad parsed token %d at compiler line %d\n", + parsedNode->token, __LINE__); + exit(1); + } + free(parsedNode); +} + +ParsedNode *parser(Nodes lexedNodes) { + ParsedNode *root = a404m_malloc(sizeof(ParsedNode)); + root->token = PARSED_TOKEN_ROOT; + root->parent = NULL; + root->metadata = + parserScope(lexedNodes.nodes, lexedNodes.nodes + lexedNodes.size, root); + if (root->metadata == NULL) { + free(root); + return NULL; + } + return root; +} + +ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent) { + if (nodesBegin >= nodesEnd) { + return NULL; + } + + size_t nodes_size = 0; + ParsedNode **nodes = a404m_malloc(nodes_size * sizeof(ParsedNode *)); + size_t nodes_inserted = 0; + + for (size_t i = 0; i < PARSE_ORDER_SIZE; ++i) { + const ParseOrder order = PARSE_ORDER[i]; + + for (Node *node = order.ltr ? nodesBegin : (nodesEnd - 1); + nodesBegin <= node && node < nodesEnd; order.ltr ? ++node : --node) { + for (size_t j = 0; j < order.size; ++j) { + const Token token = order.tokens[j]; + if (node->token == token) { + ParsedNode *parsedNode = + parseNode(nodesBegin, nodesEnd, node, parent); + if (parsedNode == NULL) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + goto RETURN_ERROR; + } + if (nodes_size == nodes_inserted) { + nodes_size += nodes_size / 2 + 1; + nodes = a404m_realloc(nodes, nodes_size * sizeof(ParsedNode *)); + } + nodes[nodes_inserted] = parsedNode; + ++nodes_inserted; + } + } + } + } + + for (Node *node = nodesBegin; node < nodesEnd; ++node) { + if (node->token != TOKEN_PARSED) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + goto RETURN_ERROR; + } + } + + ScopeMetadata *metadata = a404m_malloc(sizeof(ScopeMetadata)); + metadata->operands = a404m_malloc(nodes_inserted * sizeof(ParsedNode *)); + + nodes_size = nodes_inserted; + nodes_inserted = 0; + + for (size_t i = 0; i < nodes_size; ++i) { + ParsedNode *currentNode = nodes[i]; + if (currentNode->parent == parent) { + metadata->operands[nodes_inserted] = currentNode; + ++nodes_inserted; + } + } + free(nodes); + + metadata->operands = + a404m_realloc(metadata->operands, nodes_inserted * sizeof(ParsedNode *)); + metadata->operands_size = nodes_inserted; + + return metadata; + +RETURN_ERROR: + free(nodes); + return NULL; +} + +ParsedNode *parseNode(Node *nodesBegin, Node *nodesEnd, Node *node, + ParsedNode *parent) { + switch (node->token) { + case TOKEN_KEYWORD_PRINT: + return parserPrint(nodesBegin, nodesEnd, node, parent); + case TOKEN_OPERATOR_PARENTHESES_CLOSE: + return parseParenthesis(nodesBegin, nodesEnd, node, parent); + case TOKEN_STRING: + return parseString(node, parent); + case TOKEN_OPERATOR_EOL: + return parseEOL(nodesBegin, nodesEnd, node, parent); + default: + fprintf(stderr, "unexpected token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + return NULL; + } +} + +ParsedNode *parserPrint(Node *, Node *nodesEnd, Node *node, + ParsedNode *parent) { + Node *follow = node + 1; + + if (follow < nodesEnd && follow->token == TOKEN_PARSED) { + ParsedNode *root = a404m_malloc(sizeof(ParsedNode)); + root->strBegin = node->strBegin; + root->strEnd = node->strEnd; + root->token = PARSED_TOKEN_PRINT; + root->parent = parent; + + ParsedNode *operand = root->metadata = + getUntilCommonFather(follow->parsedNode, parent); + if (operand == NULL || operand->token != PARSED_TOKEN_PARENTHESIS) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + free(root); + return NULL; + } + operand->parent = root; + + node->parsedNode = root; + node->token = TOKEN_PARSED; + return root; + } + return NULL; +} + +ParsedNode *parseParenthesis(Node *nodesBegin, Node *, Node *node, + ParsedNode *parent) { + Node *opening = NULL; + Node *closing = node; + for (Node *iter = closing - 1; iter >= nodesBegin; --iter) { + if (iter->token == TOKEN_OPERATOR_PARENTHESES_OPEN) { + opening = iter; + break; + } + } + if (opening == NULL) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + return NULL; + } + + ParsedNode *parsedNode = closing->parsedNode = opening->parsedNode = + a404m_malloc(sizeof(*parsedNode)); + parsedNode->strBegin = opening->strBegin; + parsedNode->strEnd = closing->strEnd; + parsedNode->token = PARSED_TOKEN_PARENTHESIS; + parsedNode->parent = parent; + parsedNode->metadata = parserScope(opening + 1, closing, parsedNode); + + if (parsedNode->metadata == NULL) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + free(parsedNode); + return NULL; + } + + opening->token = closing->token = TOKEN_PARSED; + return parsedNode; +} + +ParsedNode *parseString(Node *node, ParsedNode *parent) { + node->token = TOKEN_PARSED; + return node->parsedNode = + newParsedNode(node->strBegin, node->strEnd, + PARSED_TOKEN_VALUE_STRING, NULL, parent); +} + +ParsedNode *parseEOL(Node *nodesBegin, Node *, Node *node, ParsedNode *parent) { + Node *before = node - 1; + if (before < nodesBegin && before->token != TOKEN_PARSED) { + return NULL; + } + + ParsedNode *experession = getUntilCommonFather(before->parsedNode, parent); + if (experession == NULL) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + return NULL; + } + + ParsedNode *root = a404m_malloc(sizeof(ParsedNode)); + root->strBegin = node->strBegin; + root->strEnd = node->strEnd; + root->token = PARSED_TOKEN_EOL; + root->parent = parent; + + root->metadata = experession; + experession->parent = root; + + node->token = TOKEN_PARSED; + node->parsedNode = root; + return root; +} diff --git a/src/compiler/parser/parser.h b/src/compiler/parser/parser.h new file mode 100644 index 0000000..510e4e9 --- /dev/null +++ b/src/compiler/parser/parser.h @@ -0,0 +1,62 @@ +#pragma once + +#include <compiler/lexer/lexer.h> + +typedef enum ParsedToken { + PARSED_TOKEN_NONE = 0, + PARSED_TOKEN_ROOT, + PARSED_TOKEN_PRINT, + PARSED_TOKEN_PARENTHESIS, + PARSED_TOKEN_VALUE_STRING, + PARSED_TOKEN_EOL, +} ParsedToken; + +extern const char *PARSED_TOKEN_STRINGS[]; + +typedef struct ParseOrder { + bool ltr; + size_t size; + Token tokens[10]; +} ParseOrder; + +typedef struct ScopeMetadata { + struct ParsedNode **operands; + size_t operands_size; +} ScopeMetadata; + +typedef struct ParsedNode { + char const *strBegin; + char const *strEnd; + ParsedToken token; + void *metadata; + struct ParsedNode *parent; +} ParsedNode; + +typedef ParsedNode PrintMetadata; +typedef ParsedNode EOLMetadata; + +extern ParsedNode *newParsedNode(char const *strBegin, char const *strEnd, + ParsedToken token, void *metadata, + ParsedNode *parent); +extern void _printParsedNode(const ParsedNode *parsedNode,int indent); +extern void printParsedNode(const ParsedNode *parsedNode); +extern ParsedNode *getUntilCommonFather(ParsedNode *parsedNode,ParsedNode *parent); +extern void deleteParsedNode(ParsedNode *parsedNode); + +extern ParsedNode *parser(Nodes lexedNodes); + +extern ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent); + +extern ParsedNode *parseNode(Node *nodesBegin, Node *nodesEnd, Node *node, + ParsedNode *parent); + +extern ParsedNode *parserPrint(Node *nodesBegin, Node *nodesEnd, Node *node, + ParsedNode *parent); + +extern ParsedNode *parseParenthesis(Node *nodesBegin, Node *nodesEnd, + Node *node, ParsedNode *parent); + +extern ParsedNode *parseString(Node *node, ParsedNode *parent); + +extern ParsedNode *parseEOL(Node *nodesBegin, Node *nodesEnd,Node *node, ParsedNode *parent); diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..c40bfec --- /dev/null +++ b/src/main.c @@ -0,0 +1,104 @@ +#include <compiler/lexer/lexer.h> +#include <stdio.h> +#include <stdlib.h> +#include <utils/file.h> + +#include "compiler/code_generator/code_generator.h" +#include "compiler/parser/parser.h" +#include "utils/time.h" +#include "utils/types.h" +#include "vm/runner/runner.h" + +static const char *codes[] = { + "print(\"\");", +}; + +bool run(char const *restrict code) { + bool ranSuccess = false; + const Nodes nodes = lexer(code); + ParsedNode *parsedNode = parser(nodes); + if (parsedNode != NULL) { + Instructions instructions = codeGenerator(parsedNode); + if (instructions.size != ERROR_SIZE) { + ranSuccess = runner(instructions); + deleteInstructions(instructions); + } + deleteParsedNode(parsedNode); + } + deleteNodes(nodes); + return ranSuccess; +} + +Clock runWithPrint(char const *restrict code) { + Clock sum = 0; + Clock diff = 0; + printf("----code:\n%s\n----\n", code); + Clock start = getTimeInNano(); + const Nodes nodes = lexer(code); + diff += getTimeInNano() - start; + sum += diff; + printNodes(nodes); + printf("----lexing in %ldns\n", diff); + start = getTimeInNano(); + ParsedNode *parsedNode = parser(nodes); + diff = getTimeInNano() - start; + sum += diff; + printf("----parsing in %ldns\n", diff); + if (parsedNode != NULL) { + printParsedNode(parsedNode); + start = getTimeInNano(); + Instructions instructions = codeGenerator(parsedNode); + if (instructions.size != ERROR_SIZE) { + diff = getTimeInNano() - start; + sum += diff; + printf("----code_generator in %ldns\n", diff); + printInstructions(instructions); + start = getTimeInNano(); + bool ranSuccess = runner(instructions); + diff = getTimeInNano() - start; + sum += diff; + printf("----runner in %ldns\n", diff); + printf("ran sucessfully = %s\n", ranSuccess ? "true" : "false"); + printf("----sum %ldns\n", sum); + deleteInstructions(instructions); + } + deleteParsedNode(parsedNode); + } + deleteNodes(nodes); + return sum; +} + +Clock process() { + Clock sumAll = 0; + for (size_t i = 0; i < sizeof(codes) / sizeof(char *); ++i) { + Clock start = getTimeInNano(); + run(codes[i]); + sumAll += getTimeInNano() - start; + } + return sumAll; +} + +void runBenchmark() { + Clock sum = 0; + const int times = 10000; + for (int i = 0; i < times; ++i) { + sum += process(); + } + printf("\n\navg = %fns", ((float)sum) / times); +} + +int main(int argc, char *argv[]) { + /*runBenchmark();*/ + if (argc < 2) { + fprintf(stderr, "no file"); + return 1; + } + char *code = read_whole_file(argv[1]); + const bool ret = run(code); + free(code); + if (ret) { + return 0; + } else { + return 1; + } +} diff --git a/src/utils/file.c b/src/utils/file.c new file mode 100644 index 0000000..410c218 --- /dev/null +++ b/src/utils/file.c @@ -0,0 +1,22 @@ +#include "file.h" +#include <stdio.h> +#include <utils/memory/memory.h> + +char *read_whole_file(const char *path) { + FILE *file = fopen(path, "r"); + if (!file) { + fprintf(stderr, "could not open file at path '%s'\n", path); + return NULL; + } + fseek(file, 0, SEEK_END); + const size_t file_size = ftell(file); + fseek(file, 0, SEEK_SET); + + char *str = a404m_malloc((file_size + 1) * sizeof(char)); + + fread(str, file_size, 1, file); + str[file_size] = '\0'; + + fclose(file); + return str; +} diff --git a/src/utils/file.h b/src/utils/file.h new file mode 100644 index 0000000..541e69d --- /dev/null +++ b/src/utils/file.h @@ -0,0 +1,3 @@ +#pragma once + +extern char *read_whole_file(const char *path); diff --git a/src/utils/memory/memory.c b/src/utils/memory/memory.c new file mode 100644 index 0000000..d793bc7 --- /dev/null +++ b/src/utils/memory/memory.c @@ -0,0 +1,22 @@ +#include "memory.h" + +#include <stdlib.h> + +void *a404m_malloc(size_t size) { + if (size == 0) { + return NULL; + } else { + return malloc(size); + } +} + +void *a404m_realloc(void *restrict pointer, size_t size) { + if (size == 0) { + free(pointer); + return NULL; + } else if(pointer != NULL) { + return realloc(pointer, size); + }else{ + return malloc(size); + } +} diff --git a/src/utils/memory/memory.h b/src/utils/memory/memory.h new file mode 100644 index 0000000..1c5017f --- /dev/null +++ b/src/utils/memory/memory.h @@ -0,0 +1,6 @@ +#pragma once + +#include <stddef.h> + +extern void *a404m_malloc(size_t size); +extern void *a404m_realloc(void *restrict pointer,size_t size); diff --git a/src/utils/time.c b/src/utils/time.c new file mode 100644 index 0000000..729e26f --- /dev/null +++ b/src/utils/time.c @@ -0,0 +1,9 @@ +#include "time.h" +#include <bits/time.h> +#include <time.h> + +Clock getTimeInNano(){ + struct timespec t; + clock_gettime(CLOCK_MONOTONIC, &t); + return t.tv_sec*1000000000+t.tv_nsec; +} diff --git a/src/utils/time.h b/src/utils/time.h new file mode 100644 index 0000000..691b26b --- /dev/null +++ b/src/utils/time.h @@ -0,0 +1,7 @@ +#pragma once + +#include <stdint.h> + +typedef uint64_t Clock; + +extern Clock getTimeInNano(); diff --git a/src/utils/types.h b/src/utils/types.h new file mode 100644 index 0000000..df4d8b3 --- /dev/null +++ b/src/utils/types.h @@ -0,0 +1,18 @@ +#pragma once + +#include <stdint.h> +#include <stdio.h> + +#ifndef __cplusplus + #if (__STDC_VERSION__ < 202000L) + #include <stdint.h> +typedef enum bool : uint8_t { false = 0, true = 1 } bool; + #endif +#endif + +typedef struct SizedString { + char *str; + size_t size; +} SizedString; + +#define ERROR_SIZE INT64_MAX diff --git a/src/vm/runner/runner.c b/src/vm/runner/runner.c new file mode 100644 index 0000000..5e63395 --- /dev/null +++ b/src/vm/runner/runner.c @@ -0,0 +1,50 @@ +#include "runner.h" + +#include <stdio.h> +#include <stdlib.h> +#include <utils/memory/memory.h> +#include <utils/types.h> + +bool runner(Instructions instructions) { + size_t stack_size = 0; + void **stack = a404m_malloc(stack_size * sizeof(void *)); + size_t stack_inserted = 0; + for (size_t i = 0; i < instructions.size; ++i) { + if (!runInstruction(instructions.instructions[i], &stack, &stack_size, + &stack_inserted)) { + goto RETURN_ERROR; + } + } + + free(stack); + return true; + +RETURN_ERROR: + free(stack); + return false; +} + +bool runInstruction(Instruction instruction, void ***restrict stack, + size_t *restrict stack_size, + size_t *restrict stack_inserted) { + switch (instruction.command) { + case COMMAND_PRINT: { + const SizedString *string = (*stack)[*stack_inserted - 1]; + --*stack_inserted; + printf("%.*s", (int)string->size, string->str); + } break; + case COMMAND_PUSH_STRING: { + SizedString *string = instruction.operand; + if (*stack_inserted == *stack_size) { + *stack_size += *stack_size / 2 + 1; + *stack = a404m_realloc(*stack, *stack_size * sizeof(void *)); + } + (*stack)[*stack_inserted] = string; + ++*stack_inserted; + } break; + default: + fprintf(stderr, "unknown command '%d'\n", instruction.command); + return false; + } + return true; +} diff --git a/src/vm/runner/runner.h b/src/vm/runner/runner.h new file mode 100644 index 0000000..e59b447 --- /dev/null +++ b/src/vm/runner/runner.h @@ -0,0 +1,9 @@ +#pragma once + +#include <compiler/code_generator/code_generator.h> + +extern bool runner(Instructions instructions); + +extern bool runInstruction(Instruction instruction, void ***restrict stack, + size_t *restrict stack_size, + size_t *restrict stack_inserted); |