diff options
Diffstat (limited to 'src/compiler')
-rw-r--r-- | src/compiler/ast-tree.c | 1 | ||||
-rw-r--r-- | src/compiler/lexer.c | 5 | ||||
-rw-r--r-- | src/compiler/lexer.h | 1 | ||||
-rw-r--r-- | src/compiler/parser.c | 177 | ||||
-rw-r--r-- | src/compiler/parser.h | 4 |
5 files changed, 143 insertions, 45 deletions
diff --git a/src/compiler/ast-tree.c b/src/compiler/ast-tree.c index 52a41c9..3fe867e 100644 --- a/src/compiler/ast-tree.c +++ b/src/compiler/ast-tree.c @@ -166,6 +166,7 @@ AstTree *astTreeParse(ParserNode *parserNode, AstTreeVariables *variables, case PARSER_TOKEN_SYMBOL_EOL: case PARSER_TOKEN_SYMBOL_PARENTHESIS: case PARSER_TOKEN_SYMBOL_CURLY_BRACKET: + case PARSER_TOKEN_SYMBOL_COMMA: case PARSER_TOKEN_CONSTANT: case PARSER_TOKEN_IDENTIFIER: case PARSER_TOKEN_TYPE_VOID: diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c index 90eb36d..f248641 100644 --- a/src/compiler/lexer.c +++ b/src/compiler/lexer.c @@ -23,12 +23,13 @@ const char *LEXER_TOKEN_STRINGS[] = { "LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET", "LEXER_TOKEN_SYMBOL_FUNCTION_ARROW", "LEXER_TOKEN_SYMBOL_COLON", + "LEXER_TOKEN_SYMBOL_COMMA", "LEXER_TOKEN_NONE", }; const char *LEXER_SYMBOL_STRINGS[] = { - ";", "(", ")", "{", "}", "->", ":", + ";", "(", ")", "{", "}", "->", ":", ",", }; const LexerToken LEXER_SYMBOL_TOKENS[] = { LEXER_TOKEN_SYMBOL_EOL, @@ -38,6 +39,7 @@ const LexerToken LEXER_SYMBOL_TOKENS[] = { LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET, LEXER_TOKEN_SYMBOL_FUNCTION_ARROW, LEXER_TOKEN_SYMBOL_COLON, + LEXER_TOKEN_SYMBOL_COMMA, }; const size_t LEXER_SYMBOL_SIZE = sizeof(LEXER_SYMBOL_TOKENS) / sizeof(*LEXER_SYMBOL_TOKENS); @@ -178,6 +180,7 @@ void lexerPushClear(LexerNodeArray *array, size_t *array_size, char *iter, case LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET: case LEXER_TOKEN_SYMBOL_FUNCTION_ARROW: case LEXER_TOKEN_SYMBOL_COLON: + case LEXER_TOKEN_SYMBOL_COMMA: if (*array_size == array->size) { *array_size += 1 + *array_size / 2; array->data = diff --git a/src/compiler/lexer.h b/src/compiler/lexer.h index 3c3bc3f..f77522d 100644 --- a/src/compiler/lexer.h +++ b/src/compiler/lexer.h @@ -17,6 +17,7 @@ typedef enum LexerToken { LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET, LEXER_TOKEN_SYMBOL_FUNCTION_ARROW, LEXER_TOKEN_SYMBOL_COLON, + LEXER_TOKEN_SYMBOL_COMMA, LEXER_TOKEN_NONE, } LexerToken; diff --git a/src/compiler/parser.c b/src/compiler/parser.c index e9fec5f..12c7ca9 100644 --- a/src/compiler/parser.c +++ b/src/compiler/parser.c @@ -22,6 +22,7 @@ const char *PARSER_TOKEN_STRINGS[] = { "PARSER_TOKEN_SYMBOL_EOL", "PARSER_TOKEN_SYMBOL_CURLY_BRACKET", "PARSER_TOKEN_SYMBOL_PARENTHESIS", + "PARSER_TOKEN_SYMBOL_COMMA", "PARSER_TOKEN_FUNCTION_DEFINITION", @@ -34,8 +35,8 @@ static constexpr ParserOrder PARSER_ORDER[] = { .size = 2, .data = { - LEXER_TOKEN_IDENTIFIER, - LEXER_TOKEN_KEYWORD_VOID, + LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS, + LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET, }, }, { @@ -43,8 +44,8 @@ static constexpr ParserOrder PARSER_ORDER[] = { .size = 2, .data = { - LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS, - LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET, + LEXER_TOKEN_IDENTIFIER, + LEXER_TOKEN_KEYWORD_VOID, }, }, { @@ -66,10 +67,11 @@ static constexpr ParserOrder PARSER_ORDER[] = { }, { .ltr = true, - .size = 1, + .size = 2, .data = { LEXER_TOKEN_SYMBOL_EOL, + LEXER_TOKEN_SYMBOL_COMMA, }, }, }; @@ -129,6 +131,7 @@ void parserNodePrint(const ParserNode *node, int indent) { printf(" "); } goto RETURN_SUCCESS; + case PARSER_TOKEN_SYMBOL_COMMA: case PARSER_TOKEN_SYMBOL_EOL: { const ParserNodeEOLMetadata *metadata = node->metadata; printf(",\n"); @@ -201,6 +204,7 @@ void parserNodeDelete(ParserNode *node) { free(metadata); } goto RETURN_SUCCESS; + case PARSER_TOKEN_SYMBOL_COMMA: case PARSER_TOKEN_SYMBOL_EOL: { ParserNodeEOLMetadata *metadata = node->metadata; parserNodeDelete(metadata); @@ -271,34 +275,36 @@ bool parserNodeArray(LexerNode *begin, LexerNode *end, ParserNode *parent) { if (parserNode == NULL) { goto RETURN_ERROR; } - if (parsedNodes->size == parsedNodes_size) { - parsedNodes_size += parsedNodes_size / 2 + 1; - parsedNodes->data = - a404m_realloc(parsedNodes->data, - sizeof(*parsedNodes->data) * parsedNodes_size); - } - parsedNodes->data[parsedNodes->size] = parserNode; - parsedNodes->size += 1; } } } } - size_t inserted = 0; - for (size_t i = 0; i < parsedNodes->size; ++i) { - ParserNode *parserNode = parsedNodes->data[i]; - if (parserNode->parent == parent) { - // TODO: check here - if (inserted != i) { - parsedNodes->data[inserted] = parserNode; + for (LexerNode *iter = begin; iter < end; ++iter) { + ParserNode *pNode = iter->parserNode; + if (pNode->parent != parent) { + continue; + } else if (pNode == NULL) { + printLog("Bad child"); + goto RETURN_ERROR; + } + for (size_t i = 0; i < parsedNodes->size; ++i) { + if (parsedNodes->data[i] == pNode) { + goto CONTINUE; } - ++inserted; } + if (parsedNodes->size == parsedNodes_size) { + parsedNodes_size += parsedNodes_size / 2 + 1; + parsedNodes->data = a404m_realloc( + parsedNodes->data, sizeof(*parsedNodes->data) * parsedNodes_size); + } + parsedNodes->data[parsedNodes->size] = pNode; + parsedNodes->size += 1; + CONTINUE: } - parsedNodes->data = - a404m_realloc(parsedNodes->data, inserted * sizeof(*parsedNodes->data)); - parsedNodes->size = inserted; + parsedNodes->data = a404m_realloc( + parsedNodes->data, parsedNodes->size * sizeof(*parsedNodes->data)); parent->metadata = parsedNodes; @@ -332,6 +338,8 @@ ParserNode *parseNode(LexerNode *node, LexerNode *begin, LexerNode *end, return parserFunction(node, begin, end, parent); case LEXER_TOKEN_SYMBOL_COLON: return parserVariable(node, begin, end, parent); + case LEXER_TOKEN_SYMBOL_COMMA: + return parserComma(node, begin, parent); case LEXER_TOKEN_NONE: case LEXER_TOKEN_NUMBER: case LEXER_TOKEN_SYMBOL: @@ -395,6 +403,25 @@ ParserNode *parserEol(LexerNode *node, LexerNode *begin, ParserNode *parent) { return parserNode; } +ParserNode *parserComma(LexerNode *node, LexerNode *begin, ParserNode *parent) { + LexerNode *nodeBefore = node - 1; + ParserNode *parserNodeBefore; + if (nodeBefore < begin || nodeBefore->parserNode == NULL) { + printLog("Bad Comma after %s", LEXER_TOKEN_STRINGS[nodeBefore->token]); + return NULL; + } else { + parserNodeBefore = getUntilCommonParent(nodeBefore->parserNode, parent); + if (parserNodeBefore == NULL || !isExpression(parserNodeBefore)) { + printLog("Bad Comma"); + return NULL; + } + } + printLog("%s", PARSER_TOKEN_STRINGS[parserNodeBefore->token]); + return node->parserNode = parserNodeBefore->parent = newParserNode( + PARSER_TOKEN_SYMBOL_COMMA, node->str_begin, node->str_end, + (ParserNodeEOLMetadata *)parserNodeBefore, parent); +} + ParserNode *parserParenthesis(LexerNode *closing, LexerNode *begin, ParserNode *parent) { LexerNode *opening = NULL; @@ -462,8 +489,6 @@ ParserNode *parserFunction(LexerNode *node, LexerNode *begin, LexerNode *end, LexerNode *paramsNode = node - 1; LexerNode *retTypeNode = node + 1; LexerNode *bodyNode = node + 2; - // TODO: if body is not there then it is a function type not a function - // definition if (paramsNode < begin || paramsNode->parserNode == NULL) { NO_PARAMS: printLog("No params"); @@ -480,21 +505,35 @@ ParserNode *parserFunction(LexerNode *node, LexerNode *begin, LexerNode *end, if (bodyNode >= end || bodyNode->parserNode == NULL) { body = NULL; // TODO: implement + // if body is not there then it is a function type not a function + // definition printLog("Not implemented"); return NULL; } else { body = getUntilCommonParent(bodyNode->parserNode, parent); + if (body == NULL || body->token != PARSER_TOKEN_SYMBOL_CURLY_BRACKET) { + printLog("Bad body"); + return NULL; + } + ParserNodeArray *bodyArray = body->metadata; + for (size_t i = 0; i < bodyArray->size; ++i) { + if (bodyArray->data[i]->token != PARSER_TOKEN_SYMBOL_EOL) { + printLog("Bad body %s", + PARSER_TOKEN_STRINGS[bodyArray->data[i]->token]); + return NULL; + } + } } - // TODO: check parenthesis if (params->token != PARSER_TOKEN_SYMBOL_PARENTHESIS) { goto NO_PARAMS; } else if (!isType(retType)) { goto NO_RETURN_TYPE; + } else if (!isAllArguments(params->metadata)) { + printLog("Bad arguments"); + return NULL; } - // TODO: check body - ParserNodeFunctionDefnitionMetadata *metadata = a404m_malloc(sizeof(*metadata)); @@ -509,10 +548,14 @@ ParserNode *parserFunction(LexerNode *node, LexerNode *begin, LexerNode *end, ParserNode *parserVariable(LexerNode *node, LexerNode *begin, LexerNode *end, ParserNode *parent) { + ParserNode *variableNode = a404m_malloc(sizeof(*variableNode)); + variableNode->token = PARSER_TOKEN_CONSTANT; + variableNode->parent = parent; + LexerNode *nameNode = node - 1; if (nameNode < begin || nameNode->parserNode == NULL) { printLog("No name"); - return NULL; + goto RETURN_ERROR; } ParserNode *name = getUntilCommonParent(nameNode->parserNode, parent); @@ -520,37 +563,80 @@ ParserNode *parserVariable(LexerNode *node, LexerNode *begin, LexerNode *end, if (name->token != PARSER_TOKEN_IDENTIFIER) { printLog("Name should be identifier"); return NULL; + } else { + name->parent = variableNode; + variableNode->str_begin = name->str_begin; } LexerNode *node1 = node + 1; + ParserNode *type; - if (node1 >= end || node1->token != LEXER_TOKEN_SYMBOL_COLON) { + if (node1 >= end) { printLog("Bad variable definition"); return NULL; + } else if (node1->token == LEXER_TOKEN_SYMBOL_COLON) { + type = NULL; + node1->parserNode = variableNode; + } else if (node1->parserNode == NULL) { + printLog("Bad variable type with token '%s' %d", + LEXER_TOKEN_STRINGS[node1->token], node1->token); + return NULL; + } else { + type = getUntilCommonParent(node1->parserNode, parent); + if (type == NULL || !isType(type)) { + printLog("Bad variable type"); + return NULL; + } + type->parent = variableNode; + printLog("Here is"); } LexerNode *valueNode = node + 2; + ParserNode *value; if (valueNode >= end || valueNode->parserNode == NULL) { - printLog("No initialziation"); - return NULL; - } - - ParserNode *value = getUntilCommonParent(valueNode->parserNode, parent); - - if (!isValue(value)) { - printLog("No value"); - return NULL; + value = NULL; + } else { + value = getUntilCommonParent(valueNode->parserNode, parent); + if (!isValue(value)) { + printLog("No value"); + return NULL; + } + value->parent = variableNode; } ParserNodeVariableMetadata *metadata = a404m_malloc(sizeof(*metadata)); metadata->value = value; metadata->name = name; - metadata->type = NULL; + metadata->type = type; + + variableNode->metadata = metadata; + + if (value != NULL) { + variableNode->str_end = value->str_end; + } else { + variableNode->str_end = type->str_end; + } + + return node->parserNode = variableNode; - return name->parent = node1->parserNode = value->parent = node->parserNode = - newParserNode(PARSER_TOKEN_CONSTANT, name->str_begin, - value->str_end, metadata, parent); +RETURN_ERROR: + free(variableNode); + return NULL; +} + +bool isAllArguments(const ParserNodeArray *nodes) { + printLog("%d", nodes->size); + for (size_t i = 0; i < nodes->size; ++i) { + const ParserNode *node = nodes->data[i]; + if (node->token != PARSER_TOKEN_SYMBOL_COMMA && i + 1 != nodes->size) { + printLog("%d %s %d %.*s", i, PARSER_TOKEN_STRINGS[node->token], + node->token, (int)(node->str_end - node->str_begin), + node->str_begin); + return false; + } + } + return true; } bool isExpression(ParserNode *node) { @@ -566,6 +652,7 @@ bool isExpression(ParserNode *node) { case PARSER_TOKEN_TYPE_VOID: case PARSER_TOKEN_SYMBOL_EOL: case PARSER_TOKEN_SYMBOL_CURLY_BRACKET: + case PARSER_TOKEN_SYMBOL_COMMA: return false; case PARSER_TOKEN_NONE: } @@ -586,6 +673,7 @@ bool isType(ParserNode *node) { case PARSER_TOKEN_TYPE_FUNCTION: case PARSER_TOKEN_SYMBOL_EOL: case PARSER_TOKEN_SYMBOL_CURLY_BRACKET: + case PARSER_TOKEN_SYMBOL_COMMA: return false; case PARSER_TOKEN_NONE: } @@ -606,6 +694,7 @@ bool isValue(ParserNode *node) { case PARSER_TOKEN_TYPE_FUNCTION: case PARSER_TOKEN_SYMBOL_EOL: case PARSER_TOKEN_SYMBOL_CURLY_BRACKET: + case PARSER_TOKEN_SYMBOL_COMMA: return false; case PARSER_TOKEN_NONE: } diff --git a/src/compiler/parser.h b/src/compiler/parser.h index 1e6f487..7d98074 100644 --- a/src/compiler/parser.h +++ b/src/compiler/parser.h @@ -18,6 +18,7 @@ typedef enum ParserToken { PARSER_TOKEN_SYMBOL_EOL, PARSER_TOKEN_SYMBOL_CURLY_BRACKET, PARSER_TOKEN_SYMBOL_PARENTHESIS, + PARSER_TOKEN_SYMBOL_COMMA, PARSER_TOKEN_FUNCTION_DEFINITION, @@ -82,6 +83,7 @@ ParserNode *parserIdentifier(LexerNode *node, ParserNode *parent); ParserNode *parserVoid(LexerNode *node, ParserNode *parent); ParserNode *parserPrint(LexerNode *node, ParserNode *parent); ParserNode *parserEol(LexerNode *node, LexerNode *begin, ParserNode *parent); +ParserNode *parserComma(LexerNode *node, LexerNode *begin, ParserNode *parent); ParserNode *parserParenthesis(LexerNode *closing, LexerNode *begin, ParserNode *parent); ParserNode *parserCurlyBrackets(LexerNode *closing, LexerNode *begin, @@ -91,6 +93,8 @@ ParserNode *parserFunction(LexerNode *node, LexerNode *begin, LexerNode *end, ParserNode *parserVariable(LexerNode *node, LexerNode *begin, LexerNode *end, ParserNode *parent); +bool isAllArguments(const ParserNodeArray *nodes); + bool isExpression(ParserNode *node); bool isType(ParserNode *node); bool isValue(ParserNode *node); |