From f79290084948f3cf140395c270c07cf29ca58e8d Mon Sep 17 00:00:00 2001 From: A404M Date: Sun, 22 Sep 2024 19:34:43 +0330 Subject: Better errors Added variables --- src/compiler/parser/parser.c | 740 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 667 insertions(+), 73 deletions(-) (limited to 'src/compiler/parser/parser.c') diff --git a/src/compiler/parser/parser.c b/src/compiler/parser/parser.c index 36b2574..61a0cf9 100644 --- a/src/compiler/parser/parser.c +++ b/src/compiler/parser/parser.c @@ -1,14 +1,27 @@ #include "parser.h" +#include #include #include #include #include +#include const char *PARSED_TOKEN_STRINGS[] = { - "PARSED_TOKEN_NONE", "PARSED_TOKEN_ROOT", - "PARSED_TOKEN_PARENTHESIS", "PARSED_TOKEN_FUNCTION_CALL", - "PARSED_TOKEN_VALUE_STRING", "PARSED_TOKEN_EOL", + "PARSED_TOKEN_NONE", + "PARSED_TOKEN_ROOT", + "PARSED_TOKEN_PARENTHESIS", + "PARSED_TOKEN_FUNCTION_CALL", + "PARSED_TOKEN_VALUE_STRING", + "PARSED_TOKEN_VALUE_IDENTIFIER", + "PARSED_TOKEN_DEFINE_VARIABLE", + "PARSED_TOKEN_DEFINE_CONSTANT", + "PARSED_TOKEN_EOL", + "PARSED_TOKEN_COMMA", + "PARSED_TOKEN_STRUCT", + "PARSED_TOKEN_FUNCTION", + "PARSED_TOKEN_FUNCTION_PARAMS", + "PARSED_TOKEN_CODE_BODY", }; static const ParseOrder PARSE_ORDER[] = { @@ -21,6 +34,31 @@ static const ParseOrder PARSE_ORDER[] = { TOKEN_OPERATOR_CURLY_BRACKET_CLOSE, }, }, + { + .ltr = true, + .size = 2, + .tokens = + { + TOKEN_STRING, + TOKEN_IDENTIFIER, + }, + }, + { + .ltr = true, + .size = 1, + .tokens = + { + TOKEN_OPERATOR_FUNCTION, + }, + }, + { + .ltr = true, + .size = 1, + .tokens = + { + TOKEN_KEYWORD_STRUCT, + }, + }, { .ltr = true, .size = 3, @@ -32,11 +70,11 @@ static const ParseOrder PARSE_ORDER[] = { }, }, { - .ltr = true, + .ltr = false, .size = 1, .tokens = { - TOKEN_STRING, + TOKEN_OPERATOR_COMMA, }, }, { @@ -63,15 +101,20 @@ ParsedNode *newParsedNode(char const *strBegin, char const *strEnd, } void _printParsedNode(const ParsedNode *parsedNode, int indent) { + if (parsedNode == NULL) { + for (int i = 0; i < indent; ++i) printf(" "); + printf("null\n"); + return; + } for (int i = 0; i < indent; ++i) printf(" "); printf("{token=%s", PARSED_TOKEN_STRINGS[parsedNode->token]); switch (parsedNode->token) { - case PARSED_TOKEN_NONE: - break; + case PARSED_TOKEN_FUNCTION_PARAMS: case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_CODE_BODY: case PARSED_TOKEN_ROOT: { ++indent; - const ScopeMetadata *metadata = parsedNode->metadata; + const ParserScopeMetadata *metadata = parsedNode->metadata; printf(",operands=[\n"); for (size_t i = 0; i < metadata->operands_size; ++i) { _printParsedNode(metadata->operands[i], indent + 1); @@ -79,20 +122,77 @@ void _printParsedNode(const ParsedNode *parsedNode, int indent) { for (int i = 0; i < indent; ++i) printf(" "); printf("]\n"); --indent; - } break; + goto END_SUCCESS; + } case PARSED_TOKEN_VALUE_STRING: - printf("\n"); - break; + case PARSED_TOKEN_IDENTIFIER: + printf(",str='%.*s'\n", (int)(parsedNode->strEnd - parsedNode->strBegin), + parsedNode->strBegin); + goto END_SUCCESS; + case PARSED_TOKEN_COMMA: case PARSED_TOKEN_EOL: { - EOLMetadata *metadata = parsedNode->metadata; + const ParserEOLMetadata *metadata = parsedNode->metadata; printf(",operand=\n"); _printParsedNode(metadata, indent + 1); - } break; - default: - fprintf(stderr, "bad parsed token %d at compiler line %d\n", - parsedNode->token, __LINE__); - exit(1); + goto END_SUCCESS; + } + case PARSED_TOKEN_FUNCTION_CALL: { + const ParserFunctionCallMetadata *metadata = parsedNode->metadata; + printf(",functionName=%.*s,operands=[\n", + (int)(metadata->functionNameEnd - metadata->functionNameBegin), + metadata->functionNameBegin); + ++indent; + const ParserScopeMetadata *scope = metadata->scope; + for (size_t i = 0; i < scope->operands_size; ++i) { + _printParsedNode(scope->operands[i], indent + 1); + } + for (int i = 0; i < indent; ++i) printf(" "); + printf("]\n"); + --indent; + goto END_SUCCESS; + } + case PARSED_TOKEN_DEFINE_CONSTANT: + case PARSED_TOKEN_DEFINE_VARIABLE: { + const ParserVariableDefineMetadata *metadata = parsedNode->metadata; + printf(",name=%.*s\n", + (int)(metadata->name->strEnd - metadata->name->strBegin), + metadata->name->strBegin); + if (metadata->type != NULL) { + for (int i = 0; i < indent; ++i) printf(" "); + printf(",type=\n"); + _printParsedNode(metadata->type, indent + 1); + } + if (metadata->value) { + for (int i = 0; i < indent; ++i) printf(" "); + printf(",value=\n"); + _printParsedNode(metadata->value, indent + 1); + } + goto END_SUCCESS; + } + case PARSED_TOKEN_STRUCT: { + const ParserStructMetadata *metadata = parsedNode->metadata; + printf(",body=\n"); + _printParsedNode(metadata->body, indent + 1); + goto END_SUCCESS; + } + case PARSED_TOKEN_FUNCTION: { + const ParserFunctionMetadata *metadata = parsedNode->metadata; + printf(",params=\n"); + _printParsedNode(metadata->params, indent + 1); + for (int i = 0; i < indent; ++i) printf(" "); + printf(",type=\n"); + _printParsedNode(metadata->type, indent + 1); + for (int i = 0; i < indent; ++i) printf(" "); + printf(",body=\n"); + _printParsedNode(metadata->body, indent + 1); + goto END_SUCCESS; + } + case PARSED_TOKEN_NONE: } + fprintf(stderr, "bad parsed token %d at compiler line %d\n", + parsedNode->token, __LINE__); + exit(1); +END_SUCCESS: for (int i = 0; i < indent; ++i) printf(" "); printf("}\n"); } @@ -109,13 +209,19 @@ ParsedNode *getUntilCommonFather(ParsedNode *parsedNode, ParsedNode *parent) { } void deleteParsedNode(ParsedNode *parsedNode) { + if (parsedNode == NULL) { + return; + } switch (parsedNode->token) { case PARSED_TOKEN_NONE: case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: goto FREE; + case PARSED_TOKEN_FUNCTION_PARAMS: + case PARSED_TOKEN_CODE_BODY: case PARSED_TOKEN_PARENTHESIS: case PARSED_TOKEN_ROOT: { - ScopeMetadata *metadata = parsedNode->metadata; + ParserScopeMetadata *metadata = parsedNode->metadata; for (size_t i = 0; i < metadata->operands_size; ++i) { deleteParsedNode(metadata->operands[i]); } @@ -123,14 +229,15 @@ void deleteParsedNode(ParsedNode *parsedNode) { free(metadata); goto FREE; } + case PARSED_TOKEN_COMMA: case PARSED_TOKEN_EOL: { - EOLMetadata *metadata = parsedNode->metadata; + ParserEOLMetadata *metadata = parsedNode->metadata; deleteParsedNode(metadata); goto FREE; } case PARSED_TOKEN_FUNCTION_CALL: { - FunctionCallMetadata *metadata = parsedNode->metadata; - ScopeMetadata *scope = metadata->scope; + ParserFunctionCallMetadata *metadata = parsedNode->metadata; + ParserScopeMetadata *scope = metadata->scope; for (size_t i = 0; i < scope->operands_size; ++i) { deleteParsedNode(scope->operands[i]); } @@ -139,6 +246,29 @@ void deleteParsedNode(ParsedNode *parsedNode) { free(metadata); goto FREE; } + case PARSED_TOKEN_DEFINE_CONSTANT: + case PARSED_TOKEN_DEFINE_VARIABLE: { + ParserVariableDefineMetadata *metadata = parsedNode->metadata; + deleteParsedNode(metadata->name); + deleteParsedNode(metadata->type); + deleteParsedNode(metadata->value); + free(metadata); + goto FREE; + } + case PARSED_TOKEN_STRUCT: { + ParserStructMetadata *metadata = parsedNode->metadata; + deleteParsedNode(metadata->body); + free(metadata); + goto FREE; + } + case PARSED_TOKEN_FUNCTION: { + ParserFunctionMetadata *metadata = parsedNode->metadata; + deleteParsedNode(metadata->params); + deleteParsedNode(metadata->type); + deleteParsedNode(metadata->body); + free(metadata); + goto FREE; + } } fprintf(stderr, "bad parsed token %d at compiler line %d\n", parsedNode->token, __LINE__); @@ -147,12 +277,23 @@ FREE: free(parsedNode); } -ParsedNode *parser(Nodes lexedNodes) { - ParsedNode *root = a404m_malloc(sizeof(ParsedNode)); +ParsedNode *parser(SourceCode code) { + Nodes nodes = lexer(code); + if (nodes.size == ERROR_SIZE) { + return NULL; + } + ParsedNode *root = _parser(nodes, code); + + deleteNodes(nodes); + return root; +} + +ParsedNode *_parser(Nodes lexedNodes, SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); root->token = PARSED_TOKEN_ROOT; root->parent = NULL; - root->metadata = parserScopeCode(lexedNodes.nodes, - lexedNodes.nodes + lexedNodes.size, root); + root->metadata = parserScopeCode( + lexedNodes.nodes, lexedNodes.nodes + lexedNodes.size, root, code); if (root->metadata == NULL) { free(root); return NULL; @@ -160,8 +301,9 @@ ParsedNode *parser(Nodes lexedNodes) { return root; } -ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, - bool (*isAllowed)(ParsedToken)) { +ParserScopeMetadata *parserScope( + Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, + bool (*isAllowed)(ParsedToken token, bool isLast), SourceCode code) { size_t nodes_size = 0; ParsedNode **nodes = a404m_malloc(nodes_size * sizeof(ParsedNode *)); size_t nodes_inserted = 0; @@ -175,10 +317,8 @@ ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, ++order_tokens_index) { if (node->token == order->tokens[order_tokens_index]) { ParsedNode *parsedNode = - parseNode(nodesBegin, nodesEnd, node, parent); + parseNode(nodesBegin, nodesEnd, node, parent, code); if (parsedNode == NULL) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); goto RETURN_ERROR; } if (nodes_size == nodes_inserted) { @@ -194,8 +334,8 @@ ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, for (Node *node = nodesBegin; node < nodesEnd; ++node) { if (node->token != TOKEN_PARSED) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); + printError("Unexpected node with token '%s'", code, node->strBegin, + node->strEnd, TOKEN_STRINGS[node->token]); goto RETURN_ERROR; } } @@ -207,11 +347,10 @@ ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, for (size_t i = 0; i < nodes_size; ++i) { ParsedNode *currentNode = nodes[i]; if (currentNode->parent == parent) { - if (!isAllowed(currentNode->token)) { - fprintf( - stderr, - "error in parsing token '%s' is not allowed at compiler line %d\n", - PARSED_TOKEN_STRINGS[currentNode->token], __LINE__); + if (!isAllowed(currentNode->token, i + 1 == nodes_size)) { + printError("Token '%s' is not allowed here", code, + currentNode->strBegin, currentNode->strEnd, + PARSED_TOKEN_STRINGS[currentNode->token]); goto RETURN_ERROR; } operands[nodes_inserted] = currentNode; @@ -220,7 +359,7 @@ ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, ParsedNode *parent, } free(nodes); - ScopeMetadata *metadata = a404m_malloc(sizeof(ScopeMetadata)); + ParserScopeMetadata *metadata = a404m_malloc(sizeof(*metadata)); metadata->operands = a404m_realloc(operands, nodes_inserted * sizeof(ParsedNode *)); metadata->operands_size = nodes_inserted; @@ -232,67 +371,197 @@ RETURN_ERROR: return NULL; } -static bool isAllowedCodeScope(ParsedToken token) { +static bool isAllowedCodeScope(ParsedToken token, bool) { switch (token) { case PARSED_TOKEN_NONE: case PARSED_TOKEN_ROOT: case PARSED_TOKEN_PARENTHESIS: case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: case PARSED_TOKEN_FUNCTION_CALL: + case PARSED_TOKEN_DEFINE_VARIABLE: + case PARSED_TOKEN_DEFINE_CONSTANT: + case PARSED_TOKEN_COMMA: + case PARSED_TOKEN_STRUCT: + case PARSED_TOKEN_FUNCTION: + case PARSED_TOKEN_FUNCTION_PARAMS: return false; case PARSED_TOKEN_EOL: + case PARSED_TOKEN_CODE_BODY: return true; } fprintf(stderr, "bad token '%d' at compiler line %d\n", token, __LINE__); exit(1); } -static bool isAllowedParenthesisScope(ParsedToken token) { +static bool isAllowedParenthesisScope(ParsedToken token, bool) { switch (token) { case PARSED_TOKEN_PARENTHESIS: case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: case PARSED_TOKEN_FUNCTION_CALL: return true; case PARSED_TOKEN_NONE: case PARSED_TOKEN_ROOT: case PARSED_TOKEN_EOL: + case PARSED_TOKEN_COMMA: + case PARSED_TOKEN_DEFINE_VARIABLE: + case PARSED_TOKEN_DEFINE_CONSTANT: + case PARSED_TOKEN_STRUCT: + case PARSED_TOKEN_FUNCTION: + case PARSED_TOKEN_CODE_BODY: + case PARSED_TOKEN_FUNCTION_PARAMS: return false; } fprintf(stderr, "bad token '%d' at compiler line %d\n", token, __LINE__); exit(1); } -ScopeMetadata *parserScopeCode(Node *nodesBegin, Node *nodesEnd, - ParsedNode *parent) { - return parserScope(nodesBegin, nodesEnd, parent, isAllowedCodeScope); +static bool isAllowedFunctionCallScope(ParsedToken token, bool isLast) { + switch (token) { + case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: + case PARSED_TOKEN_FUNCTION_CALL: + case PARSED_TOKEN_DEFINE_VARIABLE: + case PARSED_TOKEN_DEFINE_CONSTANT: + return isLast; + case PARSED_TOKEN_COMMA: + return true; + case PARSED_TOKEN_NONE: + case PARSED_TOKEN_ROOT: + case PARSED_TOKEN_EOL: + case PARSED_TOKEN_STRUCT: + case PARSED_TOKEN_FUNCTION: + case PARSED_TOKEN_FUNCTION_PARAMS: + case PARSED_TOKEN_CODE_BODY: + return false; + } + fprintf(stderr, "bad token '%d' at compiler line %d\n", token, __LINE__); + exit(1); +} + +static bool isAllowedFunctionParamScope(ParsedToken token, bool isLast) { + switch (token) { + case PARSED_TOKEN_DEFINE_VARIABLE: + case PARSED_TOKEN_DEFINE_CONSTANT: + return isLast; + case PARSED_TOKEN_COMMA: + return true; + case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: + case PARSED_TOKEN_FUNCTION_CALL: + case PARSED_TOKEN_NONE: + case PARSED_TOKEN_ROOT: + case PARSED_TOKEN_EOL: + case PARSED_TOKEN_STRUCT: + case PARSED_TOKEN_FUNCTION: + case PARSED_TOKEN_FUNCTION_PARAMS: + case PARSED_TOKEN_CODE_BODY: + return false; + } + fprintf(stderr, "bad token '%d' at compiler line %d\n", token, __LINE__); + exit(1); +} + +static bool isAllowedStructScope(ParsedToken token, bool) { + switch (token) { + case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_VALUE_STRING: + case PARSED_TOKEN_IDENTIFIER: + case PARSED_TOKEN_FUNCTION_CALL: + case PARSED_TOKEN_DEFINE_VARIABLE: + case PARSED_TOKEN_COMMA: + case PARSED_TOKEN_EOL: + case PARSED_TOKEN_NONE: + case PARSED_TOKEN_ROOT: + case PARSED_TOKEN_FUNCTION: + case PARSED_TOKEN_STRUCT: + case PARSED_TOKEN_DEFINE_CONSTANT: + case PARSED_TOKEN_FUNCTION_PARAMS: + case PARSED_TOKEN_CODE_BODY: + return false; + } + fprintf(stderr, "bad token '%d' at compiler line %d\n", token, __LINE__); + exit(1); } -ScopeMetadata *parserScopeParenthesis(Node *nodesBegin, Node *nodesEnd, - ParsedNode *parent) { - return parserScope(nodesBegin, nodesEnd, parent, isAllowedParenthesisScope); +ParserScopeMetadata *parserScopeCode(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, SourceCode code) { + return parserScope(nodesBegin, nodesEnd, parent, isAllowedCodeScope, code); +} + +ParserScopeMetadata *parserScopeParenthesis(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, + SourceCode code) { + return parserScope(nodesBegin, nodesEnd, parent, isAllowedParenthesisScope, + code); +} + +ParserScopeMetadata *parserScopeFunctionCall(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, + SourceCode code) { + return parserScope(nodesBegin, nodesEnd, parent, isAllowedFunctionCallScope, + code); +} + +ParserScopeMetadata *parserScopeFunctionParam(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, + SourceCode code) { + return parserScope(nodesBegin, nodesEnd, parent, isAllowedFunctionParamScope, + code); +} + +ParserScopeMetadata *parserScopeStruct(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent, SourceCode code) { + return parserScope(nodesBegin, nodesEnd, parent, isAllowedStructScope, code); } ParsedNode *parseNode(Node *nodesBegin, Node *nodesEnd, Node *node, - ParsedNode *parent) { + ParsedNode *parent, SourceCode code) { switch (node->token) { case TOKEN_OPERATOR_PARENTHESES_CLOSE: - return parseParenthesis(nodesBegin, nodesEnd, node, parent); + return parseParenthesis(nodesBegin, nodesEnd, node, parent, code); case TOKEN_STRING: return parseString(node, parent); + case TOKEN_IDENTIFIER: + return parseIdentifier(node, parent); case TOKEN_OPERATOR_EOL: - return parseEOL(nodesBegin, nodesEnd, node, parent); - default: - fprintf(stderr, "unexpected token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); - return NULL; + return parseEOL(nodesBegin, node, parent, code); + case TOKEN_OPERATOR_COLON: + return parseVariable(nodesBegin, nodesEnd, node, parent, code); + case TOKEN_OPERATOR_COMMA: + return parseComma(nodesBegin, nodesEnd, node, parent, code); + case TOKEN_KEYWORD_EXTERNAL: + return parseStruct(nodesEnd, node, parent, code); + case TOKEN_OPERATOR_CURLY_BRACKET_CLOSE: + return parseCurly(nodesBegin, node, parent, code); + case TOKEN_KEYWORD_STRUCT: + return parseStruct(nodesEnd, node, parent, code); + case TOKEN_OPERATOR_FUNCTION: + return parseFunction(nodesBegin, nodesEnd, node, parent, code); + case TOKEN_KEYWORD_IMPORT: + return parseImport(nodesEnd, node, parent, code); + case TOKEN_NONE: + case TOKEN_NUMBER: + case TOKEN_OPERATOR: + case TOKEN_OPERATOR_PARENTHESES_OPEN: + case TOKEN_OPERATOR_CURLY_BRACKET_OPEN: + case TOKEN_OPERATOR_ASSIGN: + case TOKEN_OPERATOR_EQUAL: + case TOKEN_SYMBOL: + case TOKEN_PARSED: } + printError("Unexpected token '%s' at compiler line %d", code, node->strBegin, + node->strEnd, TOKEN_STRINGS[node->token], __LINE__); + return NULL; } -ParsedNode *parseParenthesis(Node *nodesBegin, Node *, Node *node, - ParsedNode *parent) { +ParsedNode *parseParenthesis(Node *nodesBegin, Node *nodesEnd, Node *closing, + ParsedNode *parent, SourceCode code) { ParsedNode *root = a404m_malloc(sizeof(*root)); Node *opening = NULL; - Node *closing = node; for (Node *iter = closing - 1; iter >= nodesBegin; --iter) { if (iter->token == TOKEN_OPERATOR_PARENTHESES_OPEN) { opening = iter; @@ -300,12 +569,13 @@ ParsedNode *parseParenthesis(Node *nodesBegin, Node *, Node *node, } } if (opening == NULL) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); + printError("Found no opening for )", code, closing->strBegin, + closing->strEnd, __LINE__); goto RETURN_ERROR; } - Node *functionName = opening - 1; + Node *const functionName = opening - 1; + Node *const functionTailOperator = closing + 1; if (functionName >= nodesBegin && functionName->token == TOKEN_IDENTIFIER) { functionName->token = TOKEN_PARSED; functionName->parsedNode = root; @@ -315,30 +585,44 @@ ParsedNode *parseParenthesis(Node *nodesBegin, Node *, Node *node, root->strEnd = closing->strEnd; root->parent = parent; - FunctionCallMetadata *metadata = root->metadata = + ParserFunctionCallMetadata *metadata = root->metadata = a404m_malloc(sizeof(*metadata)); metadata->functionNameBegin = functionName->strBegin; metadata->functionNameEnd = functionName->strEnd; - if ((metadata->scope = - parserScopeParenthesis(opening + 1, closing, root)) == NULL) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); + if ((metadata->scope = parserScopeFunctionCall(opening + 1, closing, root, + code)) == NULL) { free(metadata); goto RETURN_ERROR; } + } else if (functionTailOperator < nodesEnd && + functionTailOperator->token == TOKEN_OPERATOR_FUNCTION) { + root->token = PARSED_TOKEN_FUNCTION_PARAMS; + root->strBegin = (opening - 1)->strBegin; + root->strEnd = closing->strEnd; + root->parent = parent; + + if ((root->metadata = parserScopeFunctionParam(opening + 1, closing, root, + code)) == NULL) { + goto RETURN_ERROR; + } } else { root->token = PARSED_TOKEN_PARENTHESIS; root->strBegin = opening->strBegin; root->strEnd = closing->strEnd; root->parent = parent; - if ((root->metadata = parserScopeParenthesis(opening + 1, closing, root)) == - NULL) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); + ParserScopeMetadata *const scope = + parserScopeParenthesis(opening + 1, closing, root, code); + if (scope == NULL) { + goto RETURN_ERROR; + } else if (scope->operands_size != 1) { + printError("Parenthesis should only contain one expression", code, + opening->strBegin, closing->strEnd); goto RETURN_ERROR; } + ParserParenthesisMetadata *const metadata = scope->operands[0]; + root->metadata = metadata; } closing->parsedNode = opening->parsedNode = root; @@ -350,6 +634,42 @@ RETURN_ERROR: return NULL; } +ParsedNode *parseCurly(Node *nodesBegin, Node *closing, ParsedNode *parent, + SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); + + Node *opening = NULL; + for (Node *iter = closing - 1; iter >= nodesBegin; --iter) { + if (iter->token == TOKEN_OPERATOR_CURLY_BRACKET_OPEN) { + opening = iter; + break; + } + } + if (opening == NULL) { + printError("Found no opening for }", code, closing->strBegin, + closing->strEnd, __LINE__); + goto RETURN_ERROR; + } + + if ((root->metadata = parserScopeCode(opening + 1, closing, parent, code)) == + NULL) { + goto RETURN_ERROR; + } + + root->strBegin = opening->strBegin; + root->strEnd = opening->strEnd; + root->parent = parent; + root->token = PARSED_TOKEN_CODE_BODY; + closing->parsedNode = opening->parsedNode = root; + opening->token = closing->token = TOKEN_PARSED; + + return root; + +RETURN_ERROR: + free(root); + return NULL; +} + ParsedNode *parseString(Node *node, ParsedNode *parent) { node->token = TOKEN_PARSED; return node->parsedNode = @@ -357,20 +677,34 @@ ParsedNode *parseString(Node *node, ParsedNode *parent) { PARSED_TOKEN_VALUE_STRING, NULL, parent); } -ParsedNode *parseEOL(Node *nodesBegin, Node *, Node *node, ParsedNode *parent) { +ParsedNode *parseIdentifier(Node *node, ParsedNode *parent) { + node->token = TOKEN_PARSED; + return node->parsedNode = + newParsedNode(node->strBegin, node->strEnd, + PARSED_TOKEN_IDENTIFIER, NULL, parent); +} + +ParsedNode *parseEOL(Node *nodesBegin, Node *node, ParsedNode *parent, + SourceCode code) { Node *before = node - 1; - if (before < nodesBegin || before->token != TOKEN_PARSED) { + if (before < nodesBegin) { + RETURN_EMPTY: + return newParsedNode(node->strBegin, node->strEnd, PARSED_TOKEN_EOL, NULL, + parent); + } else if (before->token != TOKEN_PARSED) { + printError("Unexpected EOL", code, node->strBegin, node->strEnd); return NULL; } ParsedNode *experession = getUntilCommonFather(before->parsedNode, parent); if (experession == NULL) { - fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", - TOKEN_STRINGS[node->token], __LINE__); + printError("Unexpected EOL", code, node->strBegin, node->strEnd); return NULL; + } else if (experession->token == PARSED_TOKEN_EOL) { + goto RETURN_EMPTY; } - ParsedNode *root = a404m_malloc(sizeof(ParsedNode)); + ParsedNode *root = a404m_malloc(sizeof(*root)); root->strBegin = node->strBegin; root->strEnd = node->strEnd; root->token = PARSED_TOKEN_EOL; @@ -383,3 +717,263 @@ ParsedNode *parseEOL(Node *nodesBegin, Node *, Node *node, ParsedNode *parent) { node->parsedNode = root; return root; } + +ParsedNode *parseVariable(Node *nodesBegin, Node *nodesEnd, Node *node, + ParsedNode *parent, SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); + ParserVariableDefineMetadata *metadata = root->metadata = + a404m_malloc(sizeof(*metadata)); + Node *variableName = node - 1; + if (variableName < nodesBegin) { + printError("Variable definition needs name", code, node->strBegin, + node->strEnd); + goto RETURN_ERROR; + } else if (variableName->token != TOKEN_PARSED) { + BAD_VAR_NAME: + printError("Variable name should be an identifier but got something else", + code, node->strBegin, node->strEnd); + goto RETURN_ERROR; + } + + ParsedNode *const variableNameParsed = + getUntilCommonFather(variableName->parsedNode, parent); + if (variableNameParsed == NULL || + variableNameParsed->token != PARSED_TOKEN_IDENTIFIER) { + goto BAD_VAR_NAME; + } + variableNameParsed->parent = root; + + metadata->name = variableNameParsed; + + Node *follow = node + 1; + ParsedNode *type; + ParsedNode *value = NULL; + if (follow == nodesEnd) { + printError("Variable definition needs type or assignment to a value", code, + node->strBegin, node->strEnd); + goto RETURN_ERROR; + } else if (follow->token == TOKEN_PARSED) { + type = getUntilCommonFather(follow->parsedNode, parent); + if (type == NULL) { + printError("Expected type but got something else", code, follow->strBegin, + follow->strEnd); + goto RETURN_ERROR; + } + ++follow; + while (follow->token == TOKEN_PARSED && + getUntilCommonFather(follow->parsedNode, type) != NULL) { + ++follow; + } + } else { + type = NULL; + } + + if (follow == nodesEnd || (follow->token != TOKEN_OPERATOR_ASSIGN && + follow->token != TOKEN_OPERATOR_COLON)) { + root->token = PARSED_TOKEN_DEFINE_VARIABLE; + --follow; + } else { + if (follow->token == TOKEN_OPERATOR_ASSIGN) + root->token = PARSED_TOKEN_DEFINE_VARIABLE; + else + root->token = PARSED_TOKEN_DEFINE_CONSTANT; + + follow->parsedNode = root; + follow->token = TOKEN_PARSED; + ++follow; + if (follow == nodesEnd) { + --follow; + printError("Expected value after assignment but got nothing", code, + follow->strBegin, follow->strEnd); + goto RETURN_ERROR; + } else if (follow->token == TOKEN_PARSED) { + value = getUntilCommonFather(follow->parsedNode, parent); + } else { + BAD_VALUE: + printError("Expected value after assignment but got something else", code, + follow->strBegin, follow->strEnd); + goto RETURN_ERROR; + } + } + + if (type == NULL && value == NULL) { + printError("Variable definition needs type or assignment to a value", code, + node->strBegin, node->strEnd); + goto RETURN_ERROR; + } + if (type != NULL) { + metadata->type = type; + type->parent = root; + } else { + metadata->type = NULL; + } + if (value != NULL) { + metadata->value = value = getUntilCommonFather(value, parent); + if (value == NULL) { + goto BAD_VALUE; + } + value->parent = root; + } else { + metadata->value = NULL; + } + + root->strBegin = variableName->strBegin; + root->strEnd = follow->strEnd; + root->parent = parent; + + node->token = TOKEN_PARSED; + node->parsedNode = root; + return root; + +RETURN_ERROR: + free(root); + free(metadata); + return NULL; +} + +ParsedNode *parseComma(Node *nodesBegin, Node *, Node *node, ParsedNode *parent, + SourceCode code) { + Node *before = node - 1; + if (before < nodesBegin || before->token != TOKEN_PARSED) { + UNEXPECTED: + printError("Unexpected comma", code, node->strBegin, node->strEnd); + return NULL; + } + + ParsedNode *const experession = + getUntilCommonFather(before->parsedNode, parent); + if (experession == NULL || experession->token == PARSED_TOKEN_COMMA) { + goto UNEXPECTED; + } + + ParsedNode *root = a404m_malloc(sizeof(*root)); + root->strBegin = node->strBegin; + root->strEnd = node->strEnd; + root->token = PARSED_TOKEN_COMMA; + root->parent = parent; + + root->metadata = experession; + experession->parent = root; + + node->token = TOKEN_PARSED; + node->parsedNode = root; + return root; +} + +ParsedNode *parseStruct(Node *nodesEnd, Node *node, ParsedNode *parent, + SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); + ParserStructMetadata *metadata = root->metadata = + a404m_malloc(sizeof(*metadata)); + + Node *const body = node + 1; + if (body >= nodesEnd) { + NO_BODY: + printError("'struct' needs a body", code, node->strBegin, node->strEnd); + goto RETURN_ERROR; + } + + root->strBegin = node->strBegin; + root->strBegin = body->strEnd; + root->token = PARSED_TOKEN_STRUCT; + root->parent = parent; + + node->token = TOKEN_PARSED; + node->parsedNode = root; + + if (body->token == TOKEN_KEYWORD_EXTERNAL) { + body->token = TOKEN_PARSED; + body->parsedNode = root; + metadata->body = NULL; + } else if (body->token == TOKEN_PARSED) { + if ((metadata->body = getUntilCommonFather(body->parsedNode, parent)) == + NULL) { + goto NO_BODY; + } else { + metadata->body->parent = root; + } + } else { + goto NO_BODY; + } + + return root; +RETURN_ERROR: + free(root); + free(metadata); + return NULL; +} + +ParsedNode *parseFunction(Node *nodesBegin, Node *nodesEnd, Node *node, + ParsedNode *parent, SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); + ParserFunctionMetadata *metadata = root->metadata = + a404m_malloc(sizeof(*metadata)); + + Node *const params = node - 1; + Node *const type = node + 1; + Node *const body = node + 2; + + if (params < nodesBegin || params->token != TOKEN_PARSED || + params->parsedNode->token != PARSED_TOKEN_FUNCTION_PARAMS) { + printError("Function definition needs a param list", code, node->strBegin, + node->strEnd); + goto RETURN_ERROR; + } else { + metadata->params = params->parsedNode; + metadata->params->parent = root; + } + if (type >= nodesEnd || type->token != TOKEN_PARSED || + type->parsedNode->token != PARSED_TOKEN_IDENTIFIER) { + printError("Function definition needs a type to be identifier (for now)", + code, node->strBegin, node->strEnd); + goto RETURN_ERROR; + } else { + metadata->type = type->parsedNode; + metadata->type->parent = root; + } + + if (body >= nodesEnd) { + NEED_BODY: + printError("Function definition needs a body or be set to external", code, + node->strBegin, node->strEnd); + goto RETURN_ERROR; + } else if (body->token == TOKEN_KEYWORD_EXTERNAL) { + body->token = TOKEN_PARSED; + body->parsedNode = root; + metadata->body = NULL; + } else if (type->token == TOKEN_PARSED && + type->parsedNode->token == PARSED_TOKEN_CODE_BODY) { + metadata->body = type->parsedNode; + metadata->body->parent = root; + } else { + goto NEED_BODY; + } + + node->token = TOKEN_PARSED; + node->parsedNode = root; + root->strBegin = params->strBegin; + root->strEnd = params->strEnd; + root->parent = parent; + root->token = PARSED_TOKEN_FUNCTION; + + return root; +RETURN_ERROR: + + free(metadata); + free(root); + return NULL; +} + +ParsedNode *parseImport(Node *nodesEnd, Node *node, ParsedNode *parent, + SourceCode code) { + ParsedNode *root = a404m_malloc(sizeof(*root)); + + // TODO: do it + + return root; + +RETURN_ERROR: + + free(root); + return NULL; +} -- cgit v1.2.3