#include "parser.h"

/*
 * NOTE(review): the header names of the four includes that followed
 * "parser.h" were missing from this file. The standard headers below cover
 * every libc name used here. a404m_malloc / a404m_realloc are assumed to be
 * declared through "parser.h" - restore the project allocator include if
 * that is not the case.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Printable names for ParsedToken values; looked up with the token as index.
 * NOTE(review): the order presumably mirrors the ParsedToken enum declared in
 * parser.h - keep both in sync.
 */
const char *PARSED_TOKEN_STRINGS[] = {
    "PARSED_TOKEN_NONE",
    "PARSED_TOKEN_ROOT",
    "PARSED_TOKEN_PARENTHESIS",
    "PARSED_TOKEN_FUNCTION_CALL",
    "PARSED_TOKEN_VALUE_STRING",
    "PARSED_TOKEN_EOL",
};

/*
 * Parsing passes, applied in this order by parserScope(). Each pass walks the
 * lexed nodes (left-to-right when .ltr is true) and parses every node whose
 * token is listed in .tokens.
 */
static const ParseOrder PARSE_ORDER[] = {
    {
        .ltr = true,
        .size = 2,
        .tokens =
            {
                TOKEN_OPERATOR_PARENTHESES_CLOSE,
                TOKEN_OPERATOR_CURLY_BRACKET_CLOSE,
            },
    },
    {
        .ltr = true,
        .size = 3,
        .tokens =
            {
                TOKEN_OPERATOR_ASSIGN,
                TOKEN_OPERATOR_EQUAL,
                TOKEN_OPERATOR_COLON,
            },
    },
    {
        .ltr = true,
        .size = 1,
        .tokens =
            {
                TOKEN_STRING,
            },
    },
    {
        .ltr = true,
        .size = 1,
        .tokens =
            {
                TOKEN_OPERATOR_EOL,
            },
    },
};

/* Number of passes in PARSE_ORDER. */
static const size_t PARSE_ORDER_SIZE =
    sizeof(PARSE_ORDER) / sizeof(PARSE_ORDER[0]);

/**
 * Allocates a ParsedNode and fills every field from the arguments.
 *
 * @param strBegin start of the source text the node refers to
 * @param strEnd   end of the source text the node refers to
 * @param token    kind of the node
 * @param metadata token-specific payload (stored as-is, may be NULL)
 * @param parent   parent node (may be NULL)
 * @return the newly allocated node; release it with deleteParsedNode()
 */
ParsedNode *newParsedNode(char const *strBegin, char const *strEnd,
                          ParsedToken token, void *metadata,
                          ParsedNode *parent) {
  ParsedNode *parsedNode = a404m_malloc(sizeof(*parsedNode));
  parsedNode->strBegin = strBegin;
  parsedNode->strEnd = strEnd;
  parsedNode->token = token;
  parsedNode->metadata = metadata;
  parsedNode->parent = parent;
  return parsedNode;
}

/* Writes `indent` single spaces to stdout. */
static void printIndent(int indent) {
  for (int i = 0; i < indent; ++i) {
    printf(" ");
  }
}

/**
 * Recursively dumps a parsed tree to stdout.
 *
 * @param parsedNode node to print (must not be NULL)
 * @param indent     number of leading spaces for this node
 *
 * Terminates the process with exit(1) when the node carries a token that is
 * not handled below.
 */
void _printParsedNode(const ParsedNode *parsedNode, int indent) {
  printIndent(indent);
  printf("{token=%s", PARSED_TOKEN_STRINGS[parsedNode->token]);

  switch (parsedNode->token) {
    case PARSED_TOKEN_NONE:
      break;
    case PARSED_TOKEN_ROOT:
    case PARSED_TOKEN_PARENTHESIS:
    case PARSED_TOKEN_FUNCTION_CALL: {
      /* A function call keeps its operand list one level deeper, inside
       * FunctionCallMetadata; the other two store the scope directly.
       * Function calls used to fall into `default` and abort the dump. */
      const ScopeMetadata *scope;
      if (parsedNode->token == PARSED_TOKEN_FUNCTION_CALL) {
        const FunctionCallMetadata *call = parsedNode->metadata;
        scope = call->scope;
      } else {
        scope = parsedNode->metadata;
      }

      printf(",operands=[\n");
      for (size_t i = 0; i < scope->operands_size; ++i) {
        _printParsedNode(scope->operands[i], indent + 2);
      }
      printIndent(indent + 1);
      printf("]\n");
    } break;
    case PARSED_TOKEN_VALUE_STRING:
      printf("\n");
      break;
    case PARSED_TOKEN_EOL: {
      /* The metadata of an EOL node is the expression it terminates. */
      EOLMetadata *metadata = parsedNode->metadata;
      printf(",operand=\n");
      _printParsedNode(metadata, indent + 1);
    } break;
    default:
      fprintf(stderr, "bad parsed token %d at compiler line %d\n",
              (int)parsedNode->token, __LINE__);
      exit(1);
  }

  printIndent(indent);
  printf("}\n");
}

/** Dumps a parsed tree to stdout starting at indentation 0. */
void printParsedNode(const ParsedNode *parsedNode) {
  _printParsedNode(parsedNode, 0);
}

/**
 * Walks up the parent chain starting at `parsedNode`.
 *
 * @return the first node on the chain whose parent is `parent`, or NULL when
 *         the chain ends without finding one (or `parsedNode` is NULL).
 */
ParsedNode *getUntilCommonFather(ParsedNode *parsedNode, ParsedNode *parent) {
  while (parsedNode != NULL && parsedNode->parent != parent) {
    parsedNode = parsedNode->parent;
  }
  return parsedNode;
}

/**
 * Frees a node together with everything it owns (metadata and child nodes).
 *
 * Passing NULL is a no-op. Terminates the process with exit(1) when the node
 * carries an unknown token.
 */
void deleteParsedNode(ParsedNode *parsedNode) {
  if (parsedNode == NULL) {
    return;
  }

  switch (parsedNode->token) {
    case PARSED_TOKEN_NONE:
    case PARSED_TOKEN_VALUE_STRING:
      /* No owned metadata. */
      break;
    case PARSED_TOKEN_ROOT:
    case PARSED_TOKEN_PARENTHESIS:
    case PARSED_TOKEN_FUNCTION_CALL: {
      FunctionCallMetadata *call = NULL;
      ScopeMetadata *scope;
      if (parsedNode->token == PARSED_TOKEN_FUNCTION_CALL) {
        call = parsedNode->metadata;
        scope = call->scope;
      } else {
        scope = parsedNode->metadata;
      }

      for (size_t i = 0; i < scope->operands_size; ++i) {
        deleteParsedNode(scope->operands[i]);
      }
      free(scope->operands);
      free(scope);
      free(call); /* NULL unless this is a function call */
    } break;
    case PARSED_TOKEN_EOL: {
      EOLMetadata *metadata = parsedNode->metadata;
      deleteParsedNode(metadata);
    } break;
    default:
      fprintf(stderr, "bad parsed token %d at compiler line %d\n",
              (int)parsedNode->token, __LINE__);
      exit(1);
  }

  free(parsedNode);
}

/**
 * Parses a whole list of lexed nodes into a tree.
 *
 * @param lexedNodes nodes produced by the lexer; their `token` and
 *                   `parsedNode` fields are modified while parsing
 * @return a PARSED_TOKEN_ROOT node owning the tree, or NULL on a parse error
 *
 * NOTE(review): on failure only the root is released; nodes already created
 * for the failed scope are not freed.
 */
ParsedNode *parser(Nodes lexedNodes) {
  Node *const begin = lexedNodes.nodes;
  Node *const end = lexedNodes.nodes + lexedNodes.size;
  const bool hasNodes = lexedNodes.size != 0;

  /* The root spans from the first to the last lexed node so that strBegin /
   * strEnd are never left uninitialised (they used to be). */
  ParsedNode *root =
      newParsedNode(hasNodes ? begin->strBegin : NULL,
                    hasNodes ? (end - 1)->strEnd : NULL, PARSED_TOKEN_ROOT,
                    NULL, NULL);

  root->metadata = parserScopeCode(begin, end, root);
  if (root->metadata == NULL) {
    free(root);
    return NULL;
  }
  return root;
}

/**
 * Parses the nodes in [nodesBegin, nodesEnd) as the direct content of
 * `parent`.
 *
 * Runs every PARSE_ORDER pass over the range, then requires that each lexed
 * node has been consumed (token == TOKEN_PARSED) and that each resulting
 * top-level node is accepted by `isAllowed`.
 *
 * @param nodesBegin first lexed node of the scope
 * @param nodesEnd   one past the last lexed node of the scope
 * @param parent     node that will own the scope
 * @param isAllowed  predicate deciding which parsed tokens may appear
 *                   directly inside this scope
 * @return newly allocated metadata listing the direct children of `parent`,
 *         or NULL on error (a message is written to stderr)
 *
 * NOTE(review): parsed nodes created before a failure are not released on
 * the error paths; the lexed nodes still point at them through
 * Node::parsedNode.
 */
ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd,
                           ParsedNode *parent,
                           bool (*isAllowed)(ParsedToken)) {
  const size_t nodeCount =
      nodesEnd > nodesBegin ? (size_t)(nodesEnd - nodesBegin) : 0;

  size_t nodes_size = 0;
  size_t nodes_inserted = 0;
  ParsedNode **nodes = a404m_malloc(nodes_size * sizeof(ParsedNode *));
  ParsedNode **operands = NULL;
  size_t operands_size = 0;
  ScopeMetadata *metadata = NULL;

  for (size_t order_index = 0; order_index < PARSE_ORDER_SIZE; ++order_index) {
    const ParseOrder *order = &PARSE_ORDER[order_index];

    /* Index-based walk: never forms a pointer before nodesBegin, even for a
     * right-to-left pass over an empty range. */
    for (size_t step = 0; step < nodeCount; ++step) {
      Node *node = order->ltr ? nodesBegin + step : nodesEnd - 1 - step;

      bool matches = false;
      for (size_t i = 0; i < order->size && !matches; ++i) {
        matches = node->token == order->tokens[i];
      }
      if (!matches) {
        continue;
      }

      ParsedNode *parsedNode = parseNode(nodesBegin, nodesEnd, node, parent);
      if (parsedNode == NULL) {
        fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
                TOKEN_STRINGS[node->token], __LINE__);
        goto RETURN_ERROR;
      }

      if (nodes_size == nodes_inserted) {
        nodes_size += nodes_size / 2 + 1;
        nodes = a404m_realloc(nodes, nodes_size * sizeof(ParsedNode *));
      }
      nodes[nodes_inserted] = parsedNode;
      ++nodes_inserted;
    }
  }

  /* Anything still carrying a lexer token was not understood by any pass. */
  for (Node *node = nodesBegin; node < nodesEnd; ++node) {
    if (node->token != TOKEN_PARSED) {
      fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
              TOKEN_STRINGS[node->token], __LINE__);
      goto RETURN_ERROR;
    }
  }

  /* Keep only the nodes that are still direct children of `parent`; the
   * others have been adopted by another node (e.g. by an EOL node). */
  operands = a404m_malloc(nodes_inserted * sizeof(ParsedNode *));
  for (size_t i = 0; i < nodes_inserted; ++i) {
    ParsedNode *currentNode = nodes[i];
    if (currentNode->parent != parent) {
      continue;
    }
    if (!isAllowed(currentNode->token)) {
      fprintf(
          stderr,
          "error in parsing token '%s' is not allowed at compiler line %d\n",
          PARSED_TOKEN_STRINGS[currentNode->token], __LINE__);
      goto RETURN_ERROR;
    }
    operands[operands_size] = currentNode;
    ++operands_size;
  }
  free(nodes);

  /* Shrink to fit. A zero-size realloc is skipped on purpose: its behaviour
   * is not portable (obsolescent in C17, undefined in C23). */
  if (operands_size != 0) {
    operands = a404m_realloc(operands, operands_size * sizeof(ParsedNode *));
  }

  metadata = a404m_malloc(sizeof(*metadata));
  metadata->operands = operands;
  metadata->operands_size = operands_size;
  return metadata;

RETURN_ERROR:
  free(operands); /* NULL until the filtering step; was leaked before */
  free(nodes);
  return NULL;
}

/* Tokens that may appear directly in a code scope: only EOL-terminated
 * statements. Exits the process on a value outside the enum. */
static bool isAllowedCodeScope(ParsedToken token) {
  switch (token) {
    case PARSED_TOKEN_NONE:
    case PARSED_TOKEN_ROOT:
    case PARSED_TOKEN_PARENTHESIS:
    case PARSED_TOKEN_VALUE_STRING:
    case PARSED_TOKEN_FUNCTION_CALL:
      return false;
    case PARSED_TOKEN_EOL:
      return true;
  }
  fprintf(stderr, "bad token '%d' at compiler line %d\n", (int)token,
          __LINE__);
  exit(1);
}

/* Tokens that may appear directly between parentheses: nested parentheses,
 * strings and function calls. Exits the process on a value outside the
 * enum. */
static bool isAllowedParenthesisScope(ParsedToken token) {
  switch (token) {
    case PARSED_TOKEN_PARENTHESIS:
    case PARSED_TOKEN_VALUE_STRING:
    case PARSED_TOKEN_FUNCTION_CALL:
      return true;
    case PARSED_TOKEN_NONE:
    case PARSED_TOKEN_ROOT:
    case PARSED_TOKEN_EOL:
      return false;
  }
  fprintf(stderr, "bad token '%d' at compiler line %d\n", (int)token,
          __LINE__);
  exit(1);
}

/** parserScope() with the rules of a code scope. */
ScopeMetadata *parserScopeCode(Node *nodesBegin, Node *nodesEnd,
                               ParsedNode *parent) {
  return parserScope(nodesBegin, nodesEnd, parent, isAllowedCodeScope);
}

/** parserScope() with the rules of a parenthesised scope. */
ScopeMetadata *parserScopeParenthesis(Node *nodesBegin, Node *nodesEnd,
                                      ParsedNode *parent) {
  return parserScope(nodesBegin, nodesEnd, parent, isAllowedParenthesisScope);
}

/**
 * Dispatches one lexed node to the parser for its token.
 *
 * @return the parsed node, or NULL when parsing failed or the token has no
 *         parser (a message is written to stderr in the latter case)
 */
ParsedNode *parseNode(Node *nodesBegin, Node *nodesEnd, Node *node,
                      ParsedNode *parent) {
  switch (node->token) {
    case TOKEN_OPERATOR_PARENTHESES_CLOSE:
      return parseParenthesis(nodesBegin, nodesEnd, node, parent);
    case TOKEN_STRING:
      return parseString(node, parent);
    case TOKEN_OPERATOR_EOL:
      return parseEOL(nodesBegin, nodesEnd, node, parent);
    default:
      fprintf(stderr, "unexpected token '%s' at compiler line %d\n",
              TOKEN_STRINGS[node->token], __LINE__);
      return NULL;
  }
}

/**
 * Parses a parenthesised group ending at the closing parenthesis `node`.
 *
 * The matching opening parenthesis is the nearest unparsed
 * TOKEN_OPERATOR_PARENTHESES_OPEN before `node`. When the node right before
 * it is an identifier the group becomes a PARSED_TOKEN_FUNCTION_CALL,
 * otherwise a PARSED_TOKEN_PARENTHESIS. On success the involved lexed nodes
 * are marked TOKEN_PARSED and point at the new node.
 *
 * @param nodesBegin first lexed node of the enclosing scope
 * @param nodesEnd   unused
 * @param node       the closing parenthesis
 * @param parent     parent for the new node
 * @return the new node, or NULL on error (a message is written to stderr)
 */
ParsedNode *parseParenthesis(Node *nodesBegin, Node *nodesEnd, Node *node,
                             ParsedNode *parent) {
  (void)nodesEnd;

  Node *const closing = node;
  Node *opening = NULL;
  /* Decrement inside the body so the iterator never drops below
   * nodesBegin. */
  for (Node *iter = closing; iter > nodesBegin;) {
    --iter;
    if (iter->token == TOKEN_OPERATOR_PARENTHESES_OPEN) {
      opening = iter;
      break;
    }
  }
  if (opening == NULL) {
    fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
            TOKEN_STRINGS[node->token], __LINE__);
    return NULL;
  }

  Node *functionName = NULL;
  if (opening > nodesBegin && (opening - 1)->token == TOKEN_IDENTIFIER) {
    functionName = opening - 1;
  }

  ParsedNode *root = a404m_malloc(sizeof(*root));
  root->strEnd = closing->strEnd;
  root->parent = parent;

  if (functionName != NULL) {
    root->token = PARSED_TOKEN_FUNCTION_CALL;
    root->strBegin = functionName->strBegin;

    FunctionCallMetadata *metadata = a404m_malloc(sizeof(*metadata));
    root->metadata = metadata;
    metadata->functionNameBegin = functionName->strBegin;
    metadata->functionNameEnd = functionName->strEnd;
    metadata->scope = parserScopeParenthesis(opening + 1, closing, root);
    if (metadata->scope == NULL) {
      fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
              TOKEN_STRINGS[node->token], __LINE__);
      free(metadata);
      goto RETURN_ERROR;
    }

    /* Claim the identifier only once the call parsed successfully, so a
     * failure does not leave it pointing at the freed root. */
    functionName->token = TOKEN_PARSED;
    functionName->parsedNode = root;
  } else {
    root->token = PARSED_TOKEN_PARENTHESIS;
    root->strBegin = opening->strBegin;
    root->metadata = parserScopeParenthesis(opening + 1, closing, root);
    if (root->metadata == NULL) {
      fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
              TOKEN_STRINGS[node->token], __LINE__);
      goto RETURN_ERROR;
    }
  }

  opening->parsedNode = root;
  closing->parsedNode = root;
  opening->token = TOKEN_PARSED;
  closing->token = TOKEN_PARSED;
  return root;

RETURN_ERROR:
  free(root);
  return NULL;
}

/**
 * Turns a string token into a PARSED_TOKEN_VALUE_STRING node and marks the
 * lexed node as parsed.
 */
ParsedNode *parseString(Node *node, ParsedNode *parent) {
  ParsedNode *parsedNode = newParsedNode(
      node->strBegin, node->strEnd, PARSED_TOKEN_VALUE_STRING, NULL, parent);
  node->token = TOKEN_PARSED;
  node->parsedNode = parsedNode;
  return parsedNode;
}

/**
 * Parses an end-of-line token: wraps the expression that ends right before
 * it into a PARSED_TOKEN_EOL node.
 *
 * The wrapped expression is the ancestor of the previous lexed node's parsed
 * node that is a direct child of `parent`; it is re-parented to the new EOL
 * node.
 *
 * @param nodesBegin first lexed node of the enclosing scope
 * @param nodesEnd   unused
 * @param node       the EOL token
 * @param parent     parent for the new node
 * @return the new node, or NULL when there is no already-parsed node before
 *         the EOL or no suitable expression is found
 */
ParsedNode *parseEOL(Node *nodesBegin, Node *nodesEnd, Node *node,
                     ParsedNode *parent) {
  (void)nodesEnd;

  /* Checked before computing node - 1 so no pointer before nodesBegin is
   * ever formed. */
  if (node <= nodesBegin) {
    return NULL;
  }
  Node *before = node - 1;
  if (before->token != TOKEN_PARSED) {
    return NULL;
  }

  ParsedNode *expression = getUntilCommonFather(before->parsedNode, parent);
  if (expression == NULL) {
    fprintf(stderr, "error in parsing token '%s' at compiler line %d\n",
            TOKEN_STRINGS[node->token], __LINE__);
    return NULL;
  }

  ParsedNode *root = newParsedNode(node->strBegin, node->strEnd,
                                   PARSED_TOKEN_EOL, expression, parent);
  expression->parent = root;

  node->token = TOKEN_PARSED;
  node->parsedNode = root;
  return root;
}