From d6ba30b94a24607bce5db5e706eb20cc051a98f0 Mon Sep 17 00:00:00 2001 From: A404M Date: Wed, 18 Sep 2024 19:46:38 +0330 Subject: initial commit --- src/compiler/parser/parser.c | 351 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 351 insertions(+) create mode 100644 src/compiler/parser/parser.c (limited to 'src/compiler/parser/parser.c') diff --git a/src/compiler/parser/parser.c b/src/compiler/parser/parser.c new file mode 100644 index 0000000..c679d39 --- /dev/null +++ b/src/compiler/parser/parser.c @@ -0,0 +1,351 @@ +#include "parser.h" + +#include +#include +#include +#include + +const char *PARSED_TOKEN_STRINGS[] = { + "PARSED_TOKEN_NONE", "PARSED_TOKEN_ROOT", + "PARSED_TOKEN_PRINT", "PARSED_TOKEN_PARENTHESIS", + "PARSED_TOKEN_VALUE_STRING", "PARSED_TOKEN_EOL", +}; + +static const ParseOrder PARSE_ORDER[] = { + { + .ltr = false, + .size = 2, + .tokens = + { + TOKEN_OPERATOR_PARENTHESES_CLOSE, + TOKEN_OPERATOR_CURLY_BRACKET_CLOSE, + }, + }, + { + .ltr = true, + .size = 3, + .tokens = + { + TOKEN_OPERATOR_ASSIGN, + TOKEN_OPERATOR_EQUAL, + TOKEN_OPERATOR_COLON, + }, + }, + { + .ltr = true, + .size = 1, + .tokens = + { + TOKEN_KEYWORD_PRINT, + }, + }, + { + .ltr = true, + .size = 1, + .tokens = + { + TOKEN_STRING, + }, + }, + { + .ltr = true, + .size = 1, + .tokens = + { + TOKEN_OPERATOR_EOL, + }, + }, +}; +static size_t PARSE_ORDER_SIZE = sizeof(PARSE_ORDER) / sizeof(ParseOrder); + +ParsedNode *newParsedNode(char const *strBegin, char const *strEnd, + ParsedToken token, void *metadata, + ParsedNode *parent) { + ParsedNode *parsedNode = a404m_malloc(sizeof(*parsedNode)); + parsedNode->strBegin = strBegin; + parsedNode->strEnd = strEnd; + parsedNode->token = token; + parsedNode->metadata = metadata; + parsedNode->parent = parent; + return parsedNode; +} + +void _printParsedNode(const ParsedNode *parsedNode, int indent) { + for (int i = 0; i < indent; ++i) printf(" "); + printf("{token=%s", PARSED_TOKEN_STRINGS[parsedNode->token]); + switch (parsedNode->token) { + case PARSED_TOKEN_NONE: + break; + case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_ROOT: { + ++indent; + const ScopeMetadata *metadata = parsedNode->metadata; + printf(",operands=[\n"); + for (size_t i = 0; i < metadata->operands_size; ++i) { + _printParsedNode(metadata->operands[i], indent + 1); + } + for (int i = 0; i < indent; ++i) printf(" "); + printf("]\n"); + --indent; + } break; + case PARSED_TOKEN_PRINT: { + const PrintMetadata *metadata = parsedNode->metadata; + printf(",operand=\n"); + _printParsedNode(metadata, indent + 1); + } break; + case PARSED_TOKEN_VALUE_STRING: + printf("\n"); + break; + case PARSED_TOKEN_EOL: + EOLMetadata *metadata = parsedNode->metadata; + printf(",operand=\n"); + _printParsedNode(metadata, indent + 1); + break; + default: + fprintf(stderr, "bad parsed token %d at compiler line %d\n", + parsedNode->token, __LINE__); + exit(1); + } + for (int i = 0; i < indent; ++i) printf(" "); + printf("}\n"); +} + +void printParsedNode(const ParsedNode *parsedNode) { + _printParsedNode(parsedNode, 0); +} + +ParsedNode *getUntilCommonFather(ParsedNode *parsedNode, ParsedNode *parent) { + while (parsedNode != NULL && parsedNode->parent != parent) { + parsedNode = parsedNode->parent; + } + return parsedNode; +} + +void deleteParsedNode(ParsedNode *parsedNode) { + switch (parsedNode->token) { + case PARSED_TOKEN_NONE: + break; + case PARSED_TOKEN_PARENTHESIS: + case PARSED_TOKEN_ROOT: { + ScopeMetadata *metadata = parsedNode->metadata; + for (size_t i = 0; i < metadata->operands_size; ++i) { + deleteParsedNode(metadata->operands[i]); + } + free(metadata->operands); + free(metadata); + } break; + case PARSED_TOKEN_PRINT: { + PrintMetadata *metadata = parsedNode->metadata; + deleteParsedNode(metadata); + } break; + case PARSED_TOKEN_VALUE_STRING: + break; + case PARSED_TOKEN_EOL: + EOLMetadata *metadata = parsedNode->metadata; + deleteParsedNode(metadata); + break; + default: + fprintf(stderr, "bad parsed token %d at compiler line %d\n", + parsedNode->token, __LINE__); + exit(1); + } + free(parsedNode); +} + +ParsedNode *parser(Nodes lexedNodes) { + ParsedNode *root = a404m_malloc(sizeof(ParsedNode)); + root->token = PARSED_TOKEN_ROOT; + root->parent = NULL; + root->metadata = + parserScope(lexedNodes.nodes, lexedNodes.nodes + lexedNodes.size, root); + if (root->metadata == NULL) { + free(root); + return NULL; + } + return root; +} + +ScopeMetadata *parserScope(Node *nodesBegin, Node *nodesEnd, + ParsedNode *parent) { + if (nodesBegin >= nodesEnd) { + return NULL; + } + + size_t nodes_size = 0; + ParsedNode **nodes = a404m_malloc(nodes_size * sizeof(ParsedNode *)); + size_t nodes_inserted = 0; + + for (size_t i = 0; i < PARSE_ORDER_SIZE; ++i) { + const ParseOrder order = PARSE_ORDER[i]; + + for (Node *node = order.ltr ? nodesBegin : (nodesEnd - 1); + nodesBegin <= node && node < nodesEnd; order.ltr ? ++node : --node) { + for (size_t j = 0; j < order.size; ++j) { + const Token token = order.tokens[j]; + if (node->token == token) { + ParsedNode *parsedNode = + parseNode(nodesBegin, nodesEnd, node, parent); + if (parsedNode == NULL) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + goto RETURN_ERROR; + } + if (nodes_size == nodes_inserted) { + nodes_size += nodes_size / 2 + 1; + nodes = a404m_realloc(nodes, nodes_size * sizeof(ParsedNode *)); + } + nodes[nodes_inserted] = parsedNode; + ++nodes_inserted; + } + } + } + } + + for (Node *node = nodesBegin; node < nodesEnd; ++node) { + if (node->token != TOKEN_PARSED) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + goto RETURN_ERROR; + } + } + + ScopeMetadata *metadata = a404m_malloc(sizeof(ScopeMetadata)); + metadata->operands = a404m_malloc(nodes_inserted * sizeof(ParsedNode *)); + + nodes_size = nodes_inserted; + nodes_inserted = 0; + + for (size_t i = 0; i < nodes_size; ++i) { + ParsedNode *currentNode = nodes[i]; + if (currentNode->parent == parent) { + metadata->operands[nodes_inserted] = currentNode; + ++nodes_inserted; + } + } + free(nodes); + + metadata->operands = + a404m_realloc(metadata->operands, nodes_inserted * sizeof(ParsedNode *)); + metadata->operands_size = nodes_inserted; + + return metadata; + +RETURN_ERROR: + free(nodes); + return NULL; +} + +ParsedNode *parseNode(Node *nodesBegin, Node *nodesEnd, Node *node, + ParsedNode *parent) { + switch (node->token) { + case TOKEN_KEYWORD_PRINT: + return parserPrint(nodesBegin, nodesEnd, node, parent); + case TOKEN_OPERATOR_PARENTHESES_CLOSE: + return parseParenthesis(nodesBegin, nodesEnd, node, parent); + case TOKEN_STRING: + return parseString(node, parent); + case TOKEN_OPERATOR_EOL: + return parseEOL(nodesBegin, nodesEnd, node, parent); + default: + fprintf(stderr, "unexpected token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + return NULL; + } +} + +ParsedNode *parserPrint(Node *, Node *nodesEnd, Node *node, + ParsedNode *parent) { + Node *follow = node + 1; + + if (follow < nodesEnd && follow->token == TOKEN_PARSED) { + ParsedNode *root = a404m_malloc(sizeof(ParsedNode)); + root->strBegin = node->strBegin; + root->strEnd = node->strEnd; + root->token = PARSED_TOKEN_PRINT; + root->parent = parent; + + ParsedNode *operand = root->metadata = + getUntilCommonFather(follow->parsedNode, parent); + if (operand == NULL || operand->token != PARSED_TOKEN_PARENTHESIS) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + free(root); + return NULL; + } + operand->parent = root; + + node->parsedNode = root; + node->token = TOKEN_PARSED; + return root; + } + return NULL; +} + +ParsedNode *parseParenthesis(Node *nodesBegin, Node *, Node *node, + ParsedNode *parent) { + Node *opening = NULL; + Node *closing = node; + for (Node *iter = closing - 1; iter >= nodesBegin; --iter) { + if (iter->token == TOKEN_OPERATOR_PARENTHESES_OPEN) { + opening = iter; + break; + } + } + if (opening == NULL) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + return NULL; + } + + ParsedNode *parsedNode = closing->parsedNode = opening->parsedNode = + a404m_malloc(sizeof(*parsedNode)); + parsedNode->strBegin = opening->strBegin; + parsedNode->strEnd = closing->strEnd; + parsedNode->token = PARSED_TOKEN_PARENTHESIS; + parsedNode->parent = parent; + parsedNode->metadata = parserScope(opening + 1, closing, parsedNode); + + if (parsedNode->metadata == NULL) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + free(parsedNode); + return NULL; + } + + opening->token = closing->token = TOKEN_PARSED; + return parsedNode; +} + +ParsedNode *parseString(Node *node, ParsedNode *parent) { + node->token = TOKEN_PARSED; + return node->parsedNode = + newParsedNode(node->strBegin, node->strEnd, + PARSED_TOKEN_VALUE_STRING, NULL, parent); +} + +ParsedNode *parseEOL(Node *nodesBegin, Node *, Node *node, ParsedNode *parent) { + Node *before = node - 1; + if (before < nodesBegin && before->token != TOKEN_PARSED) { + return NULL; + } + + ParsedNode *experession = getUntilCommonFather(before->parsedNode, parent); + if (experession == NULL) { + fprintf(stderr, "error in parsing token '%s' at compiler line %d\n", + TOKEN_STRINGS[node->token], __LINE__); + return NULL; + } + + ParsedNode *root = a404m_malloc(sizeof(ParsedNode)); + root->strBegin = node->strBegin; + root->strEnd = node->strEnd; + root->token = PARSED_TOKEN_EOL; + root->parent = parent; + + root->metadata = experession; + experession->parent = root; + + node->token = TOKEN_PARSED; + node->parsedNode = root; + return root; +} -- cgit v1.2.3