summaryrefslogtreecommitdiff
path: root/src/compiler
diff options
context:
space:
mode:
authorA404M <ahmadmahmoudiprogrammer@gmail.com>2025-04-15 21:54:04 +0330
committerA404M <ahmadmahmoudiprogrammer@gmail.com>2025-04-15 21:54:04 +0330
commitcc12a3a946303ae89ce2076b3c17d5d35d7e864f (patch)
tree3429c081fecb65cf538b213e382016416dbca344 /src/compiler
parent52fac2c9232d39b3fe98438a47c22f985b260f58 (diff)
add string
Diffstat (limited to 'src/compiler')
-rw-r--r--src/compiler/ast-tree.c44
-rw-r--r--src/compiler/ast-tree.h1
-rw-r--r--src/compiler/lexer.c7
-rw-r--r--src/compiler/lexer.h1
-rw-r--r--src/compiler/parser.c139
-rw-r--r--src/compiler/parser.h9
6 files changed, 159 insertions, 42 deletions
diff --git a/src/compiler/ast-tree.c b/src/compiler/ast-tree.c
index e2f31fc..b8fe4a0 100644
--- a/src/compiler/ast-tree.c
+++ b/src/compiler/ast-tree.c
@@ -7,7 +7,6 @@
#include "utils/string.h"
#include <stddef.h>
#include <stdio.h>
-#include <stdlib.h>
#include <string.h>
AstTree AST_TREE_TYPE_TYPE = {
@@ -1321,6 +1320,7 @@ AstTreeRoot *makeAstTree(ParserNode *parsedRoot) {
case PARSER_TOKEN_VALUE_INT:
case PARSER_TOKEN_VALUE_FLOAT:
case PARSER_TOKEN_VALUE_CHAR:
+ case PARSER_TOKEN_VALUE_STRING:
case PARSER_TOKEN_FUNCTION_DEFINITION:
case PARSER_TOKEN_FUNCTION_CALL:
case PARSER_TOKEN_IDENTIFIER:
@@ -1552,6 +1552,8 @@ AstTree *astTreeParse(ParserNode *parserNode, AstTreeHelper *helper) {
case PARSER_TOKEN_VALUE_CHAR:
return astTreeParseValue(parserNode, AST_TREE_TOKEN_VALUE_INT,
sizeof(AstTreeInt), &AST_TREE_U8_TYPE);
+ case PARSER_TOKEN_VALUE_STRING:
+ return astTreeParseString(parserNode, helper);
case PARSER_TOKEN_KEYWORD_NULL:
return astTreeParseKeyword(parserNode, AST_TREE_TOKEN_VALUE_NULL);
case PARSER_TOKEN_KEYWORD_UNDEFINED:
@@ -1753,6 +1755,7 @@ AstTree *astTreeParseFunction(ParserNode *parserNode, AstTreeHelper *p_helper) {
case PARSER_TOKEN_VALUE_FLOAT:
case PARSER_TOKEN_VALUE_BOOL:
case PARSER_TOKEN_VALUE_CHAR:
+ case PARSER_TOKEN_VALUE_STRING:
case PARSER_TOKEN_TYPE_TYPE:
case PARSER_TOKEN_TYPE_FUNCTION:
case PARSER_TOKEN_TYPE_VOID:
@@ -1999,6 +2002,44 @@ AstTree *astTreeParseValue(ParserNode *parserNode, AstTreeToken token,
parserNode->str_end);
}
+/**
+ * Builds the AST node for a string literal.
+ *
+ * The decoded bytes held in the parser node's ParserNodeStringMetadata become
+ * an AST_TREE_TOKEN_VALUE_OBJECT with one unnamed, non-const variable per
+ * byte. Each variable is typed as a copy of AST_TREE_U8_TYPE and carries an
+ * AST_TREE_TOKEN_VALUE_INT value. The node's own type is an
+ * AST_TREE_TOKEN_TYPE_ARRAY whose operand is AST_TREE_U8_TYPE.
+ *
+ * @param parserNode String-literal parser node; its metadata is only read.
+ * @param helper     Unused.
+ * @return The tree produced by newAstTree, covering the literal's source range.
+ */
+AstTree *astTreeParseString(ParserNode *parserNode, AstTreeHelper *helper) {
+  (void)helper;
+  ParserNodeStringMetadata *node_metadata = parserNode->metadata;
+
+  AstTreeObject *metadata = a404m_malloc(sizeof(*metadata));
+
+  metadata->variables.size = node_metadata->end - node_metadata->begin;
+  metadata->variables.data = a404m_malloc(metadata->variables.size *
+                                          sizeof(*metadata->variables.data));
+
+  for (size_t i = 0; i < metadata->variables.size; ++i) {
+    AstTreeInt *cellMetadata = a404m_malloc(sizeof(*cellMetadata));
+    // Convert through unsigned char so bytes >= 0x80 are stored as 128..255
+    // instead of a negative value on targets where plain char is signed.
+    *cellMetadata = (unsigned char)node_metadata->begin[i];
+
+    metadata->variables.data[i] =
+        a404m_malloc(sizeof(*metadata->variables.data[i]));
+    metadata->variables.data[i]->isConst = false;
+    // Cells are anonymous: they have no identifier in the source text.
+    metadata->variables.data[i]->name_begin = NULL;
+    metadata->variables.data[i]->name_end = NULL;
+    metadata->variables.data[i]->type = copyAstTree(&AST_TREE_U8_TYPE);
+    metadata->variables.data[i]->value =
+        newAstTree(AST_TREE_TOKEN_VALUE_INT, cellMetadata,
+                   copyAstTree(&AST_TREE_U8_TYPE), NULL, NULL);
+  }
+
+  // Type of the whole literal: an array type over u8 with a single parameter.
+  AstTreeBracket *type_metadata = a404m_malloc(sizeof(*type_metadata));
+  type_metadata->operand = &AST_TREE_U8_TYPE;
+  type_metadata->parameters.size = 1;
+  type_metadata->parameters.data = a404m_malloc(
+      type_metadata->parameters.size * sizeof(*type_metadata->parameters.data));
+  type_metadata->parameters.data[0] = &AST_TREE_U8_TYPE;
+
+  return newAstTree(AST_TREE_TOKEN_VALUE_OBJECT, metadata,
+                    newAstTree(AST_TREE_TOKEN_TYPE_ARRAY, type_metadata,
+                               &AST_TREE_TYPE_TYPE, NULL, NULL),
+                    parserNode->str_begin, parserNode->str_end);
+}
+
AstTree *astTreeParseKeyword(ParserNode *parserNode, AstTreeToken token) {
return newAstTree(token, NULL, NULL, parserNode->str_begin,
parserNode->str_end);
@@ -2293,6 +2334,7 @@ AstTree *astTreeParseCurlyBracket(ParserNode *parserNode,
case PARSER_TOKEN_VALUE_FLOAT:
case PARSER_TOKEN_VALUE_BOOL:
case PARSER_TOKEN_VALUE_CHAR:
+ case PARSER_TOKEN_VALUE_STRING:
case PARSER_TOKEN_TYPE_TYPE:
case PARSER_TOKEN_TYPE_FUNCTION:
case PARSER_TOKEN_TYPE_VOID:
diff --git a/src/compiler/ast-tree.h b/src/compiler/ast-tree.h
index 988b48c..a5af775 100644
--- a/src/compiler/ast-tree.h
+++ b/src/compiler/ast-tree.h
@@ -292,6 +292,7 @@ AstTree *astTreeParseFunctionCall(ParserNode *parserNode,
AstTree *astTreeParseIdentifier(ParserNode *parserNode, AstTreeHelper *helper);
AstTree *astTreeParseValue(ParserNode *parserNode, AstTreeToken token,
size_t metadata_size, AstTree *type);
+AstTree *astTreeParseString(ParserNode *parserNode, AstTreeHelper *helper);
AstTree *astTreeParseKeyword(ParserNode *parserNode, AstTreeToken token);
AstTree *astTreeParsePrintU64(ParserNode *parserNode, AstTreeHelper *helper);
AstTree *astTreeParseReturn(ParserNode *parserNode, AstTreeHelper *helper);
diff --git a/src/compiler/lexer.c b/src/compiler/lexer.c
index 53d9f05..754593a 100644
--- a/src/compiler/lexer.c
+++ b/src/compiler/lexer.c
@@ -6,7 +6,6 @@
#include <stdint.h>
#include <stdio.h>
-#include <stdlib.h>
const char *LEXER_TOKEN_STRINGS[] = {
"LEXER_TOKEN_SYMBOL_CLOSE_CURLY_BRACKET",
@@ -37,6 +36,7 @@ const char *LEXER_TOKEN_STRINGS[] = {
"LEXER_TOKEN_KEYWORD_NULL",
"LEXER_TOKEN_NUMBER",
"LEXER_TOKEN_CHAR",
+ "LEXER_TOKEN_STRING",
"LEXER_TOKEN_KEYWORD_UNDEFINED",
"LEXER_TOKEN_SYMBOL_FUNCTION_ARROW",
@@ -244,6 +244,8 @@ LexerNodeArray lexer(char *str) {
LexerToken token;
if (opening == '\'') {
token = LEXER_TOKEN_CHAR;
+ } else if (opening == '\"') {
+ token = LEXER_TOKEN_STRING;
} else {
UNREACHABLE;
}
@@ -258,7 +260,7 @@ LexerNodeArray lexer(char *str) {
goto RETURN_ERROR;
} else if (*iter == '\\') {
++iter;
- } else if (*iter == '\'') {
+ } else if (*iter == opening) {
break;
}
}
@@ -357,6 +359,7 @@ void lexerPushClear(LexerNodeArray *array, size_t *array_size, char *iter,
case LEXER_TOKEN_KEYWORD_UNDEFINED:
case LEXER_TOKEN_NUMBER:
case LEXER_TOKEN_CHAR:
+ case LEXER_TOKEN_STRING:
case LEXER_TOKEN_SYMBOL_EOL:
case LEXER_TOKEN_SYMBOL_OPEN_PARENTHESIS:
case LEXER_TOKEN_SYMBOL_CLOSE_PARENTHESIS:
diff --git a/src/compiler/lexer.h b/src/compiler/lexer.h
index ad82b5a..a9b6dcd 100644
--- a/src/compiler/lexer.h
+++ b/src/compiler/lexer.h
@@ -35,6 +35,7 @@ typedef enum LexerToken {
LEXER_TOKEN_KEYWORD_NULL,
LEXER_TOKEN_NUMBER,
LEXER_TOKEN_CHAR,
+ LEXER_TOKEN_STRING,
LEXER_TOKEN_KEYWORD_UNDEFINED,
LEXER_TOKEN_SYMBOL_FUNCTION_ARROW,
diff --git a/src/compiler/parser.c b/src/compiler/parser.c
index ae2db33..6f3e956 100644
--- a/src/compiler/parser.c
+++ b/src/compiler/parser.c
@@ -19,6 +19,7 @@ const char *PARSER_TOKEN_STRINGS[] = {
"PARSER_TOKEN_VALUE_FLOAT",
"PARSER_TOKEN_VALUE_BOOL",
"PARSER_TOKEN_VALUE_CHAR",
+ "PARSER_TOKEN_VALUE_STRING",
"PARSER_TOKEN_TYPE_TYPE",
"PARSER_TOKEN_TYPE_FUNCTION",
@@ -260,6 +261,12 @@ void parserNodePrint(const ParserNode *node, int indent) {
printf(",value=%c", (char)*metadata);
}
goto RETURN_SUCCESS;
+ case PARSER_TOKEN_VALUE_STRING: {
+ ParserNodeStringMetadata *metadata = node->metadata;
+ printf(",value=%.*s", (int)(metadata->end - metadata->begin),
+ metadata->begin);
+ }
+ goto RETURN_SUCCESS;
case PARSER_TOKEN_CONSTANT:
case PARSER_TOKEN_VARIABLE: {
const ParserNodeVariableMetadata *metadata = node->metadata;
@@ -536,6 +543,12 @@ void parserNodeDelete(ParserNode *node) {
free(metadata);
}
goto RETURN_SUCCESS;
+ case PARSER_TOKEN_VALUE_STRING: {
+ ParserNodeStringMetadata *metadata = node->metadata;
+ free(metadata->begin);
+ free(metadata);
+ }
+ goto RETURN_SUCCESS;
case PARSER_TOKEN_CONSTANT:
case PARSER_TOKEN_VARIABLE: {
ParserNodeVariableMetadata *metadata = node->metadata;
@@ -799,6 +812,8 @@ ParserNode *parseNode(LexerNode *node, LexerNode *begin, LexerNode *end,
return parserNumber(node, parent);
case LEXER_TOKEN_CHAR:
return parserChar(node, parent);
+ case LEXER_TOKEN_STRING:
+ return parserString(node, parent);
case LEXER_TOKEN_SYMBOL_ASSIGN:
return parserBinaryOperator(node, begin, end, parent,
PARSER_TOKEN_OPERATOR_ASSIGN);
@@ -1077,57 +1092,57 @@ ParserNode *parserChar(LexerNode *node, ParserNode *parent) {
if (size == 0) {
printError(node->str_begin, node->str_end,
"Bad character: Character can't be empty");
- return NULL;
+ goto RETURN_ERROR;
} else if (size == 1) {
// knowingly left empty
} else if (size == 2 && c == '\\') {
- c = node->str_begin[2];
- switch (c) {
- case 'a':
- c = '\a';
- break;
- case 'b':
- c = '\b';
- break;
- case 'e':
- c = '\e';
- break;
- case 'f':
- c = '\f';
- break;
- case 'n':
- c = '\n';
- break;
- case 'r':
- c = '\r';
- break;
- case 't':
- c = '\t';
- break;
- case 'v':
- c = '\v';
- break;
- case '\\':
- c = '\\';
- break;
- case '\'':
- c = '\'';
- break;
- case '"':
- c = '\"';
- break;
- default:
+ bool success;
+ c = escapeChar(node->str_begin + 2, node->str_end, &success);
+ if (!success) {
printError(node->str_begin, node->str_end, "Bad escape character");
- return NULL;
+ goto RETURN_ERROR;
}
} else {
printError(node->str_begin, node->str_end, "Bad character");
- return NULL;
+ goto RETURN_ERROR;
}
*metadata = c;
return node->parserNode =
newParserNode(PARSER_TOKEN_VALUE_CHAR, node->str_begin,
node->str_end, metadata, parent);
+RETURN_ERROR:
+ free(metadata);
+ return NULL;
+}
+
+/**
+ * Parses a string-literal token into a PARSER_TOKEN_VALUE_STRING node.
+ *
+ * The characters between the quotes are copied into a buffer obtained from
+ * a404m_malloc, with every backslash sequence replaced by the character that
+ * escapeChar returns for it. The decoded text is the range
+ * [metadata->begin, metadata->end); no NUL terminator is written.
+ *
+ * @param node   Lexer node of the literal; on success node->parserNode is set
+ *               to the new parser node.
+ * @param parent Parent handed through to newParserNode.
+ * @return The new parser node, or NULL after reporting "Bad escape character"
+ *         when escapeChar rejects a sequence (buffer and metadata are freed).
+ */
+ParserNode *parserString(LexerNode *node, ParserNode *parent) {
+  ParserNodeStringMetadata *metadata = a404m_malloc(sizeof(*metadata));
+
+  // Decoding never produces more characters than the raw token contains, so
+  // the token length is a sufficient capacity.
+  metadata->begin = metadata->end = a404m_malloc(
+      sizeof(*metadata->begin) * (node->str_end - node->str_begin));
+
+  // NOTE(review): assumes the token spans both quotes, i.e. str_end is one
+  // past the closing quote (parserChar reads the escaped character at
+  // str_begin + 2 under the same layout) - confirm against the lexer. With
+  // that layout the content ends at str_end - 1; stopping at str_end - 2
+  // would drop the literal's last character.
+  char *const content_end = node->str_end - 1;
+
+  for (char *iter = node->str_begin + 1; iter < content_end; ++iter) {
+    if (*iter == '\\') {
+      // Step onto the character that follows the backslash and decode it.
+      iter += 1;
+      bool success;
+      *metadata->end = escapeChar(iter, node->str_end, &success);
+      if (!success) {
+        printError(node->str_begin, node->str_end, "Bad escape character");
+        goto RETURN_ERROR;
+      }
+    } else {
+      *metadata->end = *iter;
+    }
+    metadata->end += 1;
+  }
+
+  return node->parserNode =
+             newParserNode(PARSER_TOKEN_VALUE_STRING, node->str_begin,
+                           node->str_end, metadata, parent);
+RETURN_ERROR:
+  free(metadata->begin);
+  free(metadata);
+  return NULL;
}
ParserNode *parserBoolValue(LexerNode *node, ParserNode *parent) {
@@ -1464,6 +1479,7 @@ ParserNode *parserFunction(LexerNode *node, LexerNode *begin, LexerNode *end,
case PARSER_TOKEN_VALUE_FLOAT:
case PARSER_TOKEN_VALUE_BOOL:
case PARSER_TOKEN_VALUE_CHAR:
+ case PARSER_TOKEN_VALUE_STRING:
case PARSER_TOKEN_TYPE_TYPE:
case PARSER_TOKEN_TYPE_FUNCTION:
case PARSER_TOKEN_TYPE_VOID:
@@ -1958,6 +1974,7 @@ bool isExpression(ParserNode *node) {
case PARSER_TOKEN_VALUE_FLOAT:
case PARSER_TOKEN_VALUE_BOOL:
case PARSER_TOKEN_VALUE_CHAR:
+ case PARSER_TOKEN_VALUE_STRING:
case PARSER_TOKEN_KEYWORD_IF:
case PARSER_TOKEN_KEYWORD_WHILE:
case PARSER_TOKEN_KEYWORD_COMPTIME:
@@ -2039,6 +2056,7 @@ bool isType(ParserNode *node) {
case PARSER_TOKEN_VALUE_FLOAT:
case PARSER_TOKEN_VALUE_BOOL:
case PARSER_TOKEN_VALUE_CHAR:
+ case PARSER_TOKEN_VALUE_STRING:
case PARSER_TOKEN_KEYWORD_PUTC:
case PARSER_TOKEN_KEYWORD_RETURN:
case PARSER_TOKEN_OPERATOR_ASSIGN:
@@ -2079,6 +2097,7 @@ bool isValue(ParserNode *node) {
case PARSER_TOKEN_VALUE_FLOAT:
case PARSER_TOKEN_VALUE_BOOL:
case PARSER_TOKEN_VALUE_CHAR:
+ case PARSER_TOKEN_VALUE_STRING:
case PARSER_TOKEN_IDENTIFIER:
case PARSER_TOKEN_BUILTIN:
case PARSER_TOKEN_OPERATOR_ACCESS:
@@ -2148,3 +2167,45 @@ bool isValue(ParserNode *node) {
}
UNREACHABLE;
}
+
+/**
+ * Maps the character that follows a backslash to the character it denotes.
+ *
+ * Recognised letters are a, b, e, f, n, r, t and v; a backslash, a single
+ * quote and a double quote stand for themselves.
+ *
+ * @param begin   Points at the character right after the backslash.
+ * @param end     Unused.
+ * @param success Set to true when *begin is a recognised escape, false
+ *                otherwise.
+ * @return The decoded character, or 0 when the escape is not recognised.
+ */
+char escapeChar(char *begin, char *end, bool *success) {
+  (void)end;
+  char decoded;
+  switch (*begin) {
+  case 'a':
+    decoded = '\a';
+    break;
+  case 'b':
+    decoded = '\b';
+    break;
+  case 'e':
+    // ESC written numerically: '\e' is a compiler extension, not ISO C.
+    decoded = '\x1b';
+    break;
+  case 'f':
+    decoded = '\f';
+    break;
+  case 'n':
+    decoded = '\n';
+    break;
+  case 'r':
+    decoded = '\r';
+    break;
+  case 't':
+    decoded = '\t';
+    break;
+  case 'v':
+    decoded = '\v';
+    break;
+  case '\\':
+  case '\'':
+  case '"':
+    // These characters escape to themselves.
+    decoded = *begin;
+    break;
+  default:
+    *success = false;
+    return 0;
+  }
+  *success = true;
+  return decoded;
+}
diff --git a/src/compiler/parser.h b/src/compiler/parser.h
index 120ae39..326849b 100644
--- a/src/compiler/parser.h
+++ b/src/compiler/parser.h
@@ -15,6 +15,7 @@ typedef enum ParserToken {
PARSER_TOKEN_VALUE_FLOAT,
PARSER_TOKEN_VALUE_BOOL,
PARSER_TOKEN_VALUE_CHAR,
+ PARSER_TOKEN_VALUE_STRING,
PARSER_TOKEN_TYPE_TYPE,
PARSER_TOKEN_TYPE_FUNCTION,
@@ -139,6 +140,11 @@ typedef f128 ParserNodeFloatMetadata;
typedef ParserNodeIntMetadata ParserNodeCharMetadata;
+typedef struct ParserNodeStringMetadata { // decoded text of a string literal, as filled in by parserString
+ char *begin; // start of the decoded characters; this is the pointer parserNodeDelete passes to free
+ char *end; // one past the last decoded character; parserString stores no NUL terminator
+} ParserNodeStringMetadata;
+
typedef struct ParserNodeInfixMetadata {
ParserNode *left;
ParserNode *right;
@@ -192,6 +198,7 @@ ParserNode *parserPutc(LexerNode *node, LexerNode *end, ParserNode *parent);
ParserNode *parserReturn(LexerNode *node, LexerNode *end, ParserNode *parent);
ParserNode *parserNumber(LexerNode *node, ParserNode *parent);
ParserNode *parserChar(LexerNode *node, ParserNode *parent);
+ParserNode *parserString(LexerNode *node, ParserNode *parent);
ParserNode *parserBoolValue(LexerNode *node, ParserNode *parent);
ParserNode *parserEol(LexerNode *node, LexerNode *begin, ParserNode *parent);
ParserNode *parserComma(LexerNode *node, LexerNode *begin, ParserNode *parent);
@@ -228,3 +235,5 @@ bool isAllArguments(const ParserNodeArray *nodes);
bool isExpression(ParserNode *node);
bool isType(ParserNode *node);
bool isValue(ParserNode *node);
+
+char escapeChar(char *begin,char *end, bool *success);