#include "lexer.h"

#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <utils/memory/memory.h>

const char *TOKEN_STRINGS[] = {
    "TOKEN_NONE",
    "TOKEN_IDENTIFIER",
    "TOKEN_KEYWORD_PRINT",
    "TOKEN_NUMBER",
    "TOKEN_STRING",
    "TOKEN_OPERATOR",
    "TOKEN_OPERATOR_PARENTHESES_OPEN",
    "TOKEN_OPERATOR_PARENTHESES_CLOSE",
    "TOKEN_OPERATOR_CURLY_BRACKET_OPEN",
    "TOKEN_OPERATOR_CURLY_BRACKET_CLOSE",
    "TOKEN_OPERATOR_ASSIGN",
    "TOKEN_OPERATOR_EQUAL",
    "TOKEN_OPERATOR_COLON",
    "TOKEN_OPERATOR_EOL",
    "TOKEN_SYMBOL",
    "TOKEN_PARSED",
};

static const char *KEYWORDS_STRINGS[] = {
    "print",
};
static const Token KEYWORDS_TOKENS[] = {
    TOKEN_KEYWORD_PRINT,
};
static const size_t KEYWORDS_SIZE = sizeof(KEYWORDS_STRINGS) / sizeof(char *);

static const char *OPERATORS_STRINGS[] = {
    "(", ")", "{", "}", "=", "==", ":", ";",
};
static const Token OPERATORS_TOKENS[] = {
    TOKEN_OPERATOR_PARENTHESES_OPEN,
    TOKEN_OPERATOR_PARENTHESES_CLOSE,
    TOKEN_OPERATOR_CURLY_BRACKET_OPEN,
    TOKEN_OPERATOR_CURLY_BRACKET_CLOSE,
    TOKEN_OPERATOR_ASSIGN,
    TOKEN_OPERATOR_EQUAL,
    TOKEN_OPERATOR_COLON,
    TOKEN_OPERATOR_EOL,
};
static const size_t OPERATOR_SIZE = sizeof(OPERATORS_STRINGS) / sizeof(char *);

void printNodes(Nodes nodes) {
  for (size_t i = 0; i < nodes.size; ++i) {
    const Node node = nodes.nodes[i];
    printf("{'%.*s',%s}\n", (int)(node.strEnd - node.strBegin), node.strBegin,
           TOKEN_STRINGS[node.token]);
  }
}

void deleteNodes(Nodes nodes) { free(nodes.nodes); }

Nodes lexer(char const *restrict str) {
  size_t nodes_size = 10;
  Node *nodes = a404m_malloc(nodes_size * sizeof(Node));
  size_t nodes_inserted = 0;

  Node node = {
      .strBegin = str,
      .strEnd = str,
      .token = TOKEN_NONE,
  };

  for (int i = 0;; ++i) {
    const char c = str[i];
    if (c == '\0') {
      push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                        TOKEN_NONE);
      break;
    } else if (c == '/') {
      const char follow = str[i + 1];
      if (follow == '/') {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                          TOKEN_NONE);
        for (i += 2; str[i] != '\0' && str[i] != '\n'; ++i);
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                          TOKEN_NONE);
        if (str[i] == '\0') {
          goto RETURN_SUCCESS;
        }
        continue;
      } else if (follow == '*') {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                          TOKEN_NONE);
        int in = 1;
        for (i += 2;; ++i) {
          switch (str[i]) {
            case '\0':
              fprintf(stderr,
                      "expected multi line comment to end at compiler line %d "
                      "and in=%d\n",
                      __LINE__, in);
              exit(1);
            case '*':
              ++i;
              if (str[i] == '/') {
                --in;
                if (in == 0) {
                  goto END_OF_BLOCK_COMMENT_LOOP;
                }
              } else if (str[i] == '\0') {
                fprintf(stderr,
                        "expected multi line comment to end at compiler line "
                        "%d and in=%d\n",
                        __LINE__, in);
                exit(1);
              }
              break;
            case '/':
              ++i;
              if (str[i] == '*') {
                ++in;
              } else if (str[i] == '\0') {
                fprintf(stderr,
                        "expected multi line comment to end at compiler line "
                        "%d and in=%d\n",
                        __LINE__, in);
                exit(1);
              }
              break;
          }
        }
      END_OF_BLOCK_COMMENT_LOOP:
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                          TOKEN_NONE);
        if (str[i] == '\0') {
          goto RETURN_SUCCESS;
        }
        continue;
      }
    }
    if (isSpace(c)) {
      push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                        TOKEN_NONE);
    } else if (isIdentifier(c)) {
      if (node.token != TOKEN_IDENTIFIER && node.token != TOKEN_SYMBOL) {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                          TOKEN_IDENTIFIER);
      }
    } else if (isIdentifierSymbol(c)) {
      push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                        TOKEN_IDENTIFIER);
      for (++i;; ++i) {
        const char current = str[i];
        if (current == c) {
          break;
        } else if (current == '\0') {
          fprintf(stderr, "expected %c to end\n", c);
          exit(1);
        }
      }
      ++node.strBegin;
      push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node, str,
                               i, TOKEN_NONE);
    } else if (isNumber(c)) {
      if (node.token != TOKEN_NUMBER && node.token != TOKEN_IDENTIFIER &&
          node.token != TOKEN_SYMBOL) {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                          TOKEN_NUMBER);
      }
    } else if (isString(c)) {
      push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                        TOKEN_STRING);

      for (++i;; ++i) {
        const char current = str[i];
        if (current == c) {
          break;
        }else if(current == '\\'){
          ++i;
        } else if (current == '\0') {
          fprintf(stderr, "expected %c to end\n", c);
          exit(1);
        }
      }

      ++i;
      push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                        TOKEN_NONE);
      --i;
    } else if (isOperator(c)) {
      if (node.token == TOKEN_OPERATOR) {
        const Token token = getOperator(node.strBegin, str + i + 1);
        if (token != TOKEN_NONE) {
          continue;
        } else {
          node.token = getOperator(node.strBegin, str + i);
          if (node.token == TOKEN_NONE) {
            fprintf(stderr, "unknown operator '%.*s'\n",
                    (int)(str + i - node.strBegin), node.strBegin);
            exit(1);
          }
          push_clear_without_check(&nodes, &nodes_size, &nodes_inserted, &node,
                                   str, i, TOKEN_OPERATOR);
        }
      } else {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                          TOKEN_OPERATOR);
      }
    } else if (isSymbol(c)) {
      if (node.token != TOKEN_SYMBOL) {
        push_if_not_empty(&nodes, &nodes_size, &nodes_inserted, &node, str, i,
                          TOKEN_SYMBOL);
      }
    } else {
      fprintf(stderr, "unexpected char '%c'\n", c);
      exit(1);
    }
  }

RETURN_SUCCESS:
  Nodes result = {
      .nodes = a404m_realloc(nodes, nodes_inserted * sizeof(Node)),
      .size = nodes_inserted,
  };

  return result;
}

void push_if_not_empty(Node **restrict nodes, size_t *restrict nodes_size,
                       size_t *restrict nodes_inserted, Node *restrict node,
                       char const *restrict str, int i, Token token) {
  if (node->token != TOKEN_NONE) {
    if (*nodes_size == *nodes_inserted) {
      *nodes_size += *nodes_size / 2 + 1;
      *nodes = a404m_realloc(*nodes, *nodes_size * sizeof(Node));
    }
    node->strEnd = str + i;
    if (node->token == TOKEN_IDENTIFIER) {
      const Token foundToken = getKeyword(node->strBegin, node->strEnd);
      if (foundToken != TOKEN_NONE) {
        node->token = foundToken;
      }
    } else if (node->token == TOKEN_OPERATOR) {
      const Token foundToken = getOperator(node->strBegin, node->strEnd);
      if (foundToken != TOKEN_NONE) {
        node->token = foundToken;
      }
    }

    (*nodes)[*nodes_inserted] = *node;
    ++*nodes_inserted;
  }
  node->strBegin = str + i;
  node->token = token;
}

void push_clear_without_check(Node **restrict nodes,
                              size_t *restrict nodes_size,
                              size_t *restrict nodes_inserted, Node *node,
                              char const *restrict str, int i, Token token) {
  if (*nodes_size == *nodes_inserted) {
    *nodes_size += *nodes_size / 2 + 1;
    *nodes = a404m_realloc(*nodes, *nodes_size * sizeof(Node));
  }
  node->strEnd = str + i;
  (*nodes)[*nodes_inserted] = *node;
  ++*nodes_inserted;
  node->strBegin = str + i;
  node->token = token;
}

bool isSpace(char c) { return isspace(c); }

bool isIdentifier(char c) {
  return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z') || c == '_';
}

bool isIdentifierSymbol(char c) { return c == '`'; }

bool isNumber(char c) { return '0' <= c && c <= '9'; }

bool isString(char c) { return c == '"' || c == '\''; }

bool isOperator(char c) {
  switch (c) {
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case '*':
    case '/':
    case '%':
    case '+':
    case '-':
    case ':':
    case '!':
    case '=':
    case '<':
    case '>':
    case '&':
    case '|':
    case '.':
    case ',':
    case ';':
      return true;
    default:
      return false;
  }
}

bool isSymbol(char c) { return c == '#'; }

Token getKeyword(char const *strBegin, char const *strEnd) {
  const size_t strSize = strEnd - strBegin;

  for (size_t i = 0; i < KEYWORDS_SIZE; ++i) {
    const char *search = KEYWORDS_STRINGS[i];
    if (strlen(search) == strSize && strncmp(search, strBegin, strSize) == 0) {
      return KEYWORDS_TOKENS[i];
    }
  }

  return TOKEN_NONE;
}
Token getOperator(char const *strBegin, char const *strEnd) {
  const size_t strSize = strEnd - strBegin;

  for (size_t i = 0; i < OPERATOR_SIZE; ++i) {
    const char *search = OPERATORS_STRINGS[i];
    if (strlen(search) == strSize && strncmp(search, strBegin, strSize) == 0) {
      return OPERATORS_TOKENS[i];
    }
  }

  return TOKEN_NONE;
}