2024-10-13 23:46:03 -04:00
|
|
|
#include <ctype.h>
|
2024-10-19 09:09:37 -04:00
|
|
|
#include <stdio.h>
|
2024-10-13 23:46:03 -04:00
|
|
|
#include <string.h>
|
2024-10-19 09:09:37 -04:00
|
|
|
#include <limits.h>
|
2024-10-13 23:46:03 -04:00
|
|
|
|
2024-10-02 17:57:04 -04:00
|
|
|
#include "include/lexer.h"
|
2024-10-19 09:09:37 -04:00
|
|
|
#include "include/dstr.h"
|
2024-10-10 16:09:25 -04:00
|
|
|
#include "include/util.h"
|
2024-10-02 17:57:04 -04:00
|
|
|
|
|
|
|
Lexer* lexer_init(char* src) {
|
|
|
|
Lexer* lexer = malloc(sizeof(Lexer));
|
|
|
|
|
|
|
|
lexer->src = src;
|
2024-10-19 09:09:37 -04:00
|
|
|
lexer->srcln = strlen(src);
|
2024-10-02 17:57:04 -04:00
|
|
|
lexer->cchar = lexer->src;
|
2024-10-02 21:04:54 -04:00
|
|
|
|
2024-10-10 16:09:25 -04:00
|
|
|
lexer->tokens = calloc(TOKENS_MAX, sizeof(Token*));
|
2024-10-02 17:57:04 -04:00
|
|
|
lexer->ntokens = 0;
|
|
|
|
lexer->state = LEXER_STATE_CONFUSED;
|
|
|
|
|
2024-10-13 23:46:03 -04:00
|
|
|
log_dbgf("created new lexer @ %p", lexer);
|
|
|
|
|
2024-10-02 17:57:04 -04:00
|
|
|
return lexer;
|
|
|
|
}
|
|
|
|
|
2024-10-02 21:04:54 -04:00
|
|
|
/*
 * Release the resources held by `lexer`: every token added so far, the token
 * array itself, and the source string.
 *
 * Passing NULL is a no-op. The freed pointers are reset to NULL so that a
 * stale lexer fails loudly rather than reading freed memory.
 *
 * NOTE(review): the Lexer struct itself is not freed here, matching the
 * previous behaviour; confirm whether callers free it or whether that should
 * move into this function.
 */
void lexer_destroy(Lexer* lexer) {
    if (lexer == NULL) return;

    if (lexer->tokens != NULL) {
        for (int i = 0; i < lexer->ntokens; i++) token_destroy(lexer->tokens[i]);

        // The array was allocated in lexer_init and was previously leaked.
        free(lexer->tokens);
        lexer->tokens = NULL;
    }
    lexer->ntokens = 0;

    free(lexer->src);
    lexer->src = NULL;
    lexer->cchar = NULL;
}
|
|
|
|
|
|
|
|
/*
 * Run the lexer over its source until the cursor reaches the terminating NUL,
 * dispatching to the handler for the current state on every iteration.
 *
 * If the lexer is in a state with no handler, lexing stops immediately: no
 * handler would advance the cursor, so continuing would loop forever.
 */
void lexer_lex(Lexer* lexer) {
    while (*lexer->cchar) {
        switch (lexer->state) {
            case LEXER_STATE_CONFUSED: lexer_do_confused(lexer); break;
            case LEXER_STATE_NUM: lexer_do_number(lexer); break;
            case LEXER_STATE_CALL: lexer_do_call(lexer); break;
            default:
                log_dbgf("lexer @ %p is in unknown state %d; stopping", (void*)lexer, (int)lexer->state);
                return;
        }
    }
}
|
2024-10-05 09:24:12 -04:00
|
|
|
|
2024-10-07 11:48:53 -04:00
|
|
|
/*
 * Decide what kind of token starts at the current character and hand over to
 * the matching handler: a digit starts a number, anything else starts a call.
 */
void lexer_do_confused(Lexer* lexer) {
    log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", (void*)lexer, *lexer->cchar, (int)*lexer->cchar);

    // <ctype.h> functions require a value representable as unsigned char;
    // passing a negative plain char (any non-ASCII byte) is undefined.
    if (isdigit((unsigned char)*lexer->cchar)) {
        lexer->state = LEXER_STATE_NUM;
        lexer_do_number(lexer);
    } else {
        lexer->state = LEXER_STATE_CALL;
        lexer_do_call(lexer);
    }
}
|
|
|
|
|
|
|
|
/*
 * Consume a run of decimal digits starting at the cursor and add it to the
 * lexer as a TOKEN_TYPE_NUMBER token holding a heap-allocated,
 * NUL-terminated copy of the digits.
 *
 * The lexer always leaves in the CONFUSED state. If the copy cannot be
 * allocated the digits are still consumed but no token is added.
 */
void lexer_do_number(Lexer* lexer) {
    log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", (void*)lexer, *lexer->cchar, (int)*lexer->cchar);

    // Where the number string starts.
    char* start = lexer->cchar;

    // Length of the number string. isdigit('\0') is false, so the loop also
    // stops at the end of the source. The unsigned char cast keeps isdigit
    // defined for bytes that are negative as plain char.
    size_t numln = 0;
    while (isdigit((unsigned char)*lexer->cchar)) {
        lexer_inc(lexer);
        numln++;
    }

    char* num = malloc(numln + 1);
    if (num == NULL) {
        log_dbgf("lexer @ %p could not allocate a number string", (void*)lexer);
        lexer->state = LEXER_STATE_CONFUSED;
        return;
    }
    memcpy(num, start, numln);
    num[numln] = '\0';

    lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num, numln));
    lexer->state = LEXER_STATE_CONFUSED;
}
|
|
|
|
|
|
|
|
/*
 * Consume every character from the cursor up to the next decimal digit (or
 * the end of the source) and add it to the lexer as a TOKEN_TYPE_CALL token
 * holding a heap-allocated, NUL-terminated copy of those characters.
 *
 * The lexer always leaves in the CONFUSED state. If the copy cannot be
 * allocated the characters are still consumed but no token is added.
 */
void lexer_do_call(Lexer* lexer) {
    log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", (void*)lexer, *lexer->cchar, (int)*lexer->cchar);

    // Where the call string starts.
    char* start = lexer->cchar;

    // Size of the call string. The unsigned char cast keeps isdigit defined
    // for bytes that are negative as plain char.
    size_t callln = 0;
    while (*lexer->cchar && !isdigit((unsigned char)*lexer->cchar)) {
        lexer_inc(lexer);
        callln++;
    }

    char* call = malloc(callln + 1);
    if (call == NULL) {
        log_dbgf("lexer @ %p could not allocate a call string", (void*)lexer);
        lexer->state = LEXER_STATE_CONFUSED;
        return;
    }
    memcpy(call, start, callln);
    call[callln] = '\0';

    lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, call, callln));
    lexer->state = LEXER_STATE_CONFUSED;
}
|
|
|
|
|
|
|
|
// Move the lexer's cursor forward by one character.
void lexer_inc(Lexer* lexer) {
    lexer->cchar++;
}
|
|
|
|
|
2024-10-05 09:24:12 -04:00
|
|
|
/*
 * Append `token` to the lexer's token array.
 *
 * The array accepts at most TOKENS_MAX - 1 tokens; the final slot is never
 * written (NOTE(review): presumably kept as a NULL terminator — confirm).
 * A token that does not fit is destroyed rather than silently leaked, since
 * the lexer owns every token handed to this function.
 */
void lexer_add_token(Lexer* lexer, Token* token) {
    assert(lexer->ntokens < TOKENS_MAX);

    if (lexer->ntokens >= TOKENS_MAX - 1) {
        log_dbgf("token array is full (total: %ld); dropping token", (long)lexer->ntokens);
        token_destroy(token);
        return;
    }

    lexer->tokens[lexer->ntokens] = token;
    lexer->ntokens++;

    log_dbgf("added token (total: %ld)", (long)lexer->ntokens);
}
|
|
|
|
|
2024-10-19 10:59:05 -04:00
|
|
|
// Print a description of the lexer to stdout with no leading indentation.
void lexer_print(Lexer* lexer) {
    const int no_indent = 0;
    lexer_print_i(lexer, no_indent);
}
|
|
|
|
|
|
|
|
/*
 * Print a description of the lexer to stdout, with every line prefixed by
 * `ilvl` spaces. The state and each token are printed by their own
 * *_print_i helpers at indentation `ilvl + 2`.
 */
void lexer_print_i(Lexer* lexer, int ilvl) {
    Dstr* spacing = dstr_init();
    for (int i = 0; i < ilvl; i++) dstr_appendch(spacing, ' ');

    // Read the buffer pointer only after all appends are done.
    // NOTE(review): presumably dstr_appendch can grow and replace
    // spacing->buf, which would leave an earlier copy dangling — confirm.
    const char* sp = spacing->buf;

    printf("%sLexer @ %p\n", sp, (void*)lexer);
    printf("%s state:\n", sp);
    lexerstate_print_i(lexer->state, ilvl + 2);
    printf("%s srcln:\n", sp);
    // Casts make the argument match %ld whatever integer type the field has.
    printf("%s %ld\n", sp, (long)lexer->srcln);
    printf("%s src:\n", sp);
    printf("%s \"%s\"\n", sp, lexer->src);
    printf("%s cchar: \'%c\'\n", sp, *lexer->cchar);
    printf("%s ntokens: %ld\n", sp, (long)lexer->ntokens);
    printf("%s tokens: [\n", sp);

    for (int i = 0; i < lexer->ntokens; i++) {
        token_print_i(lexer->tokens[i], ilvl + 2);
        printf(",\n\n");
    }

    // Close the list opened by "tokens: [" above.
    printf("%s ]\n", sp);

    dstr_destroy(spacing);
}
|
2024-10-19 09:09:37 -04:00
|
|
|
|
2024-10-19 10:59:05 -04:00
|
|
|
// Print the name of a lexer state to stdout with no leading indentation.
void lexerstate_print(LexerState s) {
    const int no_indent = 0;
    lexerstate_print_i(s, no_indent);
}
|
2024-10-19 09:09:37 -04:00
|
|
|
|
2024-10-19 10:59:05 -04:00
|
|
|
/*
 * Print the name of lexer state `s` to stdout, prefixed by `ilvl` spaces.
 *
 * A value outside 0..LEXER_STATE_MAX is printed as "Unknown (<value>)" and
 * logged instead of being used to index lexerstate_names.
 */
void lexerstate_print_i(LexerState s, int ilvl) {
    Dstr* spacing = dstr_init();
    for (int j = 0; j < ilvl; j++) dstr_appendch(spacing, ' ');

    // Reject negative values too: they would index before the names table.
    if ((int)s < 0 || s > LEXER_STATE_MAX) {
        printf("%sUnknown (%d)\n", spacing->buf, (int)s);
        log_dbgf("%d is not a valid LexerSate (max: %d)", (int)s, LEXER_STATE_MAX);
    } else {
        printf("%s%s\n", spacing->buf, lexerstate_names[s]);
    }

    // Single exit so the indentation string is released on both paths.
    dstr_destroy(spacing);
}
|