scl/src/lexer.c

136 lines
3.5 KiB
C
Raw Normal View History

2024-10-13 23:46:03 -04:00
#include <ctype.h>
2024-10-19 09:09:37 -04:00
#include <stdio.h>
2024-10-13 23:46:03 -04:00
#include <string.h>
2024-10-19 09:09:37 -04:00
#include <limits.h>
2024-10-13 23:46:03 -04:00
2024-10-02 17:57:04 -04:00
#include "include/lexer.h"
2024-10-19 09:09:37 -04:00
#include "include/dstr.h"
#include "include/util.h"
2024-10-02 17:57:04 -04:00
Lexer* lexer_init(char* src) {
Lexer* lexer = malloc(sizeof(Lexer));
lexer->src = src;
2024-10-19 09:09:37 -04:00
lexer->srcln = strlen(src);
2024-10-02 17:57:04 -04:00
lexer->cchar = lexer->src;
2024-10-02 21:04:54 -04:00
lexer->tokens = calloc(TOKENS_MAX, sizeof(Token*));
2024-10-02 17:57:04 -04:00
lexer->ntokens = 0;
lexer->state = LEXER_STATE_CONFUSED;
2024-10-13 23:46:03 -04:00
log_dbgf("created new lexer @ %p", lexer);
2024-10-02 17:57:04 -04:00
return lexer;
}
2024-10-02 21:04:54 -04:00
// Release what the lexer holds: every token added so far, the token table
// itself, and the source buffer. A NULL lexer is ignored.
//
// Freed pointers are reset to NULL and the token count to 0, so calling this
// twice on the same lexer is harmless.
//
// NOTE(review): the Lexer struct itself (malloc'd in lexer_init) is still NOT
// freed here, matching the previous behaviour — confirm whether callers free
// it, otherwise it leaks.
void lexer_destroy(Lexer* lexer) {
    if (lexer == NULL) return;

    if (lexer->tokens != NULL) {
        for (int i = 0; i < lexer->ntokens; i++) token_destroy(lexer->tokens[i]);
        // The table was calloc'd in lexer_init and was previously leaked.
        free(lexer->tokens);
    }
    lexer->tokens = NULL;
    lexer->ntokens = 0;

    free(lexer->src);
    lexer->src = NULL;
    // cchar pointed into src, so it must not outlive it.
    lexer->cchar = NULL;
}
// Run the lexer until the cursor reaches the terminating NUL of the source,
// dispatching to the handler for the current state on every iteration.
void lexer_lex(Lexer* lexer) {
    while (*lexer->cchar) {
        switch (lexer->state) {
            case LEXER_STATE_CONFUSED:  lexer_do_confused(lexer);   break;
            case LEXER_STATE_NUM:       lexer_do_number(lexer);     break;
            case LEXER_STATE_CALL:      lexer_do_call(lexer);       break;
            default:
                // An unrecognised state used to do nothing here, which left the
                // cursor in place and spun this loop forever. Fall back to
                // confused mode: it re-classifies the current character and
                // always consumes at least one.
                lexer_do_confused(lexer);
                break;
        }
    }
}
2024-10-05 09:24:12 -04:00
2024-10-07 11:48:53 -04:00
// Decide what kind of token starts at the current character and hand off to
// the matching handler: a decimal digit starts a number, anything else starts
// a call.
void lexer_do_confused(Lexer* lexer) {
    log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", (void*)lexer, *lexer->cchar, (int)*lexer->cchar);

    // <ctype.h> functions need a value representable as unsigned char (or EOF);
    // a plain char can be negative for non-ASCII bytes, which is undefined.
    if (isdigit((unsigned char)*lexer->cchar)) {
        lexer->state = LEXER_STATE_NUM;
        lexer_do_number(lexer);
    } else {
        lexer->state = LEXER_STATE_CALL;
        lexer_do_call(lexer);
    }
}
// Consume a run of decimal digits starting at the cursor and add it to the
// lexer as a TOKEN_TYPE_NUMBER token holding a heap-allocated, NUL-terminated
// copy of the digits. The state is reset to confused afterwards.
//
// If the copy cannot be allocated, the digits are still consumed but no token
// is added.
void lexer_do_number(Lexer* lexer) {
    log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", (void*)lexer, *lexer->cchar, (int)*lexer->cchar);

    // Where the number string starts.
    char* start = lexer->cchar;
    // Size of the number string.
    size_t numsz = 0;

    // Cast before isdigit(): a negative plain char is undefined for <ctype.h>.
    while (*lexer->cchar && isdigit((unsigned char)*lexer->cchar)) {
        lexer_inc(lexer);
        numsz++;
    }

    // From here on the next character needs re-classifying either way.
    lexer->state = LEXER_STATE_CONFUSED;

    char* num = malloc(numsz + 1);
    if (num == NULL) {
        // Previously this fell through to memcpy() on a NULL destination.
        log_dbgf("lexer @ %p could not allocate %zu bytes for a number token", (void*)lexer, numsz + 1);
        return;
    }
    memcpy(num, start, numsz);
    num[numsz] = '\0';

    // NOTE(review): the meaning of token_init's third argument (1) is not
    // visible from this file.
    lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num, 1));
}
// Consume every character from the cursor up to (not including) the next
// decimal digit or the end of the source, and add it to the lexer as a
// TOKEN_TYPE_CALL token holding a heap-allocated, NUL-terminated copy.
// The state is reset to confused afterwards.
//
// If the copy cannot be allocated, the characters are still consumed but no
// token is added.
void lexer_do_call(Lexer* lexer) {
    log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", (void*)lexer, *lexer->cchar, (int)*lexer->cchar);

    // Where the call string starts.
    char* start = lexer->cchar;
    // Size of the call string.
    size_t callsz = 0;

    // Cast before isdigit(): a negative plain char is undefined for <ctype.h>.
    while (*lexer->cchar && !isdigit((unsigned char)*lexer->cchar)) {
        lexer_inc(lexer);
        callsz++;
    }

    // From here on the next character needs re-classifying either way.
    lexer->state = LEXER_STATE_CONFUSED;

    char* call = malloc(callsz + 1);
    if (call == NULL) {
        // Previously this fell through to memcpy() on a NULL destination.
        log_dbgf("lexer @ %p could not allocate %zu bytes for a call token", (void*)lexer, callsz + 1);
        return;
    }
    memcpy(call, start, callsz);
    call[callsz] = '\0';

    // NOTE(review): the meaning of token_init's third argument (1) is not
    // visible from this file.
    lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, call, 1));
}
// Advance the lexer's cursor by exactly one character.
// No bounds check is performed; callers test for the terminating NUL.
void lexer_inc(Lexer* lexer) {
    lexer->cchar++;
}
2024-10-05 09:24:12 -04:00
// Append `token` to the lexer's token table.
//
// At most TOKENS_MAX - 1 tokens are stored, so the last slot of the
// zero-initialised table is never written. Once that limit is reached the
// token is not stored; it is destroyed instead of being dropped on the floor,
// since nothing else would release it.
void lexer_add_token(Lexer* lexer, Token* token) {
    assert(lexer->ntokens < TOKENS_MAX);

    if (lexer->ntokens >= TOKENS_MAX - 1) {
        log_dbgf("lexer @ %p token table is full; discarding token @ %p", (void*)lexer, (void*)token);
        // NOTE(review): assumes the lexer owns tokens passed in here (as
        // lexer_destroy's token_destroy loop suggests) — confirm no caller
        // keeps using `token` after this call.
        if (token != NULL) token_destroy(token);
        return;
    }

    lexer->tokens[lexer->ntokens] = token;
    lexer->ntokens++;
}
2024-10-19 09:09:37 -04:00
// Build a printable description of the lexer: its address, the source length
// and the source text, appended to a fresh Dstr in that order.
//
// Returns the new Dstr (NULL if dstr_init() returned NULL).
// NOTE(review): presumably the caller is responsible for releasing the
// returned Dstr — confirm against dstr.h.
// NOTE(review): as before, nothing separates the address from "srcln:" in the
// output; add a newline there if that was the intent.
Dstr* lexer_to_dstr(Lexer* lexer) {
    Dstr* str = dstr_init();
    if (str == NULL) return NULL;

    // The old buffer was sized for an 8-hex-digit pointer and written with
    // sprintf, which overflows on 64-bit targets. Use a bounded write and
    // append exactly as many characters as were produced.
    char title[64];
    int titleln = snprintf(title, sizeof title, "Lexer @ %p", (void*)lexer);
    if (titleln < 0) {
        title[0] = '\0';
        titleln = 0;
    } else if ((size_t)titleln >= sizeof title) {
        titleln = (int)(sizeof title - 1);
    }
    dstr_append(str, title, (size_t)titleln);

    // The cast keeps the %zu specifier correct whatever integer type srcln is
    // declared as. The full length is appended; `ln - 1` used to cut off the
    // last digit.
    char src_sz[64];
    int szln = snprintf(src_sz, sizeof src_sz, "srcln: %zu", (size_t)lexer->srcln);
    if (szln < 0) {
        src_sz[0] = '\0';
        szln = 0;
    } else if ((size_t)szln >= sizeof src_sz) {
        szln = (int)(sizeof src_sz - 1);
    }
    dstr_append(str, src_sz, (size_t)szln);

    // Length is taken from the literal itself; the hand-counted 5 dropped the
    // trailing space.
    char src_label[] = "\nsrc: ";
    dstr_append(str, src_label, sizeof src_label - 1);

    dstr_append(str, lexer->src, lexer->srcln);

    return str;
}
// Map a lexer state to a short, static, human-readable name.
// Any value other than the three known states yields "UNKNOWN".
// The returned pointer refers to a string literal and must not be modified
// or freed.
char* lexer_state_to_str(LexerState s) {
    char* name = "UNKNOWN";

    if (s == LEXER_STATE_NUM) {
        name = "NUM";
    } else if (s == LEXER_STATE_CALL) {
        name = "CALL";
    } else if (s == LEXER_STATE_CONFUSED) {
        name = "CONFUSED";
    }

    return name;
}