diff --git a/src/include/lexer.h b/src/include/lexer.h index d0032fa..024b24a 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -2,12 +2,8 @@ #define LEXER_H #include -#include -#include #include -#include -#include "util.h" #include "token.h" #define TOKENS_MAX 32 @@ -24,7 +20,7 @@ typedef enum { // Lexer: converts text to tokens. typedef struct { char* src; // The source text. - size_t srcl; // The number of source chars. + size_t srcln; // The number of source chars. char* cchar; // The current character. Token** tokens; // The tokens produced. size_t ntokens; // The number of tokens. @@ -55,4 +51,10 @@ void lexer_inc(Lexer* lexer); // Add a token to the lexer. void lexer_add_token(Lexer* lexer, Token* token); +// Returns a dynamic string representation of the Lexer. +Dstr* lexer_to_dstr(Lexer* lexer); + +// Returns a string representation of the LexerState. +char* lexer_state_to_str(LexerState s); + #endif diff --git a/src/include/util.h b/src/include/util.h index 38932df..b6bb647 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -1,20 +1,30 @@ #ifndef UTIL_H #define UTIL_H -#ifdef DBG +#ifdef DBG // Debug macros +// Log a message. #define log_dbg(msg) \ printf("\033[37;1mdbg\033[0m:\033[37;5m%s\033[0m:\033[32m " msg \ "\033[0m\n", \ __func__); +// Log a message with formatting. #define log_dbgf(msg, ...) \ printf("\033[37;1mdbg\033[0m:\033[37;5m%s\033[0m:\033[32m " msg \ "\033[0m\n", \ __func__, __VA_ARGS__); -#else +#else // ifdef DBG #define log_dbg(msg) +// Expands to nothing, so log_dbgf call sites still compile when DBG is unset. +#define log_dbgf(msg, ...) -#endif +#endif // ifdef DBG else + +// Maximum size of a string containing only an int. +#define MAXSTRINTSZ ((CHAR_BIT * sizeof(int) - 1) / 3 + 2) + +// Maximum size of a string containing only a size_t. 
+#define MAXSTRIZE_TSZ ((CHAR_BIT * sizeof(size_t) - 1) / 3 + 2) #endif diff --git a/src/lexer.c b/src/lexer.c index af26a40..61b3838 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,14 +1,17 @@ #include +#include #include +#include #include "include/lexer.h" +#include "include/dstr.h" #include "include/util.h" Lexer* lexer_init(char* src) { Lexer* lexer = malloc(sizeof(Lexer)); lexer->src = src; - lexer->srcl = strlen(src); + lexer->srcln = strlen(src); lexer->cchar = lexer->src; lexer->tokens = calloc(TOKENS_MAX, sizeof(Token*)); @@ -40,15 +43,18 @@ void lexer_lex(Lexer* lexer) { void lexer_do_confused(Lexer* lexer) { log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar); - lexer->state = LEXER_STATE_CONFUSED; - if (isdigit(*lexer->cchar)) lexer_do_number(lexer); - else lexer_do_call(lexer); + if (isdigit((unsigned char)*lexer->cchar)) { // Cast: <ctype.h> functions are undefined for negative char values. + lexer->state = LEXER_STATE_NUM; + lexer_do_number(lexer); + } else { + lexer->state = LEXER_STATE_CALL; + lexer_do_call(lexer); + } } void lexer_do_number(Lexer* lexer) { log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar); - lexer->state = LEXER_STATE_NUM; // Size of the number string. size_t numsz; @@ -63,20 +69,18 @@ void lexer_do_number(Lexer* lexer) { num[numsz] = '\0'; lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num, 1)); + lexer->state = LEXER_STATE_CONFUSED; } void lexer_do_call(Lexer* lexer) { log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar); - lexer->state = LEXER_STATE_CALL; // Size of the call string. size_t callsz; // Where the call string starts. 
char* start = lexer->cchar; - for (; *lexer->cchar && (isblank(lexer->cchar) || *lexer->cchar == '\n'); lexer_inc(lexer)); - for (callsz = 0; *lexer->cchar && (!isdigit(*lexer->cchar)); callsz++) lexer_inc(lexer); @@ -85,6 +89,8 @@ void lexer_do_call(Lexer* lexer) { call[callsz] = '\0'; lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, call, 1)); + + lexer->state = LEXER_STATE_CONFUSED; } void lexer_inc(Lexer* lexer) { @@ -100,3 +106,30 @@ void lexer_add_token(Lexer* lexer, Token* token) { } } +Dstr* lexer_to_dstr(Lexer* lexer) { + Dstr* str = dstr_init(); + + size_t titlesz = snprintf(NULL, 0, "Lexer @ %p", (void*)lexer) + 1; // %p width is platform-dependent, so measure it (+1 for the NUL). + char title[titlesz]; + snprintf(title, titlesz, "Lexer @ %p", (void*)lexer); + dstr_append(str, title, titlesz - 1); + + size_t ln = snprintf(NULL, 0, "srcln: %zu", lexer->srcln); + char src_sz[ln + 1]; + snprintf(src_sz, ln + 1, "srcln: %zu", lexer->srcln); + dstr_append(str, src_sz, ln); // ln already excludes the NUL, matching the length passed by the other appends. + + dstr_append(str, "\nsrc: ", 6); // 6 == strlen("\nsrc: "), keeps the trailing space. + dstr_append(str, lexer->src, lexer->srcln); + + return str; +} + +char* lexer_state_to_str(LexerState s) { + switch (s) { + case LEXER_STATE_NUM: return "NUM"; + case LEXER_STATE_CALL: return "CALL"; + case LEXER_STATE_CONFUSED: return "CONFUSED"; + default: return "UNKNOWN"; + } +} diff --git a/src/main.c b/src/main.c index 7899779..4e4fbde 100644 --- a/src/main.c +++ b/src/main.c @@ -20,7 +20,7 @@ int main(int argc, char** argv) { if (cline->ln > 0) { Lexer* lexer = lexer_init(cline->buf); lexer_lex(lexer); - printf("\n=%s\n", token_to_dstr(lexer->tokens[0])->buf); + printf("\n%s\n", lexer_to_dstr(lexer)->buf); } } }