Global lexer.

This commit is contained in:
Jacob Signorovitch 2024-11-07 19:41:14 -05:00
parent 8cf09e43c9
commit 120038ea8f
3 changed files with 68 additions and 75 deletions

View File

@ -37,36 +37,36 @@ typedef struct {
// Public interface of the global lexer. Every function below operates on the
// single global lexer instance (`thelexer`) instead of taking a `Lexer*`.
// Functions without parameters are declared `(void)` so that they are real
// prototypes: in C11 an empty `()` leaves the parameters unspecified and
// disables argument checking at call sites.

// Create the lexer for the source text `src`.
void lexer_init(char* src);

// Destroy the lexer.
// Does not destroy `thelexer->src`.
void lexer_destroy(void);

// Convert text to tokens.
void lexer_lex(void);

// Lex in confused mode.
void lexer_do_confused(void);

// Lex in number mode.
void lexer_do_number(void);

// Lex in call mode.
void lexer_do_call(void);

// Increment the lexer's current character pointer.
void lexer_inc(void);

// Add a token to the lexer.
void lexer_add_token(Token* token);

// Print a representation of the lexer.
void lexer_print(void);

// Print a representation of the lexer at the specified indentation level.
void lexer_print_i(int ilvl);

// Print a representation of the lexer's current LexerState.
void lexerstate_print_raw(void);

// Create the input string.
void lexer_set_global(const char* str);

View File

@ -24,121 +24,114 @@ void lexer_init(char* src) {
log_dbgf("created thelexer @ %p", thelexer); log_dbgf("created thelexer @ %p", thelexer);
} }
void lexer_destroy(Lexer* lexer) { void lexer_destroy() {
// Does not free lexer->src. // Does not free lexer->src.
for (int i = 0; i < lexer->ntokens; i++) token_destroy(lexer->tokens[i]); for (int i = 0; i < thelexer->ntokens; i++) token_destroy(thelexer->tokens[i]);
} }
void lexer_lex(Lexer* lexer) { void lexer_lex() {
while (*lexer->cchar) { while (*thelexer->cchar) {
switch (lexer->state) { switch (thelexer->state) {
case LEXER_STATE_CONFUSED: lexer_do_confused(lexer); break; case LEXER_STATE_CONFUSED: lexer_do_confused(); break;
case LEXER_STATE_NUM: lexer_do_number(lexer); break; case LEXER_STATE_NUM: lexer_do_number(); break;
case LEXER_STATE_CALL: lexer_do_call(lexer); break; case LEXER_STATE_CALL: lexer_do_call(); break;
default: break; default: break;
} }
} }
} }
void lexer_do_confused(Lexer* lexer) { void lexer_do_confused() {
log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", lexer, log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", thelexer,
*lexer->cchar, (int)*lexer->cchar); *thelexer->cchar, (int)*thelexer->cchar);
if (isspace(*lexer->cchar)) lexer_inc(lexer); if (isspace(*thelexer->cchar)) lexer_inc();
if (isdigit(*lexer->cchar)) { if (isdigit(*thelexer->cchar)) {
lexer->state = LEXER_STATE_NUM; thelexer->state = LEXER_STATE_NUM;
lexer_do_number(lexer); lexer_do_number();
} else { } else {
lexer->state = LEXER_STATE_CALL; thelexer->state = LEXER_STATE_CALL;
lexer_do_call(lexer); lexer_do_call();
} }
} }
void lexer_do_number(Lexer* lexer) { void lexer_do_number() {
log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", lexer, log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", thelexer,
*lexer->cchar, (int)*lexer->cchar); *thelexer->cchar, (int)*thelexer->cchar);
// Length of the number string. // Length of the number string.
size_t numln; size_t numln;
// Where the number string starts. // Where the number string starts.
char* start = lexer->cchar; char* start = thelexer->cchar;
for (numln = 0; *lexer->cchar && isdigit(*lexer->cchar); numln++) for (numln = 0; *thelexer->cchar && isdigit(*thelexer->cchar); numln++)
lexer_inc(lexer); lexer_inc();
char* num = malloc(numln + 1); char* num = malloc(numln + 1);
memcpy(num, start, numln); memcpy(num, start, numln);
num[numln] = '\0'; num[numln] = '\0';
lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num, numln)); lexer_add_token(token_init(TOKEN_TYPE_NUMBER, num, numln));
lexer->state = LEXER_STATE_CONFUSED; thelexer->state = LEXER_STATE_CONFUSED;
} }
void lexer_do_call(Lexer* lexer) { void lexer_do_call() {
log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", lexer, log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", thelexer,
*lexer->cchar, (int)*lexer->cchar); *thelexer->cchar, (int)*thelexer->cchar);
// Size of the call string. // Size of the call string.
size_t callln; size_t callln;
// Where the call string starts. // Where the call string starts.
char* start = lexer->cchar; char* start = thelexer->cchar;
for (callln = 0; for (callln = 0;
*lexer->cchar && (!isdigit(*lexer->cchar) && !isspace(*lexer->cchar)); *thelexer->cchar && (!isdigit(*thelexer->cchar) && !isspace(*thelexer->cchar));
callln++) callln++)
lexer_inc(lexer); lexer_inc();
char* call = malloc(callln + 1); char* call = malloc(callln + 1);
memcpy(call, start, callln); memcpy(call, start, callln);
call[callln] = '\0'; call[callln] = '\0';
lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, call, callln)); lexer_add_token(token_init(TOKEN_TYPE_CALL, call, callln));
lexer->state = LEXER_STATE_CONFUSED; thelexer->state = LEXER_STATE_CONFUSED;
} }
void lexer_inc(Lexer* lexer) { void lexer_inc() {
lexer->cchar += sizeof(char); thelexer->cchar += sizeof(char);
} }
// Append `token` to the global lexer's token list.
//
// At most TOKENS_MAX - 1 tokens are stored (the same bound the original code
// enforced). When the list is full the token is destroyed rather than
// silently leaked, so the caller must not use `token` after this call.
// NOTE(review): assumes token_destroy() is the correct way to release a token
// that was never stored (it is what lexer_destroy() uses) - confirm.
void lexer_add_token(Token* token) {
    assert(thelexer->ntokens < TOKENS_MAX);

    if (thelexer->ntokens >= TOKENS_MAX - 1) {
        log_dbgf("token list full (%ld tokens), dropping token",
                 thelexer->ntokens);
        token_destroy(token);
        return;
    }

    thelexer->tokens[thelexer->ntokens] = token;
    thelexer->ntokens++;
    log_dbgf("added token (total: %ld)", thelexer->ntokens);
}
// Print a representation of the global lexer at indentation level 0.
void lexer_print(void) {
    lexer_print_i(0);
}
void lexer_print_i(Lexer* lexer, int ilvl) { void lexer_print_i(int ilvl) {
INDENT_BEGIN(ilvl); INDENT_BEGIN(ilvl);
INDENT_TITLE("Lexer", lexer); INDENT_TITLE("Lexer", thelexer);
INDENT_FIELD_NONL_START("state") INDENT_FIELD_NONL_START("state")
lexerstate_print_raw(lexer->state); lexerstate_print_raw();
INDENT_FIELD_NONL_END INDENT_FIELD_NONL_END
INDENT_FIELD("srcln", "%ld", lexer->srcln); INDENT_FIELD("srcln", "%ld", thelexer->srcln);
INDENT_FIELD_NL("src", "\"%s\"", lexer->src); INDENT_FIELD_NL("src", "\"%s\"", thelexer->src);
INDENT_FIELD("cchar", "'%c'", *lexer->cchar); INDENT_FIELD("cchar", "'%c'", *thelexer->cchar);
INDENT_FIELD("ntokens", "%ld", lexer->ntokens); INDENT_FIELD("ntokens", "%ld", thelexer->ntokens);
INDENT_FIELD_LIST("tokens", lexer->tokens, lexer->ntokens, token_print_i); INDENT_FIELD_LIST("tokens", thelexer->tokens, thelexer->ntokens, token_print_i);
#if 0
printf("%s tokens: [\n", INDENT_spacing->buf);
for (int i = 0; i < lexer->ntokens; i++) {
token_print_i(lexer->tokens[i], ilvl + 2);
printf(",\n\n");
}
#endif
} }
void lexerstate_print_raw(LexerState s) { void lexerstate_print_raw() {
LexerState s = thelexer->state;
if (s > LEXER_STATE_MAX) { if (s > LEXER_STATE_MAX) {
printf("Unknown (%d)", s); printf("Unknown (%d)", s);
log_dbgf("%d is not a valid LexerState (max: %d)", s, TOKEN_TYPE_MAX); log_dbgf("%d is not a valid LexerState (max: %d)", s, TOKEN_TYPE_MAX);

View File

@ -17,10 +17,10 @@ int main(int argc, char** argv) {
log_dbgf("cline: %s", cline->buf); log_dbgf("cline: %s", cline->buf);
if (cline->ln > 0) { if (cline->ln > 0) {
Lexer* lexer = lexer_init(cline->buf); lexer_init(cline->buf);
lexer_lex(lexer); lexer_lex();
lexer_print(lexer); lexer_print();
lexer_destroy(lexer); lexer_destroy();
} }
dstr_destroy(cline); dstr_destroy(cline);