Compare commits

...

2 Commits

Author SHA1 Message Date
933418895e Something. 2024-10-19 09:09:37 -04:00
3c56290448 Numbers work. Calls do not work. 2024-10-16 08:13:32 -04:00
4 changed files with 66 additions and 21 deletions

View File

@ -2,12 +2,8 @@
#define LEXER_H #define LEXER_H
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h> #include <assert.h>
#include <ctype.h>
#include "util.h"
#include "token.h" #include "token.h"
#define TOKENS_MAX 32 #define TOKENS_MAX 32
@ -24,7 +20,7 @@ typedef enum {
// Lexer: converts text to tokens. // Lexer: converts text to tokens.
typedef struct { typedef struct {
char* src; // The source text. char* src; // The source text.
size_t srcl; // The number of source chars. size_t srcln; // The number of source chars.
char* cchar; // The current character. char* cchar; // The current character.
Token** tokens; // The tokens produced. Token** tokens; // The tokens produced.
size_t ntokens; // The number of tokens. size_t ntokens; // The number of tokens.
@ -55,4 +51,10 @@ void lexer_inc(Lexer* lexer);
// Add a token to the lexer. // Add a token to the lexer.
void lexer_add_token(Lexer* lexer, Token* token); void lexer_add_token(Lexer* lexer, Token* token);
// Returns a dynamic string representation of the Lexer.
Dstr* lexer_to_dstr(Lexer* lexer);
// Returns a string representation of the LexerState.
char* lexer_state_to_str(LexerState s);
#endif #endif

View File

@ -1,20 +1,28 @@
#ifndef UTIL_H #ifndef UTIL_H
#define UTIL_H #define UTIL_H
#ifdef DBG #ifdef DBG // Debug macros
// Log a message.
#define log_dbg(msg) \ #define log_dbg(msg) \
printf("\033[37;1mdbg\033[0m:\033[37;5m%s\033[0m:\033[32m " msg \ printf("\033[37;1mdbg\033[0m:\033[37;5m%s\033[0m:\033[32m " msg \
"\033[0m\n", \ "\033[0m\n", \
__func__); __func__);
// Log a message with formatting.
#define log_dbgf(msg, ...) \ #define log_dbgf(msg, ...) \
printf("\033[37;1mdbg\033[0m:\033[37;5m%s\033[0m:\033[32m " msg \ printf("\033[37;1mdbg\033[0m:\033[37;5m%s\033[0m:\033[32m " msg \
"\033[0m\n", \ "\033[0m\n", \
__func__, __VA_ARGS__); __func__, __VA_ARGS__);
#else #else // ifdef DBG
#define log_dbg(msg) #define log_dbg(msg)
#endif #endif // ifdef DBG else
// Maximum size of a string containing only an int.
// Conservative estimate: about one decimal digit per 3 bits of the type,
// plus 2 extra chars (presumably for a sign and the terminating NUL --
// confirm). CHAR_BIT comes from <limits.h>; no include is visible in this
// header, so the including file must pull it in before using the macro.
#define MAXSTRINTSZ ((CHAR_BIT * sizeof(int) - 1) / 3 + 2)
// Maximum size of a string containing only a size_t.
// Same estimate as MAXSTRINTSZ, applied to the width of size_t.
// NOTE(review): the name reads like a typo of "MAXSTRSIZE_TSZ" -- confirm
// before renaming, since users of the macro would need to change too.
#define MAXSTRIZE_TSZ ((CHAR_BIT * sizeof(size_t) - 1) / 3 + 2)
#endif #endif

View File

@ -1,14 +1,17 @@
#include <ctype.h> #include <ctype.h>
#include <stdio.h>
#include <string.h> #include <string.h>
#include <limits.h>
#include "include/lexer.h" #include "include/lexer.h"
#include "include/dstr.h"
#include "include/util.h" #include "include/util.h"
Lexer* lexer_init(char* src) { Lexer* lexer_init(char* src) {
Lexer* lexer = malloc(sizeof(Lexer)); Lexer* lexer = malloc(sizeof(Lexer));
lexer->src = src; lexer->src = src;
lexer->srcl = strlen(src); lexer->srcln = strlen(src);
lexer->cchar = lexer->src; lexer->cchar = lexer->src;
lexer->tokens = calloc(TOKENS_MAX, sizeof(Token*)); lexer->tokens = calloc(TOKENS_MAX, sizeof(Token*));
@ -40,15 +43,18 @@ void lexer_lex(Lexer* lexer) {
void lexer_do_confused(Lexer* lexer) { void lexer_do_confused(Lexer* lexer) {
log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar); log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar);
lexer->state = LEXER_STATE_CONFUSED; if (isdigit(*lexer->cchar)) {
if (isdigit(*lexer->cchar)) lexer_do_number(lexer); lexer->state = LEXER_STATE_NUM;
else lexer_do_call(lexer); lexer_do_number(lexer);
} else {
lexer->state = LEXER_STATE_CALL;
lexer_do_call(lexer);
}
} }
void lexer_do_number(Lexer* lexer) { void lexer_do_number(Lexer* lexer) {
log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar); log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar);
lexer->state = LEXER_STATE_NUM;
// Size of the number string. // Size of the number string.
size_t numsz; size_t numsz;
@ -63,21 +69,19 @@ void lexer_do_number(Lexer* lexer) {
num[numsz] = '\0'; num[numsz] = '\0';
lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num, 1)); lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num, 1));
lexer->state = LEXER_STATE_CONFUSED;
} }
void lexer_do_call(Lexer* lexer) { void lexer_do_call(Lexer* lexer) {
log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar); log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar);
lexer->state = LEXER_STATE_CALL;
// Size of the call string. // Size of the call string.
size_t callsz; size_t callsz;
// Where the call string starts. // Where the call string starts.
char* start = lexer->cchar; char* start = lexer->cchar;
for (; *lexer->cchar && (isblank(lexer->cchar) || *lexer->cchar == '\n'); lexer_inc(lexer)); for (callsz = 0; *lexer->cchar && (!isdigit(*lexer->cchar)); callsz++)
for (callsz = 0; *lexer->cchar && isalpha(*lexer->cchar); callsz++)
lexer_inc(lexer); lexer_inc(lexer);
char* call = malloc(callsz + 1); char* call = malloc(callsz + 1);
@ -85,6 +89,8 @@ void lexer_do_call(Lexer* lexer) {
call[callsz] = '\0'; call[callsz] = '\0';
lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, call, 1)); lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, call, 1));
lexer->state = LEXER_STATE_CONFUSED;
} }
void lexer_inc(Lexer* lexer) { void lexer_inc(Lexer* lexer) {
@ -100,3 +106,30 @@ void lexer_add_token(Lexer* lexer, Token* token) {
} }
} }
// Returns a dynamic string representation of the Lexer.
//
// The resulting text has the form:
//     Lexer @ <address>
//     srcln: <number of source chars>
//     src: <source text>
//
// lexer: the Lexer to describe; its src and srcln fields are read.
// Returns the Dstr created here with dstr_init(). If the header cannot be
// formatted or allocated, the Dstr is returned as dstr_init() produced it.
Dstr* lexer_to_dstr(Lexer* lexer) {
    Dstr* str = dstr_init();

    // Measure before formatting: the text produced by "%p" is
    // implementation-defined, so a buffer sized for an 8-digit address
    // overflows on 64-bit targets. "%zu" is the conversion for size_t.
    int hdrln = snprintf(NULL, 0, "Lexer @ %p\nsrcln: %zu\nsrc: ",
                         (void*)lexer, lexer->srcln);
    if (hdrln < 0) return str;

    char* hdr = malloc((size_t)hdrln + 1);
    if (!hdr) return str;

    snprintf(hdr, (size_t)hdrln + 1, "Lexer @ %p\nsrcln: %zu\nsrc: ",
             (void*)lexer, lexer->srcln);

    // Append exactly the formatted length so no digit or trailing space
    // of the header is dropped.
    dstr_append(str, hdr, (size_t)hdrln);
    free(hdr);

    dstr_append(str, lexer->src, lexer->srcln);

    return str;
}
// Returns a string representation of the LexerState.
//
// s: the state to name.
// Returns a string literal ("NUM", "CALL" or "CONFUSED"), or "UNKNOWN" for
// any value that is not one of those three states. The result must not be
// modified or freed.
char* lexer_state_to_str(LexerState s) {
    if (s == LEXER_STATE_NUM) return "NUM";
    if (s == LEXER_STATE_CALL) return "CALL";
    if (s == LEXER_STATE_CONFUSED) return "CONFUSED";

    // Anything else is not a state this function knows how to name.
    return "UNKNOWN";
}

View File

@ -10,15 +10,17 @@ int main(int argc, char** argv) {
Dstr* cline = dstr_init(); // The current line. Dstr* cline = dstr_init(); // The current line.
printf("> "); printf("> ");
fflush(stdout); fflush(stdout);
for (char cch; (cch = getchar() != EOF);) { for (char cch; (cch = getc(stdin)) != '\n';) {
dstr_appendch(cline, fgetc(stdin)); log_dbgf("cchar: %c", cch);
dstr_appendch(cline, cch);
} }
log_dbgf("cline: %s", cline->buf);
if (cline->ln > 0) { if (cline->ln > 0) {
Lexer* lexer = lexer_init(cline->buf); Lexer* lexer = lexer_init(cline->buf);
lexer_lex(lexer); lexer_lex(lexer);
printf("\n=%s\n", token_to_dstr(lexer->tokens[0])->buf); printf("\n%s\n", lexer_to_dstr(lexer)->buf);
} }
} }
} }