diff --git a/Makefile b/Makefile index 32b73f4..739efb1 100644 --- a/Makefile +++ b/Makefile @@ -22,9 +22,9 @@ TEST_SRC_FILES = $(wildcard $(TEST_DIR)/*.c) TEST_OBJ_FILES = $(patsubst $(TEST_DIR)/%.c, $(TEST_OBJ_DIR)/%.o, $(TEST_SRC_FILES)) # Stupid things. -RESETCOLOR = \x1b[0m -WHITE = $(RESETCOLOR)\x1b[37m -WHITE_BOLD = $(RESETCOLOR)\x1b[37;1m +RESETCOLOR = \033[0m +WHITE = $(RESETCOLOR)\033[37m +WHITE_BOLD = $(RESETCOLOR)\033[37;1m all: $(TARGET) @@ -33,7 +33,7 @@ release: CFLAGS = -Wall -O2 release: $(TARGET) run: $(TARGET) - @ echo -e "$(WHITE_BOLD)Running... $(RESETCOLOR)./$(TARGET)" + @ echo "$(WHITE_BOLD)Running... $(RESETCOLOR)./$(TARGET)" @ ./$(TARGET) # Link to final binary. diff --git a/src/include/lexer.h b/src/include/lexer.h index 024b24a..cda7aa7 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -15,16 +15,23 @@ typedef enum { // state). LEXER_STATE_NUM, // Looking at a number. LEXER_STATE_CALL, // Looking at a call. + LEXER_STATE_MAX = LEXER_STATE_CALL, } LexerState; +static char* lexerstate_names[] = { + [LEXER_STATE_CONFUSED] = "CONFUSED", + [LEXER_STATE_NUM] = "NUM", + [LEXER_STATE_CALL] = "CALL", +}; + // Lexer: converts text to tokens. typedef struct { - char* src; // The source text. - size_t srcln; // The number of source chars. - char* cchar; // The current character. - Token** tokens; // The tokens produced. - size_t ntokens; // The number of tokens. LexerState state; // What the lexer is looking at. + size_t srcln; // The number of source chars. + char* src; // The source text. + char* cchar; // The current character. + size_t ntokens; // The number of tokens. + Token** tokens; // The tokens produced. } Lexer; // Create a lexer. @@ -51,10 +58,16 @@ void lexer_inc(Lexer* lexer); // Add a token to the lexer. void lexer_add_token(Lexer* lexer, Token* token); -// Returns a dynamic string representation of the Lexer. -Dstr* lexer_to_dstr(Lexer* lexer); +// Print a representation of a Lexer. +void lexer_print(Lexer* lexer); -// Returns a string representation of the LexerState. -char* lexer_state_to_str(LexerState s); +// Print a representation of a Lexer at specified indentation level. +void lexer_print_i(Lexer* lexer, int ilvl); + +// Print a representation of a LexerState. +void lexerstate_print(LexerState s); + +// Print a representation of a LexerState at the specified indentation level. +void lexerstate_print_i(LexerState s, int ilvl); #endif diff --git a/src/include/token.h b/src/include/token.h index 527213e..b6307ff 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -3,11 +3,10 @@ #include -#include "dstr.h" - typedef enum { TOKEN_TYPE_CALL, TOKEN_TYPE_NUMBER, + TOKEN_TYPE_MAX = TOKEN_TYPE_NUMBER, } TokenType; // Token. @@ -24,7 +23,17 @@ void token_destroy(Token* token); // Prints out a representation of the Token. void token_print(Token* token); +// Prints out a representation of the Token, with the specified indent level. +void token_print_i(Token* token, int ilevel); + // Prints out a representation of the TokenType. void tokentype_print(TokenType t); +// Prints out a representation of the TokenType, with the specified indent +// level. +void tokentype_print_i(TokenType t, int ilevel); + +// Prints a token's type. That's it. +void tokentype_print_raw(TokenType t); + #endif diff --git a/src/include/util.h b/src/include/util.h index 69a9f7b..d9a7f49 100644 --- a/src/include/util.h +++ b/src/include/util.h @@ -5,24 +5,37 @@ // Log a message. #define log_dbg(msg) \ - printf("\033[37;1mdbg\033[0m:\033[37m%s\033[0m:\033[32m " msg \ - "\033[0m\n", \ + printf("\033[37;1mdbg\033[0m:\033[37m%s\033[0m:\033[32m " msg "\033[0m\n", \ __func__); // Log a message with formatting. #define log_dbgf(msg, ...) \ - printf("\033[37;1mdbg\033[0m:\033[37m%s\033[0m:\033[32m " msg \ - "\033[0m\n", \ + printf("\033[37;1mdbg\033[0m:\033[37m%s\033[0m:\033[32m " msg "\033[0m\n", \ __func__, __VA_ARGS__); #else // ifdef DBG #define log_dbg(msg) +#define log_dbgf(msg, ...) #endif // ifdef DBG else -// Maximum size of a string containing only an int. -#define MAXSTRINTSZ ((CHAR_BIT * sizeof(int) - 1) / 3 + 2) +// Start in indent block. +#define INDENT_BEGIN(ILVL) \ + Dstr* INDENT_spacing = dstr_init(); \ + for (int INDENT_j = 0; INDENT_j < ILVL; INDENT_j++) \ + dstr_appendch(INDENT_spacing, ' '); -// Maximum size of a string containing only a size_t. -#define MAXSTRIZE_TSZ ((CHAR_BIT * sizeof(size_t) - 1) / 3 + 2) +#define INDENT_TITLE(THING, WHERE) \ + printf("%s" THING " @ %p\n", INDENT_spacing->buf, WHERE); + +#define INDENT_FIELD(FIELD, VAL, ...) \ + printf("%s " FIELD ": " VAL "\n", INDENT_spacing->buf, __VA_ARGS__); + +#define INDENT_FIELD_NL(FIELD, VAL) \ + printf("%s " FIELD ":\n%s " VAL "\n", INDENT_spacing->buf, \ + INDENT_spacing->buf, __VA_ARGS__); + +#define INDENT_FIELD_NONL(FIELD) printf("%s " FIELD ": ", INDENT_spacing->buf); + +#define INDENT_END dstr_destroy(INDENT_spacing); #endif diff --git a/src/lexer.c b/src/lexer.c index 61b3838..dda0d01 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -55,20 +55,20 @@ void lexer_do_confused(Lexer* lexer) { void lexer_do_number(Lexer* lexer) { log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar); - // Size of the number string. - size_t numsz; + // Length of the number string. + size_t numln; // Where the number string starts. char* start = lexer->cchar; - for (numsz = 0; *lexer->cchar && isdigit(*lexer->cchar); numsz++) + for (numln = 0; *lexer->cchar && isdigit(*lexer->cchar); numln++) lexer_inc(lexer); - char* num = malloc(numsz + 1); - memcpy(num, start, numsz); - num[numsz] = '\0'; + char* num = malloc(numln + 1); + memcpy(num, start, numln); + num[numln] = '\0'; - lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num, 1)); + lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num, numln)); lexer->state = LEXER_STATE_CONFUSED; } @@ -76,19 +76,19 @@ void lexer_do_call(Lexer* lexer) { log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar); // Size of the call string. - size_t callsz; + size_t callln; // Where the call string starts. char* start = lexer->cchar; - for (callsz = 0; *lexer->cchar && (!isdigit(*lexer->cchar)); callsz++) + for (callln = 0; *lexer->cchar && (!isdigit(*lexer->cchar)); callln++) lexer_inc(lexer); - char* call = malloc(callsz + 1); - memcpy(call, start, callsz); - call[callsz] = '\0'; + char* call = malloc(callln + 1); + memcpy(call, start, callln); + call[callln] = '\0'; - lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, call, 1)); + lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, call, callln)); lexer->state = LEXER_STATE_CONFUSED; } @@ -103,33 +103,49 @@ void lexer_add_token(Lexer* lexer, Token* token) { if (lexer->ntokens < TOKENS_MAX - 1) { lexer->tokens[lexer->ntokens] = token; lexer->ntokens++; + + log_dbgf("added token (total: %ld)", lexer->ntokens); } } -Dstr* lexer_to_dstr(Lexer* lexer) { - Dstr* str = dstr_init(); +void lexer_print(Lexer* lexer) { lexer_print_i(lexer, 0); } - size_t titlesz = sizeof("Lexer @ 0x00000000"); - char title[titlesz]; - sprintf(title, "Lexer @ %p", lexer); - dstr_append(str, title, titlesz - 1); +void lexer_print_i(Lexer* lexer, int ilvl) { + Dstr* spacing = dstr_init(); + char* sp = spacing->buf; + for (int i = 0; i < ilvl; i++) dstr_appendch(spacing, ' '); - size_t ln = snprintf(NULL, 0, "srcln: %ld", lexer->srcln); - char src_sz[ln + 1]; - snprintf(src_sz, ln + 1, "srcln: %ld", lexer->srcln); - dstr_append(str, src_sz, ln - 1); + printf("%sLexer @ %p\n", sp, lexer); + printf("%s state:\n", sp); + lexerstate_print_i(lexer->state, ilvl + 2); + printf("%s srcln:\n", sp); + printf("%s %ld\n", sp, lexer->srcln); + printf("%s src:\n", sp); + printf("%s \"%s\"\n", sp, lexer->src); + printf("%s cchar: \'%c\'\n", sp, *lexer->cchar); + printf("%s ntokens: %ld\n", sp, lexer->ntokens); + printf("%s tokens: [\n", sp); - dstr_append(str, "\nsrc: ", 5); - dstr_append(str, lexer->src, lexer->srcln); - - return str; -} - -char* lexer_state_to_str(LexerState s) { - switch (s) { - case LEXER_STATE_NUM: return "NUM"; - case LEXER_STATE_CALL: return "CALL"; - case LEXER_STATE_CONFUSED: return "CONFUSED"; - default: return "UNKNOWN"; + for (int i = 0; i < lexer->ntokens; i++) { + token_print_i(lexer->tokens[i], ilvl + 2); + printf(",\n\n"); } } + +void lexerstate_print(LexerState s) { lexerstate_print_i(s, 0); } + +void lexerstate_print_i(LexerState s, int ilvl) { + Dstr* spacing = dstr_init(); + + for (int j = 0; j < ilvl; j++) dstr_appendch(spacing, ' '); + + if (s > LEXER_STATE_MAX) { + printf("%sUnknown (%d)\n", spacing->buf, s); + log_dbgf("%d is not a valid LexerSate (max: %d)", s, LEXER_STATE_MAX); + return; + } + + printf("%s%s\n", spacing->buf, lexerstate_names[s]); + + dstr_destroy(spacing); +} diff --git a/src/main.c b/src/main.c index 4e4fbde..56fb5d4 100644 --- a/src/main.c +++ b/src/main.c @@ -20,7 +20,7 @@ int main(int argc, char** argv) { if (cline->ln > 0) { Lexer* lexer = lexer_init(cline->buf); lexer_lex(lexer); - printf("\n%s\n", lexer_to_dstr(lexer)->buf); + lexer_print(lexer); } } } diff --git a/src/token.c b/src/token.c index 42f150f..3a537f7 100644 --- a/src/token.c +++ b/src/token.c @@ -1,14 +1,20 @@ +#include + #include "include/token.h" #include "include/dstr.h" +#include "include/util.h" -#include +static char* tokentype_names[] = { + [TOKEN_TYPE_CALL] = "CALL", + [TOKEN_TYPE_NUMBER] = "NUMBER", +}; Token* token_init(TokenType type, char* val, size_t valn) { Token* t = malloc(sizeof(Token)); t->type = type; - t->val = val; t->valn = valn; + t->val = val; return t; } @@ -18,25 +24,53 @@ void token_destroy(Token* t) { free(t); } -Dstr* token_to_dstr(Token* token) { - Dstr* str = dstr_init(); +void token_print(Token* token) { token_print_i(token, 0); } - size_t titlesz = sizeof("Token @ 0x00000000"); - char title[titlesz]; - sprintf(title, "Token @ %p", token); - dstr_append(str, title, titlesz - 1); - dstr_append(str, "\n", 1); +#if 0 +void token_print_i(Token *token, int ilvl) { + Dstr* spacing = dstr_init(); + for (int j = 0; j < ilvl; j++) dstr_appendch(spacing, ' '); - size_t typesz = sizeof("type: 1"); - char type[typesz]; - // If token_to_dstr starts breaking, it might be because there're more than - // 10 types. FIXME. - sprintf(type, "type: %d", token->type); - dstr_append(str, type, typesz - 1); - dstr_append(str, "\n", 1); + printf("%sToken @ %p\n", spacing->buf, token); + printf("%s type:\n", spacing->buf); + tokentype_print_i(token->type, ilvl+1); + printf("%s valn:\n", spacing->buf); + printf("%s %ld\n", spacing->buf, token->valn); + printf("%s val:\n", spacing->buf); + printf("%s \"%s\"\n", spacing->buf, token->val); - dstr_append(str, "val: ", 5); - dstr_append(str, token->val, token->valn); - - return str; + // : +} +#endif + +void token_print_i(Token *token, int ilvl) { + INDENT_BEGIN(ilvl); + + INDENT_TITLE("Token", token); + INDENT_FIELD_NONL("type"); + tokentype_print_raw(token->type); +} + +void tokentype_print_raw(TokenType t) { + if (t > TOKEN_TYPE_MAX) { + printf("Unknown (%d)", t); + log_dbgf("%d is not a valid TokenType (max: %d)", t, TOKEN_TYPE_MAX); + return; + } + + printf("%s", tokentype_names[t]); +} + +void tokentype_print(TokenType t) { tokentype_print_i(t, 0); } + +void tokentype_print_i(TokenType t, int i) { + INDENT_BEGIN(i); + + if (t > TOKEN_TYPE_MAX) { + INDENT_FIELD("val", "Unknown (%d)", t); + log_dbgf("%d is not a valid TokenType (max: %d)", t, TOKEN_TYPE_MAX); + return; + } + + INDENT_FIELD("val", "%s", tokentype_names[t]); }