From 4080d1a80a5c21097d16ce2caf67087dc1c3905a Mon Sep 17 00:00:00 2001 From: Jacob Date: Sat, 23 Nov 2024 10:21:34 -0500 Subject: [PATCH] Cleaned up. --- Makefile | 3 +- src/dstr.c | 12 ++-- src/grammar.y | 2 +- src/include/lexer.h | 79 ++++------------------ src/include/parser.h | 26 -------- src/lexer.c | 151 ++----------------------------------------- src/main.c | 12 ++-- src/parser.c | 30 --------- 8 files changed, 34 insertions(+), 281 deletions(-) delete mode 100644 src/include/parser.h delete mode 100644 src/parser.c diff --git a/Makefile b/Makefile index 90b3069..445f3bf 100644 --- a/Makefile +++ b/Makefile @@ -24,7 +24,6 @@ UNITY_C = $(TEST_DIR)/unity/unity.c TEST_SRC_FILES = $(wildcard $(TEST_DIR)/*.c) TEST_OBJ_FILES = $(patsubst $(TEST_DIR)/%.c, $(TEST_OBJ_DIR)/%.o, $(TEST_SRC_FILES)) -# Stupid things. RESETCOLOR = \033[0m WHITE = $(RESETCOLOR)\033[37m WHITE_BOLD = $(RESETCOLOR)\033[37;1m @@ -45,13 +44,13 @@ $(GRAM_FILES): $(SRC_DIR)/grammar.y @ echo -e "$(WHITE_BOLD)Generating grammars...$(RESETCOLOR) bison $< -o$(GRAM_DIR)/grammar.tab.c -H$(GRAM_DIR)/grammar.tab.h" @ bison $< -o$(GRAM_DIR)/grammar.tab.c -H$(GRAM_DIR)/grammar.tab.h - # Compile grammars. $(OBJ_DIR)/grammar.o: $(GRAM_DIR)/grammar.tab.c $(GRAM_DIR)/grammar.tab.h $(OBJ_DIR)/lexer.o $(CC) $(CFLAGS) -c $< -o $@ # Lexer depends on grammars. $(OBJ_DIR)/lexer.o: $(SRC_DIR)/lexer.c $(GRAM_FILES) + @ mkdir -p $(OBJ_DIR) $(CC) $(CFLAGS) -c $< -o $@ # Compile project sources. diff --git a/src/dstr.c b/src/dstr.c index b98b81d..0f64570 100644 --- a/src/dstr.c +++ b/src/dstr.c @@ -1,8 +1,8 @@ #include "include/dstr.h" #include "include/util.h" -#include #include +#include Dstr* dstr_init(void) { Dstr* dstr = malloc(sizeof(Dstr)); @@ -25,7 +25,8 @@ void dstr_append(Dstr* dest, char* src, size_t ln) { // Double the buffer size when overflown. 
dest->bufsz *= 2; dest->buf = realloc(dest->buf, dest->bufsz); - log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz/2, dest->bufsz); + log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz / 2, + dest->bufsz); } // Overwrites the \0 at the end of the string, keeps the null from the given @@ -34,16 +35,17 @@ void dstr_append(Dstr* dest, char* src, size_t ln) { dest->ln += ln; } -void dstr_appendch(Dstr *dest, char ch) { +void dstr_appendch(Dstr* dest, char ch) { if (dest->ln + 1 + 1 > dest->bufsz) { // Double the buffer size when overflown. dest->bufsz *= 2; dest->buf = realloc(dest->buf, dest->bufsz); - log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz/2, dest->bufsz); + log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz / 2, + dest->bufsz); } // Overwrites the preexisting null terminator, and adds one of its own. dest->buf[dest->ln] = ch; - dest->buf[dest->ln+1] = '\0'; + dest->buf[dest->ln + 1] = '\0'; dest->ln += 1; } diff --git a/src/grammar.y b/src/grammar.y index af95a11..d801633 100644 --- a/src/grammar.y +++ b/src/grammar.y @@ -30,7 +30,7 @@ input: %empty - | input line + | line ; diff --git a/src/include/lexer.h b/src/include/lexer.h index f11b9c0..9fb7631 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -1,76 +1,23 @@ #ifndef LEXER_H #define LEXER_H -#include #include -#include "token.h" +#ifdef __has_include + #if __has_include("../../build/grammars/grammar.tab.h") + #include "../../build/grammars/grammar.tab.h" + #else + #warning "Build resources not present!" + #endif +#else + #warning "Not sure whether build-time resources are present." + #include "../../build/grammars/grammar.tab.h" +#endif -#define TOKENS_MAX 32 -#define ZERO_CHAR 30 - -// What the lexer is currently looking at. -typedef enum { - LEXER_STATE_CONFUSED, // Can't decide what it's looking at (also initial - // state). - LEXER_STATE_NUM, // Looking at a number. - LEXER_STATE_CALL, // Looking at a call. 
- LEXER_STATE_MAX = LEXER_STATE_CALL, -} LexerState; - -static char* lexerstate_names[] = { - [LEXER_STATE_CONFUSED] = "CONFUSED", - [LEXER_STATE_NUM] = "NUM", - [LEXER_STATE_CALL] = "CALL", -}; - -// Lexer: converts text to tokens. -typedef struct { - LexerState state; // What the lexer is looking at. - size_t srcln; // The number of source chars. - char* src; // The source text. - char* cchar; // The current character. - size_t ntokens; // The number of tokens. - Token** tokens; // The tokens produced. -} Lexer; - -// Create a lexer. -void lexer_init(char* src); - -// Destroy the lexer. -// Does not destroy `thelexer->src`. -void lexer_destroy(); - -// Convert text to tokens. -void lexer_lex(); - -// Lex in confused mode. -void lexer_do_confused(); - -// Lex in number mode. -void lexer_do_number(); - -// Lex in call mode. -void lexer_do_call(); - -// Increment the lexer's current character pointer. -void lexer_inc(); - -// Add a token to the lexer. -void lexer_add_token(Token* token); - -// Print a representation of a Lexer. -void lexer_print(); - -// Print a representation of a Lexer at specified indentation level. -void lexer_print_i(int ilvl); - -// Print a representation of a LexerState. -void lexerstate_print_raw(); - -// Create the input string. -void lexer_set_global(const char* str); +extern YYSTYPE yylval; +extern char* inp; +// Called by `yyparse()` (in bison-generated files.) int yylex(); void yyerror(char const* s); diff --git a/src/include/parser.h b/src/include/parser.h deleted file mode 100644 index 026fd31..0000000 --- a/src/include/parser.h +++ /dev/null @@ -1,26 +0,0 @@ -#ifndef PARSER_H -#define PARSER_H - -#include "token.h" -#include "ast.h" -#include "stack.h" - -typedef struct { - size_t tokenc; // Number of tokens in tokenv; - Token* ctoken; // The current token. - Token** tokenv; // Token vector. - AST* ast; // Abstract syntax tree. 
- Stack* ops; - Stack* nums; -} Parser; - -Parser* parser_init(size_t tokenc, Token** tokenv); -void parser_destroy(Parser* parser); - -// Increment `parser->ctoken`. -void parser_inc(Parser* parser); - -// Step forward 1 token and add to stacks. -void parser_step(Parser* parser); - -#endif diff --git a/src/lexer.c b/src/lexer.c index 0394c02..f2de0f3 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,163 +1,24 @@ #include #include #include -#include -#include "include/dstr.h" #include "include/lexer.h" -#include "include/token.h" -#include "include/util.h" - -#include "../build/grammars/grammar.tab.h" - -extern YYSTYPE yylval; - -Lexer* thelexer = NULL; - -void lexer_init(char* src) { - thelexer = malloc(sizeof(Lexer)); - - thelexer->src = src; - thelexer->srcln = strlen(src); - thelexer->cchar = thelexer->src; - - thelexer->tokens = calloc(TOKENS_MAX, sizeof(Token*)); - thelexer->ntokens = 0; - thelexer->state = LEXER_STATE_CONFUSED; - - log_dbgf("created thelexer @ %p", thelexer); -} - -void lexer_destroy() { - // Does not free lexer->src. - for (int i = 0; i < thelexer->ntokens; i++) - token_destroy(thelexer->tokens[i]); -} - -void lexer_lex() { - while (*thelexer->cchar) { - switch (thelexer->state) { - case LEXER_STATE_CONFUSED: lexer_do_confused(); break; - case LEXER_STATE_NUM: lexer_do_number(); break; - case LEXER_STATE_CALL: lexer_do_call(); break; - default: break; - } - } -} - -void lexer_do_confused() { - log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", thelexer, - *thelexer->cchar, (int)*thelexer->cchar); - - if (isspace(*thelexer->cchar)) lexer_inc(); - - if (isdigit(*thelexer->cchar)) { - thelexer->state = LEXER_STATE_NUM; - lexer_do_number(); - } else { - thelexer->state = LEXER_STATE_CALL; - lexer_do_call(); - } -} - -void lexer_do_number() { - log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", thelexer, - *thelexer->cchar, (int)*thelexer->cchar); - - // Length of the number string. 
- size_t numln; - - // Where the number string starts. - char* start = thelexer->cchar; - - for (numln = 0; *thelexer->cchar && isdigit(*thelexer->cchar); numln++) - lexer_inc(); - - char* num = malloc(numln + 1); - memcpy(num, start, numln); - num[numln] = '\0'; - - lexer_add_token(token_init(TOKEN_TYPE_NUMBER, num, numln)); - thelexer->state = LEXER_STATE_CONFUSED; -} - -void lexer_do_call() { - log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", thelexer, - *thelexer->cchar, (int)*thelexer->cchar); - - // Size of the call string. - size_t callln; - - // Where the call string starts. - char* start = thelexer->cchar; - - for (callln = 0; *thelexer->cchar && - (!isdigit(*thelexer->cchar) && !isspace(*thelexer->cchar)); - callln++) - lexer_inc(); - - char* call = malloc(callln + 1); - memcpy(call, start, callln); - call[callln] = '\0'; - - lexer_add_token(token_init(TOKEN_TYPE_CALL, call, callln)); - - thelexer->state = LEXER_STATE_CONFUSED; -} - -void lexer_inc() { thelexer->cchar += sizeof(char); } - -void lexer_add_token(Token* token) { - assert(thelexer->ntokens < TOKENS_MAX); - - if (thelexer->ntokens < TOKENS_MAX - 1) { - thelexer->tokens[thelexer->ntokens] = token; - thelexer->ntokens++; - - log_dbgf("added token (total: %ld)", thelexer->ntokens); - } -} - -void lexer_print() { lexer_print_i(0); } - -void lexer_print_i(int ilvl) { - INDENT_BEGIN(ilvl); - INDENT_TITLE("Lexer", thelexer); - INDENT_FIELD_NONL_START("state") - lexerstate_print_raw(); - INDENT_FIELD_NONL_END - INDENT_FIELD("srcln", "%ld", thelexer->srcln); - INDENT_FIELD_NL("src", "\"%s\"", thelexer->src); - INDENT_FIELD("cchar", "'%c'", *thelexer->cchar); - INDENT_FIELD("ntokens", "%ld", thelexer->ntokens); - INDENT_FIELD_LIST("tokens", thelexer->tokens, thelexer->ntokens, - token_print_i); -} - -void lexerstate_print_raw() { - LexerState s = thelexer->state; - if (s > LEXER_STATE_MAX) { - printf("Unknown (%d)", s); - log_dbgf("%d is not a valid LexerState (max: %d)", s, TOKEN_TYPE_MAX); - 
} else printf("%s", lexerstate_names[s]); -} int yylex() { - if (*thelexer->cchar == '\0') return YYEOF; + if (*inp == '\0') return YYEOF; // Skip all whitespace. - while (*thelexer->cchar == ' ' || *thelexer->cchar == '\t') - thelexer->cchar++; + while (*inp == ' ' || *inp == '\t') { inp++; } // Assign & consume current character. - int c = *thelexer->cchar++; + int c = *inp++; // Check for NUM. if (isdigit(c)) { int value = c - '0'; - while (isdigit(*thelexer->cchar)) { - value = value * 10 + (*thelexer->cchar - '0'); // Accumulate value. - thelexer->cchar++; + while (isdigit(*inp)) { + value = value * 10 + (*inp - '0'); // Accumulate value. + inp++; } yylval.intval = value; // Set the token value. return NUM; diff --git a/src/main.c b/src/main.c index 5de1aef..8f8683f 100644 --- a/src/main.c +++ b/src/main.c @@ -2,7 +2,6 @@ #include "include/ast.h" #include "include/dstr.h" -#include "include/exec.h" #include "include/lexer.h" #include "include/util.h" @@ -11,6 +10,9 @@ // Global Abstract Syntax Tree. extern AST* root; +// Global input text. +char* inp = NULL; + extern int yyparse(); int main(int argc, char** argv) { @@ -22,20 +24,18 @@ int main(int argc, char** argv) { log_dbgf("cchar: %c", cch); dstr_appendch(cline, cch); } + dstr_appendch(cline, '\n'); log_dbgf("cline: %s", cline->buf); if (cline->ln > 0) { - lexer_init(cline->buf); - lexer_lex(); - lexer_print(); + // I hope it's null-terminated. 
+ inp = cline->buf; if (yyparse() == 0) { printf("Parsed successfully!\n"); - exec(root); } else { printf("Parse error.\n"); } - lexer_destroy(); } dstr_destroy(cline); diff --git a/src/parser.c b/src/parser.c deleted file mode 100644 index f61daf1..0000000 --- a/src/parser.c +++ /dev/null @@ -1,30 +0,0 @@ -#include "include/stack.h" -#include "include/util.h" -#include "include/parser.h" - -Parser* parser_init(size_t tokenc, Token** tokenv) { - talloc(Parser, parser); - - parser->tokenc = tokenc; - parser->ctoken = *tokenv; - parser->tokenv = tokenv; - parser->ast = NULL; - parser->ops = stack_init(); - parser->nums = stack_init(); - - return parser; -} - -void parser_destroy(Parser* parser) { - if (!parser) return; - - // Also frees parser->ctoken. - for (int i = 0; i < parser->tokenc; i++) token_destroy(parser->tokenv[i]); - - ast_destroy(parser->ast); - - stack_destroy(parser->ops); - stack_destroy(parser->nums); -} - -