Cleaned up.

2024-11-23 10:21:34 -05:00 · 2024-11-23 10:21:34 -05:00 · 4080d1a80a
commit 4080d1a80a
parent a36ae22d52
8 changed files with 34 additions and 281 deletions
--- a/3
+++ b/3
@ -24,7 +24,6 @@ UNITY_C = $(TEST_DIR)/unity/unity.c
 TEST_SRC_FILES = $(wildcard $(TEST_DIR)/*.c)
 TEST_OBJ_FILES = $(patsubst $(TEST_DIR)/%.c, $(TEST_OBJ_DIR)/%.o, $(TEST_SRC_FILES))
 # ANSI escape sequences for colored terminal output.
 RESETCOLOR = \033[0m
 WHITE = $(RESETCOLOR)\033[37m
 WHITE_BOLD = $(RESETCOLOR)\033[37;1m
@ -45,13 +44,13 @@ $(GRAM_FILES): $(SRC_DIR)/grammar.y
 	@ echo -e "$(WHITE_BOLD)Generating grammars...$(RESETCOLOR) bison $< -o$(GRAM_DIR)/grammar.tab.c -H$(GRAM_DIR)/grammar.tab.h"
 	@ bison $< -o$(GRAM_DIR)/grammar.tab.c -H$(GRAM_DIR)/grammar.tab.h
 # Compile grammars.
 $(OBJ_DIR)/grammar.o: $(GRAM_DIR)/grammar.tab.c $(GRAM_DIR)/grammar.tab.h $(OBJ_DIR)/lexer.o
 	$(CC) $(CFLAGS) -c $< -o $@
 # Lexer depends on grammars.
 $(OBJ_DIR)/lexer.o: $(SRC_DIR)/lexer.c $(GRAM_FILES)
 	@ mkdir -p $(OBJ_DIR)
 	$(CC) $(CFLAGS) -c $< -o $@
 # Compile project sources.
--- a/src/dstr.c
+++ b/src/dstr.c
@ -1,8 +1,8 @@
 #include "include/dstr.h"
 #include "include/util.h"
 #include <string.h>
 #include <stdio.h>
 #include <string.h>
 Dstr* dstr_init(void) {
    Dstr* dstr = malloc(sizeof(Dstr));
@ -25,7 +25,8 @@ void dstr_append(Dstr* dest, char* src, size_t ln) {
        // Double the buffer size when overflown.
        dest->bufsz *= 2;
        dest->buf = realloc(dest->buf, dest->bufsz);
-        log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz/2, dest->bufsz);
+        log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz / 2,
                 dest->bufsz);
    }
    // Overwrites the \0 at the end of the string, keeps the null from the given
@ -39,7 +40,8 @@ void dstr_appendch(Dstr *dest, char ch) {
        // Double the buffer size when overflown.
        dest->bufsz *= 2;
        dest->buf = realloc(dest->buf, dest->bufsz);
-        log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz/2, dest->bufsz);
+        log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz / 2,
                 dest->bufsz);
    }
    // Overwrites the preexisting null terminator, and adds one of its own.
--- a/src/grammar.y
+++ b/src/grammar.y
@ -30,7 +30,7 @@
 input:
    %empty
-    | input line
+    | line
    ;
--- a/src/include/lexer.h
+++ b/src/include/lexer.h
@ -1,76 +1,23 @@
 #ifndef LEXER_H
 #define LEXER_H
 #include <stdlib.h>
 #include <assert.h>
-#include "token.h"
+#ifdef __has_include
    #if __has_include("../../build/grammars/grammar.tab.h")
        #include "../../build/grammars/grammar.tab.h"
    #else
        #warn "Build resources not present!"
    #endif
 #else
    #warn "Not sure whether build-time resources are present."
    #include "../../build/grammars/grammar.tab.h"
 #endif
-#define TOKENS_MAX 32
+extern YYSTYPE yylval;
-#define ZERO_CHAR 30
+extern char* inp;
 // The kind of input the lexer believes it is currently scanning.
 typedef enum {
    // Cannot decide what it is looking at; this is also the initial state.
    LEXER_STATE_CONFUSED,
    // Looking at a number.
    LEXER_STATE_NUM,
    // Looking at a call.
    LEXER_STATE_CALL,
    // Highest valid state value (an alias of LEXER_STATE_CALL).
    LEXER_STATE_MAX = LEXER_STATE_CALL,
 } LexerState;
 // Printable name for each LexerState, indexed by the enum value.
 static char* lexerstate_names[] = {
    [LEXER_STATE_CALL] = "CALL",
    [LEXER_STATE_NUM] = "NUM",
    [LEXER_STATE_CONFUSED] = "CONFUSED",
 };
 // Lexer: converts text to tokens.
 // Holds the scanning state, the source text with a cursor into it, and the
 // tokens produced so far.
 typedef struct {
    LexerState state; // What the lexer is looking at.
    size_t srcln;     // The number of source chars (`strlen` of `src`).
    char* src;        // The source text; not freed by `lexer_destroy()`.
    char* cchar;      // The current character (a pointer into `src`).
    size_t ntokens;   // The number of tokens currently stored in `tokens`.
    Token** tokens;   // The tokens produced (table of TOKENS_MAX pointers).
 } Lexer;
 // Create a lexer.
 void lexer_init(char* src);
 // Destroy the lexer.
 // Does not destroy `thelexer->src`.
 void lexer_destroy();
 // Convert text to tokens.
 void lexer_lex();
 // Lex in confused mode.
 void lexer_do_confused();
 // Lex in number mode.
 void lexer_do_number();
 // Lex in call mode.
 void lexer_do_call();
 // Increment the lexer's current character pointer.
 void lexer_inc();
 // Add a token to the lexer.
 void lexer_add_token(Token* token);
 // Print a representation of a Lexer.
 void lexer_print();
 // Print a representation of a Lexer at specified indentation level.
 void lexer_print_i(int ilvl);
 // Print a representation of a LexerState.
 void lexerstate_print_raw();
 // Create the input string.
 void lexer_set_global(const char* str);
 // Called by `yyparse()` (in bison-generated files.)
 int yylex();
 void yyerror(char const* s);
--- a/src/include/parser.h
+++ b/src/include/parser.h
@ -1,26 +0,0 @@
 #ifndef PARSER_H
 #define PARSER_H
 #include "token.h"
 #include "ast.h"
 #include "stack.h"
 // Parser state: the token vector being consumed, the AST, and two stacks.
 typedef struct {
    size_t tokenc;      // Number of tokens in tokenv.
    Token* ctoken;      // The current token.
    Token** tokenv;     // Token vector.
    AST* ast;           // Abstract syntax tree.
    Stack* ops;         // presumably the operator stack — TODO confirm
    Stack* nums;        // presumably the number stack — TODO confirm
 } Parser;
 Parser* parser_init(size_t tokenc, Token** tokenv);
 void parser_destroy(Parser* parser);
 // Increment `parser->ctoken`.
 void parser_inc(Parser* parser);
 // Step forward 1 token and add to stacks.
 void parser_step(Parser* parser);
 #endif
--- a/src/lexer.c
+++ b/src/lexer.c
@ -1,163 +1,24 @@
 #include <ctype.h>
 #include <limits.h>
 #include <stdio.h>
 #include <string.h>
 #include "include/dstr.h"
 #include "include/lexer.h"
 #include "include/token.h"
 #include "include/util.h"
 #include "../build/grammars/grammar.tab.h"
 extern YYSTYPE yylval;
 // The single global lexer; NULL until lexer_init() allocates it.
 Lexer* thelexer = NULL;
 void lexer_init(char* src) {
    thelexer = malloc(sizeof(Lexer));
    thelexer->src = src;
    thelexer->srcln = strlen(src);
    thelexer->cchar = thelexer->src;
    thelexer->tokens = calloc(TOKENS_MAX, sizeof(Token*));
    thelexer->ntokens = 0;
    thelexer->state = LEXER_STATE_CONFUSED;
    log_dbgf("created thelexer @ %p", thelexer);
 }
 // Destroy the global lexer: every token it holds, the token table, and the
 // lexer itself. Does not free lexer->src.
 // Does nothing when there is no lexer.
 void lexer_destroy() {
    if (thelexer == NULL) return;
    // ntokens is a size_t, so index with size_t to avoid a signed/unsigned
    // comparison.
    for (size_t i = 0; i < thelexer->ntokens; i++)
        token_destroy(thelexer->tokens[i]);
    // The table and the lexer were allocated by lexer_init(); release them so
    // repeated init/destroy cycles do not leak.
    free(thelexer->tokens);
    free(thelexer);
    // Leave no dangling pointer behind.
    thelexer = NULL;
 }
 void lexer_lex() {
    while (*thelexer->cchar) {
        switch (thelexer->state) {
        case LEXER_STATE_CONFUSED: lexer_do_confused(); break;
        case LEXER_STATE_NUM:      lexer_do_number(); break;
        case LEXER_STATE_CALL:     lexer_do_call(); break;
        default:                   break;
        }
    }
 }
 // Decide what the lexer is looking at and hand off to the matching mode.
 // All leading whitespace is skipped first; if that reaches the end of the
 // source, nothing is lexed. A digit starts number mode, anything else starts
 // call mode.
 void lexer_do_confused() {
    log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", thelexer,
             *thelexer->cchar, (int)*thelexer->cchar);
    // Cast to unsigned char: the <ctype.h> functions are undefined for
    // negative arguments other than EOF, which a plain char can produce.
    // Skip the whole whitespace run, not just one character; '\0' is not
    // whitespace, so the loop stops at the end of the source.
    while (isspace((unsigned char)*thelexer->cchar)) lexer_inc();
    // Only whitespace remained: return instead of emitting an empty call
    // token.
    if (*thelexer->cchar == '\0') return;
    if (isdigit((unsigned char)*thelexer->cchar)) {
        thelexer->state = LEXER_STATE_NUM;
        lexer_do_number();
    } else {
        thelexer->state = LEXER_STATE_CALL;
        lexer_do_call();
    }
 }
 // Lex the run of decimal digits at the current character into a
 // TOKEN_TYPE_NUMBER token, then put the lexer back into confused mode.
 void lexer_do_number() {
    log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", thelexer,
             *thelexer->cchar, (int)*thelexer->cchar);
    // Where the number string starts.
    char* start = thelexer->cchar;
    // Length of the number string.
    size_t numln = 0;
    // Cast to unsigned char: isdigit() is undefined for negative char values.
    // The terminating '\0' is not a digit, so the scan stops at the end.
    while (isdigit((unsigned char)*thelexer->cchar)) {
        lexer_inc();
        numln++;
    }
    // Copy the digits into their own NUL-terminated string.
    // NOTE(review): the malloc result is not checked — confirm the project's
    // out-of-memory policy.
    char* num = malloc(numln + 1);
    memcpy(num, start, numln);
    num[numln] = '\0';
    lexer_add_token(token_init(TOKEN_TYPE_NUMBER, num, numln));
    thelexer->state = LEXER_STATE_CONFUSED;
 }
 // Lex the run of characters up to the next digit, whitespace, or end of
 // source into a TOKEN_TYPE_CALL token, then return to confused mode.
 void lexer_do_call() {
    log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", thelexer,
             *thelexer->cchar, (int)*thelexer->cchar);
    // Where the call string starts.
    char* start = thelexer->cchar;
    // Size of the call string.
    size_t callln = 0;
    for (;;) {
        // Convert to unsigned char before the <ctype.h> calls: they are
        // undefined for negative char values.
        unsigned char ch = (unsigned char)*thelexer->cchar;
        if (ch == '\0' || isdigit(ch) || isspace(ch)) break;
        lexer_inc();
        callln++;
    }
    // Copy the characters into their own NUL-terminated string.
    // NOTE(review): the malloc result is not checked — confirm the project's
    // out-of-memory policy.
    char* call = malloc(callln + 1);
    memcpy(call, start, callln);
    call[callln] = '\0';
    lexer_add_token(token_init(TOKEN_TYPE_CALL, call, callln));
    thelexer->state = LEXER_STATE_CONFUSED;
 }
 // Advance the global lexer's cursor by one character.
 void lexer_inc() {
    ++thelexer->cchar;
 }
 // Append `token` to the global lexer's token table.
 // The table has TOKENS_MAX slots; adding to a full table trips the assert.
 void lexer_add_token(Token* token) {
    assert(thelexer->ntokens < TOKENS_MAX);
    // Guard against overflow when assertions are compiled out. The bound is
    // TOKENS_MAX itself so that the last slot of the table is usable.
    // NOTE(review): a token rejected here is neither stored nor freed —
    // confirm who should own it in that case.
    if (thelexer->ntokens >= TOKENS_MAX) return;
    thelexer->tokens[thelexer->ntokens] = token;
    thelexer->ntokens++;
    log_dbgf("added token (total: %ld)", thelexer->ntokens);
 }
 // Print a representation of the global lexer at indentation level 0.
 void lexer_print() {
    lexer_print_i(0);
 }
 // Print a representation of the global lexer at indentation level `ilvl`.
 // Output is produced through the INDENT_* helper macros, which are defined
 // outside this part of the file.
 void lexer_print_i(int ilvl) {
    INDENT_BEGIN(ilvl);
    INDENT_TITLE("Lexer", thelexer);
    // The state is printed by lexerstate_print_raw() between the
    // NONL_START / NONL_END pair; both macros are written without a trailing
    // semicolon here.
    INDENT_FIELD_NONL_START("state")
    lexerstate_print_raw();
    INDENT_FIELD_NONL_END
    INDENT_FIELD("srcln", "%ld", thelexer->srcln);
    INDENT_FIELD_NL("src", "\"%s\"", thelexer->src);
    // Shows the character under the cursor, not the cursor address.
    INDENT_FIELD("cchar", "'%c'", *thelexer->cchar);
    INDENT_FIELD("ntokens", "%ld", thelexer->ntokens);
    // presumably prints each of the ntokens stored tokens via token_print_i —
    // confirm against the macro definition.
    INDENT_FIELD_LIST("tokens", thelexer->tokens, thelexer->ntokens,
                      token_print_i);
 }
 // Print the name of the global lexer's current state, without a newline.
 // A value above LEXER_STATE_MAX is printed as "Unknown (<value>)" and logged.
 void lexerstate_print_raw() {
    LexerState s = thelexer->state;
    if (s > LEXER_STATE_MAX) {
        printf("Unknown (%d)", s);
        // Report the LexerState bound here, not the unrelated TOKEN_TYPE_MAX.
        log_dbgf("%d is not a valid LexerState (max: %d)", s, LEXER_STATE_MAX);
    } else {
        printf("%s", lexerstate_names[s]);
    }
 }
 int yylex() {
-    if (*thelexer->cchar == '\0') return YYEOF;
+    if (*inp == '\0') return YYEOF;
    // Skip all whitespace.
-    while (*thelexer->cchar == ' ' || *thelexer->cchar == '\t')
+    while (*inp == ' ' || *inp == '\t') { inp++; }
        thelexer->cchar++;
    // Assign & consume current character.
-    int c = *thelexer->cchar++;
+    int c = *inp++;
    // Check for NUM.
    if (isdigit(c)) {
        int value = c - '0';
-        while (isdigit(*thelexer->cchar)) {
+        while (isdigit(*inp)) {
-            value = value * 10 + (*thelexer->cchar - '0'); // Accumulate value.
+            value = value * 10 + (*inp - '0'); // Accumulate value.
-            thelexer->cchar++;
+            inp++;
        }
        yylval.intval = value; // Set the token value.
        return NUM;
--- a/src/main.c
+++ b/src/main.c
@ -2,7 +2,6 @@
 #include "include/ast.h"
 #include "include/dstr.h"
 #include "include/exec.h"
 #include "include/lexer.h"
 #include "include/util.h"
@ -11,6 +10,9 @@
 // Global Abstract Syntax Tree.
 extern AST* root;
 // Global input text.
 char* inp = NULL;
 extern int yyparse();
 int main(int argc, char** argv) {
@ -22,20 +24,18 @@ int main(int argc, char** argv) {
            log_dbgf("cchar: %c", cch);
            dstr_appendch(cline, cch);
        }
        dstr_appendch(cline, '\n');
        log_dbgf("cline: %s", cline->buf);
        if (cline->ln > 0) {
-            lexer_init(cline->buf);
+            // I hope it's null-terminated.
-            lexer_lex();
+            inp = cline->buf;
            lexer_print();
            if (yyparse() == 0) {
                printf("Parsed successfully!\n");
                exec(root);
            } else {
                printf("Parse error.\n");
            }
            lexer_destroy();
        }
        dstr_destroy(cline);
--- a/src/parser.c
+++ b/src/parser.c
@ -1,30 +0,0 @@
 #include "include/stack.h"
 #include "include/util.h"
 #include "include/parser.h"
 // Build a Parser over the `tokenc` tokens in `tokenv`.
 // The current token starts at the first entry of `tokenv`, the AST starts out
 // NULL, and both stacks come from stack_init().
 Parser* parser_init(size_t tokenc, Token** tokenv) {
    // presumably allocates a Parser and declares `parser` — confirm against
    // the talloc macro.
    talloc(Parser, parser);
    parser->tokenv = tokenv;
    parser->tokenc = tokenc;
    parser->ctoken = tokenv[0];
    parser->ast = NULL;
    parser->ops = stack_init();
    parser->nums = stack_init();
    return parser;
 }
 // Destroy everything a Parser refers to: each token in its vector, its AST,
 // and both stacks. A NULL parser is ignored.
 void parser_destroy(Parser* parser) {
    if (!parser) return;
    // Also frees parser->ctoken.
    // tokenc is a size_t, so index with size_t to avoid a signed/unsigned
    // comparison.
    for (size_t i = 0; i < parser->tokenc; i++) token_destroy(parser->tokenv[i]);
    ast_destroy(parser->ast);
    stack_destroy(parser->ops);
    stack_destroy(parser->nums);
    // NOTE(review): the Parser object itself is not released here — confirm
    // how talloc() allocations are meant to be freed.
 }