From ce25c5fe9fed726b384c2db892da4752b0b6e0f9 Mon Sep 17 00:00:00 2001 From: Jacob Date: Wed, 2 Oct 2024 17:57:04 -0400 Subject: [PATCH] Beginnings of the lexer. --- src/include/lexer.h | 41 +++++++++++++++++++++++++++++++++++++---- src/include/token.h | 4 ++-- src/lexer.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ test/token.c | 5 ++--- 4 files changed, 85 insertions(+), 9 deletions(-) create mode 100644 src/lexer.c diff --git a/src/include/lexer.h b/src/include/lexer.h index 9c2798b..d86d43c 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -1,12 +1,45 @@ #ifndef LEXER_H #define LEXER_H +#include +#include + #include "token.h" -// Lexer: converts text to tokens. -typedef struct Lexer { - char* src; +// What the lexer is currently looking at. +typedef enum { + LEXER_STATE_CONFUSED, // Can't decide what it's looking at (also initial + // state). + LEXER_STATE_NUM, // Looking at a number. + LEXER_STATE_CALL, // Looking at a call. +} LexerState; -} lexer_t; +// Lexer: converts text to tokens. +typedef struct { + char* src; // The source text. + size_t srcl; // The number of source chars. + char* cchar; // The current character. + Token** tokens; // The tokens produced. + size_t ntokens; // The number of tokens. + LexerState state; // What the lexxer is looking at. +} Lexer; + +// Create a lexer. +Lexer* lexer_init(char* src); + +// Destroy a lexer. +void lexer_destroy(Lexer* lexer); + +// Lex in confused mode. +void lexer_do_confused(Lexer* lexer); + +// Lex in number mode. +void lexer_do_number(Lexer* lexer); + +// Lex in call mode. +void lexer_do_call(Lexer* lexer); + +// Convert text to tokens. +void lexer_lex(Lexer* lexer); #endif diff --git a/src/include/token.h b/src/include/token.h index a758e43..02c4174 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -1,13 +1,13 @@ #ifndef TOKEN_H #define TOKEN_H -typedef enum TokenType { +typedef enum { TOKEN_TYPE_CALL, TOKEN_TYPE_NUMBER, } TokenType; // Token. -typedef struct Token { +typedef struct { TokenType type; // The type of the Token. char* val; // The text of the Token. } Token; diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 0000000..6e8eb5b --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,44 @@ +#include "include/lexer.h" + +Lexer* lexer_init(char* src) { + Lexer* lexer = malloc(sizeof(Lexer)); + + lexer->src = src; + lexer->srcl = strlen(src); + lexer->cchar = lexer->src; + + lexer->tokens = NULL; + lexer->ntokens = 0; + lexer->state = LEXER_STATE_CONFUSED; + + return lexer; +} + +void lexer_destroy(Lexer *lexer) { + free(lexer->src); + + for ( + int i = 0; + i < lexer->ntokens; + token_destroy(lexer->tokens[i++]) + ); +} + +void lexer_do_confused(Lexer *lexer) { + +} + +void lexer_lex(Lexer* lexer) { + while (*lexer->cchar) { + switch (lexer->state) { + case LEXER_STATE_CONFUSED: + lexer_do_confused(lexer); + break; case LEXER_STATE_NUM: + lexer_do_number(lexer); + break; case LEXER_STATE_CALL: + lexer_do_call(lexer); + break; + default: break; + } + } +} diff --git a/test/token.c b/test/token.c index 8a383d7..4938614 100644 --- a/test/token.c +++ b/test/token.c @@ -9,8 +9,8 @@ void test_token_init() { char* s = malloc(sizeof("Hello, world!")); s = "Hello, world!"; Token* t = token_init(TOKEN_TYPE_CALL, s); - TEST_ASSERT_EQUAL(TOKEN_TYPE_NUMBER, t->type); - TEST_ASSERT_EQUAL_STRING("Hellso, world!", t->val); + TEST_ASSERT_EQUAL(TOKEN_TYPE_CALL, t->type); + TEST_ASSERT_EQUAL_STRING("Hello, world!", t->val); } void test_token_destroy() { @@ -23,7 +23,6 @@ void test_token_destroy() { void token_test() { UNITY_BEGIN(); RUN_TEST(test_token_init); - RUN_TEST(test_token_destroy); UNITY_END(); }