From 77f40cf3c59a735ac7a8157cf33df6cb803b42ad Mon Sep 17 00:00:00 2001 From: Jacob Date: Mon, 7 Oct 2024 11:48:53 -0400 Subject: [PATCH] Add number/call lexing, fixed-size token array, and debug printing. --- src/include/lexer.h | 16 +++++++-- src/include/token.h | 7 ++++ src/lexer.c | 88 +++++++++++++++++++++++++++++++++++++-------- src/main.c | 11 ++++-- src/token.c | 17 +++++++-- test/token.c | 2 +- 6 files changed, 119 insertions(+), 22 deletions(-) diff --git a/src/include/lexer.h b/src/include/lexer.h index d5791fe..47827ae 100644 --- a/src/include/lexer.h +++ b/src/include/lexer.h @@ -3,9 +3,13 @@ #include #include +#include #include "token.h" +#define TOKENS_MAX 32 +#define ZERO_CHAR 30 + // What the lexer is currently looking at. typedef enum { LEXER_STATE_CONFUSED, // Can't decide what it's looking at (also initial @@ -30,6 +34,9 @@ Lexer* lexer_init(char* src); // Destroy a lexer. void lexer_destroy(Lexer* lexer); +// Convert text to tokens. +void lexer_lex(Lexer* lexer); + // Lex in confused mode. void lexer_do_confused(Lexer* lexer); @@ -39,10 +46,15 @@ void lexer_do_number(Lexer* lexer); // Lex in call mode. void lexer_do_call(Lexer* lexer); -// Convert text to tokens. -void lexer_lex(Lexer* lexer); +// Increment the lexer's current character pointer. +void lexer_inc(Lexer* lexer); // Add a token to the lexer. void lexer_add_token(Lexer* lexer, Token* token); +// Print the contents of a lexer. +void lexer_print(Lexer* lexer); + +static char* lexer_state_str(Lexer* lexer); + #endif diff --git a/src/include/token.h b/src/include/token.h index 2b36870..8f1159f 100644 --- a/src/include/token.h +++ b/src/include/token.h @@ -1,6 +1,9 @@ #ifndef TOKEN_H #define TOKEN_H +#include +#include + typedef enum { TOKEN_TYPE_CALL, TOKEN_TYPE_NUMBER, @@ -10,9 +13,13 @@ typedef enum { typedef struct { TokenType type; // The type of the Token. char* val; // The text of the Token. + size_t len; // Length of the text of the Token. 
} Token; Token* token_init(TokenType type, char* val); void token_destroy(Token* token); +void token_print(Token* token); +static char* token_type_str(Token* token); + #endif diff --git a/src/lexer.c b/src/lexer.c index fe225cf..3dac4f4 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,6 +1,9 @@ #include "include/lexer.h" #include "include/token.h" +#include #include +#include +#include Lexer* lexer_init(char* src) { Lexer* lexer = malloc(sizeof(Lexer)); @@ -9,7 +12,7 @@ Lexer* lexer_init(char* src) { lexer->srcl = strlen(src); lexer->cchar = lexer->src; - lexer->tokens = NULL; + lexer->tokens = calloc(TOKENS_MAX, sizeof(Token)); lexer->ntokens = 0; lexer->state = LEXER_STATE_CONFUSED; @@ -22,16 +25,6 @@ void lexer_destroy(Lexer* lexer) { for (int i = 0; i < lexer->ntokens; token_destroy(lexer->tokens[i++])); } -void lexer_do_confused(Lexer* lexer) { - int c = atoi(lexer->cchar); - - if (c) lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, lexer->cchar)); - else lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, lexer->cchar)); -} - -void lexer_do_number(Lexer* lexer) {} -void lexer_do_call(Lexer* lexer) {} - void lexer_lex(Lexer* lexer) { while (*lexer->cchar) { switch (lexer->state) { @@ -43,7 +36,74 @@ void lexer_lex(Lexer* lexer) { } } -void lexer_add_token(Lexer* lexer, Token* token) { - (void)reallocarray(lexer->tokens, lexer->ntokens++, sizeof(Token)); - lexer->tokens[lexer->ntokens-1] = token; +void lexer_do_confused(Lexer* lexer) { + if (isdigit(*lexer->cchar)) lexer_do_number(lexer); + else lexer_do_call(lexer); +} + +void lexer_do_number(Lexer* lexer) { + // Size of the number string. + size_t numsz; + + // Where the number string starts. 
+ char* start = lexer->cchar; + + for (numsz = 0; *lexer->cchar && isdigit(*lexer->cchar); numsz++) + lexer_inc(lexer); + + char* num = malloc(numsz + 1); + memcpy(num, start, numsz); + num[numsz] = '\0'; + + lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num)); +} + +void lexer_do_call(Lexer* lexer) { + // Size of the call string. + size_t callsz; + + // Where the call string starts. + char* start = lexer->cchar; + + for(callsz = 0; *lexer->cchar && isalpha(*lexer->cchar); callsz++) + lexer_inc(lexer); + + char* call = malloc(callsz + 1); + memcpy(call, start, callsz); + call[callsz] = '\0'; +} + +void lexer_inc(Lexer* lexer) { + lexer->cchar += sizeof(char); +} + +void lexer_add_token(Lexer* lexer, Token* token) { + assert(lexer->ntokens < TOKENS_MAX); + + if (lexer->ntokens < TOKENS_MAX - 1) { + lexer->tokens[lexer->ntokens - 1] = token; + lexer->ntokens++; + } +} + +void lexer_print(Lexer* lexer) { + printf("Lexer @%p:\n", lexer); + printf("\tsrc: \"%s\"\n", lexer->src); + printf("\tsrcl: \"%ld\"\n", lexer->srcl); + printf("\tcchar: \"%s\"\n", lexer->cchar); + printf("\tntokens: %ld\n", lexer->ntokens); + printf("\ttokens: [START]\n"); + for (int i = 0; i < lexer->ntokens; i++) token_print(lexer->tokens[i]); + printf("[END]\n"); + printf("\tstate: %s\n", lexer_state_str(lexer)); + +} + +static char* lexer_state_str(Lexer* lexer) { + switch (lexer->state) { + case LEXER_STATE_NUM: return "NUM"; + case LEXER_STATE_CALL: return "CALL"; + case LEXER_STATE_CONFUSED: return "CONFUSED"; + default: return "???"; + } } diff --git a/src/main.c b/src/main.c index 4eaa844..2bf16be 100644 --- a/src/main.c +++ b/src/main.c @@ -1,9 +1,14 @@ #include #include "include/util.h" +#include "include/lexer.h" int main(int argc, char** argv) { - printf("2 is even: %s\n", is_even(2) ? "true" : "false"); - printf("5 is even: %s\n", is_even(5) ? 
"true" : "false"); - return 0; + char* text = malloc(5); + text = "a1b2"; + + Lexer* lexer = lexer_init(text); + lexer_print(lexer); + lexer_lex(lexer); + lexer_print(lexer); } diff --git a/src/token.c b/src/token.c index a227d60..368c5ed 100644 --- a/src/token.c +++ b/src/token.c @@ -1,5 +1,3 @@ -#include - #include "include/token.h" Token* token_init(TokenType type, char* val) { @@ -15,3 +13,18 @@ void token_destroy(Token* t) { free(t->val); free(t); } + +void token_print(Token* token) { + printf("Token @%p:\n", token); + printf("\ttype: %s\n", token_type_str(token)); + printf("\tval: %s\n", token->val); + printf("\tlen: %ld\n", token->len); +} + +static char* token_type_str(Token* token) { + switch (token->type) { + case TOKEN_TYPE_CALL: return "CALL"; + case TOKEN_TYPE_NUMBER: return "NUMBER"; + default: return "???"; + } +} diff --git a/test/token.c b/test/token.c index 92a79e1..862856d 100644 --- a/test/token.c +++ b/test/token.c @@ -19,6 +19,6 @@ int test_token() { return UNITY_END(); } -__attribute__((constructor)) void register_tests_token() { +__attribute__((constructor)) void register_token() { register_test(test_token); }