Cleaned up.
This commit is contained in:
		
							
								
								
									
										3
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										3
									
								
								Makefile
									
									
									
									
									
								
							| @@ -24,7 +24,6 @@ UNITY_C = $(TEST_DIR)/unity/unity.c | ||||
| TEST_SRC_FILES = $(wildcard $(TEST_DIR)/*.c) | ||||
| TEST_OBJ_FILES = $(patsubst $(TEST_DIR)/%.c, $(TEST_OBJ_DIR)/%.o, $(TEST_SRC_FILES)) | ||||
|  | ||||
| # Stupid things. | ||||
| RESETCOLOR = \033[0m | ||||
| WHITE = $(RESETCOLOR)\033[37m | ||||
| WHITE_BOLD = $(RESETCOLOR)\033[37;1m | ||||
| @@ -45,13 +44,13 @@ $(GRAM_FILES): $(SRC_DIR)/grammar.y | ||||
| 	@ echo -e "$(WHITE_BOLD)Generating grammars...$(RESETCOLOR) bison $< -o$(GRAM_DIR)/grammar.tab.c -H$(GRAM_DIR)/grammar.tab.h" | ||||
| 	@ bison $< -o$(GRAM_DIR)/grammar.tab.c -H$(GRAM_DIR)/grammar.tab.h | ||||
|  | ||||
|  | ||||
| # Compile grammars. | ||||
| $(OBJ_DIR)/grammar.o: $(GRAM_DIR)/grammar.tab.c $(GRAM_DIR)/grammar.tab.h $(OBJ_DIR)/lexer.o | ||||
| 	$(CC) $(CFLAGS) -c $< -o $@ | ||||
|  | ||||
| # Lexer depends on grammars. | ||||
| $(OBJ_DIR)/lexer.o: $(SRC_DIR)/lexer.c $(GRAM_FILES) | ||||
| 	@ mkdir -p $(OBJ_DIR) | ||||
| 	$(CC) $(CFLAGS) -c $< -o $@ | ||||
|  | ||||
| # Compile project sources. | ||||
|   | ||||
							
								
								
									
										12
									
								
								src/dstr.c
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								src/dstr.c
									
									
									
									
									
								
							| @@ -1,8 +1,8 @@ | ||||
| #include "include/dstr.h" | ||||
| #include "include/util.h" | ||||
|  | ||||
| #include <string.h> | ||||
| #include <stdio.h> | ||||
| #include <string.h> | ||||
|  | ||||
| Dstr* dstr_init(void) { | ||||
|     Dstr* dstr = malloc(sizeof(Dstr)); | ||||
| @@ -25,7 +25,8 @@ void dstr_append(Dstr* dest, char* src, size_t ln) { | ||||
|         // Double the buffer size when overflown. | ||||
|         dest->bufsz *= 2; | ||||
|         dest->buf = realloc(dest->buf, dest->bufsz); | ||||
|         log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz/2, dest->bufsz); | ||||
|         log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz / 2, | ||||
|                  dest->bufsz); | ||||
|     } | ||||
|  | ||||
|     // Overwrites the \0 at the end of the string, keeps the null from the given | ||||
| @@ -34,16 +35,17 @@ void dstr_append(Dstr* dest, char* src, size_t ln) { | ||||
|     dest->ln += ln; | ||||
| } | ||||
|  | ||||
| void dstr_appendch(Dstr *dest, char ch) { | ||||
| void dstr_appendch(Dstr* dest, char ch) { | ||||
|     if (dest->ln + 1 + 1 > dest->bufsz) { | ||||
|         // Double the buffer size when overflown. | ||||
|         dest->bufsz *= 2; | ||||
|         dest->buf = realloc(dest->buf, dest->bufsz); | ||||
|         log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz/2, dest->bufsz); | ||||
|         log_dbgf("dstr @ %p doubled from %ld to %ld", dest, dest->bufsz / 2, | ||||
|                  dest->bufsz); | ||||
|     } | ||||
|  | ||||
|     // Overwrites the preexisting null terminator, and adds one of its own. | ||||
|     dest->buf[dest->ln] = ch; | ||||
|     dest->buf[dest->ln+1] = '\0'; | ||||
|     dest->buf[dest->ln + 1] = '\0'; | ||||
|     dest->ln += 1; | ||||
| } | ||||
|   | ||||
| @@ -30,7 +30,7 @@ | ||||
|  | ||||
| input: | ||||
|     %empty | ||||
|     | input line | ||||
|     | line | ||||
|     ; | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,76 +1,23 @@ | ||||
| #ifndef LEXER_H | ||||
| #define LEXER_H | ||||
|  | ||||
| #include <stdlib.h> | ||||
| #include <assert.h> | ||||
|  | ||||
| #include "token.h" | ||||
| #ifdef __has_include | ||||
|     #if __has_include("../../build/grammars/grammar.tab.h") | ||||
|         #include "../../build/grammars/grammar.tab.h" | ||||
|     #else | ||||
|         #warn "Build resources not present!" | ||||
|     #endif | ||||
| #else | ||||
|     #warn "Not sure whether build-time resources are present." | ||||
|     #include "../../build/grammars/grammar.tab.h" | ||||
| #endif | ||||
|  | ||||
| #define TOKENS_MAX 32 | ||||
| #define ZERO_CHAR 30 | ||||
|  | ||||
| // What the lexer is currently looking at. | ||||
| typedef enum { | ||||
|     LEXER_STATE_CONFUSED, // Can't decide what it's looking at (also initial | ||||
|                           // state). | ||||
|     LEXER_STATE_NUM,      // Looking at a number. | ||||
|     LEXER_STATE_CALL,     // Looking at a call. | ||||
|     LEXER_STATE_MAX = LEXER_STATE_CALL, | ||||
| } LexerState; | ||||
|  | ||||
| static char* lexerstate_names[] = { | ||||
|     [LEXER_STATE_CONFUSED] = "CONFUSED", | ||||
|     [LEXER_STATE_NUM] = "NUM", | ||||
|     [LEXER_STATE_CALL] = "CALL", | ||||
| }; | ||||
|  | ||||
| // Lexer: converts text to tokens. | ||||
| typedef struct { | ||||
|     LexerState state; // What the lexer is looking at. | ||||
|     size_t srcln;     // The number of source chars. | ||||
|     char* src;        // The source text. | ||||
|     char* cchar;      // The current character. | ||||
|     size_t ntokens;   // The number of tokens. | ||||
|     Token** tokens;   // The tokens produced. | ||||
| } Lexer; | ||||
|  | ||||
| // Create a lexer. | ||||
| void lexer_init(char* src); | ||||
|  | ||||
| // Destroy the lexer. | ||||
| // Does not destroy `thelexer->src`. | ||||
| void lexer_destroy(); | ||||
|  | ||||
| // Convert text to tokens. | ||||
| void lexer_lex(); | ||||
|  | ||||
| // Lex in confused mode. | ||||
| void lexer_do_confused(); | ||||
|  | ||||
| // Lex in number mode. | ||||
| void lexer_do_number(); | ||||
|  | ||||
| // Lex in call mode. | ||||
| void lexer_do_call(); | ||||
|  | ||||
| // Increment the lexer's current character pointer. | ||||
| void lexer_inc(); | ||||
|  | ||||
| // Add a token to the lexer. | ||||
| void lexer_add_token(Token* token); | ||||
|  | ||||
| // Print a representation of a Lexer. | ||||
| void lexer_print(); | ||||
|  | ||||
| // Print a representation of a Lexer at specified indentation level. | ||||
| void lexer_print_i(int ilvl); | ||||
|  | ||||
| // Print a representation of a LexerState. | ||||
| void lexerstate_print_raw(); | ||||
|  | ||||
| // Create the input string. | ||||
| void lexer_set_global(const char* str); | ||||
| extern YYSTYPE yylval; | ||||
| extern char* inp; | ||||
|  | ||||
| // Called by `yyparse()` (in bison-generated files). | ||||
| int yylex(); | ||||
| void yyerror(char const* s); | ||||
|  | ||||
|   | ||||
| @@ -1,26 +0,0 @@ | ||||
| #ifndef PARSER_H | ||||
| #define PARSER_H | ||||
|  | ||||
| #include "token.h" | ||||
| #include "ast.h" | ||||
| #include "stack.h" | ||||
|  | ||||
| typedef struct { | ||||
|     size_t tokenc;      // Number of tokens in tokenv. | ||||
|     Token* ctoken;      // The current token. | ||||
|     Token** tokenv;     // Token vector. | ||||
|     AST* ast;           // Abstract syntax tree. | ||||
|     Stack* ops; | ||||
|     Stack* nums; | ||||
| } Parser; | ||||
|  | ||||
| Parser* parser_init(size_t tokenc, Token** tokenv); | ||||
| void parser_destroy(Parser* parser); | ||||
|  | ||||
| // Increment `parser->ctoken`. | ||||
| void parser_inc(Parser* parser); | ||||
|  | ||||
| // Step forward 1 token and add to stacks. | ||||
| void parser_step(Parser* parser); | ||||
|  | ||||
| #endif | ||||
							
								
								
									
										151
									
								
								src/lexer.c
									
									
									
									
									
								
							
							
						
						
									
										151
									
								
								src/lexer.c
									
									
									
									
									
								
							| @@ -1,163 +1,24 @@ | ||||
| #include <ctype.h> | ||||
| #include <limits.h> | ||||
| #include <stdio.h> | ||||
| #include <string.h> | ||||
|  | ||||
| #include "include/dstr.h" | ||||
| #include "include/lexer.h" | ||||
| #include "include/token.h" | ||||
| #include "include/util.h" | ||||
|  | ||||
| #include "../build/grammars/grammar.tab.h" | ||||
|  | ||||
| extern YYSTYPE yylval; | ||||
|  | ||||
| Lexer* thelexer = NULL; | ||||
|  | ||||
| void lexer_init(char* src) { | ||||
|     thelexer = malloc(sizeof(Lexer)); | ||||
|  | ||||
|     thelexer->src = src; | ||||
|     thelexer->srcln = strlen(src); | ||||
|     thelexer->cchar = thelexer->src; | ||||
|  | ||||
|     thelexer->tokens = calloc(TOKENS_MAX, sizeof(Token*)); | ||||
|     thelexer->ntokens = 0; | ||||
|     thelexer->state = LEXER_STATE_CONFUSED; | ||||
|  | ||||
|     log_dbgf("created thelexer @ %p", thelexer); | ||||
| } | ||||
|  | ||||
| void lexer_destroy() { | ||||
|     // Does not free lexer->src. | ||||
|     for (int i = 0; i < thelexer->ntokens; i++) | ||||
|         token_destroy(thelexer->tokens[i]); | ||||
| } | ||||
|  | ||||
| void lexer_lex() { | ||||
|     while (*thelexer->cchar) { | ||||
|         switch (thelexer->state) { | ||||
|         case LEXER_STATE_CONFUSED: lexer_do_confused(); break; | ||||
|         case LEXER_STATE_NUM:      lexer_do_number(); break; | ||||
|         case LEXER_STATE_CALL:     lexer_do_call(); break; | ||||
|         default:                   break; | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| void lexer_do_confused() { | ||||
|     log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", thelexer, | ||||
|              *thelexer->cchar, (int)*thelexer->cchar); | ||||
|  | ||||
|     if (isspace(*thelexer->cchar)) lexer_inc(); | ||||
|  | ||||
|     if (isdigit(*thelexer->cchar)) { | ||||
|         thelexer->state = LEXER_STATE_NUM; | ||||
|         lexer_do_number(); | ||||
|     } else { | ||||
|         thelexer->state = LEXER_STATE_CALL; | ||||
|         lexer_do_call(); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void lexer_do_number() { | ||||
|     log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", thelexer, | ||||
|              *thelexer->cchar, (int)*thelexer->cchar); | ||||
|  | ||||
|     // Length of the number string. | ||||
|     size_t numln; | ||||
|  | ||||
|     // Where the number string starts. | ||||
|     char* start = thelexer->cchar; | ||||
|  | ||||
|     for (numln = 0; *thelexer->cchar && isdigit(*thelexer->cchar); numln++) | ||||
|         lexer_inc(); | ||||
|  | ||||
|     char* num = malloc(numln + 1); | ||||
|     memcpy(num, start, numln); | ||||
|     num[numln] = '\0'; | ||||
|  | ||||
|     lexer_add_token(token_init(TOKEN_TYPE_NUMBER, num, numln)); | ||||
|     thelexer->state = LEXER_STATE_CONFUSED; | ||||
| } | ||||
|  | ||||
| void lexer_do_call() { | ||||
|     log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", thelexer, | ||||
|              *thelexer->cchar, (int)*thelexer->cchar); | ||||
|  | ||||
|     // Size of the call string. | ||||
|     size_t callln; | ||||
|  | ||||
|     // Where the call string starts. | ||||
|     char* start = thelexer->cchar; | ||||
|  | ||||
|     for (callln = 0; *thelexer->cchar && | ||||
|                      (!isdigit(*thelexer->cchar) && !isspace(*thelexer->cchar)); | ||||
|          callln++) | ||||
|         lexer_inc(); | ||||
|  | ||||
|     char* call = malloc(callln + 1); | ||||
|     memcpy(call, start, callln); | ||||
|     call[callln] = '\0'; | ||||
|  | ||||
|     lexer_add_token(token_init(TOKEN_TYPE_CALL, call, callln)); | ||||
|  | ||||
|     thelexer->state = LEXER_STATE_CONFUSED; | ||||
| } | ||||
|  | ||||
| void lexer_inc() { thelexer->cchar += sizeof(char); } | ||||
|  | ||||
| void lexer_add_token(Token* token) { | ||||
|     assert(thelexer->ntokens < TOKENS_MAX); | ||||
|  | ||||
|     if (thelexer->ntokens < TOKENS_MAX - 1) { | ||||
|         thelexer->tokens[thelexer->ntokens] = token; | ||||
|         thelexer->ntokens++; | ||||
|  | ||||
|         log_dbgf("added token (total: %ld)", thelexer->ntokens); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void lexer_print() { lexer_print_i(0); } | ||||
|  | ||||
| void lexer_print_i(int ilvl) { | ||||
|     INDENT_BEGIN(ilvl); | ||||
|     INDENT_TITLE("Lexer", thelexer); | ||||
|     INDENT_FIELD_NONL_START("state") | ||||
|     lexerstate_print_raw(); | ||||
|     INDENT_FIELD_NONL_END | ||||
|     INDENT_FIELD("srcln", "%ld", thelexer->srcln); | ||||
|     INDENT_FIELD_NL("src", "\"%s\"", thelexer->src); | ||||
|     INDENT_FIELD("cchar", "'%c'", *thelexer->cchar); | ||||
|     INDENT_FIELD("ntokens", "%ld", thelexer->ntokens); | ||||
|     INDENT_FIELD_LIST("tokens", thelexer->tokens, thelexer->ntokens, | ||||
|                       token_print_i); | ||||
| } | ||||
|  | ||||
| void lexerstate_print_raw() { | ||||
|     LexerState s = thelexer->state; | ||||
|     if (s > LEXER_STATE_MAX) { | ||||
|         printf("Unknown (%d)", s); | ||||
|         log_dbgf("%d is not a valid LexerState (max: %d)", s, TOKEN_TYPE_MAX); | ||||
|     } else printf("%s", lexerstate_names[s]); | ||||
| } | ||||
|  | ||||
| int yylex() { | ||||
|     if (*thelexer->cchar == '\0') return YYEOF; | ||||
|     if (*inp == '\0') return YYEOF; | ||||
|  | ||||
|     // Skip all whitespace. | ||||
|     while (*thelexer->cchar == ' ' || *thelexer->cchar == '\t') | ||||
|         thelexer->cchar++; | ||||
|     while (*inp == ' ' || *inp == '\t') { inp++; } | ||||
|  | ||||
|     // Assign & consume current character. | ||||
|     int c = *thelexer->cchar++; | ||||
|     int c = *inp++; | ||||
|  | ||||
|     // Check for NUM. | ||||
|     if (isdigit(c)) { | ||||
|         int value = c - '0'; | ||||
|         while (isdigit(*thelexer->cchar)) { | ||||
|             value = value * 10 + (*thelexer->cchar - '0'); // Accumulate value. | ||||
|             thelexer->cchar++; | ||||
|         while (isdigit(*inp)) { | ||||
|             value = value * 10 + (*inp - '0'); // Accumulate value. | ||||
|             inp++; | ||||
|         } | ||||
|         yylval.intval = value; // Set the token value. | ||||
|         return NUM; | ||||
|   | ||||
							
								
								
									
										12
									
								
								src/main.c
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								src/main.c
									
									
									
									
									
								
							| @@ -2,7 +2,6 @@ | ||||
|  | ||||
| #include "include/ast.h" | ||||
| #include "include/dstr.h" | ||||
| #include "include/exec.h" | ||||
| #include "include/lexer.h" | ||||
| #include "include/util.h" | ||||
|  | ||||
| @@ -11,6 +10,9 @@ | ||||
| // Global Abstract Syntax Tree. | ||||
| extern AST* root; | ||||
|  | ||||
| // Global input text. | ||||
| char* inp = NULL; | ||||
|  | ||||
| extern int yyparse(); | ||||
|  | ||||
| int main(int argc, char** argv) { | ||||
| @@ -22,20 +24,18 @@ int main(int argc, char** argv) { | ||||
|             log_dbgf("cchar: %c", cch); | ||||
|             dstr_appendch(cline, cch); | ||||
|         } | ||||
|         dstr_appendch(cline, '\n'); | ||||
|  | ||||
|         log_dbgf("cline: %s", cline->buf); | ||||
|  | ||||
|         if (cline->ln > 0) { | ||||
|             lexer_init(cline->buf); | ||||
|             lexer_lex(); | ||||
|             lexer_print(); | ||||
|             // yylex() reads `inp` up to '\0'; dstr_appendch() keeps the buffer null-terminated. | ||||
|             inp = cline->buf; | ||||
|             if (yyparse() == 0) { | ||||
|                 printf("Parsed successfully!\n"); | ||||
|                 exec(root); | ||||
|             } else { | ||||
|                 printf("Parse error.\n"); | ||||
|             } | ||||
|             lexer_destroy(); | ||||
|         } | ||||
|  | ||||
|         dstr_destroy(cline); | ||||
|   | ||||
							
								
								
									
										30
									
								
								src/parser.c
									
									
									
									
									
								
							
							
						
						
									
										30
									
								
								src/parser.c
									
									
									
									
									
								
							| @@ -1,30 +0,0 @@ | ||||
| #include "include/stack.h" | ||||
| #include "include/util.h" | ||||
| #include "include/parser.h" | ||||
|  | ||||
| Parser* parser_init(size_t tokenc, Token** tokenv) { | ||||
|     talloc(Parser, parser); | ||||
|  | ||||
|     parser->tokenc = tokenc; | ||||
|     parser->ctoken = *tokenv; | ||||
|     parser->tokenv = tokenv; | ||||
|     parser->ast = NULL; | ||||
|     parser->ops = stack_init(); | ||||
|     parser->nums = stack_init(); | ||||
|  | ||||
|     return parser; | ||||
| } | ||||
|  | ||||
| void parser_destroy(Parser* parser) { | ||||
|     if (!parser) return; | ||||
|  | ||||
|     // Also frees parser->ctoken. | ||||
|     for (int i = 0; i < parser->tokenc; i++) token_destroy(parser->tokenv[i]); | ||||
|  | ||||
|     ast_destroy(parser->ast); | ||||
|  | ||||
|     stack_destroy(parser->ops); | ||||
|     stack_destroy(parser->nums); | ||||
| } | ||||
|  | ||||
|  | ||||
		Reference in New Issue
	
	Block a user