Compare commits
2 Commits
63f5064ba9
...
933418895e
Author | SHA1 | Date | |
---|---|---|---|
933418895e | |||
3c56290448 |
@ -2,12 +2,8 @@
|
|||||||
#define LEXER_H
|
#define LEXER_H
|
||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <ctype.h>
|
|
||||||
|
|
||||||
#include "util.h"
|
|
||||||
#include "token.h"
|
#include "token.h"
|
||||||
|
|
||||||
#define TOKENS_MAX 32
|
#define TOKENS_MAX 32
|
||||||
@ -24,7 +20,7 @@ typedef enum {
|
|||||||
// Lexer: converts text to tokens.
|
// Lexer: converts text to tokens.
|
||||||
typedef struct {
|
typedef struct {
|
||||||
char* src; // The source text.
|
char* src; // The source text.
|
||||||
size_t srcl; // The number of source chars.
|
size_t srcln; // The number of source chars.
|
||||||
char* cchar; // The current character.
|
char* cchar; // The current character.
|
||||||
Token** tokens; // The tokens produced.
|
Token** tokens; // The tokens produced.
|
||||||
size_t ntokens; // The number of tokens.
|
size_t ntokens; // The number of tokens.
|
||||||
@ -55,4 +51,10 @@ void lexer_inc(Lexer* lexer);
|
|||||||
// Add a token to the lexer.
|
// Add a token to the lexer.
|
||||||
void lexer_add_token(Lexer* lexer, Token* token);
|
void lexer_add_token(Lexer* lexer, Token* token);
|
||||||
|
|
||||||
|
// Returns a dynamic string representation of the Lexer.
|
||||||
|
Dstr* lexer_to_dstr(Lexer* lexer);
|
||||||
|
|
||||||
|
// Returns a string representation of the LexerState.
|
||||||
|
char* lexer_state_to_str(LexerState s);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@ -1,20 +1,28 @@
|
|||||||
#ifndef UTIL_H
|
#ifndef UTIL_H
|
||||||
#define UTIL_H
|
#define UTIL_H
|
||||||
|
|
||||||
#ifdef DBG
|
#ifdef DBG // Debug macros
|
||||||
|
|
||||||
|
// Log a message.
|
||||||
#define log_dbg(msg) \
|
#define log_dbg(msg) \
|
||||||
printf("\033[37;1mdbg\033[0m:\033[37;5m%s\033[0m:\033[32m " msg \
|
printf("\033[37;1mdbg\033[0m:\033[37;5m%s\033[0m:\033[32m " msg \
|
||||||
"\033[0m\n", \
|
"\033[0m\n", \
|
||||||
__func__);
|
__func__);
|
||||||
|
|
||||||
|
// Log a message with formatting.
|
||||||
#define log_dbgf(msg, ...) \
|
#define log_dbgf(msg, ...) \
|
||||||
printf("\033[37;1mdbg\033[0m:\033[37;5m%s\033[0m:\033[32m " msg \
|
printf("\033[37;1mdbg\033[0m:\033[37;5m%s\033[0m:\033[32m " msg \
|
||||||
"\033[0m\n", \
|
"\033[0m\n", \
|
||||||
__func__, __VA_ARGS__);
|
__func__, __VA_ARGS__);
|
||||||
|
|
||||||
#else
|
#else // ifdef DBG
|
||||||
#define log_dbg(msg)
|
#define log_dbg(msg)
|
||||||
#endif
|
#endif // ifdef DBG else
|
||||||
|
|
||||||
|
// Maximum size of a buffer holding the decimal text of an int.
// The formula budgets one character per 3 value bits, which overestimates
// the digit count (a decimal digit carries more than 3 bits); the +2
// presumably covers a sign and the terminating NUL.
// NOTE(review): CHAR_BIT comes from <limits.h>; this header does not appear
// to include it, so the including file must do so first.
|
||||||
|
#define MAXSTRINTSZ ((CHAR_BIT * sizeof(int) - 1) / 3 + 2)
|
||||||
|
|
||||||
|
// Maximum size of a buffer holding the decimal text of a size_t.
// Same 3-bits-per-character estimate as the int variant; size_t is unsigned,
// so the allowance for a sign is simply spare room.
// NOTE(review): the name looks like a misspelling of MAXSTRSIZE_TSZ; confirm
// with callers before renaming. CHAR_BIT requires <limits.h>.
|
||||||
|
#define MAXSTRIZE_TSZ ((CHAR_BIT * sizeof(size_t) - 1) / 3 + 2)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
51
src/lexer.c
51
src/lexer.c
@ -1,14 +1,17 @@
|
|||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <stdio.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <limits.h>
|
||||||
|
|
||||||
#include "include/lexer.h"
|
#include "include/lexer.h"
|
||||||
|
#include "include/dstr.h"
|
||||||
#include "include/util.h"
|
#include "include/util.h"
|
||||||
|
|
||||||
Lexer* lexer_init(char* src) {
|
Lexer* lexer_init(char* src) {
|
||||||
Lexer* lexer = malloc(sizeof(Lexer));
|
Lexer* lexer = malloc(sizeof(Lexer));
|
||||||
|
|
||||||
lexer->src = src;
|
lexer->src = src;
|
||||||
lexer->srcl = strlen(src);
|
lexer->srcln = strlen(src);
|
||||||
lexer->cchar = lexer->src;
|
lexer->cchar = lexer->src;
|
||||||
|
|
||||||
lexer->tokens = calloc(TOKENS_MAX, sizeof(Token*));
|
lexer->tokens = calloc(TOKENS_MAX, sizeof(Token*));
|
||||||
@ -40,15 +43,18 @@ void lexer_lex(Lexer* lexer) {
|
|||||||
void lexer_do_confused(Lexer* lexer) {
|
void lexer_do_confused(Lexer* lexer) {
|
||||||
log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar);
|
log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar);
|
||||||
|
|
||||||
lexer->state = LEXER_STATE_CONFUSED;
|
if (isdigit(*lexer->cchar)) {
|
||||||
if (isdigit(*lexer->cchar)) lexer_do_number(lexer);
|
lexer->state = LEXER_STATE_NUM;
|
||||||
else lexer_do_call(lexer);
|
lexer_do_number(lexer);
|
||||||
|
} else {
|
||||||
|
lexer->state = LEXER_STATE_CALL;
|
||||||
|
lexer_do_call(lexer);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_do_number(Lexer* lexer) {
|
void lexer_do_number(Lexer* lexer) {
|
||||||
log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar);
|
log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar);
|
||||||
|
|
||||||
lexer->state = LEXER_STATE_NUM;
|
|
||||||
// Size of the number string.
|
// Size of the number string.
|
||||||
size_t numsz;
|
size_t numsz;
|
||||||
|
|
||||||
@ -63,21 +69,19 @@ void lexer_do_number(Lexer* lexer) {
|
|||||||
num[numsz] = '\0';
|
num[numsz] = '\0';
|
||||||
|
|
||||||
lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num, 1));
|
lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num, 1));
|
||||||
|
lexer->state = LEXER_STATE_CONFUSED;
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_do_call(Lexer* lexer) {
|
void lexer_do_call(Lexer* lexer) {
|
||||||
log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar);
|
log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", lexer, *lexer->cchar, (int)*lexer->cchar);
|
||||||
|
|
||||||
lexer->state = LEXER_STATE_CALL;
|
|
||||||
// Size of the call string.
|
// Size of the call string.
|
||||||
size_t callsz;
|
size_t callsz;
|
||||||
|
|
||||||
// Where the call string starts.
|
// Where the call string starts.
|
||||||
char* start = lexer->cchar;
|
char* start = lexer->cchar;
|
||||||
|
|
||||||
for (; *lexer->cchar && (isblank(lexer->cchar) || *lexer->cchar == '\n'); lexer_inc(lexer));
|
for (callsz = 0; *lexer->cchar && (!isdigit(*lexer->cchar)); callsz++)
|
||||||
|
|
||||||
for (callsz = 0; *lexer->cchar && isalpha(*lexer->cchar); callsz++)
|
|
||||||
lexer_inc(lexer);
|
lexer_inc(lexer);
|
||||||
|
|
||||||
char* call = malloc(callsz + 1);
|
char* call = malloc(callsz + 1);
|
||||||
@ -85,6 +89,8 @@ void lexer_do_call(Lexer* lexer) {
|
|||||||
call[callsz] = '\0';
|
call[callsz] = '\0';
|
||||||
|
|
||||||
lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, call, 1));
|
lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, call, 1));
|
||||||
|
|
||||||
|
lexer->state = LEXER_STATE_CONFUSED;
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_inc(Lexer* lexer) {
|
void lexer_inc(Lexer* lexer) {
|
||||||
@ -100,3 +106,30 @@ void lexer_add_token(Lexer* lexer, Token* token) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Returns a dynamic string representation of the Lexer.
//
// The text holds three fields on separate lines: the lexer's address, the
// number of source characters (srcln) and the source text itself. The Dstr is
// created with dstr_init() and handed back to the caller.
Dstr* lexer_to_dstr(Lexer* lexer) {
    Dstr* str = dstr_init();

    // Measure the header instead of guessing its size: the text produced by
    // %p is implementation-defined, so a buffer sized for a 32-bit
    // "0x00000000" overflows on 64-bit targets. %zu is the conversion for
    // size_t, and %p requires a void* argument.
    int hdrln = snprintf(NULL, 0, "Lexer @ %p\nsrcln: %zu\nsrc: ",
                         (void*)lexer, lexer->srcln);

    if (hdrln > 0) {
        char hdr[(size_t)hdrln + 1];
        snprintf(hdr, sizeof(hdr), "Lexer @ %p\nsrcln: %zu\nsrc: ",
                 (void*)lexer, lexer->srcln);

        // snprintf's return value already excludes the terminating NUL, so
        // it is exactly the number of characters to append.
        dstr_append(str, hdr, (size_t)hdrln);
    }

    // The source is appended by its recorded length, not by scanning for NUL.
    dstr_append(str, lexer->src, lexer->srcln);

    return str;
}
|
||||||
|
|
||||||
|
// Returns a string representation of the LexerState.
// Each known state maps to a string literal naming it; any other value
// yields "UNKNOWN". The returned pointer refers to a literal and must not be
// modified or freed.
char* lexer_state_to_str(LexerState s) {
    if (s == LEXER_STATE_NUM) return "NUM";
    if (s == LEXER_STATE_CALL) return "CALL";
    if (s == LEXER_STATE_CONFUSED) return "CONFUSED";

    return "UNKNOWN";
}
|
||||||
|
12
src/main.c
12
src/main.c
@ -10,15 +10,17 @@ int main(int argc, char** argv) {
|
|||||||
Dstr* cline = dstr_init(); // The current line.
|
Dstr* cline = dstr_init(); // The current line.
|
||||||
printf("> ");
|
printf("> ");
|
||||||
fflush(stdout);
|
fflush(stdout);
|
||||||
for (char cch; (cch = getchar() != EOF);) {
|
for (char cch; (cch = getc(stdin)) != '\n';) {
|
||||||
dstr_appendch(cline, fgetc(stdin));
|
log_dbgf("cchar: %c", cch);
|
||||||
|
dstr_appendch(cline, cch);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
log_dbgf("cline: %s", cline->buf);
|
||||||
|
|
||||||
if (cline->ln > 0) {
|
if (cline->ln > 0) {
|
||||||
Lexer* lexer = lexer_init(cline->buf);
|
Lexer* lexer = lexer_init(cline->buf);
|
||||||
lexer_lex(lexer);
|
lexer_lex(lexer);
|
||||||
printf("\n=%s\n", token_to_dstr(lexer->tokens[0])->buf);
|
printf("\n%s\n", lexer_to_dstr(lexer)->buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user