Global lexer.
This commit is contained in:
parent
8cf09e43c9
commit
120038ea8f
@ -37,36 +37,36 @@ typedef struct {
|
|||||||
// Create a lexer.
|
// Create the global lexer.
|
||||||
void lexer_init(char* src);
|
void lexer_init(char* src);
|
||||||
|
|
||||||
// Destroy a lexer.
|
// Destroy the lexer.
|
||||||
// Does not destroy `lexer->src`!
|
// Does not destroy `thelexer->src`.
|
||||||
void lexer_destroy(Lexer* lexer);
|
void lexer_destroy();
|
||||||
|
|
||||||
// Convert text to tokens.
|
// Convert text to tokens.
|
||||||
void lexer_lex(Lexer* lexer);
|
void lexer_lex();
|
||||||
|
|
||||||
// Lex in confused mode.
|
// Lex in confused mode.
|
||||||
void lexer_do_confused(Lexer* lexer);
|
void lexer_do_confused();
|
||||||
|
|
||||||
// Lex in number mode.
|
// Lex in number mode.
|
||||||
void lexer_do_number(Lexer* lexer);
|
void lexer_do_number();
|
||||||
|
|
||||||
// Lex in call mode.
|
// Lex in call mode.
|
||||||
void lexer_do_call(Lexer* lexer);
|
void lexer_do_call();
|
||||||
|
|
||||||
// Increment the lexer's current character pointer.
|
// Increment the lexer's current character pointer.
|
||||||
void lexer_inc(Lexer* lexer);
|
void lexer_inc();
|
||||||
|
|
||||||
// Add a token to the lexer.
|
// Add a token to the lexer.
|
||||||
void lexer_add_token(Lexer* lexer, Token* token);
|
void lexer_add_token(Token* token);
|
||||||
|
|
||||||
// Print a representation of a Lexer.
|
// Print a representation of the lexer.
|
||||||
void lexer_print(Lexer* lexer);
|
void lexer_print();
|
||||||
|
|
||||||
// Print a representation of a Lexer at specified indentation level.
|
// Print a representation of the lexer at the specified indentation level.
|
||||||
void lexer_print_i(Lexer* lexer, int ilvl);
|
void lexer_print_i(int ilvl);
|
||||||
|
|
||||||
// Print a representation of a LexerState.
|
// Print a representation of the lexer's current LexerState.
|
||||||
void lexerstate_print_raw(LexerState s);
|
void lexerstate_print_raw();
|
||||||
|
|
||||||
// Create the input string.
|
// Create the input string.
|
||||||
void lexer_set_global(const char* str);
|
void lexer_set_global(const char* str);
|
||||||
|
111
src/lexer.c
111
src/lexer.c
@ -24,121 +24,114 @@ void lexer_init(char* src) {
|
|||||||
log_dbgf("created thelexer @ %p", thelexer);
|
log_dbgf("created thelexer @ %p", thelexer);
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_destroy(Lexer* lexer) {
|
void lexer_destroy() {
|
||||||
// Does not free lexer->src.
|
// Does not free thelexer->src.
|
||||||
for (int i = 0; i < lexer->ntokens; i++) token_destroy(lexer->tokens[i]);
|
for (int i = 0; i < thelexer->ntokens; i++) token_destroy(thelexer->tokens[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_lex(Lexer* lexer) {
|
void lexer_lex() {
|
||||||
while (*lexer->cchar) {
|
while (*thelexer->cchar) {
|
||||||
switch (lexer->state) {
|
switch (thelexer->state) {
|
||||||
case LEXER_STATE_CONFUSED: lexer_do_confused(lexer); break;
|
case LEXER_STATE_CONFUSED: lexer_do_confused(); break;
|
||||||
case LEXER_STATE_NUM: lexer_do_number(lexer); break;
|
case LEXER_STATE_NUM: lexer_do_number(); break;
|
||||||
case LEXER_STATE_CALL: lexer_do_call(lexer); break;
|
case LEXER_STATE_CALL: lexer_do_call(); break;
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_do_confused(Lexer* lexer) {
|
void lexer_do_confused() {
|
||||||
log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", lexer,
|
log_dbgf("lexer @ %p entered confused mode @ char '%c' (%d)", thelexer,
|
||||||
*lexer->cchar, (int)*lexer->cchar);
|
*thelexer->cchar, (int)*thelexer->cchar);
|
||||||
|
|
||||||
if (isspace(*lexer->cchar)) lexer_inc(lexer);
|
if (isspace(*thelexer->cchar)) lexer_inc();
|
||||||
|
|
||||||
if (isdigit(*lexer->cchar)) {
|
if (isdigit(*thelexer->cchar)) {
|
||||||
lexer->state = LEXER_STATE_NUM;
|
thelexer->state = LEXER_STATE_NUM;
|
||||||
lexer_do_number(lexer);
|
lexer_do_number();
|
||||||
} else {
|
} else {
|
||||||
lexer->state = LEXER_STATE_CALL;
|
thelexer->state = LEXER_STATE_CALL;
|
||||||
lexer_do_call(lexer);
|
lexer_do_call();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_do_number(Lexer* lexer) {
|
void lexer_do_number() {
|
||||||
log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", lexer,
|
log_dbgf("lexer @ %p entered number mode @ char '%c' (%d)", thelexer,
|
||||||
*lexer->cchar, (int)*lexer->cchar);
|
*thelexer->cchar, (int)*thelexer->cchar);
|
||||||
|
|
||||||
// Length of the number string.
|
// Length of the number string.
|
||||||
size_t numln;
|
size_t numln;
|
||||||
|
|
||||||
// Where the number string starts.
|
// Where the number string starts.
|
||||||
char* start = lexer->cchar;
|
char* start = thelexer->cchar;
|
||||||
|
|
||||||
for (numln = 0; *lexer->cchar && isdigit(*lexer->cchar); numln++)
|
for (numln = 0; *thelexer->cchar && isdigit(*thelexer->cchar); numln++)
|
||||||
lexer_inc(lexer);
|
lexer_inc();
|
||||||
|
|
||||||
char* num = malloc(numln + 1);
|
char* num = malloc(numln + 1);
|
||||||
memcpy(num, start, numln);
|
memcpy(num, start, numln);
|
||||||
num[numln] = '\0';
|
num[numln] = '\0';
|
||||||
|
|
||||||
lexer_add_token(lexer, token_init(TOKEN_TYPE_NUMBER, num, numln));
|
lexer_add_token(token_init(TOKEN_TYPE_NUMBER, num, numln));
|
||||||
lexer->state = LEXER_STATE_CONFUSED;
|
thelexer->state = LEXER_STATE_CONFUSED;
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_do_call(Lexer* lexer) {
|
void lexer_do_call() {
|
||||||
log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", lexer,
|
log_dbgf("lexer @ %p entered call mode @ char '%c' (%d)", thelexer,
|
||||||
*lexer->cchar, (int)*lexer->cchar);
|
*thelexer->cchar, (int)*thelexer->cchar);
|
||||||
|
|
||||||
// Size of the call string.
|
// Size of the call string.
|
||||||
size_t callln;
|
size_t callln;
|
||||||
|
|
||||||
// Where the call string starts.
|
// Where the call string starts.
|
||||||
char* start = lexer->cchar;
|
char* start = thelexer->cchar;
|
||||||
|
|
||||||
for (callln = 0;
|
for (callln = 0;
|
||||||
*lexer->cchar && (!isdigit(*lexer->cchar) && !isspace(*lexer->cchar));
|
*thelexer->cchar && (!isdigit(*thelexer->cchar) && !isspace(*thelexer->cchar));
|
||||||
callln++)
|
callln++)
|
||||||
lexer_inc(lexer);
|
lexer_inc();
|
||||||
|
|
||||||
char* call = malloc(callln + 1);
|
char* call = malloc(callln + 1);
|
||||||
memcpy(call, start, callln);
|
memcpy(call, start, callln);
|
||||||
call[callln] = '\0';
|
call[callln] = '\0';
|
||||||
|
|
||||||
lexer_add_token(lexer, token_init(TOKEN_TYPE_CALL, call, callln));
|
lexer_add_token(token_init(TOKEN_TYPE_CALL, call, callln));
|
||||||
|
|
||||||
lexer->state = LEXER_STATE_CONFUSED;
|
thelexer->state = LEXER_STATE_CONFUSED;
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_inc(Lexer* lexer) {
|
void lexer_inc() {
|
||||||
lexer->cchar += sizeof(char);
|
thelexer->cchar += sizeof(char);
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_add_token(Lexer* lexer, Token* token) {
|
void lexer_add_token(Token* token) {
|
||||||
assert(lexer->ntokens < TOKENS_MAX);
|
assert(thelexer->ntokens < TOKENS_MAX);
|
||||||
|
|
||||||
if (lexer->ntokens < TOKENS_MAX - 1) {
|
if (thelexer->ntokens < TOKENS_MAX - 1) {
|
||||||
lexer->tokens[lexer->ntokens] = token;
|
thelexer->tokens[thelexer->ntokens] = token;
|
||||||
lexer->ntokens++;
|
thelexer->ntokens++;
|
||||||
|
|
||||||
log_dbgf("added token (total: %ld)", lexer->ntokens);
|
log_dbgf("added token (total: %ld)", thelexer->ntokens);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexer_print(Lexer* lexer) { lexer_print_i(lexer, 0); }
|
void lexer_print() { lexer_print_i(0); }
|
||||||
|
|
||||||
void lexer_print_i(Lexer* lexer, int ilvl) {
|
void lexer_print_i(int ilvl) {
|
||||||
INDENT_BEGIN(ilvl);
|
INDENT_BEGIN(ilvl);
|
||||||
INDENT_TITLE("Lexer", lexer);
|
INDENT_TITLE("Lexer", thelexer);
|
||||||
INDENT_FIELD_NONL_START("state")
|
INDENT_FIELD_NONL_START("state")
|
||||||
lexerstate_print_raw(lexer->state);
|
lexerstate_print_raw();
|
||||||
INDENT_FIELD_NONL_END
|
INDENT_FIELD_NONL_END
|
||||||
INDENT_FIELD("srcln", "%ld", lexer->srcln);
|
INDENT_FIELD("srcln", "%ld", thelexer->srcln);
|
||||||
INDENT_FIELD_NL("src", "\"%s\"", lexer->src);
|
INDENT_FIELD_NL("src", "\"%s\"", thelexer->src);
|
||||||
INDENT_FIELD("cchar", "'%c'", *lexer->cchar);
|
INDENT_FIELD("cchar", "'%c'", *thelexer->cchar);
|
||||||
INDENT_FIELD("ntokens", "%ld", lexer->ntokens);
|
INDENT_FIELD("ntokens", "%ld", thelexer->ntokens);
|
||||||
INDENT_FIELD_LIST("tokens", lexer->tokens, lexer->ntokens, token_print_i);
|
INDENT_FIELD_LIST("tokens", thelexer->tokens, thelexer->ntokens, token_print_i);
|
||||||
#if 0
|
|
||||||
printf("%s tokens: [\n", INDENT_spacing->buf);
|
|
||||||
|
|
||||||
for (int i = 0; i < lexer->ntokens; i++) {
|
|
||||||
token_print_i(lexer->tokens[i], ilvl + 2);
|
|
||||||
printf(",\n\n");
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void lexerstate_print_raw(LexerState s) {
|
void lexerstate_print_raw() {
|
||||||
|
LexerState s = thelexer->state;
|
||||||
if (s > LEXER_STATE_MAX) {
|
if (s > LEXER_STATE_MAX) {
|
||||||
printf("Unknown (%d)", s);
|
printf("Unknown (%d)", s);
|
||||||
log_dbgf("%d is not a valid LexerState (max: %d)", s, TOKEN_TYPE_MAX);
|
log_dbgf("%d is not a valid LexerState (max: %d)", s, TOKEN_TYPE_MAX);
|
||||||
|
@ -17,10 +17,10 @@ int main(int argc, char** argv) {
|
|||||||
log_dbgf("cline: %s", cline->buf);
|
log_dbgf("cline: %s", cline->buf);
|
||||||
|
|
||||||
if (cline->ln > 0) {
|
if (cline->ln > 0) {
|
||||||
Lexer* lexer = lexer_init(cline->buf);
|
lexer_init(cline->buf);
|
||||||
lexer_lex(lexer);
|
lexer_lex();
|
||||||
lexer_print(lexer);
|
lexer_print();
|
||||||
lexer_destroy(lexer);
|
lexer_destroy();
|
||||||
}
|
}
|
||||||
|
|
||||||
dstr_destroy(cline);
|
dstr_destroy(cline);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user