commit dd0da306d6a210eb93c26af69cf9e484125b0c9c
parent e96299d559a92fb9d377da134e5bd502e8dad946
Author: citbl <citbl@citbl.org>
Date: Wed, 13 May 2026 23:07:48 +1000
lexer progress
Diffstat:
2 files changed, 42 insertions(+), 10 deletions(-)
diff --git a/src/lexer.c b/src/lexer.c
@@ -23,6 +23,8 @@ void print_tokens(Lexer* lex)
t = lex->tokens[i];
typ = t.type;
+ printf("%s: %zu:%zu ", t.filename, t.line, t.col);
+
switch (typ) {
case LIT_STRING:
printf("STRING LITERAL: %s\n", lex->tokens[i].lexeme.value);
@@ -33,8 +35,17 @@ void print_tokens(Lexer* lex)
case LIT_INT:
printf("DECIMAL LITERAL: %s\n", lex->tokens[i].lexeme.value);
break;
+ case EQ:
+ printf("EQ =\n");
+ break;
+ case SEMICOL:
+ printf("SEMICOL ;\n");
+ break;
+ case IDENT:
+ printf("IDENT: %s\n", lex->tokens[i].lexeme.value);
+ break;
default:
- printf("print_tokens: unhandled token %i", typ);
+ printf("print_tokens: unhandled token %i\n", typ);
break;
}
}
@@ -60,9 +71,10 @@ static char peek(Lexer* lex)
static char advance(Lexer* lex)
{
const char c = peek(lex);
+ if (c == '\r') advance(lex);
if (c == '\n') {
lex->state.line++;
- lex->state.col = 1;
+ lex->state.col = 0;
}
else {
lex->state.col++;
@@ -89,19 +101,23 @@ static void lex_number(Lexer* lex, Token* tok)
{
char c = lex->code[lex->state.pos];
Str* str = &tok->lexeme;
- str_append(str, c);
tok->type = LIT_INT;
+ str_append(str, c);
while (lex->state.pos < lex->code_len) {
- advance(lex);
c = peek(lex);
+ if (c == '_' && tok->type == LIT_INT) {
+ advance(lex); // allow _ in large integers
+ continue;
+ }
+ if (c != '.' && !isdigit((unsigned char)c)) break;
if (c == '.' && tok->type == LIT_DECIMAL) {
err(lex, "parsing number failed with more than one decimal point '.'\n");
}
if (c == '.' && tok->type == LIT_INT) tok->type = LIT_DECIMAL;
- if (c == '_' && tok->type == LIT_INT) continue; // allow _ in large integers
- if (c != '.' && !isdigit((unsigned char)c)) break;
str_append(str, c);
+ advance(lex);
}
+ advance(lex);
add_token(lex, *tok);
}
@@ -109,13 +125,13 @@ static void lex_ident(Lexer* lex, Token* tok)
{
char c = lex->code[lex->state.pos];
Str* str = &tok->lexeme;
- str_append(str, c);
tok->type = IDENT;
while (lex->state.pos < lex->code_len) {
- advance(lex);
+ str_append(str, c);
+ // printf("char: %c\n", c);
c = peek(lex);
if (!isalnum((unsigned char)c)) break;
- str_append(str, c);
+ advance(lex);
}
add_token(lex, *tok);
}
@@ -140,7 +156,7 @@ Lexer* lexer_lex(Lexer* lex)
lex->tokens = calloc(250, sizeof(Token));
lex->state.pos = 0;
lex->state.line = 1;
- lex->state.col = 1;
+ lex->state.col = 0;
// longest valid token first
while (lex->state.pos < lex->code_len) {
@@ -153,9 +169,12 @@ Lexer* lexer_lex(Lexer* lex)
}
if (isalpha((unsigned char)c)) {
lex_ident(lex, &t);
+ advance(lex);
+ continue;
}
if (isdigit((unsigned char)c)) {
lex_number(lex, &t);
+ continue;
}
switch (c) {
@@ -166,13 +185,25 @@ Lexer* lexer_lex(Lexer* lex)
while (lex->code[lex->state.pos] != '\"' || lex->state.pos > lex->code_len) {
add_to_string(&t, c);
+ lex->state.col++;
c = lex->code[++lex->state.pos];
}
advance(lex);
add_token(lex, t);
continue;
+ case '=':
+ t.type = EQ;
+ add_token(lex, t);
+ advance(lex);
+ continue;
+ case ';':
+ t.type = SEMICOL;
+ add_token(lex, t);
+ advance(lex);
+ continue;
case '\n':
case '\r':
+ case ' ':
advance(lex);
continue;
break;
diff --git a/src/token.h b/src/token.h
@@ -24,6 +24,7 @@ typedef enum Token_Type {
DASH_GT,
EQ,
BANG,
+ SEMICOL,
LIT_STRING,
LIT_DECIMAL,
LIT_INT,