commit 05c4135547a761744cd1553980fed2b9f853cade
parent 20834b8524da952999b7611c212dc3a3fae92e15
Author: citbl <citbl@citbl.org>
Date: Sun, 10 May 2026 20:02:55 +1000
better lexing
Diffstat:
3 files changed, 64 insertions(+), 42 deletions(-)
diff --git a/src/common.h b/src/common.h
@@ -78,6 +78,8 @@ typedef struct Token {
typedef struct Lexer_State {
size_t pos;
+ size_t line;
+ size_t col;
bool in_string;
bool in_comment;
} Lexer_State;
diff --git a/src/lexer.c b/src/lexer.c
@@ -1,18 +1,19 @@
#include "lexer.h"
#include <stdio.h>
#include <string.h>
+#include <ctype.h>
-static void add_token(Lexer *l, Token t)
+static void add_token(Lexer *lex, Token t)
{
- if (l->len >= l->cap) {
- l->cap = l->cap == 0 ? 256 : l->cap * 2;
- l->tokens = realloc(l->tokens, l->cap * sizeof(Token));
+ if (lex->len >= lex->cap) {
+ lex->cap = lex->cap == 0 ? 256 : lex->cap * 2;
+ lex->tokens = realloc(lex->tokens, lex->cap * sizeof(Token));
}
- l->tokens[l->len++] = t;
+ lex->tokens[lex->len++] = t;
}
-void print_tokens(Lexer *l)
+void print_tokens(Lexer *lex)
{
Token t;
Token_Type typ;
@@ -20,13 +21,13 @@ void print_tokens(Lexer *l)
printf("------- print tokens --------\n");
- for (i = 0; i < l->len; i++) {
- t = l->tokens[i];
+ for (i = 0; i < lex->len; i++) {
+ t = lex->tokens[i];
typ = t.type;
switch (typ) {
case LIT_STRING:
- printf("STRING LITERAL: %s\n", l->tokens[i].value.as_string.value);
+ printf("STRING LITERAL: %s\n", lex->tokens[i].value.as_string.value);
break;
default:
printf("print_tokens: unhandled token %i", typ);
@@ -35,9 +36,9 @@ void print_tokens(Lexer *l)
}
}
-static void add_to_string(Token *t, char c)
+static void add_to_string(Token *tok, char c)
{
- String *str = &t->value.as_string;
+ String *str = &tok->value.as_string;
if (str->len >= str->cap) {
str->cap = str->cap == 0 ? 256 : str->cap * 2;
@@ -48,72 +49,90 @@ static void add_to_string(Token *t, char c)
str->value[str->len] = '\0';
}
-static char peek(Lexer *l)
+static char peek(Lexer *lex)
{
- size_t next = l->state.pos + 1;
+ size_t next = lex->state.pos + 1;
- if (next >= l->code_len) {
+ if (next >= lex->code_len) {
return '\0';
}
- return l->code[next];
+ return lex->code[next];
}
-static void run_until_char(Lexer *l, char c)
+static char advance(Lexer *lex)
+{
+ const char c = peek(lex);
+ // if (!c) return c;
+ if (c == '\n') {
+ lex->state.line++;
+ lex->state.col = 1;
+ }
+ else {
+ lex->state.col++;
+ }
+ lex->state.pos++;
+ return c;
+}
+
+static void run_until_char(Lexer *lex, char c)
{
do {
- l->state.pos++;
- } while (peek(l) != c);
- l->state.pos++;
+ advance(lex);
+ } while (peek(lex) != c);
+ advance(lex);
}
-Lexer *lexer_lex(Lexer *l)
+Lexer *lexer_lex(Lexer *lex)
{
char c = '\0';
- size_t len = strlen(l->code);
+ size_t len = strlen(lex->code);
Token t = {
- .filename = l->filename, .path = l->path, .col = -1, .line = -1, .type = NOTYETSET, .value = {0}};
+ .filename = lex->filename, .path = lex->path, .col = -1, .line = -1, .type = NOTYETSET, .value = {0}};
- l->tokens = calloc(250, sizeof(Token));
- l->state.pos = 0;
- l->state.in_string = false;
+ lex->tokens = calloc(250, sizeof(Token));
+ lex->state.pos = 0;
+ lex->state.in_string = false;
// longest valid token first
- while (l->state.pos <= len) {
- c = l->code[l->state.pos];
+ while (lex->state.pos <= len) {
+ c = lex->code[lex->state.pos];
- if (c == '/' && peek(l) == '/') {
- run_until_char(l, '\n');
+ if (c == '/' && peek(lex) == '/') {
+ run_until_char(lex, '\n');
continue;
}
+ if (isdigit(c)) {
+ }
+
switch (c) {
case '\"':
- l->state.in_string = true;
+ lex->state.in_string = true;
t.type = LIT_STRING;
- c = l->code[++l->state.pos];
+ advance(lex);
+ c = lex->code[lex->state.pos];
- while (l->code[l->state.pos] != '\"') {
+ while (lex->code[lex->state.pos] != '\"') {
add_to_string(&t, c);
- c = l->code[++l->state.pos];
+ c = lex->code[++lex->state.pos];
}
-
- l->state.pos++;
- l->state.in_string = false;
- add_token(l, t);
+ advance(lex);
+ lex->state.in_string = false;
+ add_token(lex, t);
continue;
case EOF:
- return l;
+ return lex;
case '\n':
case '\r':
- l->state.pos++;
+ advance(lex);
continue;
break;
}
- printf("unhandled: %zu: %c\n", l->state.pos, c);
- l->state.pos++;
+ printf("unhandled: %zu: %c\n", lex->state.pos, c);
+ advance(lex);
}
- return l;
+ return lex;
}
diff --git a/test.sic b/test.sic
@@ -3,3 +3,4 @@
void main() {
str name = "Johnny Mnemonic";
}
+