sic

The sic programming language, compiler and tools (WIP)
Log | Files | Refs

commit ed045d5937f7786751e2f80042a9dabd83e92bba
parent 3b669b1ae207f7a3c8692343732463539299140e
Author: citbl <citbl@citbl.org>
Date:   Thu, 14 May 2026 19:09:40 +1000

refac

Diffstat:
M src/lexer.c | 211 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
1 file changed, 110 insertions(+), 101 deletions(-)

diff --git a/src/lexer.c b/src/lexer.c
@@ -6,107 +6,14 @@
 #include "array.h"
 #include "str.h"
 
-static void add_token(Lexer* lex, Token t)
-{
-	ARRAY_PUSH(lex->tokens, lex->len, lex->cap, t);
-}
-
-static void add_to_string(Token* tok, char c)
-{
-	Str* str = &tok->lexeme;
-	str_append(str, c);
-}
-
-static char peek(Lexer* lex)
-{
-	size_t next = lex->state.pos + 1;
-
-	if (next >= lex->code_len) {
-		return '\0';
-	}
-
-	return lex->code[next];
-}
-
-static char advance(Lexer* lex)
-{
-	const char c = peek(lex);
-	if (c == '\r') advance(lex);
-	if (c == '\n') {
-		lex->state.line++;
-		lex->state.col = 0;
-	}
-	else {
-		lex->state.col++;
-	}
-	lex->state.pos++;
-	return c;
-}
-
-static void run_until_char(Lexer* lex, char c)
-{
-	do {
-		advance(lex);
-	} while (peek(lex) != c);
-	advance(lex);
-}
-
-static void err(Lexer* lex, const char* message)
-{
-	fprintf(stderr, "%s %zu:%zu %s", lex->filename, lex->state.line, lex->state.col, message);
-	exit(1);
-}
-
-static void lex_number(Lexer* lex, Token* tok)
-{
-	char c = lex->code[lex->state.pos];
-	Str* str = &tok->lexeme;
-	tok->type = LIT_INT;
-	str_append(str, c);
-	while (lex->state.pos < lex->code_len) {
-		c = peek(lex);
-		if (c == '_' && tok->type == LIT_INT) {
-			advance(lex); // allow _ in large integers
-			continue;
-		}
-		if (c != '.' && !isdigit((unsigned char)c)) break;
-		if (c == '.' && tok->type == LIT_DECIMAL) {
-			err(lex, "parsing number failed with more than one decimal point '.'\n");
-		}
-		if (c == '.' && tok->type == LIT_INT) tok->type = LIT_DECIMAL;
-		str_append(str, c);
-		advance(lex);
-	}
-	advance(lex);
-	add_token(lex, *tok);
-}
-
-static void lex_ident(Lexer* lex, Token* tok)
-{
-	char c = lex->code[lex->state.pos];
-	Str* str = &tok->lexeme;
-	tok->type = IDENT;
-	while (lex->state.pos < lex->code_len) {
-		str_append(str, c);
-		// printf("char: %c\n", c);
-		c = peek(lex);
-		if (!isalnum((unsigned char)c)) break;
-		advance(lex);
-	}
-	add_token(lex, *tok);
-}
-
-static Token new_token(Lexer* lex)
-{
-	return (Token){
-		.filename = lex->filename,
-		.path = lex->path,
-		.col = lex->state.col,
-		.line = lex->state.line,
-		.type = NOTYETSET,
-		.lexeme = {0},
-	};
-}
+static void add_token(Lexer* lex, Token t);
+static void add_to_string(Token* tok, char c);
+static char peek(Lexer* lex);
+static char advance(Lexer* lex);
+static void run_until_char(Lexer* lex, char c);
+static void lex_number(Lexer* lex, Token* tok);
+static void lex_ident(Lexer* lex, Token* tok);
+static Token new_token(Lexer* lex);
 
 Lexer* lexer_lex(Lexer* lex)
 {
@@ -210,3 +117,105 @@ Lexer* lexer_lex(Lexer* lex)
 
 	return lex;
 }
+
+static void add_token(Lexer* lex, Token t)
+{
+	ARRAY_PUSH(lex->tokens, lex->len, lex->cap, t);
+}
+
+static void add_to_string(Token* tok, char c)
+{
+	Str* str = &tok->lexeme;
+	str_append(str, c);
+}
+
+static char peek(Lexer* lex)
+{
+	size_t next = lex->state.pos + 1;
+
+	if (next >= lex->code_len) {
+		return '\0';
+	}
+
+	return lex->code[next];
+}
+
+static char advance(Lexer* lex)
+{
+	const char c = peek(lex);
+	if (c == '\r') advance(lex);
+	if (c == '\n') {
+		lex->state.line++;
+		lex->state.col = 0;
+	}
+	else {
+		lex->state.col++;
+	}
+	lex->state.pos++;
+	return c;
+}
+
+static void run_until_char(Lexer* lex, char c)
+{
+	do {
+		advance(lex);
+	} while (peek(lex) != c);
+	advance(lex);
+}
+
+static void err(Lexer* lex, const char* message)
+{
+	fprintf(stderr, "%s %zu:%zu %s", lex->filename, lex->state.line, lex->state.col, message);
+	exit(1);
+}
+
+static void lex_number(Lexer* lex, Token* tok)
+{
+	char c = lex->code[lex->state.pos];
+	Str* str = &tok->lexeme;
+	tok->type = LIT_INT;
+	str_append(str, c);
+	while (lex->state.pos < lex->code_len) {
+		c = peek(lex);
+		if (c == '_' && tok->type == LIT_INT) {
+			advance(lex); // allow _ in large integers
+			continue;
+		}
+		if (c != '.' && !isdigit((unsigned char)c)) break;
+		if (c == '.' && tok->type == LIT_DECIMAL) {
+			err(lex, "parsing number failed with more than one decimal point '.'\n");
+		}
+		if (c == '.' && tok->type == LIT_INT) tok->type = LIT_DECIMAL;
+		str_append(str, c);
+		advance(lex);
+	}
+	advance(lex);
+	add_token(lex, *tok);
+}
+
+static void lex_ident(Lexer* lex, Token* tok)
+{
+	char c = lex->code[lex->state.pos];
+	Str* str = &tok->lexeme;
+	tok->type = IDENT;
+	while (lex->state.pos < lex->code_len) {
+		str_append(str, c);
+		// printf("char: %c\n", c);
+		c = peek(lex);
+		if (!isalnum((unsigned char)c)) break;
+		advance(lex);
+	}
+	add_token(lex, *tok);
+}
+
+static Token new_token(Lexer* lex)
+{
+	return (Token){
+		.filename = lex->filename,
+		.path = lex->path,
+		.col = lex->state.col,
+		.line = lex->state.line,
+		.type = NOTYETSET,
+		.lexeme = {0},
+	};
+}