mighty

The mighty programming language, compiler and tools (WIP)
Log | Files | Refs

commit e8d159ab73e9e892625a7cf958ec3e0173ba5cc8
parent c2e2bf5d721f53ca93d722297104f049be77bc52
Author: citbl <citbl@citbl.org>
Date:   Wed, 20 May 2026 22:33:01 +1000

lex wip

Diffstat:
M mtcc/src/lexer.c | 46 +++++++++++++++++++++++++++++++++++++---------
1 file changed, 37 insertions(+), 9 deletions(-)

diff --git a/mtcc/src/lexer.c b/mtcc/src/lexer.c @@ -2,6 +2,7 @@ #include "lexer.h" #include "string.h" +#include "array.h" static bool is__(const char c); static bool is_alpha(const char c); @@ -11,7 +12,10 @@ static bool is_alpha_numeric(const char c); static bool is_dot(const char c); static enum token_type compare_span_to_token(struct lexer *lexer, struct span ident); -void lexer_lex(struct lexer *lexer) { +static void add_token(struct lexer *, struct token); + +void +lexer_lex(struct lexer *lexer) { size_t len = strnlen(lexer->src, lexer->src_len); const char *src = lexer->src; char c, cx; @@ -21,6 +25,8 @@ void lexer_lex(struct lexer *lexer) { size_t start = 0; size_t start_col = 0; struct span ident; + struct token tok; + enum token_type ttype; while (i < len) { c = lexer->src[i]; @@ -45,7 +51,12 @@ void lexer_lex(struct lexer *lexer) { } ident = (struct span){ .filename = lexer->filename, .col = col, .line = line, .start = start, .stop = i}; - compare_span_to_token(lexer, ident); + ttype = compare_span_to_token(lexer, ident); + tok = (struct token){.span = ident, .token_type = ttype}; + add_token(lexer, tok); + i++; + col++; + continue; } cx = (i < len) ? 
lexer->src[i] : '\0'; @@ -63,32 +74,43 @@ void lexer_lex(struct lexer *lexer) { case '\t': case '\r': case ' ': + i++; + col++; continue; } + + i++; + col++; } } -static bool is_space(const char c) { +static bool +is_space(const char c) { return c == ' ' | c == '\t' | c == '\r' | c == '\n'; } -static bool is__(const char c) { +static bool +is__(const char c) { return c == '_'; } -static bool is_alpha(const char c) { +static bool +is_alpha(const char c) { return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } -static bool is_digit(const char c) { +static bool +is_digit(const char c) { return (c >= '0' && c <= '9'); } -static bool is_alpha_numeric(const char c) { +static bool +is_alpha_numeric(const char c) { return is_alpha(c) || is_digit(c); } -static bool is_dot(const char c) { +static bool +is_dot(const char c) { return c == '.'; } @@ -99,7 +121,8 @@ static const char *NAMES_TOKEN[] = { [TOKEN_LBRACE] = "open brace",*/ }; -static enum token_type compare_span_to_token(struct lexer *lexer, struct span ident) { +static enum token_type +compare_span_to_token(struct lexer *lexer, struct span ident) { enum token_type t = TOKEN_IDENT; char c; size_t i; @@ -148,3 +171,8 @@ static enum token_type compare_span_to_token(struct lexer *lexer, struct span id t = TOKEN_KEYWORD_PUB; return t; } + +static void +add_token(struct lexer *l, struct token tok) { + ARRAY_PUSH(l->tokens, l->tok_len, l->tok_cap, tok); +}