commit e8d159ab73e9e892625a7cf958ec3e0173ba5cc8
parent c2e2bf5d721f53ca93d722297104f049be77bc52
Author: citbl <citbl@citbl.org>
Date: Wed, 20 May 2026 22:33:01 +1000
lex wip
Diffstat:
1 file changed, 37 insertions(+), 9 deletions(-)
diff --git a/mtcc/src/lexer.c b/mtcc/src/lexer.c
@@ -2,6 +2,7 @@
#include "lexer.h"
#include "string.h"
+#include "array.h"
static bool is__(const char c);
static bool is_alpha(const char c);
@@ -11,7 +12,10 @@ static bool is_alpha_numeric(const char c);
static bool is_dot(const char c);
static enum token_type compare_span_to_token(struct lexer *lexer, struct span ident);
-void lexer_lex(struct lexer *lexer) {
+static void add_token(struct lexer *, struct token);
+
+void
+lexer_lex(struct lexer *lexer) {
size_t len = strnlen(lexer->src, lexer->src_len);
const char *src = lexer->src;
char c, cx;
@@ -21,6 +25,8 @@ void lexer_lex(struct lexer *lexer) {
size_t start = 0;
size_t start_col = 0;
struct span ident;
+ struct token tok;
+ enum token_type ttype;
while (i < len) {
c = lexer->src[i];
@@ -45,7 +51,12 @@ void lexer_lex(struct lexer *lexer) {
}
ident = (struct span){
.filename = lexer->filename, .col = col, .line = line, .start = start, .stop = i};
- compare_span_to_token(lexer, ident);
+ ttype = compare_span_to_token(lexer, ident);
+ tok = (struct token){.span = ident, .token_type = ttype};
+ add_token(lexer, tok);
+ i++;
+ col++;
+ continue;
}
cx = (i < len) ? lexer->src[i] : '\0';
@@ -63,32 +74,43 @@ void lexer_lex(struct lexer *lexer) {
case '\t':
case '\r':
case ' ':
+ i++;
+ col++;
continue;
}
+
+ i++;
+ col++;
}
}
-static bool is_space(const char c) {
+static bool
+is_space(const char c) {
return c == ' ' | c == '\t' | c == '\r' | c == '\n';
}
-static bool is__(const char c) {
+static bool
+is__(const char c) {
return c == '_';
}
-static bool is_alpha(const char c) {
+static bool
+is_alpha(const char c) {
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
-static bool is_digit(const char c) {
+static bool
+is_digit(const char c) {
return (c >= '0' && c <= '9');
}
-static bool is_alpha_numeric(const char c) {
+static bool
+is_alpha_numeric(const char c) {
return is_alpha(c) || is_digit(c);
}
-static bool is_dot(const char c) {
+static bool
+is_dot(const char c) {
return c == '.';
}
@@ -99,7 +121,8 @@ static const char *NAMES_TOKEN[] = {
[TOKEN_LBRACE] = "open brace",*/
};
-static enum token_type compare_span_to_token(struct lexer *lexer, struct span ident) {
+static enum token_type
+compare_span_to_token(struct lexer *lexer, struct span ident) {
enum token_type t = TOKEN_IDENT;
char c;
size_t i;
@@ -148,3 +171,8 @@ static enum token_type compare_span_to_token(struct lexer *lexer, struct span id
t = TOKEN_KEYWORD_PUB;
return t;
}
+
+static void
+add_token(struct lexer *l, struct token tok) {
+ ARRAY_PUSH(l->tokens, l->tok_len, l->tok_cap, tok);
+}