commit 15fb7b2e5a110528b4024e56c1c1e9d2ff384889
parent 84e387d0bffde1310a9549d791792c9fccb4fa93
Author: citbl <citbl@citbl.org>
Date: Wed, 20 May 2026 20:31:20 +1000
wip
Diffstat:
6 files changed, 105 insertions(+), 11 deletions(-)
diff --git a/mtcc/.clangd b/mtcc/.clangd
@@ -0,0 +1,15 @@
+CompileFlags:
+ Add: [
+ -Wall,
+ -Wextra,
+ -Wpedantic,
+ -Wshadow,
+ -Wpointer-arith,
+ -Wcast-qual,
+ -Wcast-align,
+ -Wstrict-prototypes,
+ -Wmissing-prototypes,
+ -xc,
+ -std=c17,
+ -g,
+ ]
diff --git a/mtcc/.gitignore b/mtcc/.gitignore
@@ -1,2 +1,3 @@
.DS_Store
*.dSYM
+mtcc
diff --git a/mtcc/src/lexer.c b/mtcc/src/lexer.c
@@ -1,10 +1,58 @@
+#include <stdbool.h>
+
#include "lexer.h"
#include "string.h"
+static bool is__(const char c);
+static bool is_alpha(const char c);
+static bool is_digit(const char c);
+static bool is_space(const char c);
+static bool is_alpha_numeric(const char c);
+static bool is_dot(const char c);
+
void lexer_lex(lexer_t *lexer) {
- size_t len = strnlen(lexer->source, lexer->source_len);
- while (lexer->pos < len) {
- char c = lexer->source[lexer->pos++];
+ size_t len = strnlen(lexer->src, lexer->src_len);
+ const char *src = lexer->src;
+ char c, cx;
+ size_t i = 0;
+ size_t col = 1;
+ size_t line = 1;
+ size_t start = 0;
+ size_t start_col = 0;
+
+ while (i < len) {
+ c = lexer->src[i];
+
+ if (is_space(c)) {
+ if (c == '\n') {
+ i++;
+ col = 1;
+ } else {
+ col++;
+ }
+ i++;
+ }
+
+ start = i;
+ start_col = col;
+
+ if (is_alpha(c) || is__(c)) {
+ while (i < len && (is_alpha_numeric(src[i]) || is__(src[i]))) {
+ i++;
+ col++;
+ }
+ }
+
+ cx = (i < len) ? lexer->src[i] : '\0';
+
+ if (c == '/' && cx == '/') {
+ while (i < len && src[i] != '\n') {
+ i++;
+ col++;
+ }
+ continue;
+ }
+
switch (c) {
case '\n':
case '\t':
@@ -14,3 +62,34 @@ void lexer_lex(lexer_t *lexer) {
}
}
}
+
+/*
+ * Return true when c is a space, horizontal tab, carriage return or
+ * line feed; false for every other character.
+ */
+static bool is_space(const char c) {
+    /* Logical || instead of bitwise |: same truth table, but it
+     * short-circuits and states the boolean intent. */
+    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
+}
+
+/* Return true only when c is the underscore character. */
+static bool is__(const char c) {
+    const bool underscore = ('_' == c);
+    return underscore;
+}
+
+/*
+ * Return true when c lies in 'A'..'Z' or in 'a'..'z'.
+ * Digits, underscore and every other character yield false.
+ */
+static bool is_alpha(const char c) {
+    if (c >= 'A' && c <= 'Z') {
+        return true;
+    }
+    if (c >= 'a' && c <= 'z') {
+        return true;
+    }
+    return false;
+}
+
+/* Return true when c lies in the range '0'..'9'. */
+static bool is_digit(const char c) {
+    if (c < '0') {
+        return false;
+    }
+    return c <= '9';
+}
+
+/* Return true when c is accepted by is_alpha() or by is_digit(). */
+static bool is_alpha_numeric(const char c) {
+    if (is_alpha(c)) {
+        return true;
+    }
+    return is_digit(c);
+}
+
+/* Return true only when c is the '.' character. */
+static bool is_dot(const char c) {
+    if (c != '.') {
+        return false;
+    }
+    return true;
+}
+
+/*
+ * String table whose (currently commented-out) entries pair TOKEN_*
+ * designators with short descriptions such as "open paren".
+ * NOTE(review): presumably meant to map token types to display names,
+ * confirm against the token type enum.
+ * NOTE(review): with every entry commented out the initializer list is
+ * empty. ISO C before C23 does not permit `{}` as an initializer
+ * (compilers accept it as an extension), so a -Wpedantic -std=c17 build
+ * is expected to diagnose this. Restore the entries or drop the table
+ * until it is needed.
+ */
+static const char *TYPES[] = {
+ /*[TOKEN_IDENT] = "ident/type",
+ [TOKEN_LPAREN] = "open paren",
+ [TOKEN_RPAREN] = "close paren",
+ [TOKEN_LBRACE] = "open brace",*/
+};
diff --git a/mtcc/src/lexer.h b/mtcc/src/lexer.h
@@ -3,12 +3,11 @@
typedef struct lexer_t {
const char *filename;
- const char *source;
- size_t source_len;
+ const char *src;
+ size_t src_len;
token_t *tokens;
size_t tok_len;
size_t tok_cap;
- size_t pos;
} lexer_t;
void lexer_lex(lexer_t *lexer);
diff --git a/mtcc/src/main.c b/mtcc/src/main.c
@@ -17,8 +17,8 @@ int main(int argc, char **argv) {
file = read_file(filename);
lexer = (lexer_t){
.filename = filename,
- .source = file.contents,
- .source_len = file.len,
+ .src = file.contents,
+ .src_len = file.len,
};
lexer_lex(&lexer);
diff --git a/mtcc/src/token.h b/mtcc/src/token.h
@@ -10,9 +10,9 @@ typedef struct {
token_type_t token_type;
struct {
- char *filename;
- size_t line;
- size_t col;
+ const char *filename;
+ size_t line, col;
+ size_t start, stop;
} span_t;
} token_t;