commit 2be51147b5deb9e75d8afd9db39803d1d7214e05
parent 15fb7b2e5a110528b4024e56c1c1e9d2ff384889
Author: citbl <citbl@citbl.org>
Date: Wed, 20 May 2026 21:51:53 +1000
wip lexing
Diffstat:
9 files changed, 138 insertions(+), 26 deletions(-)
diff --git a/mtcc/.clangd b/mtcc/.clangd
@@ -9,7 +9,7 @@ CompileFlags:
-Wcast-align,
-Wstrict-prototypes,
-Wmissing-prototypes,
+ -Werror=declaration-after-statement,
-xc,
- -std=c17,
- -g,
+ -std=c23
]
diff --git a/mtcc/makefile b/mtcc/makefile
@@ -1,11 +1,17 @@
MAKEFLAGS += --silent
default:
- clang -O1 -Wall -Wextra -Wpedantic -std=c23 \
+ clang -O1 -std=c23 \
-Werror=declaration-after-statement \
-o mtcc **/*.c
./mtcc target.mty
+check:
+ clang -O1 -Wall -Wextra -Wpedantic -std=c23 \
+ -Werror=declaration-after-statement \
+ -Wno-unused-function -fsanitize=address,undefined \
+ -o mtcc **/*.c
+
clean:
rm -rf *.dSYM
rm -rf mtcc
diff --git a/mtcc/mtcc b/mtcc/mtcc
Binary files differ.
diff --git a/mtcc/src/lexer.c b/mtcc/src/lexer.c
@@ -9,8 +9,9 @@ static bool is_digit(const char c);
static bool is_space(const char c);
static bool is_alpha_numeric(const char c);
static bool is_dot(const char c);
+static enum token_type compare_span_to_token(struct lexer *lexer, struct span ident);
-void lexer_lex(lexer_t *lexer) {
+void lexer_lex(struct lexer *lexer) {
size_t len = strnlen(lexer->src, lexer->src_len);
const char *src = lexer->src;
char c, cx;
@@ -19,6 +20,7 @@ void lexer_lex(lexer_t *lexer) {
size_t line = 1;
size_t start = 0;
size_t start_col = 0;
+ struct span ident;
while (i < len) {
c = lexer->src[i];
@@ -41,6 +43,9 @@ void lexer_lex(lexer_t *lexer) {
i++;
col++;
}
+ ident = (struct span){
+ .filename = lexer->filename, .col = col, .line = line, .start = start, .stop = i};
+ compare_span_to_token(lexer, ident);
}
cx = (i < len) ? lexer->src[i] : '\0';
@@ -87,9 +92,59 @@ static bool is_dot(const char c) {
return c == '.';
}
-static const char *TYPES[] = {
+static const char *NAMES_TOKEN[] = {
/*[TOKEN_IDENT] = "ident/type",
[TOKEN_LPAREN] = "open paren",
[TOKEN_RPAREN] = "close paren",
[TOKEN_LBRACE] = "open brace",*/
};
+
+/*
+ * Classify the source text covered by `ident` as a keyword or an identifier.
+ *
+ * The span is read as [start, stop): lexer_lex sets `stop` to its cursor after
+ * the scan. The whole span must equal a keyword; a prefix test would classify
+ * "index" as `in` and could never reach `inv`, which sits behind `in`.
+ *
+ * Returns the TOKEN_KEYWORD_* value for an exact match, otherwise TOKEN_IDENT
+ * (also for a span that does not fit inside the source buffer).
+ */
+static enum token_type compare_span_to_token(struct lexer *lexer, struct span ident) {
+    static const struct {
+        const char *text;
+        enum token_type type;
+    } keywords[] = {
+        {"ns", TOKEN_KEYWORD_NS},
+        {"in", TOKEN_KEYWORD_IN},
+        {"from", TOKEN_KEYWORD_FROM},
+        {"use", TOKEN_KEYWORD_USE},
+        {"ffi", TOKEN_KEYWORD_FFI},
+        {"drop", TOKEN_KEYWORD_DROP},
+        {"as", TOKEN_KEYWORD_AS},
+        {"of", TOKEN_KEYWORD_OF},
+        {"and", TOKEN_KEYWORD_AND},
+        {"or", TOKEN_KEYWORD_OR},
+        {"ref", TOKEN_KEYWORD_REF},
+        {"struct", TOKEN_KEYWORD_STRUCT},
+        {"enum", TOKEN_KEYWORD_ENUM},
+        {"pre", TOKEN_KEYWORD_PRE},
+        {"post", TOKEN_KEYWORD_POST},
+        {"inv", TOKEN_KEYWORD_INV},
+        {"if", TOKEN_KEYWORD_IF},
+        {"else", TOKEN_KEYWORD_ELSE},
+        {"where", TOKEN_KEYWORD_WHERE},
+        {"pub", TOKEN_KEYWORD_PUB},
+    };
+    size_t len, k;
+
+    /* stop is exclusive, so an identifier ending at src_len is still valid */
+    if (ident.start > ident.stop || ident.stop > lexer->src_len) return TOKEN_IDENT;
+    len = ident.stop - ident.start;
+    for (k = 0; k < sizeof keywords / sizeof keywords[0]; k++) {
+        if (strlen(keywords[k].text) == len &&
+            memcmp(lexer->src + ident.start, keywords[k].text, len) == 0) {
+            return keywords[k].type;
+        }
+    }
+    return TOKEN_IDENT;
+}
diff --git a/mtcc/src/lexer.h b/mtcc/src/lexer.h
@@ -1,13 +1,13 @@
#pragma once
#include "token.h"
-typedef struct lexer_t {
+struct lexer {
const char *filename;
const char *src;
size_t src_len;
- token_t *tokens;
+ struct token *tokens;
size_t tok_len;
size_t tok_cap;
-} lexer_t;
+};
-void lexer_lex(lexer_t *lexer);
+void lexer_lex(struct lexer *);
diff --git a/mtcc/src/main.c b/mtcc/src/main.c
@@ -5,7 +5,7 @@
int main(int argc, char **argv) {
const char *filename;
file_t file;
- lexer_t lexer;
+ struct lexer lexer;
if (argc < 2) {
const char *cmp = argv[0];
@@ -15,7 +15,7 @@ int main(int argc, char **argv) {
filename = argv[1];
file = read_file(filename);
- lexer = (lexer_t){
+ lexer = (struct lexer){
.filename = filename,
.src = file.contents,
.src_len = file.len,
diff --git a/mtcc/src/str.c b/mtcc/src/str.c
@@ -1,6 +1,6 @@
#include "str.h"
#include "array.h"
-void str_append(Str *str, const char c) {
+void str_append(struct str *str, const char c) {
STRING_PUSH(str->value, str->len, str->cap, c);
}
diff --git a/mtcc/src/str.h b/mtcc/src/str.h
@@ -2,10 +2,10 @@
#include <stddef.h>
-typedef struct Str {
+struct str {
char *value;
size_t len;
size_t cap;
-} Str;
+};
-void str_append(Str *str, const char c);
+void str_append(struct str *, const char);
diff --git a/mtcc/src/token.h b/mtcc/src/token.h
@@ -2,17 +2,68 @@
#include <stddef.h>
-typedef enum {
- T_IDENT,
-} token_type_t;
+enum token_type { /* kind tag stored in struct token's token_type member */
+ TOKEN_IDENT, /* what compare_span_to_token returns when no keyword matches */
-typedef struct {
- token_type_t token_type;
+ TOKEN_KEYWORD_NS, /* TOKEN_KEYWORD_*: words compare_span_to_token (lexer.c) looks for */
+ TOKEN_KEYWORD_IN,
+ TOKEN_KEYWORD_FROM,
+ TOKEN_KEYWORD_USE,
+ TOKEN_KEYWORD_FFI,
+ TOKEN_KEYWORD_DROP,
+ TOKEN_KEYWORD_AS,
+ TOKEN_KEYWORD_OF,
+ TOKEN_KEYWORD_AND,
+ TOKEN_KEYWORD_OR,
+ TOKEN_KEYWORD_REF,
+ TOKEN_KEYWORD_STRUCT,
+ TOKEN_KEYWORD_ENUM,
+ TOKEN_KEYWORD_PRE,
+ TOKEN_KEYWORD_POST,
+ TOKEN_KEYWORD_INV,
+ TOKEN_KEYWORD_IF,
+ TOKEN_KEYWORD_ELSE,
+ TOKEN_KEYWORD_WHERE,
+ TOKEN_KEYWORD_PUB,
- struct {
- const char *filename;
- size_t line, col;
- size_t start, stop;
- } span_t;
+ TOKEN_DOT,
+ TOKEN_COLON,
+ TOKEN_COLON_COLON,
+ TOKEN_COMMA,
+ TOKEN_EOF,
+ TOKEN_EQ,
-} token_t;
+ TOKEN_LITERAL_BOOL,
+ TOKEN_LITERAL_INT,
+ TOKEN_LITERAL_FLOAT,
+ TOKEN_LITERAL_CHAR,
+ TOKEN_LITERAL_STR,
+
+ TOKEN_MINUS,
+ TOKEN_PLUS,
+ TOKEN_STAR,
+ TOKEN_SLASH,
+ TOKEN_BAD_TOKEN,
+ TOKEN_MINUS_EQ,
+ TOKEN_PLUS_EQ,
+ TOKEN_MINUS_MINUS,
+ TOKEN_PLUS_PLUS,
+
+ TOKEN_L_PAREN,
+ TOKEN_R_PAREN,
+ TOKEN_L_BRACKET,
+ TOKEN_R_BRACKET,
+ TOKEN_L_BRACE,
+ TOKEN_R_BRACE,
+};
+
+struct span { /* location of a stretch of source text; lexer_lex fills one in after a scan */
+ const char *filename; /* lexer_lex copies lexer->filename here */
+ size_t line, col; /* lexer_lex's line/col counters when the span is built (line counter starts at 1) */
+ size_t start, stop; /* indices into lexer->src; lexer_lex sets stop to its cursor after the scan */
+};
+
+struct token { /* element type behind struct lexer's `tokens` pointer */
+ enum token_type token_type; /* which kind of token this is */
+ struct span span; /* source location of the token, see struct span */
+};