wip lexing - mighty - The mighty programming language, compiler and tools (WIP)

commit 2be51147b5deb9e75d8afd9db39803d1d7214e05
parent 15fb7b2e5a110528b4024e56c1c1e9d2ff384889
Author: citbl <citbl@citbl.org>
Date:   Wed, 20 May 2026 21:51:53 +1000

wip lexing

Diffstat:
M mtcc/.clangd  | 4 ++--
M mtcc/makefile  | 8 +++++++-
M mtcc/mtcc  | 0 
M mtcc/src/lexer.c  | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M mtcc/src/lexer.h  | 8 ++++----
M mtcc/src/main.c  | 4 ++--
M mtcc/src/str.c  | 2 +-
M mtcc/src/str.h  | 6 +++---
M mtcc/src/token.h  | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------

9 files changed, 138 insertions(+), 26 deletions(-)
diff --git a/mtcc/.clangd b/mtcc/.clangd
@@ -9,7 +9,7 @@ CompileFlags:
     -Wcast-align,
     -Wstrict-prototypes,
     -Wmissing-prototypes,
+    -Werror=declaration-after-statement, 
     -xc,
-    -std=c17,
-    -g,
+    -std=c23
     ]
diff --git a/mtcc/makefile b/mtcc/makefile
@@ -1,11 +1,17 @@
 MAKEFLAGS += --silent
 
 default:
-	clang -O1 -Wall -Wextra -Wpedantic -std=c23 \
+	clang -O1 -std=c23 \
 		-Werror=declaration-after-statement \
 		-o mtcc **/*.c
 	./mtcc target.mty
 
+check:
+	clang -O1 -Wall -Wextra -Wpedantic -std=c23 \
+		-Werror=declaration-after-statement \
+		-Wno-unused-function -fsanitize=address,undefined \
+		-o mtcc **/*.c
+
 clean:
 	rm -rf *.dSYM
 	rm -rf mtcc
diff --git a/mtcc/mtcc b/mtcc/mtcc
Binary files differ.
diff --git a/mtcc/src/lexer.c b/mtcc/src/lexer.c
@@ -9,8 +9,9 @@ static bool is_digit(const char c);
 static bool is_space(const char c);
 static bool is_alpha_numeric(const char c);
 static bool is_dot(const char c);
+static enum token_type compare_span_to_token(struct lexer *lexer, struct span ident);
 
-void lexer_lex(lexer_t *lexer) {
+void lexer_lex(struct lexer *lexer) {
     size_t len = strnlen(lexer->src, lexer->src_len);
     const char *src = lexer->src;
     char c, cx;
@@ -19,6 +20,7 @@ void lexer_lex(lexer_t *lexer) {
     size_t line = 1;
     size_t start = 0;
     size_t start_col = 0;
+    struct span ident;
 
     while (i < len) {
         c = lexer->src[i];
@@ -41,6 +43,9 @@ void lexer_lex(lexer_t *lexer) {
                 i++;
                 col++;
             }
+            ident = (struct span){
+                .filename = lexer->filename, .col = col, .line = line, .start = start, .stop = i};
+            compare_span_to_token(lexer, ident);
         }
 
         cx = (i < len) ? lexer->src[i] : '\0';
@@ -87,9 +92,59 @@ static bool is_dot(const char c) {
     return c == '.';
 }
 
-static const char *TYPES[] = {
+static const char *NAMES_TOKEN[] = {
     /*[TOKEN_IDENT] = "ident/type",
     [TOKEN_LPAREN] = "open paren",
     [TOKEN_RPAREN] = "close paren",
     [TOKEN_LBRACE] = "open brace",*/
 };
+
+/*
+ * Classify the source text covered by `ident` as a keyword or an identifier.
+ *
+ * The bytes [ident.start, ident.stop) of lexer->src must equal a keyword
+ * exactly. Matching only a prefix would classify "index" as `in` and would
+ * keep `inv` (shadowed by the earlier `in` test) from ever being recognised.
+ *
+ * Returns TOKEN_IDENT for any other text or when the span is out of range.
+ */
+static enum token_type compare_span_to_token(struct lexer *lexer, struct span ident) {
+    static const struct {
+        const char *text;
+        enum token_type type;
+    } keywords[] = {
+        {"ns", TOKEN_KEYWORD_NS},
+        {"in", TOKEN_KEYWORD_IN},
+        {"from", TOKEN_KEYWORD_FROM},
+        {"use", TOKEN_KEYWORD_USE},
+        {"ffi", TOKEN_KEYWORD_FFI},
+        {"drop", TOKEN_KEYWORD_DROP},
+        {"as", TOKEN_KEYWORD_AS},
+        {"of", TOKEN_KEYWORD_OF},
+        {"and", TOKEN_KEYWORD_AND},
+        {"or", TOKEN_KEYWORD_OR},
+        {"ref", TOKEN_KEYWORD_REF},
+        {"struct", TOKEN_KEYWORD_STRUCT},
+        {"enum", TOKEN_KEYWORD_ENUM},
+        {"pre", TOKEN_KEYWORD_PRE},
+        {"post", TOKEN_KEYWORD_POST},
+        {"inv", TOKEN_KEYWORD_INV},
+        {"if", TOKEN_KEYWORD_IF},
+        {"else", TOKEN_KEYWORD_ELSE},
+        {"where", TOKEN_KEYWORD_WHERE},
+        {"pub", TOKEN_KEYWORD_PUB},
+    };
+    const char *word;
+    size_t len, k;
+
+    /* stop is exclusive, so a span ending at end of input has stop == src_len */
+    if (ident.start > ident.stop || ident.stop > lexer->src_len) return TOKEN_IDENT;
+
+    word = lexer->src + ident.start;
+    len = ident.stop - ident.start;
+    for (k = 0; k < sizeof keywords / sizeof keywords[0]; k++) {
+        if (strlen(keywords[k].text) == len && memcmp(word, keywords[k].text, len) == 0)
+            return keywords[k].type;
+    }
+    return TOKEN_IDENT;
+}
diff --git a/mtcc/src/lexer.h b/mtcc/src/lexer.h
@@ -1,13 +1,13 @@
 #pragma once
 #include "token.h"
 
-typedef struct lexer_t {
+struct lexer {
     const char *filename;
     const char *src;
     size_t src_len;
-    token_t *tokens;
+    struct token *tokens;
     size_t tok_len;
     size_t tok_cap;
-} lexer_t;
+};
 
-void lexer_lex(lexer_t *lexer);
+void lexer_lex(struct lexer *);
diff --git a/mtcc/src/main.c b/mtcc/src/main.c
@@ -5,7 +5,7 @@
 int main(int argc, char **argv) {
     const char *filename;
     file_t file;
-    lexer_t lexer;
+    struct lexer lexer;
 
     if (argc < 2) {
         const char *cmp = argv[0];
@@ -15,7 +15,7 @@ int main(int argc, char **argv) {
 
     filename = argv[1];
     file = read_file(filename);
-    lexer = (lexer_t){
+    lexer = (struct lexer){
         .filename = filename,
         .src = file.contents,
         .src_len = file.len,
diff --git a/mtcc/src/str.c b/mtcc/src/str.c
@@ -1,6 +1,6 @@
 #include "str.h"
 #include "array.h"
 
-void str_append(Str *str, const char c) {
+void str_append(struct str *str, const char c) {
     STRING_PUSH(str->value, str->len, str->cap, c);
 }
diff --git a/mtcc/src/str.h b/mtcc/src/str.h
@@ -2,10 +2,10 @@
 
 #include <stddef.h>
 
-typedef struct Str {
+struct str {
     char *value;
     size_t len;
     size_t cap;
-} Str;
+};
 
-void str_append(Str *str, const char c);
+void str_append(struct str *, const char);
diff --git a/mtcc/src/token.h b/mtcc/src/token.h
@@ -2,17 +2,68 @@
 
 #include <stddef.h>
 
-typedef enum {
-    T_IDENT,
-} token_type_t;
+enum token_type {
+    TOKEN_IDENT,
 
-typedef struct {
-    token_type_t token_type;
+    TOKEN_KEYWORD_NS,
+    TOKEN_KEYWORD_IN,
+    TOKEN_KEYWORD_FROM,
+    TOKEN_KEYWORD_USE,
+    TOKEN_KEYWORD_FFI,
+    TOKEN_KEYWORD_DROP,
+    TOKEN_KEYWORD_AS,
+    TOKEN_KEYWORD_OF,
+    TOKEN_KEYWORD_AND,
+    TOKEN_KEYWORD_OR,
+    TOKEN_KEYWORD_REF,
+    TOKEN_KEYWORD_STRUCT,
+    TOKEN_KEYWORD_ENUM,
+    TOKEN_KEYWORD_PRE,
+    TOKEN_KEYWORD_POST,
+    TOKEN_KEYWORD_INV,
+    TOKEN_KEYWORD_IF,
+    TOKEN_KEYWORD_ELSE,
+    TOKEN_KEYWORD_WHERE,
+    TOKEN_KEYWORD_PUB,
 
-    struct {
-        const char *filename;
-        size_t line, col;
-        size_t start, stop;
-    } span_t;
+    TOKEN_DOT,
+    TOKEN_COLON,
+    TOKEN_COLON_COLON,
+    TOKEN_COMMA,
+    TOKEN_EOF,
+    TOKEN_EQ,
 
-} token_t;
+    TOKEN_LITERAL_BOOL,
+    TOKEN_LITERAL_INT,
+    TOKEN_LITERAL_FLOAT,
+    TOKEN_LITERAL_CHAR,
+    TOKEN_LITERAL_STR,
+
+    TOKEN_MINUS,
+    TOKEN_PLUS,
+    TOKEN_STAR,
+    TOKEN_SLASH,
+    TOKEN_BAD_TOKEN,
+    TOKEN_MINUS_EQ,
+    TOKEN_PLUS_EQ,
+    TOKEN_MINUS_MINUS,
+    TOKEN_PLUS_PLUS,
+
+    TOKEN_L_PAREN,
+    TOKEN_R_PAREN,
+    TOKEN_L_BRACKET,
+    TOKEN_R_BRACKET,
+    TOKEN_L_BRACE,
+    TOKEN_R_BRACE,
+};
+
+struct span { /* location of a piece of source text */
+    const char *filename; /* name of the file the text came from */
+    size_t line, col;     /* lexer line/column counters when the span was recorded */
+    size_t start, stop;   /* byte offsets into the lexer's src buffer */
+};
+
+struct token { /* one lexed token */
+    enum token_type token_type; /* which kind of token this is */
+    struct span span;           /* where the token sits in the source */
+};

	mighty The mighty programming language, compiler and tools (WIP)
	Log \| Files \| Refs

M	mtcc/.clangd	\|	4	++--
M	mtcc/makefile	\|	8	+++++++-
M	mtcc/mtcc	\|	0
M	mtcc/src/lexer.c	\|	59	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M	mtcc/src/lexer.h	\|	8	++++----
M	mtcc/src/main.c	\|	4	++--
M	mtcc/src/str.c	\|	2	+-
M	mtcc/src/str.h	\|	6	+++---
M	mtcc/src/token.h	\|	73	++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------