mighty

The mighty programming language, compiler and tools (WIP)
Log | Files | Refs

commit 2be51147b5deb9e75d8afd9db39803d1d7214e05
parent 15fb7b2e5a110528b4024e56c1c1e9d2ff384889
Author: citbl <citbl@citbl.org>
Date:   Wed, 20 May 2026 21:51:53 +1000

wip lexing

Diffstat:
M mtcc/.clangd | 4 ++--
M mtcc/makefile | 8 +++++++-
M mtcc/mtcc | 0
M mtcc/src/lexer.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
M mtcc/src/lexer.h | 8 ++++----
M mtcc/src/main.c | 4 ++--
M mtcc/src/str.c | 2 +-
M mtcc/src/str.h | 6 +++---
M mtcc/src/token.h | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
9 files changed, 138 insertions(+), 26 deletions(-)

diff --git a/mtcc/.clangd b/mtcc/.clangd @@ -9,7 +9,7 @@ CompileFlags: -Wcast-align, -Wstrict-prototypes, -Wmissing-prototypes, + -Werror=declaration-after-statement, -xc, - -std=c17, - -g, + -std=c23 ] diff --git a/mtcc/makefile b/mtcc/makefile @@ -1,11 +1,17 @@ MAKEFLAGS += --silent default: - clang -O1 -Wall -Wextra -Wpedantic -std=c23 \ + clang -O1 -std=c23 \ -Werror=declaration-after-statement \ -o mtcc **/*.c ./mtcc target.mty +check: + clang -O1 -Wall -Wextra -Wpedantic -std=c23 \ + -Werror=declaration-after-statement \ + -Wno-unused-function -fsanitize=address,undefined \ + -o mtcc **/*.c + clean: rm -rf *.dSYM rm -rf mtcc diff --git a/mtcc/mtcc b/mtcc/mtcc Binary files differ. diff --git a/mtcc/src/lexer.c b/mtcc/src/lexer.c @@ -9,8 +9,9 @@ static bool is_digit(const char c); static bool is_space(const char c); static bool is_alpha_numeric(const char c); static bool is_dot(const char c); +static enum token_type compare_span_to_token(struct lexer *lexer, struct span ident); -void lexer_lex(lexer_t *lexer) { +void lexer_lex(struct lexer *lexer) { size_t len = strnlen(lexer->src, lexer->src_len); const char *src = lexer->src; char c, cx; @@ -19,6 +20,7 @@ void lexer_lex(lexer_t *lexer) { size_t line = 1; size_t start = 0; size_t start_col = 0; + struct span ident; while (i < len) { c = lexer->src[i]; @@ -41,6 +43,9 @@ void lexer_lex(lexer_t *lexer) { i++; col++; } + ident = (struct span){ + .filename = lexer->filename, .col = col, .line = line, .start = start, .stop = i}; + compare_span_to_token(lexer, ident); } cx = (i < len) ? 
lexer->src[i] : '\0'; @@ -87,9 +92,59 @@ static bool is_dot(const char c) { return c == '.'; } -static const char *TYPES[] = { +static const char *NAMES_TOKEN[] = { /*[TOKEN_IDENT] = "ident/type", [TOKEN_LPAREN] = "open paren", [TOKEN_RPAREN] = "close paren", [TOKEN_LBRACE] = "open brace",*/ }; + +static enum token_type compare_span_to_token(struct lexer *lexer, struct span ident) { + enum token_type t = TOKEN_IDENT; + char c; + size_t i; + + if (ident.start >= lexer->src_len || ident.stop >= lexer->src_len) return false; + + if (strncmp(lexer->src + ident.start, "ns", 2) == 0) + t = TOKEN_KEYWORD_NS; + else if (strncmp(lexer->src + ident.start, "in", 2) == 0) + t = TOKEN_KEYWORD_IN; + else if (strncmp(lexer->src + ident.start, "from", 4) == 0) + t = TOKEN_KEYWORD_FROM; + else if (strncmp(lexer->src + ident.start, "use", 3) == 0) + t = TOKEN_KEYWORD_USE; + else if (strncmp(lexer->src + ident.start, "ffi", 3) == 0) + t = TOKEN_KEYWORD_FFI; + else if (strncmp(lexer->src + ident.start, "drop", 4) == 0) + t = TOKEN_KEYWORD_DROP; + else if (strncmp(lexer->src + ident.start, "as", 2) == 0) + t = TOKEN_KEYWORD_AS; + else if (strncmp(lexer->src + ident.start, "of", 2) == 0) + t = TOKEN_KEYWORD_OF; + else if (strncmp(lexer->src + ident.start, "and", 3) == 0) + t = TOKEN_KEYWORD_AND; + else if (strncmp(lexer->src + ident.start, "or", 2) == 0) + t = TOKEN_KEYWORD_OR; + else if (strncmp(lexer->src + ident.start, "ref", 3) == 0) + t = TOKEN_KEYWORD_REF; + else if (strncmp(lexer->src + ident.start, "struct", 6) == 0) + t = TOKEN_KEYWORD_STRUCT; + else if (strncmp(lexer->src + ident.start, "enum", 4) == 0) + t = TOKEN_KEYWORD_ENUM; + else if (strncmp(lexer->src + ident.start, "pre", 3) == 0) + t = TOKEN_KEYWORD_PRE; + else if (strncmp(lexer->src + ident.start, "post", 4) == 0) + t = TOKEN_KEYWORD_POST; + else if (strncmp(lexer->src + ident.start, "inv", 3) == 0) + t = TOKEN_KEYWORD_INV; + else if (strncmp(lexer->src + ident.start, "if", 2) == 0) + t = TOKEN_KEYWORD_IF; + else if 
(strncmp(lexer->src + ident.start, "else", 4) == 0) + t = TOKEN_KEYWORD_ELSE; + else if (strncmp(lexer->src + ident.start, "where", 5) == 0) + t = TOKEN_KEYWORD_WHERE; + else if (strncmp(lexer->src + ident.start, "pub", 3) == 0) + t = TOKEN_KEYWORD_PUB; + return t; +} diff --git a/mtcc/src/lexer.h b/mtcc/src/lexer.h @@ -1,13 +1,13 @@ #pragma once #include "token.h" -typedef struct lexer_t { +struct lexer { const char *filename; const char *src; size_t src_len; - token_t *tokens; + struct token *tokens; size_t tok_len; size_t tok_cap; -} lexer_t; +}; -void lexer_lex(lexer_t *lexer); +void lexer_lex(struct lexer *); diff --git a/mtcc/src/main.c b/mtcc/src/main.c @@ -5,7 +5,7 @@ int main(int argc, char **argv) { const char *filename; file_t file; - lexer_t lexer; + struct lexer lexer; if (argc < 2) { const char *cmp = argv[0]; @@ -15,7 +15,7 @@ int main(int argc, char **argv) { filename = argv[1]; file = read_file(filename); - lexer = (lexer_t){ + lexer = (struct lexer){ .filename = filename, .src = file.contents, .src_len = file.len, diff --git a/mtcc/src/str.c b/mtcc/src/str.c @@ -1,6 +1,6 @@ #include "str.h" #include "array.h" -void str_append(Str *str, const char c) { +void str_append(struct str *str, const char c) { STRING_PUSH(str->value, str->len, str->cap, c); } diff --git a/mtcc/src/str.h b/mtcc/src/str.h @@ -2,10 +2,10 @@ #include <stddef.h> -typedef struct Str { +struct str { char *value; size_t len; size_t cap; -} Str; +}; -void str_append(Str *str, const char c); +void str_append(struct str *, const char); diff --git a/mtcc/src/token.h b/mtcc/src/token.h @@ -2,17 +2,68 @@ #include <stddef.h> -typedef enum { - T_IDENT, -} token_type_t; +enum token_type { + TOKEN_IDENT, -typedef struct { - token_type_t token_type; + TOKEN_KEYWORD_NS, + TOKEN_KEYWORD_IN, + TOKEN_KEYWORD_FROM, + TOKEN_KEYWORD_USE, + TOKEN_KEYWORD_FFI, + TOKEN_KEYWORD_DROP, + TOKEN_KEYWORD_AS, + TOKEN_KEYWORD_OF, + TOKEN_KEYWORD_AND, + TOKEN_KEYWORD_OR, + TOKEN_KEYWORD_REF, + 
TOKEN_KEYWORD_STRUCT, + TOKEN_KEYWORD_ENUM, + TOKEN_KEYWORD_PRE, + TOKEN_KEYWORD_POST, + TOKEN_KEYWORD_INV, + TOKEN_KEYWORD_IF, + TOKEN_KEYWORD_ELSE, + TOKEN_KEYWORD_WHERE, + TOKEN_KEYWORD_PUB, - struct { - const char *filename; - size_t line, col; - size_t start, stop; - } span_t; + TOKEN_DOT, + TOKEN_COLON, + TOKEN_COLON_COLON, + TOKEN_COMMA, + TOKEN_EOF, + TOKEN_EQ, -} token_t; + TOKEN_LITERAL_BOOL, + TOKEN_LITERAL_INT, + TOKEN_LITERAL_FLOAT, + TOKEN_LITERAL_CHAR, + TOKEN_LITERAL_STR, + + TOKEN_MINUS, + TOKEN_PLUS, + TOKEN_STAR, + TOKEN_SLASH, + TOKEN_BAD_TOKEN, + TOKEN_MINUS_EQ, + TOKEN_PLUS_EQ, + TOKEN_MINUS_MINUS, + TOKEN_PLUS_PLUS, + + TOKEN_L_PAREN, + TOKEN_R_PAREN, + TOKEN_L_BRACKET, + TOKEN_R_BRACKET, + TOKEN_L_BRACE, + TOKEN_R_BRACE, +}; + +struct span { + const char *filename; + size_t line, col; + size_t start, stop; +}; + +struct token { + enum token_type token_type; + struct span span; +};