mighty

The mighty programming language, compiler and tools (WIP)
Log | Files | Refs

commit 15fb7b2e5a110528b4024e56c1c1e9d2ff384889
parent 84e387d0bffde1310a9549d791792c9fccb4fa93
Author: citbl <citbl@citbl.org>
Date:   Wed, 20 May 2026 20:31:20 +1000

wip

Diffstat:
A mtcc/.clangd | 15 +++++++++++++++
M mtcc/.gitignore | 1 +
M mtcc/src/lexer.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
M mtcc/src/lexer.h | 5 ++---
M mtcc/src/main.c | 4 ++--
M mtcc/src/token.h | 6 +++---
6 files changed, 105 insertions(+), 11 deletions(-)

diff --git a/mtcc/.clangd b/mtcc/.clangd @@ -0,0 +1,15 @@ +CompileFlags: + Add: [ + -Wall, + -Wextra, + -Wpedantic, + -Wshadow, + -Wpointer-arith, + -Wcast-qual, + -Wcast-align, + -Wstrict-prototypes, + -Wmissing-prototypes, + -xc, + -std=c17, + -g, + ] diff --git a/mtcc/.gitignore b/mtcc/.gitignore @@ -1,2 +1,3 @@ .DS_Store *.dSYM +mtcc diff --git a/mtcc/src/lexer.c b/mtcc/src/lexer.c @@ -1,10 +1,58 @@ +#include <stdbool.h> + #include "lexer.h" #include "string.h" +static bool is__(const char c); +static bool is_alpha(const char c); +static bool is_digit(const char c); +static bool is_space(const char c); +static bool is_alpha_numeric(const char c); +static bool is_dot(const char c); + void lexer_lex(lexer_t *lexer) { - size_t len = strnlen(lexer->source, lexer->source_len); - while (lexer->pos < len) { - char c = lexer->source[lexer->pos++]; + size_t len = strnlen(lexer->src, lexer->src_len); + const char *src = lexer->src; + char c, cx; + size_t i = 0; + size_t col = 1; + size_t line = 1; + size_t start = 0; + size_t start_col = 0; + + while (i < len) { + c = lexer->src[i]; + + if (is_space(c)) { + if (c == '\n') { + i++; + col = 1; + } else { + col++; + } + i++; + } + + start = i; + start_col = col; + + if (is_alpha(c) || is__(c)) { + while (i < len && (is_alpha_numeric(src[i]) || is__(src[i]))) { + i++; + col++; + } + } + + cx = (i < len) ? 
lexer->src[i] : '\0'; + + if (c == '/' && cx == '/') { + while (i < len && src[i] != '\n') { + i++; + col++; + } + continue; + } + switch (c) { case '\n': case '\t': @@ -14,3 +62,34 @@ void lexer_lex(lexer_t *lexer) { } } } + +static bool is_space(const char c) { + return c == ' ' | c == '\t' | c == '\r' | c == '\n'; +} + +static bool is__(const char c) { + return c == '_'; +} + +static bool is_alpha(const char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +static bool is_digit(const char c) { + return (c >= '0' && c <= '9'); +} + +static bool is_alpha_numeric(const char c) { + return is_alpha(c) || is_digit(c); +} + +static bool is_dot(const char c) { + return c == '.'; +} + +static const char *TYPES[] = { + /*[TOKEN_IDENT] = "ident/type", + [TOKEN_LPAREN] = "open paren", + [TOKEN_RPAREN] = "close paren", + [TOKEN_LBRACE] = "open brace",*/ +}; diff --git a/mtcc/src/lexer.h b/mtcc/src/lexer.h @@ -3,12 +3,11 @@ typedef struct lexer_t { const char *filename; - const char *source; - size_t source_len; + const char *src; + size_t src_len; token_t *tokens; size_t tok_len; size_t tok_cap; - size_t pos; } lexer_t; void lexer_lex(lexer_t *lexer); diff --git a/mtcc/src/main.c b/mtcc/src/main.c @@ -17,8 +17,8 @@ int main(int argc, char **argv) { file = read_file(filename); lexer = (lexer_t){ .filename = filename, - .source = file.contents, - .source_len = file.len, + .src = file.contents, + .src_len = file.len, }; lexer_lex(&lexer); diff --git a/mtcc/src/token.h b/mtcc/src/token.h @@ -10,9 +10,9 @@ typedef struct { token_type_t token_type; struct { - char *filename; - size_t line; - size_t col; + const char *filename; + size_t line, col; + size_t start, stop; } span_t; } token_t;