commit 772c7870a795bda9fe1654a98576db9bcc7e47d1
parent b4580c3da03225f57a867d02f01f4d3ce4ab6fac
Author: citbl <citbl@citbl.org>
Date: Sun, 10 May 2026 14:57:33 +1000
wip lexer
Diffstat:
8 files changed, 137 insertions(+), 33 deletions(-)
diff --git a/.clang-format b/.clang-format
@@ -8,7 +8,7 @@ PointerAlignment: Right
AlignTrailingComments: true
SpacesBeforeTrailingComments: 1
KeepEmptyLinesAtTheStartOfBlocks: false
-AllowShortBlocksOnASingleLine: true
+AllowShortBlocksOnASingleLine: false
AllowShortIfStatementsOnASingleLine: true
AllowShortCaseLabelsOnASingleLine: false
AllowShortEnumsOnASingleLine: true
@@ -19,10 +19,10 @@ AlignConsecutiveMacros: false
SortIncludes: false
IndentCaseLabels: false
-ColumnLimit: 135
+ColumnLimit: 80
PenaltyBreakBeforeFirstCallParameter: 1
AlignAfterOpenBracket: DontAlign
BinPackArguments: false
BinPackParameters: false
-BreakAfterReturnType: TopLevelDefinitions
+#BreakAfterReturnType: TopLevelDefinitions
diff --git a/.clangd b/.clangd
@@ -1,8 +1,8 @@
CompileFlags:
Add: [
- -Wall,
- -Wextra,
- -Wpedantic,
+ #-Wall,
+ #-Wextra,
+ #-Wpedantic,
-Wshadow,
-Wpointer-arith,
-Wcast-qual,
diff --git a/language.sic b/language.sic
@@ -2,12 +2,15 @@ int ~age = 25
dec height = 87.5
str name = "George"
+// some comment here
+// another one here
int main {
str name = arg[0]
print(name)
return 0
}
+// definition of human struct
def Human {
str name
dec height
@@ -58,5 +61,3 @@ int y = 12
if cast(int, x) == y {
print("yay")
}
-
-
diff --git a/src/common.h b/src/common.h
@@ -3,8 +3,8 @@
#include <stdbool.h>
#include <stddef.h>
-typedef enum TokenType {
- TERMINUS = 4,
+typedef enum Token_Type {
+ NOTYETSET = 7,
IDENT,
KEYWORD,
SYMBOL,
@@ -26,31 +26,58 @@ typedef enum TokenType {
DASH_GT,
EQ,
BANG,
- NAMESPACE
-} TokenType;
+ LIT_STRING,
+ LIT_DECIMAL,
+ LIT_INT,
+ LIT_BOOL,
+ LIT_VOID,
+} Token_Type;
-typedef enum Keyword { IF = 130, ELSE, WHILE, OPT, LAZY, MATCH, DEF, FOR, EACH, SOME, NONE, OK, ERR, IN, IS, CAST } Keyword;
+typedef enum Keyword {
+ IF = 137,
+ ELSE,
+ WHILE,
+ OPT,
+ LAZY,
+ MATCH,
+ DEF,
+ FOR,
+ EACH,
+ SOME,
+ NONE,
+ OK,
+ ERR,
+ IN,
+ IS,
+ CAST
+} Keyword;
typedef struct Token {
- TokenType type;
+ Token_Type type;
union Value {
- char* as_string;
+ char *as_string;
char as_char;
size_t as_int;
bool as_bool;
} value;
- const char* path;
- const char* filename;
+ const char *path;
+ const char *filename;
size_t line;
size_t col;
} Token;
-typedef struct Lexer {
- const char* code;
- const char* path;
- const char* filename;
+typedef struct Lexer_State {
size_t pos;
- Token* tokens;
+ bool in_string;
+ bool in_comment;
+} Lexer_State;
+
+typedef struct Lexer {
+ const char *code;
+ const char *path;
+ const char *filename;
+ Lexer_State state;
+ Token *tokens;
size_t count;
size_t cap;
} Lexer;
diff --git a/src/lexer.c b/src/lexer.c
@@ -1,9 +1,9 @@
#include "lexer.h"
#include <stdio.h>
#include <stdlib.h>
+#include <string.h>
-static void
-add_token(Lexer *l, Token t)
+static void emit_token(Lexer *l, Token t)
{
if (l->count >= l->cap) {
l->cap *= 2;
@@ -13,11 +13,76 @@ add_token(Lexer *l, Token t)
l->tokens[l->count++] = t;
}
-Lexer *
-lexer_lex(Lexer *l)
+static void add_to_string(char *str, char c)
{
- Token tok = { 0 };
+}
+
+/*
+ * Look one character ahead of the current position without advancing.
+ *
+ * Returns the character at state.pos + 1. When the current character is
+ * already the terminating NUL there is nothing after it, so '\0' is
+ * returned instead of indexing one byte past the end of l->code.
+ * Expects state.pos to be no greater than strlen(l->code).
+ */
+static char peek(Lexer *l)
+{
+	const char *cur = l->code + l->state.pos;
+
+	if (*cur == '\0') {
+		return '\0';
+	}
+	return cur[1];
+}
+
+/*
+ * Advance to the next character and return it.
+ *
+ * When the current character is already the terminating NUL the position
+ * is left untouched and '\0' is returned, so repeated calls at the end of
+ * the input never index past l->code.
+ */
+static char consume(Lexer *l)
+{
+	if (l->code[l->state.pos] == '\0') {
+		return '\0';
+	}
+	l->state.pos++;
+	return l->code[l->state.pos];
+}
+
+/*
+ * Advance state.pos until the NEXT character is `c`.
+ *
+ * On return state.pos sits on the last character before the delimiter;
+ * the delimiter itself is not consumed. The scan also stops when the next
+ * character is the terminating NUL (or the current one already is), so a
+ * missing delimiter can no longer spin forever or walk off the buffer.
+ * If the delimiter directly follows the current character the position
+ * does not move at all.
+ */
+static void run_until_char(Lexer *l, char c)
+{
+	// the guard on the current char keeps the one-ahead read in bounds
+	while (l->code[l->state.pos] != '\0') {
+		char next = peek(l);
+
+		if (next == c || next == '\0') {
+			break;
+		}
+		l->state.pos++;
+	}
+}
+
+/*
+ * Scan l->code from the start and append tokens to l->tokens.
+ *
+ * Allocates a fresh token array, resets the scan state, then walks the
+ * source one character at a time:
+ *   - `//` comments are skipped up to and including the line break,
+ *   - a double-quoted string emits one LIT_STRING token (the text itself
+ *     is not captured yet) and the scan resumes after the closing quote,
+ *   - line breaks are skipped,
+ *   - anything else is reported on stdout as "unhandled".
+ *
+ * Always returns `l`. If the token array cannot be allocated, l->tokens is
+ * NULL, l->count and l->cap are 0, and a message is written to stderr.
+ */
+Lexer *lexer_lex(Lexer *l)
+{
+	size_t len = strlen(l->code);
+	// line/col are not tracked yet; (size_t)-1 marks them as unknown
+	Token t = { .filename = l->filename,
+		.path = l->path,
+		.col = (size_t)-1,
+		.line = (size_t)-1,
+		.type = NOTYETSET };
+
l->tokens = calloc(250, sizeof(Token));
- add_token(l, tok);
+	l->count = 0;
+	if (l->tokens == NULL) {
+		l->cap = 0;
+		fprintf(stderr, "lexer: out of memory\n");
+		return l;
+	}
+	// capacity must mirror the element count passed to calloc() above,
+	// otherwise emit_token() cannot tell when the array is full
+	l->cap = 250;
+	l->state.pos = 0;
+	l->state.in_string = false;
+	l->state.in_comment = false;
+
+	// longest valid token first
+	while (l->state.pos < len) {
+		char c = l->code[l->state.pos];
+
+		if (c == '/' && peek(l) == '/') {
+			// continue until the end of the line, then step onto the
+			// line break (unless the input ended first)
+			run_until_char(l, '\n');
+			if (l->code[l->state.pos] != '\0') {
+				l->state.pos++;
+			}
+			continue;
+		}
+
+		switch (c) {
+		case '\"':
+			l->state.in_string = true;
+			run_until_char(l, '\"'); // TODO buffer up the string
+			// step off the last scanned char, then past the closing
+			// quote so it is not mistaken for a new opening quote
+			if (l->code[l->state.pos] != '\0') {
+				l->state.pos++;
+			}
+			if (l->code[l->state.pos] == '\"') {
+				l->state.pos++;
+			}
+			l->state.in_string = false;
+			t.type = LIT_STRING;
+			emit_token(l, t);
+			continue;
+		case '\0':
+			// end of input: a NUL-terminated buffer never contains EOF
+			return l;
+		case '\n':
+		case '\r':
+			l->state.pos++;
+			continue;
+		default:
+			break;
+		}
+
+		printf("unhandled: %c\n", c);
+		l->state.pos++;
+	}
+
return l;
}
diff --git a/src/main.c b/src/main.c
@@ -1,9 +1,9 @@
+#include <stdio.h>
+
#include "lexer.h"
#include "utils.h"
-#include <stdio.h>
-int
-main(int argc, char **args)
+int main(int argc, char **args)
{
char *filename;
char *contents;
@@ -16,9 +16,10 @@ main(int argc, char **args)
contents = read_file(filename);
if (contents == NULL) return 1;
-
- printf("%s\n", contents);
+ lexer.code = contents;
lexer = *lexer_lex(&lexer);
+
+ printf("\n");
return 0;
}
diff --git a/src/utils.c b/src/utils.c
@@ -1,6 +1,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include "utils.h"
char *
read_file(const char *filename)
diff --git a/test.sic b/test.sic
@@ -0,0 +1,9 @@
+// this is a comment
+
+int jack = 5;
+
+// another one
+
+void main() {
+
+}