commit 18d88480bc4da10e9c224d951f7d7510bb542d1a
parent 772c7870a795bda9fe1654a98576db9bcc7e47d1
Author: citbl <citbl@citbl.org>
Date: Sun, 10 May 2026 16:53:31 +1000
token wips
Diffstat:
8 files changed, 94 insertions(+), 32 deletions(-)
diff --git a/.clang-format b/.clang-format
@@ -19,7 +19,7 @@ AlignConsecutiveMacros: false
SortIncludes: false
IndentCaseLabels: false
-ColumnLimit: 80
+ColumnLimit: 120
PenaltyBreakBeforeFirstCallParameter: 1
AlignAfterOpenBracket: DontAlign
BinPackArguments: false
diff --git a/src/common.h b/src/common.h
@@ -1,10 +1,12 @@
#pragma once
+#include <stdlib.h>
+#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>
typedef enum Token_Type {
- NOTYETSET = 7,
+ NOTYETSET = 9,
IDENT,
KEYWORD,
SYMBOL,
@@ -34,7 +36,7 @@ typedef enum Token_Type {
} Token_Type;
typedef enum Keyword {
- IF = 137,
+ IF = 139,
ELSE,
WHILE,
OPT,
@@ -52,10 +54,18 @@ typedef enum Keyword {
CAST
} Keyword;
+/////////////////////////////////////////////////
+
+typedef struct String { /* Growable character buffer; holds the text of string-literal tokens. */
+ char *value; /* heap buffer grown with realloc and NUL-terminated by add_to_string(); NULL until the first append when zero-initialised */
+ size_t cap; /* bytes currently allocated for value */
+ size_t len; /* characters stored in value, excluding the terminating NUL */
+} String;
+
typedef struct Token {
Token_Type type;
union Value {
- char *as_string;
+ String as_string;
char as_char;
size_t as_int;
bool as_bool;
@@ -74,10 +84,15 @@ typedef struct Lexer_State {
typedef struct Lexer { /* Lexer input, scan state, and the growable token array it fills. */
const char *code; /* source text being lexed; indexed by state.pos */
+ size_t code_len; /* length of code in bytes; main() sets it from strlen(contents) */
const char *path; /* copied into the token template built by lexer_lex() */
const char *filename; /* copied into the token template built by lexer_lex() */
Lexer_State state; /* scan state; pos is the current index into code, in_string is set while lexing a string literal */
Token *tokens; /* heap array of tokens, appended to by add_token() */
- size_t count;
+ size_t len; /* number of tokens stored in tokens */
size_t cap; /* capacity that add_token() sizes tokens for, in Token elements */
} Lexer;
+
+/////////////////////////////////////////////////
+
+void die(bool condition, const char *message); /* If condition is true, write message to stderr and exit(1); otherwise return. */
diff --git a/src/lexer.c b/src/lexer.c
@@ -1,27 +1,60 @@
#include "lexer.h"
#include <stdio.h>
-#include <stdlib.h>
#include <string.h>
-static void emit_token(Lexer *l, Token t)
+/*
+ * Append token t (copied by value) to the lexer's token array.
+ *
+ * When the array is full its capacity is doubled, starting at 256 slots
+ * when cap is 0. The program exits through die() if the allocation fails;
+ * l->tokens and l->cap are only updated once the new array exists, so the
+ * old array is never lost to a failed realloc.
+ *
+ * l: lexer that owns the array; l->len is incremented.
+ * t: token to store.
+ */
+static void add_token(Lexer *l, Token t)
{
- if (l->count >= l->cap) {
- l->cap *= 2;
- l->tokens = realloc(l->tokens, l->cap * sizeof(Token));
+	if (l->len >= l->cap) {
+		size_t new_cap = l->cap == 0 ? 256 : l->cap * 2;
+		Token *grown = realloc(l->tokens, new_cap * sizeof *grown);
+
+		die(grown == NULL, "add_token: out of memory\n");
+		l->tokens = grown;
+		l->cap = new_cap;
}
- l->tokens[l->count++] = t;
+	l->tokens[l->len++] = t;
}
-static void add_to_string(char *str, char c)
+/*
+ * Print a human-readable dump of every token in l to stdout, one per line.
+ *
+ * Only LIT_STRING tokens are rendered so far; any other token type is
+ * reported as unhandled together with its numeric Token_Type value.
+ *
+ * l: lexer whose tokens[0 .. len-1] are printed; it is not modified.
+ */
+void print_tokens(Lexer *l)
{
+	size_t i;
+
+	for (i = 0; i < l->len; i++) {
+		const Token *tok = &l->tokens[i];
+		const char *text;
+
+		switch (tok->type) {
+		case LIT_STRING:
+			/*
+			 * An empty literal ("") never goes through add_to_string,
+			 * so its buffer may still be NULL; passing NULL to %s is
+			 * undefined behaviour, so print an empty string instead.
+			 */
+			text = tok->value.as_string.value;
+			printf("STRING LITERAL: %s\n", text != NULL ? text : "");
+			break;
+		default:
+			/* End the line so consecutive reports do not run together. */
+			printf("print_tokens: unhandled token %i\n", (int)tok->type);
+			break;
+		}
+	}
+}
+
+/*
+ * Append character c to the string payload of token t and keep the buffer
+ * NUL-terminated.
+ *
+ * The buffer starts at 256 bytes and doubles whenever the new character
+ * plus the terminating NUL would not fit. The program exits through die()
+ * if the allocation fails.
+ *
+ * t: token whose value.as_string is being built; a zero-initialised String
+ *    (NULL value, cap 0, len 0) is a valid empty starting state.
+ * c: character to append.
+ */
+static void add_to_string(Token *t, char c)
+{
+	String *str = &t->value.as_string;
+
+	/*
+	 * Two bytes are written below (c and the NUL), so grow while fewer
+	 * than two are free. Checking only len >= cap let the NUL land one
+	 * byte past the allocation when the buffer was exactly full.
+	 */
+	if (str->len + 1 >= str->cap) {
+		size_t new_cap = str->cap == 0 ? 256 : str->cap * 2;
+		char *grown = realloc(str->value, new_cap);
+
+		die(grown == NULL, "add_to_string: out of memory\n");
+		str->value = grown;
+		str->cap = new_cap;
+	}
+
+	str->value[str->len++] = c;
+	str->value[str->len] = '\0';
}
+/*
+ * Look at the character one position past the lexer's current position
+ * without advancing. Returns '\0' when that position is at or beyond
+ * code_len.
+ */
static char peek(Lexer *l)
{
- char c = l->code[l->state.pos + 1];
- // printf("PEEK: %c\n", c);
- return c;
+	const size_t ahead = l->state.pos + 1;
+
+	return ahead < l->code_len ? l->code[ahead] : '\0';
}
static char consume(Lexer *l)
@@ -41,11 +74,9 @@ Lexer *lexer_lex(Lexer *l)
{
char c = '\0';
size_t len = strlen(l->code);
- Token t = { .filename = l->filename,
- .path = l->path,
- .col = -1,
- .line = -1,
- .type = NOTYETSET };
+ Token t = {
+ .filename = l->filename, .path = l->path, .col = -1, .line = -1, .type = NOTYETSET, .value = { 0 }
+ };
l->tokens = calloc(250, sizeof(Token));
l->state.pos = 0;
@@ -65,11 +96,18 @@ Lexer *lexer_lex(Lexer *l)
switch (c) {
case '\"':
l->state.in_string = true;
- run_until_char(l, '\"'); // TODO buffer up the string
t.type = LIT_STRING;
- emit_token(l, t);
+ c = l->code[++l->state.pos];
- break;
+ while (l->code[l->state.pos] != '\"') {
+ add_to_string(&t, c);
+ c = l->code[++l->state.pos];
+ }
+
+ l->state.pos++;
+ l->state.in_string = false;
+ add_token(l, t);
+ continue;
case EOF:
return l;
case '\n':
diff --git a/src/lexer.h b/src/lexer.h
@@ -2,4 +2,5 @@
#include "common.h"
+void print_tokens(Lexer *l);
Lexer *lexer_lex(Lexer *lexer);
diff --git a/src/main.c b/src/main.c
@@ -1,4 +1,5 @@
#include <stdio.h>
+#include <string.h>
#include "lexer.h"
#include "utils.h"
@@ -17,9 +18,12 @@ int main(int argc, char **args)
contents = read_file(filename);
if (contents == NULL) return 1;
lexer.code = contents;
+ lexer.code_len = strlen(contents);
lexer = *lexer_lex(&lexer);
+ print_tokens(&lexer);
+
printf("\n");
return 0;
}
diff --git a/src/utils.c b/src/utils.c
@@ -2,9 +2,9 @@
#include <stdlib.h>
#include <string.h>
#include "utils.h"
+#include "common.h"
-char *
-read_file(const char *filename)
+char *read_file(const char *filename)
{
long fsize;
char *source;
@@ -22,12 +22,11 @@ read_file(const char *filename)
fread(source, fsize, 1, fp);
fclose(fp);
- source[fsize] = 0;
+ source[fsize] = '\0';
return source;
}
-void
-separate_file_from_path(const char *fullpath, char **out_path, char **out_filename)
+void separate_file_from_path(const char *fullpath, char **out_path, char **out_filename)
{
char *path = strdup(fullpath);
char *filename = strrchr(path, '/');
@@ -43,3 +42,10 @@ separate_file_from_path(const char *fullpath, char **out_path, char **out_filena
*out_filename = strdup(filename);
free(path);
}
+
+/*
+ * Abort the program when condition holds.
+ *
+ * condition: when true, message is written to stderr and the process
+ *            exits with status 1; when false, the call returns normally.
+ * message:   text printed verbatim, with no newline added.
+ */
+void die(bool condition, const char *message)
+{
+	if (condition) {
+		fprintf(stderr, "%s", message);
+		exit(1);
+	}
+}
diff --git a/src/utils.h b/src/utils.h
@@ -1,5 +1,7 @@
#pragma once
+#include <stdbool.h>
+
char *read_file(const char *filename);
void separate_file_from_path(const char *fullpath, char **out_path, char **out_filename);
diff --git a/test.sic b/test.sic
@@ -1,9 +1,5 @@
// this is a comment
-int jack = 5;
-
-// another one
-
void main() {
-
+ str name = "Johnny Mnemonic";
}