sic

The sic programming language, compiler and tools (WIP)
Log | Files | Refs

commit 18d88480bc4da10e9c224d951f7d7510bb542d1a
parent 772c7870a795bda9fe1654a98576db9bcc7e47d1
Author: citbl <citbl@citbl.org>
Date:   Sun, 10 May 2026 16:53:31 +1000

token wips

Diffstat:
M.clang-format | 2+-
Msrc/common.h | 23+++++++++++++++++++----
Msrc/lexer.c | 72+++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
Msrc/lexer.h | 1+
Msrc/main.c | 4++++
Msrc/utils.c | 16+++++++++++-----
Msrc/utils.h | 2++
Mtest.sic | 6+-----
8 files changed, 94 insertions(+), 32 deletions(-)

diff --git a/.clang-format b/.clang-format @@ -19,7 +19,7 @@ AlignConsecutiveMacros: false SortIncludes: false IndentCaseLabels: false -ColumnLimit: 80 +ColumnLimit: 120 PenaltyBreakBeforeFirstCallParameter: 1 AlignAfterOpenBracket: DontAlign BinPackArguments: false diff --git a/src/common.h b/src/common.h @@ -1,10 +1,12 @@ #pragma once +#include <stdlib.h> +#include <stdio.h> #include <stdbool.h> #include <stddef.h> typedef enum Token_Type { - NOTYETSET = 7, + NOTYETSET = 9, IDENT, KEYWORD, SYMBOL, @@ -34,7 +36,7 @@ typedef enum Token_Type { } Token_Type; typedef enum Keyword { - IF = 137, + IF = 139, ELSE, WHILE, OPT, @@ -52,10 +54,18 @@ typedef enum Keyword { CAST } Keyword; +///////////////////////////////////////////////// + +typedef struct String { + char *value; + size_t cap; + size_t len; +} String; + typedef struct Token { Token_Type type; union Value { - char *as_string; + String as_string; char as_char; size_t as_int; bool as_bool; @@ -74,10 +84,15 @@ typedef struct Lexer_State { typedef struct Lexer { const char *code; + size_t code_len; const char *path; const char *filename; Lexer_State state; Token *tokens; - size_t count; + size_t len; size_t cap; } Lexer; + +///////////////////////////////////////////////// + +void die(bool condition, const char *message); diff --git a/src/lexer.c b/src/lexer.c @@ -1,27 +1,60 @@ #include "lexer.h" #include <stdio.h> -#include <stdlib.h> #include <string.h> -static void emit_token(Lexer *l, Token t) +static void add_token(Lexer *l, Token t) { - if (l->count >= l->cap) { - l->cap *= 2; + if (l->len >= l->cap) { + l->cap = l->cap == 0 ? 256 : l->cap * 2; l->tokens = realloc(l->tokens, l->cap * sizeof(Token)); } - l->tokens[l->count++] = t; + l->tokens[l->len++] = t; } -static void add_to_string(char *str, char c) +void print_tokens(Lexer *l) { + Token t; + Token_Type typ; + size_t i; + + for (i = 0; i < l->len; i++) { + t = l->tokens[i]; + typ = t.type; + switch (typ) { + case LIT_STRING: + printf("STRING LITERAL: %s\n", l->tokens[i].value.as_string.value); + break; + default: + printf("print_tokens: unhandled token %i", typ); + break; + } + } +} + +static void add_to_string(Token *t, char c) +{ + String *str = &t->value.as_string; + char *new_value; + + if (str->len >= str->cap) { + str->cap = str->cap == 0 ? 256 : str->cap * 2; + str->value = realloc(str->value, str->cap * sizeof(char)); + } + + str->value[str->len++] = c; + str->value[str->len] = '\0'; } static char peek(Lexer *l) { - char c = l->code[l->state.pos + 1]; - // printf("PEEK: %c\n", c); - return c; + size_t next = l->state.pos + 1; + + if (next >= l->code_len) { + return '\0'; + } + + return l->code[next]; } static char consume(Lexer *l) @@ -41,11 +74,9 @@ Lexer *lexer_lex(Lexer *l) { char c = '\0'; size_t len = strlen(l->code); - Token t = { .filename = l->filename, - .path = l->path, - .col = -1, - .line = -1, - .type = NOTYETSET }; + Token t = { + .filename = l->filename, .path = l->path, .col = -1, .line = -1, .type = NOTYETSET, .value = { 0 } + }; l->tokens = calloc(250, sizeof(Token)); l->state.pos = 0; @@ -65,11 +96,18 @@ Lexer *lexer_lex(Lexer *l) switch (c) { case '\"': l->state.in_string = true; - run_until_char(l, '\"'); // TODO buffer up the string t.type = LIT_STRING; - emit_token(l, t); + c = l->code[++l->state.pos]; - break; + while (l->code[l->state.pos] != '\"') { + add_to_string(&t, c); + c = l->code[++l->state.pos]; + } + + l->state.pos++; + l->state.in_string = false; + add_token(l, t); + continue; case EOF: return l; case '\n': diff --git a/src/lexer.h b/src/lexer.h @@ -2,4 +2,5 @@ #include "common.h" +void print_tokens(Lexer *l); Lexer *lexer_lex(Lexer *lexer); diff --git a/src/main.c b/src/main.c @@ -1,4 +1,5 @@ #include <stdio.h> +#include <string.h> #include "lexer.h" #include "utils.h" @@ -17,9 +18,12 @@ int main(int argc, char **args) contents = read_file(filename); if (contents == NULL) return 1; lexer.code = contents; + lexer.code_len = strlen(contents); lexer = *lexer_lex(&lexer); + print_tokens(&lexer); + printf("\n"); return 0; } diff --git a/src/utils.c b/src/utils.c @@ -2,9 +2,9 @@ #include <stdlib.h> #include <string.h> #include "utils.h" +#include "common.h" -char * -read_file(const char *filename) +char *read_file(const char *filename) { long fsize; char *source; @@ -22,12 +22,11 @@ read_file(const char *filename) fread(source, fsize, 1, fp); fclose(fp); - source[fsize] = 0; + source[fsize] = '\0'; return source; } -void -separate_file_from_path(const char *fullpath, char **out_path, char **out_filename) +void separate_file_from_path(const char *fullpath, char **out_path, char **out_filename) { char *path = strdup(fullpath); char *filename = strrchr(path, '/'); @@ -43,3 +42,10 @@ separate_file_from_path(const char *fullpath, char **out_path, char **out_filena *out_filename = strdup(filename); free(path); } + +void die(bool condition, const char *message) +{ + if (!condition) return; + fprintf(stderr, "%s", message); + exit(1); +} diff --git a/src/utils.h b/src/utils.h @@ -1,5 +1,7 @@ #pragma once +#include <stdbool.h> + char *read_file(const char *filename); void separate_file_from_path(const char *fullpath, char **out_path, char **out_filename); diff --git a/test.sic b/test.sic @@ -1,9 +1,5 @@ // this is a comment -int jack = 5; - -// another one - void main() { - + str name = "Johnny Mnemonic"; }