commit 2e7bdd358dd26f673f39d314889db907cc5ccb91
Author: citbl <citbl@citbl.org>
Date: Sun, 5 Oct 2025 22:48:24 +1000
init
Diffstat:
52 files changed, 4211 insertions(+), 0 deletions(-)
diff --git a/.clang-format b/.clang-format
@@ -0,0 +1,25 @@
+BasedOnStyle: Webkit
+IndentWidth: 8
+ContinuationIndentWidth: 8
+UseTab: AlignWithSpaces
+AlignTrailingComments: true
+SpacesBeforeTrailingComments: 1
+KeepEmptyLinesAtTheStartOfBlocks: false
+AllowShortBlocksOnASingleLine: true
+AllowShortIfStatementsOnASingleLine: true
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortEnumsOnASingleLine: true
+AllowShortFunctionsOnASingleLine: false
+AlignConsecutiveDeclarations: false
+AlignConsecutiveAssignments: false
+AlignConsecutiveMacros: false
+SortIncludes: false
+
+IndentCaseLabels: false
+ColumnLimit: 100
+PenaltyBreakBeforeFirstCallParameter: 1
+AlignAfterOpenBracket: DontAlign
+BinPackArguments: false
+BinPackParameters: false
+
+BreakAfterReturnType: TopLevelDefinitions
diff --git a/.clangd b/.clangd
@@ -0,0 +1,12 @@
+CompileFlags:
+ Add: [
+ -Wall,
+ -Wextra,
+ -Wpedantic,
+ -xc,
+ -std=c99,
+ -g,
+ -I/opt/homebrew/opt/libgccjit/include,
+ -L/opt/homebrew/opt/libgccjit/lib/gcc/current,
+ -lgccjit
+ ]
diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml
@@ -0,0 +1,33 @@
+name: C/C++ CI
+
+on:
+ push:
+ branches: ["master"]
+ pull_request:
+ branches: ["master"]
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Install dependencies (gcc, pkg-config, libgccjit)
+   run: |
+     sudo apt-get update
+     # Install the toolchain unconditionally; only the libgccjit version falls back.
+     sudo apt-get install -y build-essential pkg-config
+     sudo apt-get install -y libgccjit-13-dev || \
+     sudo apt-get install -y libgccjit-12-dev
+
+ - name: Checks
+ run: |
+ pkg-config --cflags --libs libgccjit || true
+ make V=1
+
+ - name: Build
+ run: make again
+
+ - name: Test
+ run: make test
+
+ - name: Sanitizers
+ run: make check
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,8 @@
+*.dSYM
+.DS_Store
+ox
+oxc
+fox
+err.log
+*.o
+out
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,20 @@
+{
+ "version": "0.2.0",
+ "configurations": [
+ {
+ "name": "Build & Debug",
+ "type": "cppdbg",
+ "request": "launch",
+ "program": "${workspaceFolder}/oxc",
+ "args": [
+ "${workspaceFolder}/ex2.ox"
+ ],
+ "stopAtEntry": false,
+ "cwd": "${fileDirname}",
+ "environment": [],
+ "externalConsole": false,
+ "MIMode": "lldb",
+ "preLaunchTask": "build-ox"
+ }
+ ]
+}
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
@@ -0,0 +1,21 @@
+// .vscode/tasks.json
+{
+ "version": "2.0.0",
+ "tasks": [
+ {
+ "label": "build-ox",
+ "type": "shell",
+ "command": "make",
+ "args": ["check"],
+ "problemMatcher": [],
+ "presentation": {
+ "reveal": "never",
+ "echo": false,
+ "focus": false,
+ "panel": "shared",
+ "showReuseMessage": false,
+ "clear": false
+ }
+ }
+ ]
+}
diff --git a/.zed/debug.json b/.zed/debug.json
@@ -0,0 +1,18 @@
+// Project-local debug tasks
+//
+// For more documentation on how to configure debug tasks,
+// see: https://zed.dev/docs/debugger
+[
+ {
+ "label": "Debug native binary",
+ "build": {
+ "command": "make",
+ "args": ["check"],
+ "cwd": "$ZED_WORKTREE_ROOT"
+ },
+ "program": "$ZED_WORKTREE_ROOT/oxc",
+ "args": ["$ZED_WORKTREE_ROOT/ex10.ox"],
+ "request": "launch",
+ "adapter": "CodeLLDB"
+ }
+]
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,190 @@
+Copyright 2025 The Ox Programming language contributors
+
+Apache License
+Version 2.0, January 2004
+http://www.apache.org/licenses/
+
+TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+1. Definitions.
+
+ "License" shall mean the terms and conditions for use, reproduction,
+ and distribution as defined by Sections 1 through 9 of this document.
+
+ "Licensor" shall mean the copyright owner or entity authorized by
+ the copyright owner that is granting the License.
+
+ "Legal Entity" shall mean the union of the acting entity and all
+ other entities that control, are controlled by, or are under common
+ control with that entity. For the purposes of this definition,
+ "control" means (i) the power, direct or indirect, to cause the
+ direction or management of such entity, whether by contract or
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
+ outstanding shares, or (iii) beneficial ownership of such entity.
+
+ "You" (or "Your") shall mean an individual or Legal Entity
+ exercising permissions granted by this License.
+
+ "Source" form shall mean the preferred form for making modifications,
+ including but not limited to software source code, documentation
+ source, and configuration files.
+
+ "Object" form shall mean any form resulting from mechanical
+ transformation or translation of a Source form, including but
+ not limited to compiled object code, generated documentation,
+ and conversions to other media types.
+
+ "Work" shall mean the work of authorship, whether in Source or
+ Object form, made available under the License, as indicated by a
+ copyright notice that is included in or attached to the work
+ (an example is provided in the Appendix below).
+
+ "Derivative Works" shall mean any work, whether in Source or Object
+ form, that is based on (or derived from) the Work and for which the
+ editorial revisions, annotations, elaborations, or other modifications
+ represent, as a whole, an original work of authorship. For the purposes
+ of this License, Derivative Works shall not include works that remain
+ separable from, or merely link (or bind by name) to the interfaces of,
+ the Work and Derivative Works thereof.
+
+ "Contribution" shall mean any work of authorship, including
+ the original version of the Work and any modifications or additions
+ to that Work or Derivative Works thereof, that is intentionally
+ submitted to Licensor for inclusion in the Work by the copyright owner
+ or by an individual or Legal Entity authorized to submit on behalf of
+ the copyright owner. For the purposes of this definition, "submitted"
+ means any form of electronic, verbal, or written communication sent
+ to the Licensor or its representatives, including but not limited to
+ communication on electronic mailing lists, source code control systems,
+ and issue tracking systems that are managed by, or on behalf of, the
+ Licensor for the purpose of discussing and improving the Work, but
+ excluding communication that is conspicuously marked or otherwise
+ designated in writing by the copyright owner as "Not a Contribution."
+
+ "Contributor" shall mean Licensor and any individual or Legal Entity
+ on behalf of whom a Contribution has been received by Licensor and
+ subsequently incorporated within the Work.
+
+2. Grant of Copyright License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ copyright license to reproduce, prepare Derivative Works of,
+ publicly display, publicly perform, sublicense, and distribute the
+ Work and such Derivative Works in Source or Object form.
+
+3. Grant of Patent License. Subject to the terms and conditions of
+ this License, each Contributor hereby grants to You a perpetual,
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+ (except as stated in this section) patent license to make, have made,
+ use, offer to sell, sell, import, and otherwise transfer the Work,
+ where such license applies only to those patent claims licensable
+ by such Contributor that are necessarily infringed by their
+ Contribution(s) alone or by combination of their Contribution(s)
+ with the Work to which such Contribution(s) was submitted. If You
+ institute patent litigation against any entity (including a
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
+ or a Contribution incorporated within the Work constitutes direct
+ or contributory patent infringement, then any patent licenses
+ granted to You under this License for that Work shall terminate
+ as of the date such litigation is filed.
+
+4. Redistribution. You may reproduce and distribute copies of the
+ Work or Derivative Works thereof in any medium, with or without
+ modifications, and in Source or Object form, provided that You
+ meet the following conditions:
+
+ (a) You must give any other recipients of the Work or Derivative
+ Works a copy of this License; and
+
+ (b) You must cause any modified files to carry prominent notices
+ stating that You changed the files; and
+
+ (c) You must retain, in the Source form of any Derivative Works
+ that You distribute, all copyright, patent, trademark, and
+ attribution notices from the Source form of the Work,
+ excluding those notices that do not pertain to any part of
+ the Derivative Works; and
+
+ (d) If the Work includes a "NOTICE" text file as part of its
+ distribution, then any Derivative Works that You distribute must
+ include a readable copy of the attribution notices contained
+ within such NOTICE file, excluding those notices that do not
+ pertain to any part of the Derivative Works, in at least one
+ of the following places: within a NOTICE text file distributed
+ as part of the Derivative Works; within the Source form or
+ documentation, if provided along with the Derivative Works; or,
+ within a display generated by the Derivative Works, if and
+ wherever such third-party notices normally appear. The contents
+ of the NOTICE file are for informational purposes only and
+ do not modify the License. You may add Your own attribution
+ notices within Derivative Works that You distribute, alongside
+ or as an addendum to the NOTICE text from the Work, provided
+ that such additional attribution notices cannot be construed
+ as modifying the License.
+
+ You may add Your own copyright statement to Your modifications and
+ may provide additional or different license terms and conditions
+ for use, reproduction, or distribution of Your modifications, or
+ for any such Derivative Works as a whole, provided Your use,
+ reproduction, and distribution of the Work otherwise complies with
+ the conditions stated in this License.
+
+5. Submission of Contributions. Unless You explicitly state otherwise,
+ any Contribution intentionally submitted for inclusion in the Work
+ by You to the Licensor shall be under the terms and conditions of
+ this License, without any additional terms or conditions.
+ Notwithstanding the above, nothing herein shall supersede or modify
+ the terms of any separate license agreement you may have executed
+ with Licensor regarding such Contributions.
+
+6. Trademarks. This License does not grant permission to use the trade
+ names, trademarks, service marks, or product names of the Licensor,
+ except as required for describing the origin of the Work and
+ reproducing the content of the NOTICE file.
+
+7. Disclaimer of Warranty. Unless required by applicable law or
+ agreed to in writing, Licensor provides the Work (and each
+ Contributor provides its Contributions) on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ implied, including, without limitation, any warranties or conditions
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+ PARTICULAR PURPOSE. You are solely responsible for determining the
+ appropriateness of using or redistributing the Work and assume any
+ risks associated with Your exercise of permissions under this License.
+
+8. Limitation of Liability. In no event and under no legal theory,
+ whether in tort (including negligence), contract, or otherwise,
+ unless required by applicable law (such as deliberate and grossly
+ negligent acts) or agreed to in writing, shall any Contributor be
+ liable to You for damages, including any direct, indirect, special,
+ incidental, or consequential damages of any character arising as a
+ result of this License or out of the use or inability to use the
+ Work (including but not limited to damages for loss of goodwill,
+ work stoppage, computer failure or malfunction, or any and all
+ other commercial damages or losses), even if such Contributor
+ has been advised of the possibility of such damages.
+
+9. Accepting Warranty or Additional Liability. While redistributing
+ the Work or Derivative Works thereof, You may choose to offer,
+ and charge a fee for, acceptance of support, warranty, indemnity,
+ or other liability obligations and/or rights consistent with this
+ License. However, in accepting such obligations, You may act only
+ on Your own behalf and on Your sole responsibility, not on behalf
+ of any other Contributor, and only if You agree to indemnify,
+ defend, and hold each Contributor harmless for any liability
+ incurred by, or claims asserted against, such Contributor by reason
+ of your accepting any such warranty or additional liability.
+
+END OF TERMS AND CONDITIONS
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
diff --git a/NOTES.md b/NOTES.md
@@ -0,0 +1,105 @@
+
+[https://github.com/gingerBill/titania]
+[https://news.ycombinator.com/item?id=45243925]
+[https://people.inf.ethz.ch/wirth/Oberon/Oberon07.Report.pdf]
+[https://people.inf.ethz.ch/wirth/ProjectOberon/PO.System.pdf]
+
+add_operator = "+" | "-" | "xor" | "or".
+mul_operator = "*" | "/" | "%" | "and".
+
+see
+ Tokenizer Semicolon Insertion Rules
+
+ When a newline is seen after the following token kind, a semicolon is inserted, otherwise no semicolon is inserted:
+
+ ...list
+
+
+Gleam my new obsession
+I love Rust, but...
+[https://ericcodes.io/blog/gleam-my-new-obsession.html]
+
+(`antirez/sds` for dynamic strings, `nothings/stb_ds` for dynamic arrays and hashmaps, and `cxong/tinydir` for reading the filesystem).
+[https://old.reddit.com/r/Compilers/comments/1nmc3r9/i_wrote_a_compiler_for_a_large_subset_of_c_in_c/]
+
+How to make stuff private and discussion on design (just mangle the names if needed)
+[http://journal.stuffwithstuff.com/2025/05/26/access-control-syntax]
+
+
+# symbols
+
+- jack int 1
+- test void->void 1
+ alice float 2
+- main void->void 1
+ peter strings 2
+- jill int 1
+
+# Zed
+
+[https://zed.dev/docs/extensions/developing-extensions]
+
+# libgccjit doco
+
+[https://gcc.gnu.org/onlinedocs/gcc-15.1.0/jit/]
+
+# license
+
+ the hare license at the bottom
+ https://sr.ht/~sircmpwn/hare/
+ the standard library is under MPL, the compiler and executables are under GPL3
+
+# walk down, compute bubbling up
+
+RDP (Root‑Descend‑Process)
+
+- Push "stacks" as you descend, nodes and local state
+- Process and pop the frame off on the way back up and merge or "combine" result with its parent.
+
+Expr ::= Add(Expr, Expr)
+ | Mul(Expr, Expr)
+ | Num(Int)
+
+R‑D‑P Application
+
+1. Root: Add( Mul(Num(2), Num(3)), Num(4) )
+2. Descend:
+ - Push Add frame.
+ - Push left child Mul.
+ - Push left child Num(2) → leaf → Process → result = 2.
+ - Push right child Num(3) → leaf → Process → result = 3.
+ - Process Mul → result = 6.
+ - Push right child Num(4) → leaf → Process → result = 4.
+3. Process Add → result = 10.
+
+** Forget the whole tree, focus on this node and reason locally. **
+
+R‑D‑P (Root‑Descend‑Process) turns recursive AST evaluation into a clear, iterative algorithm.
+Pair it with bottom‑up traversal, the visitor pattern, or an explicit stack to keep state explicit.
+This approach reduces cognitive load by isolating each node’s processing and avoiding hidden call‑stack dependencies.
+
+Keep a whiteboard model of the tree shapes.
+
+- Base case – literals and identifiers return a value immediately.
+- Recursive step – always evaluate child nodes before applying the operator at the current node.
+- After return – combine child results according to the operation; this is where side‑effects (e.g., assignment) may occur.
+
+see [[TODO]]
+
+## Only pure constant expressions are evaluated at compile time
+
+print has side effects, so it is not evaluated at compile time.
+
+2 + 3 is not run by the compiler, but it may be constant-folded in the optimizer.
+
+print is lowered to a runtime call to printf.
+
+### CTFE compile time function execution
+
+- constant expression, constant folding and propagation
+- evaluator / constant interpreter
+- restricted evaluator in the compiler, with env and CT heap
+- try_ctfe on expression nodes
+- lowering: emit literal value to IR once folded
+- C++: consteval, Zig: comptime, Rust: const fn
+- gate with fuel(?), depth restriction and memory limits
diff --git a/README.md b/README.md
@@ -0,0 +1,5 @@
+### Ox Programming language
+
+WIP
+
+[![C/C++ CI](https://github.com/keyle/baby-c/actions/workflows/c-cpp.yml/badge.svg)](https://github.com/keyle/baby-c/actions/workflows/c-cpp.yml)
diff --git a/TODO.md b/TODO.md
@@ -0,0 +1,17 @@
+@next
+
+- variables, string first, so that we can print its content;
+- print anything other than a string
+- call another function from main, that prints something
+- call another function that prints the passed argument
+
+@later
+
+- implement all or most of C's features on top of libgccjit
+- ARC memory management, new keyword.
+
+@cruft
+
+- redo arguments as list and not linked list, handle in parse and in gen (2 places in gen?)
+
+get rid of count_args and search for 'argc'
diff --git a/ex1.ox b/ex1.ox
@@ -0,0 +1,4 @@
+void main(int param1) {
+ print("hello world\n");
+ //print(param1);
+}
diff --git a/ex10.ox b/ex10.ox
@@ -0,0 +1,6 @@
+void main() {
+ print("This is a great feeling");
+ print("This is a great feeling");
+ print("");
+ print("Oh yes.");
+}
diff --git a/ex2.ox b/ex2.ox
@@ -0,0 +1,8 @@
+// example program
+// ns main
+// T add(T a, b) inline pure => a + b;
+
+void main() {
+ string name = "harrold";
+ print("harold");
+}
diff --git a/ex3.ox b/ex3.ox
@@ -0,0 +1,5 @@
+void main() {
+ if (a == true) {
+ print("yes");
+ }
+}
diff --git a/ex4.ox b/ex4.ox
@@ -0,0 +1,7 @@
+void main() {
+ if (a == true) {
+ print("yes");
+ } else {
+ print("no");
+ }
+}
diff --git a/ex5.ox b/ex5.ox
@@ -0,0 +1,12 @@
+// typedef Person {
+// string name;
+// int age;
+// }
+
+void main() {
+ if (a == true) {
+ print("yes");
+ } else {
+ print("no");
+ }
+}
diff --git a/ex6.ox b/ex6.ox
@@ -0,0 +1,5 @@
+int main() {
+ for (int a = 0; b < 10; c++) {
+ print("hi");
+ }
+}
diff --git a/ex7.ox b/ex7.ox
@@ -0,0 +1,7 @@
+int main() {
+ int a = 1;
+ for(;;) {
+ if (a == 1) break;
+ continue 7; // TODO fix this test to fail semantics
+ }
+}
diff --git a/ex8.ox b/ex8.ox
@@ -0,0 +1,7 @@
+void main() {
+ print("test 1");
+}
+
+void test() {
+ print("test 2");
+}
diff --git a/ex9.ox b/ex9.ox
@@ -0,0 +1,13 @@
+int jack = 111;
+
+void test() {
+ float alice = 222;
+}
+
+void main() {
+ int peter = 333;
+}
+
+uint jill = 444;
+
+float jane = 123.45;
diff --git a/file.h b/file.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "utils.h"
+
+/*
+ * Read the whole file at `file_path` into a heap buffer and NUL-terminate it.
+ *
+ * Returns the buffer (allocated with calloc, so the caller releases it with
+ * free()), or NULL if the file cannot be opened. All later failures are
+ * reported via panic(); the `return NULL` after each of those calls is only
+ * reached if panic() returns.
+ */
+char* readfile(const char* file_path)
+{
+        FILE* file = fopen(file_path, "rb");
+        if (!file) {
+                perror("Failed to read file");
+                return NULL;
+        }
+
+        // Seek to the end so that ftell() reports the length in bytes.
+        if (fseek(file, 0, SEEK_END) != 0) {
+                fclose(file);
+                panic("Failed to find the end of the file");
+                return NULL;
+        }
+
+        const long length = ftell(file);
+        if (length < 0) {
+                fclose(file);
+                panic("Failed to determine the file size");
+                return NULL;
+        }
+
+        rewind(file);
+
+        // One extra byte is needed for the terminator, so SIZE_MAX itself is too big.
+        if ((unsigned long)length >= SIZE_MAX) {
+                fclose(file);
+                panic("File too large to fit in memory");
+                return NULL;
+        }
+
+        const size_t wanted = (size_t)length;
+        char* buffer = (char*)calloc(1, wanted + 1);
+        if (!buffer) {
+                panic("Failed to allocate memory to read file");
+                fclose(file);
+                return NULL;
+        }
+
+        if (fread(buffer, 1, wanted, file) != wanted) {
+                free(buffer);
+                fclose(file);
+                panic("Failed to read the file in its entirety");
+                return NULL;
+        }
+
+        buffer[wanted] = '\0';
+        fclose(file);
+        return buffer;
+}
diff --git a/gen.h b/gen.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <unistd.h> // for libgccjit
+#include <libgccjit.h>
+
+#include "parser.h"
+#include "sem.h"
+
+/* State carried through code generation; created by gen_init(). */
+typedef struct {
+        gcc_jit_context* ctx;        // libgccjit context everything is emitted into
+        gcc_jit_function* prev_func; // set to NULL by gen_init()
+        gcc_jit_function* curr_func; // function being emitted, NULL when none
+        gcc_jit_function* printf_fn; // imported C printf
+        gcc_jit_function* puts_fn;   // imported C puts
+        gcc_jit_block* prev_block;   // set to NULL by gen_init()
+        gcc_jit_block* curr_block;   // block statements are appended to, NULL when none
+        // gcc_jit_type *type_kind; need type too?
+        Scope* scope;                // scope handed to gen_init()
+        const char* src;             // source text handed to gen_init()
+} Gen;
+
+Gen gen_init(Scope *, const char *);
+void gen_next(Gen *, Node *);
diff --git a/gen/gen.c b/gen/gen.c
@@ -0,0 +1,293 @@
+#include "../gen.h"
+#include "../utils.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+
+// libgccjit type handles shared by the generator. They are assigned in
+// gen_init() from the context acquired there:
+//   type_int   -> GCC_JIT_TYPE_INT64_T
+//   type_uint  -> GCC_JIT_TYPE_UINT64_T
+//   type_float -> GCC_JIT_TYPE_DOUBLE
+//   type_void  -> GCC_JIT_TYPE_VOID
+//   type_cstr  -> GCC_JIT_TYPE_CONST_CHAR_PTR
+static gcc_jit_type* type_int;
+static gcc_jit_type* type_uint;
+static gcc_jit_type* type_float;
+static gcc_jit_type* type_void;
+static gcc_jit_type* type_cstr;
+
+// Upper bound on the number of arguments accepted by a single print call.
+#define MAXARGS 16
+
+/*
+ * Create the code generator state.
+ *
+ * Acquires a libgccjit context, configures it (program name "ox", frame
+ * pointer kept, optimisation level 0), fills in the file-level type_*
+ * handles and declares the imported C functions `puts` and `printf`.
+ *
+ * scope: stored in the returned Gen; must not be NULL.
+ * src:   source text, stored in the returned Gen; must not be NULL.
+ *
+ * Returns a Gen with no current function or block selected. panic() is
+ * called when an argument is NULL or the context cannot be acquired.
+ */
+Gen
+gen_init(Scope* scope, const char* src)
+{
+        if (scope == NULL || src == NULL) { panic("gen_init: no Scope or AST provided"); }
+
+        gcc_jit_context* ctx = gcc_jit_context_acquire();
+
+        if (!ctx) { panic("could not acquire gcc jit context"); }
+
+        // Debugging aids, currently disabled:
+        //   debug info (needs source locations to work)
+        //     gcc_jit_context_set_bool_option(ctx, GCC_JIT_BOOL_OPTION_DEBUGINFO, 1);
+        //   high-level dump
+        //     gcc_jit_context_set_bool_option(ctx, GCC_JIT_BOOL_OPTION_DUMP_INITIAL_TREE, 1);
+        //   low-level dump
+        //     gcc_jit_context_set_bool_option(ctx, GCC_JIT_BOOL_OPTION_DUMP_INITIAL_GIMPLE, 1);
+        //   summary
+        //     gcc_jit_context_set_bool_option(ctx, GCC_JIT_BOOL_OPTION_DUMP_SUMMARY, 1);
+
+        gcc_jit_context_set_str_option(ctx, GCC_JIT_STR_OPTION_PROGNAME, "ox");
+        // keep the frame pointer
+        gcc_jit_context_add_driver_option(ctx, "-fno-omit-frame-pointer");
+
+        gcc_jit_context_set_int_option(ctx,
+                GCC_JIT_INT_OPTION_OPTIMIZATION_LEVEL,
+                /* 0..3, as in -O0..-O3 */ 0);
+
+        type_int = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_INT64_T);
+        type_uint = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_UINT64_T);
+        type_float = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_DOUBLE);
+        type_void = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_VOID);
+        type_cstr = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_CONST_CHAR_PTR);
+
+        // puts and printf return a C `int`, not the 64-bit type_int used for Ox
+        // integers; the imported declarations must match the real C prototypes.
+        gcc_jit_type* c_int = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_INT);
+
+        gcc_jit_param* pm_puts[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "s") };
+        gcc_jit_function* fn_puts = gcc_jit_context_new_function(
+                ctx, NULL, GCC_JIT_FUNCTION_IMPORTED, c_int, "puts", 1, pm_puts, 0);
+
+        gcc_jit_param* pm_printf[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "fmt") };
+        gcc_jit_function* fn_printf = gcc_jit_context_new_function(ctx,
+                NULL,
+                GCC_JIT_FUNCTION_IMPORTED,
+                c_int,
+                "printf",
+                1,
+                pm_printf,
+                /*is_variadic=*/1);
+
+        return (Gen) {
+                .ctx = ctx,
+                .scope = scope,
+                .prev_func = NULL,
+                .curr_func = NULL,
+                .prev_block = NULL,
+                .curr_block = NULL,
+                .puts_fn = fn_puts,
+                .printf_fn = fn_printf,
+                .src = src,
+        };
+}
+
+static gcc_jit_rvalue* handle_expr(Gen*, Node*);
+
+/*
+ * Build a string-literal rvalue from the source span recorded in `node`.
+ *
+ * The span [start, end) of gen->src is copied into a temporary NUL-terminated
+ * buffer because the source text is not terminated at the span end.
+ * libgccjit takes its own copy of the string, so the temporary is released
+ * before returning instead of being leaked.
+ *
+ * Returns the literal rvalue; NULL only if the allocation fails and panic()
+ * returns.
+ */
+static gcc_jit_rvalue*
+emit_literal_string(Gen* gen, Node* node)
+{
+        size_t len = node->data.string.value.end - node->data.string.value.start;
+        char* str = calloc(len + 1, sizeof(char));
+        if (str == NULL) {
+                panic("emit_literal_string: could not alloc");
+                return NULL;
+        }
+        memcpy(str, gen->src + node->data.string.value.start, len);
+        str[len] = '\0';
+
+        gcc_jit_rvalue* literal = gcc_jit_context_new_string_literal(gen->ctx, str);
+        free(str);
+        return literal;
+}
+
+/* Generate code for each top-level declaration of the program node, in order. */
+static void
+build_program(Gen* gen, Node* node)
+{
+        const size_t total = node->data.program.len;
+        size_t idx = 0;
+        while (idx < total) {
+                gen_next(gen, node->data.program.decl[idx]);
+                idx++;
+        }
+}
+
+/*
+ * Lower a call to the builtin `print`.
+ *
+ *   one argument      -> puts(arg)
+ *   several arguments -> printf(fmt, ...), the first argument being the format
+ *
+ * Returns the call rvalue, or NULL when the call cannot be lowered: no
+ * arguments, more than MAXARGS arguments, or an argument that handle_expr()
+ * could not turn into an rvalue.
+ */
+static gcc_jit_rvalue*
+lower_builtin_print(Gen* gen, Node* node)
+{
+        size_t argc = node->data.call_expr.len;
+
+        if (argc == 0) {
+                softpanic("print expects at least one argument");
+                return NULL;
+        }
+
+        // 1-arg, treat as puts(arg)
+        if (argc == 1) {
+                gcc_jit_rvalue* arg = handle_expr(gen, node->data.call_expr.args[0]);
+                if (arg == NULL) return NULL;
+                // cast common cases to const char*
+                if (gcc_jit_rvalue_get_type(arg) != type_cstr)
+                        arg = gcc_jit_context_new_cast(gen->ctx, NULL, arg, type_cstr);
+                gcc_jit_rvalue* one[] = { arg };
+                return gcc_jit_context_new_call(gen->ctx, NULL, gen->puts_fn, 1, one);
+        }
+
+        // n>1, treat as printf(fmt, ...).
+        //
+        // TODO formatting is planned as string interpolation, something along
+        // the lines of {{variable}} without spelling out its type; that needs a
+        // lookup, a split of the string and then formatting.
+        //
+        // TODO decide what happens when the user prints a reference to a struct:
+        // print [[struct]], or automatically unwrap and display its fields
+        // (probably the latter).
+
+        // Reject oversized calls before touching the buffer; softpanic may return.
+        if (argc > MAXARGS) {
+                softpanic("we do not currently support more than 16 args to a print call");
+                return NULL;
+        }
+
+        // Fixed-size stack buffer: nothing to free. TODO auto grow.
+        gcc_jit_rvalue* args[MAXARGS] = { 0 };
+
+        for (size_t i = 0; i < argc; i++) {
+                gcc_jit_rvalue* arg = handle_expr(gen, node->data.call_expr.args[i]);
+                if (arg == NULL) return NULL;
+
+                gcc_jit_type* ty = gcc_jit_rvalue_get_type(arg);
+                if (i == 0) {
+                        if (ty != type_cstr) {
+                                // note this is probably not going to work as only a
+                                // limited set of casts is supported
+                                arg = gcc_jit_context_new_cast(gen->ctx, NULL, arg, type_cstr);
+                        }
+                } else if (ty == type_int || ty == type_uint || ty == type_float
+                        || ty == type_cstr) {
+                        // Scalars and strings are passed to the variadic call as-is.
+                        // Integers must not be cast to a pointer type.
+                } else {
+                        // fallback: pass pointer as void*
+                        arg = gcc_jit_context_new_cast(gen->ctx,
+                                NULL,
+                                arg,
+                                gcc_jit_context_get_type(gen->ctx, GCC_JIT_TYPE_VOID_PTR));
+                }
+                args[i] = arg;
+        }
+        return gcc_jit_context_new_call(gen->ctx, NULL, gen->printf_fn, (int)argc, args);
+}
+
+/*
+ * Placeholder for resolving a user-defined function by name.
+ *
+ * Not implemented yet: always returns NULL, so callers must treat the result
+ * as "not found". An explicit return is needed because flowing off the end of
+ * a non-void function and using the result is undefined behaviour.
+ */
+static gcc_jit_function*
+lookup_function(Gen* gen, const char* func_name)
+{
+        // TODO see todo below about linked list parameters...
+        (void)gen;
+        (void)func_name;
+        return NULL;
+}
+
+/*
+ * Lower a call expression.
+ *
+ * Only the builtin `print` is handled; any other callee is reported through
+ * softpanic() and yields NULL.
+ *
+ * Returns the call rvalue, or NULL when nothing was emitted.
+ */
+static gcc_jit_rvalue*
+handle_func_call(Gen* gen, Node* node)
+{
+        Node* fcallee = node->data.call_expr.callee;
+        const char* func_name = span_str(gen->src, fcallee->data.ident.name, (char[IDENTSZ]) { 0 });
+        if (strcmp(func_name, "print") == 0) return lower_builtin_print(gen, node);
+
+        softpanic("unhandled func call named: %s", func_name);
+
+        //
+        // TODO handle any function other than print...
+        //
+        // int argc = node->data.call_expr.len;
+        // gcc_jit_function* callee = lookup_function(gen, func_name);
+        // gcc_jit_rvalue* args[16]; // @future fixed at 16 parameters in call
+        // for (int i = 0; i < argc; i++) {
+        //         args[i] = handle_expr(gen, node->data.call_expr.args[i]);
+        // }
+        // return gcc_jit_context_new_call(gen->ctx, NULL, callee, argc, args);
+
+        // Reached only if softpanic() returns: give callers a defined result
+        // rather than falling off the end of a non-void function.
+        return NULL;
+}
+
+/*
+ * Lower an expression node to an rvalue.
+ *
+ * String literals and call expressions are lowered; number literals yield
+ * NULL without a message; any other node type is logged to stdout and also
+ * yields NULL.
+ */
+static gcc_jit_rvalue*
+handle_expr(Gen* gen, Node* node)
+{
+        switch (node->type) {
+        case NODE_STRING_LITERAL:
+                return emit_literal_string(gen, node);
+        case NODE_CALL_EXPR:
+                return handle_func_call(gen, node);
+        case NODE_NUMBER_LITERAL:
+                return NULL;
+        default:
+                printf("handle_expr unhandled, %s\n", node_type_str(node->type));
+                return NULL;
+        }
+}
+
+/*
+ * Emit one statement into gen->curr_block.
+ *
+ * Expression statements are evaluated for their side effects when the
+ * expression lowers to an rvalue. Block and return statements are accepted
+ * but emit nothing; any other node type is logged to stdout.
+ */
+static void
+build_statement(Gen* gen, Node* node)
+{
+        switch (node->type) {
+        case NODE_EXPR_STATEMENT: {
+                gcc_jit_rvalue* value = handle_expr(gen, node->data.expr_statement.expr);
+                if (value != NULL) {
+                        gcc_jit_block_add_eval(gen->curr_block, NULL, value);
+                }
+                return;
+        }
+        case NODE_BLOCK:
+        case NODE_RETURN:
+                return;
+        default:
+                printf("build_statement unhandled, %s\n", node_type_str(node->type));
+                return;
+        }
+}
+
+/* Emit every statement of the block node `body`, in source order. */
+static void
+build_block(Gen* gen, Node* body)
+{
+        size_t at = 0;
+        while (at < body->data.block.len) {
+                build_statement(gen, body->data.block.stmts[at]);
+                at++;
+        }
+}
+
+/*
+ * Emit a function for the declaration `node`.
+ *
+ * The generator's current function/block are switched to the new function
+ * for the duration of the body and restored afterwards. If a block is still
+ * open once the body has been emitted, it is closed with `return 0`.
+ *
+ * NOTE(review): the emitted function is always exported as "main", returns
+ * type_int and takes no parameters, whatever `node` declares - presumably a
+ * placeholder until real signatures are lowered; confirm.
+ */
+static void
+build_func_decl(Gen* gen, Node* node)
+{
+        gcc_jit_function* saved_func = gen->curr_func;
+        gcc_jit_block* saved_block = gen->curr_block;
+
+        gen->curr_func = gcc_jit_context_new_function(gen->ctx,
+                NULL, // loc
+                GCC_JIT_FUNCTION_EXPORTED, // declared
+                type_int, // ret
+                "main", // name
+                0, // num params
+                NULL, // params
+                0); // is variadic
+        gen->curr_block = gcc_jit_function_new_block(gen->curr_func, "entry");
+
+        build_block(gen, node->data.function_decl.body);
+
+        // Every block must end with a terminator; fall back to `return 0`.
+        if (gen->curr_block != NULL) {
+                gcc_jit_block_end_with_return(gen->curr_block,
+                        NULL,
+                        gcc_jit_context_new_rvalue_from_int(gen->ctx, type_int, 0));
+        }
+
+        gen->curr_func = saved_func;
+        gen->curr_block = saved_block;
+}
+
+/*
+ * Generate code for `node`, dispatching on its type.
+ *
+ * Program and function-declaration nodes are lowered by their builders. For
+ * a string literal the rvalue is created and then discarded. Every other
+ * node type is logged to stdout.
+ */
+void
+gen_next(Gen* gen, Node* node)
+{
+        // printf("gen_next, %s\n", node_type_str(node->type));
+
+        if (node->type == NODE_PROGRAM) {
+                build_program(gen, node);
+        } else if (node->type == NODE_FUNCTION_DECL) {
+                build_func_decl(gen, node);
+        } else if (node->type == NODE_STRING_LITERAL) {
+                emit_literal_string(gen, node);
+        } else {
+                printf("unhandled, %s\n", node_type_str(node->type));
+        }
+}
diff --git a/hmap.c b/hmap.c
@@ -0,0 +1,176 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "hmap.h"
+
+#define INITIAL_BUCKETS 8
+#define LOAD_FACTOR 0.75
+
+static void hmap_grow(HashMap* map);
+
+// djb2 string hash: start at 5381, then acc = acc * 33 + byte for every byte
+// up to (not including) the terminating NUL. Bytes are read as unsigned char.
+static unsigned long hash(const char* str)
+{
+        unsigned long acc = 5381;
+        for (const unsigned char* p = (const unsigned char*)str; *p != 0; p++) {
+                acc = acc * 33 + *p;
+        }
+        return acc;
+}
+
+/*
+ * Allocate an empty map whose values are `value_size` bytes each.
+ *
+ * The map starts with INITIAL_BUCKETS buckets. Returns the new map, to be
+ * released with hmap_free(). On allocation failure a message is written to
+ * stderr and the process exits with status 1 - for the map itself as well,
+ * which was previously logged and then dereferenced.
+ */
+HashMap* hmap_create(size_t value_size)
+{
+        HashMap* map = calloc(1, sizeof(HashMap));
+        if (map == NULL) {
+                fprintf(stderr, "hmap_create: map: could not alloc\n");
+                exit(1);
+        }
+        map->bucket_count = INITIAL_BUCKETS;
+        map->size = 0;
+        map->value_size = value_size;
+        map->buckets = calloc(map->bucket_count, sizeof(HashNode*));
+        if (map->buckets == NULL) {
+                fprintf(stderr, "hmap_create: bucket: could not alloc\n");
+                exit(1);
+        }
+        return map;
+}
+
+/*
+ * Store a copy of `value` (map->value_size bytes) under `key`.
+ *
+ * If the key is already present its value is overwritten in place. Otherwise
+ * a new entry holding private copies of the key and the value is added,
+ * growing the table first when the insertion would exceed LOAD_FACTOR.
+ *
+ * On allocation failure a message is written to stderr and the process exits
+ * with status 1.
+ */
+void hmap_put(HashMap* map, const char* key, const void* value)
+{
+        unsigned long h = hash(key) % map->bucket_count;
+        for (HashNode* node = map->buckets[h]; node != NULL; node = node->next) {
+                if (strcmp(node->key, key) == 0) {
+                        memcpy(node->value, value, map->value_size);
+                        return;
+                }
+        }
+
+        // Only a genuinely new entry can push the table over the load factor.
+        if ((float)(map->size + 1) / map->bucket_count > LOAD_FACTOR) {
+                hmap_grow(map);
+                h = hash(key) % map->bucket_count; // bucket count changed
+        }
+
+        HashNode* new_node = calloc(1, sizeof(HashNode));
+        if (new_node == NULL) {
+                fprintf(stderr, "hmap_put: new_node: could not alloc\n");
+                exit(1);
+        }
+
+        // Copy the key by hand: strdup() is POSIX, not C99, and its result was
+        // never checked.
+        size_t key_len = strlen(key) + 1;
+        new_node->key = malloc(key_len);
+        if (new_node->key == NULL) {
+                fprintf(stderr, "hmap_put: new_node->key: could not alloc\n");
+                exit(1);
+        }
+        memcpy(new_node->key, key, key_len);
+
+        // calloc(1, 0) may legitimately return NULL, so ask for at least one byte.
+        new_node->value = calloc(1, map->value_size ? map->value_size : 1);
+        if (new_node->value == NULL) {
+                fprintf(stderr, "hmap_put: new_node->value: could not alloc\n");
+                exit(1);
+        }
+        memcpy(new_node->value, value, map->value_size);
+
+        new_node->next = map->buckets[h];
+        map->buckets[h] = new_node;
+        map->size++;
+}
+
+/*
+ * Look up `key`. When found, map->value_size bytes of the stored value are
+ * copied into `out` and true is returned; otherwise `out` is left untouched
+ * and false is returned.
+ */
+bool hmap_get(HashMap* map, const char* key, void* out)
+{
+        const unsigned long slot = hash(key) % map->bucket_count;
+        for (HashNode* cur = map->buckets[slot]; cur != NULL; cur = cur->next) {
+                if (strcmp(cur->key, key) != 0) continue;
+                memcpy(out, cur->value, map->value_size);
+                return true;
+        }
+        return false;
+}
+
+/*
+ * Remove the entry stored under `key`, releasing its key, value and node.
+ * Returns true if an entry was removed, false if the key was not present.
+ */
+bool hmap_remove(HashMap* map, const char* key)
+{
+        // `link` points at whichever pointer currently references the candidate
+        // node (the bucket head or a predecessor's `next`), so unlinking needs
+        // no special case for the first node.
+        HashNode** link = &map->buckets[hash(key) % map->bucket_count];
+        for (; *link != NULL; link = &(*link)->next) {
+                HashNode* victim = *link;
+                if (strcmp(victim->key, key) != 0) continue;
+
+                *link = victim->next;
+                free(victim->key);
+                free(victim->value);
+                free(victim);
+                map->size--;
+                return true;
+        }
+        return false;
+}
+
+/*
+ * Double the number of buckets and move every existing node into the new
+ * table; nodes are relinked, not copied. Exits with status 1 if the new
+ * bucket array cannot be allocated.
+ */
+static void hmap_grow(HashMap* map)
+{
+        const size_t wider = map->bucket_count * 2;
+        HashNode** fresh = calloc(wider, sizeof(HashNode*));
+        if (fresh == NULL) {
+                fprintf(stderr, "hmap_grow: could not alloc\n");
+                exit(1);
+        }
+
+        for (size_t b = 0; b < map->bucket_count; b++) {
+                HashNode* cur;
+                // Pop nodes off the old chain and push them onto their new chain.
+                while ((cur = map->buckets[b]) != NULL) {
+                        map->buckets[b] = cur->next;
+                        HashNode** slot = &fresh[hash(cur->key) % wider];
+                        cur->next = *slot;
+                        *slot = cur;
+                }
+        }
+
+        free(map->buckets);
+        map->buckets = fresh;
+        map->bucket_count = wider;
+}
+
+/*
+ * Release a map created by hmap_create(): every node with its key and value
+ * copies, the bucket array, and the map itself.
+ *
+ * Passing NULL is a no-op, mirroring free().
+ */
+void hmap_free(HashMap* map)
+{
+        if (map == NULL) return;
+
+        for (size_t i = 0; i < map->bucket_count; i++) {
+                HashNode* node = map->buckets[i];
+                while (node) {
+                        // Read the link before the node is released.
+                        HashNode* next = node->next;
+                        free(node->key);
+                        free(node->value);
+                        free(node);
+                        node = next;
+                }
+        }
+        free(map->buckets);
+        free(map);
+}
+
+// Example usage for struct T
+// struct T {
+// int id;
+// char name[32];
+// };
+
+// int main() {
+// HashMap* map = hmap_create(sizeof(struct T));
+// struct T t1 = {1, "Alice"};
+// struct T t2 = {2, "Bob"};
+// struct T t3 = {3, "Carol"};
+
+// hmap_put(map, "alice", &t1);
+// hmap_put(map, "bob", &t2);
+// hmap_put(map, "carol", &t3);
+
+// struct T out;
+// if (hmap_get(map, "bob", &out)) {
+// printf("bob: id=%d, name=%s\n", out.id, out.name);
+// }
+// if (hmap_get(map, "alice", &out)) {
+// printf("alice: id=%d, name=%s\n", out.id, out.name);
+// }
+// if (hmap_get(map, "dave", &out)) {
+// printf("dave: id=%d, name=%s\n", out.id, out.name);
+// } else {
+// printf("dave not found\n");
+// }
+// hmap_free(map);
+// return 0;
+// }
diff --git a/hmap.h b/hmap.h
@@ -0,0 +1,23 @@
+#pragma once
+
+#include <stdbool.h>
+#include <stdlib.h>
+
+// One entry in a bucket's singly linked chain.
+typedef struct HashNode {
+ // Key string; released with free() when the node is removed or the map is freed.
+ char* key;
+ // Stored value; released with free() together with the node.
+ void* value;
+ // Next node in the same bucket, or NULL at the end of the chain.
+ struct HashNode* next;
+} HashNode;
+
+// String-keyed hash table: an array of buckets, each a linked chain of HashNode.
+typedef struct HashMap {
+ // Number of slots in `buckets`.
+ size_t bucket_count;
+ // Entry counter; decremented by hmap_remove on a successful removal.
+ size_t size;
+ // NOTE(review): presumably the byte size of each stored value, as given to hmap_create — confirm.
+ size_t value_size;
+ // Array of `bucket_count` chain heads.
+ struct HashNode** buckets;
+} HashMap;
+
+// NOTE(review): the argument is presumably the byte size of each value (callers pass sizeof(T)) — confirm.
+HashMap* hmap_create(size_t);
+// NOTE(review): presumably stores a copy of `*value` under `key` — confirm against hmap.c.
+void hmap_put(HashMap* map, const char* key, const void* value);
+// NOTE(review): presumably returns true and copies the value into `out` when `key` exists — confirm.
+bool hmap_get(HashMap* map, const char* key, void* out);
+// Unlinks and frees the entry for `key`; returns true if it existed, false otherwise.
+bool hmap_remove(HashMap* map, const char* key);
+// Frees every node (key, value and node), the bucket array and the map itself.
+void hmap_free(HashMap* map);
diff --git a/hmap_test.c b/hmap_test.c
@@ -0,0 +1,174 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hmap.h"
+
+/*
+ * Smoke test: store three ints under string keys and read each one back.
+ * Prints one PASS/FAIL line per key and a summary line when all match.
+ */
+static void hmap_test_basic(void)
+{
+	printf("Testing dict of integers...\n");
+
+	HashMap* map = hmap_create(sizeof(int));
+
+	struct {
+		const char* key;
+		int value;
+	} items[] = {
+		{ "one", 1 },
+		{ "two", 2 },
+		{ "three", 3 },
+	};
+	const size_t item_count = sizeof(items) / sizeof(items[0]);
+
+	// Insert items
+	for (size_t i = 0; i < item_count; i++) {
+		hmap_put(map, items[i].key, &items[i].value);
+	}
+
+	// Retrieve and check
+	int v;
+	int all_ok = 1;
+	for (size_t i = 0; i < item_count; i++) {
+		if (hmap_get(map, items[i].key, &v) && v == items[i].value) {
+			printf("PASS: %s == %d\n", items[i].key, items[i].value);
+		} else {
+			printf("FAIL: %s\n", items[i].key);
+			all_ok = 0;
+		}
+	}
+	if (all_ok)
+		printf("PASS: dict of integers test\n");
+
+	// Release the map so the test does not leak its nodes and buckets.
+	hmap_free(map);
+}
+
+/*
+ * Stores three multi-field structs keyed by their `name` member and checks
+ * that every field read back from the map equals the original.
+ * Prints one PASS/FAIL line per item and a summary line when all match.
+ */
+static void hmap_test_T(void)
+{
+	printf("Testing struct T...\n");
+
+	struct T {
+		int id;
+		char name[32];
+		int age;
+		char email[64];
+		float score;
+	};
+
+	HashMap* map = hmap_create(sizeof(struct T));
+
+	struct T items[] = {
+		{ 1, "alice", 30, "alice@example.com", 95.5f },
+		{ 2, "bob", 25, "bob@example.com", 88.0f },
+		{ 3, "carol", 28, "carol@example.com", 91.2f },
+	};
+	const size_t item_count = sizeof(items) / sizeof(items[0]);
+
+	// Insert items
+	for (size_t i = 0; i < item_count; i++) {
+		hmap_put(map, items[i].name, &items[i]);
+	}
+
+	// Retrieve and check
+	struct T v;
+	int all_ok = 1;
+	for (size_t i = 0; i < item_count; i++) {
+		const struct T* want = &items[i];
+		// Exact float comparison is intended: the value is expected back unchanged.
+		int same = hmap_get(map, want->name, &v)
+			&& v.id == want->id
+			&& strcmp(v.name, want->name) == 0
+			&& v.age == want->age
+			&& strcmp(v.email, want->email) == 0
+			&& v.score == want->score;
+		if (same) {
+			printf("PASS: %s == {id:%d, age:%d, email:%s, score:%.1f}\n",
+				want->name, want->id, want->age, want->email, want->score);
+		} else {
+			printf("FAIL: %s\n", want->name);
+			all_ok = 0;
+		}
+	}
+	if (all_ok)
+		printf("PASS: struct T test\n");
+
+	// Release the map so the test does not leak its nodes and buckets.
+	hmap_free(map);
+}
+
+/*
+ * Inserts N generated keys ("key_0" .. "key_{N-1}") and verifies each one
+ * can still be read back afterwards. Only the first and last few successes
+ * are printed; every failure is printed.
+ */
+static void hmap_test_memory_bumping(void)
+{
+	int i;
+	printf("Testing memory bumping...\n");
+
+	HashMap* map = hmap_create(sizeof(int));
+	const int N = 1000; // Large enough to trigger resizing
+
+	char key[32];
+	int all_ok = 1;
+
+	// Insert N items
+	for (i = 0; i < N; i++) {
+		snprintf(key, sizeof(key), "key_%d", i);
+		hmap_put(map, key, &i);
+	}
+
+	// Retrieve and check all N items
+	for (i = 0; i < N; i++) {
+		snprintf(key, sizeof(key), "key_%d", i);
+		int v = -1;
+		if (hmap_get(map, key, &v) && v == i) {
+			// Optionally print only a few
+			if (i < 3 || i > N - 3)
+				printf("PASS: %s == %d\n", key, v);
+		} else {
+			printf("FAIL: %s\n", key);
+			all_ok = 0;
+		}
+	}
+	if (all_ok)
+		printf("PASS: memory bumping test\n");
+
+	// Release the map so the N inserted nodes are not leaked.
+	hmap_free(map);
+}
+
+/*
+ * Inserts three keys, removes "beta", then checks that "beta" can no longer
+ * be found while the other two keys still return their values.
+ * Prints one PASS/FAIL line per key and a summary line when all checks pass.
+ */
+static void hmap_test_removal(void)
+{
+	printf("Testing removal...\n");
+
+	HashMap* map = hmap_create(sizeof(int));
+
+	struct {
+		const char* key;
+		int value;
+	} items[] = {
+		{ "alpha", 10 },
+		{ "beta", 20 },
+		{ "gamma", 30 },
+	};
+	const size_t item_count = sizeof(items) / sizeof(items[0]);
+
+	// Insert items
+	for (size_t i = 0; i < item_count; i++) {
+		hmap_put(map, items[i].key, &items[i].value);
+	}
+
+	int all_ok = 1;
+
+	// Remove "beta"; the call itself must report that the key existed.
+	if (!hmap_remove(map, "beta")) {
+		printf("FAIL: beta was not found for removal\n");
+		all_ok = 0;
+	}
+
+	// Check "beta" is gone, others remain
+	int v;
+	for (size_t i = 0; i < item_count; i++) {
+		int found = hmap_get(map, items[i].key, &v);
+		if (strcmp(items[i].key, "beta") == 0) {
+			if (!found) {
+				printf("PASS: %s removed\n", items[i].key);
+			} else {
+				printf("FAIL: %s still present\n", items[i].key);
+				all_ok = 0;
+			}
+		} else {
+			if (found && v == items[i].value) {
+				printf("PASS: %s == %d\n", items[i].key, items[i].value);
+			} else {
+				printf("FAIL: %s\n", items[i].key);
+				all_ok = 0;
+			}
+		}
+	}
+	if (all_ok)
+		printf("PASS: removal test\n");
+
+	// Release the map so the remaining nodes are not leaked.
+	hmap_free(map);
+}
+
+// Runs every hash map test in sequence; each test prints its own PASS/FAIL lines.
+void hmap_tests(void)
+{
+ hmap_test_basic();
+ hmap_test_T();
+ hmap_test_memory_bumping();
+ hmap_test_removal();
+}
diff --git a/hmap_test.h b/hmap_test.h
@@ -0,0 +1,3 @@
+#pragma once
+
+// Entry point for the hash map self-tests; results are printed to stdout.
+void hmap_tests(void);
diff --git a/lexer.c b/lexer.c
@@ -0,0 +1,398 @@
+#include "lexer.h"
+#include "utils.h"
+
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <assert.h>
+
+/* Current character at the lexer position, without consuming it; 0 at end of input. */
+static char peek(Lexer* lex)
+{
+	return lex->src[lex->pos];
+}
+
+/*
+ * Character one past the current position, without consuming anything.
+ * Returns 0 when the current character is already the terminator, so the
+ * read never goes beyond the end of the source string.
+ */
+static char peek2(Lexer* lex)
+{
+	const char* at = lex->src + lex->pos;
+	if (at[0] == '\0')
+		return 0;
+	return at[1];
+}
+
+/*
+ * Consume one character and return it, keeping line/col in step:
+ * a newline bumps `line` and resets `col` to 1, anything else bumps `col`.
+ * At end of input nothing is consumed and 0 is returned.
+ */
+static char nudge(Lexer* lex)
+{
+	const char cur = peek(lex);
+	if (cur == '\0')
+		return 0;
+
+	lex->pos++;
+	if (cur != '\n') {
+		lex->col++;
+		return cur;
+	}
+	lex->line++;
+	lex->col = 1;
+	return cur;
+}
+
+/*
+ * Advance past any mix of whitespace (space, tab, CR, LF) and `//` line
+ * comments. A comment ends just before the newline (or at end of input);
+ * the newline itself is then consumed as ordinary whitespace.
+ */
+static void skip_space_and_comments(Lexer* lex)
+{
+	for (;;) {
+		char c = peek(lex);
+
+		if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
+			nudge(lex);
+			continue;
+		}
+
+		// anything other than the start of a `//` comment ends the skipping
+		if (c != '/' || peek2(lex) != '/')
+			return;
+
+		nudge(lex); // first slash
+		nudge(lex); // second slash
+		for (c = peek(lex); c != '\n' && c != 0; c = peek(lex))
+			nudge(lex);
+	}
+}
+
+/*
+ * Lex an identifier or keyword.
+ *
+ * `pos`, `line` and `col` describe where the token starts; the caller has
+ * not consumed anything yet. Characters are consumed while they are `_` or
+ * alphanumeric. The resulting text is compared against the keyword table;
+ * anything else (including type names, which are left for the parser to
+ * classify) is returned as TOKEN_IDENT.
+ */
+static Token make_ident(Lexer* lex, size_t pos, size_t line, size_t col)
+{
+	static const struct {
+		const char* text;
+		size_t len;
+		TokenType type;
+	} KEYWORDS[] = {
+		{ "return", 6, TOKEN_RETURN },
+		{ "for", 3, TOKEN_FOR },
+		{ "if", 2, TOKEN_IF },
+		{ "else", 4, TOKEN_ELSE },
+		{ "continue", 8, TOKEN_CONTINUE },
+		{ "break", 5, TOKEN_BREAK },
+		{ "while", 5, TOKEN_WHILE },
+	};
+
+	for (;;) {
+		const char c = peek(lex);
+		// <ctype.h> functions need a value representable as unsigned char;
+		// passing a negative plain char (bytes >= 0x80) is undefined behaviour.
+		if (c != '_' && !isalnum((unsigned char)c))
+			break;
+		nudge(lex);
+	}
+
+	const size_t len = lex->pos - pos;
+	TokenType type = TOKEN_IDENT;
+
+	// A keyword must match in both length and content, so "iffy" stays an identifier.
+	for (size_t k = 0; k < sizeof(KEYWORDS) / sizeof(KEYWORDS[0]); k++) {
+		if (KEYWORDS[k].len == len && strncmp(lex->src + pos, KEYWORDS[k].text, len) == 0) {
+			type = KEYWORDS[k].type;
+			break;
+		}
+	}
+
+	// @later: built-in type names (int, float, void, string) and Capitalised
+	// custom types are deliberately not special-cased here; they stay plain
+	// identifiers and are meant to be resolved in the parser.
+
+	return (Token) { .type = type,
+		.start = pos,
+		.line = line,
+		.col = col,
+		.end = lex->pos };
+}
+/*
+ * Lex a number literal: one or more digits, optionally followed by a '.'
+ * and at least one more digit. A '.' that is not followed by a digit is
+ * left unconsumed. `pos`, `line` and `col` describe where the token starts.
+ */
+static Token make_number(Lexer* lex, size_t pos, size_t line, size_t col)
+{
+	// Casts: <ctype.h> functions need a value representable as unsigned char;
+	// a negative plain char (bytes >= 0x80) would be undefined behaviour.
+	while (isdigit((unsigned char)peek(lex)))
+		nudge(lex);
+	if (peek(lex) == '.' && isdigit((unsigned char)peek2(lex))) {
+		nudge(lex); // the decimal point
+		while (isdigit((unsigned char)peek(lex)))
+			nudge(lex);
+	}
+	return (Token) {
+		.type = TOKEN_NUMBER_LITERAL,
+		.start = pos,
+		.end = lex->pos,
+		.line = line,
+		.col = col
+	};
+}
+
+/*
+ * Lex a double-quoted string literal. The token spans both quotes.
+ * No escape sequences are recognised: the literal ends at the first '"'.
+ * If input ends before a closing quote, the token simply ends at end of input.
+ */
+static Token make_string(Lexer* lex, size_t pos, size_t line, size_t col)
+{
+	nudge(lex); // opening quote
+
+	for (;;) {
+		const char c = peek(lex);
+		if (c == 0)
+			break; // unterminated: stop without consuming
+		nudge(lex);
+		if (c == '"')
+			break; // closing quote consumed
+	}
+
+	Token tok = { 0 };
+	tok.type = TOKEN_STRING_LITERAL;
+	tok.start = pos;
+	tok.end = lex->pos;
+	tok.line = line;
+	tok.col = col;
+	return tok;
+}
+
+/* Consume the next character only when it equals `expected`; returns 1 if it did, else 0. */
+static int nudge_if(Lexer* lex, char expected)
+{
+	if (peek(lex) != expected)
+		return 0;
+	nudge(lex);
+	return 1;
+}
+
+/*
+ * Produce the next token from the input.
+ *
+ * Leading whitespace and `//` comments are skipped first. Identifiers,
+ * numbers and strings are delegated to their make_* helpers; everything
+ * else is a one- or two-character operator/punctuation token. Characters
+ * that match nothing are consumed and returned as TOKEN_UNKNOWN so the
+ * lexer always makes progress. At end of input a zero-width TOKEN_EOF is
+ * returned.
+ */
+static Token next_token(Lexer* lex)
+{
+	skip_space_and_comments(lex);
+	const size_t start = lex->pos;
+	const size_t line = lex->line;
+	const size_t col = lex->col;
+	const char c = peek(lex);
+	// <ctype.h> functions need a value representable as unsigned char;
+	// a negative plain char (bytes >= 0x80) would be undefined behaviour.
+	const unsigned char uc = (unsigned char)c;
+
+	if (c == 0)
+		return (Token) {
+			.type = TOKEN_EOF, .start = start, .end = lex->pos, .col = col, .line = line
+		};
+
+	if (isalpha(uc) || c == '_')
+		return make_ident(lex, start, line, col);
+	if (isdigit(uc))
+		return make_number(lex, start, line, col);
+	if (c == '"')
+		return make_string(lex, start, line, col);
+
+	TokenType type = TOKEN_UNKNOWN;
+
+	// Every remaining token starts by consuming `c` itself.
+	nudge(lex);
+
+	switch (c) {
+	case '(':
+		type = TOKEN_LPAREN;
+		break;
+	case ')':
+		type = TOKEN_RPAREN;
+		break;
+	case '{':
+		type = TOKEN_LBRACE;
+		break;
+	case '}':
+		type = TOKEN_RBRACE;
+		break;
+	case '[':
+		type = TOKEN_LBRACKET;
+		break;
+	case ']':
+		type = TOKEN_RBRACKET;
+		break;
+	case ';':
+		type = TOKEN_SEMICOLON;
+		break;
+	case '%':
+		type = TOKEN_PERCENT;
+		break;
+	case '/':
+		type = TOKEN_SLASH;
+		break;
+	case '*':
+		// was TOKEN_UNKNOWN, which made '*' impossible to lex as an operator
+		type = TOKEN_STAR;
+		break;
+	case ',':
+		type = TOKEN_COMMA;
+		break;
+	case '+':
+		type = nudge_if(lex, '+') ? TOKEN_PLUSPLUS : TOKEN_PLUS;
+		break;
+	case '-':
+		type = nudge_if(lex, '-') ? TOKEN_MINUSMINUS : TOKEN_MINUS;
+		break;
+	case '=':
+		type = nudge_if(lex, '=') ? TOKEN_EQUALITY : TOKEN_EQUAL;
+		break;
+	case '!':
+		type = nudge_if(lex, '=') ? TOKEN_INEQUALITY : TOKEN_BANG;
+		break;
+	case '>':
+		type = nudge_if(lex, '=') ? TOKEN_GT_EQ : TOKEN_GT;
+		break;
+	case '<':
+		type = nudge_if(lex, '=') ? TOKEN_LT_EQ : TOKEN_LT;
+		break;
+	default:
+		type = TOKEN_UNKNOWN;
+		break;
+	}
+
+	return (Token) { .type = type, .start = start, .end = lex->pos, .col = col, .line = line };
+}
+
+/*
+ * Debug helper: print one token as `L<line>:<col>  <kind>  '<source text>'`.
+ *
+ * `contents` is the source buffer the token's start/end offsets index into.
+ * Line and column are printed as stored: the lexer already counts both
+ * from 1, so no further adjustment is applied.
+ */
+static void print_token(const Token* t, const char* contents)
+{
+	static const char* TYPES[] = {
+		[TOKEN_IDENT] = "ident/type",
+		[TOKEN_LPAREN] = "open paren",
+		[TOKEN_RPAREN] = "close paren",
+		[TOKEN_LBRACE] = "open brace",
+		[TOKEN_RBRACE] = "close brace",
+		[TOKEN_LBRACKET] = "open bracket",
+		[TOKEN_RBRACKET] = "close bracket",
+		[TOKEN_EQUAL] = "equal",
+		[TOKEN_SEMICOLON] = "semicol",
+		[TOKEN_PERCENT] = "percent",
+		[TOKEN_COMMA] = "comma",
+		[TOKEN_NUMBER_LITERAL] = "number",
+		[TOKEN_STRING_LITERAL] = "string literal",
+		[TOKEN_SLASH] = "slash",
+		[TOKEN_STAR] = "star",
+		[TOKEN_PLUS] = "plus",
+		[TOKEN_PLUSPLUS] = "++",
+		[TOKEN_MINUS] = "minus",
+		[TOKEN_MINUSMINUS] = "--",
+		[TOKEN_EQUALITY] = "equality ==",
+		[TOKEN_INEQUALITY] = "inequality !=",
+		[TOKEN_BANG] = "bang !",
+		[TOKEN_LT] = "lower than",
+		[TOKEN_GT] = "greater than",
+		[TOKEN_LT_EQ] = "lt or = than",
+		[TOKEN_GT_EQ] = "gt or = than",
+		[TOKEN_IF] = "if",
+		[TOKEN_ELSE] = "else",
+		[TOKEN_WHILE] = "while",
+		[TOKEN_FOR] = "for",
+		[TOKEN_BREAK] = "break",
+		[TOKEN_CONTINUE] = "continue",
+		[TOKEN_RETURN] = "return",
+		[TOKEN_UNKNOWN] = "< UNKNOWN >",
+		[TOKEN_EOF] = "~EOF~"
+	};
+
+	// Never hand printf a NULL or out-of-range entry: fall back to a fixed
+	// label for values outside the enum or missing from the table.
+	const char* label = NULL;
+	if (t->type >= TOKEN_IDENT && t->type <= TOKEN_EOF)
+		label = TYPES[t->type];
+	if (label == NULL)
+		label = "< UNKNOWN >";
+
+	printf("L%zu:%zu \t%-14s '", t->line, t->col, label);
+	fwrite(contents + t->start, 1, t->end - t->start, stdout);
+	printf("'\n");
+}
+
+/*
+ * Append `tok` to the lexer's token array, doubling the array when full.
+ * Panics if the array cannot be grown.
+ */
+static void add_token(Lexer* lex, Token tok)
+{
+	if (lex->token_count >= lex->token_cap) {
+		// A zero capacity would stay zero when doubled, so start from a sane size.
+		const size_t new_cap = lex->token_cap ? lex->token_cap * 2 : 128;
+		// Keep the old pointer until realloc succeeds: on failure it returns
+		// NULL and the original block is still allocated.
+		Token* grown = (Token*)realloc(lex->tokens, sizeof(Token) * new_cap);
+		if (grown == NULL) panic("add_token: could not alloc");
+		lex->tokens = grown;
+		lex->token_cap = new_cap;
+	}
+	lex->tokens[lex->token_count++] = tok;
+}
+
+/* Debug helper: print every token collected by the lexer, one per line, in order. */
+void lexer_print(Lexer* lex)
+{
+	size_t idx = 0;
+	while (idx < lex->token_count) {
+		print_token(lex->tokens + idx, lex->src);
+		idx++;
+	}
+}
+
+/*
+ * Initialise `lex` and tokenise `contents` in full.
+ *
+ * `filename` and `contents` are stored by pointer, not copied. Line and
+ * column counting start at 1. Tokens are appended until, and including,
+ * the TOKEN_EOF token. Panics if the initial token array cannot be allocated.
+ */
+void lexer_lex(Lexer* lex, const char* filename, const char* contents)
+{
+	lex->filename = filename;
+	lex->src = contents;
+	lex->src_len = strlen(contents);
+
+	lex->pos = 0;
+	lex->line = 1;
+	lex->col = 1;
+
+	lex->token_count = 0;
+	lex->token_cap = 128;
+	lex->tokens = (Token*)calloc(lex->token_cap, sizeof(Token));
+	if (lex->tokens == NULL) panic("lexer_lex: could not alloc");
+
+	Token tok;
+	do {
+		tok = next_token(lex);
+		add_token(lex, tok);
+	} while (tok.type != TOKEN_EOF);
+}
+
+/*
+ * Name of a token type as a static string (e.g. "TOKEN_IDENT").
+ * Values outside the TokenType range yield "UNKNOWN_TOKEN_TYPE".
+ */
+const char* token_type_str(TokenType t)
+{
+	// Listed in enum declaration order.
+	static const char* const NAMES[] = {
+		[TOKEN_IDENT] = "TOKEN_IDENT",
+		[TOKEN_LPAREN] = "TOKEN_LPAREN",
+		[TOKEN_RPAREN] = "TOKEN_RPAREN",
+		[TOKEN_LBRACE] = "TOKEN_LBRACE",
+		[TOKEN_RBRACE] = "TOKEN_RBRACE",
+		[TOKEN_LBRACKET] = "TOKEN_LBRACKET",
+		[TOKEN_RBRACKET] = "TOKEN_RBRACKET",
+		[TOKEN_EQUAL] = "TOKEN_EQUAL",
+		[TOKEN_SEMICOLON] = "TOKEN_SEMICOLON",
+		[TOKEN_PERCENT] = "TOKEN_PERCENT",
+		[TOKEN_COMMA] = "TOKEN_COMMA",
+		[TOKEN_NUMBER_LITERAL] = "TOKEN_NUMBER_LITERAL",
+		[TOKEN_STRING_LITERAL] = "TOKEN_STRING_LITERAL",
+		[TOKEN_SLASH] = "TOKEN_SLASH",
+		[TOKEN_STAR] = "TOKEN_STAR",
+		[TOKEN_PLUS] = "TOKEN_PLUS",
+		[TOKEN_PLUSPLUS] = "TOKEN_PLUSPLUS",
+		[TOKEN_MINUS] = "TOKEN_MINUS",
+		[TOKEN_MINUSMINUS] = "TOKEN_MINUSMINUS",
+		[TOKEN_EQUALITY] = "TOKEN_EQUALITY",
+		[TOKEN_INEQUALITY] = "TOKEN_INEQUALITY",
+		[TOKEN_BANG] = "TOKEN_BANG",
+		[TOKEN_LT] = "TOKEN_LT",
+		[TOKEN_GT] = "TOKEN_GT",
+		[TOKEN_LT_EQ] = "TOKEN_LT_EQ",
+		[TOKEN_GT_EQ] = "TOKEN_GT_EQ",
+		[TOKEN_IF] = "TOKEN_IF",
+		[TOKEN_ELSE] = "TOKEN_ELSE",
+		[TOKEN_WHILE] = "TOKEN_WHILE",
+		[TOKEN_FOR] = "TOKEN_FOR",
+		[TOKEN_BREAK] = "TOKEN_BREAK",
+		[TOKEN_CONTINUE] = "TOKEN_CONTINUE",
+		[TOKEN_RETURN] = "TOKEN_RETURN",
+		[TOKEN_UNKNOWN] = "TOKEN_UNKNOWN",
+		[TOKEN_EOF] = "TOKEN_EOF"
+	};
+
+	if (t < TOKEN_IDENT || t > TOKEN_EOF)
+		return "UNKNOWN_TOKEN_TYPE";
+	return NAMES[t];
+}
diff --git a/lexer.h b/lexer.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include <stdlib.h>
+
+// Kinds of token produced by the lexer.
+// Values are contiguous from TOKEN_IDENT to TOKEN_EOF; token_type_str relies
+// on that range when validating its argument.
+typedef enum {
+ TOKEN_IDENT = 1006,
+ TOKEN_LPAREN,
+ TOKEN_RPAREN,
+ TOKEN_LBRACE,
+ TOKEN_RBRACE,
+ TOKEN_LBRACKET,
+ TOKEN_RBRACKET,
+ TOKEN_EQUAL,
+ TOKEN_SEMICOLON,
+ TOKEN_PERCENT,
+ TOKEN_COMMA,
+ TOKEN_NUMBER_LITERAL,
+ TOKEN_STRING_LITERAL,
+ TOKEN_SLASH,
+ TOKEN_STAR,
+ TOKEN_PLUS,
+ TOKEN_PLUSPLUS,
+ TOKEN_MINUS,
+ TOKEN_MINUSMINUS,
+ TOKEN_EQUALITY,
+ TOKEN_INEQUALITY,
+ TOKEN_BANG,
+ TOKEN_LT,
+ TOKEN_GT,
+ TOKEN_LT_EQ,
+ TOKEN_GT_EQ,
+ TOKEN_IF,
+ TOKEN_ELSE,
+ TOKEN_WHILE,
+ TOKEN_FOR,
+ TOKEN_BREAK,
+ TOKEN_CONTINUE,
+ TOKEN_RETURN,
+ TOKEN_UNKNOWN, // NOTE: also update print_token
+ TOKEN_EOF
+} TokenType; // NOTE also update token_type_str!
+
+const char* token_type_str(TokenType t);
+
+// A lexed token. The text is not copied: it is the source range [start, end).
+typedef struct {
+ // Offset of the token's first character in the source buffer.
+ size_t start;
+ // Offset just past the token's last character.
+ size_t end;
+ // Line and column of the token's first character, as counted by the lexer (from 1).
+ size_t line;
+ size_t col;
+ TokenType type;
+} Token;
+
+// Lexer state plus its output; fully initialised by lexer_lex.
+typedef struct {
+ // Growable array of tokens: `token_count` used out of `token_cap` allocated.
+ Token* tokens;
+ size_t token_count;
+ size_t token_cap;
+ // Current read offset into `src`.
+ size_t pos;
+ // Current line and column, both starting at 1.
+ size_t line;
+ size_t col;
+ // Source text being lexed (NUL-terminated; borrowed, not copied) and its strlen.
+ const char* src;
+ size_t src_len;
+ // Name of the source file (borrowed, not copied).
+ const char* filename;
+} Lexer;
+
+// Initialises the lexer and tokenises `contents` up to and including TOKEN_EOF.
+void lexer_lex(Lexer*, const char* filename, const char* contents);
+// Prints every collected token to stdout (debug aid).
+void lexer_print(Lexer*);
diff --git a/main.c b/main.c
@@ -0,0 +1,79 @@
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "file.h"
+#include "utils.h"
+#include "lexer.h"
+#include "parser.h"
+#include "sem.h"
+#include "gen.h"
+#include "hmap_test.h"
+
+/*
+ * Compiler driver.
+ *
+ *   oxc <file> [--exec]   lex, parse, analyse and compile <file>; with
+ *                         --exec the compiled `main` is also run in-process.
+ *                         An executable named "out" is always written.
+ *   oxc --test-hmap       run the hash map self-tests and exit (this is what
+ *                         the makefile's `test-hmap` target invokes).
+ *
+ * Returns 1 on bad usage, 0 otherwise; fatal errors go through panic()/exit().
+ */
+int
+main(int argc, char* argv[])
+{
+	if (argc < 2) {
+		printf("Usage: %s <file>\n", argv[0]);
+		return 1;
+	}
+
+	// Must be handled before argv[1] is treated as a source file name.
+	if (strcmp(argv[1], "--test-hmap") == 0) {
+		hmap_tests();
+		return 0;
+	}
+
+	const char* filename = argv[1];
+	const char* contents = readfile(filename);
+
+	if (contents == NULL) { panic("error reading file: %s", filename); }
+
+	printf("--- lex --- \n");
+
+	Lexer lex;
+	lexer_lex(&lex, filename, contents);
+	// lexer_print(&lex);
+
+	Parser par = parser_init(&lex);
+
+	printf("--- par --- \n");
+
+	Ast ast;
+	parser_parse(&ast, &par);
+	// ast_print(&ast);
+
+	printf("--- sem --- \n");
+
+	Scope program_scope = scope_init(ast.node);
+	scope_build(&program_scope, &ast);
+	scope_print(&program_scope, &ast);
+
+	printf("--- gen --- \n");
+
+	Gen gen = gen_init(&program_scope, contents);
+	gen_next(&gen, ast.node);
+
+	gcc_jit_result* result;
+
+	/* Compile the code. */
+	result = gcc_jit_context_compile(gen.ctx);
+	if (!result) { panic("compilation failed"); }
+
+	/* Extract the generated code from "result". */
+	if (argc > 2 && strcmp(argv[2], "--exec") == 0) {
+		printf("--- exec start --- \n");
+
+		typedef int (*fn_type)(void);
+		fn_type starting_func = (fn_type)gcc_jit_result_get_code(result, "main");
+
+		if (!starting_func) {
+			fprintf(stderr, "NULL gcc_jit_result_get_code\n");
+			exit(1);
+		}
+		int main_ret = starting_func();
+
+		printf("--- exec end: %d --- \n", main_ret);
+	}
+
+	// Flush our own output before the context writes the executable.
+	fflush(stdout);
+
+	gcc_jit_context_compile_to_file(gen.ctx, GCC_JIT_OUTPUT_KIND_EXECUTABLE, "out");
+
+	gcc_jit_context_release(gen.ctx);
+	gcc_jit_result_release(result);
+
+	return 0;
+}
diff --git a/makefile b/makefile
@@ -0,0 +1,46 @@
+# note: needed both gcc and libgccjit
+# macOS: brew install gcc libgccjit
+# linux: sudo apt-get install build-essential gcc g++ libgccjit-10-dev
+# update the prefix if needed
+
+# Try Homebrew first (macOS)
+BREW := $(shell command -v brew 2>/dev/null)
+ifneq ($(BREW),)
+ LIBGCCJIT_PREFIX := $(shell brew --prefix libgccjit 2>/dev/null)
+endif
+
+UNAME_S := $(shell uname -s)
+ifeq ($(UNAME_S),Darwin)
+ LIB := -I${LIBGCCJIT_PREFIX}/include -L${LIBGCCJIT_PREFIX}/lib/gcc/current -lgccjit
+else
+ LIBGCCJIT_PREFIX := $(shell dirname $(shell gcc -print-file-name=libgccjit.so))
+ LIB := -I${LIBGCCJIT_PREFIX}/include -L${LIBGCCJIT_PREFIX} -lgccjit
+endif
+
+SRC = *.c */*.c
+BIN = oxc
+STD = -std=c99
+
+default:
+ cc ${STD} -g -Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wno-unused-function -o ${BIN} ${SRC} ${LIB}
+
+clean:
+ rm -rf ${BIN} ${BIN}.* err.log
+
+# Optimised build; warnings are errors here so a release never ships with them.
+release: clean
+	cc ${STD} -O2 -Wall -Wshadow -Wextra -Wpedantic -Werror -o ${BIN} ${SRC} ${LIB}
+
+check: clean
+ cc ${STD} -g -Wall -Wextra -fsanitize=address -fsanitize=undefined -o ${BIN} ${SRC} ${LIB}
+
+test: clean default
+ @for f in ex*.ox; do \
+ ./${BIN} $$f > /dev/null 2>err.log || { echo "FAIL: $$f"; cat err.log; exit 1; }; \
+ if [ -s err.log ]; then echo "FAIL: $$f"; cat err.log; exit 1; fi; \
+ done; \
+ rm -f err.log
+
+test-hmap: clean default
+ MallocNanoZone=0 ./oxc --test-hmap
+
+again: clean default
diff --git a/ox-syntax/LICENSE b/ox-syntax/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 Endice Software pty ltd
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/ox-syntax/language-configuration.json b/ox-syntax/language-configuration.json
@@ -0,0 +1,23 @@
+{
+ "comments": {
+ "lineComment": "//",
+ "blockComment": [
+ "/*",
+ "*/"
+ ]
+ },
+ "brackets": [
+ [
+ "{",
+ "}"
+ ],
+ [
+ "[",
+ "]"
+ ],
+ [
+ "(",
+ ")"
+ ]
+ ]
+}
diff --git a/ox-syntax/ox-syntax-0.0.3.vsix b/ox-syntax/ox-syntax-0.0.3.vsix
Binary files differ.
diff --git a/ox-syntax/package.json b/ox-syntax/package.json
@@ -0,0 +1,26 @@
+{
+ "name": "ox-syntax",
+ "displayName": "Ox Syntax",
+ "description": "Syntax highlighting for the Ox language",
+ "version": "0.0.3",
+ "engines": {
+ "vscode": "^1.50.0"
+ },
+ "contributes": {
+ "languages": [
+ {
+ "id": "ox",
+ "aliases": ["Ox", "ox"],
+ "extensions": [".ox"],
+ "configuration": "./language-configuration.json"
+ }
+ ],
+ "grammars": [
+ {
+ "language": "ox",
+ "scopeName": "source.ox",
+ "path": "./syntaxes/ox.tmLanguage.json"
+ }
+ ]
+ }
+}
diff --git a/ox-syntax/rebuild-syntax-vscode.sh b/ox-syntax/rebuild-syntax-vscode.sh
@@ -0,0 +1,3 @@
+vsce package --allow-missing-repository
+code --uninstall-extension ox-syntax
+code --install-extension ox-syntax-0.0.3.vsix
diff --git a/ox-syntax/syntaxes/ox.sublime-syntax b/ox-syntax/syntaxes/ox.sublime-syntax
@@ -0,0 +1,106 @@
+%YAML 1.2
+---
+name: Ox
+file_extensions:
+ - ox
+scope: source.ox
+
+# ------------------------------------------------------------------
+# Top‑level contexts – the “main” context is what Sublime loads first
+# ------------------------------------------------------------------
+contexts:
+ main:
+ - include: comments # block / line / shebang
+ - include: keywords
+ - include: numbers
+ - include: strings
+ - include: basic-types
+ - include: functions
+ - include: typedefs
+ - include: builtins
+
+ # ------------------------------------------------------------------
+ # Comments
+ # ------------------------------------------------------------------
+ comments:
+ - include: block-comment # /* … */
+ - match: '//.*$'
+ scope: comment.line.double-slash
+ - match: '#!.*$'
+ scope: comment.line.double-slash
+
+ # ------------------------------------------------------------------
+ # Block comments – allows nesting
+ # ------------------------------------------------------------------
+ block-comment:
+ - match: '/\*'
+ scope: punctuation.definition.comment
+ push:
+ - meta_scope: comment.block
+ # end of block comment – pop back to the parent context
+ - match: '\*/'
+ scope: punctuation.definition.comment
+ pop: true
+ # allow nested block comments
+ - include: block-comment
+
+ # ------------------------------------------------------------------
+ # Keywords, operators and constants
+ # ------------------------------------------------------------------
+ keywords:
+ - match: '\b(const|else|for|if|pkg|record|return|typedef|match|case|while)\b'
+ scope: keyword.control
+ - match: '\b(and|by|in|not|or)\b'
+ scope: keyword.operator
+ - match: '\b(false|nil|true)\b'
+ scope: constant.language
+
+ # ------------------------------------------------------------------
+ # Built‑in functions
+ # ------------------------------------------------------------------
+ builtins:
+ - match: '\b(abs|len|assert|print|exit|panic)\b'
+ scope: variable.function support.function.builtin
+
+ # ------------------------------------------------------------------
+ # Numbers
+ # ------------------------------------------------------------------
+  numbers:
+    # decimal: digits, optional fraction, optional exponent inside the fraction
+    - match: '\b([+-])?[0-9]+(\.[0-9]*(e[0-9]+)?)?\b'
+      scope: constant.numeric
+    # hexadecimal
+    - match: '\b([+-])?0x[0-9A-Fa-f]+\b'
+      scope: constant.numeric
+
+ # ------------------------------------------------------------------
+ # Basic types
+ # ------------------------------------------------------------------
+ basic-types:
+ - match: '\b(int|void|float|char|string|bool)\b'
+ scope: storage.type
+
+  # ------------------------------------------------------------------
+  # Function names (look‑ahead for '(')
+  # ------------------------------------------------------------------
+  functions:
+    - match: '\b([A-Za-z_][A-Za-z0-9_]*)\s*(?=\()'
+      scope: entity.name.function
+
+ # ------------------------------------------------------------------
+ # Type names (capitalised identifiers)
+ # ------------------------------------------------------------------
+ typedefs:
+ - match: '\b[A-Z][A-Za-z0-9_]*\b'
+ scope: entity.name.type
+
+ # ------------------------------------------------------------------
+ # Strings (double‑quoted)
+ # ------------------------------------------------------------------
+  strings:
+    # opening quote pushes a string context that runs until the closing quote
+    - match: '"'
+      scope: punctuation.definition.string.begin
+      push:
+        - meta_scope: string.quoted.double
+        # backslash escapes; listed first so an escaped quote does not end the string
+        - match: '\\.'
+          scope: constant.character.escape
+        - match: '"'
+          scope: punctuation.definition.string.end
+          pop: true
diff --git a/ox-syntax/syntaxes/ox.tmLanguage.json b/ox-syntax/syntaxes/ox.tmLanguage.json
@@ -0,0 +1,167 @@
+{
+ "name": "Ox",
+ "scopeName": "source.ox",
+ "fileTypes": [
+ "ox"
+ ],
+ "patterns": [
+ {
+ "include": "#comments"
+ },
+ {
+ "include": "#keywords"
+ },
+ {
+ "include": "#numbers"
+ },
+ {
+ "include": "#strings"
+ },
+ {
+ "include": "#basic-types"
+ },
+ {
+ "include": "#functions"
+ },
+ {
+ "include": "#typedefs"
+ },
+ {
+ "include": "#builtins"
+ }
+ ],
+ "repository": {
+ "keywords": {
+ "patterns": [
+ {
+ "match": "\\b(const|else|for|if|pkg|record|return|typedef|match|case|while)\\b",
+ "name": "keyword.control"
+ },
+ {
+ "match": "\\b(and|by|in|not|or)\\b",
+ "name": "keyword.operator"
+ },
+ {
+ "match": "\\b(false|nil|true)\\b",
+ "name": "constant.language"
+ }
+ ]
+ },
+ "builtins": {
+ "patterns": [
+ {
+ "match": "\\b(abs|len|assert|print|exit|panic)\\b",
+ "name": "variable.function support.function.builtin"
+ }
+ ]
+ },
+ "numbers": {
+ "patterns": [
+ {
+          "match": "\\b([+-])?[0-9]+(\\.[0-9]*(e[0-9]+)?)?\\b",
+ "name": "constant.numeric"
+ },
+ {
+ "match": "\\b([+-])?0x[0-9A-Fa-f]+\\b",
+ "name": "constant.numeric"
+ }
+ ]
+ },
+ "basic-types": {
+ "patterns": [
+ {
+ "match": "\\b(int|void|float|char|string|bool)\\b",
+ "name": "storage.type"
+ }
+ ]
+ },
+ "functions": {
+ "patterns": [
+ {
+ "match": "\\b([A-Za-z_][A-Za-z0-9_]*)\\s*(?=\\()",
+ "name": "entity.name.function"
+ }
+ ]
+ },
+ "typedefs": {
+ "patterns": [
+ {
+ "match": "\\b[A-Z][A-Za-z0-9_]*\\b",
+ "name": "entity.name.type"
+ }
+ ]
+ },
+ "strings": {
+ "patterns": [
+ {
+ "begin": "\\\"",
+ "beginCaptures": {
+ "0": {
+ "name": "punctuation.definition.string.begin"
+ }
+ },
+ "end": "\\\"",
+ "endCaptures": {
+ "0": {
+ "name": "punctuation.definition.string.end"
+ }
+ },
+ "name": "string.quoted.double",
+ "patterns": [
+ {
+ "match": "\\\\.",
+ "name": "constant.character.escape"
+ }
+ ]
+ }
+ ]
+ },
+ "block-comment": {
+ "begin": "/\\*",
+ "beginCaptures": {
+ "0": {
+ "name": "punctuation.definition.comment"
+ }
+ },
+ "end": "\\*/",
+ "endCaptures": {
+ "0": {
+ "name": "punctuation.definition.comment"
+ }
+ },
+ "name": "comment.block",
+ "patterns": [
+ {
+ "include": "#block-comment"
+ }
+ ]
+ },
+ "comments": {
+ "patterns": [
+ {
+ "include": "#block-comment"
+ },
+ {
+ "begin": "//",
+ "beginCaptures": {
+ "0": {
+ "name": "punctuation.definition.comment"
+ }
+ },
+ "end": "\\n",
+ "name": "comment.line.double-slash"
+ },
+ {
+ "begin": "#!",
+ "beginCaptures": {
+ "0": {
+ "name": "punctuation.definition.comment"
+ }
+ },
+ "end": "\\n",
+ "name": "comment.line.double-slash"
+ }
+ ]
+ }
+ }
+}
diff --git a/parser.h b/parser.h
@@ -0,0 +1,195 @@
+#pragma once
+
+#include "lexer.h"
+
+#include <stdlib.h>
+#include <stdbool.h>
+
+#define IDENTSZ 256
+
+// Kinds of AST node; stored in Node.type.
+typedef enum {
+ NODE_PROGRAM = 11,
+ NODE_FUNCTION_DECL,
+ NODE_PARAM,
+ NODE_VAR_DECL,
+ NODE_VAR_ASSIGN,
+ NODE_BLOCK,
+ NODE_CALL_EXPR,
+ NODE_RETURN,
+ NODE_BREAK,
+ NODE_CONTINUE,
+ NODE_NUMBER_LITERAL,
+ NODE_STRING_LITERAL,
+ NODE_IDENT,
+ NODE_TYPE,
+ NODE_BINARY_EXPR,
+ NODE_UNARY_EXPR,
+ NODE_EXPR_STATEMENT,
+ NODE_SUBSCRIPT_EXPR,
+ NODE_IF,
+ NODE_WHILE,
+ NODE_FOR,
+ NODE_EMPTY_STATEMENT,
+ NODE_UNKNOWN,
+} NodeType; // note: if changed, edit node_type_str!
+
+const char* node_type_str(NodeType);
+void print_node_type_str(NodeType);
+
+/*
+typedef enum {
+ OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD,
+ OP_POS, OP_NEG, OP_INC, OP_DEC,
+ OP_BITAND, OP_BITOR, OP_BITXOR, OP_BITNOT,
+ OP_SHL, OP_SHR,
+ OP_LOGAND, OP_LOGOR, OP_LOGNOT,
+ OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE,
+ OP_ASSIGN, OP_ADD_ASSIGN, OP_SUB_ASSIGN,
+ OP_MUL_ASSIGN, OP_DIV_ASSIGN, OP_MOD_ASSIGN,
+ OP_SHL_ASSIGN, OP_SHR_ASSIGN,
+ OP_AND_ASSIGN, OP_XOR_ASSIGN, OP_OR_ASSIGN,
+ OP_CONDITIONAL, OP_COMMA,
+ OP_ADDR, OP_DEREF, OP_MEMBER, OP_PTR_MEMBER,
+ OP_SUBSCRIPT, OP_CALL,
+ OP_SIZEOF, OP_ALIGNOF
+} OpType;
+*/
+
+// Binary operators; stored in Node.data.binary_expr.op.
+typedef enum {
+ OP_PLUS = 23,
+ OP_MINUS,
+ OP_MUL,
+ OP_DIV,
+ OP_MOD,
+ OP_BIT_AND, // & ampersand
+ OP_BIT_OR, // |
+ OP_ASSIGN,
+ OP_EQUALITY, // ==
+ OP_INEQUALITY, // !=
+ OP_LT_EQ,
+ OP_GT_EQ,
+ OP_LT,
+ OP_GT,
+} OpType;
+
+// Unary operators; stored in Node.data.unary_expr.op.
+// Prefix and postfix increment/decrement have distinct values here, and
+// unary_expr additionally carries an is_postfix flag.
+typedef enum {
+ OPER_MINUS = 0,
+ OPER_BANG,
+ OPER_PREINC,
+ OPER_PREDEC,
+ OPER_POSTINC,
+ OPER_POSTDEC,
+} UnaryOp;
+
+// A range of the source text, used for names and string values in the AST.
+// NOTE(review): presumably offsets into the source buffer with `end` exclusive,
+// like Token.start/end — confirm against span_str.
+typedef struct {
+ size_t start;
+ size_t end;
+} Span;
+
+// One AST node. `type` says what kind of node this is; the node constructors
+// fill in the `data` member that corresponds to that kind (e.g. NODE_UNARY_EXPR
+// uses data.unary_expr).
+typedef struct Node {
+ NodeType type;
+ struct Node* next;
+ // Scope attached to this node; node constructors leave it NULL.
+ struct Scope* scope;
+ // NOTE(review): presumably the source location the node came from — confirm where these are set.
+ const char* filename;
+ size_t line, col;
+
+ /* NOTE we will eventually add spans for condition info, etc. to print out in errors */
+
+ // Per-kind payload. Pointer/len/cap triples are growable arrays of child nodes.
+ union {
+ /* clang-format off */
+ struct { struct Node** decl; size_t len, cap; } program;
+ struct { Span name; struct Node* return_type; struct Node** params; size_t p_cap, p_len; struct Node* body; } function_decl;
+ struct { Span name; struct Node* type; } param;
+ struct { struct Node* cond; struct Node* then_body; struct Node* else_body; } if_statement;
+ struct { struct Node* cond; struct Node* body; } while_statement;
+ struct { struct Node* init; struct Node* cond; struct Node* increment; struct Node* body; } for_statement;
+ struct { struct Node** stmts; size_t cap, len; } block;
+ struct { Span name; struct Node* type; struct Node* init; } var_decl;
+ struct { struct Node* lhs; struct Node* rhs; } var_assign;
+ struct { struct Node* callee; struct Node** args; size_t cap, len; } call_expr;
+ struct { struct Node* expr; } ret;
+ struct { struct Node* expr; } cont;
+ struct { struct Node* expr; } expr_statement;
+ struct { OpType op; struct Node* lhs; struct Node* rhs; } binary_expr;
+ struct { UnaryOp op; struct Node* operand; bool is_postfix; } unary_expr;
+ struct { struct Node* array; struct Node* index; } subscript_expr;
+ struct { double value; } number;
+ struct { Span value; } string;
+ struct { Span name; } ident;
+ /* clang-format on */
+ } data;
+} Node;
+
+// Parser state, created from a Lexer by parser_init.
+// NOTE(review): tokens/src/filename presumably alias the lexer's buffers rather than copy them — confirm.
+typedef struct {
+ Token* tokens;
+ size_t token_count;
+ // Index of the next token to read.
+ size_t pos;
+ const char* src;
+ size_t src_len;
+ const char* filename;
+} Parser;
+
+// Result of parsing: the root node plus the source text that Spans refer to.
+typedef struct {
+ Node* node;
+ const char* src;
+} Ast;
+
+// Growable array of node pointers: `len` used out of `cap` allocated.
+// Returned by the parameter-list and argument-list parsers.
+typedef struct {
+ Node** items;
+ size_t len, cap;
+} NodeVec;
+
+/* Entry points */
+Parser parser_init(Lexer*);
+void parser_parse(Ast*, Parser*);
+void ast_print(Ast*);
+
+/* Token cursor helpers */
+Token peek(Parser*);
+Token peek2(Parser*);
+Token consume(Parser*);
+Token expect(Parser*, TokenType);
+bool match(Parser*, TokenType);
+bool check(Parser*, TokenType);
+
+/* Declarations */
+Node* parse_declarations(Parser*);
+Node* parse_declaration_statement(Parser*);
+Node* parse_decl_or_func_decl(Parser*);
+NodeVec parse_param_list(Parser*);
+Node* parse_type(Parser*);
+
+/* Expressions */
+Node* parse_number(Parser*);
+Node* parse_ident(Parser*);
+Node* parse_primary(Parser*);
+Node* parse_postfix(Parser*);
+Node* parse_unary(Parser*);
+Node* parse_term(Parser*);
+Node* parse_expression(Parser*);
+Node* parse_assignment(Parser*);
+Node* parse_func_call(Parser*);
+NodeVec parse_func_arguments(Parser*);
+
+/* Statements */
+Node* parse_expression_statement(Parser*);
+Node* parse_statement(Parser*);
+Node* parse_block(Parser*);
+Node* parse_if(Parser*);
+Node* parse_while(Parser*);
+Node* parse_for(Parser*);
+Node* parse_break(Parser*);
+Node* parse_continue_statement(Parser*);
+Node* parse_return_statement(Parser*);
+
+/* Node constructors */
+Node* make_program_node(void);
+Node* make_ident_node(Span name);
+Node* make_param_decl(Parser*);
+Node* make_postfix_node(UnaryOp, Node*);
+Node* make_subscript_node(Node*, Node*);
+Node* make_number_node(Parser*);
+Node* make_unary_node(UnaryOp, Node*);
+Node* make_string_node(Parser*);
+Node* make_binary_node(OpType, Node*, Node*);
+Node* make_empty_statement(void);
+Node* make_call_node(Node*, NodeVec);
+
+/*
+ * Source-text extraction helpers.
+ * NOTE(review): the last argument is presumably a caller-provided buffer the
+ * text is written into — confirm the required size against the definition.
+ */
+const char* span_str(const char* src, Span s, char* stack_alloc_chptr);
+const char* range_str(const char* src, size_t start, size_t end, char* stack_alloc_chptr);
diff --git a/parser/ast.c b/parser/ast.c
@@ -0,0 +1,120 @@
+#include "../parser.h"
+#include "../utils.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+/* Allocates a NODE_UNARY_EXPR flagged as postfix (used for `x++` / `x--`)
+ * wrapping `operand`. Panics when the allocation fails. */
+Node*
+make_postfix_node(UnaryOp op, Node* operand)
+{
+	Node* expr = calloc(1, sizeof *expr);
+	if (!expr) panic("make_postfix_node: could not alloc");
+
+	expr->data.unary_expr.is_postfix = true;
+	expr->data.unary_expr.operand = operand;
+	expr->data.unary_expr.op = op;
+	expr->next = NULL;
+	expr->scope = NULL;
+	expr->type = NODE_UNARY_EXPR;
+	return expr;
+}
+
+/* Allocates a NODE_SUBSCRIPT_EXPR representing `array[index]`.
+ * Panics when the allocation fails. */
+Node*
+make_subscript_node(Node* array, Node* index)
+{
+	Node* sub = calloc(1, sizeof *sub);
+	if (!sub) panic("make_subscript_node: could not alloc");
+
+	sub->data.subscript_expr.index = index;
+	sub->data.subscript_expr.array = array;
+	sub->next = NULL;
+	sub->scope = NULL;
+	sub->type = NODE_SUBSCRIPT_EXPR;
+	return sub;
+}
+
+/* Consumes the current token (asserted to be a number literal) and returns a
+ * NODE_NUMBER_LITERAL holding its value as converted by strtod. */
+Node*
+make_number_node(Parser* par)
+{
+	const Token lit = consume(par);
+	assert(lit.type == TOKEN_NUMBER_LITERAL);
+
+	// strtod needs a \0 terminated string, so copy the lexeme out of the source
+	const size_t n = lit.end - lit.start;
+	char text[n + 1];
+	memcpy(text, par->src + lit.start, n);
+	text[n] = '\0';
+	const double parsed = strtod(text, NULL);
+
+	Node* num = calloc(1, sizeof *num);
+	if (!num) panic("make_number_node: could not alloc");
+	num->data.number.value = parsed;
+	num->scope = NULL;
+	num->type = NODE_NUMBER_LITERAL;
+	return num;
+}
+
+/* Allocates a prefix NODE_UNARY_EXPR (is_postfix = false) applying `op` to
+ * `operand`. Panics when the allocation fails. */
+Node*
+make_unary_node(UnaryOp op, Node* operand)
+{
+	Node* expr = calloc(1, sizeof *expr);
+	if (!expr) panic("make_unary_node: could not alloc");
+
+	expr->data.unary_expr.is_postfix = false;
+	expr->data.unary_expr.operand = operand;
+	expr->data.unary_expr.op = op;
+	expr->next = NULL;
+	expr->scope = NULL;
+	expr->type = NODE_UNARY_EXPR;
+	return expr;
+}
+
+/* Consumes the current token and returns a NODE_STRING_LITERAL whose span
+ * drops the token's first and last character (the surrounding quotes). */
+Node*
+make_string_node(Parser* par)
+{
+	const Token lit = consume(par);
+
+	Node* str = calloc(1, sizeof *str);
+	if (!str) panic("make_string_node: could not alloc");
+
+	str->type = NODE_STRING_LITERAL;
+	str->scope = NULL;
+	str->data.string.value.end = lit.end - 1; // ..."
+	str->data.string.value.start = lit.start + 1; // "...
+	return str;
+}
+
+/* Allocates a NODE_BINARY_EXPR for `lhs op rhs`. Panics when the allocation
+ * fails. */
+Node*
+make_binary_node(OpType op, Node* lhs, Node* rhs)
+{
+	Node* bin = calloc(1, sizeof *bin);
+	if (!bin) panic("make_binary_node: could not alloc");
+
+	bin->data.binary_expr.rhs = rhs;
+	bin->data.binary_expr.lhs = lhs;
+	bin->data.binary_expr.op = op;
+	bin->next = NULL;
+	bin->scope = NULL;
+	bin->type = NODE_BINARY_EXPR;
+	return bin;
+}
+
+/* Allocates a NODE_EMPTY_STATEMENT (a lone `;`). Panics when the allocation
+ * fails. */
+Node*
+make_empty_statement(void)
+{
+	Node* empty = calloc(1, sizeof *empty);
+	if (!empty) panic("make_empty_statement: could not alloc");
+	assert(empty != NULL);
+
+	empty->scope = NULL;
+	empty->type = NODE_EMPTY_STATEMENT;
+	return empty;
+}
+
+/* Allocates a NODE_CALL_EXPR for `callee(args...)`. The node takes over the
+ * vector's items pointer together with its len and cap. */
+Node*
+make_call_node(Node* callee, NodeVec args)
+{
+	Node* node = calloc(1, sizeof *node);
+	if (!node) panic("make_call_node: could not alloc");
+	assert(node != NULL);
+
+	node->data.call_expr.len = args.len;
+	node->data.call_expr.cap = args.cap;
+	node->data.call_expr.args = args.items;
+	node->data.call_expr.callee = callee;
+	node->scope = NULL;
+	node->type = NODE_CALL_EXPR;
+	return node;
+}
diff --git a/parser/decl.c b/parser/decl.c
@@ -0,0 +1,69 @@
+#include "../parser.h"
+#include "../utils.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+/* Expects an identifier token and wraps its span in a NODE_TYPE node.
+ * No validation of the type name happens here. */
+Node*
+parse_type(Parser* par)
+{
+	const Token name = expect(par, TOKEN_IDENT);
+
+	// @later types get resolved in a later pass (custom vs. system); anything
+	// identifier-shaped is accepted for now, e.g. an upper-case first letter
+	// could be taken to mean a user type.
+
+	Node* ty = calloc(1, sizeof *ty);
+	if (!ty) panic("parse_type: alloc failed");
+
+	ty->data.ident.name = (Span) { .start = name.start, .end = name.end };
+	ty->scope = NULL;
+	ty->type = NODE_TYPE;
+	return ty;
+}
+
+// Parses one parameter, `<TYPE> <IDENT>`, into a NODE_PARAM that records the
+// parameter name span and its type node.
+Node*
+make_param_decl(Parser* par)
+{
+	Node* param_type = parse_type(par);
+	const Token name_tok = expect(par, TOKEN_IDENT);
+
+	Node* decl = calloc(1, sizeof *decl);
+	if (!decl) panic("make_param_decl alloc failed");
+
+	decl->type = NODE_PARAM;
+	decl->scope = NULL;
+	decl->data.param.type = param_type;
+	decl->data.param.name = (Span) { .start = name_tok.start, .end = name_tok.end };
+	return decl;
+}
+
+/*
+ * Parses a comma-separated list of `<type> <name>` parameters.
+ *
+ * Stops at the first token after a parameter that is not `,` and leaves that
+ * token unconsumed. Returns an empty vector (items == NULL, len == cap == 0)
+ * when the next token is already `)`.
+ * Panics when memory cannot be allocated.
+ */
+NodeVec
+parse_param_list(Parser* par)
+{
+	NodeVec v = { 0 };
+	if (peek(par).type == TOKEN_RPAREN) return v; // found `)` no parameters
+
+	v.cap = 4;
+	v.items = (Node**)calloc(v.cap, sizeof(Node*));
+	if (v.items == NULL) panic("parse_param_list: could not alloc");
+
+	for (;;) {
+		Node* param = make_param_decl(par);
+
+		if (v.len == v.cap) {
+			// Grow through a temporary: a failed realloc is reported
+			// instead of being written through on the next line.
+			size_t new_cap = v.cap * 2;
+			Node** grown = (Node**)realloc(v.items, new_cap * sizeof(Node*));
+			if (grown == NULL) panic("parse_param_list: could not realloc");
+			v.items = grown;
+			v.cap = new_cap;
+		}
+
+		v.items[v.len++] = param;
+
+		if (!match(par, TOKEN_COMMA)) break; // found `)` instead of `,`
+	}
+	return v;
+}
diff --git a/parser/expr.c b/parser/expr.c
@@ -0,0 +1,241 @@
+#include "../parser.h"
+#include "../utils.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+/*
+ * Parses `<ident> ( [expr {, expr}] )` into a NODE_CALL_EXPR.
+ *
+ * Argument parsing and node construction are delegated to
+ * parse_func_arguments and make_call_node, the same helpers parse_postfix
+ * uses, so there is a single copy of the argument-vector growth logic.
+ * Prints the callee name to stdout as a trace line.
+ */
+Node*
+parse_func_call(Parser* par)
+{
+	Token tok = expect(par, TOKEN_IDENT);
+	Node* ident = make_ident_node((Span) { .start = tok.start, .end = tok.end });
+
+	const char* name = span_str(par->src, ident->data.ident.name, (char[IDENTSZ]) { 0 });
+	// span_str returns NULL for an empty span; never hand NULL to %s
+	printf("parse_func_call: %s\n", name ? name : "");
+
+	expect(par, TOKEN_LPAREN);
+	NodeVec args = parse_func_arguments(par);
+	expect(par, TOKEN_RPAREN);
+
+	return make_call_node(ident, args);
+}
+
+/*
+ * Consumes a number-literal token and returns a NODE_NUMBER_LITERAL.
+ *
+ * This used to be a line-for-line copy of make_number_node; it now forwards
+ * to it so the lexeme-to-double conversion lives in one place.
+ */
+Node*
+parse_number(Parser* par)
+{
+	return make_number_node(par);
+}
+
+/* Consumes the current token (asserted to be an identifier) and returns a
+ * NODE_IDENT carrying the token's source span. */
+Node*
+parse_ident(Parser* par)
+{
+	const Token id = consume(par);
+	assert(id.type == TOKEN_IDENT);
+
+	Node* node = calloc(1, sizeof *node);
+	if (!node) panic("parse_ident: alloc failed");
+
+	node->data.ident.name = (Span) { .start = id.start, .end = id.end };
+	node->scope = NULL;
+	node->type = NODE_IDENT;
+	return node;
+}
+
+/*
+ * Parses a comma-separated list of call-argument expressions.
+ *
+ * Stops at the first token after an argument that is not `,` and leaves that
+ * token unconsumed. Returns an empty vector (items == NULL, len == cap == 0)
+ * when the next token is already `)`.
+ * Panics when memory cannot be allocated.
+ */
+NodeVec
+parse_func_arguments(Parser* par)
+{
+	NodeVec v = { 0 };
+	if (peek(par).type == TOKEN_RPAREN) return v; // found `)` no arguments
+
+	v.cap = 4;
+	v.items = (Node**)calloc(v.cap, sizeof(Node*));
+	if (v.items == NULL) panic("parse_func_arguments: could not alloc");
+
+	for (;;) {
+		Node* arg = parse_expression(par);
+
+		if (v.len == v.cap) {
+			// Grow through a temporary: a failed realloc is reported
+			// instead of being written through on the next line.
+			size_t new_cap = v.cap * 2;
+			Node** grown = (Node**)realloc(v.items, new_cap * sizeof(Node*));
+			if (grown == NULL) panic("parse_func_arguments: could not realloc");
+			v.items = grown;
+			v.cap = new_cap;
+		}
+
+		v.items[v.len++] = arg;
+
+		if (!match(par, TOKEN_COMMA)) break; // found `)` instead of `,`
+	}
+	return v;
+}
+
+/* Parses a primary expression followed by any number of postfix operators:
+ * `++`, `--`, `[index]` and `(args)`. Each operator wraps the expression built
+ * so far, so they nest left to right. */
+Node*
+parse_postfix(Parser* par)
+{
+	Node* expr = parse_primary(par);
+
+	while (true) {
+		if (match(par, TOKEN_PLUSPLUS)) {
+			expr = make_postfix_node(OPER_POSTINC, expr);
+			continue;
+		}
+		if (match(par, TOKEN_MINUSMINUS)) {
+			expr = make_postfix_node(OPER_POSTDEC, expr);
+			continue;
+		}
+		if (match(par, TOKEN_LBRACKET)) {
+			Node* index = parse_expression(par); // expression inside the brackets
+			expect(par, TOKEN_RBRACKET);
+			expr = make_subscript_node(expr, index);
+			continue;
+		}
+		// TODO dot members and arrows:
+		//   match(par, TOKEN_DOT)   -> make_member_node(expr, id)
+		//   match(par, TOKEN_ARROW) -> make_ptrmember_node(expr, id)
+		if (match(par, TOKEN_LPAREN)) {
+			NodeVec call_args = parse_func_arguments(par); // func call args
+			expect(par, TOKEN_RPAREN);
+			expr = make_call_node(expr, call_args);
+			continue;
+		}
+		return expr;
+	}
+}
+
+/* Initial capacity of the program node's declaration array. */
+#define STARTING_ROOT_NODES 32
+
+/*
+ * Allocates the root NODE_PROGRAM with an empty declaration array of
+ * STARTING_ROOT_NODES slots. Panics when either allocation fails.
+ */
+Node*
+make_program_node(void)
+{
+	Node* node = (Node*)calloc(1, sizeof(Node));
+	if (node == NULL) panic("make_program_node: alloc failed");
+	node->type = NODE_PROGRAM;
+	node->scope = NULL;
+	node->next = NULL;
+	node->data.program.cap = STARTING_ROOT_NODES;
+	node->data.program.len = 0;
+	// decl is an array of Node pointers, so size the slots as Node*
+	// (the previous sizeof(Node) over-allocated every slot).
+	node->data.program.decl = (Node**)calloc(STARTING_ROOT_NODES, sizeof(Node*));
+	if (node->data.program.decl == NULL) panic("make_program_node: decls: alloc failed");
+	return node;
+}
+
+/* Allocates a NODE_IDENT referring to the source span `name`. Panics when the
+ * allocation fails. */
+Node*
+make_ident_node(Span name)
+{
+	Node* id = calloc(1, sizeof *id);
+	if (!id) panic("make_ident_node: alloc failed");
+
+	id->data.ident.name = name;
+	id->next = NULL;
+	id->scope = NULL;
+	id->type = NODE_IDENT;
+	return id;
+}
+
+/*
+ * Parses a primary expression: a string literal, a number literal, an
+ * identifier, or a parenthesised expression. Any other token is reported
+ * through panic with its text, token type and source position.
+ */
+Node*
+parse_primary(Parser* par)
+{
+	Token tok = peek(par);
+	if (tok.type == TOKEN_STRING_LITERAL) { return make_string_node(par); }
+	if (tok.type == TOKEN_NUMBER_LITERAL) { return make_number_node(par); }
+	if (tok.type == TOKEN_IDENT) { return parse_ident(par); }
+	if (tok.type == TOKEN_LPAREN) {
+		consume(par); // consume '('
+		Node* node = parse_expression(par);
+		expect(par, TOKEN_RPAREN);
+		return node;
+	}
+
+	const char* name = span_str(
+		par->src, (Span) { .start = tok.start, .end = tok.end }, (char[IDENTSZ]) { 0 });
+	// span_str returns NULL for an empty span (e.g. the synthetic EOF token);
+	// passing NULL to %s is undefined, so fall back to an empty string.
+	panic("Expected Primary Expr, but found '%s' (%s) at %s:%zu:%zu",
+		name ? name : "",
+		token_type_str(tok.type),
+		par->filename,
+		tok.line,
+		tok.col);
+
+	return NULL;
+}
+
+/* Parses prefix operators (`-`, `--`, `!`, `++`, and a no-op `+`) applied to a
+ * unary expression; anything else falls through to parse_postfix. */
+Node*
+parse_unary(Parser* par)
+{
+	UnaryOp op;
+
+	switch (peek(par).type) {
+	case TOKEN_PLUS:
+		// unary plus produces no node of its own
+		consume(par);
+		return parse_unary(par);
+	case TOKEN_MINUS:
+		op = OPER_MINUS;
+		break;
+	case TOKEN_MINUSMINUS:
+		op = OPER_PREDEC;
+		break;
+	case TOKEN_BANG:
+		op = OPER_BANG;
+		break;
+	case TOKEN_PLUSPLUS:
+		op = OPER_PREINC;
+		break;
+	// TODO add others '~a' '$a' '*a' '^a' '@a' '&a'
+	default:
+		return parse_postfix(par);
+	}
+
+	consume(par); // the operator token
+	Node* operand = parse_unary(par);
+	return make_unary_node(op, operand);
+}
+
+// Operand of a multiplicative expression (called by parse_multiplicative);
+// currently just a unary expression.
+Node*
+parse_term(Parser* par)
+{
+	Node* term = parse_unary(par);
+	return term;
+}
diff --git a/parser/parser.c b/parser/parser.c
@@ -0,0 +1,369 @@
+#include "../parser.h"
+#include "../utils.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+// TODO make sure ALL callocs have been successful
+
+/* Builds a Parser positioned at the first token, borrowing the lexer's token
+ * array, source buffer and filename. */
+Parser
+parser_init(Lexer* lex)
+{
+	Parser par = { 0 };
+
+	par.pos = 0;
+	par.tokens = lex->tokens;
+	par.token_count = lex->token_count;
+	par.src = lex->src;
+	par.src_len = lex->src_len;
+	par.filename = lex->filename;
+	return par;
+}
+
+/*
+ * Returns the current token without consuming it.
+ *
+ * Yields a synthetic TOKEN_EOF when the position is at or past the end of the
+ * token array (the same bounds rule peek2 applies) or when the stored token
+ * has type 0.
+ */
+Token
+peek(Parser* par)
+{
+	if (par->pos >= par->token_count) return (Token) { .type = TOKEN_EOF };
+	Token t = par->tokens[par->pos];
+	return t.type ? t : (Token) { .type = TOKEN_EOF };
+}
+
+/* Returns the token after the current one without consuming anything.
+ * Yields a synthetic TOKEN_EOF when that slot is out of range or has type 0. */
+Token
+peek2(Parser* par)
+{
+	const Token eof = { .type = TOKEN_EOF };
+
+	if (par->pos + 1 >= par->token_count) return eof;
+
+	const Token ahead = par->tokens[par->pos + 1];
+	if (!ahead.type) return eof;
+	return ahead;
+}
+
+/*
+ * Returns the current token and advances past it.
+ *
+ * At or past the end of the token array, or on a token of type 0, the
+ * position is left unchanged and a synthetic TOKEN_EOF is returned, so the
+ * parser can never step outside the array.
+ */
+Token
+consume(Parser* par)
+{
+	if (par->pos >= par->token_count) return (Token) { .type = TOKEN_EOF };
+	Token t = par->tokens[par->pos];
+	if (!t.type) return (Token) { .type = TOKEN_EOF };
+	par->pos++;
+	return t;
+}
+
+/* True when the current token has the given type; consumes nothing. */
+bool
+check(Parser* p, TokenType type)
+{
+	const Token current = peek(p);
+	return current.type == type;
+}
+
+/*
+ * Consumes and returns the current token, which must have type `type`.
+ *
+ * On a mismatch, reports the expected token type, the offending token's text,
+ * its numeric type and its source position through panic.
+ */
+Token
+expect(Parser* par, TokenType type)
+{
+	Token tok = peek(par);
+	if (tok.type != type) {
+		const char* name = range_str(par->src, tok.start, tok.end, (char[IDENTSZ]) { 0 });
+		// token_type_str yields a string, so it must be printed with %s
+		// (it was %d). range_str returns NULL for an empty range such as
+		// the synthetic EOF token; never hand NULL to %s.
+		panic("Expected %s got '%s' (%d) at %s:%zu:%zu",
+			token_type_str(type),
+			name ? name : "",
+			(int)tok.type,
+			par->filename,
+			tok.line,
+			tok.col);
+		// NOTE(review): backstop kept from the original; redundant if panic
+		// never returns.
+		assert(tok.type == type);
+	}
+	return consume(par);
+}
+
+/* Consumes the current token and returns true when it has the given type;
+ * otherwise leaves the stream untouched and returns false. */
+bool
+match(Parser* p, TokenType type)
+{
+	// printf("matching type %d\n", type);
+	const bool hit = (peek(p).type == type);
+	if (hit) consume(p);
+	return hit;
+}
+
+// multiplicative: *, /, %  (left-associative)
+static Node*
+parse_multiplicative(Parser* par)
+{
+	Node* acc = parse_term(par);
+
+	for (;;) {
+		OpType op;
+		if (match(par, TOKEN_STAR))
+			op = OP_MUL;
+		else if (match(par, TOKEN_SLASH))
+			op = OP_DIV;
+		else if (match(par, TOKEN_PERCENT))
+			op = OP_MOD;
+		else
+			return acc;
+
+		Node* rhs = parse_unary(par);
+		acc = make_binary_node(op, acc, rhs);
+	}
+}
+// additive: +, -  (left-associative, operands are multiplicative expressions)
+static Node*
+parse_additive(Parser* par)
+{
+	Node* acc = parse_multiplicative(par);
+
+	for (;;) {
+		OpType op;
+		if (match(par, TOKEN_PLUS))
+			op = OP_PLUS;
+		else if (match(par, TOKEN_MINUS))
+			op = OP_MINUS;
+		else
+			return acc;
+
+		Node* rhs = parse_multiplicative(par);
+		acc = make_binary_node(op, acc, rhs);
+	}
+}
+
+// relational: <, <=, >, >=  (left-associative, operands are additive expressions)
+// NOTE(review): `<` and `>` are passed as raw character codes while the other
+// operators use OP_* enumerators - confirm OpType defines matching values.
+static Node*
+parse_relational(Parser* par)
+{
+	Node* acc = parse_additive(par);
+
+	for (;;) {
+		OpType op;
+		if (match(par, TOKEN_LT))
+			op = '<';
+		else if (match(par, TOKEN_LT_EQ))
+			op = OP_LT_EQ;
+		else if (match(par, TOKEN_GT))
+			op = '>';
+		else if (match(par, TOKEN_GT_EQ))
+			op = OP_GT_EQ;
+		else
+			return acc;
+
+		Node* rhs = parse_additive(par);
+		acc = make_binary_node(op, acc, rhs);
+	}
+}
+
+// equality: ==, !=  (left-associative, operands are relational expressions)
+static Node*
+parse_equality(Parser* par)
+{
+	Node* acc = parse_relational(par);
+
+	for (;;) {
+		OpType op;
+		if (match(par, TOKEN_EQUALITY)) // "=="
+			op = OP_EQUALITY;
+		else if (match(par, TOKEN_INEQUALITY)) // "!="
+			op = OP_INEQUALITY;
+		else
+			return acc;
+
+		Node* rhs = parse_relational(par);
+		acc = make_binary_node(op, acc, rhs);
+	}
+}
+
+/* Entry point for expressions; starts at the lowest-precedence level
+ * implemented here, equality. */
+Node*
+parse_expression(Parser* par)
+{
+	Node* expr = parse_equality(par);
+	return expr;
+}
+
+/* Parses `<expression> ;` and wraps the expression in a NODE_EXPR_STATEMENT. */
+Node*
+parse_expression_statement(Parser* par)
+{
+	Node* inner = parse_expression(par);
+	expect(par, TOKEN_SEMICOLON);
+
+	Node* stmt = calloc(1, sizeof *stmt);
+	if (!stmt) panic("parse_expression_statement: could not alloc");
+
+	stmt->data.expr_statement.expr = inner;
+	stmt->next = NULL;
+	stmt->scope = NULL;
+	stmt->type = NODE_EXPR_STATEMENT;
+	return stmt;
+}
+
+//
+// parse_statement
+//
+// Dispatches on the current token: `{` opens a block, two identifiers in a
+// row start a declaration, keywords select their statement parser, a lone
+// `;` is an empty statement, and everything else is an expression statement.
+//
+Node*
+parse_statement(Parser* par)
+{
+	const Token first = peek(par);
+	const Token second = peek2(par);
+
+	switch (first.type) {
+	case TOKEN_LBRACE:
+		consume(par);
+		return parse_block(par);
+	case TOKEN_IDENT:
+		// `<type> <name> ...` is a variable or function declaration
+		if (second.type == TOKEN_IDENT) return parse_decl_or_func_decl(par);
+		// TODO? route `ident = ...` to parse_assignment
+		return parse_expression_statement(par);
+	case TOKEN_RETURN:
+		return parse_return_statement(par);
+	case TOKEN_IF:
+		return parse_if(par);
+	case TOKEN_WHILE:
+		return parse_while(par);
+	case TOKEN_FOR:
+		return parse_for(par);
+	case TOKEN_BREAK:
+		return parse_break(par);
+	case TOKEN_CONTINUE:
+		return parse_continue_statement(par);
+	case TOKEN_SEMICOLON:
+		expect(par, TOKEN_SEMICOLON);
+		return make_empty_statement();
+	default:
+		return parse_expression_statement(par);
+	}
+}
+
+/* Parses statements up to the closing `}` (the opening `{` was consumed by
+ * the caller) and collects them in a NODE_BLOCK. The statement array starts
+ * at 4 slots and doubles when full. */
+Node*
+parse_block(Parser* par)
+{
+	Node* block = calloc(1, sizeof *block);
+	if (!block) panic("parse_block: could not alloc");
+	block->type = NODE_BLOCK;
+	block->scope = NULL;
+
+	for (;;) {
+		const TokenType upcoming = peek(par).type;
+		if (upcoming == TOKEN_RBRACE || upcoming == TOKEN_EOF) break;
+
+		Node* stmt = parse_statement(par);
+
+		if (block->data.block.cap == block->data.block.len) {
+			if (block->data.block.cap == 0)
+				block->data.block.cap = 4;
+			else
+				block->data.block.cap *= 2;
+
+			block->data.block.stmts = realloc(
+				block->data.block.stmts, block->data.block.cap * sizeof(Node*));
+			if (!block->data.block.stmts) panic("realloc failed in parse_block");
+		}
+
+		block->data.block.stmts[block->data.block.len++] = stmt;
+	}
+
+	expect(par, TOKEN_RBRACE);
+	// TODO next: the parsing of this was relying on `next` and cannot
+	// anymore, e.g. print
+	return block;
+}
+
+/*
+ * Parses a variable declaration, `<type> <name> [= <expr>] ;`, into a
+ * NODE_VAR_DECL. init is NULL when there is no initializer.
+ *
+ * The node records filename/line/col of the declared name, as the variable
+ * branch of parse_decl_or_func_decl does.
+ */
+Node*
+parse_declaration_statement(Parser* par)
+{
+	Node* type = parse_type(par); // consumes the type (e.g., "float")
+	Token ident = expect(par, TOKEN_IDENT); // variable name
+	if (match(par, TOKEN_LPAREN)) {
+		// Plain diagnostic: perror would append an unrelated errno message.
+		fprintf(stderr, "called a var decl but this looks to be a func decl\n");
+	}
+
+	Node* var = calloc(1, sizeof(Node));
+	if (var == NULL) panic("parse_declaration_statement: could not alloc");
+	var->type = NODE_VAR_DECL;
+	var->scope = NULL;
+	var->data.var_decl.name = (Span) { ident.start, ident.end };
+	var->data.var_decl.type = type;
+	var->filename = par->filename;
+	var->line = ident.line;
+	var->col = ident.col;
+
+	if (peek(par).type == TOKEN_EQUAL) {
+		consume(par); // consume '='
+		var->data.var_decl.init = parse_expression(par);
+	} else {
+		// Nothing to consume here: swallowing a token made `int i;` eat
+		// its own `;` and then fail the expect below.
+		var->data.var_decl.init = NULL;
+	}
+	expect(par, TOKEN_SEMICOLON);
+	return var;
+}
+
+/* Parses `<type> <name>` followed by either a function definition
+ * (`( params ) { body }`) or a variable declaration (`[= expr] ;`).
+ * Both node kinds record filename, line and column of the declared name. */
+Node*
+parse_decl_or_func_decl(Parser* par)
+{
+	Node* decl_type = parse_type(par); // consumes the type (e.g., "float")
+	const Token name = expect(par, TOKEN_IDENT); // variable or function name
+
+	if (!match(par, TOKEN_LPAREN)) {
+		// no `(` after the name: variable
+		Node* var = calloc(1, sizeof *var);
+		if (!var) panic("parse_decl_or_func_decl: var: could not alloc");
+
+		var->type = NODE_VAR_DECL;
+		var->scope = NULL;
+		var->filename = par->filename;
+		var->line = name.line;
+		var->col = name.col;
+		var->data.var_decl.type = decl_type;
+		var->data.var_decl.name = (Span) { name.start, name.end };
+
+		if (peek(par).type == TOKEN_EQUAL) {
+			consume(par); // consume '='
+			var->data.var_decl.init = parse_expression(par);
+		} else {
+			var->data.var_decl.init = NULL;
+		}
+		expect(par, TOKEN_SEMICOLON);
+		return var;
+	}
+
+	// `(` consumed: function
+	Node* fn = calloc(1, sizeof *fn);
+	if (!fn) panic("parse_decl_or_func_decl: func: could not alloc");
+	fn->type = NODE_FUNCTION_DECL;
+	fn->scope = NULL;
+
+	const NodeVec params = parse_param_list(par);
+	fn->data.function_decl.p_len = params.len;
+	fn->data.function_decl.p_cap = params.cap;
+	fn->data.function_decl.params = params.items;
+
+	expect(par, TOKEN_RPAREN);
+	expect(par, TOKEN_LBRACE);
+	fn->data.function_decl.body = parse_block(par);
+
+	fn->data.function_decl.return_type = decl_type;
+	fn->data.function_decl.name = (Span) { name.start, name.end };
+	fn->col = name.col;
+	fn->line = name.line;
+	fn->filename = par->filename;
+	return fn;
+}
+
+/* Parses one top-level declaration. Returns NULL at end of input, and also
+ * (after printing a message) when the current token cannot start one. */
+Node*
+parse_declarations(Parser* par)
+{
+	const Token first = peek(par);
+
+	if (first.type == TOKEN_EOF) return NULL;
+	if (first.type == TOKEN_IDENT) return parse_decl_or_func_decl(par);
+
+	printf("unknown token to parse!: %s\n", token_type_str(first.type));
+	return NULL;
+}
+
+/*
+ * Parses the whole token stream into `ast`.
+ *
+ * Collects declarations from parse_declarations until it returns NULL, stores
+ * them in a fresh program node (doubling its array when full), then sets
+ * ast->node to that program node and ast->src to the parser's source buffer.
+ * Panics when the declaration array cannot be grown.
+ */
+void
+parser_parse(Ast* ast, Parser* par)
+{
+	assert(par->token_count > 0 && "no tokens to parse");
+	Node* program = make_program_node();
+
+	for (;;) {
+		Node* node = parse_declarations(par);
+		if (node == NULL) break;
+
+		if (program->data.program.len == program->data.program.cap) {
+			// Check the result at run time: an assert disappears under
+			// NDEBUG and would leave a NULL array to be written through.
+			Node** grown = (Node**)realloc(program->data.program.decl,
+				program->data.program.cap * 2 * sizeof(Node*));
+			if (grown == NULL) panic("parser_parse: realloc failed");
+			program->data.program.decl = grown;
+			program->data.program.cap *= 2;
+		}
+		program->data.program.decl[program->data.program.len++] = node;
+	}
+
+	ast->src = par->src;
+	ast->node = program;
+}
diff --git a/parser/parser_utils.c b/parser/parser_utils.c
@@ -0,0 +1,339 @@
+#include "../parser.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+
+/*
+ * Copies src[start, end) into `stack_alloc_chptr`, NUL-terminates it and
+ * returns the buffer.
+ *
+ * Returns NULL when `src` or the buffer is NULL, or when the range is empty
+ * or inverted (end <= start). The inverted case previously wrapped around to
+ * a huge unsigned length and was handed to memcpy.
+ *
+ * NOTE(review): the buffer capacity is not known here; the caller must
+ * provide at least (end - start + 1) bytes.
+ */
+const char*
+range_str(const char* src, size_t start, size_t end, char* stack_alloc_chptr)
+{
+	if (!src || !stack_alloc_chptr) return NULL;
+	if (end <= start) return NULL; // empty or inverted range
+
+	const size_t len = end - start;
+	memcpy(stack_alloc_chptr, src + start, len);
+	stack_alloc_chptr[len] = '\0';
+	return stack_alloc_chptr;
+}
+
+/* Span flavour of range_str: copies the text covered by `s` into the
+ * caller's buffer and returns whatever range_str returns. */
+const char*
+span_str(const char* src, Span s, char* stack_alloc_chptr)
+{
+	const char* text = range_str(src, s.start, s.end, stack_alloc_chptr);
+	return text;
+}
+
+// int span_to_str(const char* src, size_t start, size_t end, char* out_buf) {
+// if (!src || !out_buf) return -1; /* Null pointer passed */
+// if (start >= end) return -2; /* Empty or inverted span */
+// const size_t len = end - start;
+// if (len >= IDENTSZ) return -4; /* Identifier too long */
+// const char* src_end = strchr(src, '\0');
+// if (!src_end) return -5; /* src not NUL‑terminated */
+// const size_t src_len = (size_t)(src_end - src);
+
+// if (end > src_len) return -6; /* Span overruns source */
+
+// if (memchr(src + start, '\0', len))
+// return -7; /* span crosses a NUL byte */
+
+// memcpy(out_buf, src + start, len);
+// out_buf[len] = '\0';
+
+// return 0;
+// }
+
+// char* span_to_str_alloc(const char* src, size_t start, size_t end) {
+// if (!src || start >= end) return NULL;
+// const char* src_end = strchr(src, '\0');
+// if (!src_end) return NULL;
+// size_t src_len = (size_t)(src_end - src);
+// if (end > src_len) return NULL;
+// size_t n = end - start;
+// if (memchr(src + start, '\0', n)) return NULL;
+// char* s = calloc(1, n + 1);
+// if (!s) return NULL;
+// memcpy(s, src + start, n);
+// s[n] = '\0';
+// return s;
+// }
+
+static void print_node(const char* source, Node* node, int level);
+
+/* range_str/span_str return NULL for an empty range and passing NULL to %s is
+ * undefined, so substitute an empty string. */
+static const char*
+str_or_empty(const char* s)
+{
+	return s ? s : "";
+}
+
+/* Prints an indented "↳ <label>:" heading followed by the child's subtree one
+ * level deeper. Does nothing when `child` is NULL. */
+static void
+print_labeled_child(const char* source, const char* label, Node* child, int level)
+{
+	if (child == NULL) return;
+	printf("%*s ↳ %s:\n", level * 2, "", label);
+	print_node(source, child, level + 1);
+}
+
+/* Prints `count` entries of a node array, each one level deeper. */
+static void
+print_children(const char* source, Node** items, size_t count, int level)
+{
+	for (size_t i = 0; i < count; i++)
+		print_node(source, items[i], level + 1);
+}
+
+/*
+ * Recursively dumps `node` and its children to stdout, indenting by `level`.
+ * `source` is the text the node spans index into. Nodes linked through
+ * `next` are printed after the node itself at the same level.
+ */
+static void
+print_node(const char* source, Node* node, int level)
+{
+	assert(node != NULL);
+	assert(level < 192);
+
+	const char* name;
+	switch (node->type) {
+	case NODE_FUNCTION_DECL:
+		name = span_str(source, node->data.function_decl.name, (char[IDENTSZ]) { 0 });
+		printf("%*s FUNC DECL: name='%s'\n", level, "", str_or_empty(name));
+		print_labeled_child(
+			source, "return type", node->data.function_decl.return_type, level);
+		if (node->data.function_decl.params) {
+			printf("%*s ↳ params:\n", level * 2, "");
+			print_children(source,
+				node->data.function_decl.params,
+				node->data.function_decl.p_len,
+				level);
+		} else {
+			printf("%*s ↳ params: N/A\n", level * 2, "");
+		}
+		print_labeled_child(source, "body", node->data.function_decl.body, level);
+		break;
+	case NODE_PARAM:
+		name = span_str(source, node->data.param.name, (char[IDENTSZ]) { 0 });
+		printf("%*s ↳ param: name='%s'\n", level * 2, "", str_or_empty(name));
+		if (node->data.param.type) { print_node(source, node->data.param.type, level + 1); }
+		break;
+	case NODE_VAR_DECL:
+		name = span_str(source, node->data.var_decl.name, (char[IDENTSZ]) { 0 });
+		printf("%*s VAR DECL: name='%s'\n", level, "", str_or_empty(name));
+		print_labeled_child(source, "type", node->data.var_decl.type, level);
+		print_labeled_child(source, "init", node->data.var_decl.init, level);
+		break;
+	case NODE_PROGRAM:
+		printf("%*s PROGRAM:\n", level, "");
+		if (node->data.program.decl) {
+			print_children(
+				source, node->data.program.decl, node->data.program.len, level);
+		}
+		break;
+	case NODE_BLOCK:
+		printf("%*s BLOCK:\n", level, "");
+		if (node->data.block.stmts) {
+			print_children(source, node->data.block.stmts, node->data.block.len, level);
+		}
+		break;
+	case NODE_CALL_EXPR:
+		printf("%*s ↳ FUNC CALL:\n", level, "");
+		print_labeled_child(source, "callee", node->data.call_expr.callee, level);
+		if (node->data.call_expr.args) {
+			printf("%*s ↳ args:\n", level * 2, "");
+			print_children(
+				source, node->data.call_expr.args, node->data.call_expr.len, level);
+		}
+		break;
+	case NODE_RETURN:
+		printf("%*s RETURN statement:\n", level, "");
+		if (node->data.ret.expr) { print_node(source, node->data.ret.expr, level + 1); }
+		break;
+	case NODE_CONTINUE:
+		printf("%*s CONTINUE statement\n", level, "");
+		if (node->data.cont.expr) { print_node(source, node->data.cont.expr, level + 1); }
+		break;
+	case NODE_NUMBER_LITERAL:
+		printf("%*s ↳ LITERAL NUMBER value=%f\n", level * 2, "", node->data.number.value);
+		break;
+	case NODE_STRING_LITERAL: {
+		// Print straight from the source with a precision: a literal may
+		// be empty or longer than an IDENTSZ scratch buffer.
+		const Span lit = node->data.string.value;
+		const char* text = source ? source + lit.start : "";
+		const int len = (source && lit.end > lit.start) ? (int)(lit.end - lit.start) : 0;
+		printf("%*s ↳ LITERAL STRING value=\"%.*s\"\n", level * 2, "", len, text);
+		break;
+	}
+	case NODE_TYPE:
+		name = span_str(source, node->data.ident.name, (char[IDENTSZ]) { 0 });
+		printf("%*s ↳ TYPE name='%s'\n", level * 2, "", str_or_empty(name));
+		break;
+	case NODE_IDENT:
+		name = span_str(source, node->data.ident.name, (char[IDENTSZ]) { 0 });
+		printf("%*s ↳ IDENT name='%s'\n", level * 2, "", str_or_empty(name));
+		break;
+	case NODE_UNKNOWN:
+		break;
+	case NODE_VAR_ASSIGN: {
+		Node* lhs = node->data.var_assign.lhs;
+		name = lhs ? span_str(source, lhs->data.ident.name, (char[IDENTSZ]) { 0 }) : NULL;
+		printf("%*s VAR ASSIGN: name='%s'\n", level, "", str_or_empty(name));
+		break;
+	}
+	case NODE_BREAK:
+		printf("%*s BREAK statement\n", level, "");
+		break;
+	case NODE_BINARY_EXPR:
+		printf("%*s BINARY EXPR op='%c'\n", level, "", node->data.binary_expr.op);
+		print_labeled_child(source, "lhs", node->data.binary_expr.lhs, level);
+		print_labeled_child(source, "rhs", node->data.binary_expr.rhs, level);
+		break;
+	case NODE_UNARY_EXPR:
+		printf("%*s UNARY EXPR: op='%d' is_postfix='%s'\n",
+			level,
+			"",
+			node->data.unary_expr.op,
+			node->data.unary_expr.is_postfix ? "true" : "false");
+		print_labeled_child(source, "operand", node->data.unary_expr.operand, level);
+		break;
+	case NODE_EXPR_STATEMENT:
+		printf("%*s EXPR STMT:\n", level, "");
+		if (node->data.expr_statement.expr) {
+			print_node(source, node->data.expr_statement.expr, level + 1);
+		}
+		break;
+	case NODE_SUBSCRIPT_EXPR:
+		printf("%*s SUBSCRIPT expr:\n", level, "");
+		print_labeled_child(source, "array", node->data.subscript_expr.array, level);
+		print_labeled_child(source, "index", node->data.subscript_expr.index, level);
+		break;
+	case NODE_IF:
+		printf("%*s IF Statement:\n", level, "");
+		print_labeled_child(source, "cond", node->data.if_statement.cond, level);
+		print_labeled_child(source, "then body", node->data.if_statement.then_body, level);
+		print_labeled_child(source, "else body", node->data.if_statement.else_body, level);
+		break;
+	case NODE_WHILE:
+		printf("%*s WHILE Statement:\n", level, "");
+		print_labeled_child(source, "cond", node->data.while_statement.cond, level);
+		print_labeled_child(source, "body", node->data.while_statement.body, level);
+		break;
+	case NODE_FOR:
+		printf("%*s FOR Statement:\n", level, "");
+		print_labeled_child(source, "init", node->data.for_statement.init, level);
+		print_labeled_child(source, "cond", node->data.for_statement.cond, level);
+		print_labeled_child(source, "increment", node->data.for_statement.increment, level);
+		print_labeled_child(source, "body", node->data.for_statement.body, level);
+		break;
+	case NODE_EMPTY_STATEMENT:
+		printf("%*s EMPTY Statement\n", level, "");
+		break;
+	}
+
+	// The recursive call prints the rest of the `next` chain itself, so
+	// recurse once; looping here as well printed later siblings repeatedly.
+	if (node->next) print_node(source, node->next, level);
+}
+
+/* Dumps the whole tree to stdout, starting from the root at indent level 0. */
+void
+ast_print(Ast* ast)
+{
+	Node* root = ast->node;
+	const char* text = ast->src;
+	print_node(text, root, 0);
+}
+
+/* Debug helper: prints the symbolic name of a node type to stdout. */
+void
+print_node_type_str(NodeType t)
+{
+	const char* label = node_type_str(t);
+	printf("print_node_type_str: %s\n", label);
+}
+
+/*
+ * Returns the enumerator name of `t` as a string, or "UNKNOWN_NODE_TYPE" for
+ * values without a table entry.
+ *
+ * The lookup is bounded by the table size and rejects NULL gaps, so it does
+ * not depend on which enumerators happen to be declared first and last.
+ */
+const char*
+node_type_str(NodeType t)
+{
+	static const char* type_strings[] = { [NODE_PROGRAM] = "NODE_PROGRAM",
+		[NODE_FUNCTION_DECL] = "NODE_FUNCTION_DECL",
+		[NODE_PARAM] = "NODE_PARAM",
+		[NODE_VAR_DECL] = "NODE_VAR_DECL",
+		[NODE_BLOCK] = "NODE_BLOCK",
+		[NODE_CALL_EXPR] = "NODE_CALL_EXPR",
+		[NODE_RETURN] = "NODE_RETURN",
+		[NODE_CONTINUE] = "NODE_CONTINUE",
+		[NODE_NUMBER_LITERAL] = "NODE_NUMBER_LITERAL",
+		[NODE_STRING_LITERAL] = "NODE_STRING_LITERAL",
+		[NODE_TYPE] = "NODE_TYPE",
+		[NODE_IDENT] = "NODE_IDENT",
+		[NODE_UNKNOWN] = "NODE_UNKNOWN",
+		[NODE_VAR_ASSIGN] = "NODE_VAR_ASSIGN",
+		[NODE_BREAK] = "NODE_BREAK",
+		[NODE_BINARY_EXPR] = "NODE_BINARY_EXPR",
+		[NODE_UNARY_EXPR] = "NODE_UNARY_EXPR",
+		[NODE_EXPR_STATEMENT] = "NODE_EXPR_STATEMENT",
+		[NODE_SUBSCRIPT_EXPR] = "NODE_SUBSCRIPT_EXPR",
+		[NODE_IF] = "NODE_IF",
+		[NODE_WHILE] = "NODE_WHILE",
+		[NODE_FOR] = "NODE_FOR",
+		[NODE_EMPTY_STATEMENT] = "NODE_EMPTY_STATEMENT" };
+	const size_t count = sizeof(type_strings) / sizeof(type_strings[0]);
+	const size_t idx = (size_t)t; // a negative value becomes huge and fails the bound
+
+	if (idx < count && type_strings[idx] != NULL) return type_strings[idx];
+	return "UNKNOWN_NODE_TYPE";
+}
diff --git a/parser/stmt.c b/parser/stmt.c
@@ -0,0 +1,180 @@
+#include "../parser.h"
+#include "../utils.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+// Parse `if ( <expr> ) <stmt> [ else <stmt> ]` and return it as a NODE_IF node.
+// else_body stays NULL when there is no else branch.
+// Panics when the node cannot be allocated.
+Node*
+parse_if(Parser* par)
+{
+	expect(par, TOKEN_IF);
+	expect(par, TOKEN_LPAREN); // @later remove necessity for parens
+	Node* condition = parse_expression(par);
+	expect(par, TOKEN_RPAREN);
+
+	Node* on_true = parse_statement(par);
+
+	// the else branch is optional: it is only parsed when match() reports TOKEN_ELSE
+	Node* on_false = match(par, TOKEN_ELSE) ? parse_statement(par) : NULL;
+
+	Node* result = (Node*)calloc(1, sizeof(*result));
+	if (!result) panic("parse_if: could not alloc");
+
+	result->type = NODE_IF;
+	result->scope = NULL;
+	result->data.if_statement.else_body = on_false;
+	result->data.if_statement.then_body = on_true;
+	result->data.if_statement.cond = condition;
+	return result;
+}
+
+// Parse `while ( <expr> ) <stmt>` and wrap both parts in a NODE_WHILE node.
+// Panics when the node cannot be allocated.
+Node*
+parse_while(Parser* par)
+{
+	// header: the keyword followed by a parenthesised condition
+	expect(par, TOKEN_WHILE);
+	expect(par, TOKEN_LPAREN);
+	Node* condition = parse_expression(par);
+	expect(par, TOKEN_RPAREN);
+
+	Node* loop_body = parse_statement(par);
+
+	Node* loop = (Node*)calloc(1, sizeof(*loop));
+	if (!loop) panic("parse_while: could not alloc");
+
+	loop->type = NODE_WHILE;
+	loop->scope = NULL;
+	loop->data.while_statement.body = loop_body;
+	loop->data.while_statement.cond = condition;
+	return loop;
+}
+
+// Parse `for ( [init] ; [cond] ; [increment] ) <stmt>` into a NODE_FOR node.
+//
+// Each of the three header clauses may be empty; the matching field of
+// data.for_statement is then left NULL. Panics if the node cannot be allocated.
+Node*
+parse_for(Parser* par)
+{
+	expect(par, TOKEN_FOR);
+	expect(par, TOKEN_LPAREN);
+
+	// init can be empty, a declaration, or an expression statement
+	Node* init = NULL; // e.g. `int i = 0`
+	if (!check(par, TOKEN_SEMICOLON)) {
+		// An identifier reported by peek2() selects the declaration parser,
+		// anything else is parsed as an expression statement.
+		// NOTE(review): presumably peek2() looks one token past the current one,
+		// so `<type> <name>` is what triggers the declaration path -- confirm.
+		Token tok2 = peek2(par);
+		if (tok2.type == TOKEN_IDENT) {
+			init = parse_declaration_statement(par);
+		} else {
+			init = parse_expression_statement(par);
+		}
+		// NOTE(review): no ';' is expected on this path, so both statement parsers
+		// presumably consume their own terminator -- confirm.
+	} else {
+		expect(par, TOKEN_SEMICOLON);
+	}
+
+	Node* cond = NULL; // e.g. `i < len`, optional expression
+	if (!check(par, TOKEN_SEMICOLON)) cond = parse_expression(par);
+	expect(par, TOKEN_SEMICOLON);
+
+	Node* inc = NULL; // e.g. `i++`, optional expression
+	if (!check(par, TOKEN_RPAREN)) inc = parse_expression(par);
+	expect(par, TOKEN_RPAREN);
+
+	Node* body = parse_statement(par);
+
+	Node* node = (Node*)calloc(1, sizeof(Node));
+	// The allocation has to be validated before any field is written through `node`.
+	if (node == NULL) panic("parse_for: could not alloc");
+
+	node->type = NODE_FOR;
+	node->scope = NULL;
+	node->data.for_statement.init = init;
+	node->data.for_statement.cond = cond;
+	node->data.for_statement.increment = inc;
+	node->data.for_statement.body = body;
+	return node;
+}
+
+// Parse `<ident> = <expr>` into a NODE_VAR_ASSIGN node.
+//
+// The left-hand side is stored as a freshly allocated NODE_IDENT node carrying the
+// identifier's source span; the right-hand side is the parsed expression.
+// No terminating ';' is consumed here. Panics if either node cannot be allocated.
+Node*
+parse_assignment(Parser* par)
+{
+	Token ident = expect(par, TOKEN_IDENT);
+	Span name = { .start = ident.start, .end = ident.end };
+
+	expect(par, TOKEN_EQUAL);
+	Node* expr = parse_expression(par);
+
+	Node* assign = (Node*)calloc(1, sizeof(Node));
+	if (assign == NULL) panic("parse_assignment: could not alloc");
+	assign->type = NODE_VAR_ASSIGN;
+	assign->scope = NULL;
+	assign->data.var_assign.lhs = (Node*)calloc(1, sizeof(Node));
+	// the message names this function so the failure can be traced to the right site
+	if (assign->data.var_assign.lhs == NULL)
+		panic("parse_assignment: lhs: could not alloc");
+	/*
+	        Forms an assignment target can take; only the first is handled so far.
+
+	        identifier
+	        x = 5;
+
+	        member field access
+	        obj.field = 5;
+	        obj->field = 5;
+
+	        array or pointer indexing
+	        arr[0] = 5;
+	        *(p + 1) = 5;
+
+	        dereference
+	        *p = 5;
+	*/
+	assign->data.var_assign.lhs->type = NODE_IDENT; // TODO handle other cases
+	assign->data.var_assign.lhs->scope = NULL;
+	assign->data.var_assign.lhs->data.ident.name = name;
+	assign->data.var_assign.rhs = expr;
+	return assign;
+}
+
+// Parse `break ;` and return a NODE_BREAK node, which carries no payload.
+// Panics when the node cannot be allocated.
+Node*
+parse_break(Parser* par)
+{
+	expect(par, TOKEN_BREAK);
+	expect(par, TOKEN_SEMICOLON);
+
+	Node* brk = (Node*)calloc(1, sizeof(*brk));
+	if (!brk) panic("parse_break: could not alloc");
+
+	brk->scope = NULL;
+	brk->type = NODE_BREAK;
+	return brk;
+}
+
+// Parse `continue [<expr>] ;` into a NODE_CONTINUE node.
+//
+// data.cont.expr holds the optional expression, or NULL when `continue` is followed
+// directly by ';'. Panics if the node cannot be allocated.
+Node*
+parse_continue_statement(Parser* par)
+{
+	expect(par, TOKEN_CONTINUE); // consume 'continue'
+
+	Node* node = (Node*)calloc(1, sizeof(Node));
+	if (node == NULL) panic("parse_continue_statement: could not alloc");
+	node->type = NODE_CONTINUE;
+	node->scope = NULL;
+
+	// anything other than ';' right after the keyword is parsed as an expression
+	TokenType next_type = peek(par).type;
+
+	if (next_type != TOKEN_SEMICOLON)
+		node->data.cont.expr = parse_expression(par);
+	else
+		node->data.cont.expr = NULL;
+
+	expect(par, TOKEN_SEMICOLON);
+	return node;
+}
+
+// Parse `return [<expr>] ;` into a NODE_RETURN node.
+//
+// data.ret.expr holds the returned expression, or NULL for a bare `return;`.
+// Panics if the node cannot be allocated.
+Node*
+parse_return_statement(Parser* par)
+{
+	expect(par, TOKEN_RETURN); // consume 'return'
+	Node* ret = (Node*)calloc(1, sizeof(Node));
+	if (ret == NULL) panic("parse_return_statement: could not alloc");
+	ret->type = NODE_RETURN;
+	ret->scope = NULL;
+
+	// anything other than ';' right after the keyword is parsed as the return value
+	TokenType next_type = peek(par).type;
+
+	if (next_type != TOKEN_SEMICOLON)
+		ret->data.ret.expr = parse_expression(par);
+	else
+		ret->data.ret.expr = NULL;
+
+	expect(par, TOKEN_SEMICOLON);
+	return ret;
+}
diff --git a/sem.c b/sem.c
@@ -0,0 +1,205 @@
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "sem.h"
+#include "parser.h"
+#include "utils.h"
+
+#define CALLOC_SZ 16
+#define BASE_DEPTH 1
+
+static int next_id = 100;
+
+// Build a parentless scope by value: depth BASE_DEPTH, owned by `node`, with empty
+// symbol and child tables of CALLOC_SZ slots each and the next id from the
+// file-level counter. Panics if either table cannot be allocated.
+Scope
+scope_init(Node* node)
+{
+	Scope root = { 0 };
+
+	root.parent = NULL;
+
+	root.symbols = (Symbol**)calloc(CALLOC_SZ, sizeof(Symbol*));
+	root.cap = CALLOC_SZ;
+	root.len = 0;
+
+	root.children = (Scope**)calloc(CALLOC_SZ, sizeof(Scope*));
+	root.ch_cap = CALLOC_SZ;
+	root.ch_len = 0;
+
+	root.depth = BASE_DEPTH;
+	root.owner = node;
+	root.id = next_id++;
+
+	if (!root.symbols) panic("scope_init: could not alloc");
+	if (!root.children) panic("scope_init: could not alloc");
+	return root;
+}
+
+// Allocate a new scope for `node` and, when `parent_scope` is given, register it as
+// a child of that scope.
+//
+// The new scope gets the next id, empty symbol/child tables of CALLOC_SZ slots, and
+// a depth of parent depth + 1 (BASE_DEPTH without a parent). `node->scope` is set to
+// the new scope when `node` is not NULL. Panics on any allocation failure.
+static Scope*
+new_scope_from_scope(Scope* parent_scope, Node* node)
+{
+	// new scope
+	Scope* scope = (Scope*)calloc(1, sizeof(Scope));
+	if (scope == NULL) panic("new_scope_from_scope: could not alloc");
+
+	scope->id = next_id++;
+	scope->owner = node;
+	if (node != NULL) node->scope = scope;
+
+	// init symbols list
+	scope->symbols = (Symbol**)calloc(CALLOC_SZ, sizeof(Symbol*));
+	if (scope->symbols == NULL) panic("new_scope_from_scope: symbols: could not alloc");
+	scope->cap = CALLOC_SZ;
+	scope->len = 0;
+
+	scope->children = (Scope**)calloc(CALLOC_SZ, sizeof(Scope*));
+	if (scope->children == NULL) panic("new_scope_from_scope: children: could not alloc");
+	scope->ch_cap = CALLOC_SZ;
+	scope->ch_len = 0;
+
+	// init parent and depth
+	if (parent_scope != NULL) {
+		scope->parent = parent_scope;
+		scope->depth = parent_scope->depth + 1;
+		assert(parent_scope->children != NULL);
+		if (parent_scope->ch_len == parent_scope->ch_cap) {
+			// Grow through a temporary and fail with panic() like every other
+			// allocation here: an assert is compiled out under NDEBUG, and the
+			// table and its capacity must only change once realloc succeeded.
+			size_t grown_cap
+			        = parent_scope->ch_cap ? parent_scope->ch_cap * 2 : CALLOC_SZ;
+			Scope** grown = (Scope**)realloc(
+			        parent_scope->children, grown_cap * sizeof(Scope*));
+			if (grown == NULL)
+				panic("new_scope_from_scope: children: could not realloc");
+			parent_scope->children = grown;
+			parent_scope->ch_cap = grown_cap;
+		}
+		parent_scope->children[parent_scope->ch_len++] = scope;
+	} else {
+		scope->parent = NULL;
+		scope->depth = BASE_DEPTH;
+	}
+
+	return scope;
+}
+
+// Append `sym` to the scope's symbol table, doubling the table when it is full.
+// Panics if the table cannot be grown.
+static void
+add_to_scope(Scope* scope, Symbol* sym)
+{
+	if (scope->len >= scope->cap) {
+		// Grow through a temporary: realloc returns NULL on failure and leaves the
+		// old block alone, so neither the table nor cap may change before the
+		// result is known to be valid.
+		size_t grown_cap = scope->cap ? scope->cap * 2 : CALLOC_SZ;
+		Symbol** grown = (Symbol**)realloc(scope->symbols, grown_cap * sizeof(Symbol*));
+		if (grown == NULL) panic("add_to_scope: could not realloc");
+		scope->symbols = grown;
+		scope->cap = grown_cap;
+	}
+	scope->symbols[scope->len++] = sym;
+}
+
+// Record the variable declared by `node` (a NODE_VAR_DECL) as a symbol in `scope`.
+//
+// The declared type name is matched against the built-in names "float", "int",
+// "string" and "uint"; any other name starting with 'A'..'Z' is treated as a user
+// type, and everything else is a fatal error. Panics on allocation failure.
+static void
+scope_var(Scope* scope, Ast* ast, Node* node)
+{
+	static const struct {
+		const char* name;
+		SymbolType kind;
+	} builtins[] = {
+		{ "float", SYMTYPE_FLOAT },
+		{ "int", SYMTYPE_INT },
+		{ "string", SYMTYPE_STRING },
+		{ "uint", SYMTYPE_UINT },
+	};
+	const size_t builtin_count = sizeof(builtins) / sizeof(builtins[0]);
+
+	// scratch buffers span_str() is given for the two identifiers
+	char var_buf[IDENTSZ] = { 0 };
+	char type_buf[IDENTSZ] = { 0 };
+	const char* var_name = span_str(ast->src, node->data.var_decl.name, var_buf);
+	const char* type_name
+	        = span_str(ast->src, node->data.var_decl.type->data.ident.name, type_buf);
+
+	Symbol* sym = (Symbol*)calloc(1, sizeof(*sym));
+	if (!sym) panic("scope_var: symbol: could not alloc");
+
+	TypeInfo* type = (TypeInfo*)calloc(1, sizeof(*type));
+	if (!type) panic("scope_var: type: could not alloc");
+
+	// look the name up among the built-ins, in the order listed above
+	size_t hit = 0;
+	while (hit < builtin_count && strcmp(type_name, builtins[hit].name) != 0) hit++;
+
+	if (hit < builtin_count) {
+		type->type = builtins[hit].kind;
+	} else if (type_name[0] >= 'A' && type_name[0] <= 'Z') {
+		// capitalised names are taken to be user-defined types
+		type->type = SYMTYPE_USER;
+	} else {
+		panic("sem: not yet defined type '%s' for variable '%s'", type_name, var_name);
+	}
+
+	sym->type = type;
+	sym->decl = node->data.var_decl.init;
+	sym->name = node->data.var_decl.name;
+
+	assert(var_name != NULL);
+
+	add_to_scope(scope, sym);
+}
+
+// Create a child scope of `parent_scope` for the block `node` and register every
+// variable declared directly among the block's statements. Other statement kinds
+// are ignored.
+static void
+scope_func(Scope* parent_scope, Ast* ast, Node* node)
+{
+	Scope* fn_scope = new_scope_from_scope(parent_scope, node);
+
+	for (size_t idx = 0; idx < node->data.block.len; idx++) {
+		Node* stmt = node->data.block.stmts[idx];
+
+		// only declarations contribute symbols
+		if (stmt->type != NODE_VAR_DECL) continue;
+		scope_var(fn_scope, ast, stmt);
+	}
+}
+
+// Populate `scope` from the top-level declarations of the program in `ast`:
+// variables become symbols of `scope`, functions get a child scope built from their
+// body. Any other declaration kind is only reported on stdout.
+void
+scope_build(Scope* scope, Ast* ast)
+{
+	for (size_t idx = 0; idx < ast->node->data.program.len; idx++) {
+		Node* decl = ast->node->data.program.decl[idx];
+
+		if (decl->type == NODE_VAR_DECL) {
+			scope_var(scope, ast, decl);
+		} else if (decl->type == NODE_FUNCTION_DECL) {
+			// `scope` acts as the parent of the function's own scope
+			scope_func(scope, ast, decl->data.function_decl.body);
+		} else {
+			printf("unknown definition at TODO\n");
+		}
+	}
+}
+
+// Print one line per symbol of `scope` to stdout, then do the same for every child
+// scope, depth-first. Does nothing for a NULL scope or one without a symbol table.
+void
+scope_print(Scope* scope, Ast* ast)
+{
+	if (scope == NULL || scope->symbols == NULL) return;
+
+	// both values describe the scope rather than a symbol, so work them out once;
+	// -1 stands for "no parent"
+	const int parent_id = (scope->parent != NULL) ? scope->parent->id : -1;
+	const char* owner_label = (scope->owner != NULL) ? "yes" : "no";
+
+	for (size_t idx = 0; idx < scope->len; idx++) {
+		Symbol* sym = scope->symbols[idx];
+		char name_buf[IDENTSZ] = { 0 };
+		const char* name = span_str(ast->src, sym->name, name_buf);
+
+		printf("[depth %d] [id %d] Symbol name `%s` \t of type %s (parent %d, owner %s)\n",
+		        scope->depth,
+		        scope->id,
+		        name,
+		        type_kind_str(sym->type->type),
+		        parent_id,
+		        owner_label);
+	}
+
+	// the loop simply does not run for a scope without children
+	for (size_t child = 0; child < scope->ch_len; child++) {
+		scope_print(scope->children[child], ast);
+	}
+}
+
+// Map a SymbolType to its display name, for diagnostics.
+//
+// Returns "UNKNOWN_TYPE_KIND" for any value outside the enum, so the result is
+// never NULL and can always be handed to printf("%s").
+const char*
+type_kind_str(SymbolType t)
+{
+	// SymbolType does not start at 0, so entries are indexed relative to
+	// SYMTYPE_VOID; otherwise the table would carry one unused slot per skipped value.
+	static const char* const type_strings[] = {
+		[SYMTYPE_VOID - SYMTYPE_VOID] = "TYPE_VOID",
+		[SYMTYPE_INT - SYMTYPE_VOID] = "TYPE_INT",
+		[SYMTYPE_UINT - SYMTYPE_VOID] = "TYPE_UINT",
+		[SYMTYPE_FLOAT - SYMTYPE_VOID] = "TYPE_FLOAT",
+		[SYMTYPE_STRING - SYMTYPE_VOID] = "TYPE_STRING",
+		[SYMTYPE_STRUCT - SYMTYPE_VOID] = "TYPE_STRUCT",
+		[SYMTYPE_USER - SYMTYPE_VOID] = "TYPE_USER",
+		[SYMTYPE_ARRAY - SYMTYPE_VOID] = "TYPE_ARRAY",
+		[SYMTYPE_ENUM - SYMTYPE_VOID] = "TYPE_ENUM",
+		[SYMTYPE_FUNC - SYMTYPE_VOID] = "TYPE_FUNC",
+		[SYMTYPE_TODO - SYMTYPE_VOID] = "TYPE_TODO",
+	};
+	const size_t count = sizeof(type_strings) / sizeof(type_strings[0]);
+
+	if (t < SYMTYPE_VOID) return "UNKNOWN_TYPE_KIND";
+
+	const size_t slot = (size_t)(t - SYMTYPE_VOID);
+	// An enumerator added to SymbolType without an entry here leaves a NULL hole in
+	// the table; treat that like an out-of-range value.
+	if (slot >= count || type_strings[slot] == NULL) return "UNKNOWN_TYPE_KIND";
+	return type_strings[slot];
+}
diff --git a/sem.h b/sem.h
@@ -0,0 +1,111 @@
+#pragma once
+
+#include <stdlib.h>
+
+#include "parser.h"
+
+// Kind tag of a type, stored in TypeInfo.type.
+// Values start at 108 and increase by one per enumerator, in the order listed.
+typedef enum {
+ SYMTYPE_VOID = 108,
+ SYMTYPE_INT,
+ SYMTYPE_UINT,
+ SYMTYPE_FLOAT,
+ SYMTYPE_STRING,
+ SYMTYPE_STRUCT,
+ SYMTYPE_USER,
+ SYMTYPE_ARRAY,
+ SYMTYPE_ENUM,
+ SYMTYPE_FUNC,
+ SYMTYPE_TODO,
+} SymbolType; // note also update type_kind_str!
+
+// Returns a printable name for a SymbolType value (defined in sem.c).
+const char* type_kind_str(SymbolType);
+
+// Kind of value an enum field carries; stored in EnumField.kind.
+// NOTE(review): presumably selects which member of EnumField.val is meaningful --
+// confirm once the enum support is implemented.
+typedef enum {
+ ENUM_VALUE_INT,
+ ENUM_VALUE_STRING,
+} EnumValueKind;
+
+// A named, typed field of a struct type. Not referenced by sem.c.
+typedef struct StructField {
+ char* name;
+ // `struct Type` is the struct that is typedef'd as TypeInfo further down
+ struct Type* type;
+} StructField;
+
+// A named member of an enum type together with its value. Not referenced by sem.c.
+typedef struct EnumField {
+ char* name;
+ EnumValueKind kind;
+ union { // not used?
+ int int_value;
+ char* string_value;
+ } val;
+} EnumField;
+
+// Signature of a method attached to a struct type: its name, return type and
+// parameter types. Not referenced by sem.c.
+typedef struct StructMethod {
+ char* name;
+ struct Type* return_type;
+ struct Type** param_types;
+ // NOTE(review): presumably the used length and allocated capacity of
+ // param_types -- confirm once methods are implemented
+ int params_count;
+ int params_cap;
+ // TODO add ptr to func decl of this struct method
+} StructMethod;
+
+// Type information attached to a symbol. At present it carries only the kind tag;
+// the per-kind payload (struct / array / enum details) below is still commented out.
+typedef struct Type {
+ SymbolType type;
+
+ // union {
+ // struct StructType {
+ // const char* struct_name;
+ // int fields_count;
+ // int methods_count;
+ // StructField* fields;
+ // StructMethod* methods;
+ // } struct_t;
+
+ // struct ArrayType {
+ // int array_size; // -1 or fixed
+ // struct Type* of_type;
+ // bool dynamic;
+ // } array_t;
+
+ // struct EnumType {
+ // const char* enum_name;
+ // const int fields_count;
+ // EnumField* fields;
+ // EnumValueKind value_kind;
+ // } enum_t;
+ // };
+} TypeInfo;
+
+// One entry of a scope's symbol table.
+typedef struct Symbol {
+ // source span of the symbol's identifier; turned into text with span_str()
+ Span name;
+ // scope_var() stores the declaration's initialiser node (var_decl.init) here
+ Node* decl;
+ // heap-allocated by scope_var()
+ TypeInfo* type;
+} Symbol;
+
+// A lexical scope: a table of symbols plus links to its parent and child scopes.
+typedef struct Scope {
+ // AST node this scope was created for
+ struct Node* owner;
+ // enclosing scope, NULL when there is none
+ struct Scope* parent;
+ // growable array of symbols: `len` entries in use out of `cap` allocated slots
+ Symbol** symbols;
+ size_t len;
+ size_t cap;
+ // growable array of child scopes: `ch_len` in use out of `ch_cap` allocated slots
+ struct Scope** children;
+ size_t ch_len;
+ size_t ch_cap;
+ // nesting level: one more than the parent's depth
+ int depth;
+ // number handed out from a counter in sem.c; printed by scope_print()
+ int id;
+} Scope;
+
+// NOTE(review): of the functions declared below, sem.c defines only scope_init,
+// scope_build and scope_print. No definition of the others is visible there --
+// confirm one exists elsewhere before calling them.
+
+// Symbol table functions
+void symbol_add(const char* name, TypeInfo* type);
+TypeInfo* symbol_get_type(const char* name);
+Symbol* symbol_find(const char* name);
+
+// Scope management functions
+// scope_init returns a parentless scope, by value, owned by the given node
+Scope scope_init(Node*);
+void scope_add_symbol(Scope* scope, const char* name, TypeInfo* type);
+Symbol* scope_find_symbol(Scope* scope, const char* name);
+
+// Type checking functions
+int types_equal(TypeInfo* a, TypeInfo* b);
+
+// scope_build fills a scope from the program's top-level declarations;
+// scope_print writes a scope and all of its child scopes to stdout
+void scope_build(Scope*, Ast*);
+void scope_print(Scope*, Ast*);
diff --git a/stuff/ox.txt b/stuff/ox.txt
@@ -0,0 +1,35 @@
+██████████████████████████████████████████████████████████████████
+██████████████████████████████████████████████████████████████████
+██████████████████████████████████████████████████████████████████
+████████████████ ███████████████████████████████ █████████████████
+██████████████ █████████████████████████████████ ███████████████
+████████████ ███████████████████████████████████ █████████████
+██████████ █ ███████████████████████████████████ ███████████
+████████ █ █████████████████████████████████████ █ █████████
+███████ ██ █████████████████████████████████████ █ ████████
+███████ ███ █████████████████████████████████████ █ ████████
+███████ ████ ███████████████████████████████ ███ ████████
+███████ ██████ ████ ████████
+████████ ███████ █████████████ ▓████ █████████
+██████████ ▓ ██████████████ ▓ ███████████
+████████████ █████████████ ██████████████
+███████ ███████████ ████████
+███████ ████████████ ████████
+█████████ █████████ ▓ ██████████
+█████████████ ██ ██████ ▓ ██████████████
+██████████████████ █████ ██████ ▓▓█ ███████████████████
+██████████████████ ███████ ██████ ███████████████████
+██████████████████ ██████ ██████ ███████████████████
+███████████████████ ████ ███████ ████████████████████
+██████████████████ ███ █████████ ███████████████████
+█████████████████ █ ███ ███████████████████
+█████████████████ ██████████████ ██████████████████
+████████████████ ██ ████ ██ █████████████████
+█████████████████ ██ ████ ██ ██████████████████
+██████████████████ ██████████ ▓███████████████████
+████████████████████ ▓█████████████████████
+██████████████████████ ██▓▓██ ▓███████████████████████
+████████████████████████▓ ▓█████████████████████████
+███████████████████████████ ▓████████████████████████████
+██████████████████████████████ ▓███████████████████████████████
+██████████████████████████████████████████████████████████████████
diff --git a/test.ox b/test.ox
@@ -0,0 +1,97 @@
+ns main
+
+use io, std
+
+pub rec Vehicle {
+ uint8 age
+ uint16 cc
+ str model
+ str name
+ str owner_name
+ float value() = age * cc
+ bool sold = false
+ Vehicle parent
+}
+
+inline pub int add (int a, b) => a + b
+async int add (int a, b) => await sendoff(a, b)
+
+const int jack = 7
+const uint16 jill = 12
+
+
+Vehicle build_vehicle (Vehicle v) inline pub {
+ v = { age: 12, cc: 1200, model: "Roaster" }
+ ret v
+}
+
+void print_vehicle(Vehicle v) {
+ print "Vehicle {{v.name}} is {{v.age}} yrs old with {{v.cc}} CC."
+}
+
+void main () {
+ Vehicle mazda = new { age: 12, cc: 1200, model: "Miata" }
+
+ print("mazda value is {{mazda.value}}")
+
+ print_vehicle(mazda)
+
+ print(add(5, 6)
+
+ for Vehicle v in cars {
+ print_vehicle <- v
+ }
+
+ cars -> each { v, i: print_vehicle v }
+
+ // shorthand argument in closures
+
+ []Vehicle old_cars = cars.where(v: v.age > 10)
+
+ []Vehicle big_cars = cars -> where { .cc > 2000 } -> map { .model }
+
+ []Vehicle new_cars = cars
+ -> where { Vehicle v => v.age < 5 }
+ -> sorted { Vehicle a, b => a.model > b.model }
+ -> map { Vehicle v => v.model }
+ -> each { str model => print(model) }
+
+ []Vehicle new_cars = cars
+ -> where { .age < 5 }
+ -> sorted { (Vehicle a, b) => a.age > b.age }
+ -> map { .model -> to_lower }
+ -> each { print }
+
+ http_server(8080, {sel: get_certif("certs/digitech.cert")})
+
+ http_server <- 8080, ssl: get_certif <- "certs/digitech.cert"
+
+ HttpServer server = (HttpServer)
+ -> init
+ -> serve <- port: 8080, ssl: get_certif <- paths -> where { .link = "private" }
+ -> background
+
+ HttpServer server = new {}
+ -> init()
+ -> serve(port: 8080, ssl: get_certif(paths.where { .link == "private" }))
+ -> background()
+
+ pub rec Cert { ... }
+ pub rec HttpServer {
+ void init() { ... }
+ void serve(int port, Cert ssl) { ... }
+ void background() { ... }
+ }
+
+ extend HttpServer {
+ void print { ... }
+ }
+
+ print <- "hello world"
+
+ print <- "hello world" -> capitalised // print("hello world".capitalised())
+
+ print <- capitalise <- "hello world" // print(capitalise("hello world"))
+
+}
+
diff --git a/utils.c b/utils.c
@@ -0,0 +1,31 @@
+#include "utils.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+
+void
+panic(const char *fmt, ...) {
+ va_list args;
+ va_start(args, fmt);
+ fprintf(stderr, "Error: ");
+ // fprintf(stderr, "\e[0;31mError: ");
+ vfprintf(stderr, fmt, args);
+ // fprintf(stderr, "\e[0m\n");
+ fprintf(stderr, "\n");
+ va_end(args);
+ exit(1);
+}
+
+// Like panic(), but the process ends with exit status 0.
+// Writes "Error: ", the printf-style formatted message and a newline to stderr
+// before exiting. Never returns.
+void
+softpanic(const char *fmt, ...)
+{
+	va_list ap;
+
+	// plain prefix; the red ANSI-coloured variant is not in use
+	fputs("Error: ", stderr);
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+
+	fputc('\n', stderr);
+	exit(0);
+}
diff --git a/utils.h b/utils.h
@@ -0,0 +1,6 @@
+#pragma once
+
+#include <stdlib.h>
+
+// Both print "Error: " followed by the printf-style formatted message and a newline
+// to stderr, then terminate the process; neither returns.
+// panic exits with status 1, softpanic with status 0.
+void panic(const char *fmt, ...);
+void softpanic(const char *fmt, ...);