ox

citbl.org/ox
Log | Files | Refs | README | LICENSE

commit 2e7bdd358dd26f673f39d314889db907cc5ccb91
Author: citbl <citbl@citbl.org>
Date:   Sun,  5 Oct 2025 22:48:24 +1000

init

Diffstat:
A.clang-format | 25+++++++++++++++++++++++++
A.clangd | 12++++++++++++
A.github/workflows/c-cpp.yml | 33+++++++++++++++++++++++++++++++++
A.gitignore | 8++++++++
A.vscode/launch.json | 20++++++++++++++++++++
A.vscode/tasks.json | 21+++++++++++++++++++++
A.zed/debug.json | 18++++++++++++++++++
ALICENSE | 190+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
ANOTES.md | 105+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
AREADME.md | 5+++++
ATODO.md | 17+++++++++++++++++
Aex1.ox | 4++++
Aex10.ox | 6++++++
Aex2.ox | 8++++++++
Aex3.ox | 5+++++
Aex4.ox | 7+++++++
Aex5.ox | 12++++++++++++
Aex6.ox | 5+++++
Aex7.ox | 7+++++++
Aex8.ox | 7+++++++
Aex9.ox | 13+++++++++++++
Afile.h | 58++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Agen.h | 23+++++++++++++++++++++++
Agen/gen.c | 293+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahmap.c | 176+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahmap.h | 23+++++++++++++++++++++++
Ahmap_test.c | 174+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Ahmap_test.h | 3+++
Alexer.c | 398+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alexer.h | 66++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amain.c | 79+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amakefile | 46++++++++++++++++++++++++++++++++++++++++++++++
Aox-syntax/LICENSE | 21+++++++++++++++++++++
Aox-syntax/language-configuration.json | 23+++++++++++++++++++++++
Aox-syntax/ox-syntax-0.0.3.vsix | 0
Aox-syntax/package.json | 26++++++++++++++++++++++++++
Aox-syntax/rebuild-syntax-vscode.sh | 3+++
Aox-syntax/syntaxes/ox.sublime-syntax | 106+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aox-syntax/syntaxes/ox.tmLanguage.json | 167+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser.h | 195+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser/ast.c | 120+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser/decl.c | 69+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser/expr.c | 241+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser/parser.c | 369+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser/parser_utils.c | 339+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparser/stmt.c | 180+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asem.c | 205+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asem.h | 111+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Astuff/ox.txt | 35+++++++++++++++++++++++++++++++++++
Atest.ox | 97+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Autils.c | 31+++++++++++++++++++++++++++++++
Autils.h | 6++++++
52 files changed, 4211 insertions(+), 0 deletions(-)

diff --git a/.clang-format b/.clang-format @@ -0,0 +1,25 @@ +BasedOnStyle: Webkit +IndentWidth: 8 +ContinuationIndentWidth: 8 +UseTab: AlignWithSpaces +AlignTrailingComments: true +SpacesBeforeTrailingComments: 1 +KeepEmptyLinesAtTheStartOfBlocks: false +AllowShortBlocksOnASingleLine: true +AllowShortIfStatementsOnASingleLine: true +AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: true +AllowShortFunctionsOnASingleLine: false +AlignConsecutiveDeclarations: false +AlignConsecutiveAssignments: false +AlignConsecutiveMacros: false +SortIncludes: false + +IndentCaseLabels: false +ColumnLimit: 100 +PenaltyBreakBeforeFirstCallParameter: 1 +AlignAfterOpenBracket: DontAlign +BinPackArguments: false +BinPackParameters: false + +BreakAfterReturnType: TopLevelDefinitions diff --git a/.clangd b/.clangd @@ -0,0 +1,12 @@ +CompileFlags: + Add: [ + -Wall, + -Wextra, + -Wpedantic, + -xc, + -std=c99, + -g, + -I/opt/homebrew/opt/libgccjit/include, + -L/opt/homebrew/opt/libgccjit/lib/gcc/current, + -lgccjit + ] diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml @@ -0,0 +1,33 @@ +name: C/C++ CI + +on: + push: + branches: ["master"] + pull_request: + branches: ["master"] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies (gcc, pkg-config, libgccjit) + run: | + sudo apt-get update + sudo apt-get install -y build-essential pkg-config libgccjit-13-dev || \ + sudo apt-get install -y libgccjit-12-dev + + - name: Checks + run: | + pkg-config --cflags --libs libgccjit || true + make V=1 + + - name: Build + run: make again + + - name: Test + run: make test + + - name: Sanitizers + run: make check diff --git a/.gitignore b/.gitignore @@ -0,0 +1,8 @@ +*.dSYM +.DS_Store +ox +oxc +fox +err.log +*.o +out diff --git a/.vscode/launch.json b/.vscode/launch.json @@ -0,0 +1,20 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "Build & Debug", + "type": "cppdbg", + "request": "launch", 
+ "program": "${workspaceFolder}/oxc", + "args": [ + "${workspaceFolder}/ex2.ox" + ], + "stopAtEntry": false, + "cwd": "${fileDirname}", + "environment": [], + "externalConsole": false, + "MIMode": "lldb", + "preLaunchTask": "build-ox" + } + ] +} diff --git a/.vscode/tasks.json b/.vscode/tasks.json @@ -0,0 +1,21 @@ +// .vscode/tasks.json +{ + "version": "2.0.0", + "tasks": [ + { + "label": "build-ox", + "type": "shell", + "command": "make", + "args": ["check"], + "problemMatcher": [], + "presentation": { + "reveal": "never", + "echo": false, + "focus": false, + "panel": "shared", + "showReuseMessage": false, + "clear": false + } + } + ] +} diff --git a/.zed/debug.json b/.zed/debug.json @@ -0,0 +1,18 @@ +// Project-local debug tasks +// +// For more documentation on how to configure debug tasks, +// see: https://zed.dev/docs/debugger +[ + { + "label": "Debug native binary", + "build": { + "command": "make", + "args": ["check"], + "cwd": "$ZED_WORKTREE_ROOT" + }, + "program": "$ZED_WORKTREE_ROOT/oxc", + "args": ["$ZED_WORKTREE_ROOT/ex10.ox"], + "request": "launch", + "adapter": "CodeLLDB" + } +] diff --git a/LICENSE b/LICENSE @@ -0,0 +1,190 @@ +Copyright 2025 The Ox Programming language contributors + +Apache License +Version 2.0, January 2004 +http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative + Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for describing the origin of the Work and + reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + +END OF TERMS AND CONDITIONS + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/NOTES.md b/NOTES.md @@ -0,0 +1,105 @@ + +[https://github.com/gingerBill/titania] +[https://news.ycombinator.com/item?id=45243925] +[https://people.inf.ethz.ch/wirth/Oberon/Oberon07.Report.pdf] +[https://people.inf.ethz.ch/wirth/ProjectOberon/PO.System.pdf] + +add_operator = "+" | "-" | "xor" | "or". +mul_operator = "*" | "/" | "%" | "and". + +see + Tokenizer Semicolon Insertion Rules + + When a newline is seen after the following token kind, a semicolon is inserted, otherwise no semicolon is inserted: + + ...list + + +Gleam my new obsession +I love Rust, but... +[https://ericcodes.io/blog/gleam-my-new-obsession.html] + +(`antirez/sds` for dynamic strings, `nothings/stb_ds` for dynamic arrays and hashmaps, and `cxong/tinydir` for reading the filesystem). +[https://old.reddit.com/r/Compilers/comments/1nmc3r9/i_wrote_a_compiler_for_a_large_subset_of_c_in_c/] + +How to make stuff private and discussion on design (just mangle the names if needed) +[http://journal.stuffwithstuff.com/2025/05/26/access-control-syntax] + + +# symbols + +- jack int 1 +- test void->void 1 + alice float 2 +- main void->void 1 + peter strings 2 +- jill int 1 + +# Zed + +[https://zed.dev/docs/extensions/developing-extensions] + +# libgccjit doco + +[https://gcc.gnu.org/onlinedocs/gcc-15.1.0/jit/] + +# license + + the hare license at the bottom + https://sr.ht/~sircmpwn/hare/ + the standard library is under MPL, the compiler and executables are under GPL3 + +# walk down, compute bubbling up + +RDP (Root‑Descend‑Process) + +- Push "stacks" as you descend, nodes and local state +- Process and pop the frame off on the way back up and merge or "combine" result with its parent. + +Expr ::= Add(Expr, Expr) + | Mul(Expr, Expr) + | Num(Int) + +R‑D‑P Application + +1. Root: Add( Mul(Num(2), Num(3)), Num(4) ) +2. Descend: + - Push Add frame. + - Push left child Mul. + - Push left child Num(2) → leaf → Process → result = 2. + - Push right child Num(3) → leaf → Process → result = 3. 
+ - Process Mul → result = 6. + - Push right child Num(4) → leaf → Process → result = 4. +3. Process Add → result = 10. + +**Forget the whole tree, focus on this node and reason locally.** + +R‑D‑P (Root‑Descend‑Process) turns recursive AST evaluation into a clear, iterative algorithm. +Pair it with bottom‑up traversal, the visitor pattern, or an explicit stack to keep state explicit. +This approach reduces cognitive load by isolating each node’s processing and avoiding hidden call‑stack dependencies. + +Keep a whiteboard model of the tree shapes. + +- Base case – literals and identifiers return a value immediately. +- Recursive step – always evaluate child nodes before applying the operator at the current node. +- After return – combine child results according to the operation; this is where side‑effects (e.g., assignment) may occur. + +see [[TODO]] + +## Only pure constant expressions are evaluated at compile time + +print has side effects, so it is not evaluated at compile time + +2 + 3 is not run by the compiler, but it may be constant-folded in the optimizer. 
+ +you lower print to a runtime call to printf + +### CTFE (compile-time function execution) + +- constant expression, constant folding and propagation +- evaluator / constant interpreter +- restricted evaluator in the compiler, with env and CT heap +- try_ctfe on expression nodes +- lowering: emit literal value to IR once folded +- C++: consteval, Zig: comptime, Rust: const fn +- gate with fuel(?), depth restriction and memory limits diff --git a/README.md b/README.md @@ -0,0 +1,5 @@ +### Ox Programming language + +WIP + +[![C/C++ CI](https://github.com/keyle/baby-c/actions/workflows/c-cpp.yml/badge.svg)](https://github.com/keyle/baby-c/actions/workflows/c-cpp.yml) diff --git a/TODO.md b/TODO.md @@ -0,0 +1,17 @@ +@next + +- variables, string first, so that we can print its content; +- print anything other than a string +- call another function from main, that prints something +- call another function that prints the passed argument + +@later + +- implement all or most of C's into libgccjit +- ARC memory management, new keyword. + +@cruft + +- redo arguments as list and not linked list, handle in parse and in gen (2 places in gen?) 
+ +get rid of count_args and search for 'argc' diff --git a/ex1.ox b/ex1.ox @@ -0,0 +1,4 @@ +void main(int param1) { + print("hello world\n"); + //print(param1); +} diff --git a/ex10.ox b/ex10.ox @@ -0,0 +1,6 @@ +void main() { + print("This is a great feeling"); + print("This is a great feeling"); + print(""); + print("Oh yes."); +} diff --git a/ex2.ox b/ex2.ox @@ -0,0 +1,8 @@ +// example program +// ns main +// T add(T a, b) inline pure => a + b; + +void main() { + string name = "harrold"; + print("harold"); +} diff --git a/ex3.ox b/ex3.ox @@ -0,0 +1,5 @@ +void main() { + if (a == true) { + print("yes"); + } +} diff --git a/ex4.ox b/ex4.ox @@ -0,0 +1,7 @@ +void main() { + if (a == true) { + print("yes"); + } else { + print("no"); + } +} diff --git a/ex5.ox b/ex5.ox @@ -0,0 +1,12 @@ +// typedef Person { +// string name; +// int age; +// } + +void main() { + if (a == true) { + print("yes"); + } else { + print("no"); + } +} diff --git a/ex6.ox b/ex6.ox @@ -0,0 +1,5 @@ +int main() { + for (int a = 0; b < 10; c++) { + print("hi"); + } +} diff --git a/ex7.ox b/ex7.ox @@ -0,0 +1,7 @@ +int main() { + int a = 1; + for(;;) { + if (a == 1) break; + continue 7; // TODO fix this test to fail semantics + } +} diff --git a/ex8.ox b/ex8.ox @@ -0,0 +1,7 @@ +void main() { + print("test 1"); +} + +void test() { + print("test 2"); +} diff --git a/ex9.ox b/ex9.ox @@ -0,0 +1,13 @@ +int jack = 111; + +void test() { + float alice = 222; +} + +void main() { + int peter = 333; +} + +uint jill = 444; + +float jane = 123.45; diff --git a/file.h b/file.h @@ -0,0 +1,58 @@ +#pragma once + +#include <stdio.h> +#include <stdlib.h> + +#include "utils.h" + +char* readfile(const char* file_path) +{ + FILE* fp = fopen(file_path, "rb"); + if (fp == NULL) { + perror("Failed to read file"); + return NULL; + } + + if (fseek(fp, 0, SEEK_END) != 0) { + fclose(fp); + panic("Failed to find the end of the file"); + return NULL; + } + + long file_size = ftell(fp); + + if (file_size < 0) { + fclose(fp); + 
panic("Failed to determine the file size"); + return NULL; + } + + rewind(fp); + + // check for overflow before casting + if ((unsigned long)file_size >= SIZE_MAX) { + fclose(fp); + panic("File too large to fit in memory"); + return NULL; + } + + char* contents = (char*)calloc(1, (size_t)file_size + 1); + if (contents == NULL) { + panic("Failed to allocate memory to read file"); + fclose(fp); + return NULL; + } + + size_t bytes_read = fread(contents, 1, (size_t)file_size, fp); + if (bytes_read != (size_t)file_size) { + free(contents); + fclose(fp); + panic("Failed to read the file in its entirety"); + return NULL; + } + + contents[file_size] = '\0'; + + fclose(fp); + return contents; +} diff --git a/gen.h b/gen.h @@ -0,0 +1,23 @@ +#pragma once + +#include <unistd.h> // for libgccjit +#include <libgccjit.h> + +#include "parser.h" +#include "sem.h" + +typedef struct { + gcc_jit_context *ctx; + gcc_jit_function *prev_func; + gcc_jit_function *curr_func; + gcc_jit_function* printf_fn; + gcc_jit_function* puts_fn; + gcc_jit_block *prev_block; + gcc_jit_block *curr_block; + // gcc_jit_type *type_kind; need type too? 
+ Scope *scope; + const char *src; +} Gen; + +Gen gen_init(Scope *, const char *); +void gen_next(Gen *, Node *); diff --git a/gen/gen.c b/gen/gen.c @@ -0,0 +1,293 @@ +#include "../gen.h" +#include "../utils.h" + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/param.h> + +static gcc_jit_type* type_int; +static gcc_jit_type* type_uint; +static gcc_jit_type* type_float; +static gcc_jit_type* type_void; +static gcc_jit_type* type_cstr; + +#define MAXARGS 16 + +Gen +gen_init(Scope* scope, const char* src) +{ + if (scope == NULL || src == NULL) { panic("gen_init: no Scope or AST provided"); } + + gcc_jit_context* ctx; + + ctx = gcc_jit_context_acquire(); + + if (!ctx) { panic("could not acquire gcc jit context"); } + + // needs loc* to work + // gcc_jit_context_set_bool_option(ctx, GCC_JIT_BOOL_OPTION_DEBUGINFO, 1); + // high level + // gcc_jit_context_set_bool_option(ctx, GCC_JIT_BOOL_OPTION_DUMP_INITIAL_TREE, + // 1); low level gcc_jit_context_set_bool_option(ctx, + // GCC_JIT_BOOL_OPTION_DUMP_INITIAL_GIMPLE, 1); info + // gcc_jit_context_set_bool_option(ctx, GCC_JIT_BOOL_OPTION_DUMP_SUMMARY, 1); + + gcc_jit_context_set_str_option(ctx, GCC_JIT_STR_OPTION_PROGNAME, "ox"); + // keep FP + gcc_jit_context_add_driver_option(ctx, "-fno-omit-frame-pointer"); + + gcc_jit_context_set_int_option(ctx, + GCC_JIT_INT_OPTION_OPTIMIZATION_LEVEL, + /*0-3 for O3*/ 0); + + type_int = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_INT64_T); + type_uint = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_UINT64_T); + type_float = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_DOUBLE); + type_void = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_VOID); + type_cstr = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_CONST_CHAR_PTR); + + gcc_jit_param* pm_puts[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "s") }; + gcc_jit_function* fn_puts = gcc_jit_context_new_function( + ctx, NULL, GCC_JIT_FUNCTION_IMPORTED, type_int, "puts", 1, pm_puts, 0); + + 
gcc_jit_param* pm_printf[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "fmt") }; + gcc_jit_function* fn_printf = gcc_jit_context_new_function(ctx, + NULL, + GCC_JIT_FUNCTION_IMPORTED, + type_int, + "printf", + 1, + pm_printf, + /*is_variadic=*/1); + + return (Gen) { + .ctx = ctx, + .scope = scope, + .prev_func = NULL, + .curr_func = NULL, + .prev_block = NULL, + .curr_block = NULL, + .puts_fn = fn_puts, + .printf_fn = fn_printf, + .src = src, + }; +} + +static gcc_jit_rvalue* handle_expr(Gen*, Node*); + +static gcc_jit_rvalue* +emit_literal_string(Gen* gen, Node* node) +{ + size_t len = node->data.string.value.end - node->data.string.value.start; + char* str = calloc(len + 1, sizeof(char)); + if (str == NULL) panic("emit_literal_string: could not alloc"); + memcpy(str, gen->src + node->data.string.value.start, len); + str[len] = '\0'; + return gcc_jit_context_new_string_literal(gen->ctx, str); +} + +static void +build_program(Gen* gen, Node* node) +{ + size_t cnt = node->data.program.len; + for (size_t i = 0; i < cnt; i++) { + gen_next(gen, node->data.program.decl[i]); + } +} + +static gcc_jit_rvalue* +lower_builtin_print(Gen* gen, Node* node) +{ + size_t argc = node->data.call_expr.len; + + // 1-arg, treat as puts(arg) + if (argc == 1) { + gcc_jit_rvalue* arg + = handle_expr(gen, node->data.call_expr.args[0]); // TODO [0] when many + // cast common cases to const char* + if (gcc_jit_rvalue_get_type(arg) != type_cstr) + arg = gcc_jit_context_new_cast(gen->ctx, NULL, arg, type_cstr); + gcc_jit_rvalue* args[] = { arg }; + return gcc_jit_context_new_call(gen->ctx, NULL, gen->puts_fn, 1, args); + } + + // softpanic("we don't currently handle formatted strings to print"); + + // n>=1, treat as printf(fmt, ...) 
// Part of TODO about args as list and not + // + // through each args, form the ("formatted %s string %d etc.", str, intv) for clib's printf + + + // TODO we're talking about formatting here, which we plan on doing as a string interpolation, + // something along the lines of {{variable}} without defining its type would involve lookup + // split of the string and then formatting + + // we need to discuss and decide what we'd do when the user inevitably would print out a ref to + // a struct. Do we say [[struct]] or do we have some automatic unwrap and display of struct data... + // probably, yes. + + gcc_jit_rvalue** args = (gcc_jit_rvalue**)calloc(MAXARGS, sizeof(gcc_jit_rvalue*)); + + if (argc > MAXARGS) { + softpanic("we do not currently support more than 16 args to a print call"); + } + + for (size_t i = 0; i < argc; i++) { + gcc_jit_rvalue* arg = handle_expr(gen, node->data.call_expr.args[i]); + if (i == 0) { + if (gcc_jit_rvalue_get_type(arg) != type_cstr) { + // note this is probably not going to work as limited cast supported + // and string isn't one of them + arg = gcc_jit_context_new_cast(gen->ctx, NULL, arg, type_cstr); + } + } else { + // + // simple widening for common scalar types + // + gcc_jit_type* ty = gcc_jit_rvalue_get_type(arg); + if (ty == type_int) { + arg = gcc_jit_context_new_cast(gen->ctx, NULL, arg, type_cstr); + } else if (ty == type_float) { + // variadics already promote float→double; double is + } else if (ty == type_cstr) { + // leave as const char* + } else { + // fallback: pass pointer as void* + arg = gcc_jit_context_new_cast(gen->ctx, + NULL, + arg, + gcc_jit_context_get_type(gen->ctx, GCC_JIT_TYPE_VOID_PTR)); + } + } + // TODO auto grow + args[i] = arg; + } + return gcc_jit_context_new_call(gen->ctx, NULL, gen->printf_fn, argc, args); + return NULL; +} + +static gcc_jit_function* +lookup_function(Gen* gen, const char* func_name) +{ + // TODO see todo below about linked list parameters... 
+} + +static gcc_jit_rvalue* +handle_func_call(Gen* gen, Node* node) +{ + Node* fcallee = node->data.call_expr.callee; + const char* func_name = span_str(gen->src, fcallee->data.ident.name, (char[IDENTSZ]) { 0 }); + if (strcmp(func_name, "print") == 0) return lower_builtin_print(gen, node); + + softpanic("unhandled func call named: %s", func_name); + + // + // TODO handle any function other than print... + // + // int argc = node->data.call_expr.len; + // gcc_jit_function* callee = lookup_function(gen, func_name); + // gcc_jit_rvalue* args[16]; // @future fixed at 16 parameters in call + // for (int i = 0; i < argc; i++) { + // args[i] = handle_expr(gen, node->data.call_expr.args[i]); + // } + // return gcc_jit_context_new_call(gen->ctx, NULL, callee, argc, args); + // return NULL; +} + +static gcc_jit_rvalue* +handle_expr(Gen* gen, Node* node) +{ + switch (node->type) { + case NODE_NUMBER_LITERAL: + break; + case NODE_STRING_LITERAL: + return emit_literal_string(gen, node); + break; + case NODE_CALL_EXPR: { + return handle_func_call(gen, node); + } break; + default: + printf("handle_expr unhandled, %s\n", node_type_str(node->type)); + } + return NULL; +} + +static void +build_statement(Gen* gen, Node* node) +{ + switch (node->type) { + case NODE_BLOCK: + break; + case NODE_RETURN: + break; + case NODE_EXPR_STATEMENT: { + gcc_jit_rvalue* rv = handle_expr(gen, node->data.expr_statement.expr); + if (rv) gcc_jit_block_add_eval(gen->curr_block, NULL, rv); + } break; + default: + printf("build_statement unhandled, %s\n", node_type_str(node->type)); + break; + } +} + +static void +build_block(Gen* gen, Node* body) +{ + for (size_t i = 0; i < body->data.block.len; i++) { + build_statement(gen, body->data.block.stmts[i]); + } +} + +static void +build_func_decl(Gen* gen, Node* node) +{ + gcc_jit_function* func = gcc_jit_context_new_function(gen->ctx, + NULL, // loc + GCC_JIT_FUNCTION_EXPORTED, // declared + type_int, // ret + "main", // name + 0, // num params + NULL, // 
params + 0); // is variadic + + gcc_jit_block* block = gcc_jit_function_new_block(func, "entry"); + + gcc_jit_function* prev_func = gen->curr_func; + gcc_jit_block* prev_block = gen->curr_block; + gen->curr_block = block; + gen->curr_func = func; + + build_block(gen, node->data.function_decl.body); + + if (gen->curr_block) { + gcc_jit_rvalue* ret_value + = gcc_jit_context_new_rvalue_from_int(gen->ctx, type_int, 0); + gcc_jit_block_end_with_return(gen->curr_block, NULL, ret_value); + gen->curr_block = NULL; + } + + gen->curr_func = prev_func; + gen->curr_block = prev_block; +} + +void +gen_next(Gen* gen, Node* node) +{ + // printf("gen_next, %s\n", node_type_str(node->type)); + + switch (node->type) { + case NODE_PROGRAM: + build_program(gen, node); + break; + case NODE_FUNCTION_DECL: + build_func_decl(gen, node); + break; + case NODE_STRING_LITERAL: + emit_literal_string(gen, node); + break; + default: + printf("unhandled, %s\n", node_type_str(node->type)); + } +} diff --git a/hmap.c b/hmap.c @@ -0,0 +1,176 @@ +#include <stdio.h> +#include <string.h> + +#include "hmap.h" + +#define INITIAL_BUCKETS 8 +#define LOAD_FACTOR 0.75 + +static void hmap_grow(HashMap* map); + +// Simple string hash function (djb2) +static unsigned long hash(const char* str) +{ + unsigned long h = 5381; + unsigned char c; + while ((c = (unsigned char)*str++)) + h = ((h << 5) + h) + c; + return h; +} + +HashMap* hmap_create(size_t value_size) +{ + HashMap* map = calloc(1, sizeof(HashMap)); + if (map == NULL) { + fprintf(stderr, "hmap_create: map: could not alloc\n"); + } + map->bucket_count = INITIAL_BUCKETS; + map->size = 0; + map->value_size = value_size; + map->buckets = calloc(map->bucket_count, sizeof(HashNode*)); + if (map->buckets == NULL) { + fprintf(stderr, "hmap_create: bucket: could not alloc\n"); + exit(1); + } + return map; +} + +void hmap_put(HashMap* map, const char* key, const void* value) +{ + if ((float)(map->size + 1) / map->bucket_count > LOAD_FACTOR) { + hmap_grow(map); + 
} + unsigned long h = hash(key) % map->bucket_count; + HashNode* node = map->buckets[h]; + while (node) { + if (strcmp(node->key, key) == 0) { + memcpy(node->value, value, map->value_size); + return; + } + node = node->next; + } + HashNode* new_node = calloc(1, sizeof(HashNode)); + if (new_node == NULL) { + fprintf(stderr, "hmap_put: new_node: could not alloc\n"); + exit(1); + } + new_node->key = strdup(key); + new_node->value = calloc(1, map->value_size); + if (new_node == NULL) { + fprintf(stderr, "hmap_put: new_node->value: could not alloc\n"); + exit(1); + } + memcpy(new_node->value, value, map->value_size); + new_node->next = map->buckets[h]; + map->buckets[h] = new_node; + map->size++; +} + +bool hmap_get(HashMap* map, const char* key, void* out) +{ + unsigned long h = hash(key) % map->bucket_count; + HashNode* node = map->buckets[h]; + while (node) { + if (strcmp(node->key, key) == 0) { + memcpy(out, node->value, map->value_size); + return true; + } + node = node->next; + } + return false; +} + +bool hmap_remove(HashMap* map, const char* key) +{ + unsigned long h = hash(key) % map->bucket_count; + HashNode* node = map->buckets[h]; + HashNode* prev = NULL; + while (node) { + if (strcmp(node->key, key) == 0) { + if (prev) { + prev->next = node->next; + } else { + map->buckets[h] = node->next; + } + free(node->key); + free(node->value); + free(node); + map->size--; + return true; + } + prev = node; + node = node->next; + } + return false; +} + +static void hmap_grow(HashMap* map) +{ + size_t new_bucket_count = map->bucket_count * 2; + HashNode** new_buckets = calloc(new_bucket_count, sizeof(HashNode*)); + if (new_buckets == NULL) { + fprintf(stderr, "hmap_grow: could not alloc\n"); + exit(1); + } + for (size_t i = 0; i < map->bucket_count; i++) { + HashNode* node = map->buckets[i]; + while (node) { + HashNode* next = node->next; + unsigned long h = hash(node->key) % new_bucket_count; + node->next = new_buckets[h]; + new_buckets[h] = node; + node = next; + } + } 
+ free(map->buckets); + map->buckets = new_buckets; + map->bucket_count = new_bucket_count; +} + +void hmap_free(HashMap* map) +{ + for (size_t i = 0; i < map->bucket_count; i++) { + HashNode* node = map->buckets[i]; + while (node) { + HashNode* next = node->next; + free(node->key); + free(node->value); + free(node); + node = next; + } + } + free(map->buckets); + free(map); +} + +// Example usage for struct T +// struct T { +// int id; +// char name[32]; +// }; + +// int main() { +// HashMap* map = hmap_create(sizeof(struct T)); +// struct T t1 = {1, "Alice"}; +// struct T t2 = {2, "Bob"}; +// struct T t3 = {3, "Carol"}; + +// hmap_put(map, "alice", &t1); +// hmap_put(map, "bob", &t2); +// hmap_put(map, "carol", &t3); + +// struct T out; +// if (hmap_get(map, "bob", &out)) { +// printf("bob: id=%d, name=%s\n", out.id, out.name); +// } +// if (hmap_get(map, "alice", &out)) { +// printf("alice: id=%d, name=%s\n", out.id, out.name); +// } +// if (hmap_get(map, "dave", &out)) { +// printf("dave: id=%d, name=%s\n", out.id, out.name); +// } else { +// printf("dave not found\n"); +// } +// hmap_free(map); +// return 0; +// } diff --git a/hmap.h b/hmap.h @@ -0,0 +1,23 @@ +#pragma once + +#include <stdbool.h> +#include <stdlib.h> + +typedef struct HashNode { + char* key; + void* value; + struct HashNode* next; +} HashNode; + +typedef struct HashMap { + size_t bucket_count; + size_t size; + size_t value_size; + struct HashNode** buckets; +} HashMap; + +HashMap* hmap_create(size_t); +void hmap_put(HashMap* map, const char* key, const void* value); +bool hmap_get(HashMap* map, const char* key, void* out); +bool hmap_remove(HashMap* map, const char* key); +void hmap_free(HashMap* map); diff --git a/hmap_test.c b/hmap_test.c @@ -0,0 +1,174 @@ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "hmap.h" + +static void hmap_test_basic(void) +{ + int i; + printf("Testing dict of integers...\n"); + + HashMap* map = hmap_create(sizeof(int)); + + struct { + const 
char* key; + int value; + } items[] = { + { "one", 1 }, + { "two", 2 }, + { "three", 3 }, + }; + + // Insert items + for (i = 0; i < 3; i++) { + hmap_put(map, items[i].key, &items[i].value); + } + + // Retrieve and check + int v; + int all_ok = 1; + for (i = 0; i < 3; i++) { + if (hmap_get(map, items[i].key, &v) && v == items[i].value) { + printf("PASS: %s == %d\n", items[i].key, items[i].value); + } else { + printf("FAIL: %s\n", items[i].key); + all_ok = 0; + } + } + if (all_ok) + printf("PASS: dict of integers test\n"); +} + +static void hmap_test_T(void) +{ + int i; + printf("Testing struct T...\n"); + + struct T { + int id; + char name[32]; + int age; + char email[64]; + float score; + }; + + HashMap* map = hmap_create(sizeof(struct T)); + + struct T items[] = { + { 1, "alice", 30, "alice@example.com", 95.5f }, + { 2, "bob", 25, "bob@example.com", 88.0f }, + { 3, "carol", 28, "carol@example.com", 91.2f }, + }; + + // Insert items + for (i = 0; i < 3; i++) { + hmap_put(map, items[i].name, &items[i]); + } + + // Retrieve and check + struct T v; + int all_ok = 1; + for (i = 0; i < 3; i++) { + if (hmap_get(map, items[i].name, &v) && v.id == items[i].id && strcmp(v.name, items[i].name) == 0 && v.age == items[i].age && strcmp(v.email, items[i].email) == 0 && v.score == items[i].score) { + printf("PASS: %s == {id:%d, age:%d, email:%s, score:%.1f}\n", + items[i].name, items[i].id, items[i].age, items[i].email, items[i].score); + } else { + printf("FAIL: %s\n", items[i].name); + all_ok = 0; + } + } + if (all_ok) + printf("PASS: struct T test\n"); +} + +static void hmap_test_memory_bumping(void) +{ + int i; + printf("Testing memory bumping...\n"); + + HashMap* map = hmap_create(sizeof(int)); + const int N = 1000; // Large enough to trigger resizing + + char key[32]; + int all_ok = 1; + + // Insert N items + for (i = 0; i < N; i++) { + snprintf(key, sizeof(key), "key_%d", i); + hmap_put(map, key, &i); + } + + // Retrieve and check all N items + for (i = 0; i < N; i++) { + 
snprintf(key, sizeof(key), "key_%d", i); + int v = -1; + if (hmap_get(map, key, &v) && v == i) { + // Optionally print only a few + if (i < 3 || i > N - 3) + printf("PASS: %s == %d\n", key, v); + } else { + printf("FAIL: %s\n", key); + all_ok = 0; + } + } + if (all_ok) + printf("PASS: memory bumping test\n"); +} + +static void hmap_test_removal(void) +{ + int i; + printf("Testing removal...\n"); + + HashMap* map = hmap_create(sizeof(int)); + + struct { + const char* key; + int value; + } items[] = { + { "alpha", 10 }, + { "beta", 20 }, + { "gamma", 30 }, + }; + + // Insert items + for (i = 0; i < 3; i++) { + hmap_put(map, items[i].key, &items[i].value); + } + + // Remove "beta" + hmap_remove(map, "beta"); + + // Check "beta" is gone, others remain + int v; + int all_ok = 1; + for (i = 0; i < 3; i++) { + int found = hmap_get(map, items[i].key, &v); + if (strcmp(items[i].key, "beta") == 0) { + if (!found) { + printf("PASS: %s removed\n", items[i].key); + } else { + printf("FAIL: %s still present\n", items[i].key); + all_ok = 0; + } + } else { + if (found && v == items[i].value) { + printf("PASS: %s == %d\n", items[i].key, items[i].value); + } else { + printf("FAIL: %s\n", items[i].key); + all_ok = 0; + } + } + } + if (all_ok) + printf("PASS: removal test\n"); +} + +void hmap_tests(void) +{ + hmap_test_basic(); + hmap_test_T(); + hmap_test_memory_bumping(); + hmap_test_removal(); +} diff --git a/hmap_test.h b/hmap_test.h @@ -0,0 +1,3 @@ +#pragma once + +void hmap_tests(void); diff --git a/lexer.c b/lexer.c @@ -0,0 +1,398 @@ +#include "lexer.h" +#include "utils.h" + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <ctype.h> +#include <assert.h> + +static char peek(Lexer* lex) +{ + char c = lex->src[lex->pos]; + return c ? c : 0; +} + +static char peek2(Lexer* lex) +{ + char c = lex->src[lex->pos]; + if (!c) + return 0; + c = lex->src[lex->pos + 1]; + return c ? 
c : 0; +} + +static char nudge(Lexer* lex) +{ + const char c = peek(lex); + if (!c) + return 0; + if (c == '\n') { + lex->line++; + lex->col = 1; + } else { + lex->col++; + } + lex->pos++; + return c; +} + +static void skip_space_and_comments(Lexer* lex) +{ + for (;;) { + for (;;) { + char c = peek(lex); + if (c == ' ' || c == '\t' || c == '\r' || c == '\n') + nudge(lex); + else + break; + } + if (peek(lex) == '/' && peek2(lex) == '/') { + nudge(lex); + nudge(lex); + while (peek(lex) != '\n' && peek(lex) != 0) + nudge(lex); + continue; + } + break; + } +} + +static Token make_ident(Lexer* lex, size_t pos, size_t line, size_t col) +{ + for (;;) { + char c = peek(lex); + if (c == '_' || isalnum(c)) + nudge(lex); + else + break; + } + + TokenType type = TOKEN_IDENT; + + if ((lex->pos - pos) == 6 && strncmp(lex->src + pos, "return", 6) == 0) + type = TOKEN_RETURN; + else if ((lex->pos - pos) == 3 && strncmp(lex->src + pos, "for", 3) == 0) + type = TOKEN_FOR; + else if ((lex->pos - pos) == 2 && strncmp(lex->src + pos, "if", 2) == 0) + type = TOKEN_IF; + else if ((lex->pos - pos) == 4 && strncmp(lex->src + pos, "else", 4) == 0) + type = TOKEN_ELSE; + else if ((lex->pos - pos) == 8 && strncmp(lex->src + pos, "continue", 8) == 0) + type = TOKEN_CONTINUE; + else if ((lex->pos - pos) == 5 && strncmp(lex->src + pos, "break", 5) == 0) + type = TOKEN_BREAK; + else if ((lex->pos - pos) == 5 && strncmp(lex->src + pos, "while", 5) == 0) + type = TOKEN_WHILE; + + // Check for keywords, or a Capitalised CustomType + // -- @later do it in the parser, keep types as unprotected names + // if (strncmp(lex->src + pos, "int", lex->pos - pos) == 0) + // type = TOKEN_INT; + // else if (strncmp(lex->src + pos, "float", lex->pos - pos) == 0) + // type = TOKEN_FLOAT; + // else if (strncmp(lex->src + pos, "void", lex->pos - pos) == 0) + // type = TOKEN_VOID; + // else if (strncmp(lex->src + pos, "string", lex->pos - pos) == 0) + // type = TOKEN_STRING; + // else { + // // If identifier starts 
with a capital letter, treat as TOKEN_TYPE + // char first = lex->src[pos]; + // if (first >= 'A' && first <= 'Z') { + // type = TOKEN_TYPE; + // } + // } + + return (Token) { .type = type, + .start = pos, + .line = line, + .col = col, + .end = lex->pos }; +} +static Token make_number(Lexer* lex, size_t pos, size_t line, size_t col) +{ + while (isdigit(peek(lex))) + nudge(lex); + if (peek(lex) == '.' && isdigit(peek2(lex))) { + nudge(lex); + while (isdigit(peek(lex))) + nudge(lex); + } + return (Token) { + .type = TOKEN_NUMBER_LITERAL, + .start = pos, + .end = lex->pos, + .line = line, + .col = col + }; +} + +static Token make_string(Lexer* lex, size_t pos, size_t line, size_t col) +{ + nudge(lex); // " start + while (peek(lex) != '"' && peek(lex) != 0) + nudge(lex); + if (peek(lex) == '"') + nudge(lex); // " end + return (Token) { + .type = TOKEN_STRING_LITERAL, + .start = pos, + .end = lex->pos, + .line = line, + .col = col + }; +} + +static Token next_token(Lexer* lex) +{ + skip_space_and_comments(lex); + size_t start = lex->pos; + size_t line = lex->line; + size_t col = lex->col; + const char c = peek(lex); + + if (c == 0) + return (Token) { + .type = TOKEN_EOF, .start = start, .end = lex->pos, .col = col, .line = line + }; + + if (isalpha(c) || c == '_') + return make_ident(lex, start, line, col); + if (isdigit(c)) + return make_number(lex, start, line, col); + if (c == '"') + return make_string(lex, start, line, col); + + TokenType type = TOKEN_UNKNOWN; + + switch (c) { + case '(': + nudge(lex); + type = TOKEN_LPAREN; + break; + case ')': + nudge(lex); + type = TOKEN_RPAREN; + break; + case '{': + nudge(lex); + type = TOKEN_LBRACE; + break; + case '}': + nudge(lex); + type = TOKEN_RBRACE; + break; + case '[': + nudge(lex); + type = TOKEN_LBRACKET; + break; + case ']': + nudge(lex); + type = TOKEN_RBRACKET; + break; + case ';': + nudge(lex); + type = TOKEN_SEMICOLON; + break; + case '%': + nudge(lex); + type = TOKEN_PERCENT; + break; + case '/': + nudge(lex); 
+ type = TOKEN_SLASH; + break; + case '*': + nudge(lex); + type = TOKEN_UNKNOWN; + break; + case '+': + nudge(lex); + if (peek(lex) == '+') { + nudge(lex); + type = TOKEN_PLUSPLUS; + } else { + type = TOKEN_PLUS; + } + break; + case '-': + nudge(lex); + if (peek(lex) == '-') { + nudge(lex); + type = TOKEN_MINUSMINUS; + } else { + type = TOKEN_MINUS; + } + break; + case ',': + nudge(lex); + type = TOKEN_COMMA; + break; + case '=': + nudge(lex); + if (peek(lex) == '=') { + nudge(lex); + type = TOKEN_EQUALITY; + } else { + type = TOKEN_EQUAL; + } + break; + case '!': + nudge(lex); + if (peek(lex) == '=') { + nudge(lex); + type = TOKEN_INEQUALITY; + } else { + type = TOKEN_BANG; + } + break; + case '>': + nudge(lex); + if (peek(lex) == '=') { + nudge(lex); + type = TOKEN_GT_EQ; + } else { + type = TOKEN_GT; + } + break; + case '<': + nudge(lex); + if (peek(lex) == '=') { + nudge(lex); + type = TOKEN_LT_EQ; + } else { + type = TOKEN_LT; + } + break; + default: + nudge(lex); + type = TOKEN_UNKNOWN; + break; + } + Token t = (Token) { .type = type, .start = start, .end = lex->pos, .col = col, .line = line }; + return t; +} + +static void print_token(const Token* t, const char* contents) +{ + static const char* TYPES[] = { + [TOKEN_IDENT] = "ident/type", + [TOKEN_LPAREN] = "open paren", + [TOKEN_RPAREN] = "close paren", + [TOKEN_LBRACE] = "open brace", + [TOKEN_RBRACE] = "close brace", + [TOKEN_LBRACKET] = "open bracket", + [TOKEN_RBRACKET] = "close bracket", + [TOKEN_EQUAL] = "equal", + [TOKEN_SEMICOLON] = "semicol", + [TOKEN_COMMA] = "comma", + [TOKEN_NUMBER_LITERAL] = "number", + [TOKEN_STRING_LITERAL] = "string literal", + [TOKEN_SLASH] = "slash", + [TOKEN_STAR] = "star", + [TOKEN_PLUS] = "plus", + [TOKEN_PLUSPLUS] = "++", + [TOKEN_MINUS] = "minus", + [TOKEN_MINUSMINUS] = "--", + [TOKEN_EQUALITY] = "equality ==", + [TOKEN_INEQUALITY] = "inequality !=", + [TOKEN_BANG] = "bang !", + [TOKEN_LT] = "lower than", + [TOKEN_GT] = "greater than", + [TOKEN_LT_EQ] = "lt or = 
than", + [TOKEN_GT_EQ] = "gt or = than", + [TOKEN_IF] = "if", + [TOKEN_ELSE] = "else", + [TOKEN_WHILE] = "while", + [TOKEN_FOR] = "for", + [TOKEN_RETURN] = "return", + [TOKEN_UNKNOWN] = "< UNKNOWN >", + [TOKEN_EOF] = "~EOF~" + }; + + printf("L%zu:%zu \t%-14s '", t->line + 1, t->col + 1, TYPES[t->type]); + fwrite(contents + t->start, 1, t->end - t->start, stdout); + printf("'\n"); +} + +static void add_token(Lexer* lex, Token tok) +{ + if (lex->token_count >= lex->token_cap) { + lex->token_cap *= 2; + lex->tokens = (Token*)realloc(lex->tokens, sizeof(Token) * lex->token_cap); + } + lex->tokens[lex->token_count++] = tok; +} + +void lexer_print(Lexer* lex) +{ + for (size_t i = 0; i < lex->token_count; i++) { + print_token(&lex->tokens[i], lex->src); + } +} + +void lexer_lex(Lexer* lex, const char* filename, const char* contents) +{ + lex->line = 1; + lex->col = 1; + lex->pos = 0; + lex->token_cap = 128; + lex->token_count = 0; + lex->tokens = (Token*)calloc(lex->token_cap, sizeof(Token)); + if(lex->tokens == NULL) panic("lexer_lex: could not alloc"); + lex->filename = filename; + lex->src = contents; + lex->src_len = strlen(contents); + for (;;) { + Token tok = next_token(lex); + add_token(lex, tok); + if (tok.type == TOKEN_EOF) + break; + } +} + +const char* token_type_str(TokenType t) +{ + static const char* type_strings[] = { + [TOKEN_IDENT] = "TOKEN_IDENT", + [TOKEN_LPAREN] = "TOKEN_LPAREN", + [TOKEN_RPAREN] = "TOKEN_RPAREN", + [TOKEN_LBRACE] = "TOKEN_LBRACE", + [TOKEN_RBRACE] = "TOKEN_RBRACE", + [TOKEN_LBRACKET] = "TOKEN_LBRACKET", + [TOKEN_RBRACKET] = "TOKEN_RBRACKET", + [TOKEN_EQUAL] = "TOKEN_EQUAL", + [TOKEN_SEMICOLON] = "TOKEN_SEMICOLON", + [TOKEN_COMMA] = "TOKEN_COMMA", + [TOKEN_NUMBER_LITERAL] = "TOKEN_NUMBER_LITERAL", + [TOKEN_STRING_LITERAL] = "TOKEN_STRING_LITERAL", + [TOKEN_SLASH] = "TOKEN_SLASH", + [TOKEN_STAR] = "TOKEN_STAR", + [TOKEN_PLUS] = "TOKEN_PLUS", + [TOKEN_PLUSPLUS] = "TOKEN_PLUSPLUS", + [TOKEN_MINUS] = "TOKEN_MINUS", + [TOKEN_MINUSMINUS] = 
"TOKEN_MINUSMINUS", + [TOKEN_EQUALITY] = "TOKEN_EQUALITY", + [TOKEN_INEQUALITY] = "TOKEN_INEQUALITY", + [TOKEN_BANG] = "TOKEN_BANG", + [TOKEN_LT] = "TOKEN_LT", + [TOKEN_GT] = "TOKEN_GT", + [TOKEN_LT_EQ] = "TOKEN_LT_EQ", + [TOKEN_GT_EQ] = "TOKEN_GT_EQ", + [TOKEN_IF] = "TOKEN_IF", + [TOKEN_ELSE] = "TOKEN_ELSE", + [TOKEN_WHILE] = "TOKEN_WHILE", + [TOKEN_FOR] = "TOKEN_FOR", + [TOKEN_RETURN] = "TOKEN_RETURN", + [TOKEN_CONTINUE] = "TOKEN_CONTINUE", + [TOKEN_BREAK] = "TOKEN_BREAK", + [TOKEN_PERCENT] = "TOKEN_PERCENT", + [TOKEN_UNKNOWN] = "TOKEN_UNKNOWN", + [TOKEN_EOF] = "TOKEN_EOF" + }; + if (t >= TOKEN_IDENT && t <= TOKEN_EOF) { + return type_strings[t]; + } else { + return "UNKNOWN_TOKEN_TYPE"; + } +} diff --git a/lexer.h b/lexer.h @@ -0,0 +1,66 @@ +#pragma once + +#include <stdlib.h> + +typedef enum { + TOKEN_IDENT = 1006, + TOKEN_LPAREN, + TOKEN_RPAREN, + TOKEN_LBRACE, + TOKEN_RBRACE, + TOKEN_LBRACKET, + TOKEN_RBRACKET, + TOKEN_EQUAL, + TOKEN_SEMICOLON, + TOKEN_PERCENT, + TOKEN_COMMA, + TOKEN_NUMBER_LITERAL, + TOKEN_STRING_LITERAL, + TOKEN_SLASH, + TOKEN_STAR, + TOKEN_PLUS, + TOKEN_PLUSPLUS, + TOKEN_MINUS, + TOKEN_MINUSMINUS, + TOKEN_EQUALITY, + TOKEN_INEQUALITY, + TOKEN_BANG, + TOKEN_LT, + TOKEN_GT, + TOKEN_LT_EQ, + TOKEN_GT_EQ, + TOKEN_IF, + TOKEN_ELSE, + TOKEN_WHILE, + TOKEN_FOR, + TOKEN_BREAK, + TOKEN_CONTINUE, + TOKEN_RETURN, + TOKEN_UNKNOWN, // NOTE: also update print_token + TOKEN_EOF +} TokenType; // NOTE also update token_type_str! 
+ +const char* token_type_str(TokenType t); + +typedef struct { + size_t start; + size_t end; + size_t line; + size_t col; + TokenType type; +} Token; + +typedef struct { + Token* tokens; + size_t token_count; + size_t token_cap; + size_t pos; + size_t line; + size_t col; + const char* src; + size_t src_len; + const char* filename; +} Lexer; + +void lexer_lex(Lexer*, const char* filename, const char* contents); +void lexer_print(Lexer*); diff --git a/main.c b/main.c @@ -0,0 +1,79 @@ +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "file.h" +#include "utils.h" +#include "lexer.h" +#include "parser.h" +#include "sem.h" +#include "gen.h" + +int +main(int argc, char* argv[]) +{ + if (argc < 2) { + printf("Usage: %s <file>\n", argv[0]); + return 1; + } + + const char* filename = argv[1]; + const char* contents = readfile(filename); + + if (contents == NULL) { panic("error reading file: %s", filename); } + + printf("--- lex --- \n"); + + Lexer lex; + lexer_lex(&lex, filename, contents); + // lexer_print(&lex); + + Parser par = parser_init(&lex); + + printf("--- par --- \n"); + + Ast ast; + parser_parse(&ast, &par); + // ast_print(&ast); + + printf("--- sem --- \n"); + + Scope program_scope = scope_init(ast.node); + scope_build(&program_scope, &ast); + scope_print(&program_scope, &ast); + + printf("--- gen --- \n"); + + Gen gen = gen_init(&program_scope, contents); + gen_next(&gen, ast.node); + + gcc_jit_result* result; + + /* Compile the code. */ + result = gcc_jit_context_compile(gen.ctx); + if (!result) { panic("compilation failed"); } + + /* Extract the generated code from "result". 
*/ + if (argv[2] != NULL && strcmp(argv[2], "--exec") == 0) { + printf("--- exec start --- \n"); + + typedef int (*fn_type)(void); + fn_type starting_func = (fn_type)gcc_jit_result_get_code(result, "main"); + + if (!starting_func) { + fprintf(stderr, "NULL gcc_jit_result_get_code\n"); + exit(1); + } + int main_ret = starting_func(); + + printf("--- exec end: %d --- \n", main_ret); + } + + fflush(stdout); + + gcc_jit_context_compile_to_file(gen.ctx, GCC_JIT_OUTPUT_KIND_EXECUTABLE, "out"); + + gcc_jit_context_release(gen.ctx); + gcc_jit_result_release(result); +} diff --git a/makefile b/makefile @@ -0,0 +1,46 @@ +# note: needed both gcc and libgccjit +# macOS: brew install gcc libgccjit +# linux: sudo apt-get install build-essential gcc g++ libgccjit-10-dev +# update the prefix if needed + +# Try Homebrew first (macOS) +BREW := $(shell command -v brew 2>/dev/null) +ifneq ($(BREW),) + LIBGCCJIT_PREFIX := $(shell brew --prefix libgccjit 2>/dev/null) +endif + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Darwin) + LIB := -I${LIBGCCJIT_PREFIX}/include -L${LIBGCCJIT_PREFIX}/lib/gcc/current -lgccjit +else + LIBGCCJIT_PREFIX := $(shell dirname $(shell gcc -print-file-name=libgccjit.so)) + LIB := -I${LIBGCCJIT_PREFIX}/include -L${LIBGCCJIT_PREFIX} -lgccjit +endif + +SRC = *.c */*.c +BIN = oxc +STD = -std=c99 + +default: + cc ${STD} -g -Wall -Wextra -Wpedantic -Wshadow -Wconversion -Wno-unused-function -o ${BIN} ${SRC} ${LIB} + +clean: + rm -rf ${BIN} ${BIN}.* err.log + +release: clean + cc ${STD} -O02 -Wall -Wshadow -Wextra -Wpedantic -Werror -o ${BIN} ${SRC} ${LIB} + +check: clean + cc ${STD} -g -Wall -Wextra -fsanitize=address -fsanitize=undefined -o ${BIN} ${SRC} ${LIB} + +test: clean default + @for f in ex*.ox; do \ + ./${BIN} $$f > /dev/null 2>err.log || { echo "FAIL: $$f"; cat err.log; exit 1; }; \ + if [ -s err.log ]; then echo "FAIL: $$f"; cat err.log; exit 1; fi; \ + done; \ + rm -f err.log + +test-hmap: clean default + MallocNanoZone=0 ./oxc --test-hmap + +again: 
clean default diff --git a/ox-syntax/LICENSE b/ox-syntax/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Endice Software pty ltd + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/ox-syntax/language-configuration.json b/ox-syntax/language-configuration.json @@ -0,0 +1,23 @@ +{ + "comments": { + "lineComment": "//", + "blockComment": [ + "/*", + "*/" + ] + }, + "brackets": [ + [ + "{", + "}" + ], + [ + "[", + "]" + ], + [ + "(", + ")" + ] + ] +} diff --git a/ox-syntax/ox-syntax-0.0.3.vsix b/ox-syntax/ox-syntax-0.0.3.vsix Binary files differ. 
diff --git a/ox-syntax/package.json b/ox-syntax/package.json @@ -0,0 +1,26 @@ +{ + "name": "ox-syntax", + "displayName": "Ox Syntax", + "description": "Syntax highlighting for the Ox language", + "version": "0.0.3", + "engines": { + "vscode": "^1.50.0" + }, + "contributes": { + "languages": [ + { + "id": "ox", + "aliases": ["Ox", "ox"], + "extensions": [".ox"], + "configuration": "./language-configuration.json" + } + ], + "grammars": [ + { + "language": "ox", + "scopeName": "source.ox", + "path": "./syntaxes/ox.tmLanguage.json" + } + ] + } +} diff --git a/ox-syntax/rebuild-syntax-vscode.sh b/ox-syntax/rebuild-syntax-vscode.sh @@ -0,0 +1,3 @@ +vsce package --allow-missing-repository +code --uninstall-extension ox-syntax +code --install-extension ox-syntax-0.0.3.vsix diff --git a/ox-syntax/syntaxes/ox.sublime-syntax b/ox-syntax/syntaxes/ox.sublime-syntax @@ -0,0 +1,106 @@ +%YAML 1.2 +--- +name: Ox +file_extensions: + - ox +scope: source.ox + +# ------------------------------------------------------------------ +# Top‑level contexts – the “main” context is what Sublime loads first +# ------------------------------------------------------------------ +contexts: + main: + - include: comments # block / line / shebang + - include: keywords + - include: numbers + - include: strings + - include: basic-types + - include: functions + - include: typedefs + - include: builtins + + # ------------------------------------------------------------------ + # Comments + # ------------------------------------------------------------------ + comments: + - include: block-comment # /* … */ + - match: '//.*$' + scope: comment.line.double-slash + - match: '#!.*$' + scope: comment.line.double-slash + + # ------------------------------------------------------------------ + # Block comments – allows nesting + # ------------------------------------------------------------------ + block-comment: + - match: '/\*' + scope: punctuation.definition.comment + push: + - meta_scope: comment.block + # 
end of block comment – pop back to the parent context + - match: '\*/' + scope: punctuation.definition.comment + pop: true + # allow nested block comments + - include: block-comment + + # ------------------------------------------------------------------ + # Keywords, operators and constants + # ------------------------------------------------------------------ + keywords: + - match: '\b(const|else|for|if|pkg|record|return|typedef|match|case|while)\b' + scope: keyword.control + - match: '\b(and|by|in|not|or)\b' + scope: keyword.operator + - match: '\b(false|nil|true)\b' + scope: constant.language + + # ------------------------------------------------------------------ + # Built‑in functions + # ------------------------------------------------------------------ + builtins: + - match: '\b(abs|len|assert|print|exit|panic)\b' + scope: variable.function support.function.builtin + + # ------------------------------------------------------------------ + # Numbers + # ------------------------------------------------------------------ + numbers: + - match: '\b([+-])?[0-9]+(\.[0-9]*(e[0-9]+))?\b' + scope: constant.numeric + - match: '\b([+-])?0x[0-9A-Fa-f]+\b' + scope: constant.numeric + + # ------------------------------------------------------------------ + # Basic types + # ------------------------------------------------------------------ + basic-types: + - match: '\b(int|void|float|char|string|bool)\b' + scope: storage.type + + # ------------------------------------------------------------------ + # Function names look‑ahead for '(') + # ------------------------------------------------------------------ + functions: + - match: '\b([A-Za-z_][A-Za-z0-9_]*)\s*(?=$)' + scope: entity.name.function + + # ------------------------------------------------------------------ + # Type names (capitalised identifiers) + # ------------------------------------------------------------------ + typedefs: + - match: '\b[A-Z][A-Za-z0-9_]*\b' + scope: entity.name.type + + # 
------------------------------------------------------------------
+  # Strings (double‑quoted)
+  # ------------------------------------------------------------------
+  strings:
+    # opening quote pushes an anonymous context that lasts until the
+    # closing quote; sublime-syntax has no begin/end/patterns keys
+    - match: '"'
+      scope: punctuation.definition.string.begin
+      push:
+        - meta_scope: string.quoted.double
+        # escape handling inside the string (must precede the closing quote rule)
+        - match: '\\.'
+          scope: constant.character.escape
+        - match: '"'
+          scope: punctuation.definition.string.end
+          pop: true
diff --git a/ox-syntax/syntaxes/ox.tmLanguage.json b/ox-syntax/syntaxes/ox.tmLanguage.json
@@ -0,0 +1,167 @@
+{
+  "name": "Ox",
+  "scopeName": "source.ox",
+  "fileTypes": [
+    "ox"
+  ],
+  "patterns": [
+    {
+      "include": "#comments"
+    },
+    {
+      "include": "#keywords"
+    },
+    {
+      "include": "#numbers"
+    },
+    {
+      "include": "#strings"
+    },
+    {
+      "include": "#basic-types"
+    },
+    {
+      "include": "#functions"
+    },
+    {
+      "include": "#typedefs"
+    },
+    {
+      "include": "#builtins"
+    }
+  ],
+  "repository": {
+    "keywords": {
+      "patterns": [
+        {
+          "match": "\\b(const|else|for|if|pkg|record|return|typedef|match|case|while)\\b",
+          "name": "keyword.control"
+        },
+        {
+          "match": "\\b(and|by|in|not|or)\\b",
+          "name": "keyword.operator"
+        },
+        {
+          "match": "\\b(false|nil|true)\\b",
+          "name": "constant.language"
+        }
+      ]
+    },
+    "builtins": {
+      "patterns": [
+        {
+          "match": "\\b(abs|len|assert|print|exit|panic)\\b",
+          "name": "variable.function support.function.builtin"
+        }
+      ]
+    },
+    "numbers": {
+      "patterns": [
+        {
+          "match": "\\b([+-])?[0-9]+(\\.[0-9]+)?([eE][+-]?[0-9]+)?\\b",
+          "name": "constant.numeric"
+        },
+        {
+          "match": "\\b([+-])?0x[0-9A-Fa-f]+\\b",
+          "name": "constant.numeric"
+        }
+      ]
+    },
+    "basic-types": {
+      "patterns": [
+        {
+          "match": "\\b(int|void|float|char|string|bool)\\b",
+          "name": "storage.type"
+        }
+      ]
+    },
+    "functions": {
+      "patterns": [
+        {
+          "match": "\\b([A-Za-z_][A-Za-z0-9_]*)\\s*(?=\\()",
+          "name": "entity.name.function"
+        }
+      ]
+    },
+    "typedefs": {
+      "patterns": [
+        {
+          "match": "\\b[A-Z][A-Za-z0-9_]*\\b",
+          "name": "entity.name.type"
+        }
+      ]
+    },
+    "strings": {
+      "patterns": [
+        {
+          "begin": "\\\"",
+          "beginCaptures": {
+            
"0": { + "name": "punctuation.definition.string.begin" + } + }, + "end": "\\\"", + "endCaptures": { + "0": { + "name": "punctuation.definition.string.end" + } + }, + "name": "string.quoted.double", + "patterns": [ + { + "match": "\\\\.", + "name": "constant.character.escape" + } + ] + } + ] + }, + "block-comment": { + "begin": "/\\*", + "beginCaptures": { + "0": { + "name": "punctuation.definition.comment" + } + }, + "end": "\\*/", + "endCaptures": { + "0": { + "name": "punctuation.definition.comment" + } + }, + "name": "comment.block", + "patterns": [ + { + "include": "#block-comment" + } + ] + }, + "comments": { + "patterns": [ + { + "include": "#block-comment" + }, + { + "begin": "//", + "beginCaptures": { + "0": { + "name": "punctuation.definition.comment" + } + }, + "end": "\\n", + "name": "comment.line.double-slash" + }, + { + "begin": "#!", + "beginCaptures": { + "0": { + "name": "punctuation.definition.comment" + } + }, + "end": "\\n", + "name": "comment.line.double-slash" + } + ] + } + } +} diff --git a/parser.h b/parser.h @@ -0,0 +1,195 @@ +#pragma once + +#include "lexer.h" + +#include <stdlib.h> +#include <stdbool.h> + +#define IDENTSZ 256 + +typedef enum { + NODE_PROGRAM = 11, + NODE_FUNCTION_DECL, + NODE_PARAM, + NODE_VAR_DECL, + NODE_VAR_ASSIGN, + NODE_BLOCK, + NODE_CALL_EXPR, + NODE_RETURN, + NODE_BREAK, + NODE_CONTINUE, + NODE_NUMBER_LITERAL, + NODE_STRING_LITERAL, + NODE_IDENT, + NODE_TYPE, + NODE_BINARY_EXPR, + NODE_UNARY_EXPR, + NODE_EXPR_STATEMENT, + NODE_SUBSCRIPT_EXPR, + NODE_IF, + NODE_WHILE, + NODE_FOR, + NODE_EMPTY_STATEMENT, + NODE_UNKNOWN, +} NodeType; // note: if changed, edit node_type_str! 
+ +const char* node_type_str(NodeType); +void print_node_type_str(NodeType); + +/* +typedef enum { + OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, + OP_POS, OP_NEG, OP_INC, OP_DEC, + OP_BITAND, OP_BITOR, OP_BITXOR, OP_BITNOT, + OP_SHL, OP_SHR, + OP_LOGAND, OP_LOGOR, OP_LOGNOT, + OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, + OP_ASSIGN, OP_ADD_ASSIGN, OP_SUB_ASSIGN, + OP_MUL_ASSIGN, OP_DIV_ASSIGN, OP_MOD_ASSIGN, + OP_SHL_ASSIGN, OP_SHR_ASSIGN, + OP_AND_ASSIGN, OP_XOR_ASSIGN, OP_OR_ASSIGN, + OP_CONDITIONAL, OP_COMMA, + OP_ADDR, OP_DEREF, OP_MEMBER, OP_PTR_MEMBER, + OP_SUBSCRIPT, OP_CALL, + OP_SIZEOF, OP_ALIGNOF +} OpType; +*/ + +typedef enum { + OP_PLUS = 23, + OP_MINUS, + OP_MUL, + OP_DIV, + OP_MOD, + OP_BIT_AND, // & ampersand + OP_BIT_OR, // | + OP_ASSIGN, + OP_EQUALITY, // == + OP_INEQUALITY, // != + OP_LT_EQ, + OP_GT_EQ, + OP_LT, + OP_GT, +} OpType; + +typedef enum { + OPER_MINUS = 0, + OPER_BANG, + OPER_PREINC, + OPER_PREDEC, + OPER_POSTINC, + OPER_POSTDEC, +} UnaryOp; + +typedef struct { + size_t start; + size_t end; +} Span; + +typedef struct Node { + NodeType type; + struct Node* next; + struct Scope* scope; + const char* filename; + size_t line, col; + + /* NOTE we will eventually add spans for condition info, etc. 
to print out in errors */ + + union { + /* clang-format off */ + struct { struct Node** decl; size_t len, cap; } program; + struct { Span name; struct Node* return_type; struct Node** params; size_t p_cap, p_len; struct Node* body; } function_decl; + struct { Span name; struct Node* type; } param; + struct { struct Node* cond; struct Node* then_body; struct Node* else_body; } if_statement; + struct { struct Node* cond; struct Node* body; } while_statement; + struct { struct Node* init; struct Node* cond; struct Node* increment; struct Node* body; } for_statement; + struct { struct Node** stmts; size_t cap, len; } block; + struct { Span name; struct Node* type; struct Node* init; } var_decl; + struct { struct Node* lhs; struct Node* rhs; } var_assign; + struct { struct Node* callee; struct Node** args; size_t cap, len; } call_expr; + struct { struct Node* expr; } ret; + struct { struct Node* expr; } cont; + struct { struct Node* expr; } expr_statement; + struct { OpType op; struct Node* lhs; struct Node* rhs; } binary_expr; + struct { UnaryOp op; struct Node* operand; bool is_postfix; } unary_expr; + struct { struct Node* array; struct Node* index; } subscript_expr; + struct { double value; } number; + struct { Span value; } string; + struct { Span name; } ident; + /* clang-format on */ + } data; +} Node; + +typedef struct { + Token* tokens; + size_t token_count; + size_t pos; + const char* src; + size_t src_len; + const char* filename; +} Parser; + +typedef struct { + Node* node; + const char* src; +} Ast; + +typedef struct { + Node** items; + size_t len, cap; +} NodeVec; + +Parser parser_init(Lexer*); +void parser_parse(Ast*, Parser*); +void ast_print(Ast*); + +Token peek(Parser*); +Token peek2(Parser*); +Token consume(Parser*); +Token expect(Parser*, TokenType); +bool match(Parser*, TokenType); +bool check(Parser*, TokenType); + +Node* parse_declarations(Parser*); + +Node* parse_number(Parser*); +Node* parse_ident(Parser*); +Node* parse_primary(Parser*); +Node* 
parse_postfix(Parser*); +Node* parse_primary(Parser*); +Node* parse_unary(Parser*); +Node* parse_term(Parser*); +Node* parse_expression(Parser*); +Node* parse_expression_statement(Parser*); +Node* parse_statement(Parser*); +Node* parse_block(Parser*); +Node* parse_declaration_statement(Parser*); +Node* parse_decl_or_func_decl(Parser*); +NodeVec parse_param_list(Parser*); +Node* parse_type(Parser*); +Node* parse_func_call(Parser*); +NodeVec parse_func_arguments(Parser*); +Node* parse_if(Parser*); +Node* parse_while(Parser*); +Node* parse_for(Parser*); +Node* parse_assignment(Parser*); +Node* parse_break(Parser*); +Node* parse_continue_statement(Parser*); +Node* parse_expression(Parser*); +Node* make_program_node(void); +Node* make_ident_node(Span name); +Node* make_param_decl(Parser*); +Node* make_postfix_node(UnaryOp, Node*); +Node* make_subscript_node(Node*, Node*); +Node* make_ident_node(Span); +Node* make_postfix_node(UnaryOp, Node*); +Node* make_number_node(Parser*); +Node* make_unary_node(UnaryOp, Node*); +Node* make_string_node(Parser*); +Node* make_binary_node(OpType, Node*, Node*); +Node* parse_return_statement(Parser*); +Node* make_empty_statement(void); +Node* make_call_node(Node*, NodeVec); + +const char* span_str(const char* src, Span s, char* stack_alloc_chptr); +const char* range_str(const char* src, size_t start, size_t end, char* stack_alloc_chptr); diff --git a/parser/ast.c b/parser/ast.c @@ -0,0 +1,120 @@ +#include "../parser.h" +#include "../utils.h" + +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <assert.h> + +Node* +make_postfix_node(UnaryOp op, Node* operand) +{ + Node* node = (Node*)calloc(1, sizeof(Node)); + if (node == NULL) panic("make_postfix_node: could not alloc"); + node->type = NODE_UNARY_EXPR; + node->scope = NULL; + node->next = NULL; + node->data.unary_expr.op = op; + node->data.unary_expr.operand = operand; + node->data.unary_expr.is_postfix = true; + return node; +} + +Node* +make_subscript_node(Node* 
array, Node* index) +{ + Node* node = calloc(1, sizeof(Node)); + if (node == NULL) panic("make_subscript_node: could not alloc"); + node->type = NODE_SUBSCRIPT_EXPR; + node->scope = NULL; + node->next = NULL; + node->data.subscript_expr.array = array; + node->data.subscript_expr.index = index; + return node; +} + +Node* +make_number_node(Parser* par) +{ + Token tok = consume(par); + assert(tok.type == TOKEN_NUMBER_LITERAL); + size_t len = tok.end - tok.start; + char buf[len + 1]; // strtod needs a \0 terminated string + for (size_t i = 0; i < len; i++) + buf[i] = par->src[tok.start + i]; + buf[len] = '\0'; + double value = strtod(buf, NULL); + Node* node = (Node*)calloc(1, sizeof(Node)); + if (node == NULL) panic("make_number_node: could not alloc"); + node->type = NODE_NUMBER_LITERAL; + node->scope = NULL; + node->data.number.value = value; + return node; +} + +Node* +make_unary_node(UnaryOp op, Node* operand) +{ + Node* node = (Node*)calloc(1, sizeof(Node)); + if (node == NULL) panic("make_unary_node: could not alloc"); + node->type = NODE_UNARY_EXPR; + node->scope = NULL; + node->next = NULL; + node->data.unary_expr.op = op; + node->data.unary_expr.operand = operand; + node->data.unary_expr.is_postfix = false; + return node; +} + +Node* +make_string_node(Parser* par) +{ + Token tok = consume(par); + Node* node = (Node*)calloc(1, sizeof(Node)); + if (node == NULL) panic("make_string_node: could not alloc"); + node->type = NODE_STRING_LITERAL; + node->scope = NULL; + node->data.string.value.start = (tok.start + 1); // "... + node->data.string.value.end = (tok.end - 1); // ..." 
+ return node; +} + +Node* +make_binary_node(OpType op, Node* lhs, Node* rhs) +{ + Node* node = (Node*)calloc(1, sizeof(Node)); + if (node == NULL) panic("make_binary_node: could not alloc"); + node->type = NODE_BINARY_EXPR; + node->scope = NULL; + node->next = NULL; + node->data.binary_expr.op = op; + node->data.binary_expr.lhs = lhs; + node->data.binary_expr.rhs = rhs; + return node; +} + +Node* +make_empty_statement(void) +{ + Node* node = (Node*)calloc(1, sizeof(Node)); + if (node == NULL) panic("make_empty_statement: could not alloc"); + assert(node != NULL); + node->type = NODE_EMPTY_STATEMENT; + node->scope = NULL; + return node; +} + +Node* +make_call_node(Node* callee, NodeVec args) +{ + Node* call = (Node*)calloc(1, sizeof(Node)); + if (call == NULL) panic("make_call_node: could not alloc"); + assert(call != NULL); + call->type = NODE_CALL_EXPR; + call->scope = NULL; + call->data.call_expr.callee = callee; + call->data.call_expr.args = args.items; + call->data.call_expr.cap = args.cap; + call->data.call_expr.len = args.len; + return call; +} diff --git a/parser/decl.c b/parser/decl.c @@ -0,0 +1,69 @@ +#include "../parser.h" +#include "../utils.h" + +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <assert.h> + +Node* +parse_type(Parser* par) +{ + Token tok = expect(par, TOKEN_IDENT); + + // @later we will resolve types later, incl. custom vs. 
system, allow for now + // + // const char firstChar = par->src[t.start]; + // if (firstChar >= 'A' && firstChar <= 'Z') { + // // assume this is a user type + // } + + Node* node = calloc(1, sizeof(Node)); + if (node == NULL) panic("parse_type: alloc failed"); + node->type = NODE_TYPE; + node->scope = NULL; + node->data.ident.name = (Span) { .start = tok.start, .end = tok.end }; + return node; +} + +// <TYPE> name:<IDENT> +Node* +make_param_decl(Parser* par) +{ + Node* type = parse_type(par); + Token param_name = expect(par, TOKEN_IDENT); + Span ident_name = { .start = param_name.start, .end = param_name.end }; + Node* param = (Node*)calloc(1, sizeof(Node)); + if (param == NULL) panic("make_param_decl alloc failed"); + param->type = NODE_PARAM; + param->scope = NULL; + param->data.param.name = ident_name; + param->data.param.type = type; + return param; +} + +NodeVec +parse_param_list(Parser* par) +{ + NodeVec v = { 0 }; + if (peek(par).type == TOKEN_RPAREN) return v; // found `)` no parameters + + v.cap = 4; + v.items = (Node**)calloc(v.cap, sizeof(Node*)); + + if (v.items == NULL) panic("parse_param_list: could not alloc"); + + for (;;) { + Node* param = make_param_decl(par); + + if (v.len == v.cap) { + v.cap *= 2; + v.items = (Node**)realloc(v.items, v.cap * sizeof(Node*)); + } + + v.items[v.len++] = param; + + if (!match(par, TOKEN_COMMA)) break; // found `)` instead of `,` + } + return v; +} diff --git a/parser/expr.c b/parser/expr.c @@ -0,0 +1,241 @@ +#include "../parser.h" +#include "../utils.h" + +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <assert.h> + +Node* +parse_func_call(Parser* par) +{ + Token tok = expect(par, TOKEN_IDENT); + Span callee = { .start = tok.start, .end = tok.end }; + Node* ident = make_ident_node(callee); + + const char* name = span_str(par->src, ident->data.ident.name, (char[IDENTSZ]) { 0 }); + printf("parse_func_call: %s\n", name); + + expect(par, TOKEN_LPAREN); + + Node* call = (Node*)calloc(1, 
sizeof(Node)); + if (call == NULL) panic("parse_func_call: alloc failed"); + + // start parse arguments + if (peek(par).type != TOKEN_RPAREN) { + call->data.call_expr.args = (Node**)calloc(4, sizeof(Node*)); + if (call->data.call_expr.args == NULL) + panic("parse_func_call: args: could not alloc"); + call->data.call_expr.cap = 4; + call->data.call_expr.len = 0; + + for (;;) { + Node* arg = parse_expression(par); + + if (call->data.call_expr.len == call->data.call_expr.cap) { + call->data.call_expr.cap *= 2; + call->data.call_expr.args + = (Node**)realloc(call->data.call_expr.args, + call->data.call_expr.cap * sizeof(Node*)); + } + call->data.call_expr.args[call->data.call_expr.len++] = arg; + + if (!match(par, TOKEN_COMMA)) break; // found `)` instead of `,` + } + } + // ends parse arguments + + expect(par, TOKEN_RPAREN); + + call->type = NODE_CALL_EXPR; + call->scope = NULL; + call->data.call_expr.callee = ident; + return call; +} + +Node* +parse_number(Parser* par) +{ + Token tok = consume(par); + assert(tok.type == TOKEN_NUMBER_LITERAL); + + size_t len = tok.end - tok.start; + char buf[len + 1]; // strtod needs a \0 terminated string + for (size_t i = 0; i < len; i++) + buf[i] = par->src[tok.start + i]; + buf[len] = '\0'; + double value = strtod(buf, NULL); + + Node* num_node = (Node*)calloc(1, sizeof(Node)); + if (num_node == NULL) panic("parse_number: alloc failed"); + num_node->type = NODE_NUMBER_LITERAL; + num_node->scope = NULL; + num_node->data.number.value = value; + return num_node; +} + +Node* +parse_ident(Parser* par) +{ + Token tok = consume(par); + assert(tok.type == TOKEN_IDENT); + Node* ident_node = (Node*)calloc(1, sizeof(Node)); + if (ident_node == NULL) panic("parse_ident: alloc failed"); + ident_node->type = NODE_IDENT; + ident_node->scope = NULL; + ident_node->data.ident.name = (Span) { .start = tok.start, .end = tok.end }; + + // const char* name = span_str( + // par->src, (Span) { .start = tok.start, .end = tok.end }, (char[IDENTSZ]) { 0 }); 
+    // printf("parse_ident: %s\n", name);
+
+    return ident_node;
+}
+
+/*
+ * Parse a comma-separated argument list. The caller has already consumed
+ * the opening `(`; the closing `)` is left for the caller to consume.
+ * Returns a zeroed NodeVec when the next token is `)`.
+ */
+NodeVec
+parse_func_arguments(Parser* par)
+{
+    NodeVec v = { 0 };
+    if (peek(par).type == TOKEN_RPAREN) return v; // found `)` no arguments
+
+    v.cap = 4;
+    v.items = (Node**)calloc(v.cap, sizeof(Node*));
+    if (v.items == NULL) panic("parse_func_arguments: could not alloc");
+
+    for (;;) {
+        Node* arg = parse_expression(par);
+
+        if (v.len == v.cap) {
+            // grow through a temporary so a failed realloc is detected
+            // before the old pointer is overwritten
+            size_t new_cap = v.cap * 2;
+            Node** grown = (Node**)realloc(v.items, new_cap * sizeof(Node*));
+            if (grown == NULL) panic("parse_func_arguments: could not realloc");
+            v.items = grown;
+            v.cap = new_cap;
+        }
+
+        v.items[v.len++] = arg;
+
+        if (!match(par, TOKEN_COMMA)) break; // found `)` instead of `,`
+    }
+    return v;
+}
+
+/*
+ * Parse a primary expression followed by any number of postfix operators:
+ * `++`, `--`, `[index]` and `(args)`.
+ */
+Node*
+parse_postfix(Parser* par)
+{
+    Node* node = parse_primary(par);
+
+    for (;;) {
+        if (match(par, TOKEN_PLUSPLUS)) {
+            node = make_postfix_node(OPER_POSTINC, node);
+        } else if (match(par, TOKEN_MINUSMINUS)) {
+            node = make_postfix_node(OPER_POSTDEC, node);
+        } else if (match(par, TOKEN_LBRACKET)) {
+            Node* index = parse_expression(par); // parse inside brackets
+            expect(par, TOKEN_RBRACKET);
+            node = make_subscript_node(node, index);
+            // } else if (match(par, TOKEN_DOT)) { // TODO dot members and arrows
+            // Token id = expect(par, TOKEN_IDENTIFIER);
+            // expr = make_member_node(expr, id);
+            // } else if (match(par, TOKEN_ARROW)) {
+            // Token id = expect(par, TOKEN_IDENTIFIER);
+            // expr = make_ptrmember_node(expr, id);
+        } else if (match(par, TOKEN_LPAREN)) {
+            NodeVec args = parse_func_arguments(par); // parse func call args
+            expect(par, TOKEN_RPAREN);
+            node = make_call_node(node, args);
+        } else {
+            break;
+        }
+    }
+    return node;
+}
+
+#define STARTING_ROOT_NODES 32
+
+/*
+ * Allocate the root NODE_PROGRAM node with room for STARTING_ROOT_NODES
+ * top-level declarations.
+ */
+Node*
+make_program_node(void)
+{
+    Node* node = (Node*)calloc(1, sizeof(Node));
+    if (node == NULL) panic("make_program_node: alloc failed");
+    node->type = NODE_PROGRAM;
+    node->scope = NULL;
+    node->next = NULL;
+    node->data.program.cap = STARTING_ROOT_NODES;
+    node->data.program.len = 0;
+    // decl is an array of Node pointers, so size each slot as Node*, not Node
+    node->data.program.decl = (Node**)calloc(STARTING_ROOT_NODES, sizeof(Node*));
+    if (node->data.program.decl == NULL) panic("make_program_node: decls: alloc failed");
+    return node;
+}
+
+/* Allocate a NODE_IDENT node whose name is the given source span. */
+Node*
+make_ident_node(Span name)
+{
+    Node* node = (Node*)calloc(1, sizeof(Node));
+    if (node == NULL) panic("make_ident_node: alloc failed");
+    node->type = NODE_IDENT;
+    node->scope = NULL;
+    node->next = NULL;
+    node->data.ident.name = name;
+    return node;
+}
+
+/*
+ * Parse a primary expression: string literal, number literal, identifier,
+ * or a parenthesised expression. Any other token is reported via panic().
+ */
+Node*
+parse_primary(Parser* par)
+{
+    Token tok = peek(par);
+    if (tok.type == TOKEN_STRING_LITERAL) { return make_string_node(par); }
+    if (tok.type == TOKEN_NUMBER_LITERAL) { return make_number_node(par); }
+    if (tok.type == TOKEN_IDENT) { return parse_ident(par); }
+    if (tok.type == TOKEN_LPAREN) {
+        consume(par); // consume '('
+        Node* node = parse_expression(par);
+        expect(par, TOKEN_RPAREN);
+        return node;
+    }
+
+    // span_str returns NULL for an empty span; never hand NULL to %s
+    const char* name = span_str(
+        par->src, (Span) { .start = tok.start, .end = tok.end }, (char[IDENTSZ]) { 0 });
+    panic("Expected Primary Expr, but found '%s' (%s) at %s:%zu:%zu",
+        name ? name : "",
+        token_type_str(tok.type),
+        par->filename,
+        tok.line,
+        tok.col);
+
+    return NULL;
+}
+
+/*
+ * Parse prefix unary operators (`-`, `--`, `!`, `++`, `+`) recursively,
+ * falling through to parse_postfix when no prefix operator is present.
+ * A leading `+` is consumed and produces no node.
+ */
+Node*
+parse_unary(Parser* par)
+{
+    Node* inner = NULL;
+    switch (peek(par).type) {
+    case TOKEN_MINUS:
+        consume(par);
+        inner = parse_unary(par);
+        return make_unary_node(OPER_MINUS, inner);
+    case TOKEN_MINUSMINUS:
+        consume(par);
+        inner = parse_unary(par);
+        return make_unary_node(OPER_PREDEC, inner);
+    case TOKEN_BANG:
+        consume(par);
+        inner = parse_unary(par);
+        return make_unary_node(OPER_BANG, inner);
+    // TODO add others '~a' '$a' '*a' '^a' '@a' '&a'
+    case TOKEN_PLUSPLUS:
+        consume(par);
+        inner = parse_unary(par);
+        return make_unary_node(OPER_PREINC, inner);
+    case TOKEN_PLUS:
+        consume(par);
+        return parse_unary(par);
+    default:
+        return parse_postfix(par);
+    }
+}
+
+// called by parse_multiplicative
+Node*
+parse_term(Parser* par)
+{
+    return parse_unary(par);
+}
diff --git a/parser/parser.c b/parser/parser.c
@@ -0,0 +1,369 @@
+#include "../parser.h"
+#include "../utils.h"
+
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <assert.h>
+
+// TODO make sure ALL callocs have been successful
+
+/* Build a Parser that reads the token array and source buffer held by the lexer. */
+Parser
+parser_init(Lexer* lex)
+{
+    return (Parser) { .pos = 0,
+        .tokens = lex->tokens,
+        .token_count = lex->token_count,
+        .src = lex->src,
+        .src_len = lex->src_len,
+        .filename = lex->filename };
+}
+
+/*
+ * Return the current token without consuming it. Yields a TOKEN_EOF token
+ * when the position is past the last token or the token's type is 0.
+ */
+Token
+peek(Parser* par)
+{
+    // same bounds guard as peek2: never index past the token array
+    if (par->pos >= par->token_count) return (Token) { .type = TOKEN_EOF };
+    Token t = par->tokens[par->pos];
+    return t.type ? t : (Token) { .type = TOKEN_EOF };
+}
+
+/* Return the token after the current one, or a TOKEN_EOF token if there is none. */
+Token
+peek2(Parser* par)
+{
+    if (par->pos + 1 >= par->token_count) return (Token) { .type = TOKEN_EOF };
+    Token t = par->tokens[par->pos + 1];
+    return t.type ? t : (Token) { .type = TOKEN_EOF };
+}
+
+/*
+ * Return the current token and advance. At the end of input (position past
+ * the array, or token type 0) returns a TOKEN_EOF token and does not advance.
+ */
+Token
+consume(Parser* par)
+{
+    if (par->pos >= par->token_count) return (Token) { .type = TOKEN_EOF };
+    Token t = par->tokens[par->pos];
+    if (!t.type) return (Token) { .type = TOKEN_EOF };
+    par->pos++;
+    return t;
+}
+
+/* True when the current token has the given type; consumes nothing. */
+bool
+check(Parser* p, TokenType type)
+{
+    return (peek(p).type == type);
+}
+
+/*
+ * Consume and return the current token if it has the expected type;
+ * otherwise report the mismatch and its source location through panic().
+ */
+Token
+expect(Parser* par, TokenType type)
+{
+    Token tok = peek(par);
+    if (tok.type != type) {
+        // range_str returns NULL for an empty span; never hand NULL to %s
+        const char* name = range_str(par->src, tok.start, tok.end, (char[IDENTSZ]) { 0 });
+        // token_type_str() yields a string, so it must be printed with %s
+        panic("Expected %s got '%s' (%d) at %s:%zu:%zu",
+            token_type_str(type),
+            name ? name : "",
+            (int)tok.type,
+            par->filename,
+            tok.line,
+            tok.col);
+        assert(tok.type == type);
+    }
+    return consume(par);
+}
+
+/* Consume the current token and return true if it has the given type; otherwise return false. */
+bool
+match(Parser* p, TokenType type)
+{
+    // printf("matching type %d\n", type);
+    if (peek(p).type == type) {
+        consume(p);
+        return true;
+    }
+    return false;
+}
+
+// multiplicative: *, /, % (left-associative)
+static Node*
+parse_multiplicative(Parser* par)
+{
+    Node* node = parse_term(par);
+
+    for (;;) {
+        if (match(par, TOKEN_STAR)) {
+            Node* rhs = parse_unary(par);
+            node = make_binary_node(OP_MUL, node, rhs);
+        } else if (match(par, TOKEN_SLASH)) {
+            Node* rhs = parse_unary(par);
+            node = make_binary_node(OP_DIV, node, rhs);
+        } else if (match(par, TOKEN_PERCENT)) {
+            Node* rhs = parse_unary(par);
+            node = make_binary_node(OP_MOD, node, rhs);
+        } else
+            break;
+    }
+
+    return node;
+}
+// additive: +, -
+static Node*
+parse_additive(Parser* par)
+{
+    Node* 
node = parse_multiplicative(par); + for (;;) { + if (match(par, TOKEN_PLUS)) { + Node* rhs = parse_multiplicative(par); + node = make_binary_node(OP_PLUS, node, rhs); + } else if (match(par, TOKEN_MINUS)) { + Node* rhs = parse_multiplicative(par); + node = make_binary_node(OP_MINUS, node, rhs); + } else + break; + } + return node; +} + +static Node* +parse_relational(Parser* par) +{ + Node* node = parse_additive(par); + for (;;) { + if (match(par, TOKEN_LT)) { + Node* rhs = parse_additive(par); + node = make_binary_node('<', node, rhs); + } else if (match(par, TOKEN_LT_EQ)) { + Node* rhs = parse_additive(par); + node = make_binary_node(OP_LT_EQ, node, rhs); + } else if (match(par, TOKEN_GT)) { + Node* rhs = parse_additive(par); + node = make_binary_node('>', node, rhs); + } else if (match(par, TOKEN_GT_EQ)) { + Node* rhs = parse_additive(par); + node = make_binary_node(OP_GT_EQ, node, rhs); + } else + break; + } + return node; +} + +static Node* +parse_equality(Parser* par) +{ + Node* node = parse_relational(par); + for (;;) { + if (match(par, TOKEN_EQUALITY)) { // "==" + Node* rhs = parse_relational(par); + node = make_binary_node(OP_EQUALITY, node, rhs); + } else if (match(par, TOKEN_INEQUALITY)) { // "!=" + Node* rhs = parse_relational(par); + node = make_binary_node(OP_INEQUALITY, node, rhs); + } else + break; + } + return node; +} + +Node* +parse_expression(Parser* par) +{ + return parse_equality(par); +} + +Node* +parse_expression_statement(Parser* par) +{ + Node* expr = parse_expression(par); + expect(par, TOKEN_SEMICOLON); + + Node* node = (Node*)calloc(1, sizeof(Node)); + if (node == NULL) panic("parse_expression_statement: could not alloc"); + node->type = NODE_EXPR_STATEMENT; + node->scope = NULL; + node->next = NULL; + node->data.expr_statement.expr = expr; + return node; +} + +// +// parse_statement +// +Node* +parse_statement(Parser* par) +{ + Token tok = peek(par), tok2 = peek2(par); + + if (tok.type == TOKEN_LBRACE) { + consume(par); + return 
parse_block(par); + } + + if (tok.type == TOKEN_IDENT && tok2.type == TOKEN_IDENT) + return parse_decl_or_func_decl(par); + + switch (tok.type) { + case TOKEN_RETURN: + return parse_return_statement(par); + case TOKEN_IF: + return parse_if(par); + case TOKEN_WHILE: + return parse_while(par); + case TOKEN_FOR: + return parse_for(par); + case TOKEN_BREAK: + return parse_break(par); + case TOKEN_CONTINUE: + return parse_continue_statement(par); + case TOKEN_SEMICOLON: + expect(par, TOKEN_SEMICOLON); + return make_empty_statement(); + // case TOKEN_IDENT: // TODO? + // if (tok2.type == TOKEN_EQUAL) + // return parse_assignment(par); + // else + // return parse_expression_statement(par); + default: + return parse_expression_statement(par); + } +} + +Node* +parse_block(Parser* par) +{ + Node* stmt; + Node* block = (Node*)calloc(1, sizeof(Node)); + if (block == NULL) panic("parse_block: could not alloc"); + block->type = NODE_BLOCK; + block->scope = NULL; + while (peek(par).type != TOKEN_RBRACE && peek(par).type != TOKEN_EOF) { + stmt = parse_statement(par); + + if (block->data.block.cap == block->data.block.len) { + block->data.block.cap + = block->data.block.cap == 0 ? 4 : block->data.block.cap * 2; + block->data.block.stmts = realloc( + block->data.block.stmts, block->data.block.cap * sizeof(Node*)); + if (block->data.block.stmts == NULL) { + panic("realloc failed in parse_block"); + } + } + + block->data.block.stmts[block->data.block.len++] = stmt; + } + expect(par, TOKEN_RBRACE); + // TODO next the parsing of this was relying on next and cannot + // anymmore, e.g. 
print
+    return block;
+}
+
+/*
+ * Parse a variable declaration: <type> <ident> [= <expr>] ;
+ * Returns a NODE_VAR_DECL node whose init is NULL when there is no initialiser.
+ */
+Node*
+parse_declaration_statement(Parser* par)
+{
+    Node* type = parse_type(par); // consumes the type (e.g., "float")
+    Token ident = expect(par, TOKEN_IDENT); // variable or function name
+    if (match(par, TOKEN_LPAREN)) {
+        // not an errno failure, so perror() would append a misleading message
+        fprintf(stderr, "called a var decl but this looks to be a func decl\n");
+    }
+
+    Node* var = calloc(1, sizeof(Node));
+    if (var == NULL) panic("parse_declaration_statement: could not alloc");
+    var->type = NODE_VAR_DECL;
+    var->scope = NULL;
+    var->data.var_decl.name = (Span) { ident.start, ident.end };
+    var->data.var_decl.type = type;
+    // record the source location, as parse_decl_or_func_decl does
+    var->filename = par->filename;
+    var->line = ident.line;
+    var->col = ident.col;
+    Token next_tok = peek(par);
+    if (next_tok.type == TOKEN_EQUAL) {
+        consume(par); // consume '='
+        var->data.var_decl.init = parse_expression(par);
+    } else {
+        // nothing to consume here: the next token must be the `;`
+        // that expect() checks below
+        var->data.var_decl.init = NULL;
+    }
+    expect(par, TOKEN_SEMICOLON);
+    return var;
+}
+
+Node*
+parse_decl_or_func_decl(Parser* par)
+{
+    Node* type = parse_type(par); // consumes the type (e.g., "float")
+    Token ident = expect(par, TOKEN_IDENT); // variable or function name
+
+    if (match(par, TOKEN_LPAREN)) { // function
+        Node* fn = calloc(1, sizeof(Node));
+        if (fn == NULL) panic("parse_decl_or_func_decl: func: could not alloc");
+
+        fn->type = NODE_FUNCTION_DECL;
+        fn->scope = NULL;
+
+        NodeVec v = parse_param_list(par);
+        fn->data.function_decl.params = v.items;
+        fn->data.function_decl.p_cap = v.cap;
+        fn->data.function_decl.p_len = v.len;
+
+        expect(par, TOKEN_RPAREN);
+        expect(par, TOKEN_LBRACE);
+
+        Node* body = parse_block(par);
+        fn->data.function_decl.body = body;
+
+        fn->data.function_decl.name = (Span) { ident.start, ident.end };
+        fn->data.function_decl.return_type = type;
+        fn->filename = par->filename;
+        fn->line = ident.line;
+        fn->col = ident.col;
+        return fn;
+
+    } else { // variable
+        Node* var = calloc(1, sizeof(Node));
+        if (var == NULL) panic("parse_decl_or_func_decl: var: could not alloc");
+        var->type = NODE_VAR_DECL;
+        var->scope = NULL;
+        var->data.var_decl.name = (Span) { ident.start, ident.end };
+        
var->data.var_decl.type = type;
+        var->filename = par->filename;
+        var->line = ident.line;
+        var->col = ident.col;
+        Token next_tok = peek(par);
+        if (next_tok.type == TOKEN_EQUAL) {
+            consume(par); // consume '='
+            var->data.var_decl.init = parse_expression(par);
+        } else {
+            var->data.var_decl.init = NULL;
+        }
+        expect(par, TOKEN_SEMICOLON);
+        return var;
+    }
+}
+
+/*
+ * Parse one top-level declaration. Returns NULL at end of input or when the
+ * current token cannot start a declaration (a message is printed in that case).
+ */
+Node*
+parse_declarations(Parser* par)
+{
+    Token tok = peek(par);
+    if (tok.type == TOKEN_EOF) return NULL;
+
+    switch (tok.type) {
+    case TOKEN_IDENT:
+        return parse_decl_or_func_decl(par);
+        break;
+    default:
+        printf("unknown token to parse!: %s\n", token_type_str(tok.type));
+        return NULL;
+    }
+    return NULL;
+}
+
+/*
+ * Parse every top-level declaration into a NODE_PROGRAM node and store the
+ * node and the source pointer in `ast`.
+ */
+void
+parser_parse(Ast* ast, Parser* par)
+{
+    assert(par->token_count > 0 && "no tokens to parse");
+    Node* node;
+    Node* program = make_program_node();
+    for (;;) {
+        node = parse_declarations(par);
+        if (node == NULL) break;
+        if (program->data.program.len == program->data.program.cap) {
+            // check the realloc result explicitly: assert() is compiled
+            // out under NDEBUG and would let a NULL pointer through
+            size_t new_cap = program->data.program.cap * 2;
+            Node** grown = (Node**)realloc(program->data.program.decl,
+                new_cap * sizeof(Node*));
+            if (grown == NULL) panic("parser_parse: realloc failed");
+            program->data.program.decl = grown;
+            program->data.program.cap = new_cap;
+        }
+        program->data.program.decl[program->data.program.len++] = node;
+    }
+
+    ast->src = par->src;
+    ast->node = program;
+}
diff --git a/parser/parser_utils.c b/parser/parser_utils.c
@@ -0,0 +1,339 @@
+#include "../parser.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+
+/*
+ * basic range to str
+ *
+ * Copies src[start, end) into stack_alloc_chptr, NUL-terminates it and
+ * returns the buffer. Returns NULL when src or the buffer is NULL, or when
+ * the range is empty or inverted (end <= start).
+ *
+ * The copy is truncated to IDENTSZ - 1 bytes. This assumes the buffer holds
+ * at least IDENTSZ bytes, which is what every caller visible here passes
+ * ((char[IDENTSZ]) { 0 }); confirm for callers elsewhere.
+ */
+const char*
+range_str(const char* src, size_t start, size_t end, char* stack_alloc_chptr)
+{
+    if (!src || !stack_alloc_chptr) return NULL;
+    // compare before subtracting: end - start wraps around for size_t
+    if (end <= start) return NULL;
+    size_t len = end - start;
+    if (len >= IDENTSZ) len = IDENTSZ - 1; // leave room for the terminator
+    memcpy(stack_alloc_chptr, src + start, len);
+    stack_alloc_chptr[len] = '\0';
+    return stack_alloc_chptr;
+}
+
+const char*
+span_str(const char* src, Span s, char* stack_alloc_chptr)
+{
+    
return range_str(src, s.start, s.end, stack_alloc_chptr); +} + +// int span_to_str(const char* src, size_t start, size_t end, char* out_buf) { +// if (!src || !out_buf) return -1; /* Null pointer passed */ +// if (start >= end) return -2; /* Empty or inverted span */ +// const size_t len = end - start; +// if (len >= IDENTSZ) return -4; /* Identifier too long */ +// const char* src_end = strchr(src, '\0'); +// if (!src_end) return -5; /* src not NUL‑terminated */ +// const size_t src_len = (size_t)(src_end - src); + +// if (end > src_len) return -6; /* Span overruns source */ + +// if (memchr(src + start, '\0', len)) +// return -7; /* span crosses a NUL byte */ + +// memcpy(out_buf, src + start, len); +// out_buf[len] = '\0'; + +// return 0; +// } + +// char* span_to_str_alloc(const char* src, size_t start, size_t end) { +// if (!src || start >= end) return NULL; +// const char* src_end = strchr(src, '\0'); +// if (!src_end) return NULL; +// size_t src_len = (size_t)(src_end - src); +// if (end > src_len) return NULL; +// size_t n = end - start; +// if (memchr(src + start, '\0', n)) return NULL; +// char* s = calloc(1, n + 1); +// if (!s) return NULL; +// memcpy(s, src + start, n); +// s[n] = '\0'; +// return s; +// } + +static void +print_node(const char* source, Node* node, int level) +{ + assert(node != NULL); + assert(level < 192); + + const char* name; + switch (node->type) { + case NODE_FUNCTION_DECL: + name = range_str(source, + node->data.function_decl.name.start, + node->data.function_decl.name.end, + (char[IDENTSZ]) { 0 }); + printf("%*s FUNC DECL: name='%s'\n", level, "", name); + if (node->data.function_decl.return_type) { + printf("%*s ↳ return type:\n", level * 2, ""); + print_node(source, node->data.function_decl.return_type, level + 1); + } + if (node->data.function_decl.params) { + printf("%*s ↳ params:\n", level * 2, ""); + for (size_t i = 0; i < node->data.function_decl.p_len; i++) { + Node* param = node->data.function_decl.params[i]; + 
print_node(source, param, level + 1); + } + } else { + printf("%*s ↳ params: N/A\n", level * 2, ""); + } + if (node->data.function_decl.body) { + printf("%*s ↳ body:\n", level * 2, ""); + print_node(source, node->data.function_decl.body, level + 1); + } + break; + case NODE_PARAM: + name = range_str(source, + node->data.param.name.start, + node->data.param.name.end, + (char[IDENTSZ]) { 0 }); + printf("%*s ↳ param: name='%s'\n", level * 2, "", name); + if (node->data.param.type) { print_node(source, node->data.param.type, level + 1); } + break; + case NODE_VAR_DECL: + name = range_str(source, + node->data.var_decl.name.start, + node->data.var_decl.name.end, + (char[IDENTSZ]) { 0 }); + printf("%*s VAR DECL: name='%s'\n", level, "", name); + if (node->data.var_decl.type) { + printf("%*s ↳ type:\n", level * 2, ""); + print_node(source, node->data.var_decl.type, level + 1); + } + if (node->data.var_decl.init) { + printf("%*s ↳ init:\n", level * 2, ""); + print_node(source, node->data.var_decl.init, level + 1); + } + break; + case NODE_PROGRAM: + printf("%*s PROGRAM:\n", level, ""); + if (node->data.program.decl) { + for (size_t i = 0; i < node->data.program.len; i++) { + print_node(source, node->data.program.decl[i], level + 1); + } + } + break; + case NODE_BLOCK: + printf("%*s BLOCK:\n", level, ""); + if (node->data.block.stmts) { + for (size_t i = 0; i < node->data.block.len; i++) { + print_node(source, node->data.block.stmts[i], level + 1); + } + } + break; + case NODE_CALL_EXPR: + printf("%*s ↳ FUNC CALL:\n", level, ""); + if (node->data.call_expr.callee) { + printf("%*s ↳ callee:\n", level * 2, ""); + print_node(source, node->data.call_expr.callee, level + 1); + } + if (node->data.call_expr.args) { + printf("%*s ↳ args:\n", level * 2, ""); + for (size_t i = 0; i < node->data.call_expr.len; i++) { + Node* arg = node->data.call_expr.args[i]; + print_node(source, arg, level + 1); + } + } + break; + case NODE_RETURN: + printf("%*s RETURN statement:\n", level, ""); + if 
(node->data.ret.expr) { print_node(source, node->data.ret.expr, level + 1); } + break; + case NODE_CONTINUE: + printf("%*s CONTINUE statement\n", level, ""); + if (node->data.cont.expr) { print_node(source, node->data.cont.expr, level + 1); } + break; + case NODE_NUMBER_LITERAL: + printf("%*s ↳ LITERAL NUMBER value=%f\n", level * 2, "", node->data.number.value); + break; + case NODE_STRING_LITERAL: { + const char* lit = span_str(source, node->data.string.value, (char[IDENTSZ]) { 0 }); + printf("%*s ↳ LITERAL STRING value=\"%s\"\n", level * 2, "", lit); + break; + } + case NODE_TYPE: + name = range_str(source, + node->data.ident.name.start, + node->data.ident.name.end, + (char[IDENTSZ]) { 0 }); + printf("%*s ↳ TYPE name='%s'\n", level * 2, "", name); + break; + case NODE_IDENT: + name = range_str(source, + node->data.ident.name.start, + node->data.ident.name.end, + (char[IDENTSZ]) { 0 }); + printf("%*s ↳ IDENT name='%s'\n", level * 2, "", name); + break; + // case NODE_VOID: + // printf("%*s <VOID>\n", level * 2, ""); + // break; + // case NODE_FLOAT: + // printf("%*s <FLOAT>\n", level * 2, ""); + // break; + // case NODE_INT: + // printf("%*s <INT>\n", level * 2, ""); + // break; + // case NODE_STRING: + // printf("%*s <STRING>\n", level * 2, ""); + // break; + case NODE_UNKNOWN: + break; + case NODE_VAR_ASSIGN: + name = range_str(source, + node->data.var_assign.lhs->data.ident.name.start, + node->data.var_assign.lhs->data.ident.name.end, + (char[IDENTSZ]) { 0 }); + printf("%*s VAR ASSIGN: name='%s'\n", level, "", name); + break; + case NODE_BREAK: + printf("%*s BREAK statement\n", level, ""); + break; + case NODE_BINARY_EXPR: + printf("%*s BINARY EXPR op='%c'\n", level, "", node->data.binary_expr.op); + if (node->data.binary_expr.lhs) { + printf("%*s ↳ lhs:\n", level * 2, ""); + print_node(source, node->data.binary_expr.lhs, level + 1); + } + if (node->data.binary_expr.rhs) { + printf("%*s ↳ rhs:\n", level * 2, ""); + print_node(source, node->data.binary_expr.rhs, 
level + 1); + } + break; + case NODE_UNARY_EXPR: + printf("%*s UNARY EXPR: op='%d' is_postfix='%s'\n", + level, + "", + node->data.unary_expr.op, + node->data.unary_expr.is_postfix ? "true" : "false"); + if (node->data.unary_expr.operand) { + printf("%*s ↳ operand:\n", level * 2, ""); + print_node(source, node->data.unary_expr.operand, level + 1); + } + break; + case NODE_EXPR_STATEMENT: + printf("%*s EXPR STMT:\n", level, ""); + if (node->data.expr_statement.expr) { + print_node(source, node->data.expr_statement.expr, level + 1); + } + break; + case NODE_SUBSCRIPT_EXPR: + printf("%*s SUBSCRIPT expr:\n", level, ""); + if (node->data.subscript_expr.array) { + printf("%*s ↳ array:\n", level * 2, ""); + print_node(source, node->data.subscript_expr.array, level + 1); + } + if (node->data.subscript_expr.index) { + printf("%*s ↳ index:\n", level * 2, ""); + print_node(source, node->data.subscript_expr.index, level + 1); + } + break; + case NODE_IF: + printf("%*s IF Statement:\n", level, ""); + if (node->data.if_statement.cond) { + printf("%*s ↳ cond:\n", level * 2, ""); + print_node(source, node->data.if_statement.cond, level + 1); + } + if (node->data.if_statement.then_body) { + printf("%*s ↳ then body:\n", level * 2, ""); + print_node(source, node->data.if_statement.then_body, level + 1); + } + if (node->data.if_statement.else_body) { + printf("%*s ↳ else body:\n", level * 2, ""); + print_node(source, node->data.if_statement.else_body, level + 1); + } + break; + case NODE_WHILE: + printf("%*s WHILE Statement:\n", level, ""); + if (node->data.while_statement.cond) { + printf("%*s ↳ cond:\n", level * 2, ""); + print_node(source, node->data.while_statement.cond, level + 1); + } + if (node->data.while_statement.body) { + printf("%*s ↳ body:\n", level * 2, ""); + print_node(source, node->data.while_statement.body, level + 1); + } + break; + case NODE_FOR: + printf("%*s FOR Statement:\n", level, ""); + if (node->data.for_statement.init) { + printf("%*s ↳ init:\n", level * 2, 
""); + print_node(source, node->data.for_statement.init, level + 1); + } + if (node->data.for_statement.cond) { + printf("%*s ↳ cond:\n", level * 2, ""); + print_node(source, node->data.for_statement.cond, level + 1); + } + if (node->data.for_statement.increment) { + printf("%*s ↳ increment:\n", level * 2, ""); + print_node(source, node->data.for_statement.increment, level + 1); + } + if (node->data.for_statement.body) { + printf("%*s ↳ body:\n", level * 2, ""); + print_node(source, node->data.for_statement.body, level + 1); + } + break; + case NODE_EMPTY_STATEMENT: + printf("%*s EMPTY Statement\n", level, ""); + break; + } + + while (node->next) { + print_node(source, node->next, level); + node = node->next; + } +} + +void +ast_print(Ast* ast) +{ + print_node(ast->src, ast->node, 0); +} + +void +print_node_type_str(NodeType t) +{ + printf("print_node_type_str: %s\n", node_type_str(t)); +} + +const char* +node_type_str(NodeType t) +{ + static const char* type_strings[] = { [NODE_PROGRAM] = "NODE_PROGRAM", + [NODE_FUNCTION_DECL] = "NODE_FUNCTION_DECL", + [NODE_PARAM] = "NODE_PARAM", + [NODE_VAR_DECL] = "NODE_VAR_DECL", + [NODE_BLOCK] = "NODE_BLOCK", + [NODE_CALL_EXPR] = "NODE_CALL_EXPR", + [NODE_RETURN] = "NODE_RETURN", + [NODE_CONTINUE] = "NODE_CONTINUE", + [NODE_NUMBER_LITERAL] = "NODE_NUMBER_LITERAL", + [NODE_STRING_LITERAL] = "NODE_STRING_LITERAL", + [NODE_TYPE] = "NODE_TYPE", + [NODE_IDENT] = "NODE_IDENT", + [NODE_UNKNOWN] = "NODE_UNKNOWN", + [NODE_VAR_ASSIGN] = "NODE_VAR_ASSIGN", + [NODE_BREAK] = "NODE_BREAK", + [NODE_BINARY_EXPR] = "NODE_BINARY_EXPR", + [NODE_UNARY_EXPR] = "NODE_UNARY_EXPR", + [NODE_EXPR_STATEMENT] = "NODE_EXPR_STATEMENT", + [NODE_SUBSCRIPT_EXPR] = "NODE_SUBSCRIPT_EXPR", + [NODE_IF] = "NODE_IF", + [NODE_WHILE] = "NODE_WHILE", + [NODE_FOR] = "NODE_FOR", + [NODE_EMPTY_STATEMENT] = "NODE_EMPTY_STATEMENT" }; + if (t >= NODE_PROGRAM && t <= NODE_EMPTY_STATEMENT) { + return type_strings[t]; + } else { + return "UNKNOWN_NODE_TYPE"; + } +} diff --git 
a/parser/stmt.c b/parser/stmt.c @@ -0,0 +1,180 @@ +#include "../parser.h" +#include "../utils.h" + +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <assert.h> + +Node* +parse_if(Parser* par) +{ + expect(par, TOKEN_IF); + expect(par, TOKEN_LPAREN); // @later remove necessity for parens + Node* cond = parse_expression(par); + expect(par, TOKEN_RPAREN); + + Node* then_body = parse_statement(par); + + Node* else_body = NULL; + if (match(par, TOKEN_ELSE)) else_body = parse_statement(par); + + Node* node = (Node*)calloc(1, sizeof(Node)); + if (node == NULL) panic("parse_if: could not alloc"); + node->type = NODE_IF; + node->scope = NULL; + node->data.if_statement.cond = cond; + node->data.if_statement.then_body = then_body; + node->data.if_statement.else_body = else_body; + return node; +} + +Node* +parse_while(Parser* par) +{ + expect(par, TOKEN_WHILE); + expect(par, TOKEN_LPAREN); + Node* cond = parse_expression(par); + expect(par, TOKEN_RPAREN); + + Node* body = parse_statement(par); + + Node* node = (Node*)calloc(1, sizeof(Node)); + if (node == NULL) panic("parse_while: could not alloc"); + node->type = NODE_WHILE; + node->scope = NULL; + node->data.while_statement.cond = cond; + node->data.while_statement.body = body; + return node; +} + +Node* +parse_for(Parser* par) +{ + expect(par, TOKEN_FOR); + expect(par, TOKEN_LPAREN); + + // init can be empty, a decl, or a expr statement + Node* init = NULL; // int i = 0 ... conditional expression stment + if (!check(par, TOKEN_SEMICOLON)) { + Token tok2 = peek2(par); + if (tok2.type == TOKEN_IDENT) { + init = parse_declaration_statement(par); + } else { + init = parse_expression_statement(par); + } + } else + expect(par, TOKEN_SEMICOLON); + + Node* cond = NULL; // i < len ... optional expression + if (!check(par, TOKEN_SEMICOLON)) cond = parse_expression(par); + expect(par, TOKEN_SEMICOLON); + + Node* inc = NULL; // i++ ... 
optional expression + if (!check(par, TOKEN_RPAREN)) { inc = parse_expression(par); } + expect(par, TOKEN_RPAREN); + + Node* body = parse_statement(par); + + Node* node = (Node*)calloc(1, sizeof(Node)); + node->type = NODE_FOR; + node->scope = NULL; + node->data.for_statement.init = init; + node->data.for_statement.cond = cond; + node->data.for_statement.increment = inc; + if (node == NULL) panic("parse_for: could not alloc"); + + node->data.for_statement.body = body; + return node; +} + +Node* +parse_assignment(Parser* par) +{ + Token ident = expect(par, TOKEN_IDENT); + Span name = { .start = ident.start, .end = ident.end }; + + expect(par, TOKEN_EQUAL); + Node* expr = parse_expression(par); + + Node* assign = (Node*)calloc(1, sizeof(Node)); + if (assign == NULL) panic("parse_assignment: could not alloc"); + assign->type = NODE_VAR_ASSIGN; + assign->scope = NULL; + assign->data.var_assign.lhs = (Node*)calloc(1, sizeof(Node)); + if (assign->data.var_assign.lhs == NULL) panic("parse_for: lhs: could not alloc"); + /* + identifier + x = 5; + + member field access + obj.field = 5; + obj->field = 5; + + array or pointer indexing + arr[0] = 5; + *(p + 1) = 5; + + dereference + *p = 5; + */ + assign->data.var_assign.lhs->type = NODE_IDENT; // TODO handle other cases + assign->data.var_assign.lhs->scope = NULL; + assign->data.var_assign.lhs->data.ident.name = name; + assign->data.var_assign.rhs = expr; + return assign; +} + +Node* +parse_break(Parser* par) +{ + expect(par, TOKEN_BREAK); + expect(par, TOKEN_SEMICOLON); + + Node* node = (Node*)calloc(1, sizeof(Node)); + if (node == NULL) panic("parse_break: could not alloc"); + node->type = NODE_BREAK; + node->scope = NULL; + return node; +} + +Node* +parse_continue_statement(Parser* par) +{ + expect(par, TOKEN_CONTINUE); // consume 'continue' + + Node* node = (Node*)calloc(1, sizeof(Node)); + if (node == NULL) panic("parse_continue_statemenet: could not alloc"); + node->type = NODE_CONTINUE; + node->scope = NULL; + + 
TokenType next_type = peek(par).type; + + if (next_type != TOKEN_SEMICOLON) + node->data.cont.expr = parse_expression(par); + else + node->data.cont.expr = NULL; + + expect(par, TOKEN_SEMICOLON); + return node; +} + +Node* +parse_return_statement(Parser* par) +{ + expect(par, TOKEN_RETURN); // consume 'return' + Node* ret = (Node*)calloc(1, sizeof(Node)); + if (ret == NULL) panic("parse_return_statemenet: could not alloc"); + ret->type = NODE_RETURN; + ret->scope = NULL; + + TokenType next_type = peek(par).type; + + if (next_type != TOKEN_SEMICOLON) + ret->data.ret.expr = parse_expression(par); + else + ret->data.ret.expr = NULL; + + expect(par, TOKEN_SEMICOLON); + return ret; +} diff --git a/sem.c b/sem.c @@ -0,0 +1,205 @@ +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "sem.h" +#include "parser.h" +#include "utils.h" + +#define CALLOC_SZ 16 +#define BASE_DEPTH 1 + +static int next_id = 100; + +Scope +scope_init(Node* node) +{ + Scope s = (Scope) { .parent = NULL, + .symbols = (Symbol**)calloc(CALLOC_SZ, sizeof(Symbol*)), + .children = (Scope**)calloc(CALLOC_SZ, sizeof(Scope*)), + .cap = CALLOC_SZ, + .len = 0, + .ch_cap = CALLOC_SZ, + .ch_len = 0, + .depth = BASE_DEPTH, + .owner = node, + .id = next_id++ }; + + if (s.symbols == NULL) panic("scope_init: could not alloc"); + if (s.children == NULL) panic("scope_init: could not alloc"); + return s; +} + +static Scope* +new_scope_from_scope(Scope* parent_scope, Node* node) +{ + // new scope + Scope* scope = (Scope*)calloc(1, sizeof(Scope)); + if (scope == NULL) panic("new_scope_from_scope: could not alloc"); + + scope->id = next_id++; + scope->owner = node; + node->scope = scope; + + // init symbols list + scope->symbols = (Symbol**)calloc(CALLOC_SZ, sizeof(Symbol*)); + if (scope->symbols == NULL) panic("new_scope_from_scope: symbols: could not alloc"); + scope->cap = CALLOC_SZ; + scope->len = 0; + + scope->children = (Scope**)calloc(CALLOC_SZ, sizeof(Scope*)); + if (scope->children == NULL) 
panic("new_scope_from_scope: children: could not alloc"); + scope->ch_cap = CALLOC_SZ; + scope->ch_len = 0; + + // init parent and depth + if (parent_scope != NULL) { + scope->parent = parent_scope; + scope->depth = parent_scope->depth + 1; + assert(parent_scope->children != NULL); + if (parent_scope->ch_len == parent_scope->ch_cap) { + parent_scope->ch_cap *= 2; + parent_scope->children = (Scope**)realloc( + parent_scope->children, parent_scope->ch_cap * sizeof(Scope*)); + assert(parent_scope->children != NULL && "realloc failed"); + } + parent_scope->children[parent_scope->ch_len++] = scope; + } else { + scope->parent = NULL; + scope->depth = BASE_DEPTH; + } + + return scope; +} + +static void +add_to_scope(Scope* scope, Symbol* sym) +{ + if (scope->len >= scope->cap) { + scope->cap *= 2; + scope->symbols = (Symbol**)realloc(scope->symbols, scope->cap * sizeof(Symbol*)); + } + scope->symbols[scope->len++] = sym; +} + +static void +scope_var(Scope* scope, Ast* ast, Node* node) +{ + const char* var_name = span_str(ast->src, node->data.var_decl.name, (char[IDENTSZ]) { 0 }); + const char* type_name + = span_str(ast->src, node->data.var_decl.type->data.ident.name, (char[IDENTSZ]) { 0 }); + + Symbol* sym = (Symbol*)calloc(1, sizeof(Symbol)); + if (sym == NULL) panic("scope_var: symbol: could not alloc"); + + TypeInfo* type = (TypeInfo*)calloc(1, sizeof(TypeInfo)); + if (type == NULL) panic("scope_var: type: could not alloc"); + + if (strcmp(type_name, "float") == 0) { + type->type = SYMTYPE_FLOAT; + } else if (strcmp(type_name, "int") == 0) { + type->type = SYMTYPE_INT; + } else if (strcmp(type_name, "string") == 0) { + type->type = SYMTYPE_STRING; + } else if (strcmp(type_name, "uint") == 0) { + type->type = SYMTYPE_UINT; + } else { + if (type_name[0] >= 'A' && type_name[0] <= 'Z') { + type->type = SYMTYPE_USER; + } else { + panic("sem: not yet defined type '%s' for variable '%s'", + type_name, + var_name); + } + } + + sym->name = node->data.var_decl.name; + sym->decl 
= node->data.var_decl.init; + sym->type = type; + + assert(var_name != NULL); + + add_to_scope(scope, sym); +} + +static void +scope_func(Scope* parent_scope, Ast* ast, Node* node) +{ + Scope* scope = new_scope_from_scope(parent_scope, node); + + for (size_t i = 0; i < node->data.block.len; i++) { + Node* stmt = node->data.block.stmts[i]; + switch (stmt->type) { + case NODE_VAR_DECL: { + scope_var(scope, ast, stmt); + break; + } + default: + continue; + } + } +} + +void +scope_build(Scope* scope, Ast* ast) +{ + for (size_t i = 0; i < ast->node->data.program.len; i++) { + Node* node = ast->node->data.program.decl[i]; + switch (node->type) { + case NODE_VAR_DECL: + scope_var(scope, ast, node); + break; + case NODE_FUNCTION_DECL: + scope_func(/*parent_scope*/ scope, ast, node->data.function_decl.body); + break; + default: + printf("unknown definition at TODO\n"); + } + } +} + +void +scope_print(Scope* scope, Ast* ast) +{ + if (scope == NULL || scope->symbols == NULL) return; + + for (size_t i = 0; i < scope->len; i++) { + Symbol* sym = scope->symbols[i]; + const char* name = span_str(ast->src, sym->name, (char[IDENTSZ]) { 0 }); + int parent = -1; + if (scope->parent != NULL) parent = scope->parent->id; + bool has_owner_node = false; + if (scope->owner != NULL) has_owner_node = true; + printf("[depth %d] [id %d] Symbol name `%s` \t of type %s (parent %d, owner %s)\n", + scope->depth, + scope->id, + name, + type_kind_str(sym->type->type), + parent, + has_owner_node ? 
"yes" : "no"); + } + + if (scope->ch_len == 0) return; + + for (size_t j = 0; j < scope->ch_len; j++) { + Scope* child_scope = scope->children[j]; + scope_print(child_scope, ast); + } +} + +const char* +type_kind_str(SymbolType t) +{ + static const char* type_strings[] = { + [SYMTYPE_VOID] = "TYPE_VOID", + [SYMTYPE_INT] = "TYPE_INT", + [SYMTYPE_UINT] = "TYPE_UINT", + [SYMTYPE_FLOAT] = "TYPE_FLOAT", + [SYMTYPE_STRING] = "TYPE_STRING", + [SYMTYPE_USER] = "TYPE_USER", + [SYMTYPE_FUNC] = "TYPE_FUNC", + [SYMTYPE_TODO] = "TYPE_TODO", + }; + + return (t >= SYMTYPE_VOID && t <= SYMTYPE_TODO) ? type_strings[t] : "UNKNOWN_TYPE_KIND"; +} diff --git a/sem.h b/sem.h @@ -0,0 +1,111 @@ +#pragma once + +#include <stdlib.h> + +#include "parser.h" + +typedef enum { + SYMTYPE_VOID = 108, + SYMTYPE_INT, + SYMTYPE_UINT, + SYMTYPE_FLOAT, + SYMTYPE_STRING, + SYMTYPE_STRUCT, + SYMTYPE_USER, + SYMTYPE_ARRAY, + SYMTYPE_ENUM, + SYMTYPE_FUNC, + SYMTYPE_TODO, +} SymbolType; // note also update type_kind_str! + +const char* type_kind_str(SymbolType); + +typedef enum { + ENUM_VALUE_INT, + ENUM_VALUE_STRING, +} EnumValueKind; + +typedef struct StructField { + char* name; + struct Type* type; +} StructField; + +typedef struct EnumField { + char* name; + EnumValueKind kind; + union { // not used? 
+ int int_value; + char* string_value; + } val; +} EnumField; + +typedef struct StructMethod { + char* name; + struct Type* return_type; + struct Type** param_types; + int params_count; + int params_cap; + // TODO add ptr to func decl of this struct method +} StructMethod; + +typedef struct Type { + SymbolType type; + + // union { + // struct StructType { + // const char* struct_name; + // int fields_count; + // int methods_count; + // StructField* fields; + // StructMethod* methods; + // } struct_t; + + // struct ArrayType { + // int array_size; // -1 or fixed + // struct Type* of_type; + // bool dynamic; + // } array_t; + + // struct EnumType { + // const char* enum_name; + // const int fields_count; + // EnumField* fields; + // EnumValueKind value_kind; + // } enum_t; + // }; +} TypeInfo; + +typedef struct Symbol { + Span name; + Node* decl; + TypeInfo* type; +} Symbol; + +typedef struct Scope { + struct Node* owner; + struct Scope* parent; + Symbol** symbols; + size_t len; + size_t cap; + struct Scope** children; + size_t ch_len; + size_t ch_cap; + int depth; + int id; +} Scope; + +// Symbol table functions +void symbol_add(const char* name, TypeInfo* type); +TypeInfo* symbol_get_type(const char* name); +Symbol* symbol_find(const char* name); + +// Scope management functions +Scope scope_init(Node*); +void scope_add_symbol(Scope* scope, const char* name, TypeInfo* type); +Symbol* scope_find_symbol(Scope* scope, const char* name); + +// Type checking functions +int types_equal(TypeInfo* a, TypeInfo* b); + +void scope_build(Scope*, Ast*); +void scope_print(Scope*, Ast*); diff --git a/stuff/ox.txt b/stuff/ox.txt @@ -0,0 +1,35 @@ +██████████████████████████████████████████████████████████████████ +██████████████████████████████████████████████████████████████████ +██████████████████████████████████████████████████████████████████ +████████████████ ███████████████████████████████ █████████████████ +██████████████ █████████████████████████████████ ███████████████ 
████████████ ███████████████████████████████████ █████████████
██████████ █ ███████████████████████████████████ ███████████
████████ █ █████████████████████████████████████ █ █████████
███████ ██ █████████████████████████████████████ █ ████████
███████ ███ █████████████████████████████████████ █ ████████
███████ ████ ███████████████████████████████ ███ ████████
███████ ██████ ████ ████████
████████ ███████ █████████████ ▓████ █████████
██████████ ▓ ██████████████ ▓ ███████████
████████████ █████████████ ██████████████
███████ ███████████ ████████
███████ ████████████ ████████
█████████ █████████ ▓ ██████████
█████████████ ██ ██████ ▓ ██████████████
██████████████████ █████ ██████ ▓▓█ ███████████████████
██████████████████ ███████ ██████ ███████████████████
██████████████████ ██████ ██████ ███████████████████
███████████████████ ████ ███████ ████████████████████
██████████████████ ███ █████████ ███████████████████
█████████████████ █ ███ ███████████████████
█████████████████ ██████████████ ██████████████████
████████████████ ██ ████ ██ █████████████████
█████████████████ ██ ████ ██ ██████████████████
██████████████████ ██████████ ▓███████████████████
████████████████████ ▓█████████████████████
██████████████████████ ██▓▓██ ▓███████████████████████
████████████████████████▓ ▓█████████████████████████
███████████████████████████ ▓████████████████████████████
██████████████████████████████ ▓███████████████████████████████
██████████████████████████████████████████████████████████████████
diff --git a/test.ox b/test.ox @@ -0,0 +1,97 @@
ns main

use io, std

pub rec Vehicle {
    uint8 age
    uint16 cc
    str model
    str name
    str owner_name
    float value() = age * cc
    bool sold = false
    Vehicle parent
}

inline pub int add (int a, b) => a + b
async int add (int a, b) => await sendoff(a, b)

const int jack = 7
const uint16 jill = 12


Vehicle build_vehicle (Vehicle v) inline pub {
    v = { age: 12, cc: 1200, model: "Roaster" }
    ret v
}

void print_vehicle(Vehicle v) {
    print "Vehicle {{v.name}} is {{v.age}} yrs old with {{v.cc}} CC."
}

void main () {
    Vehicle mazda = new { age: 12, cc: 1200, model: "Miata" }

    print("mazda value is {{mazda.value}}")

    print_vehicle(mazda)

    print(add(5, 6)

    for Vehicle v in cars {
        print_vehicle <- v
    }

    cars -> each { v, i: print_vehicle v }

    // shorthand argument in closures

    []Vehicle old_cars = cars.where(v: v.age > 10)

    []Vehicle big_cars = cars -> where { .cc > 2000 } -> map { .model }

    []Vehicle new_cars = cars
        -> where { Vehicle v => v.age < 5 }
        -> sorted { Vehicle a, b => a.model > b.model }
        -> map { Vehicle v => v.model }
        -> each { str model => print(model) }

    []Vehicle new_cars = cars
        -> where { .age < 5 }
        -> sorted { (Vehicle a, b) => a.age > b.age }
        -> map { .model -> to_lower }
        -> each { print }

    http_server(8080, {sel: get_certif("certs/digitech.cert")})

    http_server <- 8080, ssl: get_certif <- "certs/digitech.cert"

    HttpServer server = (HttpServer)
        -> init
        -> serve <- port: 8080, ssl: get_certif <- paths -> where { .link = "private" }
        -> background

    HttpServer server = new {}
        -> init()
        -> serve(port: 8080, ssl: get_certif(paths.where { .link == "private" }))
        -> background()

    pub rec Cert { ... }
    pub rec HttpServer {
        void init() { ... }
        void serve(int port, Cert ssl) { ... }
        void background() { ... }
    }

    extend HttpServer {
        void print { ... }
    }

    print <- "hello world"

    print <- "hello world" -> capitalised // print("hello world".capitalised())

    print <- capitalise <- "hello world" // print(capitalise("hello world"))

}

diff --git a/utils.c b/utils.c @@ -0,0 +1,31 @@
#include "utils.h"

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>

/*
 * Write "Error: ", the formatted message and a newline to stderr.
 * Shared by panic() and softpanic(), which differ only in their exit status.
 */
static void
report_error(const char *fmt, va_list args)
{
    fputs("Error: ", stderr);
    // fprintf(stderr, "\e[0;31mError: ");
    vfprintf(stderr, fmt, args);
    // fprintf(stderr, "\e[0m\n");
    fputc('\n', stderr);
}

/*
 * Print a printf-style error message to stderr and terminate the process
 * with exit status 1. Does not return.
 */
void
panic(const char *fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    report_error(fmt, ap);
    va_end(ap);
    exit(1);
}

/*
 * Same output as panic(), but the process terminates with exit status 0.
 * Does not return.
 */
void
softpanic(const char *fmt, ...)
{
    va_list ap;
    va_start(ap, fmt);
    report_error(fmt, ap);
    va_end(ap);
    exit(0);
}
diff --git a/utils.h b/utils.h @@ -0,0 +1,6 @@
#pragma once

#include <stdlib.h>

// Print "Error: <message>" to stderr, then exit(1).
void panic(const char *fmt, ...);
// Print "Error: <message>" to stderr, then exit(0).
void softpanic(const char *fmt, ...);