commit b80810f39a342f176f46b3090757eef62ed4684e
parent 9f0b5e9d7ae7671c422f1b7fe85151f630e1a4df
Author: citbl <citbl@citbl.org>
Date: Mon, 6 Oct 2025 21:39:05 +1000
wip variables, notes
Diffstat:
| M | .clang-format | | | 5 | ++++- |
| M | .clangd | | | 2 | +- |
| M | ex2.ox | | | 4 | ++-- |
| M | gen/gen.c | | | 181 | ++++++++++++++++++++++++++++++++++++++++++++----------------------------------- |
| M | lexer.h | | | 65 | +++-------------------------------------------------------------- |
| M | makefile | | | 2 | +- |
| M | parser.h | | | 242 | +++++++++++++++++++------------------------------------------------------------ |
| M | sem.c | | | 63 | +++++++++++++++++++++++++++++++-------------------------------- |
| M | sem.h | | | 110 | ++++++++----------------------------------------------------------------------- |
| A | types.h | | | 295 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
10 files changed, 504 insertions(+), 465 deletions(-)
diff --git a/.clang-format b/.clang-format
@@ -2,6 +2,9 @@ BasedOnStyle: Webkit
IndentWidth: 8
ContinuationIndentWidth: 8
UseTab: AlignWithSpaces
+
+PointerAlignment: Right
+
AlignTrailingComments: true
SpacesBeforeTrailingComments: 1
KeepEmptyLinesAtTheStartOfBlocks: false
@@ -16,7 +19,7 @@ AlignConsecutiveMacros: false
SortIncludes: false
IndentCaseLabels: false
-ColumnLimit: 100
+ColumnLimit: 150
PenaltyBreakBeforeFirstCallParameter: 1
AlignAfterOpenBracket: DontAlign
BinPackArguments: false
diff --git a/.clangd b/.clangd
@@ -4,7 +4,7 @@ CompileFlags:
-Wextra,
-Wpedantic,
-xc,
- -std=c99,
+ -std=c2x,
-g,
-I/opt/homebrew/opt/libgccjit/include,
-L/opt/homebrew/opt/libgccjit/lib/gcc/current,
diff --git a/ex2.ox b/ex2.ox
@@ -3,6 +3,6 @@
// T add(T a, b) inline pure => a + b;
void main() {
- int peter = 42;
- print("harold");
+ string peter = "steve";
+ print(peter);
}
diff --git a/gen/gen.c b/gen/gen.c
@@ -1,40 +1,33 @@
#include "../gen.h"
#include "../utils.h"
-#include <_string.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/param.h>
-static gcc_jit_type* type_int;
-static gcc_jit_type* type_uint;
-static gcc_jit_type* type_float;
-static gcc_jit_type* type_void;
-static gcc_jit_type* type_cstr;
+static gcc_jit_type *type_int;
+static gcc_jit_type *type_uint;
+static gcc_jit_type *type_float;
+static gcc_jit_type *type_void;
+static gcc_jit_type *type_cstr;
#define MAXARGS 16
-gcc_jit_location*
-loc_from_node(Gen* gen, Node* node)
+gcc_jit_location *
+loc_from_node(Gen *gen, Node *node) // Build a libgccjit source location from the node's filename, line and col.
{
- if(node->filename == NULL)
- exit(1);
- if(node->line == NULL)
- exit(1);
- if(node->col == NULL)
- exit(1);
-
+ if (node->filename == NULL) return NULL; // no filename recorded: hand back NULL (this file already passes NULL locations to libgccjit elsewhere) instead of exiting
return gcc_jit_context_new_location(gen->ctx, node->filename, node->line, node->col);
}
Gen
-gen_init(Scope* scope, const char* src)
+gen_init(Scope *scope, const char *src)
{
if (scope == NULL || src == NULL) { panic("gen_init: no Scope or AST provided"); }
- gcc_jit_context* ctx;
+ gcc_jit_context *ctx;
ctx = gcc_jit_context_acquire();
@@ -62,12 +55,11 @@ gen_init(Scope* scope, const char* src)
type_void = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_VOID);
type_cstr = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_CONST_CHAR_PTR);
- gcc_jit_param* pm_puts[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "s") };
- gcc_jit_function* fn_puts = gcc_jit_context_new_function(
- ctx, NULL, GCC_JIT_FUNCTION_IMPORTED, type_int, "puts", 1, pm_puts, 0);
+ gcc_jit_param *pm_puts[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "s") };
+ gcc_jit_function *fn_puts = gcc_jit_context_new_function(ctx, NULL, GCC_JIT_FUNCTION_IMPORTED, type_int, "puts", 1, pm_puts, 0);
- gcc_jit_param* pm_printf[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "fmt") };
- gcc_jit_function* fn_printf = gcc_jit_context_new_function(ctx,
+ gcc_jit_param *pm_printf[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "fmt") };
+ gcc_jit_function *fn_printf = gcc_jit_context_new_function(ctx,
NULL,
GCC_JIT_FUNCTION_IMPORTED,
type_int,
@@ -89,28 +81,27 @@ gen_init(Scope* scope, const char* src)
};
}
-static gcc_jit_rvalue* handle_expr(Gen*, Node*);
+static gcc_jit_rvalue *handle_expr(Gen *, Node *);
-static gcc_jit_rvalue*
-emit_literal_string(Gen* gen, Node* node)
+/*
+ * Emit a string-literal rvalue for a NODE_STRING_LITERAL node.
+ *
+ * The literal's text is the [start, end) span of gen->src recorded in
+ * node->data.string.value. It is copied into a temporary NUL-terminated
+ * buffer because the span inside gen->src is not terminated.
+ *
+ * Returns the rvalue created by gcc_jit_context_new_string_literal().
+ * Calls panic() if the temporary buffer cannot be allocated.
+ */
+static gcc_jit_rvalue *
+emit_literal_string(Gen *gen, Node *node)
{
	size_t len = node->data.string.value.end - node->data.string.value.start;
-	char* str = calloc(len + 1, sizeof(char));
+	char *str = calloc(len + 1, sizeof(char));
	if (str == NULL) panic("emit_literal_string: could not alloc");
	memcpy(str, gen->src + node->data.string.value.start, len);
	str[len] = '\0';
-	return gcc_jit_context_new_string_literal(gen->ctx, str);
+	gcc_jit_rvalue *literal = gcc_jit_context_new_string_literal(gen->ctx, str);
+	// libgccjit takes its own copy of the string, so the temporary buffer
+	// must be released here; otherwise every string literal leaks it.
+	free(str);
+	return literal;
}
-static gcc_jit_rvalue*
-emit_literal_int(Gen* gen, Node* node)
+static gcc_jit_rvalue *
+emit_literal_int(Gen *gen, Node *node) // Emit a type_int rvalue holding the node's number literal.
{
- return gcc_jit_context_new_rvalue_from_int(
- gen->ctx, type_int, (int)node->data.number.value);
+ return gcc_jit_context_new_rvalue_from_int(gen->ctx, type_int, (int)node->data.number.value); // NOTE(review): number.value looks to be a double in the Node union, so this cast would drop any fractional part — confirm
}
static void
-build_program(Gen* gen, Node* node)
+build_program(Gen *gen, Node *node)
{
size_t cnt = node->data.program.len;
for (size_t i = 0; i < cnt; i++) {
@@ -118,22 +109,18 @@ build_program(Gen* gen, Node* node)
}
}
-static gcc_jit_rvalue*
-lower_builtin_print(Gen* gen, Node* node)
+static gcc_jit_rvalue *
+lower_builtin_print(Gen *gen, Node *node)
{
size_t argc = node->data.call_expr.len;
// 1-arg, treat as puts(arg)
if (argc == 1) {
- gcc_jit_rvalue* arg
- = handle_expr(gen, node->data.call_expr.args[0]); // TODO [0] when many
+ gcc_jit_rvalue *arg = handle_expr(gen, node->data.call_expr.args[0]); // TODO [0] when many
// cast common cases to const char*
- if (gcc_jit_rvalue_get_type(arg) != type_cstr)
- arg = gcc_jit_context_new_cast(
- gen->ctx, loc_from_node(gen, node), arg, type_cstr);
- gcc_jit_rvalue* args[] = { arg };
- return gcc_jit_context_new_call(
- gen->ctx, loc_from_node(gen, node), gen->puts_fn, 1, args);
+ if (gcc_jit_rvalue_get_type(arg) != type_cstr) arg = gcc_jit_context_new_cast(gen->ctx, loc_from_node(gen, node), arg, type_cstr);
+ gcc_jit_rvalue *args[] = { arg };
+ return gcc_jit_context_new_call(gen->ctx, loc_from_node(gen, node), gen->puts_fn, 1, args);
}
// softpanic("we don't currently handle formatted strings to print");
@@ -150,39 +137,33 @@ lower_builtin_print(Gen* gen, Node* node)
// to a struct. Do we say [[struct]] or do we have some automatic unwrap and display of
// struct data... probably, yes.
- gcc_jit_rvalue** args = (gcc_jit_rvalue**)calloc(MAXARGS, sizeof(gcc_jit_rvalue*));
+ gcc_jit_rvalue **args = (gcc_jit_rvalue **)calloc(MAXARGS, sizeof(gcc_jit_rvalue *));
- if (argc > MAXARGS) {
- softpanic("we do not currently support more than 16 args to a print call");
- }
+ if (argc > MAXARGS) { softpanic("we do not currently support more than 16 args to a print call"); }
for (size_t i = 0; i < argc; i++) {
- gcc_jit_rvalue* arg = handle_expr(gen, node->data.call_expr.args[i]);
+ gcc_jit_rvalue *arg = handle_expr(gen, node->data.call_expr.args[i]);
if (i == 0) {
if (gcc_jit_rvalue_get_type(arg) != type_cstr) {
// note this is probably not going to work as limited cast supported
// and string isn't one of them
- arg = gcc_jit_context_new_cast(
- gen->ctx, loc_from_node(gen, node), arg, type_cstr);
+ arg = gcc_jit_context_new_cast(gen->ctx, loc_from_node(gen, node), arg, type_cstr);
}
} else {
//
// simple widening for common scalar types
//
- gcc_jit_type* ty = gcc_jit_rvalue_get_type(arg);
+ gcc_jit_type *ty = gcc_jit_rvalue_get_type(arg);
if (ty == type_int) {
- arg = gcc_jit_context_new_cast(
- gen->ctx, loc_from_node(gen, node), arg, type_cstr);
+ arg = gcc_jit_context_new_cast(gen->ctx, loc_from_node(gen, node), arg, type_cstr);
} else if (ty == type_float) {
// variadics already promote float→double; double is
} else if (ty == type_cstr) {
// leave as const char*
} else {
// fallback: pass pointer as void*
- arg = gcc_jit_context_new_cast(gen->ctx,
- loc_from_node(gen, node),
- arg,
- gcc_jit_context_get_type(gen->ctx, GCC_JIT_TYPE_VOID_PTR));
+ arg = gcc_jit_context_new_cast(
+ gen->ctx, loc_from_node(gen, node), arg, gcc_jit_context_get_type(gen->ctx, GCC_JIT_TYPE_VOID_PTR));
}
}
// TODO auto grow
@@ -198,11 +179,17 @@ lower_builtin_print(Gen* gen, Node* node)
// // TODO see todo below about linked list parameters...
// }
-static gcc_jit_rvalue*
-handle_func_call(Gen* gen, Node* node)
+void
+lookup_symbol(Gen *gen)
{
- Node* fcallee = node->data.call_expr.callee;
- const char* func_name = span_str(gen->src, fcallee->data.ident.name, (char[IDENTSZ]) { 0 });
+ // @next: placeholder with an empty body. NOTE(review): presumably meant to resolve an identifier to the lvalue that the NODE_VAR_DECL case stores on its Symbol — confirm before relying on it.
+}
+
+static gcc_jit_rvalue *
+handle_func_call(Gen *gen, Node *node)
+{
+ Node *fcallee = node->data.call_expr.callee;
+ const char *func_name = span_str(gen->src, fcallee->data.ident.name, (char[IDENTSZ]) { 0 });
if (strcmp(func_name, "print") == 0) return lower_builtin_print(gen, node);
softpanic("unhandled func call named: %s", func_name);
@@ -221,11 +208,12 @@ handle_func_call(Gen* gen, Node* node)
// return NULL;
}
-static gcc_jit_rvalue*
-handle_expr(Gen* gen, Node* node)
+static gcc_jit_rvalue *
+handle_expr(Gen *gen, Node *node)
{
switch (node->type) {
case NODE_NUMBER_LITERAL:
+ return emit_literal_int(gen, node);
break;
case NODE_STRING_LITERAL:
return emit_literal_string(gen, node);
@@ -233,14 +221,32 @@ handle_expr(Gen* gen, Node* node)
case NODE_CALL_EXPR: {
return handle_func_call(gen, node);
} break;
+ // case NODE_IDENT: {
+ // return NULL; // fixme
+ // } break;
default:
printf("handle_expr unhandled, %s\n", node_type_str(node->type));
}
return NULL;
}
+/*
+ * Map an ox type node to the libgccjit type used for code generation.
+ *
+ * The type's spelling is read from node->data.ident.name within gen->src.
+ * "int" maps to type_int and "string" maps to type_cstr. Any other
+ * spelling is reported through softpanic() and NULL is returned.
+ */
+static gcc_jit_type *
+ox_type_to_c_type(Gen *gen, Node *node)
+{
+	char ident_buf[IDENTSZ] = { 0 };
+	const char *spelling = span_str(gen->src, node->data.ident.name, ident_buf);
+
+	if (strcmp(spelling, "int") == 0) return type_int;
+	if (strcmp(spelling, "string") == 0) return type_cstr;
+
+	softpanic("unhandled type in gen %s", spelling);
+	return NULL;
+}
+
static void
-build_statement(Gen* gen, Node* node)
+build_statement(Gen *gen, Node *node)
{
switch (node->type) {
case NODE_BLOCK:
@@ -248,18 +254,32 @@ build_statement(Gen* gen, Node* node)
case NODE_RETURN:
break;
case NODE_VAR_DECL: {
- gcc_jit_location* loc = loc_from_node(gen, node);
- const char* var_name
- = span_str(gen->src, node->data.var_decl.name, (char[IDENTSZ]) { 0 });
- gcc_jit_lvalue* var = gcc_jit_function_new_local(gen->curr_func,
- loc,
- type_int,
- strdup(var_name)); // to be initialised
- gcc_jit_rvalue* integer_value = emit_literal_int(gen, node->data.var_decl.init);
- gcc_jit_block_add_assignment(gen->curr_block, loc, var, integer_value);
+ gcc_jit_location *loc = loc_from_node(gen, node);
+ const char *var_name = span_str(gen->src, node->data.var_decl.name, (char[IDENTSZ]) { 0 });
+ gcc_jit_type *declared_type = ox_type_to_c_type(gen, node->data.var_decl.type);
+ gcc_jit_lvalue *var_decl = gcc_jit_function_new_local(gen->curr_func, loc, declared_type, strdup(var_name));
+
+ if (node->data.var_decl.init != NULL) {
+ gcc_jit_rvalue *rvalue = handle_expr(gen, node->data.var_decl.init);
+ gcc_jit_block_add_assignment(gen->curr_block, loc, var_decl, rvalue);
+
+ printf("add the lvalue to node scope to be found later\n");
+
+ for (size_t i = 0; i < node->scope->len; i++) {
+ Symbol *sym = node->scope->symbols[i];
+ if (sym->name.start == node->data.var_decl.name.start && sym->name.end == node->data.var_decl.name.end) {
+ sym->ctype = declared_type;
+ sym->d.lvalue = var_decl;
+
+ printf("@next, when we parse the print(x) we know we can find the x in the symbols \n");
+
+ break;
+ }
+ }
+ }
} break;
case NODE_EXPR_STATEMENT: {
- gcc_jit_rvalue* rv = handle_expr(gen, node->data.expr_statement.expr);
+ gcc_jit_rvalue *rv = handle_expr(gen, node->data.expr_statement.expr);
if (rv) gcc_jit_block_add_eval(gen->curr_block, loc_from_node(gen, node), rv);
} break;
default:
@@ -269,7 +289,7 @@ build_statement(Gen* gen, Node* node)
}
static void
-build_block(Gen* gen, Node* body)
+build_block(Gen *gen, Node *body)
{
for (size_t i = 0; i < body->data.block.len; i++) {
build_statement(gen, body->data.block.stmts[i]);
@@ -277,9 +297,9 @@ build_block(Gen* gen, Node* body)
}
static void
-build_func_decl(Gen* gen, Node* node)
+build_func_decl(Gen *gen, Node *node)
{
- gcc_jit_function* func = gcc_jit_context_new_function(gen->ctx,
+ gcc_jit_function *func = gcc_jit_context_new_function(gen->ctx,
loc_from_node(gen, node),
GCC_JIT_FUNCTION_EXPORTED, // declared
type_int, // ret
@@ -288,18 +308,17 @@ build_func_decl(Gen* gen, Node* node)
NULL, // params
0); // is variadic
- gcc_jit_block* block = gcc_jit_function_new_block(func, "entry");
+ gcc_jit_block *block = gcc_jit_function_new_block(func, "entry");
- gcc_jit_function* prev_func = gen->curr_func;
- gcc_jit_block* prev_block = gen->curr_block;
+ gcc_jit_function *prev_func = gen->curr_func;
+ gcc_jit_block *prev_block = gen->curr_block;
gen->curr_block = block;
gen->curr_func = func;
build_block(gen, node->data.function_decl.body);
if (gen->curr_block) {
- gcc_jit_rvalue* ret_value
- = gcc_jit_context_new_rvalue_from_int(gen->ctx, type_int, 0);
+ gcc_jit_rvalue *ret_value = gcc_jit_context_new_rvalue_from_int(gen->ctx, type_int, 0);
gcc_jit_block_end_with_return(gen->curr_block, NULL, ret_value);
gen->curr_block = NULL;
}
@@ -309,7 +328,7 @@ build_func_decl(Gen* gen, Node* node)
}
void
-gen_next(Gen* gen, Node* node)
+gen_next(Gen *gen, Node *node)
{
// printf("gen_next, %s\n", node_type_str(node->type));
diff --git a/lexer.h b/lexer.h
@@ -1,66 +1,7 @@
#pragma once
#include <stdlib.h>
+#include "types.h"
-typedef enum {
- TOKEN_IDENT = 1006,
- TOKEN_LPAREN,
- TOKEN_RPAREN,
- TOKEN_LBRACE,
- TOKEN_RBRACE,
- TOKEN_LBRACKET,
- TOKEN_RBRACKET,
- TOKEN_EQUAL,
- TOKEN_SEMICOLON,
- TOKEN_PERCENT,
- TOKEN_COMMA,
- TOKEN_NUMBER_LITERAL,
- TOKEN_STRING_LITERAL,
- TOKEN_SLASH,
- TOKEN_STAR,
- TOKEN_PLUS,
- TOKEN_PLUSPLUS,
- TOKEN_MINUS,
- TOKEN_MINUSMINUS,
- TOKEN_EQUALITY,
- TOKEN_INEQUALITY,
- TOKEN_BANG,
- TOKEN_LT,
- TOKEN_GT,
- TOKEN_LT_EQ,
- TOKEN_GT_EQ,
- TOKEN_IF,
- TOKEN_ELSE,
- TOKEN_WHILE,
- TOKEN_FOR,
- TOKEN_BREAK,
- TOKEN_CONTINUE,
- TOKEN_RETURN,
- TOKEN_UNKNOWN, // NOTE: also update print_token
- TOKEN_EOF
-} TokenType; // NOTE also update token_type_str!
-
-const char* token_type_str(TokenType t);
-
-typedef struct {
- size_t start;
- size_t end;
- size_t line;
- size_t col;
- TokenType type;
-} Token;
-
-typedef struct {
- Token* tokens;
- size_t token_count;
- size_t token_cap;
- size_t pos;
- size_t line;
- size_t col;
- const char* src;
- size_t src_len;
- const char* filename;
-} Lexer;
-
-void lexer_lex(Lexer*, const char* filename, const char* contents);
-void lexer_print(Lexer*);
+void lexer_lex(Lexer *, const char *filename, const char *contents);
+void lexer_print(Lexer *);
diff --git a/makefile b/makefile
@@ -19,7 +19,7 @@ endif
SRC = *.c */*.c
BIN = oxc
-STD = -std=c99
+STD = -std=c2x
default:
cc ${STD} -g -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -o ${BIN} ${SRC} ${LIB} # -Wpedantic -Wshadow -Wconversion
diff --git a/parser.h b/parser.h
@@ -1,195 +1,65 @@
#pragma once
#include "lexer.h"
+#include "sem.h"
+#include "types.h"
#include <stdlib.h>
#include <stdbool.h>
#define IDENTSZ 256
-typedef enum {
- NODE_PROGRAM = 11,
- NODE_FUNCTION_DECL,
- NODE_PARAM,
- NODE_VAR_DECL,
- NODE_VAR_ASSIGN,
- NODE_BLOCK,
- NODE_CALL_EXPR,
- NODE_RETURN,
- NODE_BREAK,
- NODE_CONTINUE,
- NODE_NUMBER_LITERAL,
- NODE_STRING_LITERAL,
- NODE_IDENT,
- NODE_TYPE,
- NODE_BINARY_EXPR,
- NODE_UNARY_EXPR,
- NODE_EXPR_STATEMENT,
- NODE_SUBSCRIPT_EXPR,
- NODE_IF,
- NODE_WHILE,
- NODE_FOR,
- NODE_EMPTY_STATEMENT,
- NODE_UNKNOWN,
-} NodeType; // note: if changed, edit node_type_str!
-
-const char* node_type_str(NodeType);
-void print_node_type_str(NodeType);
-
-/*
-typedef enum {
- OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD,
- OP_POS, OP_NEG, OP_INC, OP_DEC,
- OP_BITAND, OP_BITOR, OP_BITXOR, OP_BITNOT,
- OP_SHL, OP_SHR,
- OP_LOGAND, OP_LOGOR, OP_LOGNOT,
- OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE,
- OP_ASSIGN, OP_ADD_ASSIGN, OP_SUB_ASSIGN,
- OP_MUL_ASSIGN, OP_DIV_ASSIGN, OP_MOD_ASSIGN,
- OP_SHL_ASSIGN, OP_SHR_ASSIGN,
- OP_AND_ASSIGN, OP_XOR_ASSIGN, OP_OR_ASSIGN,
- OP_CONDITIONAL, OP_COMMA,
- OP_ADDR, OP_DEREF, OP_MEMBER, OP_PTR_MEMBER,
- OP_SUBSCRIPT, OP_CALL,
- OP_SIZEOF, OP_ALIGNOF
-} OpType;
-*/
-
-typedef enum {
- OP_PLUS = 23,
- OP_MINUS,
- OP_MUL,
- OP_DIV,
- OP_MOD,
- OP_BIT_AND, // & ampersand
- OP_BIT_OR, // |
- OP_ASSIGN,
- OP_EQUALITY, // ==
- OP_INEQUALITY, // !=
- OP_LT_EQ,
- OP_GT_EQ,
- OP_LT,
- OP_GT,
-} OpType;
-
-typedef enum {
- OPER_MINUS = 0,
- OPER_BANG,
- OPER_PREINC,
- OPER_PREDEC,
- OPER_POSTINC,
- OPER_POSTDEC,
-} UnaryOp;
-
-typedef struct {
- size_t start;
- size_t end;
-} Span;
-
-typedef struct Node {
- NodeType type;
- struct Node* next;
- struct Scope* scope;
- const char* filename;
- int line, col;
-
- /* NOTE we will eventually add spans for condition info, etc. to print out in errors */
-
- union {
- /* clang-format off */
- struct { struct Node** decl; size_t len, cap; } program;
- struct { Span name; struct Node* return_type; struct Node** params; size_t p_cap, p_len; struct Node* body; } function_decl;
- struct { Span name; struct Node* type; } param;
- struct { struct Node* cond; struct Node* then_body; struct Node* else_body; } if_statement;
- struct { struct Node* cond; struct Node* body; } while_statement;
- struct { struct Node* init; struct Node* cond; struct Node* increment; struct Node* body; } for_statement;
- struct { struct Node** stmts; size_t cap, len; } block;
- struct { Span name; struct Node* type; struct Node* init; } var_decl;
- struct { struct Node* lhs; struct Node* rhs; } var_assign;
- struct { struct Node* callee; struct Node** args; size_t cap, len; } call_expr;
- struct { struct Node* expr; } ret;
- struct { struct Node* expr; } cont;
- struct { struct Node* expr; } expr_statement;
- struct { OpType op; struct Node* lhs; struct Node* rhs; } binary_expr;
- struct { UnaryOp op; struct Node* operand; bool is_postfix; } unary_expr;
- struct { struct Node* array; struct Node* index; } subscript_expr;
- struct { double value; } number;
- struct { Span value; } string;
- struct { Span name; } ident;
- /* clang-format on */
- } data;
-} Node;
-
-typedef struct {
- Token* tokens;
- size_t token_count;
- size_t pos;
- const char* src;
- size_t src_len;
- const char* filename;
-} Parser;
-
-typedef struct {
- Node* node;
- const char* src;
-} Ast;
-
-typedef struct {
- Node** items;
- size_t len, cap;
-} NodeVec;
-
-Parser parser_init(Lexer*);
-void parser_parse(Ast*, Parser*);
-void ast_print(Ast*);
-
-Token peek(Parser*);
-Token peek2(Parser*);
-Token consume(Parser*);
-Token expect(Parser*, TokenType);
-bool match(Parser*, TokenType);
-bool check(Parser*, TokenType);
-
-Node* parse_declarations(Parser*);
-
-Node* parse_number(Parser*);
-Node* parse_ident(Parser*);
-Node* parse_primary(Parser*);
-Node* parse_postfix(Parser*);
-Node* parse_primary(Parser*);
-Node* parse_unary(Parser*);
-Node* parse_term(Parser*);
-Node* parse_expression(Parser*);
-Node* parse_expression_statement(Parser*);
-Node* parse_statement(Parser*);
-Node* parse_block(Parser*);
-Node* parse_declaration_statement(Parser*);
-Node* parse_decl_or_func_decl(Parser*);
-NodeVec parse_param_list(Parser*);
-Node* parse_type(Parser*);
-Node* parse_func_call(Parser*);
-NodeVec parse_func_arguments(Parser*);
-Node* parse_if(Parser*);
-Node* parse_while(Parser*);
-Node* parse_for(Parser*);
-Node* parse_assignment(Parser*);
-Node* parse_break(Parser*);
-Node* parse_continue_statement(Parser*);
-Node* parse_expression(Parser*);
-Node* make_program_node(void);
-Node* make_ident_node(Span name);
-Node* make_param_decl(Parser*);
-Node* make_postfix_node(UnaryOp, Node*);
-Node* make_subscript_node(Node*, Node*);
-Node* make_ident_node(Span);
-Node* make_postfix_node(UnaryOp, Node*);
-Node* make_number_node(Parser*);
-Node* make_unary_node(UnaryOp, Node*);
-Node* make_string_node(Parser*);
-Node* make_binary_node(OpType, Node*, Node*);
-Node* parse_return_statement(Parser*);
-Node* make_empty_statement(void);
-Node* make_call_node(Node*, NodeVec);
-
-const char* span_str(const char* src, Span s, char* stack_alloc_chptr);
-const char* range_str(const char* src, size_t start, size_t end, char* stack_alloc_chptr);
+/* Parser construction, the parse entry point, and AST dumping. */
+Parser parser_init(Lexer *);
+void parser_parse(Ast *, Parser *);
+void ast_print(Ast *);
+
+/* Helpers that take the Parser and return or test a Token / TokenType. */
+Token peek(Parser *);
+Token peek2(Parser *);
+Token consume(Parser *);
+Token expect(Parser *, TokenType);
+bool match(Parser *, TokenType);
+bool check(Parser *, TokenType);
+
+/* parse_* routines: each takes the Parser and returns the Node (or NodeVec) it built. */
+Node *parse_declarations(Parser *);
+
+Node *parse_number(Parser *);
+Node *parse_ident(Parser *);
+Node *parse_primary(Parser *);
+Node *parse_postfix(Parser *);
+Node *parse_unary(Parser *);
+Node *parse_term(Parser *);
+Node *parse_expression(Parser *);
+Node *parse_expression_statement(Parser *);
+Node *parse_statement(Parser *);
+Node *parse_block(Parser *);
+Node *parse_declaration_statement(Parser *);
+Node *parse_decl_or_func_decl(Parser *);
+NodeVec parse_param_list(Parser *);
+Node *parse_type(Parser *);
+Node *parse_func_call(Parser *);
+NodeVec parse_func_arguments(Parser *);
+Node *parse_if(Parser *);
+Node *parse_while(Parser *);
+Node *parse_for(Parser *);
+Node *parse_assignment(Parser *);
+Node *parse_break(Parser *);
+Node *parse_continue_statement(Parser *);
+Node *parse_return_statement(Parser *);
+
+/* make_* routines: Node constructors. */
+Node *make_program_node(void);
+Node *make_ident_node(Span name);
+Node *make_param_decl(Parser *);
+Node *make_postfix_node(UnaryOp, Node *);
+Node *make_subscript_node(Node *, Node *);
+Node *make_number_node(Parser *);
+Node *make_unary_node(UnaryOp, Node *);
+Node *make_string_node(Parser *);
+Node *make_binary_node(OpType, Node *, Node *);
+Node *make_empty_statement(void);
+Node *make_call_node(Node *, NodeVec);
+
+/*
+ * Text extraction from the source buffer. Callers in gen.c and sem.c pass
+ * a zeroed char[IDENTSZ] compound literal as stack_alloc_chptr and use the
+ * returned pointer as a C string (e.g. with strcmp).
+ */
+const char *span_str(const char *src, Span s, char *stack_alloc_chptr);
+const char *range_str(const char *src, size_t start, size_t end, char *stack_alloc_chptr);
diff --git a/sem.c b/sem.c
@@ -12,11 +12,11 @@
static int next_id = 100;
Scope
-scope_init(Node* node)
+scope_init(Node *node)
{
Scope s = (Scope) { .parent = NULL,
- .symbols = (Symbol**)calloc(CALLOC_SZ, sizeof(Symbol*)),
- .children = (Scope**)calloc(CALLOC_SZ, sizeof(Scope*)),
+ .symbols = (Symbol **)calloc(CALLOC_SZ, sizeof(Symbol *)),
+ .children = (Scope **)calloc(CALLOC_SZ, sizeof(Scope *)),
.cap = CALLOC_SZ,
.len = 0,
.ch_cap = CALLOC_SZ,
@@ -30,11 +30,11 @@ scope_init(Node* node)
return s;
}
-static Scope*
-new_scope_from_scope(Scope* parent_scope, Node* node)
+static Scope *
+new_scope_from_scope(Scope *parent_scope, Node *node)
{
// new scope
- Scope* scope = (Scope*)calloc(1, sizeof(Scope));
+ Scope *scope = (Scope *)calloc(1, sizeof(Scope));
if (scope == NULL) panic("new_scope_from_scope: could not alloc");
scope->id = next_id++;
@@ -42,12 +42,12 @@ new_scope_from_scope(Scope* parent_scope, Node* node)
node->scope = scope;
// init symbols list
- scope->symbols = (Symbol**)calloc(CALLOC_SZ, sizeof(Symbol*));
+ scope->symbols = (Symbol **)calloc(CALLOC_SZ, sizeof(Symbol *));
if (scope->symbols == NULL) panic("new_scope_from_scope: symbols: could not alloc");
scope->cap = CALLOC_SZ;
scope->len = 0;
- scope->children = (Scope**)calloc(CALLOC_SZ, sizeof(Scope*));
+ scope->children = (Scope **)calloc(CALLOC_SZ, sizeof(Scope *));
if (scope->children == NULL) panic("new_scope_from_scope: children: could not alloc");
scope->ch_cap = CALLOC_SZ;
scope->ch_len = 0;
@@ -59,8 +59,7 @@ new_scope_from_scope(Scope* parent_scope, Node* node)
assert(parent_scope->children != NULL);
if (parent_scope->ch_len == parent_scope->ch_cap) {
parent_scope->ch_cap *= 2;
- parent_scope->children = (Scope**)realloc(
- parent_scope->children, parent_scope->ch_cap * sizeof(Scope*));
+ parent_scope->children = (Scope **)realloc(parent_scope->children, parent_scope->ch_cap * sizeof(Scope *));
assert(parent_scope->children != NULL && "realloc failed");
}
parent_scope->children[parent_scope->ch_len++] = scope;
@@ -73,26 +72,25 @@ new_scope_from_scope(Scope* parent_scope, Node* node)
}
+/*
+ * Append sym to scope->symbols, growing the array when it is full.
+ *
+ * Capacity is doubled (or reset to CALLOC_SZ if it was 0). The new array
+ * and capacity are only stored once realloc() has succeeded; on failure
+ * panic() is called, matching the other allocation checks in this file.
+ */
static void
-add_to_scope(Scope* scope, Symbol* sym)
+add_to_scope(Scope *scope, Symbol *sym)
{
	if (scope->len >= scope->cap) {
-		scope->cap *= 2;
-		scope->symbols = (Symbol**)realloc(scope->symbols, scope->cap * sizeof(Symbol*));
+		size_t new_cap = scope->cap ? scope->cap * 2 : CALLOC_SZ;
+		Symbol **grown = realloc(scope->symbols, new_cap * sizeof *grown);
+		if (grown == NULL) panic("add_to_scope: could not realloc");
+		scope->symbols = grown;
+		scope->cap = new_cap;
	}
	scope->symbols[scope->len++] = sym;
}
static void
-scope_var(Scope* scope, Ast* ast, Node* node)
+scope_var(Scope *scope, Ast *ast, Node *node)
{
- const char* var_name = span_str(ast->src, node->data.var_decl.name, (char[IDENTSZ]) { 0 });
- const char* type_name
- = span_str(ast->src, node->data.var_decl.type->data.ident.name, (char[IDENTSZ]) { 0 });
+ const char *var_name = span_str(ast->src, node->data.var_decl.name, (char[IDENTSZ]) { 0 });
+ const char *type_name = span_str(ast->src, node->data.var_decl.type->data.ident.name, (char[IDENTSZ]) { 0 });
- Symbol* sym = (Symbol*)calloc(1, sizeof(Symbol));
+ Symbol *sym = (Symbol *)calloc(1, sizeof(Symbol));
if (sym == NULL) panic("scope_var: symbol: could not alloc");
- TypeInfo* type = (TypeInfo*)calloc(1, sizeof(TypeInfo));
+ TypeInfo *type = (TypeInfo *)calloc(1, sizeof(TypeInfo));
if (type == NULL) panic("scope_var: type: could not alloc");
if (strcmp(type_name, "float") == 0) {
@@ -107,9 +105,7 @@ scope_var(Scope* scope, Ast* ast, Node* node)
if (type_name[0] >= 'A' && type_name[0] <= 'Z') {
type->type = SYMTYPE_USER;
} else {
- panic("sem: not yet defined type '%s' for variable '%s'",
- type_name,
- var_name);
+ panic("sem: not yet defined type '%s' for variable '%s'", type_name, var_name);
}
}
@@ -123,12 +119,14 @@ scope_var(Scope* scope, Ast* ast, Node* node)
}
static void
-scope_func(Scope* parent_scope, Ast* ast, Node* node)
+scope_func(Scope *parent_scope, Ast *ast, Node *node)
{
- Scope* scope = new_scope_from_scope(parent_scope, node);
+ Scope *scope = new_scope_from_scope(parent_scope, node);
+ node->scope = scope;
for (size_t i = 0; i < node->data.block.len; i++) {
- Node* stmt = node->data.block.stmts[i];
+ Node *stmt = node->data.block.stmts[i];
+ stmt->scope = scope;
switch (stmt->type) {
case NODE_VAR_DECL: {
scope_var(scope, ast, stmt);
@@ -141,10 +139,11 @@ scope_func(Scope* parent_scope, Ast* ast, Node* node)
}
void
-scope_build(Scope* scope, Ast* ast)
+scope_build(Scope *scope, Ast *ast)
{
for (size_t i = 0; i < ast->node->data.program.len; i++) {
- Node* node = ast->node->data.program.decl[i];
+ Node *node = ast->node->data.program.decl[i];
+ node->scope = scope;
switch (node->type) {
case NODE_VAR_DECL:
scope_var(scope, ast, node);
@@ -159,13 +158,13 @@ scope_build(Scope* scope, Ast* ast)
}
void
-scope_print(Scope* scope, Ast* ast)
+scope_print(Scope *scope, Ast *ast)
{
if (scope == NULL || scope->symbols == NULL) return;
for (size_t i = 0; i < scope->len; i++) {
- Symbol* sym = scope->symbols[i];
- const char* name = span_str(ast->src, sym->name, (char[IDENTSZ]) { 0 });
+ Symbol *sym = scope->symbols[i];
+ const char *name = span_str(ast->src, sym->name, (char[IDENTSZ]) { 0 });
int parent = -1;
if (scope->parent != NULL) parent = scope->parent->id;
bool has_owner_node = false;
@@ -182,15 +181,15 @@ scope_print(Scope* scope, Ast* ast)
if (scope->ch_len == 0) return;
for (size_t j = 0; j < scope->ch_len; j++) {
- Scope* child_scope = scope->children[j];
+ Scope *child_scope = scope->children[j];
scope_print(child_scope, ast);
}
}
-const char*
+const char *
type_kind_str(SymbolType t)
{
- static const char* type_strings[] = {
+ static const char *type_strings[] = {
[SYMTYPE_VOID] = "TYPE_VOID",
[SYMTYPE_INT] = "TYPE_INT",
[SYMTYPE_UINT] = "TYPE_UINT",
diff --git a/sem.h b/sem.h
@@ -1,111 +1,23 @@
#pragma once
+#include <libgccjit.h>
#include <stdlib.h>
+#include "types.h"
#include "parser.h"
-typedef enum {
- SYMTYPE_VOID = 108,
- SYMTYPE_INT,
- SYMTYPE_UINT,
- SYMTYPE_FLOAT,
- SYMTYPE_STRING,
- SYMTYPE_STRUCT,
- SYMTYPE_USER,
- SYMTYPE_ARRAY,
- SYMTYPE_ENUM,
- SYMTYPE_FUNC,
- SYMTYPE_TODO,
-} SymbolType; // note also update type_kind_str!
-
-const char* type_kind_str(SymbolType);
-
-typedef enum {
- ENUM_VALUE_INT,
- ENUM_VALUE_STRING,
-} EnumValueKind;
-
-typedef struct StructField {
- char* name;
- struct Type* type;
-} StructField;
-
-typedef struct EnumField {
- char* name;
- EnumValueKind kind;
- union { // not used?
- int int_value;
- char* string_value;
- } val;
-} EnumField;
-
-typedef struct StructMethod {
- char* name;
- struct Type* return_type;
- struct Type** param_types;
- int params_count;
- int params_cap;
- // TODO add ptr to func decl of this struct method
-} StructMethod;
-
-typedef struct Type {
- SymbolType type;
-
- // union {
- // struct StructType {
- // const char* struct_name;
- // int fields_count;
- // int methods_count;
- // StructField* fields;
- // StructMethod* methods;
- // } struct_t;
-
- // struct ArrayType {
- // int array_size; // -1 or fixed
- // struct Type* of_type;
- // bool dynamic;
- // } array_t;
-
- // struct EnumType {
- // const char* enum_name;
- // const int fields_count;
- // EnumField* fields;
- // EnumValueKind value_kind;
- // } enum_t;
- // };
-} TypeInfo;
-
-typedef struct Symbol {
- Span name;
- Node* decl;
- TypeInfo* type;
-} Symbol;
-
-typedef struct Scope {
- struct Node* owner;
- struct Scope* parent;
- Symbol** symbols;
- size_t len;
- size_t cap;
- struct Scope** children;
- size_t ch_len;
- size_t ch_cap;
- int depth;
- int id;
-} Scope;
-
// Symbol table functions
-void symbol_add(const char* name, TypeInfo* type);
-TypeInfo* symbol_get_type(const char* name);
-Symbol* symbol_find(const char* name);
+void symbol_add(const char *name, TypeInfo *type);
+TypeInfo *symbol_get_type(const char *name);
+Symbol *symbol_find(const char *name);
// Scope management functions
-Scope scope_init(Node*);
-void scope_add_symbol(Scope* scope, const char* name, TypeInfo* type);
-Symbol* scope_find_symbol(Scope* scope, const char* name);
+Scope scope_init(Node *);
+void scope_add_symbol(Scope *scope, const char *name, TypeInfo *type);
+Symbol *scope_find_symbol(Scope *scope, const char *name);
// Type checking functions
-int types_equal(TypeInfo* a, TypeInfo* b);
+int types_equal(TypeInfo *a, TypeInfo *b);
-void scope_build(Scope*, Ast*);
-void scope_print(Scope*, Ast*);
+void scope_build(Scope *, Ast *);
+void scope_print(Scope *, Ast *);
diff --git a/types.h b/types.h
@@ -0,0 +1,295 @@
+#pragma once
+
+#include <libgccjit.h>
+#include <stdlib.h>
+#include <stdbool.h>
+
+typedef enum { // Kind tag stored in Token.type; enumerators are numbered consecutively from 1006.
+ TOKEN_IDENT = 1006, // explicit base value; the reason for choosing 1006 is not visible here
+ TOKEN_LPAREN,
+ TOKEN_RPAREN,
+ TOKEN_LBRACE,
+ TOKEN_RBRACE,
+ TOKEN_LBRACKET,
+ TOKEN_RBRACKET,
+ TOKEN_EQUAL, // presumably a single '=' (cf. TOKEN_EQUALITY); confirm in the lexer
+ TOKEN_SEMICOLON,
+ TOKEN_PERCENT,
+ TOKEN_COMMA,
+ TOKEN_NUMBER_LITERAL,
+ TOKEN_STRING_LITERAL,
+ TOKEN_SLASH,
+ TOKEN_STAR,
+ TOKEN_PLUS,
+ TOKEN_PLUSPLUS,
+ TOKEN_MINUS,
+ TOKEN_MINUSMINUS,
+ TOKEN_EQUALITY, // presumably '=='; confirm in the lexer
+ TOKEN_INEQUALITY, // presumably '!='; confirm in the lexer
+ TOKEN_BANG,
+ TOKEN_LT,
+ TOKEN_GT,
+ TOKEN_LT_EQ,
+ TOKEN_GT_EQ,
+ TOKEN_IF,
+ TOKEN_ELSE,
+ TOKEN_WHILE,
+ TOKEN_FOR,
+ TOKEN_BREAK,
+ TOKEN_CONTINUE,
+ TOKEN_RETURN,
+ TOKEN_UNKNOWN, // NOTE: when adding a token kind, also update print_token
+ TOKEN_EOF
+} TokenType; // NOTE: when adding a token kind, also update token_type_str!
+
+const char *token_type_str(TokenType t); // declaration only; presumably returns a printable name for t
+
+typedef struct { // One token: a start/end range, a line/col position and a kind.
+ size_t start; // range start; presumably an offset into Lexer.src, TODO confirm
+ size_t end; // range end; NOTE(review): inclusive vs. exclusive is not visible here
+ size_t line;
+ size_t col;
+ TokenType type; // one of the TokenType enumerators
+} Token;
+
+typedef struct { // Lexer state: a token array with count/capacity, a position with line/col, and the source text.
+ Token *tokens; // token array; ownership/allocation is not visible here
+ size_t token_count; // presumably the number of slots in use
+ size_t token_cap; // presumably the number of slots allocated
+ size_t pos; // presumably the current index into src, TODO confirm
+ size_t line;
+ size_t col;
+ const char *src;
+ size_t src_len; // presumably the length of src
+ const char *filename;
+} Lexer;
+
+typedef enum { // Kind tag stored in Node.type; enumerators are numbered consecutively from 11.
+ NODE_PROGRAM = 11, // explicit base value; the reason for choosing 11 is not visible here
+ NODE_FUNCTION_DECL,
+ NODE_PARAM,
+ NODE_VAR_DECL,
+ NODE_VAR_ASSIGN,
+ NODE_BLOCK,
+ NODE_CALL_EXPR,
+ NODE_RETURN,
+ NODE_BREAK,
+ NODE_CONTINUE,
+ NODE_NUMBER_LITERAL,
+ NODE_STRING_LITERAL,
+ NODE_IDENT,
+ NODE_TYPE,
+ NODE_BINARY_EXPR,
+ NODE_UNARY_EXPR,
+ NODE_EXPR_STATEMENT,
+ NODE_SUBSCRIPT_EXPR,
+ NODE_IF,
+ NODE_WHILE,
+ NODE_FOR,
+ NODE_EMPTY_STATEMENT,
+ NODE_UNKNOWN,
+} NodeType; // NOTE: when adding a node kind, also update node_type_str!
+
+const char *node_type_str(NodeType); // declaration only; presumably maps a kind to a printable name
+void print_node_type_str(NodeType); // declaration only; presumably prints that name, confirm in the implementation
+
+/*
+typedef enum {
+ OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD,
+ OP_POS, OP_NEG, OP_INC, OP_DEC,
+ OP_BITAND, OP_BITOR, OP_BITXOR, OP_BITNOT,
+ OP_SHL, OP_SHR,
+ OP_LOGAND, OP_LOGOR, OP_LOGNOT,
+ OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE,
+ OP_ASSIGN, OP_ADD_ASSIGN, OP_SUB_ASSIGN,
+ OP_MUL_ASSIGN, OP_DIV_ASSIGN, OP_MOD_ASSIGN,
+ OP_SHL_ASSIGN, OP_SHR_ASSIGN,
+ OP_AND_ASSIGN, OP_XOR_ASSIGN, OP_OR_ASSIGN,
+ OP_CONDITIONAL, OP_COMMA,
+ OP_ADDR, OP_DEREF, OP_MEMBER, OP_PTR_MEMBER,
+ OP_SUBSCRIPT, OP_CALL,
+ OP_SIZEOF, OP_ALIGNOF
+} OpType;
+*/
+
+typedef enum { // Operator tag held in Node.data.binary_expr.op; enumerators are numbered consecutively from 23.
+ OP_PLUS = 23, // NOTE(review): 23..36 overlaps NodeType's 11..33, so a raw value does not identify which enum it came from
+ OP_MINUS,
+ OP_MUL,
+ OP_DIV,
+ OP_MOD,
+ OP_BIT_AND, // & ampersand
+ OP_BIT_OR, // |
+ OP_ASSIGN,
+ OP_EQUALITY, // ==
+ OP_INEQUALITY, // !=
+ OP_LT_EQ,
+ OP_GT_EQ,
+ OP_LT,
+ OP_GT,
+} OpType;
+
+typedef enum { // Operator tag held in Node.data.unary_expr.op; enumerators are numbered consecutively from 0.
+ OPER_MINUS = 0,
+ OPER_BANG,
+ OPER_PREINC, // NOTE(review): pre/post variants exist here and unary_expr also carries is_postfix; confirm which one is authoritative
+ OPER_PREDEC,
+ OPER_POSTINC,
+ OPER_POSTDEC,
+} UnaryOp;
+
+typedef struct { // A start/end pair; used for names and string values in Node and for Symbol.name.
+ size_t start; // presumably an offset into the source text, TODO confirm
+ size_t end; // NOTE(review): inclusive vs. exclusive is not visible here
+} Span;
+
+typedef struct Node { // AST node: common header fields followed by a union of per-kind payloads.
+ NodeType type; // presumably selects the active member of data; confirm in the parser
+ struct Node *next;
+ struct Scope *scope; // struct Scope is defined further down in this header
+ const char *filename;
+ int line, col; // int here, whereas Token stores line/col as size_t
+
+ /* NOTE: we will eventually add spans (condition info, etc.) so they can be printed in error messages */
+
+ union { // one anonymous struct per payload shape; members are named after the construct they describe
+ /* clang-format off */
+ struct { struct Node** decl; size_t len, cap; } program;
+ struct { Span name; struct Node* return_type; struct Node** params; size_t p_cap, p_len; struct Node* body; } function_decl;
+ struct { Span name; struct Node* type; } param;
+ struct { struct Node* cond; struct Node* then_body; struct Node* else_body; } if_statement;
+ struct { struct Node* cond; struct Node* body; } while_statement;
+ struct { struct Node* init; struct Node* cond; struct Node* increment; struct Node* body; } for_statement;
+ struct { struct Node** stmts; size_t cap, len; } block;
+ struct { Span name; struct Node* type; struct Node* init; } var_decl;
+ struct { struct Node* lhs; struct Node* rhs; } var_assign;
+ struct { struct Node* callee; struct Node** args; size_t cap, len; } call_expr;
+ struct { struct Node* expr; } ret;
+ struct { struct Node* expr; } cont; // NOTE(review): presumably for NODE_CONTINUE; no separate member is declared for NODE_BREAK
+ struct { struct Node* expr; } expr_statement;
+ struct { OpType op; struct Node* lhs; struct Node* rhs; } binary_expr;
+ struct { UnaryOp op; struct Node* operand; bool is_postfix; } unary_expr;
+ struct { struct Node* array; struct Node* index; } subscript_expr;
+ struct { double value; } number; // numeric literal value is held as a double
+ struct { Span value; } string; // a Span rather than a copied string
+ struct { Span name; } ident;
+ /* clang-format on */
+ } data;
+} Node;
+
+typedef struct { // Parser state: a token array with its count, a position, and the source text.
+ Token *tokens;
+ size_t token_count;
+ size_t pos; // presumably an index into tokens (not into src), TODO confirm
+ const char *src;
+ size_t src_len; // presumably the length of src
+ const char *filename;
+} Parser;
+
+typedef struct { // Pairs a Node pointer with source text.
+ Node *node; // presumably the root of the tree, TODO confirm
+ const char *src;
+} Ast;
+
+typedef struct { // Array of Node pointers with a length and a capacity.
+ Node **items;
+ size_t len, cap; // presumably used vs. allocated element counts
+} NodeVec;
+
+typedef enum { // Kind tag stored in TypeInfo.type; enumerators are numbered consecutively from 108.
+ // TODO: distinguish local/exported/param
+ SYMTYPE_VOID = 108, // explicit base value; the reason for choosing 108 is not visible here
+ SYMTYPE_INT,
+ SYMTYPE_UINT,
+ SYMTYPE_FLOAT,
+ SYMTYPE_STRING,
+ SYMTYPE_STRUCT,
+ SYMTYPE_USER,
+ SYMTYPE_ARRAY,
+ SYMTYPE_ENUM,
+ SYMTYPE_FUNC,
+ SYMTYPE_TODO,
+} SymbolType; // NOTE: when adding a kind, also update type_kind_str!
+
+const char *type_kind_str(SymbolType); // declaration only; presumably maps a kind to a printable name
+
+typedef enum { // Tag type of EnumField.kind; no explicit values, so numbering starts at 0.
+ ENUM_VALUE_INT,
+ ENUM_VALUE_STRING,
+} EnumValueKind;
+
+typedef struct StructField { // A field name paired with a pointer to its type.
+ char *name;
+ struct Type *type; // struct Type is the tag of the type whose typedef name is TypeInfo
+} StructField;
+
+typedef struct EnumField { // An enum member: a name, a value kind and a value union.
+ char *name;
+ EnumValueKind kind; // presumably selects which member of val is valid, TODO confirm
+ union { // NOTE(review): possibly unused; confirm before relying on it
+ int int_value;
+ char *string_value;
+ } val;
+} EnumField;
+
+typedef struct StructMethod { // A method signature: name, return type and an array of parameter types.
+ char *name;
+ struct Type *return_type;
+ struct Type **param_types; // array of type pointers; sized by the two counters below
+ int params_count; // presumably entries in use
+ int params_cap; // presumably entries allocated
+ // TODO: add a pointer to the function declaration of this struct method
+} StructMethod;
+
+typedef struct Type { // Type descriptor; the struct tag is Type but the typedef name is TypeInfo.
+ SymbolType type; // the only active field: the kind tag
+
+ // union { // everything from here to the closing brace of the union is commented out, i.e. not part of the struct
+ // struct StructType {
+ // const char* struct_name;
+ // int fields_count;
+ // int methods_count;
+ // StructField* fields;
+ // StructMethod* methods;
+ // } struct_t;
+
+ // struct ArrayType {
+ // int array_size; // -1 or fixed
+ // struct Type* of_type;
+ // bool dynamic;
+ // } array_t;
+
+ // struct EnumType {
+ // const char* enum_name;
+ // const int fields_count;
+ // EnumField* fields;
+ // EnumValueKind value_kind;
+ // } enum_t;
+ // };
+} TypeInfo;
+
+typedef struct Symbol { // A named entry: name span, declaring node, type info and libgccjit handles.
+ Span name; // a Span rather than a copied string
+ Node *decl;
+ TypeInfo *type;
+
+ gcc_jit_type *ctype; // libgccjit type handle; presumably the JIT counterpart of type, TODO confirm
+ union { // NOTE(review): no field in this struct records which member is active; confirm how users decide
+ gcc_jit_lvalue *lvalue;
+ gcc_jit_param *param;
+ gcc_jit_rvalue *const_rvalue;
+ } d;
+} Symbol;
+
+typedef struct Scope { // A scope: owner node, parent link, a symbol array and a child-scope array.
+ struct Node *owner;
+ struct Scope *parent; // NOTE(review): presumably NULL for the outermost scope, confirm in the implementation
+ Symbol **symbols; // array of Symbol pointers, sized by len/cap
+ size_t len; // presumably entries in use
+ size_t cap; // presumably entries allocated
+ struct Scope **children; // array of child Scope pointers, sized by ch_len/ch_cap
+ size_t ch_len;
+ size_t ch_cap;
+ int depth;
+ int id;
+} Scope;