ox

The Ox programming language, compiler and tools (WIP)
Log | Files | Refs | README | LICENSE

commit b80810f39a342f176f46b3090757eef62ed4684e
parent 9f0b5e9d7ae7671c422f1b7fe85151f630e1a4df
Author: citbl <citbl@citbl.org>
Date:   Mon,  6 Oct 2025 21:39:05 +1000

wip variables, notes

Diffstat:
M.clang-format | 5++++-
M.clangd | 2+-
Mex2.ox | 4++--
Mgen/gen.c | 181++++++++++++++++++++++++++++++++++++++++++++-----------------------------------
Mlexer.h | 65+++--------------------------------------------------------------
Mmakefile | 2+-
Mparser.h | 242+++++++++++++++++++------------------------------------------------------------
Msem.c | 63+++++++++++++++++++++++++++++++--------------------------------
Msem.h | 110++++++++-----------------------------------------------------------------------
Atypes.h | 295+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
10 files changed, 504 insertions(+), 465 deletions(-)

diff --git a/.clang-format b/.clang-format @@ -2,6 +2,9 @@ BasedOnStyle: Webkit IndentWidth: 8 ContinuationIndentWidth: 8 UseTab: AlignWithSpaces + +PointerAlignment: Right + AlignTrailingComments: true SpacesBeforeTrailingComments: 1 KeepEmptyLinesAtTheStartOfBlocks: false @@ -16,7 +19,7 @@ AlignConsecutiveMacros: false SortIncludes: false IndentCaseLabels: false -ColumnLimit: 100 +ColumnLimit: 150 PenaltyBreakBeforeFirstCallParameter: 1 AlignAfterOpenBracket: DontAlign BinPackArguments: false diff --git a/.clangd b/.clangd @@ -4,7 +4,7 @@ CompileFlags: -Wextra, -Wpedantic, -xc, - -std=c99, + -std=c2x, -g, -I/opt/homebrew/opt/libgccjit/include, -L/opt/homebrew/opt/libgccjit/lib/gcc/current, diff --git a/ex2.ox b/ex2.ox @@ -3,6 +3,6 @@ // T add(T a, b) inline pure => a + b; void main() { - int peter = 42; - print("harold"); + string peter = "steve"; + print(peter); } diff --git a/gen/gen.c b/gen/gen.c @@ -1,40 +1,33 @@ #include "../gen.h" #include "../utils.h" -#include <_string.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/param.h> -static gcc_jit_type* type_int; -static gcc_jit_type* type_uint; -static gcc_jit_type* type_float; -static gcc_jit_type* type_void; -static gcc_jit_type* type_cstr; +static gcc_jit_type *type_int; +static gcc_jit_type *type_uint; +static gcc_jit_type *type_float; +static gcc_jit_type *type_void; +static gcc_jit_type *type_cstr; #define MAXARGS 16 -gcc_jit_location* -loc_from_node(Gen* gen, Node* node) +gcc_jit_location * +loc_from_node(Gen *gen, Node *node) { - if(node->filename == NULL) - exit(1); - if(node->line == NULL) - exit(1); - if(node->col == NULL) - exit(1); - + if (node->filename == NULL) return NULL; return gcc_jit_context_new_location(gen->ctx, node->filename, node->line, node->col); } Gen -gen_init(Scope* scope, const char* src) +gen_init(Scope *scope, const char *src) { if (scope == NULL || src == NULL) { panic("gen_init: no Scope or AST provided"); } - gcc_jit_context* ctx; + gcc_jit_context *ctx; ctx = gcc_jit_context_acquire(); @@ -62,12 +55,11 @@ gen_init(Scope* scope, const char* src) type_void = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_VOID); type_cstr = gcc_jit_context_get_type(ctx, GCC_JIT_TYPE_CONST_CHAR_PTR); - gcc_jit_param* pm_puts[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "s") }; - gcc_jit_function* fn_puts = gcc_jit_context_new_function( - ctx, NULL, GCC_JIT_FUNCTION_IMPORTED, type_int, "puts", 1, pm_puts, 0); + gcc_jit_param *pm_puts[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "s") }; + gcc_jit_function *fn_puts = gcc_jit_context_new_function(ctx, NULL, GCC_JIT_FUNCTION_IMPORTED, type_int, "puts", 1, pm_puts, 0); - gcc_jit_param* pm_printf[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "fmt") }; - gcc_jit_function* fn_printf = gcc_jit_context_new_function(ctx, + gcc_jit_param *pm_printf[] = { gcc_jit_context_new_param(ctx, NULL, type_cstr, "fmt") }; + gcc_jit_function *fn_printf = gcc_jit_context_new_function(ctx, NULL, GCC_JIT_FUNCTION_IMPORTED, type_int, @@ -89,28 +81,27 @@ gen_init(Scope* scope, const char* src) }; } -static gcc_jit_rvalue* handle_expr(Gen*, Node*); +static gcc_jit_rvalue *handle_expr(Gen *, Node *); -static gcc_jit_rvalue* -emit_literal_string(Gen* gen, Node* node) +static gcc_jit_rvalue * +emit_literal_string(Gen *gen, Node *node) { size_t len = node->data.string.value.end - node->data.string.value.start; - char* str = calloc(len + 1, sizeof(char)); + char *str = calloc(len + 1, sizeof(char)); if (str == NULL) panic("emit_literal_string: could not alloc"); memcpy(str, gen->src + node->data.string.value.start, len); str[len] = '\0'; return gcc_jit_context_new_string_literal(gen->ctx, str); } -static gcc_jit_rvalue* -emit_literal_int(Gen* gen, Node* node) +static gcc_jit_rvalue * +emit_literal_int(Gen *gen, Node *node) { - return gcc_jit_context_new_rvalue_from_int( - gen->ctx, type_int, (int)node->data.number.value); + return gcc_jit_context_new_rvalue_from_int(gen->ctx, type_int, (int)node->data.number.value); } static void -build_program(Gen* gen, Node* node) +build_program(Gen *gen, Node *node) { size_t cnt = node->data.program.len; for (size_t i = 0; i < cnt; i++) { @@ -118,22 +109,18 @@ build_program(Gen* gen, Node* node) } } -static gcc_jit_rvalue* -lower_builtin_print(Gen* gen, Node* node) +static gcc_jit_rvalue * +lower_builtin_print(Gen *gen, Node *node) { size_t argc = node->data.call_expr.len; // 1-arg, treat as puts(arg) if (argc == 1) { - gcc_jit_rvalue* arg - = handle_expr(gen, node->data.call_expr.args[0]); // TODO [0] when many + gcc_jit_rvalue *arg = handle_expr(gen, node->data.call_expr.args[0]); // TODO [0] when many // cast common cases to const char* - if (gcc_jit_rvalue_get_type(arg) != type_cstr) - arg = gcc_jit_context_new_cast( - gen->ctx, loc_from_node(gen, node), arg, type_cstr); - gcc_jit_rvalue* args[] = { arg }; - return gcc_jit_context_new_call( - gen->ctx, loc_from_node(gen, node), gen->puts_fn, 1, args); + if (gcc_jit_rvalue_get_type(arg) != type_cstr) arg = gcc_jit_context_new_cast(gen->ctx, loc_from_node(gen, node), arg, type_cstr); + gcc_jit_rvalue *args[] = { arg }; + return gcc_jit_context_new_call(gen->ctx, loc_from_node(gen, node), gen->puts_fn, 1, args); } // softpanic("we don't currently handle formatted strings to print"); @@ -150,39 +137,33 @@ lower_builtin_print(Gen* gen, Node* node) // to a struct. Do we say [[struct]] or do we have some automatic unwrap and display of // struct data... probably, yes. - gcc_jit_rvalue** args = (gcc_jit_rvalue**)calloc(MAXARGS, sizeof(gcc_jit_rvalue*)); + gcc_jit_rvalue **args = (gcc_jit_rvalue **)calloc(MAXARGS, sizeof(gcc_jit_rvalue *)); - if (argc > MAXARGS) { - softpanic("we do not currently support more than 16 args to a print call"); - } + if (argc > MAXARGS) { softpanic("we do not currently support more than 16 args to a print call"); } for (size_t i = 0; i < argc; i++) { - gcc_jit_rvalue* arg = handle_expr(gen, node->data.call_expr.args[i]); + gcc_jit_rvalue *arg = handle_expr(gen, node->data.call_expr.args[i]); if (i == 0) { if (gcc_jit_rvalue_get_type(arg) != type_cstr) { // note this is probably not going to work as limited cast supported // and string isn't one of them - arg = gcc_jit_context_new_cast( - gen->ctx, loc_from_node(gen, node), arg, type_cstr); + arg = gcc_jit_context_new_cast(gen->ctx, loc_from_node(gen, node), arg, type_cstr); } } else { // // simple widening for common scalar types // - gcc_jit_type* ty = gcc_jit_rvalue_get_type(arg); + gcc_jit_type *ty = gcc_jit_rvalue_get_type(arg); if (ty == type_int) { - arg = gcc_jit_context_new_cast( - gen->ctx, loc_from_node(gen, node), arg, type_cstr); + arg = gcc_jit_context_new_cast(gen->ctx, loc_from_node(gen, node), arg, type_cstr); } else if (ty == type_float) { // variadics already promote float→double; double is } else if (ty == type_cstr) { // leave as const char* } else { // fallback: pass pointer as void* - arg = gcc_jit_context_new_cast(gen->ctx, - loc_from_node(gen, node), - arg, - gcc_jit_context_get_type(gen->ctx, GCC_JIT_TYPE_VOID_PTR)); + arg = gcc_jit_context_new_cast( + gen->ctx, loc_from_node(gen, node), arg, gcc_jit_context_get_type(gen->ctx, GCC_JIT_TYPE_VOID_PTR)); } } // TODO auto grow @@ -198,11 +179,17 @@ lower_builtin_print(Gen* gen, Node* node) // // TODO see todo below about linked list parameters... // } -static gcc_jit_rvalue* -handle_func_call(Gen* gen, Node* node) +void +lookup_symbol(Gen *gen) { - Node* fcallee = node->data.call_expr.callee; - const char* func_name = span_str(gen->src, fcallee->data.ident.name, (char[IDENTSZ]) { 0 }); + // @next +} + +static gcc_jit_rvalue * +handle_func_call(Gen *gen, Node *node) +{ + Node *fcallee = node->data.call_expr.callee; + const char *func_name = span_str(gen->src, fcallee->data.ident.name, (char[IDENTSZ]) { 0 }); if (strcmp(func_name, "print") == 0) return lower_builtin_print(gen, node); softpanic("unhandled func call named: %s", func_name); @@ -221,11 +208,12 @@ handle_func_call(Gen* gen, Node* node) // return NULL; } -static gcc_jit_rvalue* -handle_expr(Gen* gen, Node* node) +static gcc_jit_rvalue * +handle_expr(Gen *gen, Node *node) { switch (node->type) { case NODE_NUMBER_LITERAL: + return emit_literal_int(gen, node); break; case NODE_STRING_LITERAL: return emit_literal_string(gen, node); @@ -233,14 +221,32 @@ handle_expr(Gen* gen, Node* node) case NODE_CALL_EXPR: { return handle_func_call(gen, node); } break; + // case NODE_IDENT: { + // return NULL; // fixme + // } break; default: printf("handle_expr unhandled, %s\n", node_type_str(node->type)); } return NULL; } +static gcc_jit_type * +ox_type_to_c_type(Gen *gen, Node *node) +{ + const char *type_name = span_str(gen->src, node->data.ident.name, (char[IDENTSZ]) { 0 }); + + if (strcmp(type_name, "int") == 0) { + return type_int; + } else if (strcmp(type_name, "string") == 0) { + return type_cstr; + } else { + softpanic("unhandled type in gen %s", type_name); + } + return NULL; +} + static void -build_statement(Gen* gen, Node* node) +build_statement(Gen *gen, Node *node) { switch (node->type) { case NODE_BLOCK: @@ -248,18 +254,32 @@ build_statement(Gen* gen, Node* node) case NODE_RETURN: break; case NODE_VAR_DECL: { - gcc_jit_location* loc = loc_from_node(gen, node); - const char* var_name - = span_str(gen->src, node->data.var_decl.name, (char[IDENTSZ]) { 0 }); - gcc_jit_lvalue* var = gcc_jit_function_new_local(gen->curr_func, - loc, - type_int, - strdup(var_name)); // to be initialised - gcc_jit_rvalue* integer_value = emit_literal_int(gen, node->data.var_decl.init); - gcc_jit_block_add_assignment(gen->curr_block, loc, var, integer_value); + gcc_jit_location *loc = loc_from_node(gen, node); + const char *var_name = span_str(gen->src, node->data.var_decl.name, (char[IDENTSZ]) { 0 }); + gcc_jit_type *declared_type = ox_type_to_c_type(gen, node->data.var_decl.type); + gcc_jit_lvalue *var_decl = gcc_jit_function_new_local(gen->curr_func, loc, declared_type, strdup(var_name)); + + if (node->data.var_decl.init != NULL) { + gcc_jit_rvalue *rvalue = handle_expr(gen, node->data.var_decl.init); + gcc_jit_block_add_assignment(gen->curr_block, loc, var_decl, rvalue); + + printf("add the lvalue to node scope to be found later\n"); + + for (size_t i = 0; i < node->scope->len; i++) { + Symbol *sym = node->scope->symbols[i]; + if (sym->name.start == node->data.var_decl.name.start && sym->name.end == node->data.var_decl.name.end) { + sym->ctype = declared_type; + sym->d.lvalue = var_decl; + + printf("@next, when we parse the print(x) we know we can find the x in the symbols \n"); + + break; + } + } + } } break; case NODE_EXPR_STATEMENT: { - gcc_jit_rvalue* rv = handle_expr(gen, node->data.expr_statement.expr); + gcc_jit_rvalue *rv = handle_expr(gen, node->data.expr_statement.expr); if (rv) gcc_jit_block_add_eval(gen->curr_block, loc_from_node(gen, node), rv); } break; default: @@ -269,7 +289,7 @@ build_statement(Gen* gen, Node* node) } static void -build_block(Gen* gen, Node* body) +build_block(Gen *gen, Node *body) { for (size_t i = 0; i < body->data.block.len; i++) { build_statement(gen, body->data.block.stmts[i]); @@ -277,9 +297,9 @@ build_block(Gen* gen, Node* body) } static void -build_func_decl(Gen* gen, Node* node) +build_func_decl(Gen *gen, Node *node) { - gcc_jit_function* func = gcc_jit_context_new_function(gen->ctx, + gcc_jit_function *func = gcc_jit_context_new_function(gen->ctx, loc_from_node(gen, node), GCC_JIT_FUNCTION_EXPORTED, // declared type_int, // ret @@ -288,18 +308,17 @@ build_func_decl(Gen* gen, Node* node) NULL, // params 0); // is variadic - gcc_jit_block* block = gcc_jit_function_new_block(func, "entry"); + gcc_jit_block *block = gcc_jit_function_new_block(func, "entry"); - gcc_jit_function* prev_func = gen->curr_func; - gcc_jit_block* prev_block = gen->curr_block; + gcc_jit_function *prev_func = gen->curr_func; + gcc_jit_block *prev_block = gen->curr_block; gen->curr_block = block; gen->curr_func = func; build_block(gen, node->data.function_decl.body); if (gen->curr_block) { - gcc_jit_rvalue* ret_value - = gcc_jit_context_new_rvalue_from_int(gen->ctx, type_int, 0); + gcc_jit_rvalue *ret_value = gcc_jit_context_new_rvalue_from_int(gen->ctx, type_int, 0); gcc_jit_block_end_with_return(gen->curr_block, NULL, ret_value); gen->curr_block = NULL; } @@ -309,7 +328,7 @@ build_func_decl(Gen* gen, Node* node) } void -gen_next(Gen* gen, Node* node) +gen_next(Gen *gen, Node *node) { // printf("gen_next, %s\n", node_type_str(node->type)); diff --git a/lexer.h b/lexer.h @@ -1,66 +1,7 @@ #pragma once #include <stdlib.h> +#include "types.h" -typedef enum { - TOKEN_IDENT = 1006, - TOKEN_LPAREN, - TOKEN_RPAREN, - TOKEN_LBRACE, - TOKEN_RBRACE, - TOKEN_LBRACKET, - TOKEN_RBRACKET, - TOKEN_EQUAL, - TOKEN_SEMICOLON, - TOKEN_PERCENT, - TOKEN_COMMA, - TOKEN_NUMBER_LITERAL, - TOKEN_STRING_LITERAL, - TOKEN_SLASH, - TOKEN_STAR, - TOKEN_PLUS, - TOKEN_PLUSPLUS, - TOKEN_MINUS, - TOKEN_MINUSMINUS, - TOKEN_EQUALITY, - TOKEN_INEQUALITY, - TOKEN_BANG, - TOKEN_LT, - TOKEN_GT, - TOKEN_LT_EQ, - TOKEN_GT_EQ, - TOKEN_IF, - TOKEN_ELSE, - TOKEN_WHILE, - TOKEN_FOR, - TOKEN_BREAK, - TOKEN_CONTINUE, - TOKEN_RETURN, - TOKEN_UNKNOWN, // NOTE: also update print_token - TOKEN_EOF -} TokenType; // NOTE also update token_type_str! - -const char* token_type_str(TokenType t); - -typedef struct { - size_t start; - size_t end; - size_t line; - size_t col; - TokenType type; -} Token; - -typedef struct { - Token* tokens; - size_t token_count; - size_t token_cap; - size_t pos; - size_t line; - size_t col; - const char* src; - size_t src_len; - const char* filename; -} Lexer; - -void lexer_lex(Lexer*, const char* filename, const char* contents); -void lexer_print(Lexer*); +void lexer_lex(Lexer *, const char *filename, const char *contents); +void lexer_print(Lexer *); diff --git a/makefile b/makefile @@ -19,7 +19,7 @@ endif SRC = *.c */*.c BIN = oxc -STD = -std=c99 +STD = -std=c2x default: cc ${STD} -g -Wall -Wextra -Wno-unused-parameter -Wno-unused-function -o ${BIN} ${SRC} ${LIB} # -Wpedantic -Wshadow -Wconversion diff --git a/parser.h b/parser.h @@ -1,195 +1,65 @@ #pragma once #include "lexer.h" +#include "sem.h" +#include "types.h" #include <stdlib.h> #include <stdbool.h> #define IDENTSZ 256 -typedef enum { - NODE_PROGRAM = 11, - NODE_FUNCTION_DECL, - NODE_PARAM, - NODE_VAR_DECL, - NODE_VAR_ASSIGN, - NODE_BLOCK, - NODE_CALL_EXPR, - NODE_RETURN, - NODE_BREAK, - NODE_CONTINUE, - NODE_NUMBER_LITERAL, - NODE_STRING_LITERAL, - NODE_IDENT, - NODE_TYPE, - NODE_BINARY_EXPR, - NODE_UNARY_EXPR, - NODE_EXPR_STATEMENT, - NODE_SUBSCRIPT_EXPR, - NODE_IF, - NODE_WHILE, - NODE_FOR, - NODE_EMPTY_STATEMENT, - NODE_UNKNOWN, -} NodeType; // note: if changed, edit node_type_str! - -const char* node_type_str(NodeType); -void print_node_type_str(NodeType); - -/* -typedef enum { - OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, - OP_POS, OP_NEG, OP_INC, OP_DEC, - OP_BITAND, OP_BITOR, OP_BITXOR, OP_BITNOT, - OP_SHL, OP_SHR, - OP_LOGAND, OP_LOGOR, OP_LOGNOT, - OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, - OP_ASSIGN, OP_ADD_ASSIGN, OP_SUB_ASSIGN, - OP_MUL_ASSIGN, OP_DIV_ASSIGN, OP_MOD_ASSIGN, - OP_SHL_ASSIGN, OP_SHR_ASSIGN, - OP_AND_ASSIGN, OP_XOR_ASSIGN, OP_OR_ASSIGN, - OP_CONDITIONAL, OP_COMMA, - OP_ADDR, OP_DEREF, OP_MEMBER, OP_PTR_MEMBER, - OP_SUBSCRIPT, OP_CALL, - OP_SIZEOF, OP_ALIGNOF -} OpType; -*/ - -typedef enum { - OP_PLUS = 23, - OP_MINUS, - OP_MUL, - OP_DIV, - OP_MOD, - OP_BIT_AND, // & ampersand - OP_BIT_OR, // | - OP_ASSIGN, - OP_EQUALITY, // == - OP_INEQUALITY, // != - OP_LT_EQ, - OP_GT_EQ, - OP_LT, - OP_GT, -} OpType; - -typedef enum { - OPER_MINUS = 0, - OPER_BANG, - OPER_PREINC, - OPER_PREDEC, - OPER_POSTINC, - OPER_POSTDEC, -} UnaryOp; - -typedef struct { - size_t start; - size_t end; -} Span; - -typedef struct Node { - NodeType type; - struct Node* next; - struct Scope* scope; - const char* filename; - int line, col; - - /* NOTE we will eventually add spans for condition info, etc. to print out in errors */ - - union { - /* clang-format off */ - struct { struct Node** decl; size_t len, cap; } program; - struct { Span name; struct Node* return_type; struct Node** params; size_t p_cap, p_len; struct Node* body; } function_decl; - struct { Span name; struct Node* type; } param; - struct { struct Node* cond; struct Node* then_body; struct Node* else_body; } if_statement; - struct { struct Node* cond; struct Node* body; } while_statement; - struct { struct Node* init; struct Node* cond; struct Node* increment; struct Node* body; } for_statement; - struct { struct Node** stmts; size_t cap, len; } block; - struct { Span name; struct Node* type; struct Node* init; } var_decl; - struct { struct Node* lhs; struct Node* rhs; } var_assign; - struct { struct Node* callee; struct Node** args; size_t cap, len; } call_expr; - struct { struct Node* expr; } ret; - struct { struct Node* expr; } cont; - struct { struct Node* expr; } expr_statement; - struct { OpType op; struct Node* lhs; struct Node* rhs; } binary_expr; - struct { UnaryOp op; struct Node* operand; bool is_postfix; } unary_expr; - struct { struct Node* array; struct Node* index; } subscript_expr; - struct { double value; } number; - struct { Span value; } string; - struct { Span name; } ident; - /* clang-format on */ - } data; -} Node; - -typedef struct { - Token* tokens; - size_t token_count; - size_t pos; - const char* src; - size_t src_len; - const char* filename; -} Parser; - -typedef struct { - Node* node; - const char* src; -} Ast; - -typedef struct { - Node** items; - size_t len, cap; -} NodeVec; - -Parser parser_init(Lexer*); -void parser_parse(Ast*, Parser*); -void ast_print(Ast*); - -Token peek(Parser*); -Token peek2(Parser*); -Token consume(Parser*); -Token expect(Parser*, TokenType); -bool match(Parser*, TokenType); -bool check(Parser*, TokenType); - -Node* parse_declarations(Parser*); - -Node* parse_number(Parser*); -Node* parse_ident(Parser*); -Node* parse_primary(Parser*); -Node* parse_postfix(Parser*); -Node* parse_primary(Parser*); -Node* parse_unary(Parser*); -Node* parse_term(Parser*); -Node* parse_expression(Parser*); -Node* parse_expression_statement(Parser*); -Node* parse_statement(Parser*); -Node* parse_block(Parser*); -Node* parse_declaration_statement(Parser*); -Node* parse_decl_or_func_decl(Parser*); -NodeVec parse_param_list(Parser*); -Node* parse_type(Parser*); -Node* parse_func_call(Parser*); -NodeVec parse_func_arguments(Parser*); -Node* parse_if(Parser*); -Node* parse_while(Parser*); -Node* parse_for(Parser*); -Node* parse_assignment(Parser*); -Node* parse_break(Parser*); -Node* parse_continue_statement(Parser*); -Node* parse_expression(Parser*); -Node* make_program_node(void); -Node* make_ident_node(Span name); -Node* make_param_decl(Parser*); -Node* make_postfix_node(UnaryOp, Node*); -Node* make_subscript_node(Node*, Node*); -Node* make_ident_node(Span); -Node* make_postfix_node(UnaryOp, Node*); -Node* make_number_node(Parser*); -Node* make_unary_node(UnaryOp, Node*); -Node* make_string_node(Parser*); -Node* make_binary_node(OpType, Node*, Node*); -Node* parse_return_statement(Parser*); -Node* make_empty_statement(void); -Node* make_call_node(Node*, NodeVec); - -const char* span_str(const char* src, Span s, char* stack_alloc_chptr); -const char* range_str(const char* src, size_t start, size_t end, char* stack_alloc_chptr); +Parser parser_init(Lexer *); +void parser_parse(Ast *, Parser *); +void ast_print(Ast *); + +Token peek(Parser *); +Token peek2(Parser *); +Token consume(Parser *); +Token expect(Parser *, TokenType); +bool match(Parser *, TokenType); +bool check(Parser *, TokenType); + +Node *parse_declarations(Parser *); + +Node *parse_number(Parser *); +Node *parse_ident(Parser *); +Node *parse_primary(Parser *); +Node *parse_postfix(Parser *); +Node *parse_primary(Parser *); +Node *parse_unary(Parser *); +Node *parse_term(Parser *); +Node *parse_expression(Parser *); +Node *parse_expression_statement(Parser *); +Node *parse_statement(Parser *); +Node *parse_block(Parser *); +Node *parse_declaration_statement(Parser *); +Node *parse_decl_or_func_decl(Parser *); +NodeVec parse_param_list(Parser *); +Node *parse_type(Parser *); +Node *parse_func_call(Parser *); +NodeVec parse_func_arguments(Parser *); +Node *parse_if(Parser *); +Node *parse_while(Parser *); +Node *parse_for(Parser *); +Node *parse_assignment(Parser *); +Node *parse_break(Parser *); +Node *parse_continue_statement(Parser *); +Node *parse_expression(Parser *); +Node *make_program_node(void); +Node *make_ident_node(Span name); +Node *make_param_decl(Parser *); +Node *make_postfix_node(UnaryOp, Node *); +Node *make_subscript_node(Node *, Node *); +Node *make_ident_node(Span); +Node *make_postfix_node(UnaryOp, Node *); +Node *make_number_node(Parser *); +Node *make_unary_node(UnaryOp, Node *); +Node *make_string_node(Parser *); +Node *make_binary_node(OpType, Node *, Node *); +Node *parse_return_statement(Parser *); +Node *make_empty_statement(void); +Node *make_call_node(Node *, NodeVec); + +const char *span_str(const char *src, Span s, char *stack_alloc_chptr); +const char *range_str(const char *src, size_t start, size_t end, char *stack_alloc_chptr); diff --git a/sem.c b/sem.c @@ -12,11 +12,11 @@ static int next_id = 100; Scope -scope_init(Node* node) +scope_init(Node *node) { Scope s = (Scope) { .parent = NULL, - .symbols = (Symbol**)calloc(CALLOC_SZ, sizeof(Symbol*)), - .children = (Scope**)calloc(CALLOC_SZ, sizeof(Scope*)), + .symbols = (Symbol **)calloc(CALLOC_SZ, sizeof(Symbol *)), + .children = (Scope **)calloc(CALLOC_SZ, sizeof(Scope *)), .cap = CALLOC_SZ, .len = 0, .ch_cap = CALLOC_SZ, @@ -30,11 +30,11 @@ scope_init(Node* node) return s; } -static Scope* -new_scope_from_scope(Scope* parent_scope, Node* node) +static Scope * +new_scope_from_scope(Scope *parent_scope, Node *node) { // new scope - Scope* scope = (Scope*)calloc(1, sizeof(Scope)); + Scope *scope = (Scope *)calloc(1, sizeof(Scope)); if (scope == NULL) panic("new_scope_from_scope: could not alloc"); scope->id = next_id++; @@ -42,12 +42,12 @@ new_scope_from_scope(Scope* parent_scope, Node* node) node->scope = scope; // init symbols list - scope->symbols = (Symbol**)calloc(CALLOC_SZ, sizeof(Symbol*)); + scope->symbols = (Symbol **)calloc(CALLOC_SZ, sizeof(Symbol *)); if (scope->symbols == NULL) panic("new_scope_from_scope: symbols: could not alloc"); scope->cap = CALLOC_SZ; scope->len = 0; - scope->children = (Scope**)calloc(CALLOC_SZ, sizeof(Scope*)); + scope->children = (Scope **)calloc(CALLOC_SZ, sizeof(Scope *)); if (scope->children == NULL) panic("new_scope_from_scope: children: could not alloc"); scope->ch_cap = CALLOC_SZ; scope->ch_len = 0; @@ -59,8 +59,7 @@ new_scope_from_scope(Scope* parent_scope, Node* node) assert(parent_scope->children != NULL); if (parent_scope->ch_len == parent_scope->ch_cap) { parent_scope->ch_cap *= 2; - parent_scope->children = (Scope**)realloc( - parent_scope->children, parent_scope->ch_cap * sizeof(Scope*)); + parent_scope->children = (Scope **)realloc(parent_scope->children, parent_scope->ch_cap * sizeof(Scope *)); assert(parent_scope->children != NULL && "realloc failed"); } parent_scope->children[parent_scope->ch_len++] = scope; @@ -73,26 +72,25 @@ new_scope_from_scope(Scope* parent_scope, Node* node) } static void -add_to_scope(Scope* scope, Symbol* sym) +add_to_scope(Scope *scope, Symbol *sym) { if (scope->len >= scope->cap) { scope->cap *= 2; - scope->symbols = (Symbol**)realloc(scope->symbols, scope->cap * sizeof(Symbol*)); + scope->symbols = (Symbol **)realloc(scope->symbols, scope->cap * sizeof(Symbol *)); } scope->symbols[scope->len++] = sym; } static void -scope_var(Scope* scope, Ast* ast, Node* node) +scope_var(Scope *scope, Ast *ast, Node *node) { - const char* var_name = span_str(ast->src, node->data.var_decl.name, (char[IDENTSZ]) { 0 }); - const char* type_name - = span_str(ast->src, node->data.var_decl.type->data.ident.name, (char[IDENTSZ]) { 0 }); + const char *var_name = span_str(ast->src, node->data.var_decl.name, (char[IDENTSZ]) { 0 }); + const char *type_name = span_str(ast->src, node->data.var_decl.type->data.ident.name, (char[IDENTSZ]) { 0 }); - Symbol* sym = (Symbol*)calloc(1, sizeof(Symbol)); + Symbol *sym = (Symbol *)calloc(1, sizeof(Symbol)); if (sym == NULL) panic("scope_var: symbol: could not alloc"); - TypeInfo* type = (TypeInfo*)calloc(1, sizeof(TypeInfo)); + TypeInfo *type = (TypeInfo *)calloc(1, sizeof(TypeInfo)); if (type == NULL) panic("scope_var: type: could not alloc"); if (strcmp(type_name, "float") == 0) { @@ -107,9 +105,7 @@ scope_var(Scope* scope, Ast* ast, Node* node) if (type_name[0] >= 'A' && type_name[0] <= 'Z') { type->type = SYMTYPE_USER; } else { - panic("sem: not yet defined type '%s' for variable '%s'", - type_name, - var_name); + panic("sem: not yet defined type '%s' for variable '%s'", type_name, var_name); } } @@ -123,12 +119,14 @@ scope_var(Scope* scope, Ast* ast, Node* node) } static void -scope_func(Scope* parent_scope, Ast* ast, Node* node) +scope_func(Scope *parent_scope, Ast *ast, Node *node) { - Scope* scope = new_scope_from_scope(parent_scope, node); + Scope *scope = new_scope_from_scope(parent_scope, node); + node->scope = scope; for (size_t i = 0; i < node->data.block.len; i++) { - Node* stmt = node->data.block.stmts[i]; + Node *stmt = node->data.block.stmts[i]; + stmt->scope = scope; switch (stmt->type) { case NODE_VAR_DECL: { scope_var(scope, ast, stmt); @@ -141,10 +139,11 @@ scope_func(Scope* parent_scope, Ast* ast, Node* node) } void -scope_build(Scope* scope, Ast* ast) +scope_build(Scope *scope, Ast *ast) { for (size_t i = 0; i < ast->node->data.program.len; i++) { - Node* node = ast->node->data.program.decl[i]; + Node *node = ast->node->data.program.decl[i]; + node->scope = scope; switch (node->type) { case NODE_VAR_DECL: scope_var(scope, ast, node); @@ -159,13 +158,13 @@ scope_build(Scope* scope, Ast* ast) } void -scope_print(Scope* scope, Ast* ast) +scope_print(Scope *scope, Ast *ast) { if (scope == NULL || scope->symbols == NULL) return; for (size_t i = 0; i < scope->len; i++) { - Symbol* sym = scope->symbols[i]; - const char* name = span_str(ast->src, sym->name, (char[IDENTSZ]) { 0 }); + Symbol *sym = scope->symbols[i]; + const char *name = span_str(ast->src, sym->name, (char[IDENTSZ]) { 0 }); int parent = -1; if (scope->parent != NULL) parent = scope->parent->id; bool has_owner_node = false; @@ -182,15 +181,15 @@ scope_print(Scope* scope, Ast* ast) if (scope->ch_len == 0) return; for (size_t j = 0; j < scope->ch_len; j++) { - Scope* child_scope = scope->children[j]; + Scope *child_scope = scope->children[j]; scope_print(child_scope, ast); } } -const char* +const char * type_kind_str(SymbolType t) { - static const char* type_strings[] = { + static const char *type_strings[] = { [SYMTYPE_VOID] = "TYPE_VOID", [SYMTYPE_INT] = "TYPE_INT", [SYMTYPE_UINT] = "TYPE_UINT", diff --git a/sem.h b/sem.h @@ -1,111 +1,23 @@ #pragma once +#include <libgccjit.h> #include <stdlib.h> +#include "types.h" #include "parser.h" -typedef enum { - SYMTYPE_VOID = 108, - SYMTYPE_INT, - SYMTYPE_UINT, - SYMTYPE_FLOAT, - SYMTYPE_STRING, - SYMTYPE_STRUCT, - SYMTYPE_USER, - SYMTYPE_ARRAY, - SYMTYPE_ENUM, - SYMTYPE_FUNC, - SYMTYPE_TODO, -} SymbolType; // note also update type_kind_str! - -const char* type_kind_str(SymbolType); - -typedef enum { - ENUM_VALUE_INT, - ENUM_VALUE_STRING, -} EnumValueKind; - -typedef struct StructField { - char* name; - struct Type* type; -} StructField; - -typedef struct EnumField { - char* name; - EnumValueKind kind; - union { // not used? - int int_value; - char* string_value; - } val; -} EnumField; - -typedef struct StructMethod { - char* name; - struct Type* return_type; - struct Type** param_types; - int params_count; - int params_cap; - // TODO add ptr to func decl of this struct method -} StructMethod; - -typedef struct Type { - SymbolType type; - - // union { - // struct StructType { - // const char* struct_name; - // int fields_count; - // int methods_count; - // StructField* fields; - // StructMethod* methods; - // } struct_t; - - // struct ArrayType { - // int array_size; // -1 or fixed - // struct Type* of_type; - // bool dynamic; - // } array_t; - - // struct EnumType { - // const char* enum_name; - // const int fields_count; - // EnumField* fields; - // EnumValueKind value_kind; - // } enum_t; - // }; -} TypeInfo; - -typedef struct Symbol { - Span name; - Node* decl; - TypeInfo* type; -} Symbol; - -typedef struct Scope { - struct Node* owner; - struct Scope* parent; - Symbol** symbols; - size_t len; - size_t cap; - struct Scope** children; - size_t ch_len; - size_t ch_cap; - int depth; - int id; -} Scope; - // Symbol table functions -void symbol_add(const char* name, TypeInfo* type); -TypeInfo* symbol_get_type(const char* name); -Symbol* symbol_find(const char* name); +void symbol_add(const char *name, TypeInfo *type); +TypeInfo *symbol_get_type(const char *name); +Symbol *symbol_find(const char *name); // Scope management functions -Scope scope_init(Node*); -void scope_add_symbol(Scope* scope, const char* name, TypeInfo* type); -Symbol* scope_find_symbol(Scope* scope, const char* name); +Scope scope_init(Node *); +void scope_add_symbol(Scope *scope, const char *name, TypeInfo *type); +Symbol *scope_find_symbol(Scope *scope, const char *name); // Type checking functions -int types_equal(TypeInfo* a, TypeInfo* b); +int types_equal(TypeInfo *a, TypeInfo *b); -void scope_build(Scope*, Ast*); -void scope_print(Scope*, Ast*); +void scope_build(Scope *, Ast *); +void scope_print(Scope *, Ast *); diff --git a/types.h b/types.h @@ -0,0 +1,295 @@ +#pragma once + +#include <libgccjit.h> +#include <stdlib.h> +#include <stdbool.h> + +typedef enum { + TOKEN_IDENT = 1006, + TOKEN_LPAREN, + TOKEN_RPAREN, + TOKEN_LBRACE, + TOKEN_RBRACE, + TOKEN_LBRACKET, + TOKEN_RBRACKET, + TOKEN_EQUAL, + TOKEN_SEMICOLON, + TOKEN_PERCENT, + TOKEN_COMMA, + TOKEN_NUMBER_LITERAL, + TOKEN_STRING_LITERAL, + TOKEN_SLASH, + TOKEN_STAR, + TOKEN_PLUS, + TOKEN_PLUSPLUS, + TOKEN_MINUS, + TOKEN_MINUSMINUS, + TOKEN_EQUALITY, + TOKEN_INEQUALITY, + TOKEN_BANG, + TOKEN_LT, + TOKEN_GT, + TOKEN_LT_EQ, + TOKEN_GT_EQ, + TOKEN_IF, + TOKEN_ELSE, + TOKEN_WHILE, + TOKEN_FOR, + TOKEN_BREAK, + TOKEN_CONTINUE, + TOKEN_RETURN, + TOKEN_UNKNOWN, // NOTE: also update print_token + TOKEN_EOF +} TokenType; // NOTE also update token_type_str! + +const char *token_type_str(TokenType t); + +typedef struct { + size_t start; + size_t end; + size_t line; + size_t col; + TokenType type; +} Token; + +typedef struct { + Token *tokens; + size_t token_count; + size_t token_cap; + size_t pos; + size_t line; + size_t col; + const char *src; + size_t src_len; + const char *filename; +} Lexer; + +typedef enum { + NODE_PROGRAM = 11, + NODE_FUNCTION_DECL, + NODE_PARAM, + NODE_VAR_DECL, + NODE_VAR_ASSIGN, + NODE_BLOCK, + NODE_CALL_EXPR, + NODE_RETURN, + NODE_BREAK, + NODE_CONTINUE, + NODE_NUMBER_LITERAL, + NODE_STRING_LITERAL, + NODE_IDENT, + NODE_TYPE, + NODE_BINARY_EXPR, + NODE_UNARY_EXPR, + NODE_EXPR_STATEMENT, + NODE_SUBSCRIPT_EXPR, + NODE_IF, + NODE_WHILE, + NODE_FOR, + NODE_EMPTY_STATEMENT, + NODE_UNKNOWN, +} NodeType; // note: if changed, edit node_type_str! + +const char *node_type_str(NodeType); +void print_node_type_str(NodeType); + +/* +typedef enum { + OP_ADD, OP_SUB, OP_MUL, OP_DIV, OP_MOD, + OP_POS, OP_NEG, OP_INC, OP_DEC, + OP_BITAND, OP_BITOR, OP_BITXOR, OP_BITNOT, + OP_SHL, OP_SHR, + OP_LOGAND, OP_LOGOR, OP_LOGNOT, + OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, + OP_ASSIGN, OP_ADD_ASSIGN, OP_SUB_ASSIGN, + OP_MUL_ASSIGN, OP_DIV_ASSIGN, OP_MOD_ASSIGN, + OP_SHL_ASSIGN, OP_SHR_ASSIGN, + OP_AND_ASSIGN, OP_XOR_ASSIGN, OP_OR_ASSIGN, + OP_CONDITIONAL, OP_COMMA, + OP_ADDR, OP_DEREF, OP_MEMBER, OP_PTR_MEMBER, + OP_SUBSCRIPT, OP_CALL, + OP_SIZEOF, OP_ALIGNOF +} OpType; +*/ + +typedef enum { + OP_PLUS = 23, + OP_MINUS, + OP_MUL, + OP_DIV, + OP_MOD, + OP_BIT_AND, // & ampersand + OP_BIT_OR, // | + OP_ASSIGN, + OP_EQUALITY, // == + OP_INEQUALITY, // != + OP_LT_EQ, + OP_GT_EQ, + OP_LT, + OP_GT, +} OpType; + +typedef enum { + OPER_MINUS = 0, + OPER_BANG, + OPER_PREINC, + OPER_PREDEC, + OPER_POSTINC, + OPER_POSTDEC, +} UnaryOp; + +typedef struct { + size_t start; + size_t end; +} Span; + +typedef struct Node { + NodeType type; + struct Node *next; + struct Scope *scope; + const char *filename; + int line, col; + + /* NOTE we will eventually add spans for condition info, etc. to print out in errors */ + + union { + /* clang-format off */ + struct { struct Node** decl; size_t len, cap; } program; + struct { Span name; struct Node* return_type; struct Node** params; size_t p_cap, p_len; struct Node* body; } function_decl; + struct { Span name; struct Node* type; } param; + struct { struct Node* cond; struct Node* then_body; struct Node* else_body; } if_statement; + struct { struct Node* cond; struct Node* body; } while_statement; + struct { struct Node* init; struct Node* cond; struct Node* increment; struct Node* body; } for_statement; + struct { struct Node** stmts; size_t cap, len; } block; + struct { Span name; struct Node* type; struct Node* init; } var_decl; + struct { struct Node* lhs; struct Node* rhs; } var_assign; + struct { struct Node* callee; struct Node** args; size_t cap, len; } call_expr; + struct { struct Node* expr; } ret; + struct { struct Node* expr; } cont; + struct { struct Node* expr; } expr_statement; + struct { OpType op; struct Node* lhs; struct Node* rhs; } binary_expr; + struct { UnaryOp op; struct Node* operand; bool is_postfix; } unary_expr; + struct { struct Node* array; struct Node* index; } subscript_expr; + struct { double value; } number; + struct { Span value; } string; + struct { Span name; } ident; + /* clang-format on */ + } data; +} Node; + +typedef struct { + Token *tokens; + size_t token_count; + size_t pos; + const char *src; + size_t src_len; + const char *filename; +} Parser; + +typedef struct { + Node *node; + const char *src; +} Ast; + +typedef struct { + Node **items; + size_t len, cap; +} NodeVec; + +typedef enum { + // todo distinguish local/exported/param + SYMTYPE_VOID = 108, + SYMTYPE_INT, + SYMTYPE_UINT, + SYMTYPE_FLOAT, + SYMTYPE_STRING, + SYMTYPE_STRUCT, + SYMTYPE_USER, + SYMTYPE_ARRAY, + SYMTYPE_ENUM, + SYMTYPE_FUNC, + SYMTYPE_TODO, +} SymbolType; // note also update type_kind_str! + +const char *type_kind_str(SymbolType); + +typedef enum { + ENUM_VALUE_INT, + ENUM_VALUE_STRING, +} EnumValueKind; + +typedef struct StructField { + char *name; + struct Type *type; +} StructField; + +typedef struct EnumField { + char *name; + EnumValueKind kind; + union { // not used? + int int_value; + char *string_value; + } val; +} EnumField; + +typedef struct StructMethod { + char *name; + struct Type *return_type; + struct Type **param_types; + int params_count; + int params_cap; + // TODO add ptr to func decl of this struct method +} StructMethod; + +typedef struct Type { + SymbolType type; + + // union { + // struct StructType { + // const char* struct_name; + // int fields_count; + // int methods_count; + // StructField* fields; + // StructMethod* methods; + // } struct_t; + + // struct ArrayType { + // int array_size; // -1 or fixed + // struct Type* of_type; + // bool dynamic; + // } array_t; + + // struct EnumType { + // const char* enum_name; + // const int fields_count; + // EnumField* fields; + // EnumValueKind value_kind; + // } enum_t; + // }; +} TypeInfo; + +typedef struct Symbol { + Span name; + Node *decl; + TypeInfo *type; + + gcc_jit_type *ctype; + union { + gcc_jit_lvalue *lvalue; + gcc_jit_param *param; + gcc_jit_rvalue *const_rvalue; + } d; +} Symbol; + +typedef struct Scope { + struct Node *owner; + struct Scope *parent; + Symbol **symbols; + size_t len; + size_t cap; + struct Scope **children; + size_t ch_len; + size_t ch_cap; + int depth; + int id; +} Scope;