parser.c (11221B)
#include "../parser.h"
#include "../utils.h"
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <assert.h>
// TODO make sure ALL callocs have been successful
Parser
parser_init(Lexer* lex)
{
return (Parser) { .pos = 0,
.tokens = lex->tokens,
.token_count = lex->token_count,
.src = lex->src,
.src_len = lex->src_len,
.filename = lex->filename };
}
Token
peek(Parser* par)
{
Token t = par->tokens[par->pos];
return t.type ? t : (Token) { .type = TOKEN_EOF };
}
Token
peek2(Parser* par)
{
if (par->pos + 1 >= par->token_count) return (Token) { .type = TOKEN_EOF };
Token t = par->tokens[par->pos + 1];
return t.type ? t : (Token) { .type = TOKEN_EOF };
}
Token
consume(Parser* par)
{
Token t = par->tokens[par->pos];
if (!t.type) return (Token) { .type = TOKEN_EOF };
par->pos++;
return t;
}
bool
check(Parser* p, TokenType type)
{
return (peek(p).type == type);
}
Token
expect(Parser* par, TokenType type)
{
Token tok = peek(par);
if (tok.type != type) {
const char* name = range_str(par->src, tok.start, tok.end, (char[IDENTSZ]) { 0 });
printf("name: %s\n", name);
panic("Expected %s, but got '%s' (%d) at %s:%zu:%zu",
token_type_str(type),
name,
tok.type,
par->filename,
tok.line,
tok.col);
assert(tok.type == type);
}
return consume(par);
}
bool
match(Parser* p, TokenType type)
{
// printf("matching type %d\n", type);
if (peek(p).type == type) {
consume(p);
return true;
}
return false;
}
static Node*
parse_multiplicative(Parser* par)
{
Node* node = parse_term(par);
for (;;) {
if (match(par, TOKEN_STAR)) {
Node* rhs = parse_unary(par);
node = make_binary_node(OP_MUL, node, rhs);
} else if (match(par, TOKEN_SLASH)) {
Node* rhs = parse_unary(par);
node = make_binary_node(OP_DIV, node, rhs);
} else if (match(par, TOKEN_PERCENT)) {
Node* rhs = parse_unary(par);
node = make_binary_node(OP_MOD, node, rhs);
} else
break;
}
return node;
}
// additive: +, -
static Node*
parse_additive(Parser* par)
{
Node* node = parse_multiplicative(par);
for (;;) {
if (match(par, TOKEN_PLUS)) {
Node* rhs = parse_multiplicative(par);
node = make_binary_node(OP_PLUS, node, rhs);
} else if (match(par, TOKEN_MINUS)) {
Node* rhs = parse_multiplicative(par);
node = make_binary_node(OP_MINUS, node, rhs);
} else
break;
}
return node;
}
static Node*
parse_relational(Parser* par)
{
Node* node = parse_additive(par);
for (;;) {
if (match(par, TOKEN_LT)) {
Node* rhs = parse_additive(par);
node = make_binary_node(OP_LT, node, rhs);
} else if (match(par, TOKEN_LT_EQ)) {
Node* rhs = parse_additive(par);
node = make_binary_node(OP_LT_EQ, node, rhs);
} else if (match(par, TOKEN_GT)) {
Node* rhs = parse_additive(par);
node = make_binary_node(OP_GT, node, rhs);
} else if (match(par, TOKEN_GT_EQ)) {
Node* rhs = parse_additive(par);
node = make_binary_node(OP_GT_EQ, node, rhs);
} else
break;
}
return node;
}
static Node*
parse_equality(Parser* par)
{
Node* node = parse_relational(par);
// TODO handle all enum gcc_jit_comparison
// {
// /* (EXPR_A) == (EXPR_B). */
// GCC_JIT_COMPARISON_EQ,
// /* (EXPR_A) != (EXPR_B). */
// GCC_JIT_COMPARISON_NE,
// /* (EXPR_A) < (EXPR_B). */
// GCC_JIT_COMPARISON_LT,
// /* (EXPR_A) <=(EXPR_B). */
// GCC_JIT_COMPARISON_LE,
// /* (EXPR_A) > (EXPR_B). */
// GCC_JIT_COMPARISON_GT,
// /* (EXPR_A) >= (EXPR_B). */
// GCC_JIT_COMPARISON_GE
// };
for (;;) {
if (match(par, TOKEN_EQUALITY)) { // "=="
Node* rhs = parse_relational(par);
node = make_binary_node(OP_EQUALITY, node, rhs);
} else if (match(par, TOKEN_INEQUALITY)) { // "!="
Node* rhs = parse_relational(par);
node = make_binary_node(OP_INEQUALITY, node, rhs);
} else {
// do not error out for other things like `=`, `&`, etc. let it pass.
break;
}
}
return node;
}
Node*
parse_assignment_expr(Parser* par)
{
Node* left = parse_equality(par);
if (match(par, TOKEN_EQUAL)) {
Node* right = parse_assignment_expr(par); // right-associative
if (left->type != NODE_IDENT) panic("invalid assignment target");
return make_binary_node(OP_ASSIGN, left, right);
}
return left;
}
Node*
parse_expression(Parser* par)
{
return parse_assignment_expr(par);
}
Node*
parse_expression_statement(Parser* par)
{
Node* expr = parse_expression(par);
expect(par, TOKEN_SEMICOLON);
Node* node = (Node*)calloc(1, sizeof(Node));
if (node == NULL) panic("parse_expression_statement: could not alloc");
node->type = NODE_EXPR_STATEMENT;
node->scope = NULL;
node->next = NULL;
node->data.expr_statement.expr = expr;
return node;
}
//
// parse_statement
//
Node*
parse_statement(Parser* par)
{
Token tok = peek(par), tok2 = peek2(par);
if (tok.type == TOKEN_LBRACE) {
consume(par);
return parse_block(par);
}
bool tok_is_a_type = (tok.type == TOKEN_IDENT || tok.type == TOKEN_VARIADIC || tok.type == TOKEN_COMP_TIME);
if (tok_is_a_type && tok2.type == TOKEN_IDENT) { return parse_decl_or_func_decl(par); } // TODO @next FN/FX handling here too
if (tok_is_a_type && tok2.type == TOKEN_EQUAL) { return parse_assignment(par); }
switch (tok.type) {
case TOKEN_RETURN:
return parse_return_statement(par);
case TOKEN_IF:
return parse_if(par);
case TOKEN_WHILE:
return parse_while(par);
case TOKEN_FOR:
return parse_for(par);
case TOKEN_BREAK:
return parse_break(par);
case TOKEN_CONTINUE:
return parse_continue_statement(par);
case TOKEN_SEMICOLON:
expect(par, TOKEN_SEMICOLON);
return make_empty_statement();
// case TOKEN_IDENT: // TODO?
// if (tok2.type == TOKEN_EQUAL)
// return parse_assignment(par);
// else
// return parse_expression_statement(par);
default:
return parse_expression_statement(par);
}
}
Node*
parse_block(Parser* par)
{
Node* stmt;
Node* block = (Node*)calloc(1, sizeof(Node));
if (block == NULL) panic("parse_block: could not alloc");
block->type = NODE_BLOCK;
block->scope = NULL;
while (peek(par).type != TOKEN_RBRACE && peek(par).type != TOKEN_EOF) {
stmt = parse_statement(par);
if (block->data.block.cap == block->data.block.len) {
block->data.block.cap = block->data.block.cap == 0 ? 4 : block->data.block.cap * 2;
block->data.block.stmts = realloc(block->data.block.stmts, block->data.block.cap * sizeof(Node*));
if (block->data.block.stmts == NULL) { panic("realloc failed in parse_block"); }
}
block->data.block.stmts[block->data.block.len++] = stmt;
}
expect(par, TOKEN_RBRACE); // TODO move out to parent caller
// TODO next the parsing of this was relying on next and cannot
// anymmore, e.g. print
return block;
}
Node*
parse_declaration_statement(Parser* par)
{
Node* type_node = parse_type(par); // consumes the type (e.g., "float")
Token ident = expect(par, TOKEN_IDENT); // variable or function name
if (match(par, TOKEN_LPAREN)) { perror("called a var decl but this looks to be a func decl"); }
Node* var = calloc(1, sizeof(Node));
if (var == NULL) panic("parse_declaration_statement: could not alloc");
var->type = NODE_VAR_DECL;
var->scope = NULL;
var->data.var_decl.name = (Span) { ident.start, ident.end };
var->data.var_decl.type = type_node;
Token next_tok = peek(par);
if (next_tok.type == TOKEN_EQUAL) {
consume(par);
var->data.var_decl.init = parse_expression(par);
} else {
consume(par);
var->data.var_decl.init = NULL;
}
expect(par, TOKEN_SEMICOLON);
return var;
}
Node*
parse_func_decl(Parser* par)
{
bool exported = false; // TODO use exported in the sem/typer/matcher/gen
if (match(par, TOKEN_FX)) {
// parse exported function
exported = true;
} else if (match(par, TOKEN_FN)) {
// parse internal function
} else {
panic("parse_func_decl: func: expected fn/fx");
}
Token ident = expect(par, TOKEN_IDENT); // variable or function name
expect(par, TOKEN_LPAREN);
NodeVec params = parse_param_list(par);
expect(par, TOKEN_RPAREN);
Node* return_type = parse_type_or_void(par);
expect(par, TOKEN_LBRACE);
Node* body = parse_block(par);
Node* fn = calloc(1, sizeof(Node));
if (fn == NULL) panic("parse_func_decl: func: could not alloc");
fn->type = NODE_FUNCTION_DECL;
fn->scope = NULL;
fn->data.function_decl.params = params.items;
fn->data.function_decl.p_cap = params.cap;
fn->data.function_decl.p_len = params.len;
fn->data.function_decl.body = body;
fn->data.function_decl.name = (Span) { ident.start, ident.end };
fn->data.function_decl.return_type = return_type;
fn->data.function_decl.exported = exported;
fn->filename = par->filename;
fn->line = ident.line;
fn->col = ident.col;
return fn;
}
Node*
parse_decl_or_func_decl(Parser* par)
{
Node* type_node = parse_type(par); // consumes the type (e.g., "float")
Token ident = expect(par, TOKEN_IDENT); // variable or function name
if (match(par, TOKEN_LPAREN)) { // function
Node* fn = calloc(1, sizeof(Node));
if (fn == NULL) panic("parse_decl_or_func_decl: func: could not alloc");
fn->type = NODE_FUNCTION_DECL;
fn->scope = NULL;
NodeVec v = parse_param_list(par);
fn->data.function_decl.params = v.items;
fn->data.function_decl.p_cap = v.cap;
fn->data.function_decl.p_len = v.len;
expect(par, TOKEN_RPAREN);
expect(par, TOKEN_LBRACE);
Node* body = parse_block(par);
fn->data.function_decl.body = body;
fn->data.function_decl.name = (Span) { ident.start, ident.end };
fn->data.function_decl.return_type = type_node;
fn->filename = par->filename;
fn->line = ident.line;
fn->col = ident.col;
return fn;
} else { // variable
Node* var = calloc(1, sizeof(Node));
if (var == NULL) panic("parse_decl_or_func_decl: var: could not alloc");
var->type = NODE_VAR_DECL;
var->scope = NULL;
var->data.var_decl.name = (Span) { ident.start, ident.end };
var->data.var_decl.type = type_node;
var->filename = par->filename;
var->line = ident.line;
var->col = ident.col;
Token next_tok = peek(par);
if (next_tok.type == TOKEN_EQUAL) {
consume(par); // consume '='
var->data.var_decl.init = parse_expression(par);
} else {
var->data.var_decl.init = NULL;
}
expect(par, TOKEN_SEMICOLON);
return var;
}
}
Node*
parse_declarations(Parser* par)
{
Token tok = peek(par);
if (tok.type == TOKEN_EOF) return NULL;
switch (tok.type) {
case TOKEN_IDENT:
return parse_decl_or_func_decl(par);
break;
case TOKEN_FN:
case TOKEN_FX:
return parse_func_decl(par);
break;
default:
printf("unknown token to parse!: %s\n", token_type_str(tok.type));
return NULL;
}
return NULL;
}
void
parser_parse(Ast* ast, Parser* par)
{
assert(par->token_count > 0 && "no tokens to parse");
Node* node;
Node* program = make_program_node(par);
for (;;) {
node = parse_declarations(par);
if (node == NULL) break;
if (program->data.program.len == program->data.program.cap) {
program->data.program.cap *= 2;
program->data.program.decl
= (Node**)realloc(program->data.program.decl, program->data.program.cap * sizeof(Node*));
assert(program->data.program.decl != NULL && "realloc failed");
}
program->data.program.decl[program->data.program.len++] = node;
}
ast->src = par->src;
ast->node = program;
}