ox

The Ox programming language, compiler and tools (WIP)
Log | Files | Refs | README | LICENSE

parser.c (11221B)




#include "../parser.h"
#include "../utils.h"

#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <assert.h>

// TODO make sure ALL callocs have been successful

Parser
parser_init(Lexer* lex)
{
	return (Parser) { .pos = 0,
		.tokens = lex->tokens,
		.token_count = lex->token_count,
		.src = lex->src,
		.src_len = lex->src_len,
		.filename = lex->filename };
}

Token
peek(Parser* par)
{
	Token t = par->tokens[par->pos];
	return t.type ? t : (Token) { .type = TOKEN_EOF };
}

Token
peek2(Parser* par)
{
	if (par->pos + 1 >= par->token_count) return (Token) { .type = TOKEN_EOF };
	Token t = par->tokens[par->pos + 1];
	return t.type ? t : (Token) { .type = TOKEN_EOF };
}

Token
consume(Parser* par)
{
	Token t = par->tokens[par->pos];
	if (!t.type) return (Token) { .type = TOKEN_EOF };
	par->pos++;
	return t;
}

bool
check(Parser* p, TokenType type)
{
	return (peek(p).type == type);
}

Token
expect(Parser* par, TokenType type)
{
	Token tok = peek(par);
	if (tok.type != type) {
		const char* name = range_str(par->src, tok.start, tok.end, (char[IDENTSZ]) { 0 });
		printf("name: %s\n", name);
		panic("Expected %s, but got '%s' (%d) at %s:%zu:%zu",
			token_type_str(type),
			name,
			tok.type,
			par->filename,
			tok.line,
			tok.col);
		assert(tok.type == type);
	}
	return consume(par);
}

bool
match(Parser* p, TokenType type)
{
	// printf("matching type %d\n", type);
	if (peek(p).type == type) {
		consume(p);
		return true;
	}
	return false;
}

static Node*
parse_multiplicative(Parser* par)
{
	Node* node = parse_term(par);

	for (;;) {
		if (match(par, TOKEN_STAR)) {
			Node* rhs = parse_unary(par);
			node = make_binary_node(OP_MUL, node, rhs);
		} else if (match(par, TOKEN_SLASH)) {
			Node* rhs = parse_unary(par);
			node = make_binary_node(OP_DIV, node, rhs);
		} else if (match(par, TOKEN_PERCENT)) {
			Node* rhs = parse_unary(par);
			node = make_binary_node(OP_MOD, node, rhs);
		} else
			break;
	}

	return node;
}
// additive: +, -
static Node*
parse_additive(Parser* par)
{
	Node* node = parse_multiplicative(par);
	for (;;) {
		if (match(par, TOKEN_PLUS)) {
			Node* rhs = parse_multiplicative(par);
			node = make_binary_node(OP_PLUS, node, rhs);
		} else if (match(par, TOKEN_MINUS)) {
			Node* rhs = parse_multiplicative(par);
			node = make_binary_node(OP_MINUS, node, rhs);
		} else
			break;
	}
	return node;
}

static Node*
parse_relational(Parser* par)
{
	Node* node = parse_additive(par);
	for (;;) {
		if (match(par, TOKEN_LT)) {
			Node* rhs = parse_additive(par);
			node = make_binary_node(OP_LT, node, rhs);
		} else if (match(par, TOKEN_LT_EQ)) {
			Node* rhs = parse_additive(par);
			node = make_binary_node(OP_LT_EQ, node, rhs);
		} else if (match(par, TOKEN_GT)) {
			Node* rhs = parse_additive(par);
			node = make_binary_node(OP_GT, node, rhs);
		} else if (match(par, TOKEN_GT_EQ)) {
			Node* rhs = parse_additive(par);
			node = make_binary_node(OP_GT_EQ, node, rhs);
		} else
			break;
	}
	return node;
}

static Node*
parse_equality(Parser* par)
{
	Node* node = parse_relational(par);

	// TODO  handle all enum gcc_jit_comparison
	// {
	//   /* (EXPR_A) == (EXPR_B).  */
	//   GCC_JIT_COMPARISON_EQ,

	//   /* (EXPR_A) != (EXPR_B).  */
	//   GCC_JIT_COMPARISON_NE,

	//   /* (EXPR_A) < (EXPR_B).  */
	//   GCC_JIT_COMPARISON_LT,

	//   /* (EXPR_A) <=(EXPR_B).  */
	//   GCC_JIT_COMPARISON_LE,

	//   /* (EXPR_A) > (EXPR_B).  */
	//   GCC_JIT_COMPARISON_GT,

	//   /* (EXPR_A) >= (EXPR_B).  */
	//   GCC_JIT_COMPARISON_GE
	// };

	for (;;) {
		if (match(par, TOKEN_EQUALITY)) { // "=="
			Node* rhs = parse_relational(par);
			node = make_binary_node(OP_EQUALITY, node, rhs);
		} else if (match(par, TOKEN_INEQUALITY)) { // "!="
			Node* rhs = parse_relational(par);
			node = make_binary_node(OP_INEQUALITY, node, rhs);
		} else {
			// do not error out for other things like `=`, `&`, etc. let it pass.
			break;
		}
	}
	return node;
}

Node*
parse_assignment_expr(Parser* par)
{
	Node* left = parse_equality(par);

	if (match(par, TOKEN_EQUAL)) {
		Node* right = parse_assignment_expr(par); // right-associative
		if (left->type != NODE_IDENT) panic("invalid assignment target");
		return make_binary_node(OP_ASSIGN, left, right);
	}
	return left;
}

Node*
parse_expression(Parser* par)
{
	return parse_assignment_expr(par);
}

Node*
parse_expression_statement(Parser* par)
{
	Node* expr = parse_expression(par);
	expect(par, TOKEN_SEMICOLON);

	Node* node = (Node*)calloc(1, sizeof(Node));
	if (node == NULL) panic("parse_expression_statement: could not alloc");
	node->type = NODE_EXPR_STATEMENT;
	node->scope = NULL;
	node->next = NULL;
	node->data.expr_statement.expr = expr;
	return node;
}

//
// parse_statement
//
Node*
parse_statement(Parser* par)
{
	Token tok = peek(par), tok2 = peek2(par);

	if (tok.type == TOKEN_LBRACE) {
		consume(par);
		return parse_block(par);
	}

	bool tok_is_a_type = (tok.type == TOKEN_IDENT || tok.type == TOKEN_VARIADIC || tok.type == TOKEN_COMP_TIME);

	if (tok_is_a_type && tok2.type == TOKEN_IDENT) { return parse_decl_or_func_decl(par); } // TODO @next FN/FX handling here too
	if (tok_is_a_type && tok2.type == TOKEN_EQUAL) { return parse_assignment(par); }

	switch (tok.type) {
	case TOKEN_RETURN:
		return parse_return_statement(par);
	case TOKEN_IF:
		return parse_if(par);
	case TOKEN_WHILE:
		return parse_while(par);
	case TOKEN_FOR:
		return parse_for(par);
	case TOKEN_BREAK:
		return parse_break(par);
	case TOKEN_CONTINUE:
		return parse_continue_statement(par);
	case TOKEN_SEMICOLON:
		expect(par, TOKEN_SEMICOLON);
		return make_empty_statement();
	// case TOKEN_IDENT: // TODO?
	//     if (tok2.type == TOKEN_EQUAL)
	//         return parse_assignment(par);
	//     else
	//         return parse_expression_statement(par);
	default:
		return parse_expression_statement(par);
	}
}

Node*
parse_block(Parser* par)
{
	Node* stmt;
	Node* block = (Node*)calloc(1, sizeof(Node));
	if (block == NULL) panic("parse_block: could not alloc");
	block->type = NODE_BLOCK;
	block->scope = NULL;
	while (peek(par).type != TOKEN_RBRACE && peek(par).type != TOKEN_EOF) {
		stmt = parse_statement(par);

		if (block->data.block.cap == block->data.block.len) {
			block->data.block.cap = block->data.block.cap == 0 ? 4 : block->data.block.cap * 2;
			block->data.block.stmts = realloc(block->data.block.stmts, block->data.block.cap * sizeof(Node*));
			if (block->data.block.stmts == NULL) { panic("realloc failed in parse_block"); }
		}

		block->data.block.stmts[block->data.block.len++] = stmt;
	}
	expect(par, TOKEN_RBRACE); // TODO move out to parent caller
	// TODO next the parsing of this was relying on next and cannot
	// anymmore, e.g. print
	return block;
}

Node*
parse_declaration_statement(Parser* par)
{
	Node* type_node = parse_type(par);      // consumes the type (e.g., "float")
	Token ident = expect(par, TOKEN_IDENT); // variable or function name
	if (match(par, TOKEN_LPAREN)) { perror("called a var decl but this looks to be a func decl"); }

	Node* var = calloc(1, sizeof(Node));
	if (var == NULL) panic("parse_declaration_statement: could not alloc");
	var->type = NODE_VAR_DECL;
	var->scope = NULL;
	var->data.var_decl.name = (Span) { ident.start, ident.end };
	var->data.var_decl.type = type_node;
	Token next_tok = peek(par);
	if (next_tok.type == TOKEN_EQUAL) {
		consume(par);
		var->data.var_decl.init = parse_expression(par);
	} else {
		consume(par);
		var->data.var_decl.init = NULL;
	}
	expect(par, TOKEN_SEMICOLON);
	return var;
}

Node*
parse_func_decl(Parser* par)
{
	bool exported = false; // TODO use exported in the sem/typer/matcher/gen
	if (match(par, TOKEN_FX)) {
		// parse exported function
		exported = true;
	} else if (match(par, TOKEN_FN)) {
		// parse internal function
	} else {
		panic("parse_func_decl: func: expected fn/fx");
	}

	Token ident = expect(par, TOKEN_IDENT); // variable or function name

	expect(par, TOKEN_LPAREN);
	NodeVec params = parse_param_list(par);
	expect(par, TOKEN_RPAREN);

	Node* return_type = parse_type_or_void(par);

	expect(par, TOKEN_LBRACE);
	Node* body = parse_block(par);

	Node* fn = calloc(1, sizeof(Node));
	if (fn == NULL) panic("parse_func_decl: func: could not alloc");

	fn->type = NODE_FUNCTION_DECL;
	fn->scope = NULL;

	fn->data.function_decl.params = params.items;
	fn->data.function_decl.p_cap = params.cap;
	fn->data.function_decl.p_len = params.len;

	fn->data.function_decl.body = body;

	fn->data.function_decl.name = (Span) { ident.start, ident.end };
	fn->data.function_decl.return_type = return_type;
	fn->data.function_decl.exported = exported;
	fn->filename = par->filename;
	fn->line = ident.line;
	fn->col = ident.col;
	return fn;
}

Node*
parse_decl_or_func_decl(Parser* par)
{
	Node* type_node = parse_type(par);      // consumes the type (e.g., "float")
	Token ident = expect(par, TOKEN_IDENT); // variable or function name

	if (match(par, TOKEN_LPAREN)) { // function
		Node* fn = calloc(1, sizeof(Node));
		if (fn == NULL) panic("parse_decl_or_func_decl: func: could not alloc");

		fn->type = NODE_FUNCTION_DECL;
		fn->scope = NULL;

		NodeVec v = parse_param_list(par);
		fn->data.function_decl.params = v.items;
		fn->data.function_decl.p_cap = v.cap;
		fn->data.function_decl.p_len = v.len;

		expect(par, TOKEN_RPAREN);
		expect(par, TOKEN_LBRACE);

		Node* body = parse_block(par);
		fn->data.function_decl.body = body;

		fn->data.function_decl.name = (Span) { ident.start, ident.end };
		fn->data.function_decl.return_type = type_node;
		fn->filename = par->filename;
		fn->line = ident.line;
		fn->col = ident.col;
		return fn;

	} else { // variable
		Node* var = calloc(1, sizeof(Node));
		if (var == NULL) panic("parse_decl_or_func_decl: var: could not alloc");
		var->type = NODE_VAR_DECL;
		var->scope = NULL;
		var->data.var_decl.name = (Span) { ident.start, ident.end };
		var->data.var_decl.type = type_node;
		var->filename = par->filename;
		var->line = ident.line;
		var->col = ident.col;
		Token next_tok = peek(par);
		if (next_tok.type == TOKEN_EQUAL) {
			consume(par); // consume '='
			var->data.var_decl.init = parse_expression(par);
		} else {
			var->data.var_decl.init = NULL;
		}
		expect(par, TOKEN_SEMICOLON);
		return var;
	}
}

Node*
parse_declarations(Parser* par)
{
	Token tok = peek(par);
	if (tok.type == TOKEN_EOF) return NULL;

	switch (tok.type) {
	case TOKEN_IDENT:
		return parse_decl_or_func_decl(par);
		break;
	case TOKEN_FN:
	case TOKEN_FX:
		return parse_func_decl(par);
		break;
	default:
		printf("unknown token to parse!: %s\n", token_type_str(tok.type));
		return NULL;
	}
	return NULL;
}

void
parser_parse(Ast* ast, Parser* par)
{
	assert(par->token_count > 0 && "no tokens to parse");
	Node* node;
	Node* program = make_program_node(par);
	for (;;) {
		node = parse_declarations(par);
		if (node == NULL) break;
		if (program->data.program.len == program->data.program.cap) {
			program->data.program.cap *= 2;
			program->data.program.decl
				= (Node**)realloc(program->data.program.decl, program->data.program.cap * sizeof(Node*));
			assert(program->data.program.decl != NULL && "realloc failed");
		}
		program->data.program.decl[program->data.program.len++] = node;
	}

	ast->src = par->src;
	ast->node = program;
}