ox

The Ox programming language, compiler and tools (WIP)
Log | Files | Refs | README | LICENSE

parser.c (11221B)


      1 
      2 #include "../parser.h"
      3 #include "../utils.h"
      4 
      5 #include <stdio.h>
      6 #include <string.h>
      7 #include <stdbool.h>
      8 #include <assert.h>
      9 
     10 // TODO make sure ALL callocs have been successful
     11 
     12 Parser
     13 parser_init(Lexer* lex)
     14 {
     15 	return (Parser) { .pos = 0,
     16 		.tokens = lex->tokens,
     17 		.token_count = lex->token_count,
     18 		.src = lex->src,
     19 		.src_len = lex->src_len,
     20 		.filename = lex->filename };
     21 }
     22 
     23 Token
     24 peek(Parser* par)
     25 {
     26 	Token t = par->tokens[par->pos];
     27 	return t.type ? t : (Token) { .type = TOKEN_EOF };
     28 }
     29 
     30 Token
     31 peek2(Parser* par)
     32 {
     33 	if (par->pos + 1 >= par->token_count) return (Token) { .type = TOKEN_EOF };
     34 	Token t = par->tokens[par->pos + 1];
     35 	return t.type ? t : (Token) { .type = TOKEN_EOF };
     36 }
     37 
     38 Token
     39 consume(Parser* par)
     40 {
     41 	Token t = par->tokens[par->pos];
     42 	if (!t.type) return (Token) { .type = TOKEN_EOF };
     43 	par->pos++;
     44 	return t;
     45 }
     46 
     47 bool
     48 check(Parser* p, TokenType type)
     49 {
     50 	return (peek(p).type == type);
     51 }
     52 
     53 Token
     54 expect(Parser* par, TokenType type)
     55 {
     56 	Token tok = peek(par);
     57 	if (tok.type != type) {
     58 		const char* name = range_str(par->src, tok.start, tok.end, (char[IDENTSZ]) { 0 });
     59 		printf("name: %s\n", name);
     60 		panic("Expected %s, but got '%s' (%d) at %s:%zu:%zu",
     61 			token_type_str(type),
     62 			name,
     63 			tok.type,
     64 			par->filename,
     65 			tok.line,
     66 			tok.col);
     67 		assert(tok.type == type);
     68 	}
     69 	return consume(par);
     70 }
     71 
     72 bool
     73 match(Parser* p, TokenType type)
     74 {
     75 	// printf("matching type %d\n", type);
     76 	if (peek(p).type == type) {
     77 		consume(p);
     78 		return true;
     79 	}
     80 	return false;
     81 }
     82 
     83 static Node*
     84 parse_multiplicative(Parser* par)
     85 {
     86 	Node* node = parse_term(par);
     87 
     88 	for (;;) {
     89 		if (match(par, TOKEN_STAR)) {
     90 			Node* rhs = parse_unary(par);
     91 			node = make_binary_node(OP_MUL, node, rhs);
     92 		} else if (match(par, TOKEN_SLASH)) {
     93 			Node* rhs = parse_unary(par);
     94 			node = make_binary_node(OP_DIV, node, rhs);
     95 		} else if (match(par, TOKEN_PERCENT)) {
     96 			Node* rhs = parse_unary(par);
     97 			node = make_binary_node(OP_MOD, node, rhs);
     98 		} else
     99 			break;
    100 	}
    101 
    102 	return node;
    103 }
    104 // additive: +, -
    105 static Node*
    106 parse_additive(Parser* par)
    107 {
    108 	Node* node = parse_multiplicative(par);
    109 	for (;;) {
    110 		if (match(par, TOKEN_PLUS)) {
    111 			Node* rhs = parse_multiplicative(par);
    112 			node = make_binary_node(OP_PLUS, node, rhs);
    113 		} else if (match(par, TOKEN_MINUS)) {
    114 			Node* rhs = parse_multiplicative(par);
    115 			node = make_binary_node(OP_MINUS, node, rhs);
    116 		} else
    117 			break;
    118 	}
    119 	return node;
    120 }
    121 
    122 static Node*
    123 parse_relational(Parser* par)
    124 {
    125 	Node* node = parse_additive(par);
    126 	for (;;) {
    127 		if (match(par, TOKEN_LT)) {
    128 			Node* rhs = parse_additive(par);
    129 			node = make_binary_node(OP_LT, node, rhs);
    130 		} else if (match(par, TOKEN_LT_EQ)) {
    131 			Node* rhs = parse_additive(par);
    132 			node = make_binary_node(OP_LT_EQ, node, rhs);
    133 		} else if (match(par, TOKEN_GT)) {
    134 			Node* rhs = parse_additive(par);
    135 			node = make_binary_node(OP_GT, node, rhs);
    136 		} else if (match(par, TOKEN_GT_EQ)) {
    137 			Node* rhs = parse_additive(par);
    138 			node = make_binary_node(OP_GT_EQ, node, rhs);
    139 		} else
    140 			break;
    141 	}
    142 	return node;
    143 }
    144 
    145 static Node*
    146 parse_equality(Parser* par)
    147 {
    148 	Node* node = parse_relational(par);
    149 
    150 	// TODO  handle all enum gcc_jit_comparison
    151 	// {
    152 	//   /* (EXPR_A) == (EXPR_B).  */
    153 	//   GCC_JIT_COMPARISON_EQ,
    154 
    155 	//   /* (EXPR_A) != (EXPR_B).  */
    156 	//   GCC_JIT_COMPARISON_NE,
    157 
    158 	//   /* (EXPR_A) < (EXPR_B).  */
    159 	//   GCC_JIT_COMPARISON_LT,
    160 
    161 	//   /* (EXPR_A) <=(EXPR_B).  */
    162 	//   GCC_JIT_COMPARISON_LE,
    163 
    164 	//   /* (EXPR_A) > (EXPR_B).  */
    165 	//   GCC_JIT_COMPARISON_GT,
    166 
    167 	//   /* (EXPR_A) >= (EXPR_B).  */
    168 	//   GCC_JIT_COMPARISON_GE
    169 	// };
    170 
    171 	for (;;) {
    172 		if (match(par, TOKEN_EQUALITY)) { // "=="
    173 			Node* rhs = parse_relational(par);
    174 			node = make_binary_node(OP_EQUALITY, node, rhs);
    175 		} else if (match(par, TOKEN_INEQUALITY)) { // "!="
    176 			Node* rhs = parse_relational(par);
    177 			node = make_binary_node(OP_INEQUALITY, node, rhs);
    178 		} else {
    179 			// do not error out for other things like `=`, `&`, etc. let it pass.
    180 			break;
    181 		}
    182 	}
    183 	return node;
    184 }
    185 
    186 Node*
    187 parse_assignment_expr(Parser* par)
    188 {
    189 	Node* left = parse_equality(par);
    190 
    191 	if (match(par, TOKEN_EQUAL)) {
    192 		Node* right = parse_assignment_expr(par); // right-associative
    193 		if (left->type != NODE_IDENT) panic("invalid assignment target");
    194 		return make_binary_node(OP_ASSIGN, left, right);
    195 	}
    196 	return left;
    197 }
    198 
    199 Node*
    200 parse_expression(Parser* par)
    201 {
    202 	return parse_assignment_expr(par);
    203 }
    204 
    205 Node*
    206 parse_expression_statement(Parser* par)
    207 {
    208 	Node* expr = parse_expression(par);
    209 	expect(par, TOKEN_SEMICOLON);
    210 
    211 	Node* node = (Node*)calloc(1, sizeof(Node));
    212 	if (node == NULL) panic("parse_expression_statement: could not alloc");
    213 	node->type = NODE_EXPR_STATEMENT;
    214 	node->scope = NULL;
    215 	node->next = NULL;
    216 	node->data.expr_statement.expr = expr;
    217 	return node;
    218 }
    219 
    220 //
    221 // parse_statement
    222 //
    223 Node*
    224 parse_statement(Parser* par)
    225 {
    226 	Token tok = peek(par), tok2 = peek2(par);
    227 
    228 	if (tok.type == TOKEN_LBRACE) {
    229 		consume(par);
    230 		return parse_block(par);
    231 	}
    232 
    233 	bool tok_is_a_type = (tok.type == TOKEN_IDENT || tok.type == TOKEN_VARIADIC || tok.type == TOKEN_COMP_TIME);
    234 
    235 	if (tok_is_a_type && tok2.type == TOKEN_IDENT) { return parse_decl_or_func_decl(par); } // TODO @next FN/FX handling here too
    236 	if (tok_is_a_type && tok2.type == TOKEN_EQUAL) { return parse_assignment(par); }
    237 
    238 	switch (tok.type) {
    239 	case TOKEN_RETURN:
    240 		return parse_return_statement(par);
    241 	case TOKEN_IF:
    242 		return parse_if(par);
    243 	case TOKEN_WHILE:
    244 		return parse_while(par);
    245 	case TOKEN_FOR:
    246 		return parse_for(par);
    247 	case TOKEN_BREAK:
    248 		return parse_break(par);
    249 	case TOKEN_CONTINUE:
    250 		return parse_continue_statement(par);
    251 	case TOKEN_SEMICOLON:
    252 		expect(par, TOKEN_SEMICOLON);
    253 		return make_empty_statement();
    254 	// case TOKEN_IDENT: // TODO?
    255 	//     if (tok2.type == TOKEN_EQUAL)
    256 	//         return parse_assignment(par);
    257 	//     else
    258 	//         return parse_expression_statement(par);
    259 	default:
    260 		return parse_expression_statement(par);
    261 	}
    262 }
    263 
    264 Node*
    265 parse_block(Parser* par)
    266 {
    267 	Node* stmt;
    268 	Node* block = (Node*)calloc(1, sizeof(Node));
    269 	if (block == NULL) panic("parse_block: could not alloc");
    270 	block->type = NODE_BLOCK;
    271 	block->scope = NULL;
    272 	while (peek(par).type != TOKEN_RBRACE && peek(par).type != TOKEN_EOF) {
    273 		stmt = parse_statement(par);
    274 
    275 		if (block->data.block.cap == block->data.block.len) {
    276 			block->data.block.cap = block->data.block.cap == 0 ? 4 : block->data.block.cap * 2;
    277 			block->data.block.stmts = realloc(block->data.block.stmts, block->data.block.cap * sizeof(Node*));
    278 			if (block->data.block.stmts == NULL) { panic("realloc failed in parse_block"); }
    279 		}
    280 
    281 		block->data.block.stmts[block->data.block.len++] = stmt;
    282 	}
    283 	expect(par, TOKEN_RBRACE); // TODO move out to parent caller
    284 	// TODO next the parsing of this was relying on next and cannot
    285 	// anymmore, e.g. print
    286 	return block;
    287 }
    288 
    289 Node*
    290 parse_declaration_statement(Parser* par)
    291 {
    292 	Node* type_node = parse_type(par);      // consumes the type (e.g., "float")
    293 	Token ident = expect(par, TOKEN_IDENT); // variable or function name
    294 	if (match(par, TOKEN_LPAREN)) { perror("called a var decl but this looks to be a func decl"); }
    295 
    296 	Node* var = calloc(1, sizeof(Node));
    297 	if (var == NULL) panic("parse_declaration_statement: could not alloc");
    298 	var->type = NODE_VAR_DECL;
    299 	var->scope = NULL;
    300 	var->data.var_decl.name = (Span) { ident.start, ident.end };
    301 	var->data.var_decl.type = type_node;
    302 	Token next_tok = peek(par);
    303 	if (next_tok.type == TOKEN_EQUAL) {
    304 		consume(par);
    305 		var->data.var_decl.init = parse_expression(par);
    306 	} else {
    307 		consume(par);
    308 		var->data.var_decl.init = NULL;
    309 	}
    310 	expect(par, TOKEN_SEMICOLON);
    311 	return var;
    312 }
    313 
    314 Node*
    315 parse_func_decl(Parser* par)
    316 {
    317 	bool exported = false; // TODO use exported in the sem/typer/matcher/gen
    318 	if (match(par, TOKEN_FX)) {
    319 		// parse exported function
    320 		exported = true;
    321 	} else if (match(par, TOKEN_FN)) {
    322 		// parse internal function
    323 	} else {
    324 		panic("parse_func_decl: func: expected fn/fx");
    325 	}
    326 
    327 	Token ident = expect(par, TOKEN_IDENT); // variable or function name
    328 
    329 	expect(par, TOKEN_LPAREN);
    330 	NodeVec params = parse_param_list(par);
    331 	expect(par, TOKEN_RPAREN);
    332 
    333 	Node* return_type = parse_type_or_void(par);
    334 
    335 	expect(par, TOKEN_LBRACE);
    336 	Node* body = parse_block(par);
    337 
    338 	Node* fn = calloc(1, sizeof(Node));
    339 	if (fn == NULL) panic("parse_func_decl: func: could not alloc");
    340 
    341 	fn->type = NODE_FUNCTION_DECL;
    342 	fn->scope = NULL;
    343 
    344 	fn->data.function_decl.params = params.items;
    345 	fn->data.function_decl.p_cap = params.cap;
    346 	fn->data.function_decl.p_len = params.len;
    347 
    348 	fn->data.function_decl.body = body;
    349 
    350 	fn->data.function_decl.name = (Span) { ident.start, ident.end };
    351 	fn->data.function_decl.return_type = return_type;
    352 	fn->data.function_decl.exported = exported;
    353 	fn->filename = par->filename;
    354 	fn->line = ident.line;
    355 	fn->col = ident.col;
    356 	return fn;
    357 }
    358 
    359 Node*
    360 parse_decl_or_func_decl(Parser* par)
    361 {
    362 	Node* type_node = parse_type(par);      // consumes the type (e.g., "float")
    363 	Token ident = expect(par, TOKEN_IDENT); // variable or function name
    364 
    365 	if (match(par, TOKEN_LPAREN)) { // function
    366 		Node* fn = calloc(1, sizeof(Node));
    367 		if (fn == NULL) panic("parse_decl_or_func_decl: func: could not alloc");
    368 
    369 		fn->type = NODE_FUNCTION_DECL;
    370 		fn->scope = NULL;
    371 
    372 		NodeVec v = parse_param_list(par);
    373 		fn->data.function_decl.params = v.items;
    374 		fn->data.function_decl.p_cap = v.cap;
    375 		fn->data.function_decl.p_len = v.len;
    376 
    377 		expect(par, TOKEN_RPAREN);
    378 		expect(par, TOKEN_LBRACE);
    379 
    380 		Node* body = parse_block(par);
    381 		fn->data.function_decl.body = body;
    382 
    383 		fn->data.function_decl.name = (Span) { ident.start, ident.end };
    384 		fn->data.function_decl.return_type = type_node;
    385 		fn->filename = par->filename;
    386 		fn->line = ident.line;
    387 		fn->col = ident.col;
    388 		return fn;
    389 
    390 	} else { // variable
    391 		Node* var = calloc(1, sizeof(Node));
    392 		if (var == NULL) panic("parse_decl_or_func_decl: var: could not alloc");
    393 		var->type = NODE_VAR_DECL;
    394 		var->scope = NULL;
    395 		var->data.var_decl.name = (Span) { ident.start, ident.end };
    396 		var->data.var_decl.type = type_node;
    397 		var->filename = par->filename;
    398 		var->line = ident.line;
    399 		var->col = ident.col;
    400 		Token next_tok = peek(par);
    401 		if (next_tok.type == TOKEN_EQUAL) {
    402 			consume(par); // consume '='
    403 			var->data.var_decl.init = parse_expression(par);
    404 		} else {
    405 			var->data.var_decl.init = NULL;
    406 		}
    407 		expect(par, TOKEN_SEMICOLON);
    408 		return var;
    409 	}
    410 }
    411 
    412 Node*
    413 parse_declarations(Parser* par)
    414 {
    415 	Token tok = peek(par);
    416 	if (tok.type == TOKEN_EOF) return NULL;
    417 
    418 	switch (tok.type) {
    419 	case TOKEN_IDENT:
    420 		return parse_decl_or_func_decl(par);
    421 		break;
    422 	case TOKEN_FN:
    423 	case TOKEN_FX:
    424 		return parse_func_decl(par);
    425 		break;
    426 	default:
    427 		printf("unknown token to parse!: %s\n", token_type_str(tok.type));
    428 		return NULL;
    429 	}
    430 	return NULL;
    431 }
    432 
    433 void
    434 parser_parse(Ast* ast, Parser* par)
    435 {
    436 	assert(par->token_count > 0 && "no tokens to parse");
    437 	Node* node;
    438 	Node* program = make_program_node(par);
    439 	for (;;) {
    440 		node = parse_declarations(par);
    441 		if (node == NULL) break;
    442 		if (program->data.program.len == program->data.program.cap) {
    443 			program->data.program.cap *= 2;
    444 			program->data.program.decl
    445 				= (Node**)realloc(program->data.program.decl, program->data.program.cap * sizeof(Node*));
    446 			assert(program->data.program.decl != NULL && "realloc failed");
    447 		}
    448 		program->data.program.decl[program->data.program.len++] = node;
    449 	}
    450 
    451 	ast->src = par->src;
    452 	ast->node = program;
    453 }