ox

The Ox programming language, compiler and tools (WIP)
Log | Files | Refs | README | LICENSE

parser.c (9734B)


      1 #include "../parser.h"
      2 #include "../utils.h"
      3 
      4 #include <stdio.h>
      5 #include <string.h>
      6 #include <stdbool.h>
      7 #include <assert.h>
      8 
      9 // TODO make sure ALL callocs have been successful
     10 
     11 Parser
     12 parser_init(Lexer* lex)
     13 {
     14 	return (Parser) {
     15 		.pos = 0, .tokens = lex->tokens, .token_count = lex->token_count, .src = lex->src, .src_len = lex->src_len, .filename = lex->filename
     16 	};
     17 }
     18 
     19 Token
     20 peek(Parser* par)
     21 {
     22 	Token t = par->tokens[par->pos];
     23 	return t.type ? t : (Token) { .type = TOKEN_EOF };
     24 }
     25 
     26 Token
     27 peek2(Parser* par)
     28 {
     29 	if (par->pos + 1 >= par->token_count) return (Token) { .type = TOKEN_EOF };
     30 	Token t = par->tokens[par->pos + 1];
     31 	return t.type ? t : (Token) { .type = TOKEN_EOF };
     32 }
     33 
     34 Token
     35 consume(Parser* par)
     36 {
     37 	Token t = par->tokens[par->pos];
     38 	if (!t.type) return (Token) { .type = TOKEN_EOF };
     39 	par->pos++;
     40 	return t;
     41 }
     42 
     43 bool
     44 check(Parser* p, TokenType type)
     45 {
     46 	return (peek(p).type == type);
     47 }
     48 
     49 Token
     50 expect(Parser* par, TokenType type)
     51 {
     52 	Token tok = peek(par);
     53 	if (tok.type != type) {
     54 		const char* name = range_str(par->src, tok.start, tok.end, (char[IDENTSZ]) { 0 });
     55 		panic("Expected %s, but got '%s' (%d) at %s:%zu:%zu", token_type_str(type), name, tok.type, par->filename, tok.line, tok.col);
     56 		assert(tok.type == type);
     57 	}
     58 	return consume(par);
     59 }
     60 
     61 bool
     62 match(Parser* p, TokenType type)
     63 {
     64 	// printf("matching type %d\n", type);
     65 	if (peek(p).type == type) {
     66 		consume(p);
     67 		return true;
     68 	}
     69 	return false;
     70 }
     71 
     72 static Node*
     73 parse_multiplicative(Parser* par)
     74 {
     75 	Node* node = parse_term(par);
     76 
     77 	for (;;) {
     78 		if (match(par, TOKEN_STAR)) {
     79 			Node* rhs = parse_unary(par);
     80 			node = make_binary_node(OP_MUL, node, rhs);
     81 		} else if (match(par, TOKEN_SLASH)) {
     82 			Node* rhs = parse_unary(par);
     83 			node = make_binary_node(OP_DIV, node, rhs);
     84 		} else if (match(par, TOKEN_PERCENT)) {
     85 			Node* rhs = parse_unary(par);
     86 			node = make_binary_node(OP_MOD, node, rhs);
     87 		} else
     88 			break;
     89 	}
     90 
     91 	return node;
     92 }
     93 // additive: +, -
     94 static Node*
     95 parse_additive(Parser* par)
     96 {
     97 	Node* node = parse_multiplicative(par);
     98 	for (;;) {
     99 		if (match(par, TOKEN_PLUS)) {
    100 			Node* rhs = parse_multiplicative(par);
    101 			node = make_binary_node(OP_PLUS, node, rhs);
    102 		} else if (match(par, TOKEN_MINUS)) {
    103 			Node* rhs = parse_multiplicative(par);
    104 			node = make_binary_node(OP_MINUS, node, rhs);
    105 		} else
    106 			break;
    107 	}
    108 	return node;
    109 }
    110 
    111 static Node*
    112 parse_relational(Parser* par)
    113 {
    114 	Node* node = parse_additive(par);
    115 	for (;;) {
    116 		if (match(par, TOKEN_LT)) {
    117 			Node* rhs = parse_additive(par);
    118 			node = make_binary_node(OP_LT, node, rhs);
    119 		} else if (match(par, TOKEN_LT_EQ)) {
    120 			Node* rhs = parse_additive(par);
    121 			node = make_binary_node(OP_LT_EQ, node, rhs);
    122 		} else if (match(par, TOKEN_GT)) {
    123 			Node* rhs = parse_additive(par);
    124 			node = make_binary_node(OP_GT, node, rhs);
    125 		} else if (match(par, TOKEN_GT_EQ)) {
    126 			Node* rhs = parse_additive(par);
    127 			node = make_binary_node(OP_GT_EQ, node, rhs);
    128 		} else
    129 			break;
    130 	}
    131 	return node;
    132 }
    133 
    134 static Node*
    135 parse_equality(Parser* par)
    136 {
    137 	Node* node = parse_relational(par);
    138 
    139 	// TODO  handle all enum gcc_jit_comparison
    140 	// {
    141 	//   /* (EXPR_A) == (EXPR_B).  */
    142 	//   GCC_JIT_COMPARISON_EQ,
    143 
    144 	//   /* (EXPR_A) != (EXPR_B).  */
    145 	//   GCC_JIT_COMPARISON_NE,
    146 
    147 	//   /* (EXPR_A) < (EXPR_B).  */
    148 	//   GCC_JIT_COMPARISON_LT,
    149 
    150 	//   /* (EXPR_A) <=(EXPR_B).  */
    151 	//   GCC_JIT_COMPARISON_LE,
    152 
    153 	//   /* (EXPR_A) > (EXPR_B).  */
    154 	//   GCC_JIT_COMPARISON_GT,
    155 
    156 	//   /* (EXPR_A) >= (EXPR_B).  */
    157 	//   GCC_JIT_COMPARISON_GE
    158 	// };
    159 
    160 	for (;;) {
    161 		if (match(par, TOKEN_EQUALITY)) { // "=="
    162 			Node* rhs = parse_relational(par);
    163 			node = make_binary_node(OP_EQUALITY, node, rhs);
    164 		} else if (match(par, TOKEN_INEQUALITY)) { // "!="
    165 			Node* rhs = parse_relational(par);
    166 			node = make_binary_node(OP_INEQUALITY, node, rhs);
    167 		} else {
    168 			// do not error out for other things like `=`, `&`, etc. let it pass.
    169 			break;
    170 		}
    171 	}
    172 	return node;
    173 }
    174 
    175 Node*
    176 parse_assignment_expr(Parser* par)
    177 {
    178 	Node* left = parse_equality(par);
    179 
    180 	if (match(par, TOKEN_EQUAL)) {
    181 		Node* right = parse_assignment_expr(par); // right-associative
    182 		if (left->type != NODE_IDENT) panic("invalid assignment target");
    183 		return make_binary_node(OP_ASSIGN, left, right);
    184 	}
    185 	return left;
    186 }
    187 
    188 Node*
    189 parse_expression(Parser* par)
    190 {
    191 	return parse_assignment_expr(par);
    192 }
    193 
    194 Node*
    195 parse_expression_statement(Parser* par)
    196 {
    197 	Node* expr = parse_expression(par);
    198 	expect(par, TOKEN_SEMICOLON);
    199 
    200 	Node* node = (Node*)calloc(1, sizeof(Node));
    201 	if (node == NULL) panic("parse_expression_statement: could not alloc");
    202 	node->type = NODE_EXPR_STATEMENT;
    203 	node->scope = NULL;
    204 	node->next = NULL;
    205 	node->data.expr_statement.expr = expr;
    206 	return node;
    207 }
    208 
    209 //
    210 // parse_statement
    211 //
    212 Node*
    213 parse_statement(Parser* par)
    214 {
    215 	Token tok = peek(par), tok2 = peek2(par);
    216 
    217 	if (tok.type == TOKEN_LBRACE) {
    218 		consume(par);
    219 		return parse_block(par);
    220 	}
    221 
    222 	if (tok.type == TOKEN_IDENT && tok2.type == TOKEN_IDENT) { return parse_decl_or_func_decl(par); }
    223 
    224 	if (tok.type == TOKEN_IDENT && tok2.type == TOKEN_EQUAL) { return parse_assignment(par); }
    225 
    226 	switch (tok.type) {
    227 	case TOKEN_RETURN:
    228 		return parse_return_statement(par);
    229 	case TOKEN_IF:
    230 		return parse_if(par);
    231 	case TOKEN_WHILE:
    232 		return parse_while(par);
    233 	case TOKEN_FOR:
    234 		return parse_for(par);
    235 	case TOKEN_BREAK:
    236 		return parse_break(par);
    237 	case TOKEN_CONTINUE:
    238 		return parse_continue_statement(par);
    239 	case TOKEN_SEMICOLON:
    240 		expect(par, TOKEN_SEMICOLON);
    241 		return make_empty_statement();
    242 	// case TOKEN_IDENT: // TODO?
    243 	//     if (tok2.type == TOKEN_EQUAL)
    244 	//         return parse_assignment(par);
    245 	//     else
    246 	//         return parse_expression_statement(par);
    247 	default:
    248 		return parse_expression_statement(par);
    249 	}
    250 }
    251 
    252 Node*
    253 parse_block(Parser* par)
    254 {
    255 	Node* stmt;
    256 	Node* block = (Node*)calloc(1, sizeof(Node));
    257 	if (block == NULL) panic("parse_block: could not alloc");
    258 	block->type = NODE_BLOCK;
    259 	block->scope = NULL;
    260 	while (peek(par).type != TOKEN_RBRACE && peek(par).type != TOKEN_EOF) {
    261 		stmt = parse_statement(par);
    262 
    263 		if (block->data.block.cap == block->data.block.len) {
    264 			block->data.block.cap = block->data.block.cap == 0 ? 4 : block->data.block.cap * 2;
    265 			block->data.block.stmts = realloc(block->data.block.stmts, block->data.block.cap * sizeof(Node*));
    266 			if (block->data.block.stmts == NULL) { panic("realloc failed in parse_block"); }
    267 		}
    268 
    269 		block->data.block.stmts[block->data.block.len++] = stmt;
    270 	}
    271 	expect(par, TOKEN_RBRACE);
    272 	// TODO next the parsing of this was relying on next and cannot
    273 	// anymmore, e.g. print
    274 	return block;
    275 }
    276 
    277 Node*
    278 parse_declaration_statement(Parser* par)
    279 {
    280 	Node* type = parse_type(par);           // consumes the type (e.g., "float")
    281 	Token ident = expect(par, TOKEN_IDENT); // variable or function name
    282 	if (match(par, TOKEN_LPAREN)) { perror("called a var decl but this looks to be a func decl"); }
    283 
    284 	Node* var = calloc(1, sizeof(Node));
    285 	if (var == NULL) panic("parse_declaration_statement: could not alloc");
    286 	var->type = NODE_VAR_DECL;
    287 	var->scope = NULL;
    288 	var->data.var_decl.name = (Span) { ident.start, ident.end };
    289 	var->data.var_decl.type = type;
    290 	Token next_tok = peek(par);
    291 	if (next_tok.type == TOKEN_EQUAL) {
    292 		consume(par);
    293 		var->data.var_decl.init = parse_expression(par);
    294 	} else {
    295 		consume(par);
    296 		var->data.var_decl.init = NULL;
    297 	}
    298 	expect(par, TOKEN_SEMICOLON);
    299 	return var;
    300 }
    301 
    302 Node*
    303 parse_decl_or_func_decl(Parser* par)
    304 {
    305 	Node* type = parse_type(par);           // consumes the type (e.g., "float")
    306 	Token ident = expect(par, TOKEN_IDENT); // variable or function name
    307 
    308 	if (match(par, TOKEN_LPAREN)) { // function
    309 		Node* fn = calloc(1, sizeof(Node));
    310 		if (fn == NULL) panic("parse_decl_or_func_decl: func: could not alloc");
    311 
    312 		fn->type = NODE_FUNCTION_DECL;
    313 		fn->scope = NULL;
    314 
    315 		NodeVec v = parse_param_list(par);
    316 		fn->data.function_decl.params = v.items;
    317 		fn->data.function_decl.p_cap = v.cap;
    318 		fn->data.function_decl.p_len = v.len;
    319 
    320 		expect(par, TOKEN_RPAREN);
    321 		expect(par, TOKEN_LBRACE);
    322 
    323 		Node* body = parse_block(par);
    324 		fn->data.function_decl.body = body;
    325 
    326 		fn->data.function_decl.name = (Span) { ident.start, ident.end };
    327 		fn->data.function_decl.return_type = type;
    328 		fn->filename = par->filename;
    329 		fn->line = ident.line;
    330 		fn->col = ident.col;
    331 		return fn;
    332 
    333 	} else { // variable
    334 		Node* var = calloc(1, sizeof(Node));
    335 		if (var == NULL) panic("parse_decl_or_func_decl: var: could not alloc");
    336 		var->type = NODE_VAR_DECL;
    337 		var->scope = NULL;
    338 		var->data.var_decl.name = (Span) { ident.start, ident.end };
    339 		var->data.var_decl.type = type;
    340 		var->filename = par->filename;
    341 		var->line = ident.line;
    342 		var->col = ident.col;
    343 		Token next_tok = peek(par);
    344 		if (next_tok.type == TOKEN_EQUAL) {
    345 			consume(par); // consume '='
    346 			var->data.var_decl.init = parse_expression(par);
    347 		} else {
    348 			var->data.var_decl.init = NULL;
    349 		}
    350 		expect(par, TOKEN_SEMICOLON);
    351 		return var;
    352 	}
    353 }
    354 
    355 Node*
    356 parse_declarations(Parser* par)
    357 {
    358 	Token tok = peek(par);
    359 	if (tok.type == TOKEN_EOF) return NULL;
    360 
    361 	switch (tok.type) {
    362 	case TOKEN_IDENT:
    363 		return parse_decl_or_func_decl(par);
    364 		break;
    365 	default:
    366 		printf("unknown token to parse!: %s\n", token_type_str(tok.type));
    367 		return NULL;
    368 	}
    369 	return NULL;
    370 }
    371 
    372 void
    373 parser_parse(Ast* ast, Parser* par)
    374 {
    375 	assert(par->token_count > 0 && "no tokens to parse");
    376 	Node* node;
    377 	Node* program = make_program_node(par);
    378 	for (;;) {
    379 		node = parse_declarations(par);
    380 		if (node == NULL) break;
    381 		if (program->data.program.len == program->data.program.cap) {
    382 			program->data.program.cap *= 2;
    383 			program->data.program.decl = (Node**)realloc(program->data.program.decl, program->data.program.cap * sizeof(Node*));
    384 			assert(program->data.program.decl != NULL && "realloc failed");
    385 		}
    386 		program->data.program.decl[program->data.program.len++] = node;
    387 	}
    388 
    389 	ast->src = par->src;
    390 	ast->node = program;
    391 }