ox

The Ox programming language, compiler and tools (WIP)
Log | Files | Refs | README | LICENSE

parser.c (8829B)


      1 #include "../parser.h"
      2 #include "../utils.h"
      3 
      4 #include <stdio.h>
      5 #include <string.h>
      6 #include <stdbool.h>
      7 #include <assert.h>
      8 
      9 // TODO make sure ALL callocs have been successful
     10 
     11 Parser
     12 parser_init(Lexer* lex)
     13 {
     14 	return (Parser) { .pos = 0,
     15 		.tokens = lex->tokens,
     16 		.token_count = lex->token_count,
     17 		.src = lex->src,
     18 		.src_len = lex->src_len,
     19 		.filename = lex->filename };
     20 }
     21 
     22 Token
     23 peek(Parser* par)
     24 {
     25 	Token t = par->tokens[par->pos];
     26 	return t.type ? t : (Token) { .type = TOKEN_EOF };
     27 }
     28 
     29 Token
     30 peek2(Parser* par)
     31 {
     32 	if (par->pos + 1 >= par->token_count) return (Token) { .type = TOKEN_EOF };
     33 	Token t = par->tokens[par->pos + 1];
     34 	return t.type ? t : (Token) { .type = TOKEN_EOF };
     35 }
     36 
     37 Token
     38 consume(Parser* par)
     39 {
     40 	Token t = par->tokens[par->pos];
     41 	if (!t.type) return (Token) { .type = TOKEN_EOF };
     42 	par->pos++;
     43 	return t;
     44 }
     45 
     46 bool
     47 check(Parser* p, TokenType type)
     48 {
     49 	return (peek(p).type == type);
     50 }
     51 
     52 Token
     53 expect(Parser* par, TokenType type)
     54 {
     55 	Token tok = peek(par);
     56 	if (tok.type != type) {
     57 		const char* name = range_str(par->src, tok.start, tok.end, (char[IDENTSZ]) { 0 });
     58 		panic("Expected %d got '%s' (%d) at %s:%zu:%zu",
     59 			token_type_str(type),
     60 			name,
     61 			tok.type,
     62 			par->filename,
     63 			tok.line,
     64 			tok.col);
     65 		assert(tok.type == type);
     66 	}
     67 	return consume(par);
     68 }
     69 
     70 bool
     71 match(Parser* p, TokenType type)
     72 {
     73 	// printf("matching type %d\n", type);
     74 	if (peek(p).type == type) {
     75 		consume(p);
     76 		return true;
     77 	}
     78 	return false;
     79 }
     80 
     81 static Node*
     82 parse_multiplicative(Parser* par)
     83 {
     84 	Node* node = parse_term(par);
     85 
     86 	for (;;) {
     87 		if (match(par, TOKEN_STAR)) {
     88 			Node* rhs = parse_unary(par);
     89 			node = make_binary_node(OP_MUL, node, rhs);
     90 		} else if (match(par, TOKEN_SLASH)) {
     91 			Node* rhs = parse_unary(par);
     92 			node = make_binary_node(OP_DIV, node, rhs);
     93 		} else if (match(par, TOKEN_PERCENT)) {
     94 			Node* rhs = parse_unary(par);
     95 			node = make_binary_node(OP_MOD, node, rhs);
     96 		} else
     97 			break;
     98 	}
     99 
    100 	return node;
    101 }
    102 // additive: +, -
    103 static Node*
    104 parse_additive(Parser* par)
    105 {
    106 	Node* node = parse_multiplicative(par);
    107 	for (;;) {
    108 		if (match(par, TOKEN_PLUS)) {
    109 			Node* rhs = parse_multiplicative(par);
    110 			node = make_binary_node(OP_PLUS, node, rhs);
    111 		} else if (match(par, TOKEN_MINUS)) {
    112 			Node* rhs = parse_multiplicative(par);
    113 			node = make_binary_node(OP_MINUS, node, rhs);
    114 		} else
    115 			break;
    116 	}
    117 	return node;
    118 }
    119 
    120 static Node*
    121 parse_relational(Parser* par)
    122 {
    123 	Node* node = parse_additive(par);
    124 	for (;;) {
    125 		if (match(par, TOKEN_LT)) {
    126 			Node* rhs = parse_additive(par);
    127 			node = make_binary_node('<', node, rhs);
    128 		} else if (match(par, TOKEN_LT_EQ)) {
    129 			Node* rhs = parse_additive(par);
    130 			node = make_binary_node(OP_LT_EQ, node, rhs);
    131 		} else if (match(par, TOKEN_GT)) {
    132 			Node* rhs = parse_additive(par);
    133 			node = make_binary_node('>', node, rhs);
    134 		} else if (match(par, TOKEN_GT_EQ)) {
    135 			Node* rhs = parse_additive(par);
    136 			node = make_binary_node(OP_GT_EQ, node, rhs);
    137 		} else
    138 			break;
    139 	}
    140 	return node;
    141 }
    142 
    143 static Node*
    144 parse_equality(Parser* par)
    145 {
    146 	Node* node = parse_relational(par);
    147 	for (;;) {
    148 		if (match(par, TOKEN_EQUALITY)) { // "=="
    149 			Node* rhs = parse_relational(par);
    150 			node = make_binary_node(OP_EQUALITY, node, rhs);
    151 		} else if (match(par, TOKEN_INEQUALITY)) { // "!="
    152 			Node* rhs = parse_relational(par);
    153 			node = make_binary_node(OP_INEQUALITY, node, rhs);
    154 		} else
    155 			break;
    156 	}
    157 	return node;
    158 }
    159 
    160 Node*
    161 parse_expression(Parser* par)
    162 {
    163 	return parse_equality(par);
    164 }
    165 
    166 Node*
    167 parse_expression_statement(Parser* par)
    168 {
    169 	Node* expr = parse_expression(par);
    170 	expect(par, TOKEN_SEMICOLON);
    171 
    172 	Node* node = (Node*)calloc(1, sizeof(Node));
    173 	if (node == NULL) panic("parse_expression_statement: could not alloc");
    174 	node->type = NODE_EXPR_STATEMENT;
    175 	node->scope = NULL;
    176 	node->next = NULL;
    177 	node->data.expr_statement.expr = expr;
    178 	return node;
    179 }
    180 
    181 //
    182 // parse_statement
    183 //
    184 Node*
    185 parse_statement(Parser* par)
    186 {
    187 	Token tok = peek(par), tok2 = peek2(par);
    188 
    189 	if (tok.type == TOKEN_LBRACE) {
    190 		consume(par);
    191 		return parse_block(par);
    192 	}
    193 
    194 	if (tok.type == TOKEN_IDENT && tok2.type == TOKEN_IDENT)
    195 		return parse_decl_or_func_decl(par);
    196 
    197 	switch (tok.type) {
    198 	case TOKEN_RETURN:
    199 		return parse_return_statement(par);
    200 	case TOKEN_IF:
    201 		return parse_if(par);
    202 	case TOKEN_WHILE:
    203 		return parse_while(par);
    204 	case TOKEN_FOR:
    205 		return parse_for(par);
    206 	case TOKEN_BREAK:
    207 		return parse_break(par);
    208 	case TOKEN_CONTINUE:
    209 		return parse_continue_statement(par);
    210 	case TOKEN_SEMICOLON:
    211 		expect(par, TOKEN_SEMICOLON);
    212 		return make_empty_statement();
    213 	// case TOKEN_IDENT: // TODO?
    214 	//     if (tok2.type == TOKEN_EQUAL)
    215 	//         return parse_assignment(par);
    216 	//     else
    217 	//         return parse_expression_statement(par);
    218 	default:
    219 		return parse_expression_statement(par);
    220 	}
    221 }
    222 
    223 Node*
    224 parse_block(Parser* par)
    225 {
    226 	Node* stmt;
    227 	Node* block = (Node*)calloc(1, sizeof(Node));
    228 	if (block == NULL) panic("parse_block: could not alloc");
    229 	block->type = NODE_BLOCK;
    230 	block->scope = NULL;
    231 	while (peek(par).type != TOKEN_RBRACE && peek(par).type != TOKEN_EOF) {
    232 		stmt = parse_statement(par);
    233 
    234 		if (block->data.block.cap == block->data.block.len) {
    235 			block->data.block.cap
    236 				= block->data.block.cap == 0 ? 4 : block->data.block.cap * 2;
    237 			block->data.block.stmts = realloc(
    238 				block->data.block.stmts, block->data.block.cap * sizeof(Node*));
    239 			if (block->data.block.stmts == NULL) {
    240 				panic("realloc failed in parse_block");
    241 			}
    242 		}
    243 
    244 		block->data.block.stmts[block->data.block.len++] = stmt;
    245 	}
    246 	expect(par, TOKEN_RBRACE);
    247 	// TODO next the parsing of this was relying on next and cannot
    248 	// anymmore, e.g. print
    249 	return block;
    250 }
    251 
    252 Node*
    253 parse_declaration_statement(Parser* par)
    254 {
    255 	Node* type = parse_type(par);           // consumes the type (e.g., "float")
    256 	Token ident = expect(par, TOKEN_IDENT); // variable or function name
    257 	if (match(par, TOKEN_LPAREN)) {
    258 		perror("called a var decl but this looks to be a func decl");
    259 	}
    260 
    261 	Node* var = calloc(1, sizeof(Node));
    262 	if (var == NULL) panic("parse_declaration_statement: could not alloc");
    263 	var->type = NODE_VAR_DECL;
    264 	var->scope = NULL;
    265 	var->data.var_decl.name = (Span) { ident.start, ident.end };
    266 	var->data.var_decl.type = type;
    267 	Token next_tok = peek(par);
    268 	if (next_tok.type == TOKEN_EQUAL) {
    269 		consume(par);
    270 		var->data.var_decl.init = parse_expression(par);
    271 	} else {
    272 		consume(par);
    273 		var->data.var_decl.init = NULL;
    274 	}
    275 	expect(par, TOKEN_SEMICOLON);
    276 	return var;
    277 }
    278 
    279 Node*
    280 parse_decl_or_func_decl(Parser* par)
    281 {
    282 	Node* type = parse_type(par);           // consumes the type (e.g., "float")
    283 	Token ident = expect(par, TOKEN_IDENT); // variable or function name
    284 
    285 	if (match(par, TOKEN_LPAREN)) { // function
    286 		Node* fn = calloc(1, sizeof(Node));
    287 		if (fn == NULL) panic("parse_decl_or_func_decl: func: could not alloc");
    288 
    289 		fn->type = NODE_FUNCTION_DECL;
    290 		fn->scope = NULL;
    291 
    292 		NodeVec v = parse_param_list(par);
    293 		fn->data.function_decl.params = v.items;
    294 		fn->data.function_decl.p_cap = v.cap;
    295 		fn->data.function_decl.p_len = v.len;
    296 
    297 		expect(par, TOKEN_RPAREN);
    298 		expect(par, TOKEN_LBRACE);
    299 
    300 		Node* body = parse_block(par);
    301 		fn->data.function_decl.body = body;
    302 
    303 		fn->data.function_decl.name = (Span) { ident.start, ident.end };
    304 		fn->data.function_decl.return_type = type;
    305 		fn->filename = par->filename;
    306 		fn->line = ident.line;
    307 		fn->col = ident.col;
    308 		return fn;
    309 
    310 	} else { // variable
    311 		Node* var = calloc(1, sizeof(Node));
    312 		if (var == NULL) panic("parse_decl_or_func_decl: var: could not alloc");
    313 		var->type = NODE_VAR_DECL;
    314 		var->scope = NULL;
    315 		var->data.var_decl.name = (Span) { ident.start, ident.end };
    316 		var->data.var_decl.type = type;
    317 		var->filename = par->filename;
    318 		var->line = ident.line;
    319 		var->col = ident.col;
    320 		Token next_tok = peek(par);
    321 		if (next_tok.type == TOKEN_EQUAL) {
    322 			consume(par); // consume '='
    323 			var->data.var_decl.init = parse_expression(par);
    324 		} else {
    325 			var->data.var_decl.init = NULL;
    326 		}
    327 		expect(par, TOKEN_SEMICOLON);
    328 		return var;
    329 	}
    330 }
    331 
    332 Node*
    333 parse_declarations(Parser* par)
    334 {
    335 	Token tok = peek(par);
    336 	if (tok.type == TOKEN_EOF) return NULL;
    337 
    338 	switch (tok.type) {
    339 	case TOKEN_IDENT:
    340 		return parse_decl_or_func_decl(par);
    341 		break;
    342 	default:
    343 		printf("unknown token to parse!: %s\n", token_type_str(tok.type));
    344 		return NULL;
    345 	}
    346 	return NULL;
    347 }
    348 
    349 void
    350 parser_parse(Ast* ast, Parser* par)
    351 {
    352 	assert(par->token_count > 0 && "no tokens to parse");
    353 	Node* node;
    354 	Node* program = make_program_node();
    355 	for (;;) {
    356 		node = parse_declarations(par);
    357 		if (node == NULL) break;
    358 		if (program->data.program.len == program->data.program.cap) {
    359 			program->data.program.cap *= 2;
    360 			program->data.program.decl = (Node**)realloc(program->data.program.decl,
    361 				program->data.program.cap * sizeof(Node*));
    362 			assert(program->data.program.decl != NULL && "realloc failed");
    363 		}
    364 		program->data.program.decl[program->data.program.len++] = node;
    365 	}
    366 
    367 	ast->src = par->src;
    368 	ast->node = program;
    369 }