parser.c (11221B)
1 2 #include "../parser.h" 3 #include "../utils.h" 4 5 #include <stdio.h> 6 #include <string.h> 7 #include <stdbool.h> 8 #include <assert.h> 9 10 // TODO make sure ALL callocs have been successful 11 12 Parser 13 parser_init(Lexer* lex) 14 { 15 return (Parser) { .pos = 0, 16 .tokens = lex->tokens, 17 .token_count = lex->token_count, 18 .src = lex->src, 19 .src_len = lex->src_len, 20 .filename = lex->filename }; 21 } 22 23 Token 24 peek(Parser* par) 25 { 26 Token t = par->tokens[par->pos]; 27 return t.type ? t : (Token) { .type = TOKEN_EOF }; 28 } 29 30 Token 31 peek2(Parser* par) 32 { 33 if (par->pos + 1 >= par->token_count) return (Token) { .type = TOKEN_EOF }; 34 Token t = par->tokens[par->pos + 1]; 35 return t.type ? t : (Token) { .type = TOKEN_EOF }; 36 } 37 38 Token 39 consume(Parser* par) 40 { 41 Token t = par->tokens[par->pos]; 42 if (!t.type) return (Token) { .type = TOKEN_EOF }; 43 par->pos++; 44 return t; 45 } 46 47 bool 48 check(Parser* p, TokenType type) 49 { 50 return (peek(p).type == type); 51 } 52 53 Token 54 expect(Parser* par, TokenType type) 55 { 56 Token tok = peek(par); 57 if (tok.type != type) { 58 const char* name = range_str(par->src, tok.start, tok.end, (char[IDENTSZ]) { 0 }); 59 printf("name: %s\n", name); 60 panic("Expected %s, but got '%s' (%d) at %s:%zu:%zu", 61 token_type_str(type), 62 name, 63 tok.type, 64 par->filename, 65 tok.line, 66 tok.col); 67 assert(tok.type == type); 68 } 69 return consume(par); 70 } 71 72 bool 73 match(Parser* p, TokenType type) 74 { 75 // printf("matching type %d\n", type); 76 if (peek(p).type == type) { 77 consume(p); 78 return true; 79 } 80 return false; 81 } 82 83 static Node* 84 parse_multiplicative(Parser* par) 85 { 86 Node* node = parse_term(par); 87 88 for (;;) { 89 if (match(par, TOKEN_STAR)) { 90 Node* rhs = parse_unary(par); 91 node = make_binary_node(OP_MUL, node, rhs); 92 } else if (match(par, TOKEN_SLASH)) { 93 Node* rhs = parse_unary(par); 94 node = make_binary_node(OP_DIV, node, rhs); 95 } else if (match(par, TOKEN_PERCENT)) { 96 Node* rhs = parse_unary(par); 97 node = make_binary_node(OP_MOD, node, rhs); 98 } else 99 break; 100 } 101 102 return node; 103 } 104 // additive: +, - 105 static Node* 106 parse_additive(Parser* par) 107 { 108 Node* node = parse_multiplicative(par); 109 for (;;) { 110 if (match(par, TOKEN_PLUS)) { 111 Node* rhs = parse_multiplicative(par); 112 node = make_binary_node(OP_PLUS, node, rhs); 113 } else if (match(par, TOKEN_MINUS)) { 114 Node* rhs = parse_multiplicative(par); 115 node = make_binary_node(OP_MINUS, node, rhs); 116 } else 117 break; 118 } 119 return node; 120 } 121 122 static Node* 123 parse_relational(Parser* par) 124 { 125 Node* node = parse_additive(par); 126 for (;;) { 127 if (match(par, TOKEN_LT)) { 128 Node* rhs = parse_additive(par); 129 node = make_binary_node(OP_LT, node, rhs); 130 } else if (match(par, TOKEN_LT_EQ)) { 131 Node* rhs = parse_additive(par); 132 node = make_binary_node(OP_LT_EQ, node, rhs); 133 } else if (match(par, TOKEN_GT)) { 134 Node* rhs = parse_additive(par); 135 node = make_binary_node(OP_GT, node, rhs); 136 } else if (match(par, TOKEN_GT_EQ)) { 137 Node* rhs = parse_additive(par); 138 node = make_binary_node(OP_GT_EQ, node, rhs); 139 } else 140 break; 141 } 142 return node; 143 } 144 145 static Node* 146 parse_equality(Parser* par) 147 { 148 Node* node = parse_relational(par); 149 150 // TODO handle all enum gcc_jit_comparison 151 // { 152 // /* (EXPR_A) == (EXPR_B). */ 153 // GCC_JIT_COMPARISON_EQ, 154 155 // /* (EXPR_A) != (EXPR_B). */ 156 // GCC_JIT_COMPARISON_NE, 157 158 // /* (EXPR_A) < (EXPR_B). */ 159 // GCC_JIT_COMPARISON_LT, 160 161 // /* (EXPR_A) <=(EXPR_B). */ 162 // GCC_JIT_COMPARISON_LE, 163 164 // /* (EXPR_A) > (EXPR_B). */ 165 // GCC_JIT_COMPARISON_GT, 166 167 // /* (EXPR_A) >= (EXPR_B). */ 168 // GCC_JIT_COMPARISON_GE 169 // }; 170 171 for (;;) { 172 if (match(par, TOKEN_EQUALITY)) { // "==" 173 Node* rhs = parse_relational(par); 174 node = make_binary_node(OP_EQUALITY, node, rhs); 175 } else if (match(par, TOKEN_INEQUALITY)) { // "!=" 176 Node* rhs = parse_relational(par); 177 node = make_binary_node(OP_INEQUALITY, node, rhs); 178 } else { 179 // do not error out for other things like `=`, `&`, etc. let it pass. 180 break; 181 } 182 } 183 return node; 184 } 185 186 Node* 187 parse_assignment_expr(Parser* par) 188 { 189 Node* left = parse_equality(par); 190 191 if (match(par, TOKEN_EQUAL)) { 192 Node* right = parse_assignment_expr(par); // right-associative 193 if (left->type != NODE_IDENT) panic("invalid assignment target"); 194 return make_binary_node(OP_ASSIGN, left, right); 195 } 196 return left; 197 } 198 199 Node* 200 parse_expression(Parser* par) 201 { 202 return parse_assignment_expr(par); 203 } 204 205 Node* 206 parse_expression_statement(Parser* par) 207 { 208 Node* expr = parse_expression(par); 209 expect(par, TOKEN_SEMICOLON); 210 211 Node* node = (Node*)calloc(1, sizeof(Node)); 212 if (node == NULL) panic("parse_expression_statement: could not alloc"); 213 node->type = NODE_EXPR_STATEMENT; 214 node->scope = NULL; 215 node->next = NULL; 216 node->data.expr_statement.expr = expr; 217 return node; 218 } 219 220 // 221 // parse_statement 222 // 223 Node* 224 parse_statement(Parser* par) 225 { 226 Token tok = peek(par), tok2 = peek2(par); 227 228 if (tok.type == TOKEN_LBRACE) { 229 consume(par); 230 return parse_block(par); 231 } 232 233 bool tok_is_a_type = (tok.type == TOKEN_IDENT || tok.type == TOKEN_VARIADIC || tok.type == TOKEN_COMP_TIME); 234 235 if (tok_is_a_type && tok2.type == TOKEN_IDENT) { return parse_decl_or_func_decl(par); } // TODO @next FN/FX handling here too 236 if (tok_is_a_type && tok2.type == TOKEN_EQUAL) { return parse_assignment(par); } 237 238 switch (tok.type) { 239 case TOKEN_RETURN: 240 return parse_return_statement(par); 241 case TOKEN_IF: 242 return parse_if(par); 243 case TOKEN_WHILE: 244 return parse_while(par); 245 case TOKEN_FOR: 246 return parse_for(par); 247 case TOKEN_BREAK: 248 return parse_break(par); 249 case TOKEN_CONTINUE: 250 return parse_continue_statement(par); 251 case TOKEN_SEMICOLON: 252 expect(par, TOKEN_SEMICOLON); 253 return make_empty_statement(); 254 // case TOKEN_IDENT: // TODO? 255 // if (tok2.type == TOKEN_EQUAL) 256 // return parse_assignment(par); 257 // else 258 // return parse_expression_statement(par); 259 default: 260 return parse_expression_statement(par); 261 } 262 } 263 264 Node* 265 parse_block(Parser* par) 266 { 267 Node* stmt; 268 Node* block = (Node*)calloc(1, sizeof(Node)); 269 if (block == NULL) panic("parse_block: could not alloc"); 270 block->type = NODE_BLOCK; 271 block->scope = NULL; 272 while (peek(par).type != TOKEN_RBRACE && peek(par).type != TOKEN_EOF) { 273 stmt = parse_statement(par); 274 275 if (block->data.block.cap == block->data.block.len) { 276 block->data.block.cap = block->data.block.cap == 0 ? 4 : block->data.block.cap * 2; 277 block->data.block.stmts = realloc(block->data.block.stmts, block->data.block.cap * sizeof(Node*)); 278 if (block->data.block.stmts == NULL) { panic("realloc failed in parse_block"); } 279 } 280 281 block->data.block.stmts[block->data.block.len++] = stmt; 282 } 283 expect(par, TOKEN_RBRACE); // TODO move out to parent caller 284 // TODO next the parsing of this was relying on next and cannot 285 // anymmore, e.g. print 286 return block; 287 } 288 289 Node* 290 parse_declaration_statement(Parser* par) 291 { 292 Node* type_node = parse_type(par); // consumes the type (e.g., "float") 293 Token ident = expect(par, TOKEN_IDENT); // variable or function name 294 if (match(par, TOKEN_LPAREN)) { perror("called a var decl but this looks to be a func decl"); } 295 296 Node* var = calloc(1, sizeof(Node)); 297 if (var == NULL) panic("parse_declaration_statement: could not alloc"); 298 var->type = NODE_VAR_DECL; 299 var->scope = NULL; 300 var->data.var_decl.name = (Span) { ident.start, ident.end }; 301 var->data.var_decl.type = type_node; 302 Token next_tok = peek(par); 303 if (next_tok.type == TOKEN_EQUAL) { 304 consume(par); 305 var->data.var_decl.init = parse_expression(par); 306 } else { 307 consume(par); 308 var->data.var_decl.init = NULL; 309 } 310 expect(par, TOKEN_SEMICOLON); 311 return var; 312 } 313 314 Node* 315 parse_func_decl(Parser* par) 316 { 317 bool exported = false; // TODO use exported in the sem/typer/matcher/gen 318 if (match(par, TOKEN_FX)) { 319 // parse exported function 320 exported = true; 321 } else if (match(par, TOKEN_FN)) { 322 // parse internal function 323 } else { 324 panic("parse_func_decl: func: expected fn/fx"); 325 } 326 327 Token ident = expect(par, TOKEN_IDENT); // variable or function name 328 329 expect(par, TOKEN_LPAREN); 330 NodeVec params = parse_param_list(par); 331 expect(par, TOKEN_RPAREN); 332 333 Node* return_type = parse_type_or_void(par); 334 335 expect(par, TOKEN_LBRACE); 336 Node* body = parse_block(par); 337 338 Node* fn = calloc(1, sizeof(Node)); 339 if (fn == NULL) panic("parse_func_decl: func: could not alloc"); 340 341 fn->type = NODE_FUNCTION_DECL; 342 fn->scope = NULL; 343 344 fn->data.function_decl.params = params.items; 345 fn->data.function_decl.p_cap = params.cap; 346 fn->data.function_decl.p_len = params.len; 347 348 fn->data.function_decl.body = body; 349 350 fn->data.function_decl.name = (Span) { ident.start, ident.end }; 351 fn->data.function_decl.return_type = return_type; 352 fn->data.function_decl.exported = exported; 353 fn->filename = par->filename; 354 fn->line = ident.line; 355 fn->col = ident.col; 356 return fn; 357 } 358 359 Node* 360 parse_decl_or_func_decl(Parser* par) 361 { 362 Node* type_node = parse_type(par); // consumes the type (e.g., "float") 363 Token ident = expect(par, TOKEN_IDENT); // variable or function name 364 365 if (match(par, TOKEN_LPAREN)) { // function 366 Node* fn = calloc(1, sizeof(Node)); 367 if (fn == NULL) panic("parse_decl_or_func_decl: func: could not alloc"); 368 369 fn->type = NODE_FUNCTION_DECL; 370 fn->scope = NULL; 371 372 NodeVec v = parse_param_list(par); 373 fn->data.function_decl.params = v.items; 374 fn->data.function_decl.p_cap = v.cap; 375 fn->data.function_decl.p_len = v.len; 376 377 expect(par, TOKEN_RPAREN); 378 expect(par, TOKEN_LBRACE); 379 380 Node* body = parse_block(par); 381 fn->data.function_decl.body = body; 382 383 fn->data.function_decl.name = (Span) { ident.start, ident.end }; 384 fn->data.function_decl.return_type = type_node; 385 fn->filename = par->filename; 386 fn->line = ident.line; 387 fn->col = ident.col; 388 return fn; 389 390 } else { // variable 391 Node* var = calloc(1, sizeof(Node)); 392 if (var == NULL) panic("parse_decl_or_func_decl: var: could not alloc"); 393 var->type = NODE_VAR_DECL; 394 var->scope = NULL; 395 var->data.var_decl.name = (Span) { ident.start, ident.end }; 396 var->data.var_decl.type = type_node; 397 var->filename = par->filename; 398 var->line = ident.line; 399 var->col = ident.col; 400 Token next_tok = peek(par); 401 if (next_tok.type == TOKEN_EQUAL) { 402 consume(par); // consume '=' 403 var->data.var_decl.init = parse_expression(par); 404 } else { 405 var->data.var_decl.init = NULL; 406 } 407 expect(par, TOKEN_SEMICOLON); 408 return var; 409 } 410 } 411 412 Node* 413 parse_declarations(Parser* par) 414 { 415 Token tok = peek(par); 416 if (tok.type == TOKEN_EOF) return NULL; 417 418 switch (tok.type) { 419 case TOKEN_IDENT: 420 return parse_decl_or_func_decl(par); 421 break; 422 case TOKEN_FN: 423 case TOKEN_FX: 424 return parse_func_decl(par); 425 break; 426 default: 427 printf("unknown token to parse!: %s\n", token_type_str(tok.type)); 428 return NULL; 429 } 430 return NULL; 431 } 432 433 void 434 parser_parse(Ast* ast, Parser* par) 435 { 436 assert(par->token_count > 0 && "no tokens to parse"); 437 Node* node; 438 Node* program = make_program_node(par); 439 for (;;) { 440 node = parse_declarations(par); 441 if (node == NULL) break; 442 if (program->data.program.len == program->data.program.cap) { 443 program->data.program.cap *= 2; 444 program->data.program.decl 445 = (Node**)realloc(program->data.program.decl, program->data.program.cap * sizeof(Node*)); 446 assert(program->data.program.decl != NULL && "realloc failed"); 447 } 448 program->data.program.decl[program->data.program.len++] = node; 449 } 450 451 ast->src = par->src; 452 ast->node = program; 453 }