parser.c (9734B)
1 #include "../parser.h" 2 #include "../utils.h" 3 4 #include <stdio.h> 5 #include <string.h> 6 #include <stdbool.h> 7 #include <assert.h> 8 9 // TODO make sure ALL callocs have been successful 10 11 Parser 12 parser_init(Lexer* lex) 13 { 14 return (Parser) { 15 .pos = 0, .tokens = lex->tokens, .token_count = lex->token_count, .src = lex->src, .src_len = lex->src_len, .filename = lex->filename 16 }; 17 } 18 19 Token 20 peek(Parser* par) 21 { 22 Token t = par->tokens[par->pos]; 23 return t.type ? t : (Token) { .type = TOKEN_EOF }; 24 } 25 26 Token 27 peek2(Parser* par) 28 { 29 if (par->pos + 1 >= par->token_count) return (Token) { .type = TOKEN_EOF }; 30 Token t = par->tokens[par->pos + 1]; 31 return t.type ? t : (Token) { .type = TOKEN_EOF }; 32 } 33 34 Token 35 consume(Parser* par) 36 { 37 Token t = par->tokens[par->pos]; 38 if (!t.type) return (Token) { .type = TOKEN_EOF }; 39 par->pos++; 40 return t; 41 } 42 43 bool 44 check(Parser* p, TokenType type) 45 { 46 return (peek(p).type == type); 47 } 48 49 Token 50 expect(Parser* par, TokenType type) 51 { 52 Token tok = peek(par); 53 if (tok.type != type) { 54 const char* name = range_str(par->src, tok.start, tok.end, (char[IDENTSZ]) { 0 }); 55 panic("Expected %s, but got '%s' (%d) at %s:%zu:%zu", token_type_str(type), name, tok.type, par->filename, tok.line, tok.col); 56 assert(tok.type == type); 57 } 58 return consume(par); 59 } 60 61 bool 62 match(Parser* p, TokenType type) 63 { 64 // printf("matching type %d\n", type); 65 if (peek(p).type == type) { 66 consume(p); 67 return true; 68 } 69 return false; 70 } 71 72 static Node* 73 parse_multiplicative(Parser* par) 74 { 75 Node* node = parse_term(par); 76 77 for (;;) { 78 if (match(par, TOKEN_STAR)) { 79 Node* rhs = parse_unary(par); 80 node = make_binary_node(OP_MUL, node, rhs); 81 } else if (match(par, TOKEN_SLASH)) { 82 Node* rhs = parse_unary(par); 83 node = make_binary_node(OP_DIV, node, rhs); 84 } else if (match(par, TOKEN_PERCENT)) { 85 Node* rhs = parse_unary(par); 86 node = make_binary_node(OP_MOD, node, rhs); 87 } else 88 break; 89 } 90 91 return node; 92 } 93 // additive: +, - 94 static Node* 95 parse_additive(Parser* par) 96 { 97 Node* node = parse_multiplicative(par); 98 for (;;) { 99 if (match(par, TOKEN_PLUS)) { 100 Node* rhs = parse_multiplicative(par); 101 node = make_binary_node(OP_PLUS, node, rhs); 102 } else if (match(par, TOKEN_MINUS)) { 103 Node* rhs = parse_multiplicative(par); 104 node = make_binary_node(OP_MINUS, node, rhs); 105 } else 106 break; 107 } 108 return node; 109 } 110 111 static Node* 112 parse_relational(Parser* par) 113 { 114 Node* node = parse_additive(par); 115 for (;;) { 116 if (match(par, TOKEN_LT)) { 117 Node* rhs = parse_additive(par); 118 node = make_binary_node(OP_LT, node, rhs); 119 } else if (match(par, TOKEN_LT_EQ)) { 120 Node* rhs = parse_additive(par); 121 node = make_binary_node(OP_LT_EQ, node, rhs); 122 } else if (match(par, TOKEN_GT)) { 123 Node* rhs = parse_additive(par); 124 node = make_binary_node(OP_GT, node, rhs); 125 } else if (match(par, TOKEN_GT_EQ)) { 126 Node* rhs = parse_additive(par); 127 node = make_binary_node(OP_GT_EQ, node, rhs); 128 } else 129 break; 130 } 131 return node; 132 } 133 134 static Node* 135 parse_equality(Parser* par) 136 { 137 Node* node = parse_relational(par); 138 139 // TODO handle all enum gcc_jit_comparison 140 // { 141 // /* (EXPR_A) == (EXPR_B). */ 142 // GCC_JIT_COMPARISON_EQ, 143 144 // /* (EXPR_A) != (EXPR_B). */ 145 // GCC_JIT_COMPARISON_NE, 146 147 // /* (EXPR_A) < (EXPR_B). */ 148 // GCC_JIT_COMPARISON_LT, 149 150 // /* (EXPR_A) <=(EXPR_B). */ 151 // GCC_JIT_COMPARISON_LE, 152 153 // /* (EXPR_A) > (EXPR_B). */ 154 // GCC_JIT_COMPARISON_GT, 155 156 // /* (EXPR_A) >= (EXPR_B). */ 157 // GCC_JIT_COMPARISON_GE 158 // }; 159 160 for (;;) { 161 if (match(par, TOKEN_EQUALITY)) { // "==" 162 Node* rhs = parse_relational(par); 163 node = make_binary_node(OP_EQUALITY, node, rhs); 164 } else if (match(par, TOKEN_INEQUALITY)) { // "!=" 165 Node* rhs = parse_relational(par); 166 node = make_binary_node(OP_INEQUALITY, node, rhs); 167 } else { 168 // do not error out for other things like `=`, `&`, etc. let it pass. 169 break; 170 } 171 } 172 return node; 173 } 174 175 Node* 176 parse_assignment_expr(Parser* par) 177 { 178 Node* left = parse_equality(par); 179 180 if (match(par, TOKEN_EQUAL)) { 181 Node* right = parse_assignment_expr(par); // right-associative 182 if (left->type != NODE_IDENT) panic("invalid assignment target"); 183 return make_binary_node(OP_ASSIGN, left, right); 184 } 185 return left; 186 } 187 188 Node* 189 parse_expression(Parser* par) 190 { 191 return parse_assignment_expr(par); 192 } 193 194 Node* 195 parse_expression_statement(Parser* par) 196 { 197 Node* expr = parse_expression(par); 198 expect(par, TOKEN_SEMICOLON); 199 200 Node* node = (Node*)calloc(1, sizeof(Node)); 201 if (node == NULL) panic("parse_expression_statement: could not alloc"); 202 node->type = NODE_EXPR_STATEMENT; 203 node->scope = NULL; 204 node->next = NULL; 205 node->data.expr_statement.expr = expr; 206 return node; 207 } 208 209 // 210 // parse_statement 211 // 212 Node* 213 parse_statement(Parser* par) 214 { 215 Token tok = peek(par), tok2 = peek2(par); 216 217 if (tok.type == TOKEN_LBRACE) { 218 consume(par); 219 return parse_block(par); 220 } 221 222 if (tok.type == TOKEN_IDENT && tok2.type == TOKEN_IDENT) { return parse_decl_or_func_decl(par); } 223 224 if (tok.type == TOKEN_IDENT && tok2.type == TOKEN_EQUAL) { return parse_assignment(par); } 225 226 switch (tok.type) { 227 case TOKEN_RETURN: 228 return parse_return_statement(par); 229 case TOKEN_IF: 230 return parse_if(par); 231 case TOKEN_WHILE: 232 return parse_while(par); 233 case TOKEN_FOR: 234 return parse_for(par); 235 case TOKEN_BREAK: 236 return parse_break(par); 237 case TOKEN_CONTINUE: 238 return parse_continue_statement(par); 239 case TOKEN_SEMICOLON: 240 expect(par, TOKEN_SEMICOLON); 241 return make_empty_statement(); 242 // case TOKEN_IDENT: // TODO? 243 // if (tok2.type == TOKEN_EQUAL) 244 // return parse_assignment(par); 245 // else 246 // return parse_expression_statement(par); 247 default: 248 return parse_expression_statement(par); 249 } 250 } 251 252 Node* 253 parse_block(Parser* par) 254 { 255 Node* stmt; 256 Node* block = (Node*)calloc(1, sizeof(Node)); 257 if (block == NULL) panic("parse_block: could not alloc"); 258 block->type = NODE_BLOCK; 259 block->scope = NULL; 260 while (peek(par).type != TOKEN_RBRACE && peek(par).type != TOKEN_EOF) { 261 stmt = parse_statement(par); 262 263 if (block->data.block.cap == block->data.block.len) { 264 block->data.block.cap = block->data.block.cap == 0 ? 4 : block->data.block.cap * 2; 265 block->data.block.stmts = realloc(block->data.block.stmts, block->data.block.cap * sizeof(Node*)); 266 if (block->data.block.stmts == NULL) { panic("realloc failed in parse_block"); } 267 } 268 269 block->data.block.stmts[block->data.block.len++] = stmt; 270 } 271 expect(par, TOKEN_RBRACE); 272 // TODO next the parsing of this was relying on next and cannot 273 // anymmore, e.g. print 274 return block; 275 } 276 277 Node* 278 parse_declaration_statement(Parser* par) 279 { 280 Node* type = parse_type(par); // consumes the type (e.g., "float") 281 Token ident = expect(par, TOKEN_IDENT); // variable or function name 282 if (match(par, TOKEN_LPAREN)) { perror("called a var decl but this looks to be a func decl"); } 283 284 Node* var = calloc(1, sizeof(Node)); 285 if (var == NULL) panic("parse_declaration_statement: could not alloc"); 286 var->type = NODE_VAR_DECL; 287 var->scope = NULL; 288 var->data.var_decl.name = (Span) { ident.start, ident.end }; 289 var->data.var_decl.type = type; 290 Token next_tok = peek(par); 291 if (next_tok.type == TOKEN_EQUAL) { 292 consume(par); 293 var->data.var_decl.init = parse_expression(par); 294 } else { 295 consume(par); 296 var->data.var_decl.init = NULL; 297 } 298 expect(par, TOKEN_SEMICOLON); 299 return var; 300 } 301 302 Node* 303 parse_decl_or_func_decl(Parser* par) 304 { 305 Node* type = parse_type(par); // consumes the type (e.g., "float") 306 Token ident = expect(par, TOKEN_IDENT); // variable or function name 307 308 if (match(par, TOKEN_LPAREN)) { // function 309 Node* fn = calloc(1, sizeof(Node)); 310 if (fn == NULL) panic("parse_decl_or_func_decl: func: could not alloc"); 311 312 fn->type = NODE_FUNCTION_DECL; 313 fn->scope = NULL; 314 315 NodeVec v = parse_param_list(par); 316 fn->data.function_decl.params = v.items; 317 fn->data.function_decl.p_cap = v.cap; 318 fn->data.function_decl.p_len = v.len; 319 320 expect(par, TOKEN_RPAREN); 321 expect(par, TOKEN_LBRACE); 322 323 Node* body = parse_block(par); 324 fn->data.function_decl.body = body; 325 326 fn->data.function_decl.name = (Span) { ident.start, ident.end }; 327 fn->data.function_decl.return_type = type; 328 fn->filename = par->filename; 329 fn->line = ident.line; 330 fn->col = ident.col; 331 return fn; 332 333 } else { // variable 334 Node* var = calloc(1, sizeof(Node)); 335 if (var == NULL) panic("parse_decl_or_func_decl: var: could not alloc"); 336 var->type = NODE_VAR_DECL; 337 var->scope = NULL; 338 var->data.var_decl.name = (Span) { ident.start, ident.end }; 339 var->data.var_decl.type = type; 340 var->filename = par->filename; 341 var->line = ident.line; 342 var->col = ident.col; 343 Token next_tok = peek(par); 344 if (next_tok.type == TOKEN_EQUAL) { 345 consume(par); // consume '=' 346 var->data.var_decl.init = parse_expression(par); 347 } else { 348 var->data.var_decl.init = NULL; 349 } 350 expect(par, TOKEN_SEMICOLON); 351 return var; 352 } 353 } 354 355 Node* 356 parse_declarations(Parser* par) 357 { 358 Token tok = peek(par); 359 if (tok.type == TOKEN_EOF) return NULL; 360 361 switch (tok.type) { 362 case TOKEN_IDENT: 363 return parse_decl_or_func_decl(par); 364 break; 365 default: 366 printf("unknown token to parse!: %s\n", token_type_str(tok.type)); 367 return NULL; 368 } 369 return NULL; 370 } 371 372 void 373 parser_parse(Ast* ast, Parser* par) 374 { 375 assert(par->token_count > 0 && "no tokens to parse"); 376 Node* node; 377 Node* program = make_program_node(par); 378 for (;;) { 379 node = parse_declarations(par); 380 if (node == NULL) break; 381 if (program->data.program.len == program->data.program.cap) { 382 program->data.program.cap *= 2; 383 program->data.program.decl = (Node**)realloc(program->data.program.decl, program->data.program.cap * sizeof(Node*)); 384 assert(program->data.program.decl != NULL && "realloc failed"); 385 } 386 program->data.program.decl[program->data.program.len++] = node; 387 } 388 389 ast->src = par->src; 390 ast->node = program; 391 }