parser.c (8829B)
1 #include "../parser.h" 2 #include "../utils.h" 3 4 #include <stdio.h> 5 #include <string.h> 6 #include <stdbool.h> 7 #include <assert.h> 8 9 // TODO make sure ALL callocs have been successful 10 11 Parser 12 parser_init(Lexer* lex) 13 { 14 return (Parser) { .pos = 0, 15 .tokens = lex->tokens, 16 .token_count = lex->token_count, 17 .src = lex->src, 18 .src_len = lex->src_len, 19 .filename = lex->filename }; 20 } 21 22 Token 23 peek(Parser* par) 24 { 25 Token t = par->tokens[par->pos]; 26 return t.type ? t : (Token) { .type = TOKEN_EOF }; 27 } 28 29 Token 30 peek2(Parser* par) 31 { 32 if (par->pos + 1 >= par->token_count) return (Token) { .type = TOKEN_EOF }; 33 Token t = par->tokens[par->pos + 1]; 34 return t.type ? t : (Token) { .type = TOKEN_EOF }; 35 } 36 37 Token 38 consume(Parser* par) 39 { 40 Token t = par->tokens[par->pos]; 41 if (!t.type) return (Token) { .type = TOKEN_EOF }; 42 par->pos++; 43 return t; 44 } 45 46 bool 47 check(Parser* p, TokenType type) 48 { 49 return (peek(p).type == type); 50 } 51 52 Token 53 expect(Parser* par, TokenType type) 54 { 55 Token tok = peek(par); 56 if (tok.type != type) { 57 const char* name = range_str(par->src, tok.start, tok.end, (char[IDENTSZ]) { 0 }); 58 panic("Expected %d got '%s' (%d) at %s:%zu:%zu", 59 token_type_str(type), 60 name, 61 tok.type, 62 par->filename, 63 tok.line, 64 tok.col); 65 assert(tok.type == type); 66 } 67 return consume(par); 68 } 69 70 bool 71 match(Parser* p, TokenType type) 72 { 73 // printf("matching type %d\n", type); 74 if (peek(p).type == type) { 75 consume(p); 76 return true; 77 } 78 return false; 79 } 80 81 static Node* 82 parse_multiplicative(Parser* par) 83 { 84 Node* node = parse_term(par); 85 86 for (;;) { 87 if (match(par, TOKEN_STAR)) { 88 Node* rhs = parse_unary(par); 89 node = make_binary_node(OP_MUL, node, rhs); 90 } else if (match(par, TOKEN_SLASH)) { 91 Node* rhs = parse_unary(par); 92 node = make_binary_node(OP_DIV, node, rhs); 93 } else if (match(par, TOKEN_PERCENT)) { 94 Node* rhs = parse_unary(par); 95 node = make_binary_node(OP_MOD, node, rhs); 96 } else 97 break; 98 } 99 100 return node; 101 } 102 // additive: +, - 103 static Node* 104 parse_additive(Parser* par) 105 { 106 Node* node = parse_multiplicative(par); 107 for (;;) { 108 if (match(par, TOKEN_PLUS)) { 109 Node* rhs = parse_multiplicative(par); 110 node = make_binary_node(OP_PLUS, node, rhs); 111 } else if (match(par, TOKEN_MINUS)) { 112 Node* rhs = parse_multiplicative(par); 113 node = make_binary_node(OP_MINUS, node, rhs); 114 } else 115 break; 116 } 117 return node; 118 } 119 120 static Node* 121 parse_relational(Parser* par) 122 { 123 Node* node = parse_additive(par); 124 for (;;) { 125 if (match(par, TOKEN_LT)) { 126 Node* rhs = parse_additive(par); 127 node = make_binary_node('<', node, rhs); 128 } else if (match(par, TOKEN_LT_EQ)) { 129 Node* rhs = parse_additive(par); 130 node = make_binary_node(OP_LT_EQ, node, rhs); 131 } else if (match(par, TOKEN_GT)) { 132 Node* rhs = parse_additive(par); 133 node = make_binary_node('>', node, rhs); 134 } else if (match(par, TOKEN_GT_EQ)) { 135 Node* rhs = parse_additive(par); 136 node = make_binary_node(OP_GT_EQ, node, rhs); 137 } else 138 break; 139 } 140 return node; 141 } 142 143 static Node* 144 parse_equality(Parser* par) 145 { 146 Node* node = parse_relational(par); 147 for (;;) { 148 if (match(par, TOKEN_EQUALITY)) { // "==" 149 Node* rhs = parse_relational(par); 150 node = make_binary_node(OP_EQUALITY, node, rhs); 151 } else if (match(par, TOKEN_INEQUALITY)) { // "!=" 152 Node* rhs = parse_relational(par); 153 node = make_binary_node(OP_INEQUALITY, node, rhs); 154 } else 155 break; 156 } 157 return node; 158 } 159 160 Node* 161 parse_expression(Parser* par) 162 { 163 return parse_equality(par); 164 } 165 166 Node* 167 parse_expression_statement(Parser* par) 168 { 169 Node* expr = parse_expression(par); 170 expect(par, TOKEN_SEMICOLON); 171 172 Node* node = (Node*)calloc(1, sizeof(Node)); 173 if (node == NULL) panic("parse_expression_statement: could not alloc"); 174 node->type = NODE_EXPR_STATEMENT; 175 node->scope = NULL; 176 node->next = NULL; 177 node->data.expr_statement.expr = expr; 178 return node; 179 } 180 181 // 182 // parse_statement 183 // 184 Node* 185 parse_statement(Parser* par) 186 { 187 Token tok = peek(par), tok2 = peek2(par); 188 189 if (tok.type == TOKEN_LBRACE) { 190 consume(par); 191 return parse_block(par); 192 } 193 194 if (tok.type == TOKEN_IDENT && tok2.type == TOKEN_IDENT) 195 return parse_decl_or_func_decl(par); 196 197 switch (tok.type) { 198 case TOKEN_RETURN: 199 return parse_return_statement(par); 200 case TOKEN_IF: 201 return parse_if(par); 202 case TOKEN_WHILE: 203 return parse_while(par); 204 case TOKEN_FOR: 205 return parse_for(par); 206 case TOKEN_BREAK: 207 return parse_break(par); 208 case TOKEN_CONTINUE: 209 return parse_continue_statement(par); 210 case TOKEN_SEMICOLON: 211 expect(par, TOKEN_SEMICOLON); 212 return make_empty_statement(); 213 // case TOKEN_IDENT: // TODO? 214 // if (tok2.type == TOKEN_EQUAL) 215 // return parse_assignment(par); 216 // else 217 // return parse_expression_statement(par); 218 default: 219 return parse_expression_statement(par); 220 } 221 } 222 223 Node* 224 parse_block(Parser* par) 225 { 226 Node* stmt; 227 Node* block = (Node*)calloc(1, sizeof(Node)); 228 if (block == NULL) panic("parse_block: could not alloc"); 229 block->type = NODE_BLOCK; 230 block->scope = NULL; 231 while (peek(par).type != TOKEN_RBRACE && peek(par).type != TOKEN_EOF) { 232 stmt = parse_statement(par); 233 234 if (block->data.block.cap == block->data.block.len) { 235 block->data.block.cap 236 = block->data.block.cap == 0 ? 4 : block->data.block.cap * 2; 237 block->data.block.stmts = realloc( 238 block->data.block.stmts, block->data.block.cap * sizeof(Node*)); 239 if (block->data.block.stmts == NULL) { 240 panic("realloc failed in parse_block"); 241 } 242 } 243 244 block->data.block.stmts[block->data.block.len++] = stmt; 245 } 246 expect(par, TOKEN_RBRACE); 247 // TODO next the parsing of this was relying on next and cannot 248 // anymmore, e.g. print 249 return block; 250 } 251 252 Node* 253 parse_declaration_statement(Parser* par) 254 { 255 Node* type = parse_type(par); // consumes the type (e.g., "float") 256 Token ident = expect(par, TOKEN_IDENT); // variable or function name 257 if (match(par, TOKEN_LPAREN)) { 258 perror("called a var decl but this looks to be a func decl"); 259 } 260 261 Node* var = calloc(1, sizeof(Node)); 262 if (var == NULL) panic("parse_declaration_statement: could not alloc"); 263 var->type = NODE_VAR_DECL; 264 var->scope = NULL; 265 var->data.var_decl.name = (Span) { ident.start, ident.end }; 266 var->data.var_decl.type = type; 267 Token next_tok = peek(par); 268 if (next_tok.type == TOKEN_EQUAL) { 269 consume(par); 270 var->data.var_decl.init = parse_expression(par); 271 } else { 272 consume(par); 273 var->data.var_decl.init = NULL; 274 } 275 expect(par, TOKEN_SEMICOLON); 276 return var; 277 } 278 279 Node* 280 parse_decl_or_func_decl(Parser* par) 281 { 282 Node* type = parse_type(par); // consumes the type (e.g., "float") 283 Token ident = expect(par, TOKEN_IDENT); // variable or function name 284 285 if (match(par, TOKEN_LPAREN)) { // function 286 Node* fn = calloc(1, sizeof(Node)); 287 if (fn == NULL) panic("parse_decl_or_func_decl: func: could not alloc"); 288 289 fn->type = NODE_FUNCTION_DECL; 290 fn->scope = NULL; 291 292 NodeVec v = parse_param_list(par); 293 fn->data.function_decl.params = v.items; 294 fn->data.function_decl.p_cap = v.cap; 295 fn->data.function_decl.p_len = v.len; 296 297 expect(par, TOKEN_RPAREN); 298 expect(par, TOKEN_LBRACE); 299 300 Node* body = parse_block(par); 301 fn->data.function_decl.body = body; 302 303 fn->data.function_decl.name = (Span) { ident.start, ident.end }; 304 fn->data.function_decl.return_type = type; 305 fn->filename = par->filename; 306 fn->line = ident.line; 307 fn->col = ident.col; 308 return fn; 309 310 } else { // variable 311 Node* var = calloc(1, sizeof(Node)); 312 if (var == NULL) panic("parse_decl_or_func_decl: var: could not alloc"); 313 var->type = NODE_VAR_DECL; 314 var->scope = NULL; 315 var->data.var_decl.name = (Span) { ident.start, ident.end }; 316 var->data.var_decl.type = type; 317 var->filename = par->filename; 318 var->line = ident.line; 319 var->col = ident.col; 320 Token next_tok = peek(par); 321 if (next_tok.type == TOKEN_EQUAL) { 322 consume(par); // consume '=' 323 var->data.var_decl.init = parse_expression(par); 324 } else { 325 var->data.var_decl.init = NULL; 326 } 327 expect(par, TOKEN_SEMICOLON); 328 return var; 329 } 330 } 331 332 Node* 333 parse_declarations(Parser* par) 334 { 335 Token tok = peek(par); 336 if (tok.type == TOKEN_EOF) return NULL; 337 338 switch (tok.type) { 339 case TOKEN_IDENT: 340 return parse_decl_or_func_decl(par); 341 break; 342 default: 343 printf("unknown token to parse!: %s\n", token_type_str(tok.type)); 344 return NULL; 345 } 346 return NULL; 347 } 348 349 void 350 parser_parse(Ast* ast, Parser* par) 351 { 352 assert(par->token_count > 0 && "no tokens to parse"); 353 Node* node; 354 Node* program = make_program_node(); 355 for (;;) { 356 node = parse_declarations(par); 357 if (node == NULL) break; 358 if (program->data.program.len == program->data.program.cap) { 359 program->data.program.cap *= 2; 360 program->data.program.decl = (Node**)realloc(program->data.program.decl, 361 program->data.program.cap * sizeof(Node*)); 362 assert(program->data.program.decl != NULL && "realloc failed"); 363 } 364 program->data.program.decl[program->data.program.len++] = node; 365 } 366 367 ast->src = par->src; 368 ast->node = program; 369 }