ox

The Ox programming language, compiler and tools (WIP)
Log | Files | Refs | README | LICENSE

parser_utils.c (11058B)


#include "../parser.h"

#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
      7 
      8 /* basic range to str */
      9 const char*
     10 range_str(const char* src, size_t start, size_t end, char* stack_alloc_chptr)
     11 {
     12 	const size_t len = end - start;
     13 	if (!src || !stack_alloc_chptr) return NULL;
     14 	if (!strchr(src, '\0')) return NULL; // src has no '\0'
     15 	if (len <= 0) return NULL;
     16 	memcpy(stack_alloc_chptr, src + start, len);
     17 	stack_alloc_chptr[len] = '\0';
     18 	return stack_alloc_chptr;
     19 }
     20 
     21 const char*
     22 span_str(const char* src, Span s, char* stack_alloc_chptr)
     23 {
     24 	return range_str(src, s.start, s.end, stack_alloc_chptr);
     25 }
     26 
     27 bool
     28 span_ident_same(Span a, Span b, const char* src)
     29 {
     30 	const char* a_name = span_str(src, a, (char[IDENTSZ]) { 0 });
     31 	const char* b_name = span_str(src, b, (char[IDENTSZ]) { 0 });
     32 	return strcmp(a_name, b_name) == 0;
     33 }
     34 
     35 // int span_to_str(const char* src, size_t start, size_t end, char* out_buf) {
     36 //     if (!src || !out_buf) return -1; /* Null pointer passed */
     37 //     if (start >= end) return -2;     /* Empty or inverted span */
     38 //     const size_t len = end - start;
     39 //     if (len >= IDENTSZ) return -4; /* Identifier too long */
     40 //     const char* src_end = strchr(src, '\0');
     41 //     if (!src_end) return -5; /* src not NUL‑terminated */
     42 //     const size_t src_len = (size_t)(src_end - src);
     43 
     44 //     if (end > src_len) return -6; /* Span overruns source */
     45 
     46 //     if (memchr(src + start, '\0', len))
     47 //         return -7; /* span crosses a NUL byte */
     48 
     49 //     memcpy(out_buf, src + start, len);
     50 //     out_buf[len] = '\0';
     51 
     52 //     return 0;
     53 // }
     54 
     55 // char* span_to_str_alloc(const char* src, size_t start, size_t end) {
     56 //     if (!src || start >= end) return NULL;
     57 //     const char* src_end = strchr(src, '\0');
     58 //     if (!src_end) return NULL;
     59 //     size_t src_len = (size_t)(src_end - src);
     60 //     if (end > src_len) return NULL;
     61 //     size_t n = end - start;
     62 //     if (memchr(src + start, '\0', n)) return NULL;
     63 //     char* s = calloc(1, n + 1);
     64 //     if (!s) return NULL;
     65 //     memcpy(s, src + start, n);
     66 //     s[n] = '\0';
     67 //     return s;
     68 // }
     69 
     70 static void
     71 print_node(const char* source, Node* node, int level)
     72 {
     73 	assert(node != NULL);
     74 	assert(level < 192);
     75 
     76 	const char* name;
     77 	switch (node->type) {
     78 	case NODE_FUNCTION_DECL:
     79 		name = range_str(source, node->data.function_decl.name.start, node->data.function_decl.name.end, (char[IDENTSZ]) { 0 });
     80 		printf("%*s FUNC DECL: name='%s'\n", level, "", name);
     81 		if (node->data.function_decl.return_type) {
     82 			printf("%*s ↳ return type:\n", level * 2, "");
     83 			print_node(source, node->data.function_decl.return_type, level + 1);
     84 		}
     85 		if (node->data.function_decl.params) {
     86 			printf("%*s ↳ params:\n", level * 2, "");
     87 			for (size_t i = 0; i < node->data.function_decl.p_len; i++) {
     88 				Node* param = node->data.function_decl.params[i];
     89 				print_node(source, param, level + 1);
     90 			}
     91 		} else {
     92 			printf("%*s ↳ params: N/A\n", level * 2, "");
     93 		}
     94 		if (node->data.function_decl.body) {
     95 			printf("%*s ↳ body:\n", level * 2, "");
     96 			print_node(source, node->data.function_decl.body, level + 1);
     97 		}
     98 		break;
     99 	case NODE_PARAM:
    100 		name = range_str(source, node->data.param.name.start, node->data.param.name.end, (char[IDENTSZ]) { 0 });
    101 		printf("%*s ↳ param: name='%s'\n", level * 2, "", name);
    102 		if (node->data.param.type) { print_node(source, node->data.param.type, level + 1); }
    103 		break;
    104 	case NODE_VAR_DECL:
    105 		name = range_str(source, node->data.var_decl.name.start, node->data.var_decl.name.end, (char[IDENTSZ]) { 0 });
    106 		printf("%*s VAR DECL: name='%s'\n", level, "", name);
    107 		if (node->data.var_decl.type) {
    108 			printf("%*s ↳ type:\n", level * 2, "");
    109 			print_node(source, node->data.var_decl.type, level + 1);
    110 		}
    111 		if (node->data.var_decl.init) {
    112 			printf("%*s ↳ init:\n", level * 2, "");
    113 			print_node(source, node->data.var_decl.init, level + 1);
    114 		}
    115 		break;
    116 	case NODE_PROGRAM:
    117 		printf("%*s PROGRAM:\n", level, "");
    118 		if (node->data.program.decl) {
    119 			for (size_t i = 0; i < node->data.program.len; i++) {
    120 				print_node(source, node->data.program.decl[i], level + 1);
    121 			}
    122 		}
    123 		break;
    124 	case NODE_BLOCK:
    125 		printf("%*s BLOCK:\n", level, "");
    126 		if (node->data.block.stmts) {
    127 			for (size_t i = 0; i < node->data.block.len; i++) {
    128 				print_node(source, node->data.block.stmts[i], level + 1);
    129 			}
    130 		}
    131 		break;
    132 	case NODE_CALL_EXPR:
    133 		printf("%*s ↳ FUNC CALL:\n", level, "");
    134 		if (node->data.call_expr.callee) {
    135 			printf("%*s ↳ callee:\n", level * 2, "");
    136 			print_node(source, node->data.call_expr.callee, level + 1);
    137 		}
    138 		if (node->data.call_expr.args) {
    139 			printf("%*s ↳ args:\n", level * 2, "");
    140 			for (size_t i = 0; i < node->data.call_expr.len; i++) {
    141 				Node* arg = node->data.call_expr.args[i];
    142 				print_node(source, arg, level + 1);
    143 			}
    144 		}
    145 		break;
    146 	case NODE_RETURN:
    147 		printf("%*s RETURN statement:\n", level, "");
    148 		if (node->data.ret.expr) { print_node(source, node->data.ret.expr, level + 1); }
    149 		break;
    150 	case NODE_CONTINUE:
    151 		printf("%*s CONTINUE statement\n", level, "");
    152 		if (node->data.cont.expr) { print_node(source, node->data.cont.expr, level + 1); }
    153 		break;
    154 	case NODE_INT_LITERAL:
    155 		printf("%*s ↳ LITERAL INT NUMBER value=%f\n", level * 2, "", node->data.number.value);
    156 		break;
    157 	case NODE_FLOAT_LITERAL:
    158 		printf("%*s ↳ LITERAL FLOAT NUMBER value=%f\n", level * 2, "", node->data.number.value);
    159 		break;
    160 	case NODE_STRING_LITERAL: {
    161 		const char* lit = span_str(source, node->data.string.value, (char[IDENTSZ]) { 0 });
    162 		printf("%*s ↳ LITERAL STRING value=\"%s\"\n", level * 2, "", lit);
    163 		break;
    164 	}
    165 	case NODE_TYPE:
    166 		name = range_str(source, node->data.ident.name.start, node->data.ident.name.end, (char[IDENTSZ]) { 0 });
    167 		printf("%*s ↳ TYPE name='%s'\n", level * 2, "", name);
    168 		break;
    169 	case NODE_IDENT:
    170 		name = range_str(source, node->data.ident.name.start, node->data.ident.name.end, (char[IDENTSZ]) { 0 });
    171 		printf("%*s ↳ IDENT name='%s'\n", level * 2, "", name);
    172 		break;
    173 	// case NODE_VOID:
    174 	//     printf("%*s  <VOID>\n", level * 2, "");
    175 	//     break;
    176 	// case NODE_FLOAT:
    177 	//     printf("%*s  <FLOAT>\n", level * 2, "");
    178 	//     break;
    179 	// case NODE_INT:
    180 	//     printf("%*s  <INT>\n", level * 2, "");
    181 	//     break;
    182 	// case NODE_STRING:
    183 	//     printf("%*s  <STRING>\n", level * 2, "");
    184 	//     break;
    185 	case NODE_UNKNOWN:
    186 		break;
    187 	case NODE_VAR_ASSIGN: {
    188 		const Span name_s = node->data.var_assign.lhs->data.ident.name;
    189 		name = range_str(source, name_s.start, name_s.end, (char[IDENTSZ]) { 0 });
    190 		printf("%*s VAR ASSIGN: name='%s'\n", level, "", name);
    191 		break;
    192 	}
    193 	case NODE_BREAK:
    194 		printf("%*s BREAK statement\n", level, "");
    195 		break;
    196 	case NODE_BINARY_EXPR:
    197 		printf("%*s BINARY EXPR op='%c'\n", level, "", node->data.binary_expr.op);
    198 		if (node->data.binary_expr.lhs) {
    199 			printf("%*s ↳ lhs:\n", level * 2, "");
    200 			print_node(source, node->data.binary_expr.lhs, level + 1);
    201 		}
    202 		if (node->data.binary_expr.rhs) {
    203 			printf("%*s ↳ rhs:\n", level * 2, "");
    204 			print_node(source, node->data.binary_expr.rhs, level + 1);
    205 		}
    206 		break;
    207 	case NODE_UNARY_EXPR:
    208 		printf("%*s UNARY EXPR: op='%d' is_postfix='%s'\n",
    209 			level,
    210 			"",
    211 			node->data.unary_expr.op,
    212 			node->data.unary_expr.is_postfix ? "true" : "false");
    213 		if (node->data.unary_expr.operand) {
    214 			printf("%*s ↳ operand:\n", level * 2, "");
    215 			print_node(source, node->data.unary_expr.operand, level + 1);
    216 		}
    217 		break;
    218 	case NODE_EXPR_STATEMENT:
    219 		printf("%*s EXPR STMT:\n", level, "");
    220 		if (node->data.expr_statement.expr) { print_node(source, node->data.expr_statement.expr, level + 1); }
    221 		break;
    222 	case NODE_SUBSCRIPT_EXPR:
    223 		printf("%*s SUBSCRIPT expr:\n", level, "");
    224 		if (node->data.subscript_expr.array) {
    225 			printf("%*s ↳ array:\n", level * 2, "");
    226 			print_node(source, node->data.subscript_expr.array, level + 1);
    227 		}
    228 		if (node->data.subscript_expr.index) {
    229 			printf("%*s ↳ index:\n", level * 2, "");
    230 			print_node(source, node->data.subscript_expr.index, level + 1);
    231 		}
    232 		break;
    233 	case NODE_IF:
    234 		printf("%*s IF Statement:\n", level, "");
    235 		if (node->data.if_statement.cond) {
    236 			printf("%*s ↳ cond:\n", level * 2, "");
    237 			print_node(source, node->data.if_statement.cond, level + 1);
    238 		}
    239 		if (node->data.if_statement.then_body) {
    240 			printf("%*s ↳ then body:\n", level * 2, "");
    241 			print_node(source, node->data.if_statement.then_body, level + 1);
    242 		}
    243 		if (node->data.if_statement.else_body) {
    244 			printf("%*s ↳ else body:\n", level * 2, "");
    245 			print_node(source, node->data.if_statement.else_body, level + 1);
    246 		}
    247 		break;
    248 	case NODE_WHILE:
    249 		printf("%*s WHILE Statement:\n", level, "");
    250 		if (node->data.while_statement.cond) {
    251 			printf("%*s ↳ cond:\n", level * 2, "");
    252 			print_node(source, node->data.while_statement.cond, level + 1);
    253 		}
    254 		if (node->data.while_statement.body) {
    255 			printf("%*s ↳ body:\n", level * 2, "");
    256 			print_node(source, node->data.while_statement.body, level + 1);
    257 		}
    258 		break;
    259 	case NODE_FOR:
    260 		printf("%*s FOR Statement:\n", level, "");
    261 		if (node->data.for_statement.init) {
    262 			printf("%*s ↳ init:\n", level * 2, "");
    263 			print_node(source, node->data.for_statement.init, level + 1);
    264 		}
    265 		if (node->data.for_statement.cond) {
    266 			printf("%*s ↳ cond:\n", level * 2, "");
    267 			print_node(source, node->data.for_statement.cond, level + 1);
    268 		}
    269 		if (node->data.for_statement.increment) {
    270 			printf("%*s ↳ increment:\n", level * 2, "");
    271 			print_node(source, node->data.for_statement.increment, level + 1);
    272 		}
    273 		if (node->data.for_statement.body) {
    274 			printf("%*s ↳ body:\n", level * 2, "");
    275 			print_node(source, node->data.for_statement.body, level + 1);
    276 		}
    277 		break;
    278 	case NODE_EMPTY_STATEMENT:
    279 		printf("%*s EMPTY Statement\n", level, "");
    280 		break;
    281 	}
    282 
    283 	while (node->next) {
    284 		print_node(source, node->next, level);
    285 		node = node->next;
    286 	}
    287 }
    288 
    289 void
    290 ast_print(Ast* ast)
    291 {
    292 	print_node(ast->src, ast->node, 0);
    293 }
    294 
    295 void
    296 print_node_type_str(NodeType t)
    297 {
    298 	printf("print_node_type_str: %s\n", node_type_str(t));
    299 }
    300 
    301 const char*
    302 node_type_str(NodeType t)
    303 {
    304 	static const char* type_strings[] = { [NODE_PROGRAM] = "NODE_PROGRAM",
    305 		[NODE_FUNCTION_DECL] = "NODE_FUNCTION_DECL",
    306 		[NODE_PARAM] = "NODE_PARAM",
    307 		[NODE_VAR_DECL] = "NODE_VAR_DECL",
    308 		[NODE_BLOCK] = "NODE_BLOCK",
    309 		[NODE_CALL_EXPR] = "NODE_CALL_EXPR",
    310 		[NODE_RETURN] = "NODE_RETURN",
    311 		[NODE_CONTINUE] = "NODE_CONTINUE",
    312 		[NODE_INT_LITERAL] = "NODE_INT_LITERAL",
    313 		[NODE_FLOAT_LITERAL] = "NODE_FLOAT_LITERAL",
    314 		[NODE_STRING_LITERAL] = "NODE_STRING_LITERAL",
    315 		[NODE_TYPE] = "NODE_TYPE",
    316 		[NODE_IDENT] = "NODE_IDENT",
    317 		[NODE_UNKNOWN] = "NODE_UNKNOWN",
    318 		[NODE_VAR_ASSIGN] = "NODE_VAR_ASSIGN",
    319 		[NODE_BREAK] = "NODE_BREAK",
    320 		[NODE_BINARY_EXPR] = "NODE_BINARY_EXPR",
    321 		[NODE_UNARY_EXPR] = "NODE_UNARY_EXPR",
    322 		[NODE_EXPR_STATEMENT] = "NODE_EXPR_STATEMENT",
    323 		[NODE_SUBSCRIPT_EXPR] = "NODE_SUBSCRIPT_EXPR",
    324 		[NODE_IF] = "NODE_IF",
    325 		[NODE_WHILE] = "NODE_WHILE",
    326 		[NODE_FOR] = "NODE_FOR",
    327 		[NODE_EMPTY_STATEMENT] = "NODE_EMPTY_STATEMENT" };
    328 	if (t >= NODE_PROGRAM && t <= NODE_EMPTY_STATEMENT) {
    329 		return type_strings[t];
    330 	} else {
    331 		return "UNKNOWN_NODE_TYPE";
    332 	}
    333 }