parse.c (14092B)
1 #include <ctype.h> 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <limits.h> 5 #include <stdbool.h> 6 7 #include "lexer.h" 8 #include "parse.h" 9 #include "util.h" 10 #include "maps.h" 11 12 #define SAME(s1, s2) (strcmp(s1, s2) == 0) 13 #define SIZE(arr) (sizeof(arr) / sizeof(arr[0])) 14 15 map **scope = &vars; 16 int haserrored = 0; 17 18 char *builtintypes[] = { 19 "f32", 20 "f64", 21 "i8", 22 "i16", 23 "i32", 24 "i64", 25 "void", 26 }; 27 28 char *builtinfns[] = { 29 "f32", 30 "f64", 31 "i8", 32 "i16", 33 "i32", 34 "i64", 35 }; 36 37 int funcargs = false; 38 39 void 40 parserErr(const char *msg, lex l) { 41 loc lo = findloc(l); 42 fprintf(stderr, "%s:%d:%d: %s\n", l.name, lo.line, lo.col, msg); 43 } 44 45 void 46 parserErrtok(const char *msg, tok t) { 47 loc lo = findloctok(t); 48 fprintf(stderr, "%s:%d:%d: %s\n", t.l.name, lo.line, lo.col, msg); 49 } 50 51 int 52 isbasictype(char *t) { 53 for (int i = 0; i < SIZE(builtintypes); i++) 54 if (SAME(t, builtintypes[i])) 55 return 1; 56 return 0; 57 } 58 59 type * 60 mkbasictype(char *name, mctx *ctx) { 61 type *t = alloczctx(ctx, sizeof(type)); 62 t->type = BASIC; 63 t->name = strdup(name); 64 return t; 65 } 66 67 type * 68 mkarrtype(type *nested, mctx *ctx) { 69 type *t = alloczctx(ctx, sizeof(type)); 70 t->type = ARR; 71 t->nested = nested; 72 return t; 73 } 74 75 int 76 getpres(tok t) { 77 switch (t.op) { 78 case LEOF: 79 return -1; 80 81 case ADD: 82 case SUB: 83 return 1; 84 85 case MUL: 86 case DIV: 87 return 4; 88 89 case INT: 90 case FLOAT: 91 case NAME: 92 return 6; 93 case OBRACE: 94 return 8; 95 96 case NEGATE: 97 return 5; 98 } 99 return -2; 100 } 101 102 int 103 isop(tok t) { 104 return (t.op == ADD || t.op == SUB || t.op == MUL || t.op == DIV); 105 } 106 107 tok * 108 findend(tok *start, tok *end) { 109 int d = 1; 110 tok *t; 111 112 if (start->op != OBRACE) { 113 parserErrtok("expected a '('", *start); 114 return NULL; 115 } 116 117 for (t = &start[1]; d != 0; t = &t[1]) { 118 if (end->op != CBRACE && t == end) { 119 parserErrtok("unclosed brace", *t); 120 return NULL; 121 } 122 123 if (t->op == OBRACE) d++; 124 else if (t->op == CBRACE) d--; 125 } 126 return &t[-1]; 127 } 128 129 tok * 130 findlowest(tok *start, tok *end) { 131 tok *lowest = start, prev = {.op = LEOF}; 132 133 for (tok *t = lowest ; t != end; t = &t[1]) { 134 /* re assign ops, this is for when an op has multiple meanings */ 135 if (isop(prev) || prev.op == LEOF) { 136 switch (t->op) { 137 case SUB: 138 t->op = NEGATE; 139 break; 140 case MUL: 141 t->op = DEREF; 142 break; 143 } 144 } 145 else if (getpres(*t) < getpres(*lowest)) lowest = t; 146 147 /* move to the end brace, we only process the contents when nothing else is left */ 148 if (t->op == OBRACE) { 149 if (!(t = findend(t, end))) return NULL; 150 } 151 152 /* checks if any of our skips ahead have put us at the end of expr */ 153 if (t == end) break; 154 155 prev = *t; 156 } 157 158 return lowest; 159 } 160 161 162 /* checks if t2 can be losslessly casted to t1 */ 163 int 164 isequaltype(type *t1, type *t2) { 165 if (t1->type == BASIC && t2->type == BASIC) { 166 if (t1->name[0] == t2->name[0]) { 167 if (atoi(t1->name + 1) >= atoi(t2->name + 1)) 168 return 1; 169 } 170 } else if (t1->type == PTR && t2->type == PTR) 171 return isequaltype(t1->nested, t2->nested); 172 else if (t1->type == ARR && t2->type == ARR) 173 return isequaltype(t1->nested, t2->nested); 174 return 0; 175 } 176 177 fn * 178 isbuiltin(char *name, mctx *ctx) { 179 fn *f = alloczctx(ctx, sizeof(fn)); 180 for (int i = 0; i < SIZE(builtinfns); i++) { 181 if (SAME(name, builtinfns[i])) { 182 f->name = name; 183 f->rtype = mkbasictype(name, ctx); 184 f->argc = 1; 185 return f; 186 } 187 } 188 return NULL; 189 } 190 191 rexpr * 192 parsefcall(tok *start, tok *end, mctx *ctx) { 193 rexpr *e = alloczctx(ctx, sizeof(rexpr)); 194 tok *exprend; 195 fn *f; 196 int builtin = 1; 197 198 /* TODO make this check for casts specifically, if the user casts, 199 * mark it as so, to allow for the comp step to fix everything up */ 200 if (!(f = isbuiltin(start->name, ctx))) { 201 if (!(f = (fn *)lookupmap(funcs, start->name))) { 202 parserErrtok("no such function", *start); 203 return NULL; 204 } 205 builtin = 0; 206 } 207 208 e->fname = f->name; 209 e->type = f->rtype; 210 e->op = FUNCALL; 211 212 if (builtin && isbasictype(f->name)) 213 e->op = CAST; 214 215 if (start[1].op != '(') 216 return NULL; 217 start = &start[2]; 218 exprend = start; 219 220 another: 221 while (!isterm(*exprend)) { 222 if (exprend->op == ')') { 223 if (exprend == start) 224 goto noargs; 225 break; 226 } 227 exprend = &exprend[1]; 228 } 229 e->argc++; 230 e->args = realloczctx(ctx, e->args, sizeof(rexpr) * e->argc); 231 e->args[e->argc - 1] = *parserexpr(start, &exprend[-1], ctx); 232 start = &exprend[1]; 233 234 if (!builtin) { 235 if (!isequaltype(e->args[e->argc - 1].type, &f->args[e->argc - 1])) { 236 parserErrtok("type missmatch", *start); 237 return NULL; 238 } 239 } 240 241 if (exprend->op != ')') 242 goto another; 243 244 noargs: 245 return e; 246 } 247 248 type * 249 mkptrtype(type *t, mctx *ctx) { 250 type *new = alloczctx(ctx, sizeof(type)); 251 new->type = PTR; 252 new->nested = t; 253 return new; 254 } 255 256 rexpr * 257 mklitteral(uint64_t n, type *t, mctx *ctx) { 258 rexpr *e = alloczctx(ctx, sizeof(rexpr)); 259 e->type = t; 260 e->n = n; 261 return e; 262 } 263 264 rexpr * 265 parsesimple(tok *start, tok *end, tok *lowest, mctx *ctx) { 266 rexpr *e; 267 var *v; 268 tok *current; 269 270 switch (lowest->op) { 271 case NAME: 272 if (lowest[1].op == '(') { 273 e = parsefcall(lowest, end, ctx); 274 return e; 275 } 276 case INT: 277 case FLOAT: 278 if (start != end) { 279 parserErrtok("trailing expression", *start); 280 return NULL; 281 } 282 e = alloczctx(ctx, sizeof(rexpr)); 283 e->op = lowest->op; 284 e->n = lowest->n; /* this is okay as n is 64 bits so any pointer type fits here */ 285 switch (e->op) { 286 case INT: 287 if (e->n > INT32_MAX) 288 e->type = mkbasictype("i64", ctx); 289 else if (e->n > INT16_MAX) 290 e->type = mkbasictype("i32", ctx); 291 else if (e->n > INT8_MAX) 292 e->type = mkbasictype("i16", ctx); 293 else 294 e->type = mkbasictype("i8", ctx); 295 break; 296 case FLOAT: 297 e->type = mkbasictype("f64", ctx); 298 break; 299 case NAME: 300 if (!(v = (var *)lookupmap(*scope, lowest->name))) { 301 parserErrtok("var is not declared", *start); 302 return NULL; 303 } 304 e->type = v->type; 305 break; 306 } 307 break; 308 case DQUOTE: 309 e = alloczctx(ctx, sizeof(rexpr)); 310 e->op = DQUOTE; 311 for (e->elementc = 0; e->elementc < strlen(lowest->name); e->elementc++) { 312 e->elements = realloczctx(ctx, e->elements, (1 + e->elementc) * sizeof(rexpr)); 313 e->elements[e->elementc] = *mklitteral(lowest->name[e->elementc], mkbasictype("i8", ctx), ctx); 314 } 315 e->type = mkarrtype(mkbasictype("i8", ctx), ctx); 316 break; 317 case OBRACE: 318 e = alloczctx(ctx, sizeof(rexpr)); 319 e->op = OBRACE; 320 e->e = parserexpr(&lowest[1], &findend(lowest, end)[-1], ctx); 321 e->type = e->e->type; 322 break; 323 case NEGATE: 324 e = alloczctx(ctx, sizeof(rexpr)); 325 e->op = NEGATE; 326 e->e = parserexpr(&lowest[1], end, ctx); 327 e->type = e->e->type; 328 break; 329 case ADDROF: 330 e = alloczctx(ctx, sizeof(rexpr)); 331 e->op = ADDROF; 332 e->e = parserexpr(&lowest[1], end, ctx); 333 e->type = mkptrtype(e->e->type, ctx); 334 break; 335 case DEREF: 336 e = alloczctx(ctx, sizeof(rexpr)); 337 e->op = DEREF; 338 e->e = parserexpr(&lowest[1], end, ctx); 339 e->type = e->e->type->nested; 340 break; 341 default: 342 parserErrtok("unexpected token type", *start); 343 return NULL; 344 } 345 346 return e; 347 } 348 349 rexpr * 350 parsebin(tok *start, tok *end, tok *lowest, mctx *ctx) { 351 rexpr *e = alloczctx(ctx, sizeof(rexpr)); 352 353 if (!isop(*lowest)) { 354 parserErrtok("expected op", *lowest); 355 return NULL; 356 } 357 358 e->op = lowest->op; 359 e->expr[0] = parserexpr(start, &lowest[-1], ctx); 360 e->expr[1] = parserexpr(&lowest[1], end, ctx); 361 362 if (!isequaltype(e->expr[0]->type, e->expr[1]->type)) { 363 parserErrtok("type missmatch", *start); 364 return NULL; 365 } 366 367 e->type = e->expr[0]->type; 368 369 return e; 370 } 371 372 rexpr * 373 parserexpr(tok *start, tok *end, mctx *ctx) { 374 rexpr *e; 375 tok *lowest; 376 if (!(lowest = findlowest(start, end))) return NULL; 377 378 /* simple 1 term expr ops */ 379 if (lowest == start) e = parsesimple(start, end, lowest, ctx); 380 381 /* binary ops */ 382 else e = parsebin(start, end, lowest, ctx); 383 384 return e; 385 } 386 387 388 389 type * 390 parsetype(lex *l, mctx *ctx) { 391 SAVE(l); 392 type *ty; 393 tok t = next(l), *ts = NULL; 394 int tcount = 0; 395 ty = alloczctx(ctx, sizeof(type)); 396 397 if (t.op == MUL) { 398 ty->type = PTR; 399 ty->nested = parsetype(l, ctx); 400 return ty; 401 } else if (t.op == OSBRACE) { 402 if ((t = next(l)).op != CSBRACE) { 403 do { 404 ts = realloczctx(ctx, ts, ++tcount * sizeof(tok)); 405 ts[tcount - 1] = t; 406 } while ((t = next(l)).op != CSBRACE); 407 ty->size = parserexpr(ts, &ts[tcount - 1], ctx); 408 if (!isequaltype(mkbasictype("i64", ctx), ty->size->type)) { 409 parserErr("can not use non int type as array size", *l); 410 RESTORE(l); 411 return NULL; 412 } 413 } 414 ty->type = ARR; 415 ty->nested = parsetype(l, ctx); 416 return ty; 417 } else if (t.op != NAME) { 418 RESTORE(l); 419 return NULL; 420 } 421 422 if (!isbasictype(t.name)) { 423 parserErr("not a known type!", *l); 424 RESTORE(l); 425 return NULL; 426 } 427 ty->type = BASIC; 428 ty->name = t.name; 429 return ty; 430 } 431 432 int 433 isterm(tok t) { 434 return (t.op == SEMI || t.op == COMMA); 435 } 436 437 lexpr * 438 parseassign(lex *l, tok name, mctx *ctx) { 439 SAVE(l); 440 lexpr *le; 441 type *ty; 442 rexpr *r = NULL; 443 tok *arr, t; 444 var *v; 445 int count = 1; 446 447 if (!(ty = parsetype(l, ctx))) { 448 RESTORE(l); 449 return NULL; 450 } 451 if ((t = next(l)).op != ASSIGN) { 452 if (isterm(t) || (funcargs && t.op == ')')) { 453 if (t.op != SEMI) 454 unlex(l); 455 goto shortdec; 456 } 457 RESTORE(l); 458 return NULL; 459 } 460 461 arr = alloczctx(ctx, sizeof(tok)); 462 while ((t = next(l)).op != LEOF && !isterm(t)) { 463 arr[count++ - 1] = t; 464 arr = realloczctx(ctx, arr, sizeof(tok) * count); 465 } 466 if (count == 1) { 467 RESTORE(l); 468 return NULL; 469 } else if (!(r = parserexpr(arr, &arr[count - 2], ctx))) { 470 RESTORE(l); 471 return NULL; 472 } 473 474 shortdec: 475 if (lookupmap(*scope, name.name)) { 476 parserErr("redeclaration of var", *l); 477 RESTORE(l); 478 return NULL; 479 } 480 481 le = alloczctx(ctx, sizeof(lexpr)); 482 le->name = alloczctx(ctx, strlen(name.name) + 1); 483 strcpy(le->name, name.name); 484 le->op = LEXPR_ASSIGN; 485 le->type = ty; 486 le->rexpr = r; 487 488 if (r && !isequaltype(ty, r->type)) { 489 parserErr("type missmatch", *l); 490 RESTORE(l); 491 return NULL; 492 } 493 494 v = alloczctx(ctx, sizeof(var)); 495 v->name = le->name; 496 v->type = le->type; 497 *scope = addmap(*scope, v, le->name, ctx); 498 499 return le; 500 } 501 502 503 lexpr * 504 parsefunc(lex *l, mctx *ctx) { 505 SAVE(l); 506 tok name, t; 507 lexpr *args = NULL, *arg = NULL, *func = NULL, *body = NULL; 508 type *ty; 509 int argc = 0; 510 511 if ((name = next(l)).op != NAME) { 512 parserErr("function has no name", *l); 513 RESTORE(l); 514 return NULL; 515 } 516 if (next(l).op != OBRACE) { 517 parserErr("expected '('", *l); 518 RESTORE(l); 519 return NULL; 520 } 521 for (;;) { 522 t = next(l); 523 switch (t.op) { 524 case NAME: 525 funcargs = 1; 526 if (!(arg = parseassign(l, t, ctx))) { 527 parserErr("malformed function arg", *l); 528 RESTORE(l); 529 return NULL; 530 } 531 args = realloczctx(ctx, args, sizeof(lexpr) * ++argc); 532 args[argc - 1] = *arg; 533 funcargs = 0; 534 break; 535 case COMMA: 536 continue; 537 case CBRACE: 538 goto done; 539 default: 540 parserErr("unexpected token", *l); 541 RESTORE(l); 542 return NULL; 543 } 544 } 545 done: 546 if (argc == 0) { 547 argc = 0; 548 args = NULL; 549 } 550 551 if (!(ty = parsetype(l, ctx))) { 552 parserErr("function has no type", *l); 553 RESTORE(l); 554 return NULL; 555 } 556 557 if ((t = next(l)).op == ';') 558 goto prototype; 559 else if (t.op != '{') { 560 parserErr("expected function body, or ';'", *l); 561 RESTORE(l); 562 return NULL; 563 } 564 565 func = alloczctx(ctx, sizeof(lexpr)); 566 do { 567 func->body = realloczctx(ctx, func->body, sizeof(lexpr) * (func->bodyc + 1)); 568 body = parselexpr(l, ctx); 569 if (!body) { 570 RESTORE(l); 571 return NULL; 572 } 573 func->body[func->bodyc++] = *body; 574 t = next(l); 575 } while (t.op != '}' && unlex(l)); 576 577 goto fullbody; 578 579 prototype: 580 func = alloczctx(ctx, sizeof(lexpr)); 581 fullbody: 582 func->op = LEXPR_FUNC; 583 func->rtype = ty; 584 func->name = name.name; 585 func->args = args; 586 func->argc = argc; 587 return func; 588 } 589 590 lexpr * 591 parsereassign(lex *l, tok t, mctx *ctx) { 592 SAVE(l); 593 lexpr *le; 594 rexpr *r; 595 tok *args = NULL, tk; 596 int argc = 0; 597 598 if (next(l).op != ASSIGN) { 599 RESTORE(l); 600 return NULL; 601 } 602 603 for (tk = next(l); !isterm(tk); tk = next(l)) { 604 args = realloczctx(ctx, args, sizeof(tok) * (argc + 1)); 605 args[argc++] = tk; 606 } 607 if (argc == 0) { 608 RESTORE(l); 609 return NULL; 610 } 611 if (!(r = parserexpr(&args[0], &args[argc - 1], ctx))) { 612 RESTORE(l); 613 return NULL; 614 } 615 616 le = alloczctx(ctx, sizeof(lexpr)); 617 le->op = LEXPR_REASSIGN; 618 le->name = t.name; 619 le->rexpr = r; 620 return le; 621 } 622 623 map * 624 mknewscope(map *prevscope, mctx *ctx) { 625 map *new = NULL; 626 if (!prevscope) 627 return NULL; 628 new = addmap(new, prevscope->data, prevscope->id, ctx); 629 if (prevscope->next) 630 new->next = mknewscope(prevscope->next, ctx); 631 return new; 632 } 633 634 lexpr * 635 parselexpr(lex *l, mctx *ctx) { 636 SAVE(l); 637 lexpr *le; 638 fn *f; 639 map *newscope; 640 tok t = next(l), *ts = NULL; 641 int tsc = 0; 642 643 switch (t.op) { 644 case NAME: 645 if (*l->ptr == '(') { 646 ts = alloczctx(ctx, sizeof(tok)); 647 ts[tsc++] = t; 648 while ((t = next(l)).op != ';') { 649 ts = realloczctx(ctx, ts, ++tsc * sizeof(tok)); 650 ts[tsc - 1] = t; 651 } 652 653 le = alloczctx(ctx, sizeof(lexpr)); 654 le->rexpr = parsefcall(ts, &ts[tsc - 1], ctx); 655 if (!le->rexpr) { 656 RESTORE(l); 657 haserrored = 1; 658 return NULL; 659 } 660 le->op = LEXPR_FCALL; 661 } 662 else if (!(le = parseassign(l, t, ctx))) 663 le = parsereassign(l, t, ctx); 664 break; 665 case FUNC: 666 newscope = mknewscope(vars, ctx); 667 scope = &newscope; 668 669 le = parsefunc(l, ctx); /* we can drop the FUNC token, as it doesnt hold any info */ 670 scope = &vars; 671 if (!le) { 672 RESTORE(l); 673 haserrored = 1; 674 return NULL; 675 } 676 677 f = alloczctx(ctx, sizeof(fn)); 678 f->name = le->fname; 679 f->rtype = le->rtype; 680 f->argc = le->argc; 681 for (int i = 0; i < f->argc; i++) { 682 f->args = realloczctx(ctx, f->args, (i + 1) * sizeof(type)); 683 f->args[i] = *le->args[i].type; 684 } 685 if (lookupmap(funcs, f->name)) { 686 parserErr("function already exists", *l); 687 RESTORE(l); 688 haserrored = 1; 689 return NULL; 690 } 691 funcs = addmap(funcs, f, f->name, ctx); 692 break; 693 case RETURN: 694 le = alloczctx(ctx, sizeof(lexpr)); 695 le->op = RETURN; 696 if (*l->ptr == ';') 697 break; 698 699 while ((t = next(l)).op != ';') { 700 ts = realloczctx(ctx, ts, ++tsc * sizeof(tok)); 701 ts[tsc - 1] = t; 702 } 703 704 le->ret = parserexpr(ts, &ts[tsc - 1], ctx); 705 break; 706 case LEOF: 707 return NULL; 708 default: 709 parserErr("unexpected tok type", *l); 710 haserrored = 1; 711 } 712 if (!le) { 713 RESTORE(l); 714 haserrored = 1; 715 return NULL; 716 } 717 718 return le; 719 } 720