spl

a Simple Programming Language
Log | Files | Refs

parse.c (14092B)


      1 #include <ctype.h>
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <limits.h>
      5 #include <stdbool.h>
      6 
      7 #include "lexer.h"
      8 #include "parse.h"
      9 #include "util.h"
     10 #include "maps.h"
     11 
     12 #define SAME(s1, s2) (strcmp(s1, s2) == 0) 
     13 #define SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
     14 
     15 map **scope = &vars;
     16 int haserrored = 0;
     17 
     18 char *builtintypes[] = {
     19 	"f32",
     20 	"f64",
     21 	"i8",
     22 	"i16",
     23 	"i32",
     24 	"i64",
     25 	"void",
     26 };
     27 
     28 char *builtinfns[] = {
     29 	"f32",
     30 	"f64",
     31 	"i8",
     32 	"i16",
     33 	"i32",
     34 	"i64",
     35 };
     36 
     37 int funcargs = false;
     38 
     39 void
     40 parserErr(const char *msg, lex l) {
     41 	loc lo = findloc(l);
     42 	fprintf(stderr, "%s:%d:%d: %s\n", l.name, lo.line, lo.col, msg);
     43 }
     44 
     45 void
     46 parserErrtok(const char *msg, tok t) {
     47 	loc lo = findloctok(t);
     48 	fprintf(stderr, "%s:%d:%d: %s\n", t.l.name, lo.line, lo.col, msg);
     49 }
     50 
     51 int
     52 isbasictype(char *t) {
     53 	for (int i = 0; i < SIZE(builtintypes); i++) 
     54 		if (SAME(t, builtintypes[i]))
     55 			return 1;
     56 	return 0;
     57 }
     58 
     59 type *
     60 mkbasictype(char *name, mctx *ctx) {
     61 	type *t = alloczctx(ctx, sizeof(type));
     62 	t->type = BASIC;
     63 	t->name = strdup(name);
     64 	return t;
     65 }
     66 
     67 type *
     68 mkarrtype(type *nested, mctx *ctx) {
     69 	type *t = alloczctx(ctx, sizeof(type));
     70 	t->type = ARR;
     71 	t->nested = nested;
     72 	return t;
     73 }
     74 
     75 int
     76 getpres(tok t) {
     77 	switch (t.op) {
     78 	case LEOF:
     79 		return -1;
     80 
     81 	case ADD:
     82 	case SUB:
     83 		return 1;
     84 
     85 	case MUL:
     86 	case DIV:
     87 		return 4;
     88 
     89 	case INT:
     90 	case FLOAT:
     91 	case NAME:
     92 		return 6;
     93 	case OBRACE:
     94 		return 8;
     95 
     96 	case NEGATE:
     97 		return 5;
     98 	}
     99 	return -2;
    100 }
    101 
    102 int
    103 isop(tok t) {
    104 	return (t.op == ADD || t.op == SUB || t.op == MUL || t.op == DIV);
    105 }
    106 
    107 tok *
    108 findend(tok *start, tok *end) {
    109 	int d = 1;
    110 	tok *t;
    111 
    112 	if (start->op != OBRACE) {
    113 		parserErrtok("expected a '('", *start);
    114 		return NULL;
    115 	}
    116 
    117 	for (t = &start[1]; d != 0; t = &t[1]) {
    118 		if (end->op != CBRACE && t == end) {
    119 			parserErrtok("unclosed brace", *t);
    120 			return NULL;
    121 		}
    122 
    123 		if (t->op == OBRACE) d++;
    124 		else if (t->op == CBRACE) d--;
    125 	}
    126 	return &t[-1];
    127 }
    128 
    129 tok *
    130 findlowest(tok *start, tok *end) {
    131 	tok *lowest = start, prev = {.op = LEOF};
    132 
    133 	for (tok *t = lowest ; t != end; t = &t[1]) {
    134 		/* re assign ops, this is for when an op has multiple meanings */
    135 		if (isop(prev) || prev.op == LEOF) {
    136 			switch (t->op) {
    137 			case SUB:
    138 				t->op = NEGATE;
    139 				break;
    140 			case MUL:
    141 				t->op = DEREF;
    142 				break;
    143 			}
    144 		}
    145 		else if (getpres(*t) < getpres(*lowest)) lowest = t;
    146 
    147 		/* move to the end brace, we only process the contents when nothing else is left */
    148 		if (t->op == OBRACE) {
    149 			if (!(t = findend(t, end))) return NULL;
    150 		}
    151 
    152 		/* checks if any of our skips ahead have put us at the end of expr */
    153 		if (t == end) break;
    154 
    155 		prev = *t;
    156 	}
    157 
    158 	return lowest;
    159 }
    160 
    161 
    162 /* checks if t2 can be losslessly casted to t1 */
    163 int
    164 isequaltype(type *t1, type *t2) {
    165 	if (t1->type == BASIC && t2->type == BASIC) {
    166 		if (t1->name[0] == t2->name[0]) {
    167 			if (atoi(t1->name + 1) >= atoi(t2->name + 1))
    168 				return 1;
    169 		}
    170 	} else if (t1->type == PTR && t2->type == PTR) 
    171 		return isequaltype(t1->nested, t2->nested);		
    172 	else if (t1->type == ARR && t2->type == ARR) 
    173 		return isequaltype(t1->nested, t2->nested);		
    174 	return 0;
    175 }
    176 
    177 fn *
    178 isbuiltin(char *name, mctx *ctx) {
    179 	fn *f = alloczctx(ctx, sizeof(fn));
    180 	for (int i = 0; i < SIZE(builtinfns); i++) {
    181 		if (SAME(name, builtinfns[i])) {
    182 			f->name = name;
    183 			f->rtype = mkbasictype(name, ctx);
    184 			f->argc = 1;
    185 			return f;
    186 		}
    187 	}
    188 	return NULL;
    189 }
    190 
    191 rexpr *
    192 parsefcall(tok *start, tok *end, mctx *ctx) {
    193 	rexpr *e = alloczctx(ctx, sizeof(rexpr));
    194 	tok *exprend;
    195 	fn *f;
    196 	int builtin = 1;
    197 	
    198 	/* TODO make this check for casts specifically, if the user casts, 
    199 	 * mark it as so, to allow for the comp step to fix everything up */
    200 	if (!(f = isbuiltin(start->name, ctx))) {
    201 		if (!(f = (fn *)lookupmap(funcs, start->name))) {
    202 			parserErrtok("no such function", *start);
    203 			return NULL;
    204 		}
    205 		builtin = 0;
    206 	}
    207 
    208 	e->fname = f->name;
    209 	e->type = f->rtype;
    210 	e->op = FUNCALL;
    211 
    212 	if (builtin && isbasictype(f->name)) 
    213 		e->op = CAST;
    214 
    215 	if (start[1].op != '(')
    216 		return NULL;
    217 	start = &start[2];
    218 	exprend = start;
    219 
    220 another:
    221 	while (!isterm(*exprend)) {
    222 		if (exprend->op == ')') {
    223 			if (exprend == start) 
    224 				goto noargs;
    225 			break;
    226 		}
    227 		exprend = &exprend[1];
    228 	}
    229 	e->argc++;
    230 	e->args = realloczctx(ctx, e->args, sizeof(rexpr) * e->argc);
    231 	e->args[e->argc - 1] = *parserexpr(start, &exprend[-1], ctx);
    232 	start = &exprend[1];
    233 
    234 	if (!builtin) {
    235 		if (!isequaltype(e->args[e->argc - 1].type, &f->args[e->argc - 1])) {
    236 			parserErrtok("type missmatch", *start);
    237 			return NULL;
    238 		}
    239 	}
    240 
    241 	if (exprend->op != ')')
    242 		goto another;
    243 
    244 noargs:
    245 	return e;	
    246 }
    247 
    248 type *
    249 mkptrtype(type *t, mctx *ctx) {
    250 	type *new = alloczctx(ctx, sizeof(type));
    251 	new->type = PTR;
    252 	new->nested = t;
    253 	return new;
    254 }
    255 
    256 rexpr *
    257 mklitteral(uint64_t n, type *t, mctx *ctx) {
    258 	rexpr *e = alloczctx(ctx, sizeof(rexpr));
    259 	e->type = t;
    260 	e->n = n;
    261 	return e;
    262 }
    263 
    264 rexpr *
    265 parsesimple(tok *start, tok *end, tok *lowest, mctx *ctx) {
    266 	rexpr *e;
    267 	var *v;
    268 	tok *current;
    269 
    270 	switch (lowest->op) {
    271 	case NAME:
    272 		if (lowest[1].op == '(') {
    273 			e = parsefcall(lowest, end, ctx);
    274 			return e;
    275 		}
    276 	case INT:
    277 	case FLOAT:
    278 		if (start != end) {
    279 			parserErrtok("trailing expression", *start);
    280 			return NULL;
    281 		}
    282  		e = alloczctx(ctx, sizeof(rexpr));
    283 		e->op = lowest->op;
    284 		e->n = lowest->n;	/* this is okay as n is 64 bits so any pointer type fits here */
    285 		switch (e->op) {
    286 		case INT:
    287 			if (e->n > INT32_MAX)
    288 				e->type = mkbasictype("i64", ctx);
    289 			else if (e->n > INT16_MAX)
    290 				e->type = mkbasictype("i32", ctx);
    291 			else if (e->n > INT8_MAX)
    292 				e->type = mkbasictype("i16", ctx);
    293 			else 
    294 				e->type = mkbasictype("i8", ctx);
    295 			break;
    296 		case FLOAT:
    297 			e->type = mkbasictype("f64", ctx);
    298 			break;
    299 		case NAME:
    300 			if (!(v = (var *)lookupmap(*scope, lowest->name))) {
    301 				parserErrtok("var is not declared", *start);
    302 				return NULL;
    303 			}
    304 			e->type = v->type;
    305 			break;
    306 		}
    307 		break;
    308 	case DQUOTE:
    309  		e = alloczctx(ctx, sizeof(rexpr));
    310 		e->op = DQUOTE;
    311 		for (e->elementc = 0; e->elementc < strlen(lowest->name); e->elementc++) {
    312 			e->elements = realloczctx(ctx, e->elements, (1 + e->elementc) * sizeof(rexpr));
    313 			e->elements[e->elementc] = *mklitteral(lowest->name[e->elementc], mkbasictype("i8", ctx), ctx); 
    314 		}
    315 		e->type = mkarrtype(mkbasictype("i8", ctx), ctx);
    316 		break;
    317 	case OBRACE:
    318  		e = alloczctx(ctx, sizeof(rexpr));
    319 		e->op = OBRACE;
    320 		e->e = parserexpr(&lowest[1], &findend(lowest, end)[-1], ctx);
    321 		e->type = e->e->type;
    322 		break;
    323 	case NEGATE:
    324  		e = alloczctx(ctx, sizeof(rexpr));
    325 		e->op = NEGATE;
    326 		e->e = parserexpr(&lowest[1], end, ctx);
    327 		e->type = e->e->type;
    328 		break;
    329 	case ADDROF:
    330  		e = alloczctx(ctx, sizeof(rexpr));
    331 		e->op = ADDROF;
    332 		e->e = parserexpr(&lowest[1], end, ctx);
    333 		e->type = mkptrtype(e->e->type, ctx);
    334 		break;
    335 	case DEREF:
    336 		e = alloczctx(ctx, sizeof(rexpr));
    337 		e->op = DEREF;
    338 		e->e = parserexpr(&lowest[1], end, ctx);
    339 		e->type = e->e->type->nested;
    340 		break;
    341 	default:
    342 		parserErrtok("unexpected token type", *start);
    343 		return NULL;
    344 	}
    345 
    346 	return e;
    347 }
    348 
    349 rexpr *
    350 parsebin(tok *start, tok *end, tok *lowest, mctx *ctx) {
    351 	rexpr *e = alloczctx(ctx, sizeof(rexpr));
    352 
    353 	if (!isop(*lowest)) {
    354 		parserErrtok("expected op", *lowest);
    355 		return NULL;
    356 	}
    357 
    358 	e->op = lowest->op;
    359 	e->expr[0] = parserexpr(start, &lowest[-1], ctx);
    360 	e->expr[1] = parserexpr(&lowest[1], end, ctx);
    361 
    362 	if (!isequaltype(e->expr[0]->type, e->expr[1]->type)) {
    363 		parserErrtok("type missmatch", *start);
    364 		return NULL;
    365 	}
    366 
    367 	e->type = e->expr[0]->type;
    368 
    369 	return e;
    370 }
    371 
    372 rexpr *
    373 parserexpr(tok *start, tok *end, mctx *ctx) {
    374 	rexpr *e;
    375 	tok *lowest;
    376 	if (!(lowest = findlowest(start, end))) return NULL;
    377 
    378 	/* simple 1 term expr ops */
    379 	if (lowest == start) e = parsesimple(start, end, lowest, ctx);
    380 
    381 	/* binary ops */
    382 	else e = parsebin(start, end, lowest, ctx);
    383 	
    384 	return e;
    385 }
    386 
    387 
    388 
    389 type *
    390 parsetype(lex *l, mctx *ctx) {
    391 	SAVE(l);
    392 	type *ty;
    393 	tok t = next(l), *ts = NULL;
    394 	int tcount = 0;
    395 	ty = alloczctx(ctx, sizeof(type));
    396 
    397 	if (t.op == MUL) {
    398 		ty->type = PTR;
    399 		ty->nested = parsetype(l, ctx);
    400 		return ty;
    401 	} else if (t.op == OSBRACE) {
    402 		if ((t = next(l)).op != CSBRACE) {
    403 			do {
    404 				ts = realloczctx(ctx, ts, ++tcount * sizeof(tok));
    405 				ts[tcount - 1] = t;
    406 			} while ((t = next(l)).op != CSBRACE);
    407 			ty->size = parserexpr(ts, &ts[tcount - 1], ctx);	
    408 			if (!isequaltype(mkbasictype("i64", ctx), ty->size->type)) {
    409 				parserErr("can not use non int type as array size", *l);
    410 				RESTORE(l);
    411 				return NULL;
    412 			}
    413 		}
    414 		ty->type = ARR;
    415 		ty->nested = parsetype(l, ctx);
    416 		return ty;
    417 	} else if (t.op != NAME) {
    418 		RESTORE(l);
    419 		return NULL;
    420 	}
    421 
    422 	if (!isbasictype(t.name)) {
    423 		parserErr("not a known type!", *l);
    424 		RESTORE(l);
    425 		return NULL;
    426 	}
    427 	ty->type = BASIC;
    428 	ty->name = t.name;
    429 	return ty;
    430 }
    431 
    432 int
    433 isterm(tok t) {
    434 	return (t.op == SEMI || t.op == COMMA);
    435 }
    436 
    437 lexpr *
    438 parseassign(lex *l, tok name, mctx *ctx) {
    439 	SAVE(l);
    440 	lexpr *le;
    441 	type *ty;
    442 	rexpr *r = NULL;
    443 	tok *arr, t;
    444 	var *v;
    445 	int count = 1;
    446 
    447 	if (!(ty = parsetype(l, ctx))) {
    448 		RESTORE(l);
    449 		return NULL;
    450 	}
    451 	if ((t = next(l)).op != ASSIGN) {
    452 		if (isterm(t) || (funcargs && t.op == ')')) {
    453 			if (t.op != SEMI)
    454 				unlex(l);
    455 			goto shortdec;
    456 		}
    457 		RESTORE(l);
    458 		return NULL;
    459 	}
    460 
    461 	arr = alloczctx(ctx, sizeof(tok));
    462 	while ((t = next(l)).op != LEOF && !isterm(t)) {
    463 		arr[count++ - 1] = t;
    464 		arr = realloczctx(ctx, arr, sizeof(tok) * count);
    465 	}
    466 	if (count == 1) {
    467 		RESTORE(l);
    468 		return NULL;
    469 	} else if (!(r = parserexpr(arr, &arr[count - 2], ctx))) {
    470 		RESTORE(l);
    471 		return NULL;
    472 	}
    473 
    474 shortdec:
    475 	if (lookupmap(*scope, name.name)) {
    476 		parserErr("redeclaration of var", *l);
    477 		RESTORE(l);	
    478 		return NULL;
    479 	}
    480 
    481 	le = alloczctx(ctx, sizeof(lexpr));
    482 	le->name = alloczctx(ctx, strlen(name.name) + 1);
    483 	strcpy(le->name, name.name);
    484 	le->op = LEXPR_ASSIGN;
    485 	le->type = ty;
    486 	le->rexpr = r;
    487 
    488 	if (r && !isequaltype(ty, r->type)) {
    489 		parserErr("type missmatch", *l);
    490 		RESTORE(l);
    491 		return NULL;
    492 	}
    493 
    494 	v = alloczctx(ctx, sizeof(var));
    495 	v->name = le->name;
    496 	v->type = le->type;
    497 	*scope = addmap(*scope, v, le->name, ctx);
    498 
    499 	return le;
    500 }
    501 
    502 
    503 lexpr *
    504 parsefunc(lex *l, mctx *ctx) {
    505 	SAVE(l);
    506 	tok name, t;
    507 	lexpr *args = NULL, *arg = NULL, *func = NULL, *body = NULL;
    508 	type *ty;
    509 	int argc = 0;
    510 
    511 	if ((name = next(l)).op != NAME) {
    512 		parserErr("function has no name", *l);
    513 		RESTORE(l);
    514 		return NULL;
    515 	}
    516 	if (next(l).op != OBRACE) {
    517 		parserErr("expected '('", *l);
    518 		RESTORE(l);
    519 		return NULL;
    520 	}
    521 	for (;;) {
    522 		t = next(l);
    523 		switch (t.op) {
    524 		case NAME:
    525 			funcargs = 1;
    526 			if (!(arg = parseassign(l, t, ctx))) {
    527 				parserErr("malformed function arg", *l);
    528 				RESTORE(l);
    529 				return NULL;
    530 			}
    531 			args = realloczctx(ctx, args, sizeof(lexpr) * ++argc);
    532 			args[argc - 1] = *arg;
    533 			funcargs = 0;
    534 			break;
    535 		case COMMA:
    536 			continue;
    537 		case CBRACE:
    538 			goto done;
    539 		default:
    540 			parserErr("unexpected token", *l);
    541 			RESTORE(l);
    542 			return NULL;
    543 		}
    544 	}
    545 done:
    546 	if (argc == 0) {
    547 		argc = 0;
    548 		args = NULL;
    549 	}
    550 
    551 	if (!(ty = parsetype(l, ctx))) {
    552 		parserErr("function has no type", *l);
    553 		RESTORE(l);
    554 		return NULL;
    555 	}
    556 
    557 	if ((t = next(l)).op == ';')
    558 		goto prototype;
    559 	else if (t.op != '{') {
    560 		parserErr("expected function body, or ';'", *l);
    561 		RESTORE(l);
    562 		return NULL;
    563 	}
    564 
    565 	func = alloczctx(ctx, sizeof(lexpr));
    566 	do {
    567 		func->body = realloczctx(ctx, func->body, sizeof(lexpr) * (func->bodyc + 1));
    568 		body = parselexpr(l, ctx);
    569 		if (!body) {
    570 			RESTORE(l);	
    571 			return NULL;
    572 		}
    573 		func->body[func->bodyc++] = *body;
    574 		t = next(l);
    575 	} while (t.op != '}' && unlex(l));
    576 	
    577 	goto fullbody;
    578 
    579 prototype:
    580 	func = alloczctx(ctx, sizeof(lexpr));
    581 fullbody:
    582 	func->op = LEXPR_FUNC;
    583 	func->rtype = ty;
    584 	func->name = name.name;
    585 	func->args = args;
    586 	func->argc = argc;
    587 	return func;
    588 }
    589 
    590 lexpr *
    591 parsereassign(lex *l, tok t, mctx *ctx) {
    592 	SAVE(l);
    593 	lexpr *le;
    594 	rexpr *r;
    595 	tok *args = NULL, tk;
    596 	int argc = 0;
    597 
    598 	if (next(l).op != ASSIGN) {
    599 		RESTORE(l);
    600 		return NULL;
    601 	}
    602 
    603 	for (tk = next(l); !isterm(tk); tk = next(l)) {
    604 		args = realloczctx(ctx, args, sizeof(tok) * (argc + 1));
    605 		args[argc++] = tk;
    606 	}
    607 	if (argc == 0) {
    608 		RESTORE(l);
    609 		return NULL;
    610 	}
    611 	if (!(r = parserexpr(&args[0], &args[argc - 1], ctx))) {
    612 		RESTORE(l);
    613 		return NULL;
    614 	}
    615 
    616 	le = alloczctx(ctx, sizeof(lexpr));
    617 	le->op = LEXPR_REASSIGN;
    618 	le->name = t.name;
    619 	le->rexpr = r;
    620 	return le;
    621 }
    622 
    623 map *
    624 mknewscope(map *prevscope, mctx *ctx) {
    625 	map *new = NULL;
    626 	if (!prevscope)
    627 		return NULL;
    628 	new = addmap(new, prevscope->data, prevscope->id, ctx);
    629 	if (prevscope->next)
    630 		new->next = mknewscope(prevscope->next, ctx); 
    631 	return new;
    632 }
    633 
    634 lexpr *
    635 parselexpr(lex *l, mctx *ctx) {
    636 	SAVE(l);
    637 	lexpr *le;
    638 	fn *f;
    639 	map *newscope;
    640 	tok t = next(l), *ts = NULL;
    641 	int tsc = 0;
    642 
    643 	switch (t.op) {
    644 	case NAME:
    645 		if (*l->ptr == '(') {
    646 			ts = alloczctx(ctx, sizeof(tok));
    647 			ts[tsc++] = t;
    648 			while ((t = next(l)).op != ';') {
    649 				ts = realloczctx(ctx, ts, ++tsc * sizeof(tok));
    650 				ts[tsc - 1] = t;
    651 			}
    652 
    653 			le = alloczctx(ctx, sizeof(lexpr));
    654 			le->rexpr = parsefcall(ts, &ts[tsc - 1], ctx);
    655 			if (!le->rexpr) {
    656 				RESTORE(l);
    657 				haserrored = 1;
    658 				return NULL;
    659 			}
    660 			le->op = LEXPR_FCALL;
    661 		}
    662 		else if (!(le = parseassign(l, t, ctx)))
    663 			le = parsereassign(l, t, ctx);
    664 		break;
    665 	case FUNC:
    666 		newscope = mknewscope(vars, ctx);
    667 		scope = &newscope;
    668 
    669 		le = parsefunc(l, ctx);	/* we can drop the FUNC token, as it doesnt hold any info */
    670 		scope = &vars;
    671 		if (!le) {
    672 			RESTORE(l);	
    673 			haserrored = 1;
    674 			return NULL;
    675 		}
    676 
    677 		f = alloczctx(ctx, sizeof(fn));
    678 		f->name = le->fname;
    679 		f->rtype = le->rtype;
    680 		f->argc = le->argc;
    681 		for (int i = 0; i < f->argc; i++) {
    682 			f->args = realloczctx(ctx, f->args, (i + 1) * sizeof(type));
    683 			f->args[i] = *le->args[i].type;
    684 		}
    685 		if (lookupmap(funcs, f->name)) {
    686 			parserErr("function already exists", *l);
    687 			RESTORE(l);
    688 			haserrored = 1;
    689 			return NULL;
    690 		}
    691 		funcs = addmap(funcs, f, f->name, ctx);
    692 		break;
    693 	case RETURN:
    694 		le = alloczctx(ctx, sizeof(lexpr));
    695 		le->op = RETURN;
    696 		if (*l->ptr == ';') 
    697 			break;
    698 		
    699 		while ((t = next(l)).op != ';') {
    700 			ts = realloczctx(ctx, ts, ++tsc * sizeof(tok));
    701 			ts[tsc - 1] = t;
    702 		}
    703 
    704 		le->ret = parserexpr(ts, &ts[tsc - 1], ctx);
    705 		break;
    706 	case LEOF:
    707 		return NULL;
    708 	default:
    709 		parserErr("unexpected tok type", *l);
    710 		haserrored = 1;
    711 	}
    712 	if (!le) {
    713 		RESTORE(l);
    714 		haserrored = 1;
    715 		return NULL;
    716 	}
    717 
    718 	return le;
    719 }
    720