comp

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

lex.c (6057B)


      1 #include <stdlib.h>
      2 #include <string.h>
      3 #include <ctype.h>
      4 
      5 #include "comp.h"
      6 #include "y.tab.h"
      7 
      8 #define LEXRET(t) memcpy(&Gtoken, &(t), sizeof((t))); yylval.tok = &Gtoken; lasttok = (t); return (t);
      9 
     10 extern void yyerror(const char *msg);
     11 
     12 struct Pos {
     13 	int row, col;
     14 };
     15 
     16 struct Token {
     17 	struct Pos pos;
     18 	short type;
     19 	void *data;
     20 	void *dataend;
     21 };
     22 
     23 struct Lexer {
     24 	struct Pos pos;
     25 	char *input;
     26 	struct Token (*f)();
     27 };
     28 
     29 struct Token Gtoken;
     30 
     31 struct Token lex_func(); 
     32 struct Token lex_var(); 
     33 struct Token lex_name(); 
     34 struct Token lex_obrace(); 
     35 struct Token lex_cbrace(); 
     36 struct Token lex_ocbrace(); 
     37 struct Token lex_ccbrace(); 
     38 struct Token lex_semi(); 
     39 struct Token lex_assign(); 
     40 struct Token lex_int(); 
     41 struct Token lex_add(); 
     42 struct Token lex_sub(); 
     43 struct Token lex_mul(); 
     44 struct Token lex_div(); 
     45 struct Token lex_comma(); 
     46 
     47 struct Lexer lex = {0};
     48 
     49 struct Token lasttok = {0};
     50 
     51 void llerror(const char *msg) {
     52 	char *terms = ",{};";
     53 	char *first = NULL, *new;
     54 	do {
     55 		new = strchr(lex.input, *terms);
     56 		if (!first) first = new;
     57 		if (new && new < first) 
     58 			first = new;
     59 	} while (*(terms++) && new);
     60 
     61 	yyerror(msg);
     62 	if (!first) exit(1);
     63 	lex.input = first;
     64 
     65 	switch (lex.input[0]) {
     66 	case '{': lex.f = lex_ocbrace; break;
     67 	case '}': lex.f = lex_ccbrace; break;
     68 	case ',': lex.f = lex_comma; break;
     69 	case ';': lex.f = lex_semi; break;
     70 	}
     71 }
     72 
     73 void move(int off) {
     74 	lex.input += off;
     75 	lex.pos.col += off;
     76 }
     77 
     78 void skip() {
     79 	while (lex.input[0] && (isblank(lex.input[0]) || lex.input[0] == '\n')) {
     80 		if (lex.input[0] == '\n') {
     81 			lex.pos.row++;
     82 			lex.pos.col = 0;
     83 		}
     84 		move(1);
     85 	}		
     86 }
     87 
     88 char peek() {
     89 	skip();
     90 	return lex.input[0];
     91 }
     92 
     93 struct Token lex_char(char c) {
     94 	skip();
     95 	struct Pos start = lex.pos;
     96 
     97 	if (lex.input[0] != c)
     98 		llerror("Expected different char");
     99 	move(1);
    100 	skip();
    101 
    102 	LEXRET(((struct Token){start, c, NULL, NULL}));
    103 }
    104 
    105 struct Token lex_func() {
    106 	struct Pos start = lex.pos;
    107 
    108 	if (memcmp(lex.input, "func ", 5) != 0)
    109 		llerror("Expected 'func'!");
    110 	move(5);
    111 
    112 	lex.f = &lex_name;
    113 	LEXRET(((struct Token){start, FUNC, NULL, NULL}));
    114 }
    115 
    116 struct Token lex_var() {
    117 	struct Pos start = lex.pos;
    118 
    119 	if (memcmp(lex.input, "var ", 4) != 0)
    120 		llerror("Expected 'var'!");
    121 	move(4);
    122 
    123 	lex.f = &lex_name;
    124 	LEXRET(((struct Token){start, VAR, NULL, NULL}));
    125 }
    126 
    127 struct Token lex_name() {
    128 	struct Pos start = lex.pos;
    129 	int size = 0;
    130 
    131 	if (!isalpha(lex.input[0]))
    132 		llerror("Expected name!");
    133 
    134 	while (isalnum(lex.input[size]))
    135 		size++;
    136 	
    137 	struct Token t = {start, NAME, lex.input, lex.input + size};
    138 	move(size);
    139 
    140 	switch (peek()) {
    141 	case '(': lex.f = &lex_obrace; break;
    142 	case '{': lex.f = &lex_ocbrace; break;
    143 	case '+': lex.f = &lex_add; break;
    144 	case '-': lex.f = &lex_sub; break;
    145 	case '*': lex.f = &lex_mul; break;
    146 	case '/': lex.f = &lex_div; break;
    147 	case ',': lex.f = &lex_comma; break;
    148 	case '=': lex.f = &lex_assign; break;
    149 	default: 
    150 		  if (lasttok.type == VAR)
    151 			  lex.f = &lex_name;
    152 		  else
    153 		  	llerror("Unexpected token!");
    154 	}
    155 
    156 	LEXRET(t);
    157 }
    158 
    159 struct Token lex_obrace() {
    160 	struct Token t = lex_char('(');
    161 
    162 	switch (peek()) {
    163 	case ')': lex.f = &lex_cbrace; break;
    164 	case '(': lex.f = &lex_obrace; break;
    165 	default: 
    166 		  if (isdigit(peek())) lex.f = &lex_int;
    167 		  else llerror("Unexpected token!");
    168 	}
    169 
    170 	LEXRET(t);
    171 }
    172 
    173 struct Token lex_int() {
    174 	struct Pos start = lex.pos;
    175 	int size = 0;
    176 
    177 	if (!isdigit(lex.input[0]))
    178 		llerror("Expected number!");
    179 	
    180 	while (isdigit(lex.input[size]))
    181 		size++;
    182 
    183 	struct Token t = {start, INT, lex.input, lex.input + size};
    184 	move(size);
    185 
    186 	switch (peek()) {
    187 	case ')': lex.f = &lex_cbrace; break;
    188 	case '+': lex.f = &lex_add; break;
    189 	case '-': lex.f = &lex_sub; break;
    190 	case '*': lex.f = &lex_mul; break;
    191 	case '/': lex.f = &lex_div; break;
    192 	case ',': lex.f = &lex_comma; break;
    193 	case ';': lex.f = &lex_semi; break;
    194 	default: llerror("Unexpected token!");
    195 	}
    196 
    197 	LEXRET(t);
    198 }
    199 
    200 struct Token lex_ocbrace() {
    201 	struct Token t = lex_char('{');
    202 	switch (peek()) {
    203 	default: 
    204 		if (memcmp(lex.input, "var ", 4) == 0)
    205 			lex.f = &lex_var; 
    206 		else
    207 			lex.f = &lex_name; 
    208 		break;
    209 	}
    210 	LEXRET(t);
    211 }
    212 
    213 struct Token lex_ccbrace() {
    214 	struct Token t = lex_char('}');
    215 	switch (peek()) {
    216 	case 'f':
    217 		if (memcmp(lex.input, "func", 4) == 0) lex.f = &lex_func;
    218 		break;
    219 	default: lex.f = &lex_name;
    220 	}
    221 	LEXRET(t);
    222 }
    223 
    224 struct Token lex_cbrace() {
    225 	struct Token t = lex_char(')');
    226 
    227 	switch (peek()) {
    228 	case '(': lex.f = &lex_obrace; break;
    229 	case ')': lex.f = &lex_cbrace; break;
    230 	case '+': lex.f = &lex_add; break;
    231 	case '-': lex.f = &lex_sub; break;
    232 	case '*': lex.f = &lex_mul; break;
    233 	case '/': lex.f = &lex_div; break;
    234 	case ',': lex.f = &lex_comma; break;
    235 	case ';': lex.f = &lex_semi; break;
    236 	default: lex.f = &lex_name; break;
    237 	}
    238 
    239 	LEXRET(t);
    240 }
    241 
    242 struct Token lex_semi() {
    243 	struct Token t = lex_char(';');
    244 
    245 	switch (peek()) {
    246 	case '}': lex.f = &lex_ccbrace; break;
    247 	default: lex.f = &lex_name; break;
    248 	}
    249 
    250 	LEXRET(t);
    251 }
    252 
    253 struct Token lex_assign() {
    254 	struct Token t = lex_char('=');
    255 
    256 	switch (peek()) {
    257 	case '(': lex.f = &lex_obrace; break;
    258 	default: 
    259 		 lex.f = (isdigit(peek())) ? &lex_int : &lex_name; 
    260 		 break;
    261 	}
    262 
    263 	LEXRET(t);
    264 }
    265 
    266 
    267 
    268 struct Token lex_comma() {
    269 	struct Token t = lex_char(',');
    270 
    271 	if (isdigit(peek())) 
    272 		lex.f = &lex_int; 
    273 	else if (isalpha(peek()))
    274 		lex.f = &lex_name; 
    275 	else switch (peek()) {
    276 	case '(': lex.f = &lex_obrace; break;
    277 	default: llerror("Unexpeced token!"); 
    278 	}
    279 
    280 	LEXRET(t);
    281 }
    282 
    283 
    284 
    285 #define MAKEOP(name, op) struct Token lex_##name() { \
    286 	struct Token t = lex_char(op); \
    287 	if (isdigit(peek())) \
    288 		lex.f = &lex_int; \
    289 	else if (isalpha(peek())) \
    290 		lex.f = &lex_name; \
    291 	else llerror("Unexpeced token!"); \
    292 	LEXRET(t); \
    293 } 
    294 
    295 MAKEOP(add, '+')
    296 MAKEOP(sub, '-')
    297 MAKEOP(mul, '*')
    298 MAKEOP(div, '/')
    299 
    300 int yylex() {
    301 	skip();
    302 	if (lex.input[0] == 0) return YYEOF;
    303 	return lex.f().type;
    304 }
    305 
    306 extern Func magic;
    307 
    308 int main() {
    309 	lex = (struct Lexer){(struct Pos){0, 0}, 
    310 		"func foo() i32 { 		\n\
    311 			var a i8 = 5;		\n\
    312 			g((1 + 50) * 2); 	\n\
    313 			h(3, 5);		\n\
    314 		}				\n\
    315 		func main() i32 { 		\n\
    316 			var a i8 = 4;		\n\
    317 			g((1 + ) * 2); 	\n\
    318 			h(3, 5);		\n\
    319 		}", 
    320 		&lex_func};
    321 	yyparse();
    322 }