lex.c (6057B)
1 #include <stdlib.h> 2 #include <string.h> 3 #include <ctype.h> 4 5 #include "comp.h" 6 #include "y.tab.h" 7 8 #define LEXRET(t) memcpy(&Gtoken, &(t), sizeof((t))); yylval.tok = &Gtoken; lasttok = (t); return (t); 9 10 extern void yyerror(const char *msg); 11 12 struct Pos { 13 int row, col; 14 }; 15 16 struct Token { 17 struct Pos pos; 18 short type; 19 void *data; 20 void *dataend; 21 }; 22 23 struct Lexer { 24 struct Pos pos; 25 char *input; 26 struct Token (*f)(); 27 }; 28 29 struct Token Gtoken; 30 31 struct Token lex_func(); 32 struct Token lex_var(); 33 struct Token lex_name(); 34 struct Token lex_obrace(); 35 struct Token lex_cbrace(); 36 struct Token lex_ocbrace(); 37 struct Token lex_ccbrace(); 38 struct Token lex_semi(); 39 struct Token lex_assign(); 40 struct Token lex_int(); 41 struct Token lex_add(); 42 struct Token lex_sub(); 43 struct Token lex_mul(); 44 struct Token lex_div(); 45 struct Token lex_comma(); 46 47 struct Lexer lex = {0}; 48 49 struct Token lasttok = {0}; 50 51 void llerror(const char *msg) { 52 char *terms = ",{};"; 53 char *first = NULL, *new; 54 do { 55 new = strchr(lex.input, *terms); 56 if (!first) first = new; 57 if (new && new < first) 58 first = new; 59 } while (*(terms++) && new); 60 61 yyerror(msg); 62 if (!first) exit(1); 63 lex.input = first; 64 65 switch (lex.input[0]) { 66 case '{': lex.f = lex_ocbrace; break; 67 case '}': lex.f = lex_ccbrace; break; 68 case ',': lex.f = lex_comma; break; 69 case ';': lex.f = lex_semi; break; 70 } 71 } 72 73 void move(int off) { 74 lex.input += off; 75 lex.pos.col += off; 76 } 77 78 void skip() { 79 while (lex.input[0] && (isblank(lex.input[0]) || lex.input[0] == '\n')) { 80 if (lex.input[0] == '\n') { 81 lex.pos.row++; 82 lex.pos.col = 0; 83 } 84 move(1); 85 } 86 } 87 88 char peek() { 89 skip(); 90 return lex.input[0]; 91 } 92 93 struct Token lex_char(char c) { 94 skip(); 95 struct Pos start = lex.pos; 96 97 if (lex.input[0] != c) 98 llerror("Expected different char"); 99 move(1); 100 skip(); 101 102 LEXRET(((struct Token){start, c, NULL, NULL})); 103 } 104 105 struct Token lex_func() { 106 struct Pos start = lex.pos; 107 108 if (memcmp(lex.input, "func ", 5) != 0) 109 llerror("Expected 'func'!"); 110 move(5); 111 112 lex.f = &lex_name; 113 LEXRET(((struct Token){start, FUNC, NULL, NULL})); 114 } 115 116 struct Token lex_var() { 117 struct Pos start = lex.pos; 118 119 if (memcmp(lex.input, "var ", 4) != 0) 120 llerror("Expected 'var'!"); 121 move(4); 122 123 lex.f = &lex_name; 124 LEXRET(((struct Token){start, VAR, NULL, NULL})); 125 } 126 127 struct Token lex_name() { 128 struct Pos start = lex.pos; 129 int size = 0; 130 131 if (!isalpha(lex.input[0])) 132 llerror("Expected name!"); 133 134 while (isalnum(lex.input[size])) 135 size++; 136 137 struct Token t = {start, NAME, lex.input, lex.input + size}; 138 move(size); 139 140 switch (peek()) { 141 case '(': lex.f = &lex_obrace; break; 142 case '{': lex.f = &lex_ocbrace; break; 143 case '+': lex.f = &lex_add; break; 144 case '-': lex.f = &lex_sub; break; 145 case '*': lex.f = &lex_mul; break; 146 case '/': lex.f = &lex_div; break; 147 case ',': lex.f = &lex_comma; break; 148 case '=': lex.f = &lex_assign; break; 149 default: 150 if (lasttok.type == VAR) 151 lex.f = &lex_name; 152 else 153 llerror("Unexpected token!"); 154 } 155 156 LEXRET(t); 157 } 158 159 struct Token lex_obrace() { 160 struct Token t = lex_char('('); 161 162 switch (peek()) { 163 case ')': lex.f = &lex_cbrace; break; 164 case '(': lex.f = &lex_obrace; break; 165 default: 166 if (isdigit(peek())) lex.f = &lex_int; 167 else llerror("Unexpected token!"); 168 } 169 170 LEXRET(t); 171 } 172 173 struct Token lex_int() { 174 struct Pos start = lex.pos; 175 int size = 0; 176 177 if (!isdigit(lex.input[0])) 178 llerror("Expected number!"); 179 180 while (isdigit(lex.input[size])) 181 size++; 182 183 struct Token t = {start, INT, lex.input, lex.input + size}; 184 move(size); 185 186 switch (peek()) { 187 case ')': lex.f = &lex_cbrace; break; 188 case '+': lex.f = &lex_add; break; 189 case '-': lex.f = &lex_sub; break; 190 case '*': lex.f = &lex_mul; break; 191 case '/': lex.f = &lex_div; break; 192 case ',': lex.f = &lex_comma; break; 193 case ';': lex.f = &lex_semi; break; 194 default: llerror("Unexpected token!"); 195 } 196 197 LEXRET(t); 198 } 199 200 struct Token lex_ocbrace() { 201 struct Token t = lex_char('{'); 202 switch (peek()) { 203 default: 204 if (memcmp(lex.input, "var ", 4) == 0) 205 lex.f = &lex_var; 206 else 207 lex.f = &lex_name; 208 break; 209 } 210 LEXRET(t); 211 } 212 213 struct Token lex_ccbrace() { 214 struct Token t = lex_char('}'); 215 switch (peek()) { 216 case 'f': 217 if (memcmp(lex.input, "func", 4) == 0) lex.f = &lex_func; 218 break; 219 default: lex.f = &lex_name; 220 } 221 LEXRET(t); 222 } 223 224 struct Token lex_cbrace() { 225 struct Token t = lex_char(')'); 226 227 switch (peek()) { 228 case '(': lex.f = &lex_obrace; break; 229 case ')': lex.f = &lex_cbrace; break; 230 case '+': lex.f = &lex_add; break; 231 case '-': lex.f = &lex_sub; break; 232 case '*': lex.f = &lex_mul; break; 233 case '/': lex.f = &lex_div; break; 234 case ',': lex.f = &lex_comma; break; 235 case ';': lex.f = &lex_semi; break; 236 default: lex.f = &lex_name; break; 237 } 238 239 LEXRET(t); 240 } 241 242 struct Token lex_semi() { 243 struct Token t = lex_char(';'); 244 245 switch (peek()) { 246 case '}': lex.f = &lex_ccbrace; break; 247 default: lex.f = &lex_name; break; 248 } 249 250 LEXRET(t); 251 } 252 253 struct Token lex_assign() { 254 struct Token t = lex_char('='); 255 256 switch (peek()) { 257 case '(': lex.f = &lex_obrace; break; 258 default: 259 lex.f = (isdigit(peek())) ? &lex_int : &lex_name; 260 break; 261 } 262 263 LEXRET(t); 264 } 265 266 267 268 struct Token lex_comma() { 269 struct Token t = lex_char(','); 270 271 if (isdigit(peek())) 272 lex.f = &lex_int; 273 else if (isalpha(peek())) 274 lex.f = &lex_name; 275 else switch (peek()) { 276 case '(': lex.f = &lex_obrace; break; 277 default: llerror("Unexpeced token!"); 278 } 279 280 LEXRET(t); 281 } 282 283 284 285 #define MAKEOP(name, op) struct Token lex_##name() { \ 286 struct Token t = lex_char(op); \ 287 if (isdigit(peek())) \ 288 lex.f = &lex_int; \ 289 else if (isalpha(peek())) \ 290 lex.f = &lex_name; \ 291 else llerror("Unexpeced token!"); \ 292 LEXRET(t); \ 293 } 294 295 MAKEOP(add, '+') 296 MAKEOP(sub, '-') 297 MAKEOP(mul, '*') 298 MAKEOP(div, '/') 299 300 int yylex() { 301 skip(); 302 if (lex.input[0] == 0) return YYEOF; 303 return lex.f().type; 304 } 305 306 extern Func magic; 307 308 int main() { 309 lex = (struct Lexer){(struct Pos){0, 0}, 310 "func foo() i32 { \n\ 311 var a i8 = 5; \n\ 312 g((1 + 50) * 2); \n\ 313 h(3, 5); \n\ 314 } \n\ 315 func main() i32 { \n\ 316 var a i8 = 4; \n\ 317 g((1 + ) * 2); \n\ 318 h(3, 5); \n\ 319 }", 320 &lex_func}; 321 yyparse(); 322 }