lexer.c (2382B)
1 #include <ctype.h> 2 #include <stdio.h> 3 #include <stdlib.h> 4 #include <string.h> 5 #include <stdint.h> 6 7 #include "lexer.h" 8 9 loc 10 findloc(lex l) { 11 int lines = 1; 12 int col = 1; 13 for (char *c = l.input; c != l.ptr; c++, col++) { 14 if (*c == '\n') { 15 col = 0; 16 lines++; 17 } 18 } 19 20 return (loc){lines, col}; 21 } 22 23 loc 24 findloctok(tok t) { 25 return findloc(t.l); 26 } 27 28 void 29 lexErr(lex *l) { 30 fprintf(stderr, "unexpected token '%c'\n", *l->ptr); 31 exit(1); 32 } 33 34 double 35 lexFloat(lex *l) { 36 l->prev = l->ptr; 37 char *end = NULL; 38 double n = strtold(l->ptr, &end); 39 l->ptr = end; 40 return n; 41 } 42 43 uint64_t 44 lexNum(lex *l) { 45 l->prev = l->ptr; 46 char *end = NULL; 47 uint64_t n = strtoll(l->ptr, &end, 0); 48 l->ptr = end; 49 return n; 50 } 51 52 char * 53 lexName(lex *l) { 54 char name[32] = {0}; 55 for (int i = 0; i < 32 && l->ptr; i++, l->ptr++) { 56 if (!isalnum(*l->ptr)) break; 57 name[i] = *l->ptr; 58 } 59 return strdup(name); /* TODO need to cleanup this */ 60 } 61 62 char * 63 unlex(lex *l) { 64 return (l->ptr = l->prev); 65 } 66 67 tok 68 next(lex *l) { 69 l->prev = l->ptr; 70 tok t = {0}; 71 t.l = *l; 72 char *estring; 73 74 switch (*l->ptr) { 75 case 0: 76 t.op = LEOF; 77 return t; 78 79 case '\n': 80 case ' ': 81 case '\t': 82 case '\v': 83 l->ptr++; 84 return next(l); 85 86 case DQUOTE: 87 t.op = DQUOTE; 88 *(estring = strchr(++l->ptr, '\"')) = 0; 89 t.name = strdup(l->ptr); 90 l->ptr = estring + 1; 91 return t; 92 93 case ADD: 94 case SUB: 95 case MUL: 96 case DIV: 97 case OBRACE: 98 case CBRACE: 99 case OCBRACE: 100 case CCBRACE: 101 case OSBRACE: 102 case CSBRACE: 103 case ASSIGN: 104 case SEMI: 105 case COMMA: 106 case ADDROF: 107 t.op = *l->ptr++; 108 return t; 109 } 110 if (memcmp(l->ptr, ":=", 2) == 0) { 111 l->ptr += 2; 112 t.op = WALRUS; 113 } else if (memcmp(l->ptr, "return", 6) == 0) { 114 l->ptr += 6; 115 t.op = RETURN; 116 } else if (memcmp(l->ptr, "func", 4) == 0) { 117 l->ptr += 4; 118 t.op = FUNC; 119 } else if (isdigit(*l->ptr)) { 120 t.op = INT; 121 t.n = lexNum(l); 122 if (*l->ptr == '.') { 123 unlex(l); 124 t.f = lexFloat(l); 125 t.op = FLOAT; 126 } 127 } else if (isalpha(*l->ptr)) { 128 t.op = NAME; 129 t.name = lexName(l); 130 } else 131 lexErr(l); 132 133 return t; 134 } 135 136 void 137 printTok(tok *t) { 138 switch (t->op) { 139 case INT: 140 printf("INT: %ld\n", t->n); 141 break; 142 case LEOF: 143 printf("$\n"); 144 break; 145 case ADD: 146 printf("ADD\n"); 147 break; 148 case SUB: 149 printf("SUB\n"); 150 break; 151 case MUL: 152 printf("MUL\n"); 153 break; 154 case DIV: 155 printf("DIV\n"); 156 break; 157 } 158 } 159 160 lex 161 mklexer(char *input) { 162 lex l = (lex){input, input}; 163 return l; 164 }