lex.c (4070B)
1 #include <stdio.h> 2 #include <stdlib.h> 3 #include <string.h> 4 #include <ctype.h> 5 6 7 #include "lex.h" 8 9 #define strnul(s) (s + strlen(s)) 10 11 lex_val lex_error = {UNKNOWN, NULL}; 12 lex_val lv = { 0 }; 13 14 char *input, *startpos, *endpos; 15 size_t input_len = 0; 16 lex_val *(*nextfn)(void); 17 int skipped = 0; 18 19 void 20 drainfile(FILE *in) { 21 input = malloc(input_len + 1); 22 fread(input, 1, input_len, in); 23 input[input_len] = 0; 24 } 25 26 int 27 getchr(void) { 28 int c; 29 skipped = 0; 30 while ((c = *endpos) != 0 && (isblank(c) || c == '\n')) { 31 skipped++; 32 endpos++; 33 } 34 if (c == 0) 35 lex_error.type = EOI; 36 37 endpos++; 38 return c; 39 } 40 41 char * 42 check_bytes(char *str) { 43 char *s; 44 for (s = str; *s; s++) if (getchr() != *s) return NULL; 45 return str; 46 } 47 48 int 49 peekc(void) { 50 int c = getchr(); 51 endpos--; 52 return c; 53 } 54 55 lex_val * 56 lex_char(char c, enum lex_type t) { 57 if (getchr() != c) return &lex_error; 58 lv.type = t; 59 lv.data = NULL; 60 return &lv; 61 } 62 63 lex_val * 64 lex_type(void) { 65 switch (peekc()) { 66 case 'b': 67 if (!(lv.data = check_bytes("byte"))) return &lex_error; 68 lv.type = BYTE; 69 break; 70 case 's': 71 if (!(lv.data = check_bytes("short"))) return &lex_error; 72 lv.type = SHORT; 73 break; 74 case 'l': 75 if (!(lv.data = check_bytes("long"))) return &lex_error; 76 lv.type = LONG; 77 break; 78 case '*': 79 lv.type = PTR; 80 lv.data = check_bytes("*"); 81 break; 82 default: return &lex_error; 83 } 84 if (peekc() == '*') nextfn = &lex_type; 85 else nextfn = &lex_name; 86 return &lv; 87 } 88 89 90 91 lex_val * 92 lex_comma(void) { 93 return lex_char(',', COMMA); 94 } 95 96 lex_val * 97 lex_semi(void) { 98 nextfn = &lex_type; 99 return lex_char(';', SEMI); 100 } 101 102 lex_val * 103 lex_assign(void) { 104 nextfn = &lex_value; 105 return lex_char('=', ASSIGN); 106 } 107 108 lex_val * 109 lex_sym(void) { 110 switch (peekc()) { 111 case '<': 112 switch (peekc()) { 113 case '=': getchr(); lv.type = LTE; break; 114 default: lv.type = LT; break; 115 } 116 return &lv; 117 case '>': 118 switch (peekc()) { 119 case '=': getchr(); lv.type = GTE; break; 120 default: lv.type = GT; break; 121 } 122 return &lv; 123 case '!': 124 if (!check_bytes("=")) return &lex_error; 125 lv.type = NEQ; 126 break; 127 case '=': return lex_assign(); 128 case ',': return lex_comma(); 129 case ';': return lex_semi(); 130 131 case '+': lv.type = LT; break; 132 case '-': lv.type = LT; break; 133 case '/': lv.type = LT; break; 134 case '*': lv.type = LT; break; 135 case '&': lv.type = LT; break; 136 } 137 138 return &lv; 139 } 140 141 /* TODO read negative values */ 142 lex_val * 143 lex_number(void) { 144 static char num[16]; 145 char i, c; 146 for (i = 0, c = getchr(); i < 16 && isdigit(c); i++, c = getchr()) 147 num[(int)i] = c; 148 endpos--; 149 150 lv.type = NUM; 151 lv.data = num; 152 153 nextfn = &lex_sym; 154 155 return &lv; 156 } 157 158 /* TODO make this read ints and other litterals */ 159 lex_val * 160 lex_value(void) { 161 char c = peekc(); 162 if (isdigit(c)) /* number */ 163 return lex_number(); 164 switch (c) { 165 case '\"': /* strlit */ 166 167 default: /* name */ 168 return lex_name(); 169 } 170 } 171 172 lex_val * 173 lex_name(void) { 174 static char name[32]; 175 char c, len = 0; 176 177 memset(name, 0, 32); /* note this resets the previous value, 178 when converting to an ast, use a dup */ 179 180 if (peekc() == '*') { 181 getchr(); 182 lv.type = DEREF; 183 return &lv; 184 } 185 186 if (!isalpha((c = getchr()))) return &lex_error; 187 do { 188 memcpy(strnul(name), &c, 1); 189 if ((len++ + 1) == 32) return &lex_error; 190 } while(isalnum((c = getchr())) && skipped == 0); 191 endpos--; 192 lv.data = name; 193 lv.type = NAME; 194 195 switch (peekc()) { 196 case '=': 197 case ',': 198 case ';': 199 case '<': 200 case '>': 201 case '!': 202 case '+': 203 case '-': 204 case '/': 205 case '*': /* CHECK FOR MUL AND PTR */ 206 case '&': 207 nextfn = &lex_sym; break; 208 break; 209 default: return &lex_error; 210 } 211 212 return &lv; 213 } 214 215 void 216 init_lexer(FILE *in) { 217 fseek(in, 0, SEEK_END); 218 input_len = ftell(in); 219 rewind(in); 220 221 drainfile(in); 222 startpos = input; 223 endpos = startpos; 224 225 nextfn = &lex_type; 226 } 227 228 int 229 get_err_len(void) { 230 return endpos - startpos; 231 } 232 233 int get_line_num(char *s) { 234 int count = 1; 235 size_t i; 236 for (i = 0; i < input_len && &input[i] != s; i++) 237 if (input[i] == '\n') count++; 238 return count; 239 } 240 241 lex_val * 242 get_next(void) { 243 memset(&lv, 0, sizeof(lv)); 244 startpos = endpos; 245 return (nextfn) ? nextfn() : NULL; 246 }