hlc

High level language for lli
Log | Files | Refs

lex.c (4070B)


      1 #include <stdio.h>
      2 #include <stdlib.h>
      3 #include <string.h>
      4 #include <ctype.h>
      5 
      6 
      7 #include "lex.h"
      8 
      9 #define strnul(s) (s + strlen(s))
     10 
/* Shared failure token returned by the lex_* functions; getchr() changes its
 * type to EOI once the end of the input is reached. */
lex_val lex_error = {UNKNOWN, NULL};
/* The single token object the lex_* functions fill in and return; get_next()
 * zeroes it before every call. */
lex_val lv = { 0 };

/* input: buffer allocated by drainfile(); startpos: where the current token
 * scan began (set by get_next()); endpos: the read cursor moved by getchr(). */
char *input, *startpos, *endpos;
/* Number of bytes in input, set by init_lexer() from ftell(). */
size_t input_len = 0;
/* Lexer function that get_next() will call for the next token. */
lex_val *(*nextfn)(void);
/* Count of blank/newline characters skipped by the most recent getchr(). */
int skipped = 0;
     18 
     19 void 
     20 drainfile(FILE *in) {
     21 	input = malloc(input_len + 1);
     22 	fread(input, 1, input_len, in);
     23 	input[input_len] = 0;
     24 }
     25 
     26 int
     27 getchr(void) {
     28 	int c;
     29 	skipped = 0;
     30 	while ((c = *endpos) != 0 && (isblank(c) || c == '\n')) {
     31 		skipped++;
     32 		endpos++;
     33 	}
     34 	if (c == 0)
     35 		lex_error.type = EOI;
     36 
     37 	endpos++;
     38 	return c;
     39 }
     40 
     41 char *
     42 check_bytes(char *str) {
     43 	char *s;
     44 	for (s = str; *s; s++) if (getchr() != *s) return NULL;
     45 	return str;
     46 }
     47 
     48 int
     49 peekc(void) {
     50 	int c = getchr();
     51 	endpos--;
     52 	return c;
     53 }
     54 
     55 lex_val *
     56 lex_char(char c, enum lex_type t) {
     57 	if (getchr() != c) return &lex_error;
     58 	lv.type = t;
     59 	lv.data = NULL;
     60 	return &lv;
     61 }
     62 
     63 lex_val *
     64 lex_type(void) {
     65 	switch (peekc()) {
     66 	case 'b': 
     67 		if (!(lv.data = check_bytes("byte"))) return &lex_error;
     68 		lv.type = BYTE;
     69 		break;
     70 	case 's':
     71 		if (!(lv.data = check_bytes("short"))) return &lex_error;
     72 		lv.type = SHORT;
     73 		break;
     74 	case 'l':
     75 		if (!(lv.data = check_bytes("long"))) return &lex_error;
     76 		lv.type = LONG;
     77 		break;
     78 	case '*':
     79 		lv.type = PTR;
     80 		lv.data = check_bytes("*");
     81 		break;
     82 	default: return &lex_error;
     83 	}
     84 	if (peekc() == '*') nextfn = &lex_type;
     85 	else nextfn = &lex_name;
     86 	return &lv;
     87 }
     88 
     89 
     90 
     91 lex_val *
     92 lex_comma(void) {
     93 	return lex_char(',', COMMA);
     94 }
     95 
     96 lex_val *
     97 lex_semi(void) {
     98 	nextfn = &lex_type;
     99 	return lex_char(';', SEMI);
    100 }
    101 
    102 lex_val *
    103 lex_assign(void) {
    104 	nextfn = &lex_value;
    105 	return lex_char('=', ASSIGN);
    106 }
    107 
    108 lex_val *
    109 lex_sym(void) {
    110 	switch (peekc()) {
    111 	case '<': 
    112 		switch (peekc()) {
    113 		case '=': getchr(); lv.type = LTE; break;
    114 		default: lv.type = LT; break;
    115 		}
    116 		return &lv;
    117 	case '>': 
    118 		switch (peekc()) {
    119 		case '=': getchr(); lv.type = GTE; break;
    120 		default: lv.type = GT; break;
    121 		}
    122 		return &lv;
    123 	case '!': 
    124 		if (!check_bytes("=")) return &lex_error;
    125 			lv.type = NEQ; 
    126 		break;
    127 	case '=': return lex_assign(); 
    128 	case ',': return lex_comma();
    129 	case ';': return lex_semi(); 
    130 
    131 	case '+': lv.type = LT; break; 
    132 	case '-': lv.type = LT; break;
    133 	case '/': lv.type = LT; break;
    134 	case '*': lv.type = LT; break;
    135 	case '&': lv.type = LT; break; 
    136 	}
    137 
    138 	return &lv;
    139 }
    140 
    141 /* TODO read negative values */
    142 lex_val *
    143 lex_number(void) {
    144 	static char num[16];
    145 	char i, c;
    146 	for (i = 0, c = getchr(); i < 16 && isdigit(c); i++, c = getchr()) 
    147 		num[(int)i] = c;
    148 	endpos--;
    149 
    150 	lv.type = NUM; 
    151 	lv.data = num;
    152 
    153 	nextfn = &lex_sym;
    154 
    155 	return &lv;	
    156 }
    157 
    158 /* TODO make this read ints and other litterals */
    159 lex_val *
    160 lex_value(void) {
    161 	char c = peekc();
    162 	if (isdigit(c))  /* number */
    163 		return lex_number();
    164 	switch (c) {
    165 	case '\"': /* strlit */ 
    166 			
    167 	default: /* name */
    168 		return lex_name();
    169 	}
    170 }
    171 
    172 lex_val *
    173 lex_name(void) {
    174 	static char name[32];
    175 	char c, len = 0;
    176 
    177 	memset(name, 0, 32); /* note this resets the previous value, 
    178 				when converting to an ast, use a dup */
    179 
    180 	if (peekc() == '*') {
    181 		getchr();
    182 		lv.type = DEREF;
    183 		return &lv;
    184 	}
    185 
    186 	if (!isalpha((c = getchr()))) return &lex_error;
    187 	do {
    188 		memcpy(strnul(name), &c, 1);
    189 		if ((len++ + 1) == 32) return &lex_error;
    190 	} while(isalnum((c = getchr())) && skipped == 0);
    191 	endpos--;
    192 	lv.data = name;
    193 	lv.type = NAME;
    194 
    195 	switch (peekc()) {
    196 	case '=': 
    197 	case ',':
    198 	case ';':
    199 	case '<':
    200 	case '>':
    201 	case '!':
    202 	case '+': 
    203 	case '-':
    204 	case '/': 
    205 	case '*': /* CHECK FOR MUL AND PTR */
    206 	case '&':
    207 		  nextfn = &lex_sym; break;
    208 		  break;
    209 	default: return &lex_error;
    210 	}
    211 
    212 	return &lv;
    213 }
    214 
    215 void
    216 init_lexer(FILE *in) {
    217 	fseek(in, 0, SEEK_END);
    218 	input_len = ftell(in);
    219 	rewind(in);
    220 
    221 	drainfile(in);
    222 	startpos = input;
    223 	endpos = startpos;
    224 
    225 	nextfn = &lex_type;
    226 }
    227 
    228 int
    229 get_err_len(void) {
    230 	return endpos - startpos; 
    231 }
    232 
    233 int get_line_num(char *s) {
    234 	int count = 1;
    235 	size_t i;
    236 	for (i = 0; i < input_len && &input[i] != s; i++) 
    237 		if (input[i] == '\n') count++;
    238 	return count;
    239 }
    240 
    241 lex_val *
    242 get_next(void) {
    243 	memset(&lv, 0, sizeof(lv));
    244 	startpos = endpos;
    245 	return (nextfn) ? nextfn() : NULL;
    246 }