spl

a Simple Programming Language
Log | Files | Refs

lexer.c (2382B)


      1 #include <ctype.h>
      2 #include <stdio.h>
      3 #include <stdlib.h>
      4 #include <string.h>
      5 #include <stdint.h>
      6 
      7 #include "lexer.h"
      8 
      9 loc
     10 findloc(lex l) {
     11 	int lines = 1;
     12 	int col = 1;
     13 	for (char *c = l.input; c != l.ptr; c++, col++) {
     14 		if (*c == '\n') {
     15 			col = 0;
     16 			lines++;
     17 		}
     18 	}
     19 
     20 	return (loc){lines, col};
     21 }
     22 
     23 loc
     24 findloctok(tok t) {
     25 	return findloc(t.l);
     26 }
     27 
     28 void
     29 lexErr(lex *l) {
     30 	fprintf(stderr, "unexpected token '%c'\n", *l->ptr);
     31 	exit(1);
     32 }
     33 
     34 double
     35 lexFloat(lex *l) {
     36 	l->prev = l->ptr;
     37 	char *end = NULL;
     38 	double n = strtold(l->ptr, &end);
     39 	l->ptr = end;
     40 	return n;
     41 }
     42 
     43 uint64_t
     44 lexNum(lex *l) {
     45 	l->prev = l->ptr;
     46 	char *end = NULL;
     47 	uint64_t n = strtoll(l->ptr, &end, 0);
     48 	l->ptr = end;
     49 	return n;
     50 }
     51 
     52 char *
     53 lexName(lex *l) {
     54 	char name[32] = {0};
     55 	for (int i = 0; i < 32 && l->ptr; i++, l->ptr++) {
     56 		if (!isalnum(*l->ptr)) break;
     57 		name[i] = *l->ptr;
     58 	}
     59 	return strdup(name);	/* TODO need to cleanup this */
     60 }
     61 
     62 char *
     63 unlex(lex *l) {
     64 	return (l->ptr = l->prev);
     65 }
     66 
     67 tok
     68 next(lex *l) {
     69 	l->prev = l->ptr;
     70 	tok t = {0};
     71 	t.l = *l;
     72 	char *estring;
     73 
     74 	switch (*l->ptr) {
     75 	case 0:
     76 		t.op = LEOF;
     77 		return t;
     78 
     79 	case '\n':
     80 	case ' ':
     81 	case '\t':
     82 	case '\v':
     83 		l->ptr++;
     84 		return next(l);
     85 
     86 	case DQUOTE:
     87 		t.op = DQUOTE;
     88 		*(estring = strchr(++l->ptr, '\"')) = 0;
     89 		t.name = strdup(l->ptr);		
     90 		l->ptr = estring + 1;
     91 		return t;	
     92 
     93 	case ADD:
     94 	case SUB:
     95 	case MUL:
     96 	case DIV:
     97 	case OBRACE:
     98 	case CBRACE:
     99 	case OCBRACE:
    100 	case CCBRACE:
    101 	case OSBRACE:
    102 	case CSBRACE:
    103 	case ASSIGN:
    104 	case SEMI:
    105 	case COMMA:
    106 	case ADDROF:
    107 		t.op = *l->ptr++;
    108 		return t;
    109 	}
    110 	if (memcmp(l->ptr, ":=", 2) == 0) {
    111 		l->ptr += 2;
    112 		t.op = WALRUS;
    113 	} else if (memcmp(l->ptr, "return", 6) == 0) {
    114 		l->ptr += 6;
    115 		t.op = RETURN;
    116 	} else if (memcmp(l->ptr, "func", 4) == 0) {
    117 		l->ptr += 4;
    118 		t.op = FUNC;
    119 	} else if (isdigit(*l->ptr)) {
    120 		t.op = INT;
    121 		t.n = lexNum(l);
    122 		if (*l->ptr == '.') {
    123 			unlex(l);
    124 			t.f = lexFloat(l);
    125 			t.op = FLOAT;
    126 		}
    127 	} else if (isalpha(*l->ptr)) {
    128 		t.op = NAME;
    129 		t.name = lexName(l);
    130 	} else
    131 		lexErr(l);
    132 
    133 	return t;
    134 }
    135 
    136 void
    137 printTok(tok *t) {
    138 	switch (t->op) {
    139 	case INT:
    140 		printf("INT: %ld\n", t->n);
    141 		break;
    142 	case LEOF:
    143 		printf("$\n");
    144 		break;
    145 	case ADD:
    146 		printf("ADD\n");
    147 		break;
    148 	case SUB:
    149 		printf("SUB\n");
    150 		break;
    151 	case MUL:
    152 		printf("MUL\n");
    153 		break;
    154 	case DIV:
    155 		printf("DIV\n");
    156 		break;
    157 	}
    158 }
    159 
    160 lex
    161 mklexer(char *input) {
    162 	lex l = (lex){input, input};
    163 	return l;
    164 }