spl2

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

lex.ha (3392B)


      1 use io;
      2 use ascii;
      3 use fmt;
      4 use strings;
      5 
// A span of the lexer input expressed as a (start, end) pair of byte offsets.
// The end offset is exclusive: spans are used directly as slice bounds
// (in[start .. end]).
export type range = (size, size);
      7 
// Error raised by [[next]] when no lexing rule matches; carries the span of
// the offending input.
export type unknowntoken = !range;
// Catch-all lexer error carrying a span. Nothing in this file constructs it.
export type other = !range;
// Any error the lexer can report.
export type error = !(unknowntoken | other);
     11 
// Character-class predicate; [[readblock]] consumes input while it holds.
type asciifn = fn(r: rune) bool;
     13 
     14 export type ttype = enum {
     15 	EOF = -1,
     16 	OBRACE = '(',
     17 	CBRACE = ')',
     18 	OCBRACE = '{',
     19 	CCBRACE = '}',
     20 	OSBRACE = '[',
     21 	CSBRACE = ']',
     22 	ASSIGN = '=',
     23 	SEMI = ';',
     24 
     25 	ADD = '+',
     26 	SUB = '-',
     27 	MUL= '*',
     28 	DIV = '/',
     29 	MOD = '%',
     30 
     31 	LT = '<',
     32 	GT = '>',
     33 	NOT = '!',
     34 
     35 	EQU,	// ==
     36 	LTE,	// <=
     37 	GTE,	// >=
     38 	OR,	// ||
     39 	AND,	// &&
     40 
     41 	FUNC, 	// func
     42 	IF, 	// if
     43 
     44 	NAME,
     45 	NUMBER,
     46 };
     47 
// Lexer state over an in-memory byte buffer.
export type lexer = struct {
	// Input being tokenized, as raw bytes.
	in: []u8,
	// Offset into in of the next byte to examine.
	pos: size,
	// Stack of token start offsets: next() pushes onto it, prev() pops
	// from it, and finish() frees it.
	prev: []size
};
     53 
// A single lexed token: its kind plus the span of input it was read from.
export type token = struct {
	// Kind of token.
	ty: ttype,
	// Span of the token within the lexer input, as (start, end) byte
	// offsets with an exclusive end. The carets below mark span
	// boundaries for an example input:
	// "func main() i32 ..."
	//  ^  ^ ^  ^^^ ^ ^
	//           ^^
	data: range,
};
     62 
     63 export fn strerror(e: error, l: *lexer) str = match (e) {
     64 	case let e: unknowntoken => yield fmt::asprintf("Unknown token \"{}\"", strings::fromutf8(l.in[e.0 .. e.1])!)!;
     65 	case => yield "unknown error";
     66 };
     67 
     68 export fn finish(l: *lexer) void = {
     69 	free(l.prev);
     70 };
     71 
     72 fn readblock(l: *lexer, pred: *asciifn) range = {
     73 	let start = l.pos;
     74 	for (pred(l.in[l.pos]: rune); l.pos += 1) 
     75 		continue;
     76 	return (start, l.pos);
     77 };
     78 
     79 fn isnumber(r: rune) bool = ascii::isdigit(r) || (r == '-');
     80 fn isname(r: rune) bool = ascii::isalpha(r) || isnumber(r);
     81 fn iswhitespace(r: rune) bool = ascii::isblank(r) || (r == '\n');
     82 
     83 fn lexstr(l: *lexer, s: str) (range | void) = {
     84 	let start = l.pos;
     85 
     86 	for (let c .. strings::toutf8(s)) {
     87 		if (l.in[l.pos] == c) l.pos += 1
     88 		else {
     89 			l.pos = start;
     90 			return;
     91 		};
     92 	};
     93 
     94 	return (start, l.pos);
     95 };
     96 
     97 export fn prev(l: *lexer) void = {
     98 	l.pos = l.prev[len(l.prev) - 1];
     99 	l.prev = l.prev[0 .. len(l.prev) - 1];
    100 };
    101 
    102 export fn next(l: *lexer) (token | error) = {
    103 	if (l.pos >= len(l.in))
    104 		return token{ty = ttype::EOF, data = (l.pos, l.pos)};
    105 
    106 	if (iswhitespace(l.in[l.pos]: rune)) {
    107 		l.pos += 1;
    108 		return next(l);
    109 	};
    110 	append(l.prev, l.pos)!;
    111 
    112 	match (lexstr(l, "==")) {
    113 	case let data: range => return token{ty = ttype::EQU, data = data};
    114 	case => yield;
    115 	};
    116 
    117 	match (lexstr(l, "<=")) {
    118 	case let data: range => return token{ty = ttype::LTE, data = data};
    119 	case => yield;
    120 	};
    121 
    122 	match (lexstr(l, ">=")) {
    123 	case let data: range => return token{ty = ttype::GTE, data = data};
    124 	case => yield;
    125 	};
    126 
    127 	match (lexstr(l, "||")) {
    128 	case let data: range => return token{ty = ttype::OR, data = data};
    129 	case => yield;
    130 	};
    131 
    132 	match (lexstr(l, "&&")) {
    133 	case let data: range => return token{ty = ttype::AND, data = data};
    134 	case => yield;
    135 	};
    136 
    137 	match (lexstr(l, "func")) {
    138 	case let data: range => return token{ty = ttype::FUNC, data = data};
    139 	case => yield;
    140 	};
    141 
    142 	match (lexstr(l, "if")) {
    143 	case let data: range => return token{ty = ttype::IF, data = data};
    144 	case => yield;
    145 	};
    146 
    147 	switch (l.in[l.pos]: ttype) {
    148 	case ttype::OBRACE, ttype::CBRACE, ttype::OCBRACE, ttype::CCBRACE,
    149 		ttype::OSBRACE, ttype::CSBRACE, ttype::ASSIGN,
    150 		ttype::SEMI, ttype::ADD, ttype::SUB, ttype::MUL,
    151 		ttype::DIV, ttype::MOD =>
    152 		defer l.pos += 1;
    153 		return token{ty = l.in[l.pos]: ttype, data = (l.pos, l.pos + 1)};
    154 	case => yield;
    155 	};
    156 
    157 	if (ascii::isalpha(l.in[l.pos]: rune))
    158 		return token{ty = ttype::NAME, data = readblock(l, &isname)}
    159 	else if (isnumber(l.in[l.pos]: rune))
    160 		return token{ty = ttype::NUMBER, data = readblock(l, &isnumber)};
    161 
    162 	defer l.pos += 1;
    163 	return (l.pos, l.pos + 1): unknowntoken;
    164 };