sys

A set of unix utils in hare!
Log | Files | Refs | README

README (2300B)


      1 This module provides a general purpose lexer machine.
      2 
      3 The user adds action callbacks to the lexer. The pattern matching the
      4 longest prefix wins. In case of ties, the pattern with the highest
      5 precedence wins.
      6 
      7 The user prepares a backend to use with a lexer. A backend takes a list of
      8 actions to compile its core.
      9 
     10 	let actions: []lex::action = [];
     11 	defer free(actions);
     12 
     13 	append(actions, lex::action {
     14 		expr = `"([^\\"]|(\\.))*"`,
     15 		cb = &literal,
     16 		name = "LIT_STR",
     17 		...
     18 	})!;
     19 
     20 	const backend = lex::def_backend()!(actions)!; // use default backend (DFA without environment variable)
     21 	defer lex::destroy(backend);
     22 
     23 	const lexer = lex::init(backend, in);
     24 	defer lex::finish(&lexer);
     25 
     26 An action callback is associated with a regular expression to
     27 match the tokens. The action callbacks are free to initialize tokens as
     28 they please, but the [[scanner]] object provides convenient functions.
     29 
     30 	fn literal(
     31 		scan: *lex::scanner,
     32 		lexeme: const str,
     33 		user: nullable *opaque,
     34 	) (str | *lex::token | lex::error) = {
     35 		return lex::scan_token(scan, void, lexeme);
     36 	};
     37 
     38 This action callback would return a token of the added action type
     39 (ex: "LIT_STR"), with a void value, lexing the full string matched by
     40 the pattern (ex: "foo").
     41 
     42 When the callback returns a string, it represents the lexeme to swallow.
     43 
     44 	append(actions, lex::action {
     45 		expr = "( |\t|\n|\r)+",
     46 		cb = &skip,
     47 		...
     48 	})!;
     49 
     50 	fn skip(
     51 		scan: *lex::scanner,
     52 		lexeme: const str,
     53 		user: nullable *opaque,
     54 	) (str | *lex::token | lex::error) = {
     55 		return lexeme;
     56 	};
     57 
     58 Action callbacks can be used to match hatch symbols, and then to lex the
     59 scanned input manually.
     60 
     61 	append(actions, lex::action {
     62 		expr = `\<`,
     63 		cb = &html,
     64 		name = "ID",
     65 		...
     66 	})!;
     67 
     68 	fn html(
     69 		scan: *lex::scanner,
     70 		lexeme: const str,
     71 		user: nullable *opaque,
     72 	) (str | *lex::token | lex::error) = {
     73 		let buf: []u8 = [];
     74 		defer free(buf);
     75 
     76 		append(buf, strings::toutf8(lexeme)...)!;
     77 
     78 		let brk = 1z;
     79 		const start = scan.start;
     80 
     81 		for (let byte .. strings::toutf8(scan.in)) {
     82 			append(buf, byte)?;
     83 			if (byte == '<') {
     84 				brk += 1;
     85 			} else if (byte == '>') {
     86 				brk -= 1;
     87 			};
     88 			if (brk == 0) {
     89 				const lexeme = strings::fromutf8(buf)!;
     90 				return lex::scan_token(scan, void, lexeme);
     91 			};
     92 		};
     93 
     94 		return lex::syntaxf(start, "unclosed HTML literal");
     95 	};