README (2300B)
1 This module provides a general purpose lexer machine. 2 3 The user adds action callbacks to the lexer. The longest matched pattern 4 prefix wins. In case of ties, the pattern with the highest precedence 5 wins. 6 7 The user prepares a backend to use with a lexer. A backend takes a list of 8 actions to compile its core. 9 10 let actions: []lex::action = []; 11 defer free(actions); 12 13 append(actions, lex::action { 14 expr = `"([^\\"]|(\\.))*"`, 15 cb = &literal, 16 name = "LIT_STR", 17 ... 18 })!; 19 20 const backend = lex::def_backend()!(actions)!; // use default backend (DFA without environment variable) 21 defer lex::destroy(backend); 22 23 const lexer = lex::init(backend, in); 24 defer lex::finish(&lexer); 25 26 An action callback is associated with a regular expression to 27 match the tokens. The action callbacks are free to initialize tokens as 28 they please, but the [[scanner]] object provides convenient functions. 29 30 fn literal( 31 scan: *lex::scanner, 32 lexeme: const str, 33 user: nullable *opaque, 34 ) (str | *lex::token | lex::error) = { 35 return lex::scan_token(scan, void, lexeme); 36 }; 37 38 This action callback would return a token of the added action type 39 (ex: "LIT_STR"), with a void value, and lexing the full string matched 40 by the pattern (ex: "foo"). 41 42 When the callback returns a string, it represents the lexeme to swallow. 43 44 append(actions, lex::action { 45 expr = "( |\t|\n|\r)+", 46 cb = &skip, 47 ... 48 })!; 49 50 fn skip( 51 scan: *lex::scanner, 52 lexeme: const str, 53 user: nullable *opaque, 54 ) (str | *lex::token | lex::error) = { 55 return lexeme; 56 }; 57 58 Action callbacks can be used to match hatch symbols, and then to lex the 59 scanned input manually. 60 61 append(actions, lex::action { 62 expr = `\<`, 63 cb = &html, 64 name = "ID" 65 ... 
66 })!; 67 68 fn html( 69 scan: *lex::scanner, 70 lexeme: const str, 71 user: nullable *opaque, 72 ) (str | *lex::token | lex::error) = { 73 let buf: []u8 = []; 74 defer free(buf); 75 76 append(buf, strings::toutf8(lexeme)...)!; 77 78 let brk = 1z; 79 const start = scan.start; 80 81 for (let byte .. strings::toutf8(scan.in)) { 82 append(buf, byte)?; 83 if (byte == '<') { 84 brk += 1; 85 } else if (byte == '>') { 86 brk -= 1; 87 }; 88 if (brk == 0) { 89 const lexeme = strings::fromutf8(buf)!; 90 return lex::scan_token(scan, void, lexeme); 91 }; 92 }; 93 94 return lex::syntaxf(start, "unclosed HTML literal"); 95 };