sys

A set of unix utils in hare!
Log | Files | Refs | README

resolve.ha (4053B)


      1 use bufio;
      2 use encoding::utf8;
      3 use strings;
      4 
      5 // Perform the automata with the given string. Returns the matched size, with
      6 // associated accepted output, or void otherwise.
      7 export fn resolve(
      8 	mata: *automata,
      9 	decoder: utf8::decoder
     10 ) (void | (size, *opaque) | error) = {
     11 	if (mata.deterministic) {
     12 		return resolve_deterministic(mata, decoder);
     13 	};
     14 
     15 	let loc = mata.start;
     16 
     17 	let results: [](size, *opaque) = [];
     18 	defer free(results);
     19 
     20 	for (let tree = 0z; tree < mata.curtree; tree += 1) {
     21 		match (crawl(mata, loc, decoder, tree)?) {
     22 		case void => void;
     23 		case let res: (size, *opaque) =>
     24 			append(results, res)?;
     25 			yield res.0;
     26 		};
     27 	};
     28 
     29 	let longest: (void | (size, *opaque)) = void;
     30 	for (let res .. results) {
     31 		match (longest) {
     32 		case void =>
     33 			longest = res;
     34 			continue;
     35 		case let this: (size, *opaque) =>
     36 			if (this.0 < res.0) {
     37 				longest = res;
     38 			};
     39 		};
     40 	};
     41 	return longest;
     42 };
     43 
     44 fn resolve_deterministic(
     45 	mata: *automata,
     46 	decoder: utf8::decoder,
     47 ) (void | (size, *opaque) | error) = {
     48 	assert(mata.deterministic);
     49 	let loc = mata.start;
     50 	let lastres: (void | (size, *opaque)) = void;
     51 	for: root (let i = 0z; true; i += 1) {
     52 		const r = match (utf8::next(&decoder)?) {
     53 		case let this: rune => yield this;
     54 		case done => break;
     55 		case utf8::more => abort("unsupported partial input");
     56 		};
     57 		for (let trans .. loc.transitions) {
     58 			if (!accept(trans, r)) continue;
     59 			loc = trans.to;
     60 			match (result(mata, loc, 0)) {
     61 			case let out: *opaque =>
     62 				lastres = (utf8::position(&decoder), out);
     63 			case null => void;
     64 			};
     65 			continue: root;
     66 		};
     67 		return lastres;
     68 	};
     69 	return lastres;
     70 };
     71 
     72 fn crawl(
     73 	mata: *automata,
     74 	loc: *state,
     75 	start: utf8::decoder,
     76 	tree: size,
     77 ) (void | (size, *opaque) | error) = {
     78 	const decoder = start;
     79 
     80 	if (0 == len(utf8::remaining(&decoder))) {
     81 		match (result(mata, loc, tree)) {
     82 		case let out: *opaque => return (utf8::position(&decoder), out);
     83 		case null => return void;
     84 		};
     85 	};
     86 
     87 	const r = match (utf8::next(&decoder)?) {
     88 	case let this: rune => yield this;
     89 	case done => abort("unreachable");
     90 	case utf8::more => abort("unsupported partial input");
     91 	};
     92 
     93 	let accepting: []*transition = [];
     94 	defer free(accepting);
     95 
     96 	for (let trans .. loc.transitions) {
     97 		if (trans.tree != tree) continue;
     98 		if (!accept(trans, r)) continue;
     99 		append(accepting, trans)?;
    100 	};
    101 
    102 	if (len(accepting) == 0) {
    103 		match (result(mata, loc, tree)) {
    104 		case null => return void;
    105 		case let out: *opaque => return (utf8::position(&start), out);
    106 		};
    107 	};
    108 
    109 	let results: [](size, *opaque) = [];
    110 	defer free(results);
    111 	for (let acc .. accepting) {
    112 		const d = if (0 == len(acc.accepts)) {
    113 			yield start;
    114 		} else {
    115 			yield decoder;
    116 		};
    117 		match (crawl(mata, acc.to, d, tree)?) {
    118 		case void => void;
    119 		case let this: (size, *opaque) =>
    120 			append(results, this)?;
    121 		};
    122 	};
    123 
    124 	let longest: (void | (size, *opaque)) = void;
    125 	for (let res .. results) {
    126 		match (longest) {
    127 		case void =>
    128 			longest = res;
    129 		case let this: (size, *opaque) =>
    130 			if (this.0 < res.0) {
    131 				longest = res;
    132 			};
    133 		};
    134 	};
    135 
    136 	if (longest is void) {
    137 		match (result(mata, loc, tree)) {
    138 		case null => return void;
    139 		case let out: *opaque => return (utf8::position(&start), out);
    140 		};
    141 	};
    142 
    143 	return longest;
    144 };
    145 
    146 fn accept(trans: *transition, _r: rune) bool = {
    147 	if (trans.epsilon) return true;
    148 	const res = !trans.negate;
    149 	for (let a .. trans.accepts) {
    150 		match (a) {
    151 		case dot => return res;
    152 		case let r: rune => if (_r == r) return res;
    153 		case let r: (rune, rune) =>
    154 			const a = r.0: u32;
    155 			const b = r.1: u32;
    156 			const min = if (a < b) a else b;
    157 			const max = if (a < b) b else a;
    158 			if (min <= _r: u32 && _r: u32 <= max) return res;
    159 		};
    160 	};
    161 	return !res;
    162 };
    163 
    164 fn result(
    165 	mata: *automata,
    166 	loc: *state,
    167 	tree: size
    168 ) nullable *opaque = {
    169 	if (loc.result is size) {
    170 		return mata.results[loc.result as size];
    171 	};
    172 	for (let trans .. loc.transitions) {
    173 		if (trans.tree != tree) continue;
    174 		if (!trans.epsilon) continue;
    175 		match (result(mata, trans.to, tree)) {
    176 		case null => void;
    177 		case let out: *opaque => return out;
    178 		};
    179 	};
    180 	return null;
    181 };