sys

A set of unix utils in hare!
Log | Files | Refs | README

compute.ha (3835B)


      1 use strings;
      2 
      3 // Compute new branches with an NFA form.
      4 export fn compute(
      5 	in: [](str, *opaque)
      6 ) (automata | error) = {
      7 	const mata = init()?;
      8 	for (let (in, out) .. in) {
      9 		add_tree(&mata, in)?;
     10 		append(mata.results, out)?;
     11 		mata.curtree += 1;
     12 	};
     13 	return mata;
     14 };
     15 
     16 fn add_tree(
     17 	mata: *automata,
     18 	in: str,
     19 ) (void | error) = {
     20 	const runes = strings::torunes(in)!;
     21 	defer free(runes);
     22 
     23 	let loc = mata.start;
     24 	let last = mata.start;
     25 	let groupstarts: []*state = [];
     26 	let unions: [][]*state = [];
     27 
     28 	for (let i = 0z; i < len(runes)) {
     29 		switch (runes[i]) {
     30 		case '(' =>
     31 			loc = add_epsilon(mata, loc)?;
     32 			append(groupstarts, loc)?;
     33 			append(unions, [])?;
     34 			i += 1;
     35 		case ')' =>
     36 			if (!(0 < len(unions))) {
     37 				return syntaxf("unexpected ) out of group");
     38 			};
     39 			loc = add_epsilon(mata, loc)?;
     40 			for (let _union .. unions[len(unions) - 1]) {
     41 				epsilon_to(mata, _union, loc)?;
     42 			};
     43 			assert(len(groupstarts) > 0);
     44 			last = groupstarts[len(groupstarts) - 1];
     45 			delete(groupstarts[len(groupstarts) - 1]);
     46 			free(unions[len(unions) - 1]);
     47 			delete(unions[len(unions) - 1]);
     48 			i += 1;
     49 		case '|' =>
     50 			append(unions[len(unions) - 1], loc)?;
     51 			loc = groupstarts[len(groupstarts) - 1];
     52 			i += 1;
     53 		case '*' =>
     54 			epsilon_to(mata, loc, last)?;
     55 			loc = last;
     56 			i += 1;
     57 		case '?' =>
     58 			epsilon_to(mata, last, loc)?;
     59 			i += 1;
     60 		case '+' =>
     61 			epsilon_to(mata, loc, last)?;
     62 			i += 1;
     63 		case =>
     64 			const (s, new) = add_one(
     65 				mata,
     66 				loc,
     67 				runes[i..],
     68 			)?;
     69 			i += s;
     70 			last = loc;
     71 			loc = new;
     72 		};
     73 	};
     74 
     75 	loc.result = mata.curtree;
     76 };
     77 
     78 fn add_one(
     79 	mata: *automata,
     80 	loc: *state,
     81 	runes: []rune,
     82 ) ((size, *state) | error) = {
     83 	const (s, negate, runes) = runes2runesets(runes)?;
     84 	const newstate = alloc(state {
     85 		index = mata.curindex,
     86 		result = void,
     87 		...
     88 	})?;
     89 	mata.curindex += 1;
     90 	const newtrans = alloc(transition {
     91 		from = loc,
     92 		to = newstate,
     93 		accepts = runes,
     94 		negate = negate,
     95 		tree = mata.curtree,
     96 		...
     97 	})?;
     98 	append(loc.transitions, newtrans)?;
     99 	append(mata.transitions, newtrans)?;
    100 	append(mata.states, newstate)?;
    101 	return (s, newstate);
    102 };
    103 
    104 fn add_epsilon(
    105 	mata: *automata,
    106 	loc: *state,
    107 ) (*state | error) = {
    108 	const newstate = alloc(state {
    109 		index = mata.curindex,
    110 		result = void,
    111 		...
    112 	})?;
    113 	mata.curindex += 1;
    114 	const newtrans = alloc(transition {
    115 		from = loc,
    116 		to = newstate,
    117 		epsilon = true,
    118 		tree = mata.curtree,
    119 		...
    120 	})?;
    121 	append(loc.transitions, newtrans)?;
    122 	append(mata.transitions, newtrans)?;
    123 	append(mata.states, newstate)?;
    124 	return newstate;
    125 };
    126 
    127 fn epsilon_to(
    128 	mata: *automata,
    129 	loc: *state,
    130 	to: *state,
    131 ) (void | error) = {
    132 	const newtrans = alloc(transition {
    133 		from = loc,
    134 		to = to,
    135 		epsilon = true,
    136 		tree = mata.curtree,
    137 		...
    138 	})?;
    139 	append(loc.transitions, newtrans)?;
    140 	append(mata.transitions, newtrans)?;
    141 };
    142 
    143 fn runes2runesets(in: []rune) ((size, bool, []runeset) | error) = {
    144 	let runes: []runeset = [];
    145 	let negate = false;
    146 	switch (in[0]) {
    147 	case '[' =>
    148 		let i = 1z;
    149 		if (in[i] == '^') {
    150 			negate = true;
    151 			i += 1;
    152 		};
    153 		for (i < len(in)) {
    154 			if (in[i] == ']') return (i + 1, negate, runes);
    155 			if (i + 2 < len(in) && in[i + 1] == '-' && in[i + 2] != ']') {
    156 				append(runes, (in[i], in[i + 2]))?;
    157 				i += 3;
    158 			} else {
    159 				const (s, r) = rune2runeset(in[i..])?;
    160 				i += s;
    161 				append(runes, r)?;
    162 			};
    163 		};
    164 		return syntaxf("unexpected eof while looking at ]");
    165 	case =>
    166 		const (s, r) = rune2runeset(in)?;
    167 		append(runes, r)?;
    168 		return (s, negate, runes);
    169 	};
    170 };
    171 
    172 fn rune2runeset(in: []rune) ((size, runeset) | error) = {
    173 	switch (in[0]) {
    174 	case '.' => return (1, dot);
    175 	case '\\' =>
    176 		if (!(1 < len(in))) return syntaxf("escaped eof");
    177 		switch (in[1]) {
    178 		case 'n' =>
    179 			return (2, '\n');
    180 		case 't' =>
    181 			return (2, '\t');
    182 		case 'r' =>
    183 			return (2, '\r');
    184 		case =>
    185 			return (2, in[1]);
    186 		};
    187 	case =>
    188 		return (1, in[0]);
    189 	};
    190 };