compute.ha (3835B)
1 use strings; 2 3 // Compute new branches with an NFA form. 4 export fn compute( 5 in: [](str, *opaque) 6 ) (automata | error) = { 7 const mata = init()?; 8 for (let (in, out) .. in) { 9 add_tree(&mata, in)?; 10 append(mata.results, out)?; 11 mata.curtree += 1; 12 }; 13 return mata; 14 }; 15 16 fn add_tree( 17 mata: *automata, 18 in: str, 19 ) (void | error) = { 20 const runes = strings::torunes(in)!; 21 defer free(runes); 22 23 let loc = mata.start; 24 let last = mata.start; 25 let groupstarts: []*state = []; 26 let unions: [][]*state = []; 27 28 for (let i = 0z; i < len(runes)) { 29 switch (runes[i]) { 30 case '(' => 31 loc = add_epsilon(mata, loc)?; 32 append(groupstarts, loc)?; 33 append(unions, [])?; 34 i += 1; 35 case ')' => 36 if (!(0 < len(unions))) { 37 return syntaxf("unexpected ) out of group"); 38 }; 39 loc = add_epsilon(mata, loc)?; 40 for (let _union .. unions[len(unions) - 1]) { 41 epsilon_to(mata, _union, loc)?; 42 }; 43 assert(len(groupstarts) > 0); 44 last = groupstarts[len(groupstarts) - 1]; 45 delete(groupstarts[len(groupstarts) - 1]); 46 free(unions[len(unions) - 1]); 47 delete(unions[len(unions) - 1]); 48 i += 1; 49 case '|' => 50 append(unions[len(unions) - 1], loc)?; 51 loc = groupstarts[len(groupstarts) - 1]; 52 i += 1; 53 case '*' => 54 epsilon_to(mata, loc, last)?; 55 loc = last; 56 i += 1; 57 case '?' => 58 epsilon_to(mata, last, loc)?; 59 i += 1; 60 case '+' => 61 epsilon_to(mata, loc, last)?; 62 i += 1; 63 case => 64 const (s, new) = add_one( 65 mata, 66 loc, 67 runes[i..], 68 )?; 69 i += s; 70 last = loc; 71 loc = new; 72 }; 73 }; 74 75 loc.result = mata.curtree; 76 }; 77 78 fn add_one( 79 mata: *automata, 80 loc: *state, 81 runes: []rune, 82 ) ((size, *state) | error) = { 83 const (s, negate, runes) = runes2runesets(runes)?; 84 const newstate = alloc(state { 85 index = mata.curindex, 86 result = void, 87 ... 88 })?; 89 mata.curindex += 1; 90 const newtrans = alloc(transition { 91 from = loc, 92 to = newstate, 93 accepts = runes, 94 negate = negate, 95 tree = mata.curtree, 96 ... 97 })?; 98 append(loc.transitions, newtrans)?; 99 append(mata.transitions, newtrans)?; 100 append(mata.states, newstate)?; 101 return (s, newstate); 102 }; 103 104 fn add_epsilon( 105 mata: *automata, 106 loc: *state, 107 ) (*state | error) = { 108 const newstate = alloc(state { 109 index = mata.curindex, 110 result = void, 111 ... 112 })?; 113 mata.curindex += 1; 114 const newtrans = alloc(transition { 115 from = loc, 116 to = newstate, 117 epsilon = true, 118 tree = mata.curtree, 119 ... 120 })?; 121 append(loc.transitions, newtrans)?; 122 append(mata.transitions, newtrans)?; 123 append(mata.states, newstate)?; 124 return newstate; 125 }; 126 127 fn epsilon_to( 128 mata: *automata, 129 loc: *state, 130 to: *state, 131 ) (void | error) = { 132 const newtrans = alloc(transition { 133 from = loc, 134 to = to, 135 epsilon = true, 136 tree = mata.curtree, 137 ... 138 })?; 139 append(loc.transitions, newtrans)?; 140 append(mata.transitions, newtrans)?; 141 }; 142 143 fn runes2runesets(in: []rune) ((size, bool, []runeset) | error) = { 144 let runes: []runeset = []; 145 let negate = false; 146 switch (in[0]) { 147 case '[' => 148 let i = 1z; 149 if (in[i] == '^') { 150 negate = true; 151 i += 1; 152 }; 153 for (i < len(in)) { 154 if (in[i] == ']') return (i + 1, negate, runes); 155 if (i + 2 < len(in) && in[i + 1] == '-' && in[i + 2] != ']') { 156 append(runes, (in[i], in[i + 2]))?; 157 i += 3; 158 } else { 159 const (s, r) = rune2runeset(in[i..])?; 160 i += s; 161 append(runes, r)?; 162 }; 163 }; 164 return syntaxf("unexpected eof while looking at ]"); 165 case => 166 const (s, r) = rune2runeset(in)?; 167 append(runes, r)?; 168 return (s, negate, runes); 169 }; 170 }; 171 172 fn rune2runeset(in: []rune) ((size, runeset) | error) = { 173 switch (in[0]) { 174 case '.' => return (1, dot); 175 case '\\' => 176 if (!(1 < len(in))) return syntaxf("escaped eof"); 177 switch (in[1]) { 178 case 'n' => 179 return (2, '\n'); 180 case 't' => 181 return (2, '\t'); 182 case 'r' => 183 return (2, '\r'); 184 case => 185 return (2, in[1]); 186 }; 187 case => 188 return (1, in[0]); 189 }; 190 };