commit 875c20f1a60e75a86db46acdc5bec10ded07846a
parent 42507db3380c48616172b3059b30473245113fa2
Author: thing1 <thing1@seacrossedlovers.xyz>
Date: Tue, 10 Mar 2026 19:10:11 +0000
ed stuff
Diffstat:
14 files changed, 1204 insertions(+), 9 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,13 +1,13 @@
.POSIX:
.SUFFIXES:
HARE=hare
-HAREFLAGS=
+HAREFLAGS=-lc
DESTDIR=
PREFIX=/usr/local
BINDIR=$(PREFIX)/bin
-all: bin/ls bin/rainbow bin/cat bin/uniq bin/split bin/wc bin/yes bin/mc bin/ed
+all: bin/ls bin/rainbow bin/cat bin/uniq bin/split bin/wc bin/yes bin/mc bin/ed bin/rcsh
clean:
rm -rf bin/*
@@ -22,6 +22,10 @@ EDSRC != find cmd/ed/ -name *.ha
bin/ed: $(EDSRC)
$(HARE) build $(HAREFLAGS) -o $@ cmd/ed/
+RCSHSRC != find cmd/rcsh/ -name *.ha
+bin/rcsh: $(RCSHSRC)
+ $(HARE) build $(HAREFLAGS) -o $@ cmd/rcsh/
+
bin/ls: cmd/ls.ha
$(HARE) build $(HAREFLAGS) -o $@ cmd/ls.ha
diff --git a/cmd/ed/cmds.ha b/cmd/ed/cmds.ha
@@ -1,12 +1,18 @@
+use bufio;
+use io;
+use os;
+use strings;
+use fmt;
+
export type option = enum {
CANT,
WILL,
MUST,
};
-export type cmdfn = fn(file: *lines, current: size, args: []str) void;
+export type cmdfn = fn(file: **lines, current: *size, range: (size, size), args: []str) void;
-export type cmds = struct {
+export type cmd = struct {
// the char to call the command
cmd: rune,
// help text for the command
@@ -15,12 +21,61 @@ export type cmds = struct {
// does the command allow a range
range: option,
- // does the command allow a repeater
- repeat: option,
-
// the number of arguments a cmd can take
args: size,
// function
f: *cmdfn,
};
+
+const cmds = [
+ cmd{cmd = 'i', msg = "insert lines", range = option::CANT, args = 0, f = &insfn},
+ cmd{cmd = 'a', msg = "append lines", range = option::CANT, args = 0, f = &appfn},
+ cmd{cmd = 'd', msg = "delete lines", range = option::WILL, args = 0, f = &delfn},
+ cmd{cmd = 'p', msg = "print lines", range = option::WILL, args = 0, f = &prifn},
+];
+
+fn insfn(file: **lines, current: *size, range: (size, size), args: []str) void = {
+ let sc = bufio::newscanner(os::stdin);
+ defer bufio::finish(&sc);
+
+ for (true) {
+ match (bufio::scan_line(&sc)) {
+ case let s: str =>
+ if (s == ".") break;
+ *file= insertline(*file, newline(s), *current);
+ *current += 1;
+ case => abort();
+ };
+ };
+};
+
+fn appfn(file: **lines, current: *size, range: (size, size), args: []str) void = {
+ *current += 1;
+ insfn(file, current, range, args);
+};
+
+fn delfn(file: **lines, current: *size, range: (size, size), args: []str) void = {
+ *file = deleteline(*file, *current);
+};
+
+fn prifn(file: **lines, current: *size, range: (size, size), args: []str) void = {
+ if (range.0: i64 == -1) {
+ let l = getline(*file, *current);
+ fmt::println(l.s)!;
+ } else {
+ if (range.1: i64 != -1) {
+ for (let i = range.0; i: i64 < range.1: i64; i += 1) {
+ let l = getline(*file, i);
+ fmt::println(l.s)!;
+ };
+ } else {
+ for (let i = *current; i: i64 < range.0: i64; i += 1) {
+ let l = getline(*file, i);
+ fmt::println(l.s)!;
+ };
+ };
+ };
+};
+
+
diff --git a/cmd/ed/main.ha b/cmd/ed/main.ha
@@ -4,9 +4,20 @@ use bufio;
use strings;
use fs;
use os;
+use ascii;
+use strconv;
use util;
+type command = struct {
+ c: rune,
+ range: (size, size),
+ args: []str
+};
+
+let file: nullable *lines = null;
+let pos: size = 0;
+
fn loadfile(h: io::handle) nullable *lines = {
let ls: nullable *lines = null;
let sc = bufio::newscanner(h);
@@ -39,6 +50,66 @@ fn openfile(name: str) nullable *lines = {
return loadfile(h);
};
+fn parsenum(runes: []rune) (size, []rune) = {
+ let digits: []rune = [];
+ defer free(digits);
+
+ let i = 0z;
+
+ for (ascii::isdigit(runes[i])) {
+ append(digits, runes[i])!;
+ i += 1;
+ };
+
+ let num = strings::fromrunes(digits)!;
+ defer free(num);
+
+ return (strconv::stoz(num)!, runes[i..]);
+};
+
+fn parsecmd(s: str) command = {
+ let c: command = command{c = ' ', range = (-1, -1), args = []};
+
+ let runes = strings::torunes(s)!;
+ let sav = runes;
+ defer free(sav);
+
+ if (ascii::isdigit(runes[0])) {
+ let res = parsenum(runes);
+ c.range.0 = res.0; runes = res.1;
+ if (runes[0] == ',') {
+ res = parsenum(runes[1..]);
+ c.range.0 = res.0; runes = res.1;
+ } else c.range.1 = -1;
+ } else c.range = (-1, -1);
+
+ c.c = runes[0];
+
+ return c;
+};
+
+fn runcmd() void = {
+ let sc = bufio::newscanner(os::stdin);
+ defer bufio::finish(&sc);
+
+ let in = parsecmd(bufio::scan_line(&sc) as str);
+
+ for (let c .. cmds) {
+ if (c.cmd == in.c) {
+ c.f(&(file as *lines), &pos, in.range, in.args);
+ break;
+ };
+ };
+};
+
export fn main() void = {
- abort();
+ if (len(os::args) < 2) {
+ util::die("", "needs a file!");
+ };
+ let f = os::open(os::args[1])!;
+ file = loadfile(f);
+
+ for (true) {
+ runcmd();
+ };
};
diff --git a/cmd/fmt.ha b/cmd/fmt.ha
@@ -0,0 +1,29 @@
+use fmt;
+use io;
+
+use util;
+
+export fn main() void = {
+ const cmd = getopt::parse(os::args,
+ "text formater",
+ ('w', "width", "The max width of output
+ );
+ defer getopt::finish(&cmd);
+
+ for (let opt .. cmd.opts) {
+ switch (opt.0) {
+ case 'w' =>
+ width = strconv::stoi(opt.1)!;
+ case => abort();
+ };
+ };
+
+ let sc = bufio::newscanner(os::stdin);
+ for (true) {
+ let s = match (bufio::scan_str(&sc, " ")) {
+ case let s: str =>
+ case io::EOF => break;
+ case => util::die("stdin", "couldn't read");
+ };
+ };
+};
diff --git a/cmd/rcsh/lex.ha b/cmd/rcsh/lex.ha
@@ -0,0 +1,75 @@
+use lex;
+use strings;
+use strconv;
+
+fn lexinit(in: str) lex::lexer = {
+ let actions: []lex::action = [];
+
+ append(actions, lex::action{
+ expr = `([A-z][A-z0-9]+)`,
+ cb = &word,
+ name = "word",
+ ...
+ })!;
+
+ append(actions, lex::action{
+ expr = `(-?[0-9]+)`,
+ cb = &number,
+ name = "word",
+ ...
+ })!;
+
+ append(actions, lex::action{
+ expr = `\$([A-z][A-z0-9]+)`,
+ cb = &var,
+ name = "var",
+ ...
+ })!;
+
+ append(actions, lex::action{
+ expr = `( |\t|\n|\r)+`,
+ cb = &skip,
+ ...
+ })!;
+
+ const backend = lex::def_backend()!(actions)!;
+ return lex::init(backend, in);
+};
+
+fn word(sc: *lex::scanner,
+ lexeme: const str,
+ user: nullable *opaque
+ ) (str | *lex::token | lex::error) = {
+
+ let t = lex::scan_token(sc, void, lexeme)?;
+ t.value = t.morphene;
+ return t;
+};
+
+fn var(sc: *lex::scanner,
+ lexeme: const str,
+ user: nullable *opaque
+ ) (str | *lex::token | lex::error) = {
+
+ let t = lex::scan_token(sc, void, lexeme)?;
+ t.value = strings::sub(t.morphene, 1);
+ return t;
+};
+
+fn number(sc: *lex::scanner,
+ lexeme: const str,
+ user: nullable *opaque
+ ) (str | *lex::token | lex::error) = {
+
+ let t = lex::scan_token(sc, void, lexeme)?;
+ t.value = strconv::stoi64(t.morphene)!;
+ return t;
+};
+
+fn skip(sc: *lex::scanner,
+ lexeme: const str,
+ user: nullable *opaque
+ ) (str | *lex::token | lex::error) = {
+ return lexeme;
+};
+
diff --git a/cmd/rcsh/main.ha b/cmd/rcsh/main.ha
@@ -0,0 +1,8 @@
+use lex;
+use fmt;
+
+export fn main() void = {
+ const lx = &lexinit("$TERM");
+ let tok = lex::next(lx)!;
+ fmt::println(tok.value)!;
+};
diff --git a/cmd/rcsh/parse.ha b/cmd/rcsh/parse.ha
@@ -0,0 +1,14 @@
+use lex;
+
+type value = [](expr | str);
+
+type expr = struct {
+ // must be length 1
+ cmd: value,
+ // can be any length, even 0
+ args: value
+};
+
+fn parseexpr(l: *lex::lexer) expr = {
+
+};
diff --git a/lex/backend.ha b/lex/backend.ha
@@ -1,5 +1,5 @@
use encoding::utf8;
-use lexical::machine;
+use machine;
use os;
use strings;
diff --git a/machine/compute.ha b/machine/compute.ha
@@ -0,0 +1,190 @@
+use strings;
+
+// Compute new branches with an NFA form.
+export fn compute(
+ in: [](str, *opaque)
+) (automata | error) = {
+ const mata = init()?;
+ for (let (in, out) .. in) {
+ add_tree(&mata, in)?;
+ append(mata.results, out)?;
+ mata.curtree += 1;
+ };
+ return mata;
+};
+
+fn add_tree(
+ mata: *automata,
+ in: str,
+) (void | error) = {
+ const runes = strings::torunes(in)!;
+ defer free(runes);
+
+ let loc = mata.start;
+ let last = mata.start;
+ let groupstarts: []*state = [];
+ let unions: [][]*state = [];
+
+ for (let i = 0z; i < len(runes)) {
+ switch (runes[i]) {
+ case '(' =>
+ loc = add_epsilon(mata, loc)?;
+ append(groupstarts, loc)?;
+ append(unions, [])?;
+ i += 1;
+ case ')' =>
+ if (!(0 < len(unions))) {
+ return syntaxf("unexpected ) out of group");
+ };
+ loc = add_epsilon(mata, loc)?;
+ for (let _union .. unions[len(unions) - 1]) {
+ epsilon_to(mata, _union, loc)?;
+ };
+ assert(len(groupstarts) > 0);
+ last = groupstarts[len(groupstarts) - 1];
+ delete(groupstarts[len(groupstarts) - 1]);
+ free(unions[len(unions) - 1]);
+ delete(unions[len(unions) - 1]);
+ i += 1;
+ case '|' =>
+ append(unions[len(unions) - 1], loc)?;
+ loc = groupstarts[len(groupstarts) - 1];
+ i += 1;
+ case '*' =>
+ epsilon_to(mata, loc, last)?;
+ loc = last;
+ i += 1;
+ case '?' =>
+ epsilon_to(mata, last, loc)?;
+ i += 1;
+ case '+' =>
+ epsilon_to(mata, loc, last)?;
+ i += 1;
+ case =>
+ const (s, new) = add_one(
+ mata,
+ loc,
+ runes[i..],
+ )?;
+ i += s;
+ last = loc;
+ loc = new;
+ };
+ };
+
+ loc.result = mata.curtree;
+};
+
+fn add_one(
+ mata: *automata,
+ loc: *state,
+ runes: []rune,
+) ((size, *state) | error) = {
+ const (s, negate, runes) = runes2runesets(runes)?;
+ const newstate = alloc(state {
+ index = mata.curindex,
+ result = void,
+ ...
+ })?;
+ mata.curindex += 1;
+ const newtrans = alloc(transition {
+ from = loc,
+ to = newstate,
+ accepts = runes,
+ negate = negate,
+ tree = mata.curtree,
+ ...
+ })?;
+ append(loc.transitions, newtrans)?;
+ append(mata.transitions, newtrans)?;
+ append(mata.states, newstate)?;
+ return (s, newstate);
+};
+
+fn add_epsilon(
+ mata: *automata,
+ loc: *state,
+) (*state | error) = {
+ const newstate = alloc(state {
+ index = mata.curindex,
+ result = void,
+ ...
+ })?;
+ mata.curindex += 1;
+ const newtrans = alloc(transition {
+ from = loc,
+ to = newstate,
+ epsilon = true,
+ tree = mata.curtree,
+ ...
+ })?;
+ append(loc.transitions, newtrans)?;
+ append(mata.transitions, newtrans)?;
+ append(mata.states, newstate)?;
+ return newstate;
+};
+
+fn epsilon_to(
+ mata: *automata,
+ loc: *state,
+ to: *state,
+) (void | error) = {
+ const newtrans = alloc(transition {
+ from = loc,
+ to = to,
+ epsilon = true,
+ tree = mata.curtree,
+ ...
+ })?;
+ append(loc.transitions, newtrans)?;
+ append(mata.transitions, newtrans)?;
+};
+
+fn runes2runesets(in: []rune) ((size, bool, []runeset) | error) = {
+ let runes: []runeset = [];
+ let negate = false;
+ switch (in[0]) {
+ case '[' =>
+ let i = 1z;
+ if (in[i] == '^') {
+ negate = true;
+ i += 1;
+ };
+ for (i < len(in)) {
+ if (in[i] == ']') return (i + 1, negate, runes);
+ if (i + 2 < len(in) && in[i + 1] == '-' && in[i + 2] != ']') {
+ append(runes, (in[i], in[i + 2]))?;
+ i += 3;
+ } else {
+ const (s, r) = rune2runeset(in[i..])?;
+ i += s;
+ append(runes, r)?;
+ };
+ };
+ return syntaxf("unexpected eof while looking at ]");
+ case =>
+ const (s, r) = rune2runeset(in)?;
+ append(runes, r)?;
+ return (s, negate, runes);
+ };
+};
+
+fn rune2runeset(in: []rune) ((size, runeset) | error) = {
+ switch (in[0]) {
+ case '.' => return (1, dot);
+ case '\\' =>
+ if (!(1 < len(in))) return syntaxf("escaped eof");
+ switch (in[1]) {
+ case 'n' =>
+ return (2, '\n');
+ case 't' =>
+ return (2, '\t');
+ case 'r' =>
+ return (2, '\r');
+ case =>
+ return (2, in[1]);
+ };
+ case =>
+ return (1, in[0]);
+ };
+};
diff --git a/machine/debug.ha b/machine/debug.ha
@@ -0,0 +1,92 @@
+use fmt;
+use strings;
+use io;
+
+export fn debug_automata(mata: *automata) (void | nomem | io::error) = {
+ fmt::errorln("digraph {")?;
+ for (let state .. mata.states) {
+ match (state.result) {
+ case void => void;
+ case let res: size =>
+ fmt::errorf("s{} [", state.index)?;
+ fmt::error("color = blue; ")?;
+ fmt::errorf("xlabel = \"A: {}\"; ", res)?;
+ fmt::errorln("]")?;
+ };
+ };
+ for (let state .. mata.states) {
+ for (let trans .. state.transitions) {
+ const to = trans.to;
+ fmt::errorf("s{} -> s{} [", state.index, to.index)?;
+ if (trans.negate) fmt::error("color = red; ")?;
+ fmt::error("arrowsize = 0.5; ")?;
+ fmt::error("headlabel = \"")?;
+ for (let a .. trans.accepts) {
+ match(a) {
+ case let r: rune =>
+ print_rune(r)?;
+ case let r: (rune, rune) =>
+ print_rune(r.0)?;
+ fmt::errorf("-")?;
+ print_rune(r.1)?;
+ case => void;
+ };
+ };
+ fmt::error("\"; ")?;
+ if (!mata.deterministic) {
+ fmt::errorf("taillabel = \"T: {}\"; ", trans.tree)?;
+ };
+ if (trans.epsilon)
+ fmt::error("color = green; ")?;
+ fmt::errorfln("]")?;
+ };
+ };
+ fmt::errorln("}")?;
+};
+
+fn debug_dfa_transition(dfa: *dfa_transition) (void | nomem | io::error) = {
+ for (let trans_state .. dfa.trans_states) {
+ fmt::error("{")?;
+ for (let i = 0z; i < len(trans_state.states); i += 1) {
+ if (i != 0z) fmt::error(",")?;
+ fmt::errorf("s{}", trans_state.states[i].index)?;
+ };
+ fmt::error("}: ")?;
+ for (let i = 0z; i < len(trans_state.transitions); i += 1) {
+ let trans = trans_state.transitions[i];
+ if (i != 0z) fmt::error("; ")?;
+ match (trans.input) {
+ case let acc: dfa_accept =>
+ fmt::error(acc)?;
+ case let neg: dfa_negate =>
+ fmt::error("^")?;
+ for (let i = 0z; i < len(neg); i += 1) {
+ fmt::error(neg[i])?;
+ };
+ };
+ fmt::error(" {")?;
+ let to = trans.to;
+ for (let i = 0z; i < len(to.states); i += 1) {
+ if (i != 0z) fmt::error(",")?;
+ fmt::errorf("s{}", to.states[i].index)?;
+ };
+ fmt::error("}")?;
+ };
+ fmt::errorln()?;
+ };
+};
+
+fn print_rune(r: rune) (void | io::error) = {
+ switch (r) {
+ case '\n' => fmt::error(`\\n`)?;
+ case '\t' => fmt::error(`\\t`)?;
+ case '\r' => fmt::error(`\\r`)?;
+ case '"' => fmt::error(`\"`)?;
+ case '\\' => fmt::error(`\\`)?;
+ case ' ' => fmt::error(`\\w`)?;
+ case =>
+ const accept = strings::fromrunes([r])?;
+ defer free(accept);
+ fmt::error(accept)?;
+ };
+};
diff --git a/machine/determine.ha b/machine/determine.ha
@@ -0,0 +1,385 @@
+// DFA transition table
+type dfa_transition = struct {
+ trans_states: []*dfa_transition_state,
+};
+
+// DFA transition state (row)
+type dfa_transition_state = struct {
+ states: []*state,
+ transitions: []*dfa_transition_trans,
+};
+
+// DFA transition transition (col)
+type dfa_transition_trans = struct {
+ from: *dfa_transition_state,
+ to: *dfa_transition_state,
+ input: dfa_input,
+};
+
+type dfa_input = (dfa_accept | dfa_negate | dfa_dot);
+type dfa_accept = rune;
+type dfa_negate = []rune;
+type dfa_dot = void; // only when gathering inputs, then use negate empty
+
+fn dfa_transition_finish(dfa: *dfa_transition) void = {
+ for (let state .. dfa.trans_states) {
+ dfa_transition_state_finish(state);
+ free(state);
+ };
+ free(dfa.trans_states);
+};
+
+fn dfa_transition_state_finish(dfa_state: *dfa_transition_state) void = {
+ for (let trans .. dfa_state.transitions) {
+ dfa_transition_trans_finish(trans);
+ free(trans);
+ };
+ free(dfa_state.transitions);
+ free(dfa_state.states);
+};
+
+fn dfa_transition_trans_finish(dfa_trans: *dfa_transition_trans) void = {
+ if (dfa_trans.input is dfa_negate)
+ free(dfa_trans.input as dfa_negate);
+};
+
+// Build a DFA from an NDFA. The input automata can then be finished.
+export fn determine(mata: *automata) (automata | nomem) = {
+ let dfa = dfa_transition { ... };
+ defer dfa_transition_finish(&dfa);
+
+ fill_dfa_transition(&dfa, mata.start)?;
+
+ const new = automata {
+ curindex = 0z,
+ curtree = 1z,
+ start = null: *state,
+ deterministic = true,
+ ...
+ };
+ append(new.results, mata.results...)?;
+
+ fill_states(&new, &dfa)?;
+ new.start = new.states[0];
+
+ return new;
+};
+
+// Build a dfa transition table from a group of state.
+fn fill_dfa_transition(
+ dfa: *dfa_transition,
+ states: *state...
+) (*dfa_transition_state | nomem) = {
+ let states = crawl_elipsis(states...)?;
+ defer free(states);
+
+ match (get_state(dfa, states)) {
+ case null => void;
+ case let out: *dfa_transition_state => return out;
+ };
+
+ const dfa_state = alloc(dfa_transition_state {
+ ...
+ })?;
+ append(dfa_state.states, states...)?;
+ append(dfa.trans_states, dfa_state)?;
+
+ const all_trans = gather_trans(states)?;
+ defer free(all_trans);
+
+ const inputs = trans_inputs(all_trans)?;
+ defer free(inputs);
+
+ const groups = group_trans_input(all_trans, inputs)?;
+ defer {
+ for (let group .. groups) {
+ free(group.1);
+ };
+ free(groups);
+ };
+
+ for (let group .. groups) {
+ const next_states: []*state = [];
+ defer free(next_states);
+ for (let trans .. group.1) {
+ append(next_states, trans.to)?;
+ };
+ const next = fill_dfa_transition(dfa, next_states...)?;
+ const new = alloc(dfa_transition_trans {
+ from = dfa_state,
+ to = next,
+ input = group.0,
+ })?;
+ append(dfa_state.transitions, new)?;
+ };
+
+ return dfa_state;
+};
+
+// Gather all reachable states through elipsises.
+fn crawl_elipsis(locs: *state...) ([]*state | nomem) = {
+ let out: []*state = [];
+ for: root (let loc .. locs) {
+ for (let out .. out) if (out == loc) continue: root;
+ append(out, loc)?;
+ for: trans (let trans .. loc.transitions) {
+ if (0 != len(trans.accepts)) continue;
+ for (let out .. out) if (out == trans.to) continue: trans;
+ let new = crawl_elipsis(trans.to)?;
+ defer free(new);
+ for: new (let new .. new)
+ for (let out .. out) if (out == new) continue: new;
+ append(out, new...)?;
+ };
+ };
+ return out;
+};
+
+// Return the dfa transition state for a group of state when already builded.
+fn get_state(
+ dfa: *dfa_transition,
+ exp_states: []*state,
+) nullable *dfa_transition_state = {
+ for (let trans_state .. dfa.trans_states) {
+ if (len(trans_state.states) != len(exp_states))
+ continue;
+ let matched = 0z;
+ for : root (let state .. trans_state.states) {
+ for (let _state .. exp_states) {
+ if (state == _state) {
+ matched += 1;
+ continue : root;
+ };
+ };
+ };
+ if (matched == len(exp_states)) return trans_state;
+ };
+ return null;
+};
+
+// All trans from a group of state that are not elipsises.
+fn gather_trans(states: []*state) ([]*transition | nomem) = {
+ let trans: []*transition = [];
+ for (let state .. states) {
+ for (let _trans .. state.transitions) {
+ if (len(_trans.accepts) == 0) {
+ continue;
+ };
+ append(trans, _trans)?;
+ };
+ };
+ return trans;
+};
+
+// Gather all inputs from a group of trans.
+fn trans_inputs(trans: []*transition) ([]dfa_input | nomem) = {
+ let out: []dfa_input = [];
+
+ for (let trans .. trans) {
+ for: accept (let accept .. trans.accepts) {
+ match (accept) {
+ case dot =>
+ append(out, dfa_dot)?;
+ continue;
+ case let r: rune =>
+ // check already appended
+ for (let out .. out) {
+ match (out) {
+ case let out: dfa_accept =>
+ if (out == r)
+ continue: accept;
+ case => void;
+ };
+ };
+ append(out, r)?;
+ case let r: (rune, rune) =>
+ const a = r.0: u32;
+ const b = r.1: u32;
+ const min = if (a < b) a else b;
+ const max = if (a < b) b else a;
+ for: range (let i = min; i <= max; i += 1) {
+ let r = i: rune;
+ // check already appended
+ for (let out .. out) {
+ match (out) {
+ case let out: dfa_accept =>
+ if (out == r)
+ continue: range;
+ case => void;
+ };
+ };
+ append(out, r)?;
+ };
+ };
+ };
+ };
+
+ return out;
+};
+
+// Group all accepted transitions per input.
+fn group_trans_input(
+ trans: []*transition,
+ inputs: []dfa_input
+) ([](dfa_input, []*transition) | nomem) = {
+ let out: [](dfa_input, []*transition) = [];
+
+ let neg: dfa_negate = [];
+ defer free(neg);
+
+ let work: []*transition = [];
+ defer free(work);
+
+ for (let input .. inputs) {
+ match (input) {
+ case dfa_dot => void;
+ case dfa_negate => abort("unreachable");
+ case let input: dfa_accept =>
+ for (let trans .. trans) {
+ if (accept(trans, input)) {
+ append(work, trans)?;
+ };
+ };
+ append(neg, input)?;
+ };
+ if (len(work) != 0) {
+ append(out, (input, []))?;
+ let new = &out[len(out) - 1];
+ append(new.1, work...)?;
+ delete(work[..]);
+ };
+ };
+ for: trans (let trans .. trans) {
+ for (let a .. trans.accepts) {
+ if (!(a is dot)) continue;
+ append(work, trans)?;
+ continue: trans;
+ };
+ if (!(trans.negate)) continue;
+ let allfound = true;
+ for (let a .. trans.accepts) {
+ let found = false;
+ match (a) {
+ case let a: rune =>
+ for (let input .. neg) {
+ if (input == a) {
+ found = true;
+ break;
+ };
+ };
+ case => found = false;
+ };
+ if (!found) {
+ allfound = false;
+ break;
+ };
+ };
+ if (allfound) {
+ append(work, trans)?;
+ };
+ };
+ if (len(work) != 0) {
+ let newneg: dfa_negate = [];
+ append(newneg, neg...)?;
+ append(out, (newneg, []))?;
+ let new = &out[len(out) - 1];
+ append(new.1, work...)?;
+ };
+
+ return out;
+};
+
+// Build states from a dfa transition table.
+fn fill_states(
+ mata: *automata,
+ dfa: *dfa_transition,
+) (void | nomem) = {
+ // Produce a hashmap (dfa_state -> state).
+ let dfa2mata: [](*dfa_transition_state, *state) = [];
+ defer free(dfa2mata);
+ for (let dfa_state .. dfa.trans_states) {
+ const state = alloc(state {
+ index = mata.curindex,
+ result = void,
+ ...
+ })?;
+ mata.curindex += 1;
+ for (let _state .. dfa_state.states) {
+ if (_state.result is size) {
+ state.result = _state.result;
+ break;
+ };
+ };
+ append(dfa2mata, (dfa_state, state))?;
+ append(mata.states, state)?;
+ };
+
+ // Add all states linearly
+ for (let i = 0z; i < len(dfa.trans_states); i += 1) {
+ const dfa_state = dfa.trans_states[i];
+ const from = dfa2mata[i].1;
+ for: trans (let dfa_trans .. dfa_state.transitions) {
+ let to: nullable *state = null;
+ for (let dfa2mata .. dfa2mata) {
+ if (dfa2mata.0 == dfa_trans.to) {
+ to = dfa2mata.1;
+ break;
+ };
+ };
+ let to = to as *state;
+
+ // Append accepts to existing trans.
+ for (let trans .. from.transitions) {
+ if (trans.to != to) continue;
+ match (dfa_trans.input) {
+ case let acc: dfa_accept =>
+ if (trans.negate) continue;
+ append_to_accepts(&trans.accepts, acc)?;
+ case let neg: dfa_negate =>
+ if (!trans.negate) continue;
+ for (let neg .. neg) {
+ append_to_accepts(&trans.accepts, neg)?;
+ };
+ };
+ continue: trans;
+ };
+
+ // Or create a new trans.
+ const trans = alloc(transition {
+ from = from,
+ to = to,
+ tree = 0,
+ ...
+ })?;
+ match (dfa_trans.input) {
+ case let acc: dfa_accept =>
+ append(trans.accepts, acc)?;
+ case let neg: dfa_negate =>
+ trans.negate = true;
+ for (let neg .. neg)
+ append(trans.accepts, neg)?;
+ };
+ append(mata.transitions, trans)?;
+ append(from.transitions, trans)?;
+ };
+ };
+};
+
+fn append_to_accepts(accepts: *[]runeset, r: rune) (void | nomem) = {
+ assert(len(accepts) != 0);
+ match (&accepts[len(accepts) - 1]) {
+ case let lastr: *rune =>
+ if (*lastr: u32 == r: u32 - 1) {
+ const lastr = *lastr;
+ delete(accepts[len(accepts) - 1]);
+ append(accepts, (lastr, r))?;
+ return;
+ };
+ case let lastrange: *(rune, rune) =>
+ if (lastrange.1: u32 == r: u32 - 1) {
+ lastrange.1 = r;
+ return;
+ };
+ };
+ append(accepts, r)?;
+};
diff --git a/machine/error.ha b/machine/error.ha
@@ -0,0 +1,19 @@
+use encoding::utf8;
+use fmt;
+
+export type syntax = !str;
+export type error = !(syntax | nomem | utf8::invalid);
+
+export fn strerror(err: error) str = {
+ match (err) {
+ case let err: syntax => return err;
+ case let err: utf8::invalid => return utf8::strerror(err);
+ case nomem => return "nomem";
+ };
+};
+
+export fn syntaxf(msg: const str, args: fmt::field...) syntax = {
+ static let buf: [2048]u8 = [0...];
+ const msg = fmt::bsprintf(buf, msg, args...)!;
+ return msg;
+};
diff --git a/machine/resolve.ha b/machine/resolve.ha
@@ -0,0 +1,181 @@
+use bufio;
+use encoding::utf8;
+use strings;
+
+// Perform the automata with the given string. Returns the matched size, with
+// associated accepted output, or void otherwise.
+export fn resolve(
+ mata: *automata,
+ decoder: utf8::decoder
+) (void | (size, *opaque) | error) = {
+ if (mata.deterministic) {
+ return resolve_deterministic(mata, decoder);
+ };
+
+ let loc = mata.start;
+
+ let results: [](size, *opaque) = [];
+ defer free(results);
+
+ for (let tree = 0z; tree < mata.curtree; tree += 1) {
+ match (crawl(mata, loc, decoder, tree)?) {
+ case void => void;
+ case let res: (size, *opaque) =>
+ append(results, res)?;
+ yield res.0;
+ };
+ };
+
+ let longest: (void | (size, *opaque)) = void;
+ for (let res .. results) {
+ match (longest) {
+ case void =>
+ longest = res;
+ continue;
+ case let this: (size, *opaque) =>
+ if (this.0 < res.0) {
+ longest = res;
+ };
+ };
+ };
+ return longest;
+};
+
+fn resolve_deterministic(
+ mata: *automata,
+ decoder: utf8::decoder,
+) (void | (size, *opaque) | error) = {
+ assert(mata.deterministic);
+ let loc = mata.start;
+ let lastres: (void | (size, *opaque)) = void;
+ for: root (let i = 0z; true; i += 1) {
+ const r = match (utf8::next(&decoder)?) {
+ case let this: rune => yield this;
+ case done => break;
+ case utf8::more => abort("unsupported partial input");
+ };
+ for (let trans .. loc.transitions) {
+ if (!accept(trans, r)) continue;
+ loc = trans.to;
+ match (result(mata, loc, 0)) {
+ case let out: *opaque =>
+ lastres = (utf8::position(&decoder), out);
+ case null => void;
+ };
+ continue: root;
+ };
+ return lastres;
+ };
+ return lastres;
+};
+
+fn crawl(
+ mata: *automata,
+ loc: *state,
+ start: utf8::decoder,
+ tree: size,
+) (void | (size, *opaque) | error) = {
+ const decoder = start;
+
+ if (0 == len(utf8::remaining(&decoder))) {
+ match (result(mata, loc, tree)) {
+ case let out: *opaque => return (utf8::position(&decoder), out);
+ case null => return void;
+ };
+ };
+
+ const r = match (utf8::next(&decoder)?) {
+ case let this: rune => yield this;
+ case done => abort("unreachable");
+ case utf8::more => abort("unsupported partial input");
+ };
+
+ let accepting: []*transition = [];
+ defer free(accepting);
+
+ for (let trans .. loc.transitions) {
+ if (trans.tree != tree) continue;
+ if (!accept(trans, r)) continue;
+ append(accepting, trans)?;
+ };
+
+ if (len(accepting) == 0) {
+ match (result(mata, loc, tree)) {
+ case null => return void;
+ case let out: *opaque => return (utf8::position(&start), out);
+ };
+ };
+
+ let results: [](size, *opaque) = [];
+ defer free(results);
+ for (let acc .. accepting) {
+ const d = if (0 == len(acc.accepts)) {
+ yield start;
+ } else {
+ yield decoder;
+ };
+ match (crawl(mata, acc.to, d, tree)?) {
+ case void => void;
+ case let this: (size, *opaque) =>
+ append(results, this)?;
+ };
+ };
+
+ let longest: (void | (size, *opaque)) = void;
+ for (let res .. results) {
+ match (longest) {
+ case void =>
+ longest = res;
+ case let this: (size, *opaque) =>
+ if (this.0 < res.0) {
+ longest = res;
+ };
+ };
+ };
+
+ if (longest is void) {
+ match (result(mata, loc, tree)) {
+ case null => return void;
+ case let out: *opaque => return (utf8::position(&start), out);
+ };
+ };
+
+ return longest;
+};
+
+fn accept(trans: *transition, _r: rune) bool = {
+ if (trans.epsilon) return true;
+ const res = !trans.negate;
+ for (let a .. trans.accepts) {
+ match (a) {
+ case dot => return res;
+ case let r: rune => if (_r == r) return res;
+ case let r: (rune, rune) =>
+ const a = r.0: u32;
+ const b = r.1: u32;
+ const min = if (a < b) a else b;
+ const max = if (a < b) b else a;
+ if (min <= _r: u32 && _r: u32 <= max) return res;
+ };
+ };
+ return !res;
+};
+
+fn result(
+ mata: *automata,
+ loc: *state,
+ tree: size
+) nullable *opaque = {
+ if (loc.result is size) {
+ return mata.results[loc.result as size];
+ };
+ for (let trans .. loc.transitions) {
+ if (trans.tree != tree) continue;
+ if (!trans.epsilon) continue;
+ match (result(mata, trans.to, tree)) {
+ case null => void;
+ case let out: *opaque => return out;
+ };
+ };
+ return null;
+};
diff --git a/machine/type.ha b/machine/type.ha
@@ -0,0 +1,72 @@
+// An automata.
+export type automata = struct {
+ curindex: size,
+ curtree: size,
+ start: *state,
+ states: []*state,
+ transitions: []*transition,
+ results: []*opaque,
+ deterministic: bool,
+};
+
+// A state.
+export type state = struct {
+ index: size,
+ result: (void | size),
+ transitions: []*transition,
+};
+
+// A transition.
+export type transition = struct {
+ from: *state,
+ to: *state,
+ tree: size,
+ epsilon: bool,
+ negate: bool, // [^x]
+ accepts: []runeset,
+};
+
+export type dot = void;
+
+// A runeset.
+export type runeset = (dot | rune | (rune, rune));
+
+// Initialize an automata. The caller must free associated resources with
+// [[finish]] when they have done using it.
+export fn init() (automata | error) = {
+ const start = alloc(state {
+ index = 0z,
+ result = void,
+ ...
+ })?;
+ const states: []*state = [];
+ append(states, start)?;
+ return automata {
+ curindex = 1z,
+ curtree = 0z,
+ start = start,
+ states = states,
+ ...
+ };
+};
+
+// Free resources associated with an [[automata]].
+export fn finish(mata: *automata) void = {
+ for (let state .. mata.states)
+ state_destroy(state);
+ free(mata.states);
+ for (let trans .. mata.transitions)
+ transition_destroy(trans);
+ free(mata.results);
+ free(mata.transitions);
+};
+
+fn state_destroy(state: *state) void = {
+ free(state.transitions);
+ free(state);
+};
+
+fn transition_destroy(trans: *transition) void = {
+ free(trans.accepts);
+ free(trans);
+};