sys

A set of unix utils in hare!
Log | Files | Refs | README

commit 875c20f1a60e75a86db46acdc5bec10ded07846a
parent 42507db3380c48616172b3059b30473245113fa2
Author: thing1 <thing1@seacrossedlovers.xyz>
Date:   Tue, 10 Mar 2026 19:10:11 +0000

ed stuff

Diffstat:
MMakefile | 8++++++--
Mcmd/ed/cmds.ha | 65++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
Mcmd/ed/main.ha | 73++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
Acmd/fmt.ha | 29+++++++++++++++++++++++++++++
Acmd/rcsh/lex.ha | 75+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acmd/rcsh/main.ha | 8++++++++
Acmd/rcsh/parse.ha | 14++++++++++++++
Mlex/backend.ha | 2+-
Amachine/compute.ha | 190+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amachine/debug.ha | 92+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amachine/determine.ha | 385+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amachine/error.ha | 19+++++++++++++++++++
Amachine/resolve.ha | 181+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amachine/type.ha | 72++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
14 files changed, 1204 insertions(+), 9 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,13 +1,13 @@ .POSIX: .SUFFIXES: HARE=hare -HAREFLAGS= +HAREFLAGS=-lc DESTDIR= PREFIX=/usr/local BINDIR=$(PREFIX)/bin -all: bin/ls bin/rainbow bin/cat bin/uniq bin/split bin/wc bin/yes bin/mc bin/ed +all: bin/ls bin/rainbow bin/cat bin/uniq bin/split bin/wc bin/yes bin/mc bin/ed bin/rcsh clean: rm -rf bin/* @@ -22,6 +22,10 @@ EDSRC != find cmd/ed/ -name *.ha bin/ed: $(EDSRC) $(HARE) build $(HAREFLAGS) -o $@ cmd/ed/ +RCSHSRC != find cmd/rcsh/ -name *.ha +bin/rcsh: $(RCSHSRC) + $(HARE) build $(HAREFLAGS) -o $@ cmd/rcsh/ + bin/ls: cmd/ls.ha $(HARE) build $(HAREFLAGS) -o $@ cmd/ls.ha diff --git a/cmd/ed/cmds.ha b/cmd/ed/cmds.ha @@ -1,12 +1,18 @@ +use bufio; +use io; +use os; +use strings; +use fmt; + export type option = enum { CANT, WILL, MUST, }; -export type cmdfn = fn(file: *lines, current: size, args: []str) void; +export type cmdfn = fn(file: **lines, current: *size, range: (size, size), args: []str) void; -export type cmds = struct { +export type cmd = struct { // the char to call the command cmd: rune, // help text for the command @@ -15,12 +21,61 @@ export type cmds = struct { // does the command allow a range range: option, - // does the command allow a repeater - repeat: option, - // the number of arguments a cmd can take args: size, // function f: *cmdfn, }; + +const cmds = [ + cmd{cmd = 'i', msg = "insert lines", range = option::CANT, args = 0, f = &insfn}, + cmd{cmd = 'a', msg = "append lines", range = option::CANT, args = 0, f = &appfn}, + cmd{cmd = 'd', msg = "delete lines", range = option::WILL, args = 0, f = &delfn}, + cmd{cmd = 'p', msg = "print lines", range = option::WILL, args = 0, f = &prifn}, +]; + +fn insfn(file: **lines, current: *size, range: (size, size), args: []str) void = { + let sc = bufio::newscanner(os::stdin); + defer bufio::finish(&sc); + + for (true) { + match (bufio::scan_line(&sc)) { + case let s: str => + if (s == ".") break; + *file= insertline(*file, newline(s), *current); + *current += 1; + case => abort(); + }; + }; +}; + +fn appfn(file: **lines, current: *size, range: (size, size), args: []str) void = { + *current += 1; + insfn(file, current, range, args); +}; + +fn delfn(file: **lines, current: *size, range: (size, size), args: []str) void = { + *file = deleteline(*file, *current); +}; + +fn prifn(file: **lines, current: *size, range: (size, size), args: []str) void = { + if (range.0: i64 == -1) { + let l = getline(*file, *current); + fmt::println(l.s)!; + } else { + if (range.1: i64 != -1) { + for (let i = range.0; i: i64 < range.1: i64; i += 1) { + let l = getline(*file, i); + fmt::println(l.s)!; + }; + } else { + for (let i = *current; i: i64 < range.0: i64; i += 1) { + let l = getline(*file, i); + fmt::println(l.s)!; + }; + }; + }; +}; + + diff --git a/cmd/ed/main.ha b/cmd/ed/main.ha @@ -4,9 +4,20 @@ use bufio; use strings; use fs; use os; +use ascii; +use strconv; use util; +type command = struct { + c: rune, + range: (size, size), + args: []str +}; + +let file: nullable *lines = null; +let pos: size = 0; + fn loadfile(h: io::handle) nullable *lines = { let ls: nullable *lines = null; let sc = bufio::newscanner(h); @@ -39,6 +50,66 @@ fn openfile(name: str) nullable *lines = { return loadfile(h); }; +fn parsenum(runes: []rune) (size, []rune) = { + let digits: []rune = []; + defer free(digits); + + let i = 0z; + + for (ascii::isdigit(runes[i])) { + append(digits, runes[i])!; + i += 1; + }; + + let num = strings::fromrunes(digits)!; + defer free(num); + + return (strconv::stoz(num)!, runes[i..]); +}; + +fn parsecmd(s: str) command = { + let c: command = command{c = ' ', range = (-1, -1), args = []}; + + let runes = strings::torunes(s)!; + let sav = runes; + defer free(sav); + + if (ascii::isdigit(runes[0])) { + let res = parsenum(runes); + c.range.0 = res.0; runes = res.1; + if (runes[0] == ',') { + res = parsenum(runes[1..]); + c.range.0 = res.0; runes = res.1; + } else c.range.1 = -1; + } else c.range = (-1, -1); + + c.c = runes[0]; + + return c; +}; + +fn runcmd() void = { + let sc = bufio::newscanner(os::stdin); + defer bufio::finish(&sc); + + let in = parsecmd(bufio::scan_line(&sc) as str); + + for (let c .. cmds) { + if (c.cmd == in.c) { + c.f(&(file as *lines), &pos, in.range, in.args); + break; + }; + }; +}; + export fn main() void = { - abort(); + if (len(os::args) < 2) { + util::die("", "needs a file!"); + }; + let f = os::open(os::args[1])!; + file = loadfile(f); + + for (true) { + runcmd(); + }; }; diff --git a/cmd/fmt.ha b/cmd/fmt.ha @@ -0,0 +1,29 @@ +use fmt; +use io; + +use util; + +export fn main() void = { + const cmd = getopt::parse(os::args, + "text formater", + ('w', "width", "The max width of output + ); + defer getopt::finish(&cmd); + + for (let opt .. cmd.opts) { + switch (opt.0) { + case 'w' => + width = strconv::stoi(opt.1)!; + case => abort(); + }; + }; + + let sc = bufio::newscanner(os::stdin); + for (true) { + let s = match (bufio::scan_str(&sc, " ")) { + case let s: str => + case io::EOF => break; + case => util::die("stdin", "couldn't read"); + }; + }; +}; diff --git a/cmd/rcsh/lex.ha b/cmd/rcsh/lex.ha @@ -0,0 +1,75 @@ +use lex; +use strings; +use strconv; + +fn lexinit(in: str) lex::lexer = { + let actions: []lex::action = []; + + append(actions, lex::action{ + expr = `([A-z][A-z0-9]+)`, + cb = &word, + name = "word", + ... + })!; + + append(actions, lex::action{ + expr = `(-?[0-9]+)`, + cb = &number, + name = "word", + ... + })!; + + append(actions, lex::action{ + expr = `\$([A-z][A-z0-9]+)`, + cb = &var, + name = "var", + ... + })!; + + append(actions, lex::action{ + expr = `( |\t|\n|\r)+`, + cb = &skip, + ... + })!; + + const backend = lex::def_backend()!(actions)!; + return lex::init(backend, in); +}; + +fn word(sc: *lex::scanner, + lexeme: const str, + user: nullable *opaque + ) (str | *lex::token | lex::error) = { + + let t = lex::scan_token(sc, void, lexeme)?; + t.value = t.morphene; + return t; +}; + +fn var(sc: *lex::scanner, + lexeme: const str, + user: nullable *opaque + ) (str | *lex::token | lex::error) = { + + let t = lex::scan_token(sc, void, lexeme)?; + t.value = strings::sub(t.morphene, 1); + return t; +}; + +fn number(sc: *lex::scanner, + lexeme: const str, + user: nullable *opaque + ) (str | *lex::token | lex::error) = { + + let t = lex::scan_token(sc, void, lexeme)?; + t.value = strconv::stoi64(t.morphene)!; + return t; +}; + +fn skip(sc: *lex::scanner, + lexeme: const str, + user: nullable *opaque + ) (str | *lex::token | lex::error) = { + return lexeme; +}; + diff --git a/cmd/rcsh/main.ha b/cmd/rcsh/main.ha @@ -0,0 +1,8 @@ +use lex; +use fmt; + +export fn main() void = { + const lx = &lexinit("$TERM"); + let tok = lex::next(lx)!; + fmt::println(tok.value)!; +}; diff --git a/cmd/rcsh/parse.ha b/cmd/rcsh/parse.ha @@ -0,0 +1,14 @@ +use lex; + +type value = [](expr | str); + +type expr = struct { + // must be length 1 + cmd: value, + // can be any length, even 0 + args: value +}; + +fn parseexpr(l: *lex::lexer) expr = { + +}; diff --git a/lex/backend.ha b/lex/backend.ha @@ -1,5 +1,5 @@ use encoding::utf8; -use lexical::machine; +use machine; use os; use strings; diff --git a/machine/compute.ha b/machine/compute.ha @@ -0,0 +1,190 @@ +use strings; + +// Compute new branches with an NFA form. +export fn compute( + in: [](str, *opaque) +) (automata | error) = { + const mata = init()?; + for (let (in, out) .. in) { + add_tree(&mata, in)?; + append(mata.results, out)?; + mata.curtree += 1; + }; + return mata; +}; + +fn add_tree( + mata: *automata, + in: str, +) (void | error) = { + const runes = strings::torunes(in)!; + defer free(runes); + + let loc = mata.start; + let last = mata.start; + let groupstarts: []*state = []; + let unions: [][]*state = []; + + for (let i = 0z; i < len(runes)) { + switch (runes[i]) { + case '(' => + loc = add_epsilon(mata, loc)?; + append(groupstarts, loc)?; + append(unions, [])?; + i += 1; + case ')' => + if (!(0 < len(unions))) { + return syntaxf("unexpected ) out of group"); + }; + loc = add_epsilon(mata, loc)?; + for (let _union .. unions[len(unions) - 1]) { + epsilon_to(mata, _union, loc)?; + }; + assert(len(groupstarts) > 0); + last = groupstarts[len(groupstarts) - 1]; + delete(groupstarts[len(groupstarts) - 1]); + free(unions[len(unions) - 1]); + delete(unions[len(unions) - 1]); + i += 1; + case '|' => + append(unions[len(unions) - 1], loc)?; + loc = groupstarts[len(groupstarts) - 1]; + i += 1; + case '*' => + epsilon_to(mata, loc, last)?; + loc = last; + i += 1; + case '?' => + epsilon_to(mata, last, loc)?; + i += 1; + case '+' => + epsilon_to(mata, loc, last)?; + i += 1; + case => + const (s, new) = add_one( + mata, + loc, + runes[i..], + )?; + i += s; + last = loc; + loc = new; + }; + }; + + loc.result = mata.curtree; +}; + +fn add_one( + mata: *automata, + loc: *state, + runes: []rune, +) ((size, *state) | error) = { + const (s, negate, runes) = runes2runesets(runes)?; + const newstate = alloc(state { + index = mata.curindex, + result = void, + ... + })?; + mata.curindex += 1; + const newtrans = alloc(transition { + from = loc, + to = newstate, + accepts = runes, + negate = negate, + tree = mata.curtree, + ... + })?; + append(loc.transitions, newtrans)?; + append(mata.transitions, newtrans)?; + append(mata.states, newstate)?; + return (s, newstate); +}; + +fn add_epsilon( + mata: *automata, + loc: *state, +) (*state | error) = { + const newstate = alloc(state { + index = mata.curindex, + result = void, + ... + })?; + mata.curindex += 1; + const newtrans = alloc(transition { + from = loc, + to = newstate, + epsilon = true, + tree = mata.curtree, + ... + })?; + append(loc.transitions, newtrans)?; + append(mata.transitions, newtrans)?; + append(mata.states, newstate)?; + return newstate; +}; + +fn epsilon_to( + mata: *automata, + loc: *state, + to: *state, +) (void | error) = { + const newtrans = alloc(transition { + from = loc, + to = to, + epsilon = true, + tree = mata.curtree, + ... + })?; + append(loc.transitions, newtrans)?; + append(mata.transitions, newtrans)?; +}; + +fn runes2runesets(in: []rune) ((size, bool, []runeset) | error) = { + let runes: []runeset = []; + let negate = false; + switch (in[0]) { + case '[' => + let i = 1z; + if (in[i] == '^') { + negate = true; + i += 1; + }; + for (i < len(in)) { + if (in[i] == ']') return (i + 1, negate, runes); + if (i + 2 < len(in) && in[i + 1] == '-' && in[i + 2] != ']') { + append(runes, (in[i], in[i + 2]))?; + i += 3; + } else { + const (s, r) = rune2runeset(in[i..])?; + i += s; + append(runes, r)?; + }; + }; + return syntaxf("unexpected eof while looking at ]"); + case => + const (s, r) = rune2runeset(in)?; + append(runes, r)?; + return (s, negate, runes); + }; +}; + +fn rune2runeset(in: []rune) ((size, runeset) | error) = { + switch (in[0]) { + case '.' => return (1, dot); + case '\\' => + if (!(1 < len(in))) return syntaxf("escaped eof"); + switch (in[1]) { + case 'n' => + return (2, '\n'); + case 't' => + return (2, '\t'); + case 'r' => + return (2, '\r'); + case => + return (2, in[1]); + }; + case => + return (1, in[0]); + }; +}; diff --git a/machine/debug.ha b/machine/debug.ha @@ -0,0 +1,92 @@ +use fmt; +use strings; +use io; + +export fn debug_automata(mata: *automata) (void | nomem | io::error) = { + fmt::errorln("digraph {")?; + for (let state .. mata.states) { + match (state.result) { + case void => void; + case let res: size => + fmt::errorf("s{} [", state.index)?; + fmt::error("color = blue; ")?; + fmt::errorf("xlabel = \"A: {}\"; ", res)?; + fmt::errorln("]")?; + }; + }; + for (let state .. mata.states) { + for (let trans .. state.transitions) { + const to = trans.to; + fmt::errorf("s{} -> s{} [", state.index, to.index)?; + if (trans.negate) fmt::error("color = red; ")?; + fmt::error("arrowsize = 0.5; ")?; + fmt::error("headlabel = \"")?; + for (let a .. trans.accepts) { + match(a) { + case let r: rune => + print_rune(r)?; + case let r: (rune, rune) => + print_rune(r.0)?; + fmt::errorf("-")?; + print_rune(r.1)?; + case => void; + }; + }; + fmt::error("\"; ")?; + if (!mata.deterministic) { + fmt::errorf("taillabel = \"T: {}\"; ", trans.tree)?; + }; + if (trans.epsilon) + fmt::error("color = green; ")?; + fmt::errorfln("]")?; + }; + }; + fmt::errorln("}")?; +}; + +fn debug_dfa_transition(dfa: *dfa_transition) (void | nomem | io::error) = { + for (let trans_state .. dfa.trans_states) { + fmt::error("{")?; + for (let i = 0z; i < len(trans_state.states); i += 1) { + if (i != 0z) fmt::error(",")?; + fmt::errorf("s{}", trans_state.states[i].index)?; + }; + fmt::error("}: ")?; + for (let i = 0z; i < len(trans_state.transitions); i += 1) { + let trans = trans_state.transitions[i]; + if (i != 0z) fmt::error("; ")?; + match (trans.input) { + case let acc: dfa_accept => + fmt::error(acc)?; + case let neg: dfa_negate => + fmt::error("^")?; + for (let i = 0z; i < len(neg); i += 1) { + fmt::error(neg[i])?; + }; + }; + fmt::error(" {")?; + let to = trans.to; + for (let i = 0z; i < len(to.states); i += 1) { + if (i != 0z) fmt::error(",")?; + fmt::errorf("s{}", to.states[i].index)?; + }; + fmt::error("}")?; + }; + fmt::errorln()?; + }; +}; + +fn print_rune(r: rune) (void | io::error) = { + switch (r) { + case '\n' => fmt::error(`\\n`)?; + case '\t' => fmt::error(`\\t`)?; + case '\r' => fmt::error(`\\r`)?; + case '"' => fmt::error(`\"`)?; + case '\\' => fmt::error(`\\`)?; + case ' ' => fmt::error(`\\w`)?; + case => + const accept = strings::fromrunes([r])?; + defer free(accept); + fmt::error(accept)?; + }; +}; diff --git a/machine/determine.ha b/machine/determine.ha @@ -0,0 +1,385 @@ +// DFA transition table +type dfa_transition = struct { + trans_states: []*dfa_transition_state, +}; + +// DFA transition state (row) +type dfa_transition_state = struct { + states: []*state, + transitions: []*dfa_transition_trans, +}; + +// DFA transition transition (col) +type dfa_transition_trans = struct { + from: *dfa_transition_state, + to: *dfa_transition_state, + input: dfa_input, +}; + +type dfa_input = (dfa_accept | dfa_negate | dfa_dot); +type dfa_accept = rune; +type dfa_negate = []rune; +type dfa_dot = void; // only when gathering inputs, then use negate empty + +fn dfa_transition_finish(dfa: *dfa_transition) void = { + for (let state .. dfa.trans_states) { + dfa_transition_state_finish(state); + free(state); + }; + free(dfa.trans_states); +}; + +fn dfa_transition_state_finish(dfa_state: *dfa_transition_state) void = { + for (let trans .. dfa_state.transitions) { + dfa_transition_trans_finish(trans); + free(trans); + }; + free(dfa_state.transitions); + free(dfa_state.states); +}; + +fn dfa_transition_trans_finish(dfa_trans: *dfa_transition_trans) void = { + if (dfa_trans.input is dfa_negate) + free(dfa_trans.input as dfa_negate); +}; + +// Build a DFA from an NDFA. The input automata can then be finished. +export fn determine(mata: *automata) (automata | nomem) = { + let dfa = dfa_transition { ... }; + defer dfa_transition_finish(&dfa); + + fill_dfa_transition(&dfa, mata.start)?; + + const new = automata { + curindex = 0z, + curtree = 1z, + start = null: *state, + deterministic = true, + ... + }; + append(new.results, mata.results...)?; + + fill_states(&new, &dfa)?; + new.start = new.states[0]; + + return new; +}; + +// Build a dfa transition table from a group of state. +fn fill_dfa_transition( + dfa: *dfa_transition, + states: *state... +) (*dfa_transition_state | nomem) = { + let states = crawl_elipsis(states...)?; + defer free(states); + + match (get_state(dfa, states)) { + case null => void; + case let out: *dfa_transition_state => return out; + }; + + const dfa_state = alloc(dfa_transition_state { + ... + })?; + append(dfa_state.states, states...)?; + append(dfa.trans_states, dfa_state)?; + + const all_trans = gather_trans(states)?; + defer free(all_trans); + + const inputs = trans_inputs(all_trans)?; + defer free(inputs); + + const groups = group_trans_input(all_trans, inputs)?; + defer { + for (let group .. groups) { + free(group.1); + }; + free(groups); + }; + + for (let group .. groups) { + const next_states: []*state = []; + defer free(next_states); + for (let trans .. group.1) { + append(next_states, trans.to)?; + }; + const next = fill_dfa_transition(dfa, next_states...)?; + const new = alloc(dfa_transition_trans { + from = dfa_state, + to = next, + input = group.0, + })?; + append(dfa_state.transitions, new)?; + }; + + return dfa_state; +}; + +// Gather all reachable states through elipsises. +fn crawl_elipsis(locs: *state...) ([]*state | nomem) = { + let out: []*state = []; + for: root (let loc .. locs) { + for (let out .. out) if (out == loc) continue: root; + append(out, loc)?; + for: trans (let trans .. loc.transitions) { + if (0 != len(trans.accepts)) continue; + for (let out .. out) if (out == trans.to) continue: trans; + let new = crawl_elipsis(trans.to)?; + defer free(new); + for: new (let new .. new) + for (let out .. out) if (out == new) continue: new; + append(out, new...)?; + }; + }; + return out; +}; + +// Return the dfa transition state for a group of state when already builded. +fn get_state( + dfa: *dfa_transition, + exp_states: []*state, +) nullable *dfa_transition_state = { + for (let trans_state .. dfa.trans_states) { + if (len(trans_state.states) != len(exp_states)) + continue; + let matched = 0z; + for : root (let state .. trans_state.states) { + for (let _state .. exp_states) { + if (state == _state) { + matched += 1; + continue : root; + }; + }; + }; + if (matched == len(exp_states)) return trans_state; + }; + return null; +}; + +// All trans from a group of state that are not elipsises. +fn gather_trans(states: []*state) ([]*transition | nomem) = { + let trans: []*transition = []; + for (let state .. states) { + for (let _trans .. state.transitions) { + if (len(_trans.accepts) == 0) { + continue; + }; + append(trans, _trans)?; + }; + }; + return trans; +}; + +// Gather all inputs from a group of trans. +fn trans_inputs(trans: []*transition) ([]dfa_input | nomem) = { + let out: []dfa_input = []; + + for (let trans .. trans) { + for: accept (let accept .. trans.accepts) { + match (accept) { + case dot => + append(out, dfa_dot)?; + continue; + case let r: rune => + // check already appended + for (let out .. out) { + match (out) { + case let out: dfa_accept => + if (out == r) + continue: accept; + case => void; + }; + }; + append(out, r)?; + case let r: (rune, rune) => + const a = r.0: u32; + const b = r.1: u32; + const min = if (a < b) a else b; + const max = if (a < b) b else a; + for: range (let i = min; i <= max; i += 1) { + let r = i: rune; + // check already appended + for (let out .. out) { + match (out) { + case let out: dfa_accept => + if (out == r) + continue: range; + case => void; + }; + }; + append(out, r)?; + }; + }; + }; + }; + + return out; +}; + +// Group all accepted transitions per input. +fn group_trans_input( + trans: []*transition, + inputs: []dfa_input +) ([](dfa_input, []*transition) | nomem) = { + let out: [](dfa_input, []*transition) = []; + + let neg: dfa_negate = []; + defer free(neg); + + let work: []*transition = []; + defer free(work); + + for (let input .. inputs) { + match (input) { + case dfa_dot => void; + case dfa_negate => abort("unreachable"); + case let input: dfa_accept => + for (let trans .. trans) { + if (accept(trans, input)) { + append(work, trans)?; + }; + }; + append(neg, input)?; + }; + if (len(work) != 0) { + append(out, (input, []))?; + let new = &out[len(out) - 1]; + append(new.1, work...)?; + delete(work[..]); + }; + }; + for: trans (let trans .. trans) { + for (let a .. trans.accepts) { + if (!(a is dot)) continue; + append(work, trans)?; + continue: trans; + }; + if (!(trans.negate)) continue; + let allfound = true; + for (let a .. trans.accepts) { + let found = false; + match (a) { + case let a: rune => + for (let input .. neg) { + if (input == a) { + found = true; + break; + }; + }; + case => found = false; + }; + if (!found) { + allfound = false; + break; + }; + }; + if (allfound) { + append(work, trans)?; + }; + }; + if (len(work) != 0) { + let newneg: dfa_negate = []; + append(newneg, neg...)?; + append(out, (newneg, []))?; + let new = &out[len(out) - 1]; + append(new.1, work...)?; + }; + + return out; +}; + +// Build states from a dfa transition table. +fn fill_states( + mata: *automata, + dfa: *dfa_transition, +) (void | nomem) = { + // Produce a hashmap (dfa_state -> state). + let dfa2mata: [](*dfa_transition_state, *state) = []; + defer free(dfa2mata); + for (let dfa_state .. dfa.trans_states) { + const state = alloc(state { + index = mata.curindex, + result = void, + ... + })?; + mata.curindex += 1; + for (let _state .. dfa_state.states) { + if (_state.result is size) { + state.result = _state.result; + break; + }; + }; + append(dfa2mata, (dfa_state, state))?; + append(mata.states, state)?; + }; + + // Add all states linearly + for (let i = 0z; i < len(dfa.trans_states); i += 1) { + const dfa_state = dfa.trans_states[i]; + const from = dfa2mata[i].1; + for: trans (let dfa_trans .. dfa_state.transitions) { + let to: nullable *state = null; + for (let dfa2mata .. dfa2mata) { + if (dfa2mata.0 == dfa_trans.to) { + to = dfa2mata.1; + break; + }; + }; + let to = to as *state; + + // Append accepts to existing trans. + for (let trans .. from.transitions) { + if (trans.to != to) continue; + match (dfa_trans.input) { + case let acc: dfa_accept => + if (trans.negate) continue; + append_to_accepts(&trans.accepts, acc)?; + case let neg: dfa_negate => + if (!trans.negate) continue; + for (let neg .. neg) { + append_to_accepts(&trans.accepts, neg)?; + }; + }; + continue: trans; + }; + + // Or create a new trans. + const trans = alloc(transition { + from = from, + to = to, + tree = 0, + ... + })?; + match (dfa_trans.input) { + case let acc: dfa_accept => + append(trans.accepts, acc)?; + case let neg: dfa_negate => + trans.negate = true; + for (let neg .. neg) + append(trans.accepts, neg)?; + }; + append(mata.transitions, trans)?; + append(from.transitions, trans)?; + }; + }; +}; + +fn append_to_accepts(accepts: *[]runeset, r: rune) (void | nomem) = { + assert(len(accepts) != 0); + match (&accepts[len(accepts) - 1]) { + case let lastr: *rune => + if (*lastr: u32 == r: u32 - 1) { + const lastr = *lastr; + delete(accepts[len(accepts) - 1]); + append(accepts, (lastr, r))?; + return; + }; + case let lastrange: *(rune, rune) => + if (lastrange.1: u32 == r: u32 - 1) { + lastrange.1 = r; + return; + }; + }; + append(accepts, r)?; +}; diff --git a/machine/error.ha b/machine/error.ha @@ -0,0 +1,19 @@ +use encoding::utf8; +use fmt; + +export type syntax = !str; +export type error = !(syntax | nomem | utf8::invalid); + +export fn strerror(err: error) str = { + match (err) { + case let err: syntax => return err; + case let err: utf8::invalid => return utf8::strerror(err); + case nomem => return "nomem"; + }; +}; + +export fn syntaxf(msg: const str, args: fmt::field...) syntax = { + static let buf: [2048]u8 = [0...]; + const msg = fmt::bsprintf(buf, msg, args...)!; + return msg; +}; diff --git a/machine/resolve.ha b/machine/resolve.ha @@ -0,0 +1,181 @@ +use bufio; +use encoding::utf8; +use strings; + +// Perform the automata with the given string. Returns the matched size, with +// associated accepted output, or void otherwise. +export fn resolve( + mata: *automata, + decoder: utf8::decoder +) (void | (size, *opaque) | error) = { + if (mata.deterministic) { + return resolve_deterministic(mata, decoder); + }; + + let loc = mata.start; + + let results: [](size, *opaque) = []; + defer free(results); + + for (let tree = 0z; tree < mata.curtree; tree += 1) { + match (crawl(mata, loc, decoder, tree)?) { + case void => void; + case let res: (size, *opaque) => + append(results, res)?; + yield res.0; + }; + }; + + let longest: (void | (size, *opaque)) = void; + for (let res .. results) { + match (longest) { + case void => + longest = res; + continue; + case let this: (size, *opaque) => + if (this.0 < res.0) { + longest = res; + }; + }; + }; + return longest; +}; + +fn resolve_deterministic( + mata: *automata, + decoder: utf8::decoder, +) (void | (size, *opaque) | error) = { + assert(mata.deterministic); + let loc = mata.start; + let lastres: (void | (size, *opaque)) = void; + for: root (let i = 0z; true; i += 1) { + const r = match (utf8::next(&decoder)?) { + case let this: rune => yield this; + case done => break; + case utf8::more => abort("unsupported partial input"); + }; + for (let trans .. loc.transitions) { + if (!accept(trans, r)) continue; + loc = trans.to; + match (result(mata, loc, 0)) { + case let out: *opaque => + lastres = (utf8::position(&decoder), out); + case null => void; + }; + continue: root; + }; + return lastres; + }; + return lastres; +}; + +fn crawl( + mata: *automata, + loc: *state, + start: utf8::decoder, + tree: size, +) (void | (size, *opaque) | error) = { + const decoder = start; + + if (0 == len(utf8::remaining(&decoder))) { + match (result(mata, loc, tree)) { + case let out: *opaque => return (utf8::position(&decoder), out); + case null => return void; + }; + }; + + const r = match (utf8::next(&decoder)?) { + case let this: rune => yield this; + case done => abort("unreachable"); + case utf8::more => abort("unsupported partial input"); + }; + + let accepting: []*transition = []; + defer free(accepting); + + for (let trans .. loc.transitions) { + if (trans.tree != tree) continue; + if (!accept(trans, r)) continue; + append(accepting, trans)?; + }; + + if (len(accepting) == 0) { + match (result(mata, loc, tree)) { + case null => return void; + case let out: *opaque => return (utf8::position(&start), out); + }; + }; + + let results: [](size, *opaque) = []; + defer free(results); + for (let acc .. accepting) { + const d = if (0 == len(acc.accepts)) { + yield start; + } else { + yield decoder; + }; + match (crawl(mata, acc.to, d, tree)?) { + case void => void; + case let this: (size, *opaque) => + append(results, this)?; + }; + }; + + let longest: (void | (size, *opaque)) = void; + for (let res .. results) { + match (longest) { + case void => + longest = res; + case let this: (size, *opaque) => + if (this.0 < res.0) { + longest = res; + }; + }; + }; + + if (longest is void) { + match (result(mata, loc, tree)) { + case null => return void; + case let out: *opaque => return (utf8::position(&start), out); + }; + }; + + return longest; +}; + +fn accept(trans: *transition, _r: rune) bool = { + if (trans.epsilon) return true; + const res = !trans.negate; + for (let a .. trans.accepts) { + match (a) { + case dot => return res; + case let r: rune => if (_r == r) return res; + case let r: (rune, rune) => + const a = r.0: u32; + const b = r.1: u32; + const min = if (a < b) a else b; + const max = if (a < b) b else a; + if (min <= _r: u32 && _r: u32 <= max) return res; + }; + }; + return !res; +}; + +fn result( + mata: *automata, + loc: *state, + tree: size +) nullable *opaque = { + if (loc.result is size) { + return mata.results[loc.result as size]; + }; + for (let trans .. loc.transitions) { + if (trans.tree != tree) continue; + if (!trans.epsilon) continue; + match (result(mata, trans.to, tree)) { + case null => void; + case let out: *opaque => return out; + }; + }; + return null; +}; diff --git a/machine/type.ha b/machine/type.ha @@ -0,0 +1,72 @@ +// An automata. +export type automata = struct { + curindex: size, + curtree: size, + start: *state, + states: []*state, + transitions: []*transition, + results: []*opaque, + deterministic: bool, +}; + +// A state. +export type state = struct { + index: size, + result: (void | size), + transitions: []*transition, +}; + +// A transition. +export type transition = struct { + from: *state, + to: *state, + tree: size, + epsilon: bool, + negate: bool, // [^x] + accepts: []runeset, +}; + +export type dot = void; + +// A runeset. +export type runeset = (dot | rune | (rune, rune)); + +// Initialize an automata. The caller must free associated resources with +// [[finish]] when they have done using it. +export fn init() (automata | error) = { + const start = alloc(state { + index = 0z, + result = void, + ... + })?; + const states: []*state = []; + append(states, start)?; + return automata { + curindex = 1z, + curtree = 0z, + start = start, + states = states, + ... + }; +}; + +// Free resources associated with an [[automata]]. +export fn finish(mata: *automata) void = { + for (let state .. mata.states) + state_destroy(state); + free(mata.states); + for (let trans .. mata.transitions) + transition_destroy(trans); + free(mata.results); + free(mata.transitions); +}; + +fn state_destroy(state: *state) void = { + free(state.transitions); + free(state); +}; + +fn transition_destroy(trans: *transition) void = { + free(trans.accepts); + free(trans); +};