commit 15f856cdf00111e2a6bcb2985ca8657e015a6e9c
Author: thing1 <thing1@seacrossedlovers.xyz>
Date: Mon, 23 Feb 2026 16:42:18 +0000
init commit
Diffstat:
| A | .gitignore | | | 1 | + |
| A | Makefile | | | 27 | +++++++++++++++++++++++++++ |
| A | README.md | | | 7 | +++++++ |
| A | TODO.md | | | 2 | ++ |
| A | bin/cat | | | 0 | |
| A | bin/ls | | | 0 | |
| A | bin/rainbow | | | 0 | |
| A | cmd/cat.ha | | | 25 | +++++++++++++++++++++++++ |
| A | cmd/ls.ha | | | 146 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | cmd/rainbow.ha | | | 26 | ++++++++++++++++++++++++++ |
| A | color/color.ha | | | 49 | +++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | lex/+test/test.ha | | | 121 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | lex/README | | | 95 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | lex/ast.ha | | | 7 | +++++++ |
| A | lex/backend.ha | | | 145 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | lex/lex.ha | | | 246 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | lex/type.ha | | | 73 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | newcmd.sh | | | 4 | ++++ |
| A | parse/+test.ha | | | 143 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | parse/parse.ha | | | 73 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | util/die.ha | | | 12 | ++++++++++++ |
21 files changed, 1202 insertions(+), 0 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1 @@
+./bin/*
diff --git a/Makefile b/Makefile
@@ -0,0 +1,27 @@
+.POSIX:
+.SUFFIXES:
+HARE=hare
+HAREFLAGS=
+
+DESTDIR=
+PREFIX=/usr/local
+BINDIR=$(PREFIX)/bin
+
+all: bin/ls bin/rainbow bin/cat
+
+clean:
+ rm -rf bin/*
+
+.PHONY: all check clean install uninstall
+
+bin/ls: cmd/ls.ha
+ $(HARE) build $(HAREFLAGS) -o $@ cmd/ls.ha
+
+bin/rainbow: cmd/rainbow.ha
+ $(HARE) build $(HAREFLAGS) -o $@ cmd/rainbow.ha
+
+bin/cat: cmd/cat.ha
+ $(HARE) build $(HAREFLAGS) -o $@ cmd/cat.ha
+
+
+
diff --git a/README.md b/README.md
@@ -0,0 +1,7 @@
+# sys
+
+Operating system utils written in Hare
+
+## uses
+
+- `https://git.sr.ht/~stacyharper/hare-lex`
diff --git a/TODO.md b/TODO.md
@@ -0,0 +1,2 @@
+- add rainbow printing
+- shell
diff --git a/bin/cat b/bin/cat
Binary files differ.
diff --git a/bin/ls b/bin/ls
Binary files differ.
diff --git a/bin/rainbow b/bin/rainbow
Binary files differ.
diff --git a/cmd/cat.ha b/cmd/cat.ha
@@ -0,0 +1,25 @@
+use fmt;
+use io;
+use os;
+use strings;
+use fs;
+
+use util;
+
+export fn main() void = {
+ let b: []u8 = [0];
+
+ for (let f .. os::args[1..]) {
+ let file = match(os::open(f)) {
+ case let f: io::file => yield f;
+ case let e: fs::error => util::die(fs::strerror(e), f);
+ };
+
+ for (let i = 0z; true; i += 1) {
+ match (io::read(file, b)) {
+ case size => fmt::print(b[0]: rune)!;
+ case => break;
+ };
+ };
+ };
+};
diff --git a/cmd/ls.ha b/cmd/ls.ha
@@ -0,0 +1,146 @@
+use fmt;
+use os;
+use getopt;
+use fs;
+use strings;
+use io;
+
+use color;
+use util;
+
+type mode = enum uint {
+ DIR = 1 << 0,
+ COLOR = 1 << 1,
+ ALL = 1 << 2,
+};
+
+let script = false;
+let color = true;
+let all = false;
+let showdirs = true;
+let path = "";
+
+fn bit(ent: uint, m: uint) bool = ((ent & m) == m);
+
+fn getcol(ent: str, link: bool) color::col = {
+ let m = os::stat(ent)!.mode;
+
+ if (bit(m, fs::mode::BLK) || bit(m, fs::mode::CHR))
+ return (color::colors::YELLOW, color::mode::BOLD)
+
+ else if (bit(m, fs::mode::DIR)) {
+ if (link)
+ return (color::colors::BLUE, color::mode::ITAL);
+ return (color::colors::BLUE, color::mode::BOLD);
+ } else if (link)
+ return (color::colors::NORM, color::mode::ITAL)
+
+ else if (bit(m, fs::mode::OTHER_X) ||
+ bit(m, fs::mode::USER_X) ||
+ bit(m, fs::mode::GROUP_X)) return (color::colors::GREEN, color::mode::BOLD)
+
+
+
+ else return (color::colors::NORM, color::mode::NORM);
+};
+
+fn printent(ent: fs::dirent, opts: mode, file: str) void = {
+ if (!bit(opts, mode::ALL)) {
+ match (strings::index(ent.name, '.')) {
+ case void => yield;
+ case let i: size => if (i == 0) return;
+ };
+ };
+
+ let res = fmt::asprint(ent.name)!;
+ defer free(res);
+
+ if (bit(opts, mode::DIR) && bit(ent.ftype, fs::mode::DIR) && !bit(ent.ftype, fs::mode::BLK))
+ res = fmt::asprintf("{}{}", res, "/")!;
+
+ let link = false;
+ if (bit(ent.ftype, fs::mode::LINK)) {
+ link = true;
+ file = os::readlink(ent.name)!;
+ if (bit(opts, mode::DIR) && bit(os::stat(file)!.mode, fs::mode::DIR))
+ res = fmt::asprintf("{}{}", res, "/")!;
+ };
+
+
+ if (bit(opts, mode::COLOR))
+ color::println(res, getcol(file, link))
+ else
+ fmt::println(res)!;
+};
+
+export fn main() void = {
+ const cmd = getopt::parse(os::args,
+ "list directory contents",
+ ('s', "scriptable output, equivelant to -aCD"),
+ ('a', "show all files, including hidden files"),
+ ('c', "color output (default)"),
+ ('d', "show dirs with '/' (default)"),
+ ('A', "don't show all files (default)"),
+ ('C', "no color output"),
+ ('D', "don't show dirs with '/'"),
+ "where"
+ );
+ defer getopt::finish(&cmd);
+
+ for (let opt .. cmd.opts) {
+ switch (opt.0) {
+ case 's' => script = true;
+ case 'a' => all = true;
+ case 'c' => color = true;
+ case 'A' => all = false;
+ case 'C' => color = false;
+ case 'd' => showdirs = true;
+ case 'D' => showdirs = false;
+ case => abort();
+ };
+ };
+
+ if (len(cmd.args) > 1) util::die("can only list one directory")
+ else if (len(cmd.args) == 1) path = cmd.args[0]
+ else path = "./";
+
+ if (script) {
+ all = true;
+ color = false;
+ showdirs = false;
+ };
+ let dir = match (os::diropen(path)) {
+ case let fs: *fs::fs => os::chdir(fs)!;
+ case let e: fs::error => util::die(fs::strerror(e));
+ };
+
+ let dir = match (os::diropen("./")) {
+ case let fs: *fs::fs => yield fs;
+ case let e: fs::error => util::die(fs::strerror(e));
+ };
+ defer fs::close(dir);
+
+ let iter = match(fs::iter(dir, "./")) {
+ case let iter: *fs::iterator => yield iter;
+ case let e: fs::error => util::die(fs::strerror(e));
+ };
+ defer fs::finish(iter);
+
+ let dirs: []fs::dirent = [];
+ defer free(dirs);
+
+ for (let dirent = fs::next(iter)!; dirent is fs::dirent; dirent = fs::next(iter)!) {
+ let d = dirent as fs::dirent;
+ append(dirs, d)!;
+ };
+
+ for (let ent .. dirs) {
+ let opts: mode = 0;
+
+ if (all) opts |= mode::ALL;
+ if (color) opts |= mode::COLOR;
+ if (showdirs) opts |= mode::DIR;
+
+ printent(ent, opts, ent.name);
+ };
+};
diff --git a/cmd/rainbow.ha b/cmd/rainbow.ha
@@ -0,0 +1,26 @@
+use fmt;
+use io;
+use os;
+use strings;
+
+use color;
+
+const colors = [
+ (color::colors::RED, color::mode::NORM),
+ (color::colors::YELLOW, color::mode::NORM),
+ (color::colors::GREEN, color::mode::NORM),
+ (color::colors::BLUE, color::mode::NORM),
+ (color::colors::CYAN, color::mode::NORM),
+ (color::colors::MAGENTA, color::mode::NORM)
+];
+
+export fn main() void = {
+ let b: []u8 = [0];
+
+ for (let i = 0z; true; i += 1) {
+ match (io::read(os::stdin, b)) {
+ case size => color::print(strings::fromutf8(b)!, colors[i % len(colors)]);
+ case => break;
+ };
+ };
+};
diff --git a/color/color.ha b/color/color.ha
@@ -0,0 +1,49 @@
+use fmt;
+use strings;
+use strconv;
+
+export type colors = enum uint {
+ BLACK = 30,
+ RED,
+ GREEN,
+ YELLOW,
+ BLUE,
+ MAGENTA,
+ CYAN,
+ WHITE,
+ NORM = 39,
+};
+
+export type mode = enum uint {
+ NORM = 0,
+ BOLD = 1,
+ DIM = 2,
+ ITAL = 3,
+ UL = 4,
+ BLINK = 5,
+ REV = 7,
+ INVIS = 8,
+ STRIKE = 9
+};
+
+export type col = (colors, mode);
+
+export fn print(s: str, c: col) void = {
+ let out = strings::concat(
+ `[`,
+ strings::dup(strconv::itos(c.1: int))!,
+ ";",
+ strings::dup(strconv::itos(c.0: int))!,
+ "m"
+ )!;
+ defer free(out);
+
+ fmt::print(out)!;
+ fmt::print(s)!;
+ fmt::print(`[0m`)!;
+};
+
+export fn println(s: str, c: col) void = {
+ print(s, c);
+ fmt::println()!;
+};
diff --git a/lex/+test/test.ha b/lex/+test/test.ha
@@ -0,0 +1,121 @@
+use io;
+use fmt;
+
+fn same(a: *token, b: *token, quick: bool) bool = {
+ if (a.name != b.name) return false;
+ match (a.value) {
+ case let avalue: f64 =>
+ if (!(b.value is f64)) return false;
+ if (avalue != b.value as f64) return false;
+ case let avalue: i64 =>
+ if (!(b.value is i64)) return false;
+ if (avalue != b.value as i64) return false;
+ case let avalue: size =>
+ if (!(b.value is size)) return false;
+ if (avalue != b.value as size) return false;
+ case let avalue: u64 =>
+ if (!(b.value is u64)) return false;
+ if (avalue != b.value as u64) return false;
+ case let avalue: str =>
+ if (!(b.value is str)) return false;
+ if (avalue != b.value as str) return false;
+ case let avalue: rune =>
+ if (!(b.value is rune)) return false;
+ if (avalue != b.value as rune) return false;
+ case let avalue: void =>
+ if (!(b.value is void)) return false;
+ };
+ if (a.start.line != b.start.line) return false;
+ if (a.start.col != b.start.col) return false;
+
+ if (quick) return true;
+
+ if (a.morphene != b.morphene) return false;
+ if (a.start.off != b.start.off) return false;
+ if (a.end.line != b.end.line) return false;
+ if (a.end.col != b.end.col) return false;
+ if (a.end.off != b.end.off) return false;
+ return true;
+};
+
+fn error_token(tok: *token, quick: bool) (void | io::error) = {
+ switch (quick) {
+ case false =>
+ fmt::errorfln("{}:{}:{}: '{}' '{}' '{}' {}:{}:{}",
+ tok.start.line, tok.start.col, tok.start.off,
+ tok.name, tok.value, tok.morphene,
+ tok.end.line, tok.end.col, tok.end.off)?;
+ case true =>
+ fmt::errorfln("{}:{}: '{}' '{}'",
+ tok.start.line, tok.start.col,
+ tok.name, tok.value)?;
+ };
+};
+
+export fn testexps(lexer: *lexer, exp: []token, quick: bool = false) void = {
+ for (let exp &.. exp) {
+ const token = match (next(lexer)) {
+ case let err: error =>
+ fmt::fatal(strerror(err));
+ case let tok: *token => yield tok;
+ };
+
+ if (!same(exp, token, quick)) {
+ fmt::errorf("Expected: ")!;
+ error_token(exp, quick)!;
+ fmt::errorf("Got: ")!;
+ error_token(token, quick)!;
+ abort();
+ };
+ };
+};
+
+export fn testtok(
+ name: const str,
+ value: value,
+ morphene: const str,
+ start: location,
+ end: location,
+) token = {
+ return token {
+ name = name,
+ value = value,
+ morphene = morphene,
+ lexeme = morphene,
+ start = start,
+ end = end,
+ tostrfn = null: *tokstrfn,
+ freefn = null: *tokfreefn,
+ };
+};
+
+export fn testtok_quick(
+ name: const str,
+ value: value,
+ line: uint,
+ col: uint,
+) token = {
+ return token {
+ name = name,
+ value = value,
+ lexeme = "",
+ morphene = "",
+ tostrfn = null: *tokstrfn,
+ freefn = null: *tokfreefn,
+ start = location {
+ line = line,
+ col = col,
+ ...
+ },
+ ...
+ };
+};
+
+export fn testloc(line: uint, col: uint, off: uint) location = {
+ return location {
+ line = line,
+ col = col,
+ off = off,
+ };
+};
+
diff --git a/lex/README b/lex/README
@@ -0,0 +1,95 @@
+This module provides a general purpose lexer machine.
+
+The user adds action callbacks to the lexer. The longest pattern matched
+prefix wins. In case of ties, the pattern with the highest precedence
+wins.
+
+The user prepares a backend to use with a lexer. A backend takes a list of
+actions to compile its core.
+
+ let actions: []lex::action = [];
+ defer free(actions);
+
+ append(actions, lex::action {
+ expr = `"([^\\"]|(\\.))*"`,
+ cb = &literal,
+ name = "LIT_STR",
+ ...
+ })!;
+
+ const backend = lex::def_backend()!(actions)!; // use default backend (DFA without environment variable)
+ defer lex::destroy(backend);
+
+ const lexer = lex::init(backend, in);
+ defer lex::finish(&lexer);
+
+An action callback is associated with a regular expression to
+match the tokens. The action callbacks are free to initialize tokens as
+they please, but the [[scanner]] object provides convenient functions.
+
+ fn literal(
+ scan: *lex::scanner,
+ lexeme: const str,
+ user: nullable *opaque,
+ ) (str | *lex::token | lex::error) = {
+ return lex::scan_token(scan, void, lexeme);
+ };
+
+This action callback would return a token of the added action type
+(ex: "LIT_STR"), with a void value, and lexing the full lexeme pattern
+matched string (ex: "foo").
+
+When the callback returns a string, it represents the lexeme to swallow.
+
+ append(actions, lex::action {
+ expr = "( |\t|\n|\r)+",
+ cb = &skip,
+ ...
+ })!;
+
+ fn skip(
+ scan: *lex::scanner,
+ lexeme: const str,
+ user: nullable *opaque,
+ ) (str | *lex::token | lex::error) = {
+ return lexeme;
+ };
+
+Action callbacks can be used to match hatch symbols, and then to lex the
+scanned input manually.
+
+ append(actions, lex::action {
+ expr = `\<`,
+ cb = &html,
+ name = "ID"
+ ...
+ })!;
+
+ fn html(
+ scan: *lex::scanner,
+ lexeme: const str,
+ user: nullable *opaque,
+ ) (str | *lex::token | lex::error) = {
+ let buf: []u8 = [];
+ defer free(buf);
+
+ append(buf, strings::toutf8(lexeme)...)!;
+
+ let brk = 1z;
+ const start = scan.start;
+
+ for (let byte .. strings::toutf8(scan.in)) {
+ append(buf, byte)?;
+ if (byte == '<') {
+ brk += 1;
+ } else if (byte == '>') {
+ brk -= 1;
+ };
+ if (brk == 0) {
+ const lexeme = strings::fromutf8(buf)!;
+ return lex::scan_token(scan, void, lexeme);
+ };
+ };
+
+ return lex::syntaxf(start, "unclosed HTML literal");
+ };
diff --git a/lex/ast.ha b/lex/ast.ha
@@ -0,0 +1,7 @@
+// The location of an AST node
+export type ast_location = struct {
+ // The location of the start of the AST node
+ start: location,
+ // The location of the end of the AST node
+ end: location,
+};
diff --git a/lex/backend.ha b/lex/backend.ha
@@ -0,0 +1,145 @@
+use encoding::utf8;
+use lexical::machine;
+use os;
+use strings;
+
+// An action callback. Can return a lexeme to swallow this string completely,
+// and continue, or a token to return to the [[next]].
+export type actioncb = fn(
+ scan: *scanner,
+ result: const str,
+ user: nullable *opaque = null,
+) (str | *token | error);
+
+// A backend action.
+export type action = struct {
+ expr: str,
+ cb: *actioncb,
+ name: const str,
+ user: nullable *opaque,
+};
+
+// A backend.
+export type backend = struct {
+ performfn: *backend_performfn,
+ destroyfn: *backend_destroyfn,
+};
+
+// A backend perform function.
+export type backend_performfn = fn(be: *backend, in: str) (void | (*action, str) | error);
+// A backend destroy function.
+export type backend_destroyfn = fn(be: *backend) void;
+
+// Perform the backend, and return the matched action and bytes.
+export fn perform(be: *backend, in: str) (void | (*action, str) | error) = be.performfn(be, in);
+// Destroy a backend.
+export fn destroy(be: *backend) void = be.destroyfn(be);
+
+// The default backend constructor.
+export fn def_backend() (*backendinitfb | error) = {
+ match (os::getenv("LEXER_BACKEND")) {
+ case void =>
+ return &deterministic: *backendinitfb;
+ case let value: str =>
+ switch (value) {
+ case "dfa" =>
+ return &deterministic: *backendinitfb;
+ case "ndfa" =>
+ return &nondeterministic: *backendinitfb;
+ case =>
+ return "unknown backend": compile;
+ };
+ };
+};
+
+export type backendinitfb = fn(actions: []action ) (*backend | error);
+
+export type ndfa_backend = struct {
+ backend,
+ actions: []action,
+ mata: machine::automata,
+};
+
+fn ndfa_destroy(be: *backend) void = {
+ let be = be: *ndfa_backend;
+ machine::finish(&be.mata);
+ free(be.actions);
+ free(be);
+};
+
+fn ndfa_perform(
+ be: *backend,
+ in: str,
+) (void | (*action, str) | error) = {
+ let be = be: *ndfa_backend;
+ match (machine::resolve(&be.mata, utf8::decode(strings::toutf8(in)))) {
+ case void => return void;
+ case let err: machine::error => abort(machine::strerror(err));
+ case let this: (size, *opaque) =>
+ const new = strings::sub(in, 0, this.0);
+ return (this.1: *action, new);
+ };
+};
+
+// A non-deterministic backend. It tests all expressions in parallel, and
+// uses the longest matched prefix as winner. The actions are duplicated locally,
+// and can be freed by the caller.
+export fn nondeterministic(
+ actions: []action
+) (*backend | error) = {
+ const be = build_ndfa_be(actions)?: *ndfa_backend;
+ match (os::getenv("LEXER_DEBUG")) {
+ case void => void;
+ case let value: str =>
+ if (value == "1") {
+ machine::debug_automata(&be.mata)!;
+ };
+ };
+ return be;
+};
+
+// A deterministic backend. It crawls acceptable transitions linearly, and returns
+// the last encountered acceptance. The actions are duplicated locally, and can
+// be freed by the caller.
+export fn deterministic(
+ actions: []action
+) (*backend | error) = {
+ const be = build_ndfa_be(actions)?: *ndfa_backend;
+ const old = be.mata;
+ defer machine::finish(&old);
+ be.mata = machine::determine(&old)?;
+ match (os::getenv("LEXER_DEBUG")) {
+ case void => void;
+ case let value: str =>
+ if (value == "1") {
+ machine::debug_automata(&be.mata)!;
+ };
+ };
+ return be;
+};
+
+fn build_ndfa_be(
+ _actions: []action
+) (*backend | error) = {
+ let actions: []action = [];
+ append(actions, _actions...)?;
+
+ let exprs: [](str, *opaque) = [];
+ defer free(exprs);
+ for (let act &.. actions) {
+ append(exprs, (act.expr, act))?;
+ };
+
+ const mata = match (machine::compute(exprs)) {
+ case let this: machine::automata => yield this;
+ case nomem => return nomem;
+ case let syn: machine::syntax => return syn: compile;
+ };
+
+ return alloc(ndfa_backend {
+ actions = actions,
+ mata = mata,
+ performfn = &ndfa_perform,
+ destroyfn = &ndfa_destroy,
+ })?;
+};
diff --git a/lex/lex.ha b/lex/lex.ha
@@ -0,0 +1,246 @@
+use regex;
+use strconv;
+use fmt;
+use strings;
+use encoding::utf8;
+
+// The default token constructor.
+export fn default_token(
+ scan: *scanner,
+ name: const str,
+ value: value,
+ morphene: const str,
+ lexeme: const str,
+) (*token | error) = {
+ const end = scan.start;
+ const last = end;
+ const decoder = utf8::decode(strings::toutf8(morphene));
+ for (let r => utf8::next(&decoder)!) {
+ last = end;
+ forwardr(&end, [r as rune]);
+ };
+ return alloc(token {
+ name = name,
+ value = value,
+ morphene = morphene,
+ lexeme = lexeme,
+ start = scan.start,
+ end = last,
+ tostrfn = &tokstr,
+ freefn = &tokfree,
+ })?;
+};
+
+fn tokstr(tok: *token) str = {
+ match (tok.value) {
+ case void => return tok.morphene;
+ case let val: f64 => return strconv::f64tos(val);
+ case let val: i64 => return strconv::i64tos(val);
+ case let val: size => return strconv::ztos(val);
+ case let val: u64 => return strconv::u64tos(val);
+ case let val: str => return val;
+ case let val: rune => return strings::fromutf8_unsafe(utf8::encoderune(val));
+ };
+};
+
+fn tokfree(tok: *token) void = {
+ free(tok);
+};
+
+// Format a token as a string.
+export fn strtoken(tok: *token) str = tok.tostrfn(tok);
+
+// Initialize a new [[lexer]] to lex the input bytes. The caller must free
+// associated resources with [[finish]].
+export fn init(
+ be: *backend,
+ in: const str = "",
+ tokfn: nullable *tokenfn = null,
+) lexer = {
+ const loc = location {
+ off = 0,
+ line = 1,
+ col = 1,
+ };
+ return lexer {
+ be = be,
+ in = in,
+ token = if (tokfn is *tokenfn) tokfn as *tokenfn else &default_token,
+ un = null,
+ loc = loc,
+ prevunlocs = [(loc, loc)...],
+ ...
+ };
+};
+
+// Convenient function to reuse an existing lexer with a new input string.
+export fn reuse(lex: *lexer, in: const str) void = {
+ const loc = location {
+ off = 0,
+ line = 1,
+ col = 1,
+ };
+ lex.in = in;
+ lex.un = null;
+ lex.loc = loc;
+ lex.prevunlocs = [(loc, loc)...];
+ for (let tok .. lex.tokens)
+ tok.freefn(tok);
+ delete(lex.tokens[..]);
+ match (lex.reuse) {
+ case null => void;
+ case let reuse: *reusecb => reuse(lex);
+ };
+};
+
+// Free resources associated with a [[lexer]].
+export fn finish(lex: *lexer) void = {
+ for (let tok .. lex.tokens)
+ tok.freefn(tok);
+ free(lex.tokens);
+};
+
+// Format a [[syntax]] error as a formatable.
+export fn syntaxf(loc: location, fmt: const str, args: fmt::field...) syntax = {
+ static let buf: [2048]u8 = [0...];
+ const msg = fmt::bsprintf(buf, fmt, args...)!;
+ return (loc, msg);
+};
+
+// Gives the current location of the lexer.
+export fn mkloc(lex: *lexer) location = {
+ match (lex.un) {
+ case null => return lex.loc;
+ case let tok: *token => return lex.prevunlocs[1].1;
+ };
+};
+
+// Gives the previous location of the lexer.
+export fn prevloc(lex: *lexer) location = {
+ match (lex.un) {
+ case null => return lex.prevrloc;
+ case let tok: *token => return lex.prevunlocs[1].0;
+ };
+};
+
+// Initialize a token based on the scan context. When the lexeme is not present,
+// this considers the morphene as both. When the name is not present, the name
+// from the action will be used.
+export fn scan_token(
+ scan: *scanner,
+ value: value,
+ first: const str,
+ second: const str = "",
+ name: const str = ""
+) (*token | error) = {
+ const (morphene, lexeme) = if (second == "") {
+ yield (first, first);
+ } else {
+ yield (first, second);
+ };
+ return scan.lex.token(
+ scan,
+ if (name == "") scan.name else name,
+ value,
+ morphene,
+ lexeme
+ );
+};
+
+// Return the lexer associated to a scanner.
+export fn scan_lexer(scan: *scanner) *lexer = scan.lex;
+
+// Return the token name associated to a scanner.
+export fn scan_name(scan: *scanner) str = scan.name;
+
+// Return a string representing the error.
+export fn strerror(err: error) str = {
+ static let buf: [2048]u8 = [0...];
+ match (err) {
+ case let s: syntax =>
+ return fmt::bsprintf(buf, "{}:{}: syntax error: {}",
+ s.0.line, s.0.col, s.1)!;
+ case let com: compile => return com;
+ case nomem => return "nomem";
+ };
+};
+
+// Give the next token from the lexer.
+export fn next(lex: *lexer) (*token | error) = {
+ if (lex.un is *token) {
+ const prev = lex.un as *token;
+ lex.un = null;
+ return prev;
+ };
+
+ defer {
+ lex.prevunlocs[1] = lex.prevunlocs[0];
+ lex.prevunlocs[0] = (prevloc(lex), mkloc(lex));
+ };
+
+
+ let scan = scanner {
+ lex = lex,
+ start = lex.loc,
+ ...
+ };
+
+ for (true) {
+ scan.in = strings::sub(lex.in, lex.loc.off, strings::end);
+ if (len(scan.in) == 0) {
+ const tok = scan.lex.token(&scan, EOF, void, "", "")?;
+ append(lex.tokens, tok)?;
+ return tok;
+ };
+
+ const (action, lexeme) = match (perform(lex.be, scan.in)) {
+ case void => return syntaxf(mkloc(lex), "no matching token");
+ case let this: (*action, str) => yield this;
+ };
+
+ scan.name = action.name;
+
+ match (action.cb(&scan, lexeme, action.user)?) {
+ case let lexeme: str =>
+ forwardlex(lex, lexeme);
+ scan.start = lex.loc;
+ case let tok: *token =>
+ forwardlex(lex, tok.lexeme);
+ append(lex.tokens, tok)?;
+ return tok;
+ };
+ };
+};
+
+// Unlex a token, so that it gets lexed back with [[next]].
+export fn unlex(lex: *lexer, value: *token) void = {
+ assert(lex.un is null);
+ lex.un = value;
+};
+
+fn forwardlex(lex: *lexer, in: str) void = {
+ const decoder = utf8::decode(strings::toutf8(in));
+ for (let r => utf8::next(&decoder)!) {
+ lex.prevrloc = lex.loc;
+ forwardr(&lex.loc, [r as rune]);
+ };
+};
+
+// Moves a location based on bytes.
+export fn forward(loc: *location, in: str) void = {
+ const decoder = utf8::decode(strings::toutf8(in));
+ for (let r => utf8::next(&decoder)!) {
+ forwardr(loc, [r as rune]);
+ };
+};
+
+fn forwardr(loc: *location, r: []rune) void = {
+ for (let r .. r) {
+ loc.off += 1;
+ loc.col += 1;
+ if (r == '\n') {
+ loc.col = 1;
+ loc.line += 1;
+ };
+ };
+};
diff --git a/lex/type.ha b/lex/type.ha
@@ -0,0 +1,73 @@
+use regex;
+
+// A location.
+export type location = struct {
+ line: uint,
+ col: uint,
+ off: uint,
+};
+
+// A token value.
+export type value = (f64 | i64 | size | u64 | str | rune | void);
+
+// A lexed token.
+export type token = struct {
+ name: const str,
+ value: value,
+	morphene: const str, // meaningful part "foo"
+ lexeme: const str, // all swallowed bytes "foos"
+ start: location,
+ end: location,
+ tostrfn: *tokstrfn,
+ freefn: *tokfreefn,
+};
+
+export def EOF = "EOF";
+
+// Function to format a token as a string.
+export type tokstrfn = fn(tok: *token) str;
+// Function to free resources associated with a token.
+export type tokfreefn = fn(tok: *token) void;
+
+// A syntax error.
+export type syntax = !(location, str);
+
+// A backend compile error.
+export type compile = !str;
+
+// All possible errors for this module.
+export type error = !(syntax | compile | nomem);
+
+// A lexer.
+export type lexer = struct {
+ be: *backend,
+ in: const str, // the full bytes
+ loc: location, // the cursor location
+ prevrloc: location, // last rune location
+ prevunlocs: [2](location, location),
+ un: nullable *token,
+ tokens: []*token, // to free them
+ token: *tokenfn, // the function to build tokens with [[scan_token]]
+ reuse: nullable *reusecb, // the callback when reusing with [[reuse]]
+};
+
+// The toolkit given to the [[actioncb]] callback to help the user to
+// initialize the tokens.
+export type scanner = struct {
+ lex: *lexer,
+ in: const str, // the remaining bytes
+ name: const str, // the scanned token name
+ start: location, // the lexeme start location
+};
+
+// A function to initialize tokens.
+export type tokenfn = fn(
+ scan: *scanner,
+ name: const str, // token type name
+ value: value, // token value
+	morphene: const str, // meaningful part
+ lexeme: const str, // full bytes to swallow
+) (*token | error);
+
+// A callback to reuse a lexer.
+export type reusecb = fn(lex: *lexer) void;
diff --git a/newcmd.sh b/newcmd.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+echo "bin/$1: cmd/$1.ha"
+echo " \$(HARE) build \$(HAREFLAGS) -o \$@ cmd/$1.ha"
diff --git a/parse/+test.ha b/parse/+test.ha
@@ -0,0 +1,143 @@
+use lexical::lex;
+
+fn literal(
+ scan: *lex::scanner,
+ lexeme: const str,
+ user: nullable *opaque,
+) (str | *lex::token | lex::error) = {
+ return lex::scan_token(scan, void, lexeme);
+};
+
+fn skip(
+ scan: *lex::scanner,
+ lexeme: const str,
+ user: nullable *opaque,
+) (str | *lex::token | lex::error) = {
+ return lexeme;
+};
+
+def FOO = "FOO";
+def BAR = "BAR";
+def NAME = "NAME";
+
+def keywords = [
+ (FOO, "foo"),
+ (BAR, "bar"),
+];
+
+def exprs = [
+ (NAME, `([a-z]|[A-Z])([a-z]|[A-Z]|[0-9]|_)*`),
+];
+
+@test fn test_parse() void = {
+ let actions: []lex::action = [];
+ defer free(actions);
+ for (let keyword .. keywords) {
+ append(actions, lex::action {
+ expr = keyword.1,
+ cb = &literal,
+ name = keyword.0,
+ ...
+ })!;
+ };
+ for (let expr .. exprs) {
+ append(actions, lex::action {
+ expr = expr.1,
+ cb = &literal,
+ name = expr.0,
+ ...
+ })!;
+ };
+ append(actions, lex::action {
+ expr = "( |\t|\n|\r)+",
+ cb = &skip,
+ ...
+ })!;
+ const be = lex::def_backend()!(actions)!;
+ defer lex::destroy(be);
+
+ const in = "foo bar foobar foo";
+ const lexer = lex::init(be, in);
+ defer lex::finish(&lexer);
+
+ const res = want(&lexer, BAR, lex::EOF);
+ assert(res is lex::error);
+ assert(res is lex::syntax);
+ const res = res as lex::syntax;
+ assert(res.1 == "Unexpected 'FOO', was expecting 'BAR', 'EOF'");
+ assert(res.0.line == 1);
+ assert(res.0.col == 1);
+ assert(res.0.off == 0);
+
+ const res = want(&lexer, BAR, FOO);
+ assert(res is *lex::token);
+ const res = res as *lex::token;
+ assert(res.name == FOO);
+ assert(res.lexeme == "foo");
+ assert(res.value is void);
+ assert(res.start.line == 1);
+ assert(res.start.col == 1);
+ assert(res.start.off == 0);
+ assert(res.end.line == 1);
+ assert(res.end.col == 3);
+ assert(res.end.off == 2);
+
+ const res = try(&lexer, FOO);
+ assert(res is void);
+
+ const res = want(&lexer, FOO);
+ assert(res is lex::error);
+ assert(res is lex::syntax);
+ const res = res as lex::syntax;
+ assert(res.1 == "Unexpected 'BAR', was expecting 'FOO'");
+ assert(res.0.line == 1);
+ assert(res.0.col == 5);
+ assert(res.0.off == 4);
+
+ const res = try(&lexer, BAR);
+ assert(res is *lex::token);
+ const res = res as *lex::token;
+ assert(res.name == BAR);
+ assert(res.lexeme == "bar");
+ assert(res.value is void);
+ assert(res.start.line == 1);
+ assert(res.start.col == 5);
+ assert(res.start.off == 4);
+ assert(res.end.line == 1);
+ assert(res.end.col == 7);
+ assert(res.end.off == 6);
+
+ const res = peek(&lexer, lex::EOF);
+ assert(res is void);
+
+ const res = peek(&lexer, NAME);
+ assert(res is *lex::token);
+
+ const res = want(&lexer, NAME);
+ assert(res is *lex::token);
+ const res = res as *lex::token;
+ assert(res.name == NAME);
+ assert(res.lexeme == "foobar");
+ assert(res.value is void);
+ assert(res.start.line == 1);
+ assert(res.start.col == 9);
+ assert(res.start.off == 8);
+ assert(res.end.line == 1);
+ assert(res.end.col == 14);
+ assert(res.end.off == 13);
+
+ const res = want(&lexer, FOO);
+ assert(res is *lex::token);
+
+ const res = want(&lexer, FOO);
+ assert(res is lex::error);
+ assert(res is lex::syntax);
+ const res = res as lex::syntax;
+ assert(res.1 == "Unexpected 'EOF', was expecting 'FOO'");
+ assert(res.0.line == 1);
+ assert(res.0.col == 19);
+ assert(res.0.off == 18);
+
+ const res = want(&lexer, lex::EOF);
+ assert(res is *lex::token);
+};
diff --git a/parse/parse.ha b/parse/parse.ha
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: MPL-2.0
+// (c) Hare authors <https://harelang.org>
+
+use fmt;
+use lexical::lex;
+use io;
+use memio;
+
+// All possible error types.
+export type error = !lex::error;
+
+// Requires the next token to have a matching ltok. Returns that token, or an
+// error.
+export fn want(
+ lexer: *lex::lexer,
+ want: str...
+) (*lex::token | error) = {
+ let tok = lex::next(lexer)?;
+ if (len(want) == 0) return tok;
+ for (let want .. want) if (tok.name == want) return tok;
+
+ let buf = memio::dynamic();
+ defer io::close(&buf)!;
+ lex::unlex(lexer, tok);
+ for (let i = 0z; i < len(want); i += 1) {
+ fmt::fprintf(&buf, "'{}'", want[i])!;
+ if (i + 1 < len(want)) fmt::fprint(&buf, ", ")!;
+ };
+ return lex::syntaxf(tok.start,
+ "Unexpected '{}', was expecting {}",
+ tok.name, memio::string(&buf)!);
+};
+
+// Looks for a matching ltok from the lexer, and if not present, unlexes the
+// token and returns void. If found, the token is consumed from the lexer and is
+// returned.
+export fn try(
+ lexer: *lex::lexer,
+ want: str...
+) (*lex::token | error | void) = {
+ let tok = lex::next(lexer)?;
+ assert(len(want) > 0);
+ for (let want .. want) if (tok.name == want) return tok;
+ lex::unlex(lexer, tok);
+};
+
+// Looks for a matching ltok from the lexer, unlexes the token, and returns
+// it; or void if it was not an ltok.
+export fn peek(
+ lexer: *lex::lexer,
+ want: str...
+) (*lex::token | error | void) = {
+ let tok = lex::next(lexer)?;
+ lex::unlex(lexer, tok);
+ if (len(want) == 0) return tok;
+ for (let want .. want) if (tok.name == want) return tok;
+};
+
+// Returns a syntax error if cond is false and void otherwise
+export fn synassert(
+ loc: lex::location,
+ cond: bool,
+ msg: const str,
+) (void | error) = {
+ if (!cond) return lex::syntaxf(loc, msg);
+};
+
+export fn loc_from(lexer: *lex::lexer, start: lex::location) lex::ast_location = {
+ return lex::ast_location {
+ start = start,
+ end = lex::prevloc(lexer),
+ };
+};
diff --git a/util/die.ha b/util/die.ha
@@ -0,0 +1,12 @@
+use fmt;
+use os;
+
+export fn die(msg: str, args: fmt::formattable...)never = {
+
+ fmt::printf("{}: {}: ", os::args[0], msg)!;
+
+ if (len(args) != 0)
+ fmt::println(args...)!;
+
+ os::exit(255);
+};