sys

A set of unix utils in hare!
Log | Files | Refs | README

commit 15f856cdf00111e2a6bcb2985ca8657e015a6e9c
Author: thing1 <thing1@seacrossedlovers.xyz>
Date:   Mon, 23 Feb 2026 16:42:18 +0000

init commit

Diffstat:
A.gitignore | 1+
AMakefile | 27+++++++++++++++++++++++++++
AREADME.md | 7+++++++
ATODO.md | 2++
Abin/cat | 0
Abin/ls | 0
Abin/rainbow | 0
Acmd/cat.ha | 25+++++++++++++++++++++++++
Acmd/ls.ha | 146+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Acmd/rainbow.ha | 26++++++++++++++++++++++++++
Acolor/color.ha | 49+++++++++++++++++++++++++++++++++++++++++++++++++
Alex/+test/test.ha | 121+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/README | 95+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/ast.ha | 7+++++++
Alex/backend.ha | 145+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/lex.ha | 246+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Alex/type.ha | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Anewcmd.sh | 4++++
Aparse/+test.ha | 143+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aparse/parse.ha | 73+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Autil/die.ha | 12++++++++++++
21 files changed, 1202 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1 @@ +./bin/* diff --git a/Makefile b/Makefile @@ -0,0 +1,27 @@ +.POSIX: +.SUFFIXES: +HARE=hare +HAREFLAGS= + +DESTDIR= +PREFIX=/usr/local +BINDIR=$(PREFIX)/bin + +all: bin/ls bin/rainbow bin/cat + +clean: + rm -rf bin/* + +.PHONY: all check clean install uninstall + +bin/ls: cmd/ls.ha + $(HARE) build $(HAREFLAGS) -o $@ cmd/ls.ha + +bin/rainbow: cmd/rainbow.ha + $(HARE) build $(HAREFLAGS) -o $@ cmd/rainbow.ha + +bin/cat: cmd/cat.ha + $(HARE) build $(HAREFLAGS) -o $@ cmd/cat.ha + + + diff --git a/README.md b/README.md @@ -0,0 +1,7 @@ +# sys + +Operating system utils written in hare + +## uses + +- `https://git.sr.ht/~stacyharper/hare-lex` diff --git a/TODO.md b/TODO.md @@ -0,0 +1,2 @@ +- add rainbow printing +- shell diff --git a/bin/cat b/bin/cat Binary files differ. diff --git a/bin/ls b/bin/ls Binary files differ. diff --git a/bin/rainbow b/bin/rainbow Binary files differ. diff --git a/cmd/cat.ha b/cmd/cat.ha @@ -0,0 +1,25 @@ +use fmt; +use io; +use os; +use strings; +use fs; + +use util; + +export fn main() void = { + let b: []u8 = [0]; + + for (let f .. os::args[1..]) { + let file = match(os::open(f)) { + case let f: io::file => yield f; + case let e: fs::error => util::die(fs::strerror(e), f); + }; + + for (let i = 0z; true; i += 1) { + match (io::read(file, b)) { + case size => fmt::print(b[0]: rune)!; + case => break; + }; + }; + }; +}; diff --git a/cmd/ls.ha b/cmd/ls.ha @@ -0,0 +1,146 @@ +use fmt; +use os; +use getopt; +use fs; +use strings; +use io; + +use color; +use util; + +type mode = enum uint { + DIR = 1 << 0, + COLOR = 1 << 1, + ALL = 1 << 2, +}; + +let script = false; +let color = true; +let all = false; +let showdirs = true; +let path = ""; + +fn bit(ent: uint, m: uint) bool = ((ent & m) == m); + +fn getcol(ent: str, link: bool) color::col = { + let m = os::stat(ent)!.mode; + + if (bit(m, fs::mode::BLK) || bit(m, fs::mode::CHR)) + return (color::colors::YELLOW, color::mode::BOLD) + + else if (bit(m, fs::mode::DIR)) { + if (link) + return (color::colors::BLUE, color::mode::ITAL); + return (color::colors::BLUE, color::mode::BOLD); + } else if (link) + return (color::colors::NORM, color::mode::ITAL) + + else if (bit(m, fs::mode::OTHER_X) || + bit(m, fs::mode::USER_X) || + bit(m, fs::mode::GROUP_X)) return (color::colors::GREEN, color::mode::BOLD) + + + + else return (color::colors::NORM, color::mode::NORM); +}; + +fn printent(ent: fs::dirent, opts: mode, file: str) void = { + if (!bit(opts, mode::ALL)) { + match (strings::index(ent.name, '.')) { + case void => yield; + case let i: size => if (i == 0) return; + }; + }; + + let res = fmt::asprint(ent.name)!; + defer free(res); + + if (bit(opts, mode::DIR) && bit(ent.ftype, fs::mode::DIR) && !bit(ent.ftype, fs::mode::BLK)) + res = fmt::asprintf("{}{}", res, "/")!; + + let link = false; + if (bit(ent.ftype, fs::mode::LINK)) { + link = true; + file = os::readlink(ent.name)!; + if (bit(opts, mode::DIR) && bit(os::stat(file)!.mode, fs::mode::DIR)) + res = fmt::asprintf("{}{}", res, "/")!; + }; + + + if (bit(opts, mode::COLOR)) + color::println(res, getcol(file, link)) + else + fmt::println(res)!; +}; + +export fn main() void = { + const cmd = getopt::parse(os::args, + "list directory contents", + ('s', "scriptable output, equivelant to -aCD"), + ('a', "show all files, including hidden files"), + ('c', "color output (default)"), + ('d', "show dirs with '/' (default)"), + ('A', "don't show all files (default)"), + ('C', "no color output"), + ('D', "don't show dirs with '/'"), + "where" + ); + defer getopt::finish(&cmd); + + for (let opt .. cmd.opts) { + switch (opt.0) { + case 's' => script = true; + case 'a' => all = true; + case 'c' => color = true; + case 'A' => all = false; + case 'C' => color = false; + case 'd' => showdirs = true; + case 'D' => showdirs = false; + case => abort(); + }; + }; + + if (len(cmd.args) > 1) util::die("can only list one directory") + else if (len(cmd.args) == 1) path = cmd.args[0] + else path = "./"; + + if (script) { + all = true; + color = false; + showdirs = false; + }; + let dir = match (os::diropen(path)) { + case let fs: *fs::fs => os::chdir(fs)!; + case let e: fs::error => util::die(fs::strerror(e)); + }; + + let dir = match (os::diropen("./")) { + case let fs: *fs::fs => yield fs; + case let e: fs::error => util::die(fs::strerror(e)); + }; + defer fs::close(dir); + + let iter = match(fs::iter(dir, "./")) { + case let iter: *fs::iterator => yield iter; + case let e: fs::error => util::die(fs::strerror(e)); + }; + defer fs::finish(iter); + + let dirs: []fs::dirent = []; + defer free(dirs); + + for (let dirent = fs::next(iter)!; dirent is fs::dirent; dirent = fs::next(iter)!) { + let d = dirent as fs::dirent; + append(dirs, d)!; + }; + + for (let ent .. dirs) { + let opts: mode = 0; + + if (all) opts |= mode::ALL; + if (color) opts |= mode::COLOR; + if (showdirs) opts |= mode::DIR; + + printent(ent, opts, ent.name); + }; +}; diff --git a/cmd/rainbow.ha b/cmd/rainbow.ha @@ -0,0 +1,26 @@ +use fmt; +use io; +use os; +use strings; + +use color; + +const colors = [ + (color::colors::RED, color::mode::NORM), + (color::colors::YELLOW, color::mode::NORM), + (color::colors::GREEN, color::mode::NORM), + (color::colors::BLUE, color::mode::NORM), + (color::colors::CYAN, color::mode::NORM), + (color::colors::MAGENTA, color::mode::NORM) +]; + +export fn main() void = { + let b: []u8 = [0]; + + for (let i = 0z; true; i += 1) { + match (io::read(os::stdin, b)) { + case size => color::print(strings::fromutf8(b)!, colors[i % len(colors)]); + case => break; + }; + }; +}; diff --git a/color/color.ha b/color/color.ha @@ -0,0 +1,49 @@ +use fmt; +use strings; +use strconv; + +export type colors = enum uint { + BLACK = 30, + RED, + GREEN, + YELLOW, + BLUE, + MAGENTA, + CYAN, + WHITE, + NORM = 39, +}; + +export type mode = enum uint { + NORM = 0, + BOLD = 1, + DIM = 2, + ITAL = 3, + UL = 4, + BLINK = 5, + REV = 7, + INVIS = 8, + STRIKE = 9 +}; + +export type col = (colors, mode); + +export fn print(s: str, c: col) void = { + let out = strings::concat( + `[`, + strings::dup(strconv::itos(c.1: int))!, + ";", + strings::dup(strconv::itos(c.0: int))!, + "m" + )!; + defer free(out); + + fmt::print(out)!; + fmt::print(s)!; + fmt::print(``)!; +}; + +export fn println(s: str, c: col) void = { + print(s, c); + fmt::println()!; +}; diff --git a/lex/+test/test.ha b/lex/+test/test.ha @@ -0,0 +1,121 @@ +use io; +use fmt; + +fn same(a: *token, b: *token, quick: bool) bool = { + if (a.name != b.name) return false; + match (a.value) { + case let avalue: f64 => + if (!(b.value is f64)) return false; + if (avalue != b.value as f64) return false; + case let avalue: i64 => + if (!(b.value is i64)) return false; + if (avalue != b.value as i64) return false; + case let avalue: size => + if (!(b.value is size)) return false; + if (avalue != b.value as size) return false; + case let avalue: u64 => + if (!(b.value is u64)) return false; + if (avalue != b.value as u64) return false; + case let avalue: str => + if (!(b.value is str)) return false; + if (avalue != b.value as str) return false; + case let avalue: rune => + if (!(b.value is rune)) return false; + if (avalue != b.value as rune) return false; + case let avalue: void => + if (!(b.value is void)) return false; + }; + if (a.start.line != b.start.line) return false; + if (a.start.col != b.start.col) return false; + + if (quick) return true; + + if (a.morphene != b.morphene) return false; + if (a.start.off != b.start.off) return false; + if (a.end.line != b.end.line) return false; + if (a.end.col != b.end.col) return false; + if (a.end.off != b.end.off) return false; + return true; +}; + +fn error_token(tok: *token, quick: bool) (void | io::error) = { + switch (quick) { + case false => + fmt::errorfln("{}:{}:{}: '{}' '{}' '{}' {}:{}:{}", + tok.start.line, tok.start.col, tok.start.off, + tok.name, tok.value, tok.morphene, + tok.end.line, tok.end.col, tok.end.off)?; + case true => + fmt::errorfln("{}:{}: '{}' '{}'", + tok.start.line, tok.start.col, + tok.name, tok.value)?; + }; +}; + +export fn testexps(lexer: *lexer, exp: []token, quick: bool = false) void = { + for (let exp &.. exp) { + const token = match (next(lexer)) { + case let err: error => + fmt::fatal(strerror(err)); + case let tok: *token => yield tok; + }; + + if (!same(exp, token, quick)) { + fmt::errorf("Expected: ")!; + error_token(exp, quick)!; + fmt::errorf("Got: ")!; + error_token(token, quick)!; + abort(); + }; + }; +}; + +export fn testtok( + name: const str, + value: value, + morphene: const str, + start: location, + end: location, +) token = { + return token { + name = name, + value = value, + morphene = morphene, + lexeme = morphene, + start = start, + end = end, + tostrfn = null: *tokstrfn, + freefn = null: *tokfreefn, + }; +}; + +export fn testtok_quick( + name: const str, + value: value, + line: uint, + col: uint, +) token = { + return token { + name = name, + value = value, + lexeme = "", + morphene = "", + tostrfn = null: *tokstrfn, + freefn = null: *tokfreefn, + start = location { + line = line, + col = col, + ... + }, + ... + }; +}; + +export fn testloc(line: uint, col: uint, off: uint) location = { + return location { + line = line, + col = col, + off = off, + }; +}; + diff --git a/lex/README b/lex/README @@ -0,0 +1,95 @@ +This module provides a general purpose lexer machine. + +The user add actions callbacks to the lexer. The longest pattern matched +prefix wins. In case of ties, the pattern with the highest precedence +wins. + +The user prepare a backend to use with a lexer. A backend take a list of +action to compile its core. + + let actions: []lex::action = []; + defer free(actions); + + append(actions, lex::action { + expr = `"([^\\"]|(\\.))*"`, + cb = &literal, + name = "LIT_STR", + ... + })!; + + const backend = lex::def_backend()!(actions)!; // use default backend (DFA without environment variable) + defer lex::destroy(backend); + + const lexer = lex::init(backend, in); + defer lex::finish(&lexer); + +An action callback is associated with an regular expression to +match the tokens. The action callbacks are free to initialize tokens as +they please, but the [[scanner]] object provide convenient functions. + + fn literal( + scan: *lex::scanner, + lexeme: const str, + user: nullable *opaque, + ) (str | *lex::token | lex::error) = { + return lex::scan_token(scan, void, lexeme); + }; + +This action callback would return a token of the added action type +(ex: "LIT_STR"), with a void value, and lexing the full lexeme pattern +matched string (ex: "foo"). + +When the callback return a string, it represents the lexeme to swallow. + + append(actions, lex::action { + expr = "( |\t|\n|\r)+", + cb = &skip, + ... + })!; + + fn skip( + scan: *lex::scanner, + lexeme: const str, + user: nullable *opaque, + ) (str | *lex::token | lex::error) = { + return lexeme; + }; + +Action callbacks can be used to match hatch symbols, and then to lex the +scanned input manually. + + append(actions, lex::action { + expr = `\<`, + cb = &html, + name = "ID" + ... + })!; + + fn html( + scan: *lex::scanner, + lexeme: const str, + user: nullable *opaque, + ) (str | *lex::token | lex::error) = { + let buf: []u8 = []; + defer free(buf); + + append(buf, strings::toutf8(lexeme)...)!; + + let brk = 1z; + const start = scan.start; + + for (let byte .. strings::toutf8(scan.in)) { + append(buf, byte)?; + if (byte == '<') { + brk += 1; + } else if (byte == '>') { + brk -= 1; + }; + if (brk == 0) { + const lexeme = strings::fromutf8(buf)!; + return lex::scan_token(scan, void, lexeme); + }; + }; + + return lex::syntaxf(start, "unclosed HTML literal"); + }; diff --git a/lex/ast.ha b/lex/ast.ha @@ -0,0 +1,7 @@ +// The location of an AST node +export type ast_location = struct { + // The location of the start of the AST node + start: location, + // The location of the end of the AST node + end: location, +}; diff --git a/lex/backend.ha b/lex/backend.ha @@ -0,0 +1,145 @@ +use encoding::utf8; +use lexical::machine; +use os; +use strings; + +// An action callback. Can return a lexeme to swallow completely this string, +// and continue, or a token to return to the [[next]]. +export type actioncb = fn( + scan: *scanner, + result: const str, + user: nullable *opaque = null, +) (str | *token | error); + +// A backend action. +export type action = struct { + expr: str, + cb: *actioncb, + name: const str, + user: nullable *opaque, +}; + +// A backend. +export type backend = struct { + performfn: *backend_performfn, + destroyfn: *backend_destroyfn, +}; + +// A backend perform function. +export type backend_performfn = fn(be: *backend, in: str) (void | (*action, str) | error); +// A backend destroy function. +export type backend_destroyfn = fn(be: *backend) void; + +// Perform the backend, and return the matched action and bytes. +export fn perform(be: *backend, in: str) (void | (*action, str) | error) = be.performfn(be, in); +// Destroy a backend. +export fn destroy(be: *backend) void = be.destroyfn(be); + +// The default backend constructor. +export fn def_backend() (*backendinitfb | error) = { + match (os::getenv("LEXER_BACKEND")) { + case void => + return &deterministic: *backendinitfb; + case let value: str => + switch (value) { + case "dfa" => + return &deterministic: *backendinitfb; + case "ndfa" => + return &nondeterministic: *backendinitfb; + case => + return "unknown backend": compile; + }; + }; +}; + +export type backendinitfb = fn(actions: []action ) (*backend | error); + +export type ndfa_backend = struct { + backend, + actions: []action, + mata: machine::automata, +}; + +fn ndfa_destroy(be: *backend) void = { + let be = be: *ndfa_backend; + machine::finish(&be.mata); + free(be.actions); + free(be); +}; + +fn ndfa_perform( + be: *backend, + in: str, +) (void | (*action, str) | error) = { + let be = be: *ndfa_backend; + match (machine::resolve(&be.mata, utf8::decode(strings::toutf8(in)))) { + case void => return void; + case let err: machine::error => abort(machine::strerror(err)); + case let this: (size, *opaque) => + const new = strings::sub(in, 0, this.0); + return (this.1: *action, new); + }; +}; + +// A non-deterministic backend. Its tests all expressions in parallel, and +// uses the longest matched prefix as winer. The actions are duplicated locally, +// and can be freed by the caller. +export fn nondeterministic( + actions: []action +) (*backend | error) = { + const be = build_ndfa_be(actions)?: *ndfa_backend; + match (os::getenv("LEXER_DEBUG")) { + case void => void; + case let value: str => + if (value == "1") { + machine::debug_automata(&be.mata)!; + }; + }; + return be; +}; + +// A deterministic backend. It crawl acceptable transitions linearly, and return +// the last encountered acceptance. The actions are duplicated locally, and can +// be freed by the caller. +export fn deterministic( + actions: []action +) (*backend | error) = { + const be = build_ndfa_be(actions)?: *ndfa_backend; + const old = be.mata; + defer machine::finish(&old); + be.mata = machine::determine(&old)?; + match (os::getenv("LEXER_DEBUG")) { + case void => void; + case let value: str => + if (value == "1") { + machine::debug_automata(&be.mata)!; + }; + }; + return be; +}; + +fn build_ndfa_be( + _actions: []action +) (*backend | error) = { + let actions: []action = []; + append(actions, _actions...)?; + + let exprs: [](str, *opaque) = []; + defer free(exprs); + for (let act &.. actions) { + append(exprs, (act.expr, act))?; + }; + + const mata = match (machine::compute(exprs)) { + case let this: machine::automata => yield this; + case nomem => return nomem; + case let syn: machine::syntax => return syn: compile; + }; + + return alloc(ndfa_backend { + actions = actions, + mata = mata, + performfn = &ndfa_perform, + destroyfn = &ndfa_destroy, + })?; +}; diff --git a/lex/lex.ha b/lex/lex.ha @@ -0,0 +1,246 @@ +use regex; +use strconv; +use fmt; +use strings; +use encoding::utf8; + +// The default token constructor. +export fn default_token( + scan: *scanner, + name: const str, + value: value, + morphene: const str, + lexeme: const str, +) (*token | error) = { + const end = scan.start; + const last = end; + const decoder = utf8::decode(strings::toutf8(morphene)); + for (let r => utf8::next(&decoder)!) { + last = end; + forwardr(&end, [r as rune]); + }; + return alloc(token { + name = name, + value = value, + morphene = morphene, + lexeme = lexeme, + start = scan.start, + end = last, + tostrfn = &tokstr, + freefn = &tokfree, + })?; +}; + +fn tokstr(tok: *token) str = { + match (tok.value) { + case void => return tok.morphene; + case let val: f64 => return strconv::f64tos(val); + case let val: i64 => return strconv::i64tos(val); + case let val: size => return strconv::ztos(val); + case let val: u64 => return strconv::u64tos(val); + case let val: str => return val; + case let val: rune => return strings::fromutf8_unsafe(utf8::encoderune(val)); + }; +}; + +fn tokfree(tok: *token) void = { + free(tok); +}; + +// Format a token as a string. +export fn strtoken(tok: *token) str = tok.tostrfn(tok); + +// Initialize a new [[lexer]] to lex the input bytes. The caller must free +// associated resources with [[finish]]. +export fn init( + be: *backend, + in: const str = "", + tokfn: nullable *tokenfn = null, +) lexer = { + const loc = location { + off = 0, + line = 1, + col = 1, + }; + return lexer { + be = be, + in = in, + token = if (tokfn is *tokenfn) tokfn as *tokenfn else &default_token, + un = null, + loc = loc, + prevunlocs = [(loc, loc)...], + ... + }; +}; + +// Convenient function to reuse an existing lexer with a new input string. +export fn reuse(lex: *lexer, in: const str) void = { + const loc = location { + off = 0, + line = 1, + col = 1, + }; + lex.in = in; + lex.un = null; + lex.loc = loc; + lex.prevunlocs = [(loc, loc)...]; + for (let tok .. lex.tokens) + tok.freefn(tok); + delete(lex.tokens[..]); + match (lex.reuse) { + case null => void; + case let reuse: *reusecb => reuse(lex); + }; +}; + +// Free resources associated with a [[lexer]]. +export fn finish(lex: *lexer) void = { + for (let tok .. lex.tokens) + tok.freefn(tok); + free(lex.tokens); +}; + +// Format a [[syntax]] error as a formatable. +export fn syntaxf(loc: location, fmt: const str, args: fmt::field...) syntax = { + static let buf: [2048]u8 = [0...]; + const msg = fmt::bsprintf(buf, fmt, args...)!; + return (loc, msg); +}; + +// Gives the current location of the lexer. +export fn mkloc(lex: *lexer) location = { + match (lex.un) { + case null => return lex.loc; + case let tok: *token => return lex.prevunlocs[1].1; + }; +}; + +// Gives the previous location of the lexer. +export fn prevloc(lex: *lexer) location = { + match (lex.un) { + case null => return lex.prevrloc; + case let tok: *token => return lex.prevunlocs[1].0; + }; +}; + +// Initialize a token based on the scan context. When the lexeme is not present, +// this considers the morphene as both. When the name is not present, the name +// comes from the action will be use. +export fn scan_token( + scan: *scanner, + value: value, + first: const str, + second: const str = "", + name: const str = "" +) (*token | error) = { + const (morphene, lexeme) = if (second == "") { + yield (first, first); + } else { + yield (first, second); + }; + return scan.lex.token( + scan, + if (name == "") scan.name else name, + value, + morphene, + lexeme + ); +}; + +// Return the lexer associated to a scanner. +export fn scan_lexer(scan: *scanner) *lexer = scan.lex; + +// Return the token name associated to a scanner. +export fn scan_name(scan: *scanner) str = scan.name; + +// Return a string representing the error. +export fn strerror(err: error) str = { + static let buf: [2048]u8 = [0...]; + match (err) { + case let s: syntax => + return fmt::bsprintf(buf, "{}:{}: syntax error: {}", + s.0.line, s.0.col, s.1)!; + case let com: compile => return com; + case nomem => return "nomem"; + }; +}; + +// Give the next token from the lexer. +export fn next(lex: *lexer) (*token | error) = { + if (lex.un is *token) { + const prev = lex.un as *token; + lex.un = null; + return prev; + }; + + defer { + lex.prevunlocs[1] = lex.prevunlocs[0]; + lex.prevunlocs[0] = (prevloc(lex), mkloc(lex)); + }; + + + let scan = scanner { + lex = lex, + start = lex.loc, + ... + }; + + for (true) { + scan.in = strings::sub(lex.in, lex.loc.off, strings::end); + if (len(scan.in) == 0) { + const tok = scan.lex.token(&scan, EOF, void, "", "")?; + append(lex.tokens, tok)?; + return tok; + }; + + const (action, lexeme) = match (perform(lex.be, scan.in)) { + case void => return syntaxf(mkloc(lex), "no matching token"); + case let this: (*action, str) => yield this; + }; + + scan.name = action.name; + + match (action.cb(&scan, lexeme, action.user)?) { + case let lexeme: str => + forwardlex(lex, lexeme); + scan.start = lex.loc; + case let tok: *token => + forwardlex(lex, tok.lexeme); + append(lex.tokens, tok)?; + return tok; + }; + }; +}; + +// Unlex a token, so that it get lexed back with [[next]]. +export fn unlex(lex: *lexer, value: *token) void = { + assert(lex.un is null); + lex.un = value; +}; + +fn forwardlex(lex: *lexer, in: str) void = { + const decoder = utf8::decode(strings::toutf8(in)); + for (let r => utf8::next(&decoder)!) { + lex.prevrloc = lex.loc; + forwardr(&lex.loc, [r as rune]); + }; +}; + +// Moves a location based on bytes. +export fn forward(loc: *location, in: str) void = { + const decoder = utf8::decode(strings::toutf8(in)); + for (let r => utf8::next(&decoder)!) { + forwardr(loc, [r as rune]); + }; +}; + +fn forwardr(loc: *location, r: []rune) void = { + for (let r .. r) { + loc.off += 1; + loc.col += 1; + if (r == '\n') { + loc.col = 1; + loc.line += 1; + }; + }; +}; diff --git a/lex/type.ha b/lex/type.ha @@ -0,0 +1,73 @@ +use regex; + +// A location. +export type location = struct { + line: uint, + col: uint, + off: uint, +}; + +// A token value. +export type value = (f64 | i64 | size | u64 | str | rune | void); + +// A lexed token. +export type token = struct { + name: const str, + value: value, + morphene: const str, // meaningfull part "foo" + lexeme: const str, // all swallowed bytes "foos" + start: location, + end: location, + tostrfn: *tokstrfn, + freefn: *tokfreefn, +}; + +export def EOF = "EOF"; + +// Function to format a token as a string. +export type tokstrfn = fn(tok: *token) str; +// Function to free resources associated with a token. +export type tokfreefn = fn(tok: *token) void; + +// A syntax error. +export type syntax = !(location, str); + +// A backend compile error. +export type compile = !str; + +// All possible errors for this module. +export type error = !(syntax | compile | nomem); + +// A lexer. +export type lexer = struct { + be: *backend, + in: const str, // the full bytes + loc: location, // the cursor location + prevrloc: location, // last rune location + prevunlocs: [2](location, location), + un: nullable *token, + tokens: []*token, // to free them + token: *tokenfn, // the function to build tokens with [[scan_token]] + reuse: nullable *reusecb, // the callback when reusing with [[reuse]] +}; + +// The toolkit given to the [[actioncb]] callback to help the user to +// initialize the tokens. +export type scanner = struct { + lex: *lexer, + in: const str, // the remaining bytes + name: const str, // the scanned token name + start: location, // the lexeme start location +}; + +// A function to initialize tokens. +export type tokenfn = fn( + scan: *scanner, + name: const str, // token type name + value: value, // token value + morphene: const str, // meaningfull part + lexeme: const str, // full bytes to swallow +) (*token | error); + +// A callback to reuse a lexer. +export type reusecb = fn(lex: *lexer) void; diff --git a/newcmd.sh b/newcmd.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +echo "bin/$1: cmd/$1.ha" +echo " \$(HARE) build \$(HAREFLAGS) -o \$@ cmd/$1.ha" diff --git a/parse/+test.ha b/parse/+test.ha @@ -0,0 +1,143 @@ +use lexical::lex; + +fn literal( + scan: *lex::scanner, + lexeme: const str, + user: nullable *opaque, +) (str | *lex::token | lex::error) = { + return lex::scan_token(scan, void, lexeme); +}; + +fn skip( + scan: *lex::scanner, + lexeme: const str, + user: nullable *opaque, +) (str | *lex::token | lex::error) = { + return lexeme; +}; + +def FOO = "FOO"; +def BAR = "BAR"; +def NAME = "NAME"; + +def keywords = [ + (FOO, "foo"), + (BAR, "bar"), +]; + +def exprs = [ + (NAME, `([a-z]|[A-Z])([a-z]|[A-Z]|[0-9]|_)*`), +]; + +@test fn test_parse() void = { + let actions: []lex::action = []; + defer free(actions); + for (let keyword .. keywords) { + append(actions, lex::action { + expr = keyword.1, + cb = &literal, + name = keyword.0, + ... + })!; + }; + for (let expr .. exprs) { + append(actions, lex::action { + expr = expr.1, + cb = &literal, + name = expr.0, + ... + })!; + }; + append(actions, lex::action { + expr = "( |\t|\n|\r)+", + cb = &skip, + ... + })!; + const be = lex::def_backend()!(actions)!; + defer lex::destroy(be); + + const in = "foo bar foobar foo"; + const lexer = lex::init(be, in); + defer lex::finish(&lexer); + + const res = want(&lexer, BAR, lex::EOF); + assert(res is lex::error); + assert(res is lex::syntax); + const res = res as lex::syntax; + assert(res.1 == "Unexpected 'FOO', was expecting 'BAR', 'EOF'"); + assert(res.0.line == 1); + assert(res.0.col == 1); + assert(res.0.off == 0); + + const res = want(&lexer, BAR, FOO); + assert(res is *lex::token); + const res = res as *lex::token; + assert(res.name == FOO); + assert(res.lexeme == "foo"); + assert(res.value is void); + assert(res.start.line == 1); + assert(res.start.col == 1); + assert(res.start.off == 0); + assert(res.end.line == 1); + assert(res.end.col == 3); + assert(res.end.off == 2); + + const res = try(&lexer, FOO); + assert(res is void); + + const res = want(&lexer, FOO); + assert(res is lex::error); + assert(res is lex::syntax); + const res = res as lex::syntax; + assert(res.1 == "Unexpected 'BAR', was expecting 'FOO'"); + assert(res.0.line == 1); + assert(res.0.col == 5); + assert(res.0.off == 4); + + const res = try(&lexer, BAR); + assert(res is *lex::token); + const res = res as *lex::token; + assert(res.name == BAR); + assert(res.lexeme == "bar"); + assert(res.value is void); + assert(res.start.line == 1); + assert(res.start.col == 5); + assert(res.start.off == 4); + assert(res.end.line == 1); + assert(res.end.col == 7); + assert(res.end.off == 6); + + const res = peek(&lexer, lex::EOF); + assert(res is void); + + const res = peek(&lexer, NAME); + assert(res is *lex::token); + + const res = want(&lexer, NAME); + assert(res is *lex::token); + const res = res as *lex::token; + assert(res.name == NAME); + assert(res.lexeme == "foobar"); + assert(res.value is void); + assert(res.start.line == 1); + assert(res.start.col == 9); + assert(res.start.off == 8); + assert(res.end.line == 1); + assert(res.end.col == 14); + assert(res.end.off == 13); + + const res = want(&lexer, FOO); + assert(res is *lex::token); + + const res = want(&lexer, FOO); + assert(res is lex::error); + assert(res is lex::syntax); + const res = res as lex::syntax; + assert(res.1 == "Unexpected 'EOF', was expecting 'FOO'"); + assert(res.0.line == 1); + assert(res.0.col == 19); + assert(res.0.off == 18); + + const res = want(&lexer, lex::EOF); + assert(res is *lex::token); +}; diff --git a/parse/parse.ha b/parse/parse.ha @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: MPL-2.0 +// (c) Hare authors <https://harelang.org> + +use fmt; +use lexical::lex; +use io; +use memio; + +// All possible error types. +export type error = !lex::error; + +// Requires the next token to have a matching ltok. Returns that token, or an +// error. +export fn want( + lexer: *lex::lexer, + want: str... +) (*lex::token | error) = { + let tok = lex::next(lexer)?; + if (len(want) == 0) return tok; + for (let want .. want) if (tok.name == want) return tok; + + let buf = memio::dynamic(); + defer io::close(&buf)!; + lex::unlex(lexer, tok); + for (let i = 0z; i < len(want); i += 1) { + fmt::fprintf(&buf, "'{}'", want[i])!; + if (i + 1 < len(want)) fmt::fprint(&buf, ", ")!; + }; + return lex::syntaxf(tok.start, + "Unexpected '{}', was expecting {}", + tok.name, memio::string(&buf)!); +}; + +// Looks for a matching ltok from the lexer, and if not present, unlexes the +// token and returns void. If found, the token is consumed from the lexer and is +// returned. +export fn try( + lexer: *lex::lexer, + want: str... +) (*lex::token | error | void) = { + let tok = lex::next(lexer)?; + assert(len(want) > 0); + for (let want .. want) if (tok.name == want) return tok; + lex::unlex(lexer, tok); +}; + +// Looks for a matching ltok from the lexer, unlexes the token, and returns +// it; or void if it was not an ltok. +export fn peek( + lexer: *lex::lexer, + want: str... +) (*lex::token | error | void) = { + let tok = lex::next(lexer)?; + lex::unlex(lexer, tok); + if (len(want) == 0) return tok; + for (let want .. want) if (tok.name == want) return tok; +}; + +// Returns a syntax error if cond is false and void otherwise +export fn synassert( + loc: lex::location, + cond: bool, + msg: const str, +) (void | error) = { + if (!cond) return lex::syntaxf(loc, msg); +}; + +export fn loc_from(lexer: *lex::lexer, start: lex::location) lex::ast_location = { + return lex::ast_location { + start = start, + end = lex::prevloc(lexer), + }; +}; diff --git a/util/die.ha b/util/die.ha @@ -0,0 +1,12 @@ +use fmt; +use os; + +export fn die(msg: str, args: fmt::formattable...)never = { + + fmt::printf("{}: {}: ", os::args[0], msg)!; + + if (len(args) != 0) + fmt::println(args...)!; + + os::exit(255); +};