zpy

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit de964f635d782f9c6ad51eb14c87d1f4e54e3f76
Author: thing1 <thing1@seacrossedlovers.xyz>
Date:   Tue,  5 May 2026 15:54:39 +0100

init commit

Diffstat:
A .gitignore | 1 +
A Makefile | 32 ++++++++++++++++++++++++++++++++
A zpy/README | 1 +
A zpy/lex/+test.ha | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A zpy/lex/README | 3 +++
A zpy/lex/error.ha | 34 ++++++++++++++++++++++++++++++++++
A zpy/lex/genlex.ha | 9 +++++++++
A zpy/lex/lexer.ha | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
A zpy/lex/tokens.ha | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
9 files changed, 351 insertions(+), 0 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -0,0 +1 @@ +zpyi diff --git a/Makefile b/Makefile @@ -0,0 +1,32 @@ +.POSIX: +.SUFFIXES: +HARE=hare +HAREFLAGS= + +DESTDIR= +PREFIX=/usr/local +BINDIR=$(PREFIX)/bin + +HARE_SOURCES != find . -name '*.ha' + +all: zpyi + +zpyi: $(HARE_SOURCES) + $(HARE) build $(HAREFLAGS) -o zpyi cmd/$@/ + +release: $(HARE_SOURCES) + $(HARE) build -R $(HAREFLAGS) -o zpyi cmd/zpyi/ + +check: + $(HARE) test $(HAREFLAGS) + +clean: + rm -f zpy + +install: + install -Dm755 zpy $(DESTDIR)$(BINDIR)/zpy + +uninstall: + rm -f $(DESTDIR)$(BINDIR)/zpy + +.PHONY: all check clean install uninstall release diff --git a/zpy/README b/zpy/README @@ -0,0 +1 @@ +zpy: The zippy language diff --git a/zpy/lex/+test.ha b/zpy/lex/+test.ha @@ -0,0 +1,119 @@ +use memio; +use strings; + +// checks that should return tokens + +fn check(lex: *lexer, expect: [](types | invalid)) void = { + for (let exp .. expect) { + let n = next(lex); + + match (exp) { + case let ty: types => + assert((n as token).ty == ty); + case invalid => + assert(n is error); + }; + }; +}; + +@test +fn NoArgExpr() void = { + let lex = &lexer{ + in = &memio::fixed(strings::toutf8("(foo)")), + nexts = [types::OBRACE], + items = items + }; + + check(lex, [types::OBRACE, types::NAME, types::CBRACE, types::EOF]); +}; + +@test +fn HasArgsExpr() void = { + let lex = &lexer{ + in = &memio::fixed(strings::toutf8("(foo 1 2 3)")), + nexts = [types::OBRACE], + items = items + }; + + check(lex, [types::OBRACE, types::NAME, types::NUM, types::NUM, types::NUM, types::CBRACE, types::EOF]); +}; + +@test +fn NestedArgsExpr() void = { + let lex = &lexer{ + in = &memio::fixed(strings::toutf8("(foo (bar 1 2) 3)")), + nexts = [types::OBRACE], + items = items + }; + + check(lex, [types::OBRACE, types::NAME, types::OBRACE, types::NAME, types::NUM, types::NUM, types::CBRACE, types::NUM, types::CBRACE, types::EOF]); +}; + +@test +fn NameNumExpr() void = { + let lex = &lexer{ + in = &memio::fixed(strings::toutf8("(foo bar 1)")), 
+ nexts = [types::OBRACE], + items = items + }; + + check(lex, [types::OBRACE, types::NAME, types::NAME, types::NUM, types::CBRACE, types::EOF]); +}; + +@test +fn NumNameExpr() void = { + let lex = &lexer{ + in = &memio::fixed(strings::toutf8("(foo 1 bar)")), + nexts = [types::OBRACE], + items = items + }; + + check(lex, [types::OBRACE, types::NAME, types::NUM, types::NAME, types::CBRACE, types::EOF]); +}; + + +// checks that should return errors + +@test +fn EmptyExpr() void = { + let lex = &lexer{ + in = &memio::fixed(strings::toutf8("")), + nexts = [types::OBRACE], + items = items + }; + + check(lex, [invalid]); +}; + +@test +fn SingleOBraceExpr() void = { + let lex = &lexer{ + in = &memio::fixed(strings::toutf8("(")), + nexts = [types::OBRACE], + items = items + }; + + check(lex, [types::OBRACE, invalid]); +}; + +@test +fn SingleCBraceExpr() void = { + let lex = &lexer{ + in = &memio::fixed(strings::toutf8(")")), + nexts = [types::OBRACE], + items = items + }; + + check(lex, [invalid]); +}; + +@test +fn SingleNameExpr() void = { + let lex = &lexer{ + in = &memio::fixed(strings::toutf8("foo")), + nexts = [types::OBRACE], + items = items + }; + + check(lex, [invalid]); +}; diff --git a/zpy/lex/README b/zpy/lex/README @@ -0,0 +1,3 @@ +lex: A lexer for zippy + +Implements [[lexer]] for zippy, along side supporting types/functions diff --git a/zpy/lex/error.ha b/zpy/lex/error.ha @@ -0,0 +1,34 @@ +use strings; +use io; +use fmt; +use os; +use memio; + +export type error = !(str, pos); + +fn wanted(wanted: []types, l: *lexer) error = { + let msg = "Wanted: "; + + for (let ty &.. 
wanted) + msg = strings::concat(msg, + if (ty != &wanted[0]) ", " else "", + strtypes(*ty))!; + + return (msg, io::tell(l.in)!); +}; + +@test +fn WantedStr() void = { + let lex = &new(&memio::fixed(strings::toutf8("(foo "))); + next(lex)!; + next(lex)!; + assert((next(lex) as error).0 == "Wanted: NAME, NUM, '(', ')'"); +}; + +@test +fn WantedPos() void = { + let lex = &new(&memio::fixed(strings::toutf8("(foo "))); + next(lex)!; + next(lex)!; + assert((next(lex) as error).1 == 5); +}; diff --git a/zpy/lex/genlex.ha b/zpy/lex/genlex.ha @@ -0,0 +1,9 @@ +// A function that will consume some amount of the lexer, or return invalid +export type consumer = fn(_: *lexer) (token | invalid); + +// A token item in the lexer, used to control the behaviour of the lexer +export type lexItem = struct { + ty: types, + f: *consumer, + nexts: []types +}; diff --git a/zpy/lex/lexer.ha b/zpy/lex/lexer.ha @@ -0,0 +1,75 @@ +use memio; +use bufio; +use io; +use fmt; +use strings; + +export type invalid = !void; + +// A lexer object, should be created via [[new]], doesn't allocate any memory, +// and thus doesn't have a finish function, the user should close the +// [[memio::stream]] +export type lexer = struct { + in: *memio::stream, + nexts: []types, + items: []lexItem +}; + +// A token returned by the lexer, the data may be empty if the token is of a +// fixed nature, such as "(" +export type token = struct { + ty: types, + data: str +}; + +// A position in the lexer, [[where]] can be used to find this as a line number +// column number pair, which is more helpful in error messages +export type pos = io::off; + +fn getItem(l: *lexer, ty: types) lexItem = { + for (let item .. 
l.items) { + if (item.ty == ty) return item; + }; + abort(); +}; + +fn stripspace(l: *lexer) void = { + let start= io::tell(l.in)!; + + match (bufio::read_rune(l.in)) { + case let r: rune => if (r == ' ') stripspace(l) else io::seek(l.in, start, io::whence::SET)!; + case io::EOF => io::seek(l.in, start, io::whence::SET)!; + case => fmt::fatal("Fucked up input"); + }; +}; + +// Creates a new lexer object with the correct starting tokens +export fn new(prog: *memio::stream) lexer = { + return lexer{ + in = prog, + nexts = [types::OBRACE], + items = items + }; +}; + +// Gets the next token from the lexer, or returns an error, the error will +// occur if the next element of the input doesn't match any of the valid next +// tokens +export fn next(l: *lexer) (token | error) = { + for (let next .. l.nexts) { + stripspace(l); + let off = io::tell(l.in)!; + + match (getItem(l, next).f(l)) { + case let t: token => + l.nexts = getItem(l, next).nexts; + return t; + + case => yield; + }; + + io::seek(l.in, off, io::whence::SET)!; + }; + + return wanted(l.nexts, l); +}; diff --git a/zpy/lex/tokens.ha b/zpy/lex/tokens.ha @@ -0,0 +1,77 @@ +use bufio; +use io; +use fmt; +use ascii; +use strings; +use memio; + +// The different token types the lexer can return +export type types = enum { + OBRACE, + CBRACE, + NAME, + NUM, + EOF +}; + +// Converts a type to a relvant string +export fn strtypes(ty: types) str = switch (ty) { + case types::OBRACE => yield "'('"; + case types::CBRACE => yield "')'"; + case types::NAME => yield "NAME"; + case types::NUM => yield "NUM"; + case types::EOF => yield "EOF"; +}; + + +fn consumeRune(l: *lexer, ty: types, r: rune) (token | invalid) = { + match (bufio::read_rune(l.in)) { + case let read: rune => + return if (read == r) token{ty = ty, data = ""} + else invalid; + case io::EOF => return invalid; + case => fmt::fatal("Fucked up input"); + }; +}; + +fn consumeBlock(l: *lexer, ty: types, allow: *fn(_: rune) bool) (token | invalid) = { + let runes: 
[]rune = []; + for (true) { + let r = match (bufio::read_rune(l.in)) { + case let r: rune => yield r; + case io::EOF => return invalid; + case => fmt::fatal("Fucked up input"); + }; + + if (allow(r)) + append(runes, r)! + else { + io::seek(l.in, -1, io::whence::CUR)!; + break; + }; + }; + + if (len(runes) == 0) return invalid; + return token{ty = ty, data = strings::fromrunes(runes)!}; +}; + +fn consumeEOF(l: *lexer) (token | invalid) = { + match (bufio::read_rune(l.in)) { + case io::EOF => return token{ty = types::EOF, data = ""}; + case rune => return invalid; + case => fmt::fatal("Fucked up input"); + }; +}; + +fn consumeObrace(l: *lexer) (token | invalid) = consumeRune(l, types::OBRACE, '('); +fn consumeCbrace(l: *lexer) (token | invalid) = consumeRune(l, types::CBRACE, ')'); +fn consumeName(l: *lexer) (token | invalid) = consumeBlock(l, types::NAME, &ascii::isalpha); +fn consumeNum(l: *lexer) (token | invalid) = consumeBlock(l, types::NUM, &ascii::isdigit); + +const items = [ + lexItem{ty = types::OBRACE, f = &consumeObrace, nexts = [types::NAME]}, + lexItem{ty = types::CBRACE, f = &consumeCbrace, nexts = [types::NUM, types::CBRACE, types::OBRACE, types::EOF]}, + lexItem{ty = types::NAME, f = &consumeName, nexts = [types::NAME, types::NUM, types::OBRACE, types::CBRACE]}, + lexItem{ty = types::NUM, f = &consumeNum, nexts = [types::NAME, types::NUM, types::OBRACE, types::CBRACE]}, + lexItem{ty = types::EOF, f = &consumeEOF, nexts = []}, +];