commit b682acf4e47acae6edd1aeb7a86d6c4dd3aa14e6
parent c6cc2ba557076fd1385da67a6381e3a427a2124b
Author: thing1 <thing1@seacrossedlovers.xyz>
Date: Thu, 20 Nov 2025 22:08:17 +0000
implement a pivot parser, started work on bracketed exprs
Diffstat:
| A | .clang-format | | | 12 | ++++++++++++ |
| M | Makefile | | | 5 | ++++- |
| M | lexer.c | | | 57 | +++++++++++++++++++++++++++++++++++++++------------------ |
| M | lexer.h | | | 17 | ++++++++++++----- |
| M | spl.c | | | 106 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------------- |
| A | util.c | | | 70 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
| A | util.h | | | 17 | +++++++++++++++++ |
7 files changed, 227 insertions(+), 57 deletions(-)
diff --git a/.clang-format b/.clang-format
@@ -0,0 +1,12 @@
+UseTab: Always
+IndentWidth: 8
+TabWidth: 8
+ColumnLimit: 79
+DerivePointerAlignment: false
+PointerAlignment: Right
+AlignAfterOpenBracket: true
+AlignArrayOfStructures: Right
+AlignEscapedNewlines: LeftWithLastLine
+AlignOperands: true
+IndentCaseBlocks: false
+BreakAfterReturnType: TopLevel
diff --git a/Makefile b/Makefile
@@ -1,6 +1,6 @@
CFLAGS=-ggdb
-SRC = spl.c lexer.c
+SRC = spl.c lexer.c util.c
OBJ = ${SRC:.c=.o}
EXEC = spl
@@ -10,3 +10,6 @@ ${OBJ}: ${SRC}
clean:
rm -rf ${OBJ} ${EXEC}
+format:
+ clang-format -i *.c
+
diff --git a/lexer.c b/lexer.c
@@ -1,12 +1,12 @@
-#include <stdlib.h>
-#include <stdio.h>
#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
#include "lexer.h"
-void
+void
lexErr(lex *l) {
- fprintf(stderr, "unexpected token '%c'\n", *l->ptr);
+ fprintf(stderr, "unexpected token '%c'\n", *l->ptr);
exit(1);
}
@@ -20,25 +20,34 @@ lexNum(lex *l) {
tok
next(lex *l) {
- tok t = {0};
+ tok t = {0};
switch (*l->ptr) {
case 0:
- t.op = LEOF;
+ t.op = LEOF;
return t;
- case '\n': case ' ': case '\t': case '\v':
- l->ptr++;
+ case '\n':
+ case ' ':
+ case '\t':
+ case '\v':
+ l->ptr++;
return next(l);
- case ADD: case SUB: case MUL: case DIV:
+ case ADD:
+ case SUB:
+ case MUL:
+ case DIV:
+ case OBRACE:
+ case CBRACE:
t.op = *l->ptr++;
return t;
}
+
if (isdigit(*l->ptr)) {
t.op = INT;
t.n = lexNum(l);
- }
- else lexErr(l);
+ } else
+ lexErr(l);
return t;
}
@@ -46,16 +55,28 @@ next(lex *l) {
void
printTok(tok *t) {
switch (t->op) {
- case INT: printf("INT: %d\n", t->n); break;
- case LEOF: printf("$\n"); break;
- case ADD: printf("ADD\n"); break;
- case SUB: printf("SUB\n"); break;
- case MUL: printf("MUL\n"); break;
- case DIV: printf("DIV\n"); break;
+ case INT:
+ printf("INT: %d\n", t->n);
+ break;
+ case LEOF:
+ printf("$\n");
+ break;
+ case ADD:
+ printf("ADD\n");
+ break;
+ case SUB:
+ printf("SUB\n");
+ break;
+ case MUL:
+ printf("MUL\n");
+ break;
+ case DIV:
+ printf("DIV\n");
+ break;
}
}
-lex
+lex
mklexer(char *input) {
lex l = (lex){input, input};
return l;
diff --git a/lexer.h b/lexer.h
@@ -8,6 +8,8 @@ enum ops {
SUB = '-',
MUL = '*',
DIV = '/',
+ OBRACE = '(',
+ CBRACE = ')',
LEOF = '$',
};
@@ -20,10 +22,15 @@ typedef struct lex {
char *input, *ptr;
} lex;
-void lexErr(lex *l);
-int lexNum(lex *l);
-tok next(lex *l);
-void printTok(tok *t);
-lex mklexer(char *input);
+void
+lexErr(lex *l);
+int
+lexNum(lex *l);
+tok
+next(lex *l);
+void
+printTok(tok *t);
+lex
+mklexer(char *input);
#endif
diff --git a/spl.c b/spl.c
@@ -1,14 +1,24 @@
-#include <stdlib.h>
-#include <stdio.h>
#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
#include "lexer.h"
+#include "util.h"
typedef struct rexpr {
char op;
-
- struct rexpr *expr[2];
- int n[2];
+ union {
+ struct { /* for bracketed exprs */
+ tok *ts;
+ int *tc;
+ };
+ struct { /* operators */
+ struct rexpr *expr[2];
+ };
+ struct { /* litterals */
+ int n;
+ };
+ };
} rexpr;
void
@@ -18,17 +28,22 @@ parserErr(const char *msg) {
}
int
-getpres(rexpr r) {
- switch (r.op) {
- case INT: return 0;
+getpres(tok t) {
+ switch (t.op) {
+ case ADD:
+ return 1;
+ case SUB:
+ return 1;
- case ADD: return 1;
- case SUB: return 1;
-
- case MUL: return 2;
- case DIV: return 2;
+ case MUL:
+ return 2;
+ case DIV:
+ return 2;
- case LEOF: return -1;
+ case LEOF:
+ return -1;
+ case INT:
+ return 3;
}
}
@@ -38,31 +53,56 @@ isop(tok t) {
}
rexpr *
-parseRexpr(lex *l) {
- rexpr *e = malloc(sizeof(rexpr)), *e2;
- tok t1 = next(l), t2 = next(l);
-
- if (t1.op != INT)
- parserErr("Expected INT in rexpr");
- e->n[0] = t1.n;
- if (isop(t2)) {
- e->op = t2.op;
- e2 = parseRexpr(l);
- if (e2->op == INT) e->n[1] = e2->n[0];
- else e->expr[1] = e2;
+pivotParse(tok *start, tok *end, mctx *ctx) {
+ rexpr *e = alloczctx(ctx, sizeof(rexpr));
+ tok *lowest = start;
+ int i = 0;
+ for (tok *t = lowest; t != end; i++, t = &start[i]) {
+ if (getpres(*t) < getpres(*lowest))
+ lowest = t;
}
- else if (t2.op == LEOF) {
+ if (lowest == start) {
+ if (lowest->op != INT)
+ parserErr("Expected int expression");
+ if (start != end)
+ parserErr("Trailing expression");
e->op = INT;
- return e;
+ e->n = lowest->n;
+ } else {
+ if (!isop(*lowest))
+ parserErr("Expected op");
+ e->op = lowest->op;
+ e->expr[0] = pivotParse(start, &lowest[-1], ctx);
+ e->expr[1] = pivotParse(&lowest[1], end, ctx);
}
- else
- parserErr("Unexpected token in rexpr");
+ return e;
+}
+
+rexpr *
+parseRexpr(lex *l, mctx *ctx) {
}
int
-main() {
- lex l = mklexer("5 + 2");
+eval(rexpr *e) {
+ switch (e->op) {
+ case INT:
+ return e->n;
+ case ADD:
+ return eval(e->expr[0]) + eval(e->expr[1]);
+ case SUB:
+ return eval(e->expr[0]) - eval(e->expr[1]);
+ case MUL:
+ return eval(e->expr[0]) * eval(e->expr[1]);
+ case DIV:
+ return eval(e->expr[0]) / eval(e->expr[1]);
+ }
+}
- rexpr *e = parseRexpr(&l);
+int
+main() {
+ lex l = mklexer("2 + 4 * 9 / 3");
+ mctx *ctx = newctx();
+ parseRexpr(&l, ctx);
+ freectx(ctx);
}
diff --git a/util.c b/util.c
@@ -0,0 +1,70 @@
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#define allocztn(type, n) allocz(sizeof(type) * n)
+#define alloczt(type) allocz(sizeof(type))
+
+typedef struct mctx {
+ int ptrc;
+ void **ptrs;
+} mctx;
+
+mctx *gctx;
+
+void *
+alloczctx(mctx *ctx, const size_t size) {
+ ctx->ptrs = realloc(ctx->ptrs, sizeof(void *) * (ctx->ptrc + 1));
+ ctx->ptrs[ctx->ptrc] = malloc(size);
+ memset(ctx->ptrs[ctx->ptrc], 0, size);
+ return ctx->ptrs[ctx->ptrc++];
+}
+
+void *
+realloczctx(mctx *ctx, void *ptr, const size_t size) {
+ if (!ptr)
+ return alloczctx(ctx, size);
+
+ for (int i = 0; i < ctx->ptrc; i++) {
+ if (ctx->ptrs[i] == ptr) {
+ ctx->ptrs[i] = realloc(ctx->ptrs[i], size);
+ memset(ctx->ptrs[i], 0, size);
+ return ctx->ptrs[i];
+ }
+ }
+ kill(getpid(), SIGSEGV);
+}
+
+void *
+reallocz(void *ptr, const size_t size) {
+ return realloczctx(gctx, ptr, size);
+}
+
+void *
+allocz(const size_t size) {
+ return alloczctx(gctx, size);
+}
+
+mctx *
+newctx() {
+ mctx *ctx = malloc(sizeof(mctx));
+ memset(ctx, 0, sizeof(mctx));
+ return ctx;
+}
+
+void
+freectx(mctx *ctx) {
+ for (int i = 0; i < ctx->ptrc; i++)
+ free(ctx->ptrs[i]);
+ free(ctx->ptrs);
+ free(ctx);
+}
+
+char *
+strslice(char *s, int len) {
+ char *slice = allocz(len + 1);
+ memcpy(slice, s, len);
+ return slice;
+}
diff --git a/util.h b/util.h
@@ -0,0 +1,17 @@
+#define allocztn(type, n) allocz(sizeof(type) * n)
+#define alloczt(type) allocz(sizeof(type))
+
+typedef struct mctx {
+ int ptrc;
+ void **ptrs;
+} mctx;
+
+extern mctx *gctx;
+
+void *alloczctx(mctx *ctx, const size_t size);
+void *realloczctx(mctx *ctx, void *ptr, const size_t size);
+void *reallocz(void *ptr, const size_t size);
+void *allocz(const size_t size);
+mctx *newctx();
+void freectx(mctx *ctx);
+char *strslice(char *s, int len);