spl

a Simple Programming Language
Log | Files | Refs

commit a7e362e3fe8a5abde59ea1e91bec132b1fb0c2f7
parent 92c7b581389a40b5ba868477da057b72f7578267
Author: thing1 <thing1@seacrossedlovers.xyz>
Date:   Sat, 29 Nov 2025 22:59:20 +0000

got vars to work!

Diffstat:
M Makefile | 2 +-
M lexer.c | 18 ++++++++++++++++++
M lexer.h | 31 ++++++++++++++++++-------------
M spl.c | 272 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------
M util.c | 2 +-
5 files changed, 248 insertions(+), 77 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,5 +1,5 @@ CFLAGS = -ggdb -pedantic -CPPFLAGS = -D_POSIX_C_SOURCE +CPPFLAGS = -D_POSIX_C_SOURCE -D_XOPEN_SOURCE=500 LDFLAGS = SRC = spl.c lexer.c util.c diff --git a/lexer.c b/lexer.c @@ -1,6 +1,7 @@ #include <ctype.h> #include <stdio.h> #include <stdlib.h> +#include <string.h> #include "lexer.h" @@ -18,6 +19,16 @@ lexNum(lex *l) { return n; } +char * +lexName(lex *l) { + char name[32] = {0}; + for (int i = 0; i < 32 && l->ptr; i++, l->ptr++) { + if (!isalnum(*l->ptr)) break; + name[i] = *l->ptr; + } + return strdup(name); /* TODO need to cleanup this */ +} + tok next(lex *l) { tok t = {0}; @@ -39,13 +50,20 @@ next(lex *l) { case DIV: case OBRACE: case CBRACE: + case ASSIGN: + case SEMI: t.op = *l->ptr++; return t; + + } if (isdigit(*l->ptr)) { t.op = INT; t.n = lexNum(l); + } else if (isalpha(*l->ptr)) { + t.op = NAME; + t.name = lexName(l); } else lexErr(l); diff --git a/lexer.h b/lexer.h @@ -1,7 +1,11 @@ #ifndef __LEXER_H_ #define __LEXER_H_ +#include <string.h> -enum ops { +#define SAVE(lexer) lex __SAVE_LEX = *lexer; +#define RESTORE(lexer) memcpy(lexer, &__SAVE_LEX, sizeof(lex)); + +enum lex_ops { NOP = 0, INT = -1, ADD = '+', @@ -11,26 +15,27 @@ enum ops { OBRACE = '(', CBRACE = ')', LEOF = '$', + ASSIGN = '=', + SEMI = ';', + NEGATE, + NAME, }; typedef struct tok { - enum ops op; - int n; + enum lex_ops op; + union { + int n; + char *name; + }; } tok; typedef struct lex { char *input, *ptr; } lex; -void -lexErr(lex *l); -int -lexNum(lex *l); -tok -next(lex *l); -void -printTok(tok *t); -lex -mklexer(char *input); +void lexErr(lex *l); +tok next(lex *l); +void printTok(tok *t); +lex mklexer(char *input); #endif diff --git a/spl.c b/spl.c @@ -5,27 +5,54 @@ #include "lexer.h" #include "util.h" +enum lexpr_ops { + LEXPR_ASSIGN, +}; + +enum type_types { + BASIC, +}; + typedef struct rexpr { - char op; + enum lex_ops op; union { - struct { /* bin ops */ + struct { /* unary ops */ struct rexpr *e; }; - struct { /* operators */ 
+ struct { /* operators */ struct rexpr *expr[2]; }; - struct { /* litterals */ + struct { /* litterals */ int n; }; }; } rexpr; -rexpr *pivotParse(tok *start, tok *end, mctx *ctx); +typedef struct type { + enum type_types type; + union { + struct { /* basic types */ + char *name; + }; + }; +} type; + +typedef struct lexpr { + enum lexpr_ops op; + union { + struct { /* assignments */ + type *type; + char *name; + rexpr *rexpr; + }; + }; +} lexpr; + +rexpr *parserexpr(tok *start, tok *end, mctx *ctx); void parserErr(const char *msg) { fprintf(stderr, "%s\n", msg); - exit(1); } int @@ -35,21 +62,22 @@ getpres(tok t) { return -1; case ADD: - return 1; case SUB: return 1; case MUL: - return 2; case DIV: - return 2; + return 4; case INT: - return 3; + return 6; case OBRACE: - return 4; + return 8; + + case NEGATE: + return 5; } - parserErr("Not an op"); + return -2; } int @@ -58,65 +86,112 @@ isop(tok t) { } tok * -findend(tok *start, tok *end) { /* this doesnt quite work yet */ +findend(tok *start, tok *end) { int d = 1; tok *t; - if (start->op != OBRACE) + if (start->op != OBRACE) { parserErr("Expected a '('"); + return NULL; + } for (t = &start[1]; d != 0; t = &t[1]) { - if (end->op != CBRACE && t == end) + if (end->op != CBRACE && t == end) { parserErr("unclosed brace"); - if (t->op == OBRACE) - d++; - else if (t->op == CBRACE) - d--; + return NULL; + } + + if (t->op == OBRACE) d++; + else if (t->op == CBRACE) d--; } return &t[-1]; } -rexpr * -pivotParse(tok *start, tok *end, mctx *ctx) { - rexpr *e = alloczctx(ctx, sizeof(rexpr)); - tok *lowest = start; +tok * +findlowest(tok *start, tok *end) { + tok *lowest = start, prev = {.op = LEOF}; for (tok *t = lowest ; t != end; t = &t[1]) { - if (t->op == CBRACE) - continue; - if (getpres(*t) < getpres(*lowest)) - lowest = t; - if (t->op == OBRACE) - t = findend(t, end); - - if (t == end) - break; - } + /* re assign ops, this is for when an op has multiple meanings */ + if (isop(prev) || prev.op == LEOF) { + switch 
(t->op) { + case SUB: + t->op = NEGATE; + break; + } + } + else if (getpres(*t) < getpres(*lowest)) lowest = t; - if (lowest == start) { - switch (lowest->op) { - case INT: - if (start != end) - parserErr("Trailing expression"); - e->op = INT; - e->n = lowest->n; - break; - case OBRACE: - e->op = OBRACE; - e->e = pivotParse(&lowest[1], &findend(lowest, end)[-1], ctx); - break; - default: - parserErr("Unexpected token type"); + /* move to the end brace, we only process the contents when nothing else is left */ + if (t->op == OBRACE) { + if (!(t = findend(t, end))) return NULL; } - } else { - if (!isop(*lowest)) { - printf("%c, %d\n", lowest->op, lowest->n); - parserErr("Expected op"); + + /* checks if any of our skips ahead have put us at the end of expr */ + if (t == end) break; + + prev = *t; + } + + return lowest; +} + +rexpr * +parsesimple(tok *start, tok *end, tok *lowest, mctx *ctx) { + rexpr *e; + switch (lowest->op) { + case INT: + if (start != end) { + parserErr("Trailing expression"); + return NULL; } - e->op = lowest->op; - e->expr[0] = pivotParse(start, &lowest[-1], ctx); - e->expr[1] = pivotParse(&lowest[1], end, ctx); + e->op = INT; + e->n = lowest->n; + break; + case OBRACE: + e->op = OBRACE; + e->e = parserexpr(&lowest[1], &findend(lowest, end)[-1], ctx); + break; + case NEGATE: + e->op = NEGATE; + e->e = parserexpr(&lowest[1], end, ctx); + break; + default: + parserErr("Unexpected token type"); + return NULL; + } + + return e; +} + +rexpr * +parsebin(tok *start, tok *end, tok *lowest, mctx *ctx) { + rexpr *e = alloczctx(ctx, sizeof(rexpr)); + + if (!isop(*lowest)) { + parserErr("Expected op"); + return NULL; } + + e->op = lowest->op; + if (!(e->expr[0] = parserexpr(start, &lowest[-1], ctx))) return NULL; + else if (!(e->expr[1] = parserexpr(&lowest[1], end, ctx))) return NULL; + + return e; +} + +rexpr * +parserexpr(tok *start, tok *end, mctx *ctx) { + rexpr *e; + tok *lowest; + if (!(lowest = findlowest(start, end))) return NULL; + + /* simple 1 
term expr ops */ + if (lowest == start) e = parsesimple(start, end, lowest, ctx); + + /* binary ops */ + else e = parsebin(start, end, lowest, ctx); + return e; } @@ -127,6 +202,8 @@ eval(rexpr *e) { return e->n; case OBRACE: return eval(e->e); + case NEGATE: + return -eval(e->e); case ADD: return eval(e->expr[0]) + eval(e->expr[1]); case SUB: @@ -138,17 +215,88 @@ eval(rexpr *e) { } } +type * +parsetype(lex *l, mctx *ctx) { + SAVE(l); + type *ty; + tok t = next(l); + if (t.op != NAME) { + RESTORE(l); + return NULL; + } + ty = alloczctx(ctx, sizeof(type)); + ty->type = BASIC; + ty->name = t.name; + return ty; +} + +lexpr * +parseassign(lex *l, tok name, mctx *ctx) { + SAVE(l); + lexpr *le; + type *ty; + rexpr *r; + tok *arr, t; + int count = 1; + + if (!(ty = parsetype(l, ctx))) { + RESTORE(l); + return NULL; + } + if (next(l).op != ASSIGN) { + RESTORE(l); + return NULL; + } + + arr = alloczctx(ctx, sizeof(tok)); + while ((t = next(l)).op != LEOF && t.op != SEMI) { + arr[count++ - 1] = t; + arr = realloczctx(ctx, arr, sizeof(tok) * count); + } + if (count == 1) { + RESTORE(l); + return NULL; + } else if (!(r = parserexpr(arr, &arr[count - 2], ctx))) { + RESTORE(l); + return NULL; + } + + le = alloczctx(ctx, sizeof(lexpr)); + le->op = LEXPR_ASSIGN; + le->name = name.name; + le->type = ty; + le->rexpr = r; + return le; +} + +lexpr * +parselexpr(lex *l, mctx *ctx) { + SAVE(l); + lexpr *le; + tok t = next(l); + switch (t.op) { + case NAME: + le = parseassign(l, t, ctx); + break; + default: + parserErr("unexpected tok type"); + } + if (!le) { + RESTORE(l); + return NULL; + } + + return le; +} + int main() { - lex l = mklexer("1 - (5 + 5)"); - tok list[32] = {0}, t = next(&l); - int i; - for (i = 0; i < 32 && t.op != LEOF; i++, t = next(&l)) { - list[i] = t; - } + lex l = mklexer("a int = 5 +);"); mctx *ctx = newctx(); - rexpr *e = pivotParse(list, &list[i-1], ctx); - printf("%d\n", eval(e)); + lexpr *e = parselexpr(&l, ctx); + if (!e) { + parserErr("failed to parse 
lexpr"); + } freectx(ctx); } diff --git a/util.c b/util.c @@ -30,7 +30,7 @@ realloczctx(mctx *ctx, void *ptr, const size_t size) { for (int i = 0; i < ctx->ptrc; i++) { if (ctx->ptrs[i] == ptr) { ctx->ptrs[i] = realloc(ctx->ptrs[i], size); - memset(ctx->ptrs[i], 0, size); + //memset(ctx->ptrs[i], 0, size); return ctx->ptrs[i]; } }