commit 085a1dc565b0b0917f4fb6a231e279a6d9d7b9b7
parent 17383102ddec303f3707e1c2168f017ae18acc7b
Author: thing1 <thing1@seacrossedlovers.xyz>
Date: Wed, 18 Mar 2026 14:12:31 +0000
first working version of the lexer
Diffstat:
| A | .gitignore | | | 4 | ++++ |
| A | Makefile | | | 12 | ++++++++++++ |
| M | comp.y | | | 32 | ++++++++++++++++++++++++++++++-- |
| M | lex.c | | | 230 | ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- |
4 files changed, 275 insertions(+), 3 deletions(-)
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+lex
+*.o
+y.tab.c
+y.tab.h
diff --git a/Makefile b/Makefile
@@ -0,0 +1,12 @@
+# Flags passed to every compile and link step (debug symbols only).
+CFLAGS=-ggdb
+
+# These targets do not name files, so never treat them as up to date.
+.PHONY: all clean
+
+all: lex
+
+# Generate the parser source; -d also writes y.tab.h, which lex.c includes.
+y.tab.c: comp.y
+	yacc -ytd comp.y
+
+# comp.y includes comp.h, so the parser object must be rebuilt when it changes.
+comp.o: y.tab.c comp.h
+	cc -DYYDEBUG y.tab.c -c -o comp.o ${CFLAGS}
+
+# lex.c includes comp.h and y.tab.h; depending on comp.o guarantees that
+# y.tab.h has been generated before lex.c is compiled.
+lex: lex.c comp.o comp.h
+	cc lex.c comp.o -o lex ${CFLAGS}
+
+# Remove everything the rules above generate.
+clean:
+	rm -f lex comp.o y.tab.c y.tab.h
diff --git a/comp.y b/comp.y
@@ -1,5 +1,15 @@
%{
+#include <stdio.h>
#include "comp.h"
+
+/* Row/column pair; same layout as struct Pos in lex.c. */
+struct Pos {int row, col;};
+/*
+ * Abbreviated view of the token type: lex.c defines struct Token with pos as
+ * the first member followed by further members (type, data, dataend). Only
+ * pos is read in this file.
+ * NOTE(review): the two definitions differ between translation units;
+ * consider moving a single definition into a shared header.
+ */
+struct Token {struct Pos pos;};
+
+/* Defined in lex.c. */
+extern int yylex();
+/* Most recently lexed token, defined in lex.c; yyerror() reports its position. */
+extern struct Token lasttok;
+
+/* Defined after the grammar, at the end of this file. */
+void yyerror(const char *msg);
+
%}
%union {
@@ -10,6 +20,13 @@
Value *value;
}
+%type <func> func
+%type <type> type
+%type <rhs> name
+%type <rhs> rhs
+%type <expr> expr
+%type <value> value
+
%token FUNC
%token NAME
%token INT
@@ -24,7 +41,8 @@ prog : func
| prog func
;
-func : FUNC name '(' args ')' type '{' exprs '}'
+func : FUNC name '(' args ')' type '{' exprs '}'
+ | FUNC name '(' ')' type '{' exprs '}'
;
name : NAME
@@ -51,7 +69,7 @@ params : rhs
rhs : value
| name '(' params ')'
- | rhs '+' rhs
+ | rhs '+' rhs
| rhs '-' rhs
| rhs '*' rhs
| rhs '/' rhs
@@ -62,3 +80,13 @@ value : INT
| FLOAT
;
%%
+
+/*
+ * Render a position as "row:col".
+ *
+ * The result lives in a static buffer that is overwritten by the next call,
+ * so the caller must use it before calling getpos() again.
+ */
+char *getpos(struct Pos p) {
+	static char text[128];
+
+	snprintf(text, sizeof text, "%d:%d", p.row, p.col);
+	return text;
+}
+
+/*
+ * Print msg to stderr, prefixed with the "row:col" position of the most
+ * recently lexed token (lasttok).
+ */
+void yyerror(const char *msg) {
+	const char *where = getpos(lasttok.pos);
+
+	fprintf(stderr, "%s: %s\n", where, msg);
+}
diff --git a/lex.c b/lex.c
@@ -1,5 +1,233 @@
-int yylex() {
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "comp.h"
+#include "y.tab.h"
+
+#define LEXRET(t) lasttok = (t); return (t);
+
+extern void yyerror(const char *msg);
+
+/*
+ * Position in the input as tracked by the lexer: skip() increments row on
+ * each newline and resets col; move() adds the consumed length to col.
+ */
+struct Pos {
+ int row, col;
+};
+
+/* One lexed token, as produced by the lex_* functions. */
+struct Token {
+ struct Pos pos; /* lexer position where the token started */
+ short type; /* token code handed to the parser by yylex(): FUNC, NAME, INT, or the character itself for punctuation */
+ void *data; /* start of the lexeme inside the input (NAME and INT); NULL otherwise */
+ void *dataend; /* one past the end of the lexeme; NULL when data is NULL */
+};
+
+/* Lexer state: where we are in the input and what to lex next. */
+struct Lexer {
+ struct Pos pos; /* current position, updated by move() and skip() */
+ char *input; /* remaining unread input; NUL-terminated */
+ struct Token (*f)(); /* function that lexes the next token; called by yylex() and reassigned by the lex_* functions */
+};
+
+/*
+ * Forward declarations of the per-token lexer functions. Each one consumes a
+ * single token and stores in lex.f the function that should lex the token
+ * that follows, so they reference each other.
+ */
+struct Token lex_func();
+struct Token lex_name();
+struct Token lex_obrace();
+struct Token lex_cbrace();
+struct Token lex_ocbrace();
+struct Token lex_ccbrace();
+struct Token lex_semi();
+struct Token lex_int();
+struct Token lex_add();
+struct Token lex_sub();
+struct Token lex_mul();
+struct Token lex_div();
+struct Token lex_comma();
+
+/* The single global lexer state; main() fills it in before parsing. */
+struct Lexer lex = {0};
+
+/* Most recently returned token (set by LEXRET); read by yyerror() in comp.y. */
+struct Token lasttok = {0};
+
+/* Advance the lexer by off characters, keeping the column counter in step. */
+void move(int off) {
+	lex.pos.col += off;
+	lex.input += off;
+}
+
+/*
+ * Skip blanks (space, tab) and newlines at the current position.
+ *
+ * A newline starts a new row at column 0, so the first character of every
+ * row has column 0, matching the {0, 0} starting position of the first row.
+ * (Previously the column was reset and then advanced again, which made every
+ * row after the first 1-based.)
+ */
+void skip() {
+	for (;;) {
+		/* ctype functions require a value representable as unsigned char. */
+		unsigned char c = (unsigned char)lex.input[0];
+
+		if (c == '\n') {
+			lex.input++;
+			lex.pos.row++;
+			lex.pos.col = 0;
+		} else if (c != '\0' && isblank(c)) {
+			move(1);
+		} else {
+			break;
+		}
+	}
+}
+
+/*
+ * Return the next significant character without consuming it.
+ * Leading blanks and newlines ARE consumed as a side effect.
+ */
+char peek() {
+	skip();
+	return *lex.input;
+}
+
+/*
+ * Lex the single-character token c.
+ *
+ * Surrounding whitespace is skipped. If the next character is not c an error
+ * is reported through yyerror(), but a token of type c is still returned and
+ * the offending character is consumed. The NUL terminator is never consumed,
+ * so the lexer cannot run off the end of the input.
+ *
+ * Returns the token (type == c, no lexeme) and records it in lasttok.
+ */
+struct Token lex_char(char c) {
+	skip();
+	struct Pos start = lex.pos;
+
+	if (lex.input[0] != c)
+		yyerror("Expected different char");
+	if (lex.input[0] != '\0')
+		move(1);
+	skip();
+
+	LEXRET(((struct Token){start, c, NULL, NULL}));
+}
+
+/*
+ * Lex the `func` keyword at the current position.
+ *
+ * strncmp() stops at the terminator, so inputs shorter than the keyword are
+ * not read out of bounds. On a mismatch an error is reported and up to four
+ * characters are skipped (as before), but never past the end of the input.
+ *
+ * Returns a FUNC token and selects lex_name for the following token.
+ */
+struct Token lex_func() {
+	struct Pos start = lex.pos;
+	int len = (int)(sizeof "func" - 1);
+
+	if (strncmp(lex.input, "func", sizeof "func" - 1) != 0) {
+		int limit = len;
+
+		yyerror("Expected func!");
+		for (len = 0; len < limit && lex.input[len] != '\0'; len++)
+			;
+	}
+	move(len);
+
+	lex.f = &lex_name;
+	LEXRET(((struct Token){start, FUNC, NULL, NULL}));
}
+/*
+ * Lex an identifier: a letter followed by any number of letters, digits or
+ * underscores (previously only a single letter was accepted).
+ *
+ * If the current character is not a letter an error is reported and that one
+ * character is consumed as the name, so the lexer still makes progress; the
+ * NUL terminator is never consumed.
+ *
+ * The token's data/dataend delimit the lexeme inside the input. The next
+ * lexer function is chosen from the character that follows; ')' and ';' are
+ * accepted so that a name can end an argument list or a statement.
+ */
+struct Token lex_name() {
+	struct Pos start = lex.pos;
+	char *begin = lex.input;
+	int len = 0;
+
+	/* ctype functions require a value representable as unsigned char. */
+	if (isalpha((unsigned char)begin[0])) {
+		do {
+			len++;
+		} while (isalnum((unsigned char)begin[len]) || begin[len] == '_');
+	} else {
+		yyerror("Expected name!");
+		len = begin[0] != '\0';
+	}
+
+	struct Token t = {start, NAME, begin, begin + len};
+	move(len);
+
+	switch (peek()) {
+	case '(': lex.f = &lex_obrace; break;
+	case ')': lex.f = &lex_cbrace; break;
+	case '{': lex.f = &lex_ocbrace; break;
+	case '+': lex.f = &lex_add; break;
+	case '-': lex.f = &lex_sub; break;
+	case '*': lex.f = &lex_mul; break;
+	case '/': lex.f = &lex_div; break;
+	case ',': lex.f = &lex_comma; break;
+	case ';': lex.f = &lex_semi; break;
+	default: yyerror("Unexpected token!");
+	}
+
+	LEXRET(t);
+}
+
+/*
+ * Lex '(' and choose the lexer for what follows: ')' (empty list), a nested
+ * '(', an integer, or a name. Names are accepted here for consistency with
+ * lex_comma, which already allows a name as a later argument.
+ */
+struct Token lex_obrace() {
+	struct Token t = lex_char('(');
+	char next = peek();
+
+	/* ctype functions require a value representable as unsigned char. */
+	if (next == ')')
+		lex.f = &lex_cbrace;
+	else if (next == '(')
+		lex.f = &lex_obrace;
+	else if (isdigit((unsigned char)next))
+		lex.f = &lex_int;
+	else if (isalpha((unsigned char)next))
+		lex.f = &lex_name;
+	else
+		yyerror("Unexpected token!");
+
+	LEXRET(t);
+}
+/*
+ * Lex an integer literal: one or more decimal digits (previously only a
+ * single digit was accepted).
+ *
+ * If the current character is not a digit an error is reported and that one
+ * character is consumed as the literal, so the lexer still makes progress;
+ * the NUL terminator is never consumed.
+ *
+ * The token's data/dataend delimit the digits inside the input; no numeric
+ * conversion happens here. The next lexer function is chosen from the
+ * character that follows.
+ */
+struct Token lex_int() {
+	struct Pos start = lex.pos;
+	char *begin = lex.input;
+	int len = 0;
+
+	/* ctype functions require a value representable as unsigned char. */
+	if (isdigit((unsigned char)begin[0])) {
+		do {
+			len++;
+		} while (isdigit((unsigned char)begin[len]));
+	} else {
+		yyerror("Expected number!");
+		len = begin[0] != '\0';
+	}
+
+	struct Token t = {start, INT, begin, begin + len};
+	move(len);
+
+	switch (peek()) {
+	case ')': lex.f = &lex_cbrace; break;
+	case '+': lex.f = &lex_add; break;
+	case '-': lex.f = &lex_sub; break;
+	case '*': lex.f = &lex_mul; break;
+	case '/': lex.f = &lex_div; break;
+	case ',': lex.f = &lex_comma; break;
+	case ';': lex.f = &lex_semi; break;
+	default: yyerror("Unexpected token!");
+	}
+
+	LEXRET(t);
+}
+
+/* Lex '{'; whatever follows it is lexed as a name. */
+struct Token lex_ocbrace() {
+	struct Token brace = lex_char('{');
+
+	/* Called only for its side effect of skipping whitespace. */
+	(void)peek();
+	lex.f = &lex_name;
+	LEXRET(brace);
+}
+
+/*
+ * Lex '}'.
+ *
+ * If the `func` keyword follows (as a whole word), select lex_func so that a
+ * further function definition can be lexed; previously lex_name was always
+ * selected and a second `func` could never be recognized. Anything else is
+ * lexed as a name, as before.
+ */
+struct Token lex_ccbrace() {
+	struct Token t = lex_char('}');
+
+	(void)peek();
+	/* strncmp() matching all four characters guarantees input[4] is readable. */
+	if (strncmp(lex.input, "func", 4) == 0 &&
+	    !isalnum((unsigned char)lex.input[4]) && lex.input[4] != '_')
+		lex.f = &lex_func;
+	else
+		lex.f = &lex_name;
+
+	LEXRET(t);
+}
+
+/*
+ * Lex ')' and pick the next lexer function from the character that follows.
+ * Characters not listed in the table fall back to lex_name.
+ */
+struct Token lex_cbrace() {
+	static const struct {
+		char ch;
+		struct Token (*fn)();
+	} follow[] = {
+		{'(', &lex_obrace},
+		{')', &lex_cbrace},
+		{'+', &lex_add},
+		{'-', &lex_sub},
+		{'*', &lex_mul},
+		{'/', &lex_div},
+		{',', &lex_comma},
+		{';', &lex_semi},
+	};
+	struct Token paren = lex_char(')');
+	char next = peek();
+
+	lex.f = &lex_name;
+	for (size_t i = 0; i < sizeof follow / sizeof follow[0]; i++) {
+		if (follow[i].ch == next) {
+			lex.f = follow[i].fn;
+			break;
+		}
+	}
+
+	LEXRET(paren);
+}
+
+/* Lex ';'. A following '}' is lexed by lex_ccbrace, anything else as a name. */
+struct Token lex_semi() {
+	struct Token semi = lex_char(';');
+
+	lex.f = (peek() == '}') ? &lex_ccbrace : &lex_name;
+
+	LEXRET(semi);
+}
+
+/*
+ * Lex ',' and choose the lexer for the next argument: an integer, a name,
+ * or a parenthesised expression. Anything else is reported as an error and
+ * leaves lex.f unchanged.
+ */
+struct Token lex_comma() {
+	struct Token t = lex_char(',');
+	char next = peek();
+
+	/* ctype functions require a value representable as unsigned char. */
+	if (isdigit((unsigned char)next))
+		lex.f = &lex_int;
+	else if (isalpha((unsigned char)next))
+		lex.f = &lex_name;
+	else if (next == '(')
+		lex.f = &lex_obrace;
+	else
+		yyerror("Unexpected token!");
+
+	LEXRET(t);
+}
+
+
+
+/*
+ * MAKEOP(name, op) defines lex_<name>(), which lexes the binary operator
+ * character op and chooses the lexer for its right-hand operand: an integer,
+ * a name, or a parenthesised expression ('(' is accepted here for
+ * consistency with lex_comma). Anything else is reported as an error and
+ * leaves lex.f unchanged. ctype arguments are cast to unsigned char as the
+ * ctype functions require.
+ */
+#define MAKEOP(name, op) struct Token lex_##name() { \
+	struct Token t = lex_char(op); \
+	char next = peek(); \
+	if (isdigit((unsigned char)next)) \
+		lex.f = &lex_int; \
+	else if (isalpha((unsigned char)next)) \
+		lex.f = &lex_name; \
+	else if (next == '(') \
+		lex.f = &lex_obrace; \
+	else \
+		yyerror("Unexpected token!"); \
+	LEXRET(t); \
+}
+
+MAKEOP(add, '+')
+MAKEOP(sub, '-')
+MAKEOP(mul, '*')
+MAKEOP(div, '/')
+
+/*
+ * Parser entry point: skip whitespace, then return YYEOF at the end of the
+ * input or the type of the token produced by the current lexer function.
+ */
+int yylex() {
+	skip();
+	if (*lex.input == '\0')
+		return YYEOF;
+
+	struct Token tok = lex.f();
+	return tok.type;
+}
+
+/*
+ * Run the parser over a built-in sample program.
+ *
+ * The exit status is the result of yyparse(), so a parse failure is visible
+ * to the caller instead of always exiting with 0.
+ */
+int main() {
+	lex = (struct Lexer){
+		.pos = {0, 0},
+		.input = "func f() v {\n\tg((1 + 5) * 2);\nh(3, 5);\n}",
+		.f = &lex_func,
+	};
+	return yyparse();
+}