comp

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit 085a1dc565b0b0917f4fb6a231e279a6d9d7b9b7
parent 17383102ddec303f3707e1c2168f017ae18acc7b
Author: thing1 <thing1@seacrossedlovers.xyz>
Date:   Wed, 18 Mar 2026 14:12:31 +0000

first working version of the lexer

Diffstat:
A .gitignore | 4 ++++
A Makefile | 12 ++++++++++++
M comp.y | 32 ++++++++++++++++++++++++++++++--
M lex.c | 230 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
4 files changed, 275 insertions(+), 3 deletions(-)

diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,4 @@
+lex
+*.o
+y.tab.c
+y.tab.h
diff --git a/Makefile b/Makefile
@@ -0,0 +1,12 @@
+CFLAGS=-ggdb
+
+all: lex
+
+y.tab.c: comp.y
+	yacc -ytd comp.y
+
+comp.o: y.tab.c
+	cc -DYYDEBUG y.tab.c -c -o comp.o ${CFLAGS}
+
+lex: lex.c comp.o
+	cc lex.c comp.o -o lex ${CFLAGS}
diff --git a/comp.y b/comp.y
@@ -1,5 +1,15 @@
 %{
+#include <stdio.h>
 #include "comp.h"
+
+struct Pos {int row, col;};
+struct Token {struct Pos pos; short type; void *data; void *dataend;}; /* same members as in lex.c: lasttok is one object shared by both files */
+
+extern int yylex();
+extern struct Token lasttok; /* defined in lex.c; last token handed to the parser */
+
+void yyerror(const char *msg);
+
 %}
 
 %union {
@@ -10,6 +20,13 @@
 	Value *value;
 }
 
+%type <func> func
+%type <type> type
+%type <rhs> name
+%type <rhs> rhs
+%type <expr> expr
+%type <value> value
+
 %token FUNC
 %token NAME
 %token INT
@@ -24,7 +41,8 @@ prog : func
 	| prog func
 	;
 
-func : FUNC name '(' args ')' type '{' exprs '}'
+func	: FUNC name '(' args ')' type '{' exprs '}'
+	| FUNC name '(' ')' type '{' exprs '}'
 	;
 
 name : NAME
@@ -51,7 +69,7 @@ params : rhs
 
 rhs : value
 	| name '(' params ')'
-        | rhs '+' rhs
+	| rhs '+' rhs
 	| rhs '-' rhs
 	| rhs '*' rhs
 	| rhs '/' rhs
@@ -62,3 +80,13 @@ value : INT
 	| FLOAT
 	;
 %%
+
+char *getpos(struct Pos p) { /* format p as "row:col"; returns a static buffer that each call overwrites */
+	static char buf[128];
+	snprintf(buf, sizeof buf, "%d:%d", p.row, p.col);
+	return buf;
+}
+
+void yyerror(const char *msg) { /* print msg to stderr, prefixed with the position of the last token */
+	fprintf(stderr, "%s: %s\n", getpos(lasttok.pos), msg);
+}
diff --git a/lex.c b/lex.c
@@ -1,5 +1,233 @@
-int yylex() {
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+
+#include "comp.h"
+#include "y.tab.h"
+
+#define LEXRET(t) do { lasttok = (t); return (t); } while (0) /* remember t in lasttok, then return it; wrapped so it acts as one statement */
+
+extern void yyerror(const char *msg);
+
+struct Pos {
+	int row, col;
+};
+
+struct Token {
+	struct Pos pos; /* position where the token starts */
+	short type; /* yacc token code, or the character itself for single-character tokens */
+	void *data; /* first byte of the token text inside the input, or NULL */
+	void *dataend; /* one past the last byte of the token text, or NULL */
+};
+
+struct Lexer {
+	struct Pos pos; /* current row/column */
+	char *input; /* remaining, NUL-terminated input */
+	struct Token (*f)(); /* state function that lexes the next token */
+};
+
+struct Token lex_func();
+struct Token lex_name();
+struct Token lex_obrace();
+struct Token lex_cbrace();
+struct Token lex_ocbrace();
+struct Token lex_ccbrace();
+struct Token lex_semi();
+struct Token lex_int();
+struct Token lex_add();
+struct Token lex_sub();
+struct Token lex_mul();
+struct Token lex_div();
+struct Token lex_comma();
+
+struct Lexer lex = {0};
+
+struct Token lasttok = {0};
+
+void move(int off) { /* advance the input pointer and the column counter by off */
+	lex.input += off;
+	lex.pos.col += off;
+}
+
+void skip() { /* consume blanks and newlines, keeping row/col up to date */
+	while (lex.input[0] && (isblank((unsigned char)lex.input[0]) || lex.input[0] == '\n')) {
+		if (lex.input[0] == '\n') {
+			lex.pos.row++;
+			lex.pos.col = 0;
+		}
+		move(1);
+	}
+}
+
+char peek() { /* skip whitespace, then return the next character without consuming it */
+	skip();
+	return lex.input[0];
+}
+
+struct Token lex_char(char c) { /* lex the single character c; the token type is c itself */
+	skip();
+	struct Pos start = lex.pos;
+
+	if (lex.input[0] != c)
+		yyerror("Expected different char");
+	move(1); /* advances even after a mismatch was reported */
+	skip();
+
+	LEXRET(((struct Token){start, c, NULL, NULL}));
+}
+
+struct Token lex_func() { /* lex the keyword "func"; a name is lexed next */
+	struct Pos start = lex.pos;
+	if (strncmp(lex.input, "func", 4) != 0) /* strncmp stops at the terminator, so short input is not over-read */
+		yyerror("Expected func!");
+	move(4);
+
+	lex.f = &lex_name;
+	LEXRET(((struct Token){start, FUNC, NULL, NULL}));
 
 }
 
+struct Token lex_name() { /* lex a one-character name, then choose the next state from the following character */
+	struct Pos start = lex.pos;
+
+	if (!isalpha((unsigned char)lex.input[0]))
+		yyerror("Expected name!");
+
+	struct Token t = {start, NAME, lex.input, lex.input + 1};
+	move(1);
+
+	switch (peek()) {
+	case '(': lex.f = &lex_obrace; break;
+	case '{': lex.f = &lex_ocbrace; break;
+	case '+': lex.f = &lex_add; break;
+	case '-': lex.f = &lex_sub; break;
+	case '*': lex.f = &lex_mul; break;
+	case '/': lex.f = &lex_div; break;
+	case ',': lex.f = &lex_comma; break;
+	default: yyerror("Unexpected token!");
+	}
+
+	LEXRET(t);
+}
+
+struct Token lex_obrace() { /* lex '('; next may be ')', another '(' or a digit */
+	struct Token t = lex_char('(');
+
+	switch (peek()) {
+	case ')': lex.f = &lex_cbrace; break;
+	case '(': lex.f = &lex_obrace; break;
+	default:
+		if (isdigit((unsigned char)peek())) lex.f = &lex_int;
+		else yyerror("Unexpected token!");
+	}
+
+	LEXRET(t);
+}
 
+struct Token lex_int() { /* lex a one-digit integer, then choose the next state from the following character */
+	struct Pos start = lex.pos;
+
+	if (!isdigit((unsigned char)lex.input[0]))
+		yyerror("Expected number!");
+
+	struct Token t = {start, INT, lex.input, lex.input + 1};
+	move(1);
+
+	switch (peek()) {
+	case ')': lex.f = &lex_cbrace; break;
+	case '+': lex.f = &lex_add; break;
+	case '-': lex.f = &lex_sub; break;
+	case '*': lex.f = &lex_mul; break;
+	case '/': lex.f = &lex_div; break;
+	case ',': lex.f = &lex_comma; break;
+	default: yyerror("Unexpected token!");
+	}
+
+	LEXRET(t);
+}
+
+struct Token lex_ocbrace() { /* lex '{'; a name is always lexed next */
+	struct Token t = lex_char('{');
+	switch (peek()) {
+	default: lex.f = &lex_name; break;
+	}
+	LEXRET(t);
+}
+
+struct Token lex_ccbrace() { /* lex '}'; a name is lexed next */
+	struct Token t = lex_char('}');
+	lex.f = &lex_name;
+	LEXRET(t);
+}
+
+struct Token lex_cbrace() { /* lex ')'; anything that is not punctuation listed below is lexed as a name */
+	struct Token t = lex_char(')');
+
+	switch (peek()) {
+	case '(': lex.f = &lex_obrace; break;
+	case ')': lex.f = &lex_cbrace; break;
+	case '+': lex.f = &lex_add; break;
+	case '-': lex.f = &lex_sub; break;
+	case '*': lex.f = &lex_mul; break;
+	case '/': lex.f = &lex_div; break;
+	case ',': lex.f = &lex_comma; break;
+	case ';': lex.f = &lex_semi; break;
+	default: lex.f = &lex_name; break;
+	}
+
+	LEXRET(t);
+}
+
+struct Token lex_semi() { /* lex ';'; next is '}' or a name */
+	struct Token t = lex_char(';');
+
+	switch (peek()) {
+	case '}': lex.f = &lex_ccbrace; break;
+	default: lex.f = &lex_name; break;
+	}
+
+	LEXRET(t);
+}
+
+struct Token lex_comma() { /* lex ','; next is a digit, a name or '(' */
+	struct Token t = lex_char(',');
+
+	if (isdigit((unsigned char)peek()))
+		lex.f = &lex_int;
+	else if (isalpha((unsigned char)peek()))
+		lex.f = &lex_name;
+	else switch (peek()) {
+	case '(': lex.f = &lex_obrace; break;
+	default: yyerror("Unexpected token!");
+	}
+
+	LEXRET(t);
+}
+
+
+
+#define MAKEOP(name, op) struct Token lex_##name() { \
+	struct Token t = lex_char(op); \
+	if (isdigit((unsigned char)peek())) \
+		lex.f = &lex_int; \
+	else if (isalpha((unsigned char)peek())) \
+		lex.f = &lex_name; \
+	else yyerror("Unexpected token!"); \
+	LEXRET(t); \
+}
+
+MAKEOP(add, '+') /* each line defines lex_<name>(): lex the operator, then expect a digit or a name */
+MAKEOP(sub, '-')
+MAKEOP(mul, '*')
+MAKEOP(div, '/')
+
+int yylex() { /* parser entry point: YYEOF at end of input, otherwise the type of the token produced by the current state */
+	skip();
+	if (lex.input[0] == 0) return YYEOF;
+	return lex.f().type;
+}
+
+int main() { /* runs the parser over a hard-coded sample program, starting in the lex_func state */
+	lex = (struct Lexer){(struct Pos){0, 0}, "func f() v {\n\tg((1 + 5) * 2);\nh(3, 5);\n}", &lex_func};
+	yyparse();
+}