commit 75b1868517b633d8dec332fefc4ae697539687ba
parent 0e31cba13af2a9fbb5111e36dfbc5748d6a4add2
Author: thing1 <l.standen@posteo.com>
Date: Sun, 5 Oct 2025 22:58:38 +0100
Started lexer
Diffstat:
| M | Makefile | | | 14 | +++++++++++++- |
| M | lex.c | | | 135 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------------------ |
| A | lex.h | | | 43 | +++++++++++++++++++++++++++++++++++++++++++ |
| A | main.c | | | 15 | +++++++++++++++ |
| A | test.hlc | | | 1 | + |
5 files changed, 177 insertions(+), 31 deletions(-)
diff --git a/Makefile b/Makefile
@@ -1,7 +1,19 @@
-all: spec
+# Toolchain: POSIX c89 compiler driver, debug info for gdb.
+CC = c89
+CFLAGS = -ggdb
+
+# Compiler sources and the object files derived from them.
+SRC = lex.c main.c
+OBJ = $(SRC:.c=.o)
+
+# Default target: render the spec and build the hlc binary.
+all: spec hlc
spec: spec.md
smu spec.md > spec.html
+# Compile each C source into an object file.
+.c.o:
+	${CC} -c ${CFLAGS} $<
+
+# Every object includes lex.h, so rebuild them when the header changes.
+${OBJ}: lex.h
+
+# Link the compiler binary from all objects.
+hlc: ${OBJ}
+	${CC} -o $@ ${OBJ} ${CFLAGS}
+
+
clean:
rm -rf *.html *.o hlc
diff --git a/lex.c b/lex.c
@@ -1,33 +1,108 @@
-#include <stdlib.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
+#include <ctype.h>
+
+#include "lex.h"
+
+/* Pointer to the terminating NUL of string s (s is evaluated twice). */
+#define strnul(s) ((s) + strlen(s))
+
+/* Value whose address is returned whenever the input cannot be lexed. */
+lex_val lex_error = { UNKNOWN, NULL };
+/* The single token slot filled in by the lex_* functions; get_next() zeroes it. */
+lex_val lv = { UNKNOWN, NULL };
+
+/* Stream being lexed, installed by init_lexer(). */
+FILE *input;
+/* State function that will lex the next token. */
+lex_val *(*nextfn)(void);
+
+/*
+ * Read the next character from `in`, skipping spaces and horizontal tabs.
+ * Newlines are not skipped. Returns the character read, or EOF.
+ *
+ * The blank test is spelled out instead of calling isblank(), which is a
+ * C99 addition and is not available to a strict c89 build.
+ */
+int
+getchr(FILE *in) {
+	int c;
+	do {
+		c = getc(in);
+	} while (c == ' ' || c == '\t');
+	return c;
+}
+
+/*
+ * Consume the literal `str` from the global input stream.
+ *
+ * Blanks are skipped only in front of the first character; the remaining
+ * characters must follow immediately, so "b yte" no longer matches "byte".
+ * Returns `str` on a full match (an empty `str` matches without reading),
+ * or NULL at the first mismatch. Characters read before a mismatch are
+ * not pushed back.
+ */
+char *
+check_bytes(char *str) {
+	char *s = str;
+	if (!*s) return str;
+	if (getchr(input) != (unsigned char)*s) return NULL;
+	for (s++; *s; s++)
+		if (getc(input) != (unsigned char)*s) return NULL;
+	return str;
+}
+
+/*
+ * Return the next non-blank character of `in` without consuming it.
+ * Blanks in front of that character are consumed. Returns EOF at end of
+ * input, in which case nothing is pushed back.
+ */
+int
+peekc(FILE *in) {
+	int next = getchr(in);
+	/* ungetc(EOF, ...) leaves the stream untouched, so skipping it is equivalent */
+	if (next != EOF) ungetc(next, in);
+	return next;
+}
+
+/*
+ * Lex one type token: the keywords "byte", "short", "long", or a '*'.
+ *
+ * The token is chosen from the next non-blank character and then matched
+ * in full with check_bytes(). On success lv is filled in and returned;
+ * otherwise &lex_error is returned. After a successful token, a following
+ * '*' keeps the lexer in this state, anything else hands over to lex_name.
+ */
+lex_val *
+lex_type(void) {
+	static const struct {
+		char first;		/* character that selects this entry */
+		char *text;		/* full spelling that must follow */
+		enum lex_type type;	/* token kind produced */
+	} words[] = {
+		{ 'b', "byte",  BYTE  },
+		{ 's', "short", SHORT },
+		{ 'l', "long",  LONG  },
+		{ '*', "*",     PTR   }
+	};
+	const size_t nwords = sizeof words / sizeof words[0];
+	size_t i;
+	int first = peekc(input);
+
+	for (i = 0; i < nwords; i++)
+		if (words[i].first == first) break;
+	if (i == nwords) return &lex_error;
+
+	lv.data = check_bytes(words[i].text);
+	if (!lv.data) return &lex_error;
+	lv.type = words[i].type;
+
+	nextfn = (peekc(input) == '*') ? &lex_type : &lex_name;
+	return &lv;
+}
+
+/*
+ * Lex an identifier: a letter followed by letters and digits, at most
+ * 31 characters long.
+ *
+ * Blanks in front of the identifier are skipped. On success lv.type is
+ * NAME and lv.data points at a static buffer that is overwritten by the
+ * next call. The character that ends the identifier is pushed back so
+ * the next token can see it. Returns &lex_error if the input does not
+ * start with a letter or the identifier is too long.
+ */
+lex_val *
+lex_name(void) {
+	static char name[32];
+	size_t len = 0;
+	int c;	/* int, not char, so EOF stays distinguishable */
+
+	c = getchr(input);
+	if (c == EOF || !isalpha(c)) return &lex_error;
+	do {
+		/* always leave room for the terminating NUL */
+		if (len + 1 >= sizeof name) return &lex_error;
+		name[len++] = (char)c;
+	} while ((c = getc(input)) != EOF && isalnum(c));
+	/* terminate explicitly: the buffer still holds the previous name */
+	name[len] = '\0';
+	if (c != EOF) ungetc(c, input);
+
+	lv.data = name;
+	lv.type = NAME;
+
+	/*
+	TODO: choose the next state from the character after the name.
+	switch (peekc(input)) {
+	case '=': nextfn = &lex_assign; break; CHECK FOR == and =
+	case ',': nextfn = &lex_comma; break;
+	case ';': nextfn = &lex_semi; break;
+
+	case '<':
+	case '>':
+	case '!':
+	case '+':
+	case '-':
+	case '/':
+	case '*': CHECK FOR MUL AND PTR
+	case '&':
+		nextfn = &lex_sym; break;
+		break;
+	default: return &lex_error;
+	}
+	*/
+
+	return &lv;
+}
+
+/*
+ * Point the lexer at stream `in` and reset it to expect a type first.
+ *
+ * If `in` is NULL (for example a failed fopen) no state function is
+ * installed, so get_next() returns NULL instead of reading from a null
+ * stream.
+ */
+void
+init_lexer(FILE *in) {
+	input = in;
+	nextfn = in ? &lex_type : NULL;
+}
-enum lex_type {
- STRING,
- NAME,
- NUM,
- SEMI,
- DEREF,
- UNARRAY,
- QUOTE,
- OBRACE,
- CBRACE,
- ASSIGN,
- EQU,
- NEQ,
- LT,
- GT,
- LTE,
- GTE,
- FUNC,
- COMMA,
- ADD,
- SUB,
- DIV,
- MUL,
- BYTE,
- SHORT,
- LONG,
- OCBRACE,
- CCBRACE,
-};
+/*
+ * Produce the next token.
+ *
+ * Clears the shared token slot, then runs the current state function.
+ * Returns that function's result (which may be the error value of type
+ * UNKNOWN), or NULL when no state function is installed.
+ */
+lex_val *
+get_next(void) {
+	/* sizeof on the object itself: typeof is not available before C23 */
+	memset(&lv, 0, sizeof lv);
+	return nextfn ? nextfn() : NULL;
+}
diff --git a/lex.h b/lex.h
@@ -0,0 +1,43 @@
+
+#ifndef LEX_H
+#define LEX_H
+
+#include <stdio.h>	/* FILE, used by init_lexer() */
+
+/*
+ * Kinds of token. UNKNOWN is first, so a zeroed lex_val has type UNKNOWN;
+ * the lexer's error value also carries this type.
+ */
+enum lex_type {
+	UNKNOWN,
+
+	STRING,
+	NAME,
+	NUM,
+	SEMI,
+	PTR,
+	UNARRAY,
+	QUOTE,
+	OBRACE,
+	CBRACE,
+	ASSIGN,
+	EQU,
+	NEQ,
+	LT,
+	GT,
+	LTE,
+	GTE,
+	FUNC,
+	COMMA,
+	ADD,
+	SUB,
+	DIV,
+	MUL,
+	BYTE,
+	SHORT,
+	LONG,
+	OCBRACE,
+	CCBRACE
+};
+
+/*
+ * One token. `data` points at the token text, which is storage owned by
+ * the lexer (callers must not free it), or is NULL.
+ */
+typedef struct lex_val {
+	enum lex_type type;
+	char *data;
+} lex_val;
+
+/* State functions: each lexes one token of its kind from the current input. */
+lex_val *lex_name(void);
+lex_val *lex_type(void);
+
+/*
+ * get_next() returns the next token, a value of type UNKNOWN on a lexing
+ * error, or NULL when the lexer has no state to run.
+ * init_lexer() selects the stream to read and resets the lexer state.
+ */
+lex_val *get_next(void);
+void init_lexer(FILE *f);
+
+#endif /* LEX_H */
diff --git a/main.c b/main.c
@@ -0,0 +1,15 @@
+#include <stdio.h>
+
+#include "lex.h"
+
+/*
+ * Lex test.hlc from the current directory, discarding the tokens.
+ * Returns 0 on completion, 1 if the file cannot be opened.
+ */
+int
+main(void) {
+	FILE *f;
+	lex_val *val;
+
+	f = fopen("test.hlc", "r");
+	if (!f) {
+		perror("test.hlc");
+		return 1;
+	}
+	init_lexer(f);
+
+	/*
+	 * The lexer reports end of input and errors as a token of type
+	 * UNKNOWN rather than NULL, so stop on either.
+	 */
+	while ((val = get_next()) != NULL && val->type != UNKNOWN)
+		continue;
+
+	fclose(f);
+	return 0;
+}
diff --git a/test.hlc b/test.hlc
@@ -0,0 +1 @@
+byte *name