hlc

High level language for lli
Log | Files | Refs

commit 9cca5ce74afac3b0967f04dbff6acdfa07d3af4b
parent 9f14011d13c1e9c2b20af43e06e9ece090bd3222
Author: thing1 <l.standen@posteo.com>
Date:   Sun, 12 Oct 2025 14:17:09 +0100

added the ability to lex numbers and have more advanced errors

Diffstat:
M Makefile | 11 ++++++-----
M lex.c | 139 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------------------
M lex.h | 8 ++++++--
A lex_names.c | 15 +++++++++++++++
A lex_names.h | 1 +
M main.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
M test.hlc | 3 ++-
7 files changed, 194 insertions(+), 50 deletions(-)

diff --git a/Makefile b/Makefile @@ -1,14 +1,14 @@ CC=c89 -CFLAGS=-ggdb -pedantic -fsanitize=address - -SRC = lex.c main.c +CFLAGS=-ggdb -pedantic -fsanitize=address -Wall -Wextra -D_XOPEN_SOURCE=500 -D_POSIX_C_SOURCE=200112L +#CFLAGS=-ggdb -pedantic -Wall -Wextra +SRC = lex.c lex_names.c main.c OBJ = ${SRC:.c=.o} -all: spec hlc +.POSIX: +all: hlc spec: spec.md smu spec.md > spec.html - .c.o: ${CC} -c ${CFLAGS} $< @@ -17,3 +17,4 @@ hlc: ${OBJ} clean: rm -rf *.html *.o hlc + diff --git a/lex.c b/lex.c @@ -3,6 +3,7 @@ #include <string.h> #include <ctype.h> + #include "lex.h" #define strnul(s) (s + strlen(s)) @@ -10,36 +11,50 @@ lex_val lex_error = {UNKNOWN, NULL}; lex_val lv = { 0 }; -FILE *input; +char *input, *startpos, *endpos; +size_t input_len = 0; lex_val *(*nextfn)(void); +int skipped = 0; + +void +drainfile(FILE *in) { + input = malloc(input_len + 1); + fread(input, 1, input_len, in); + input[input_len] = 0; +} int -getchr(FILE *in) { +getchr(void) { int c; - while ((c = getc(in)) != EOF && (isblank(c) || c == '\n')) - continue; - if (c == EOF) + skipped = 0; + while ((c = *endpos) != 0 && (isblank(c) || c == '\n')) { + skipped++; + endpos++; + } + if (c == 0) lex_error.type = EOI; + + endpos++; return c; } char * check_bytes(char *str) { char *s; - for (s = str; *s; s++) if (getchr(input) != *s) return NULL; + for (s = str; *s; s++) if (getchr() != *s) return NULL; return str; } int -peekc(FILE *in) { - int c = getchr(in); - ungetc(c, in); +peekc(void) { + int c = getchr(); + endpos--; return c; } lex_val * lex_char(char c, enum lex_type t) { - if (getchr(input) != c) return &lex_error; + if (getchr() != c) return &lex_error; lv.type = t; lv.data = NULL; return &lv; @@ -47,7 +62,7 @@ lex_char(char c, enum lex_type t) { lex_val * lex_type(void) { - switch (peekc(input)) { + switch (peekc()) { case 'b': if (!(lv.data = check_bytes("byte"))) return &lex_error; lv.type = BYTE; @@ -66,7 +81,7 @@ lex_type(void) { break; default: return &lex_error; } - if 
(peekc(input) == '*') nextfn = &lex_type; + if (peekc() == '*') nextfn = &lex_type; else nextfn = &lex_name; return &lv; } @@ -74,33 +89,34 @@ lex_type(void) { lex_val * -lex_comma() { +lex_comma(void) { return lex_char(',', COMMA); } lex_val * -lex_semi() { +lex_semi(void) { + nextfn = &lex_type; return lex_char(';', SEMI); } lex_val * -lex_assign() { +lex_assign(void) { nextfn = &lex_value; return lex_char('=', ASSIGN); } lex_val * -lex_sym() { - switch (getchr(input)) { +lex_sym(void) { + switch (peekc()) { case '<': - switch (peekc(input)) { - case '=': getchr(input); lv.type = LTE; break; + switch (peekc()) { + case '=': getchr(); lv.type = LTE; break; default: lv.type = LT; break; } return &lv; case '>': - switch (peekc(input)) { - case '=': getchr(input); lv.type = GTE; break; + switch (peekc()) { + case '=': getchr(); lv.type = GTE; break; default: lv.type = GT; break; } return &lv; @@ -108,18 +124,49 @@ lex_sym() { if (!check_bytes("=")) return &lex_error; lv.type = NEQ; break; + case '=': return lex_assign(); + case ',': return lex_comma(); + case ';': return lex_semi(); + case '+': lv.type = LT; break; case '-': lv.type = LT; break; case '/': lv.type = LT; break; case '*': lv.type = LT; break; case '&': lv.type = LT; break; } + + return &lv; +} + +/* TODO read negative values */ +lex_val * +lex_number(void) { + static char num[16]; + char i, c; + for (i = 0, c = getchr(); i < 16 && isdigit(c); i++, c = getchr()) + num[(int)i] = c; + endpos--; + + lv.type = NUM; + lv.data = num; + + nextfn = &lex_sym; + + return &lv; } /* TODO make this read ints and other litterals */ lex_val * -lex_value() { - return lex_name(); +lex_value(void) { + char c = peekc(); + if (isdigit(c)) /* number */ + return lex_number(); + switch (c) { + case '\"': /* strlit */ + + default: /* name */ + return lex_name(); + } } lex_val * @@ -130,26 +177,25 @@ lex_name(void) { memset(name, 0, 32); /* note this resets the previous value, when converting to an ast, use a dup */ - if 
(peekc(input) == '*') { - getchr(input); + if (peekc() == '*') { + getchr(); lv.type = DEREF; return &lv; } - if (!isalpha((c = getchr(input)))) return &lex_error; + if (!isalpha((c = getchr()))) return &lex_error; do { memcpy(strnul(name), &c, 1); if ((len++ + 1) == 32) return &lex_error; - } while(isalnum((c = getc(input)))); - ungetc(c, input); + } while(isalnum((c = getchr())) && skipped == 0); + endpos--; lv.data = name; lv.type = NAME; - switch (peekc(input)) { - case '=': nextfn = &lex_assign; break; /* CHECK FOR == and = */ - case ',': nextfn = &lex_comma; break; /* leads to lex_type */ - case ';': nextfn = &lex_semi; break; - + switch (peekc()) { + case '=': + case ',': + case ';': case '<': case '>': case '!': @@ -168,12 +214,33 @@ lex_name(void) { void init_lexer(FILE *in) { - input = in; + fseek(in, 0, SEEK_END); + input_len = ftell(in); + rewind(in); + + drainfile(in); + startpos = input; + endpos = startpos; + nextfn = &lex_type; } +int +get_err_len(void) { + return endpos - startpos; +} + +int get_line_num(char *s) { + int count = 1; + size_t i; + for (i = 0; i < input_len && &input[i] != s; i++) + if (input[i] == '\n') count++; + return count; +} + lex_val * -get_next() { - memset(&lv, 0, sizeof(typeof(lv))); +get_next(void) { + memset(&lv, 0, sizeof(lv)); + startpos = endpos; return (nextfn) ? 
nextfn() : NULL; } diff --git a/lex.h b/lex.h @@ -1,4 +1,3 @@ - enum lex_type { UNKNOWN, @@ -38,6 +37,9 @@ typedef struct lex_val { char *data; } lex_val; +extern char *input, *startpos, *endpos; +extern lex_val *(*nextfn)(void); + lex_val *lex_name(void); lex_val *lex_value(void); lex_val *lex_sym(void); @@ -45,5 +47,7 @@ lex_val *lex_comma(void); lex_val *lex_semi(void); lex_val *lex_type(void); +int get_err_len(void); +int get_line_num(char *); lex_val *get_next(void); -void init_lexer(FILE *f); +void init_lexer(FILE *); diff --git a/lex_names.c b/lex_names.c @@ -0,0 +1,15 @@ +#include <stdlib.h> +#include <stdio.h> +#include "lex.h" + +#define is(f) if (func == f) + +char *get_name(lex_val *(*func)(void)) { + is(lex_name) return "name"; + is(lex_type) return "type"; + is(lex_value) return "value"; + is(lex_semi) return ";"; + is(lex_sym) return "symbol"; + is(lex_comma) return "comma"; + return NULL; +} diff --git a/lex_names.h b/lex_names.h @@ -0,0 +1 @@ +char *get_name(lex_val *(*func)(void)); diff --git a/main.c b/main.c @@ -1,19 +1,74 @@ #include <stdio.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stddef.h> #include "lex.h" +#include "lex_names.h" + +FILE *in; + +void +cleanup() { + free(input); + fclose(in); +} + +void +errorf(char *fmt, ...) 
{ + va_list ap; + char str[256]; + va_start(ap, fmt); + vsnprintf(str, 255, fmt, ap); + va_end(ap); + fprintf(stderr, "%s\n", str); +} + +void +syntax_error() { + char *loc = startpos - 1, *line = loc, *linestart; + int i, off; + + errorf("Syntax error: Expected %s, got %.*s", get_name(nextfn), endpos - startpos, loc); + + while (line != input && *line != '\n') line--; + if (*line == '\n') line++; + linestart = line; + + off = fprintf(stderr, "%d: ", get_line_num(loc)); + while (*line && *line != '\n') + putc(*line++, stderr); + + putc('\n', stderr); + + while (linestart - off != loc) { + putc(' ', stderr); + linestart++; + } + + for (i = 0; i < get_err_len(); i++) + putc('^', stderr); + putc('\n', stderr); + +} int main() { lex_val *val; - FILE *f = fopen("test.hlc", "r"); - init_lexer(f); - while (val = get_next()) { + in = fopen("test.hlc", "r"); + init_lexer(in); + + while ((val = get_next())) { if (val->type == EOI) break; - if (val->type == UNKNOWN) - fprintf(stderr, "unknown syntax\n"); + if (val->type == UNKNOWN) { + syntax_error(); + cleanup(); + return 1; + } } - fclose(f); + cleanup(); + return 0; } diff --git a/test.hlc b/test.hlc @@ -1 +1,2 @@ -byte *name = *first; +short age = 21; +