begin work on new lexer

This commit is contained in:
Lobo 2026-01-26 08:09:00 -03:00
parent 30ff72b3ae
commit 35bad08a0d
33 changed files with 2464 additions and 3 deletions

View file

@ -400,8 +400,6 @@ static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
}
Bc *compile_program(Cm *cm, mpc_ast_t *ast) {
mpc_ast_print_to(ast, stderr);
mpc_ast_trav_t *next = mpc_ast_traverse_start(ast, mpc_ast_trav_order_pre);
mpc_ast_t *curr = mpc_ast_traverse_next(&next); // Begin traversal

View file

@ -7,7 +7,7 @@
#include "vendor/mpc.h"
#define COMPILER_DEBUG 1
#define COMPILER_DEBUG DEBUG
/** Compiler context */
typedef struct Cm {

179
src/lexer.c Normal file
View file

@ -0,0 +1,179 @@
#include <ctype.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <utf.h>
#include "lexer.h"
#include "vendor/yar.h"
/**
 * Tells whether a character terminates a bare word.
 *
 * @param i character code to classify
 * @return 1 for brackets, braces, parentheses, ';', '\\' and '"'; 0 otherwise
 */
static inline int is_delimiter(int i) {
  switch (i) {
  case '(':
  case ')':
  case '[':
  case ']':
  case '{':
  case '}':
  case ';':
  case '\\':
  case '"':
    return 1;
  default:
    return 0;
  }
}
/**
 * Encodes a rune with runetochar() and appends the resulting bytes to the
 * lexer's token buffer.
 *
 * @param lx lexer whose buffer receives the bytes
 * @param rn rune to encode
 */
static inline void appendrune(Lx *lx, Rune rn) {
  char encoded[5];
  I nbytes = runetochar(encoded, &rn);

  yar_append_many(lx, encoded, nbytes);
}
/**
 * Appends a single byte to the lexer's token buffer.
 *
 * @param lx   lexer whose buffer receives the byte
 * @param byte value to store in the newly appended slot
 */
static inline void appendbyte(Lx *lx, char byte) {
  char *slot = yar_append(lx);

  *slot = byte;
}
/**
 * Reads characters from the lexer's stream until one is not whitespace.
 *
 * @param lx lexer to read from
 * @return the first non-whitespace value read, or -1 when the stream already
 *         reports end of input before anything is read
 */
static int getc_ws(Lx *lx) {
  int ch;

  if (ST_EOF(lx->stream))
    return -1;

  /* Keep pulling characters while they classify as whitespace. */
  do {
    ch = ST_GETC(lx->stream);
  } while (isspace(ch));

  return ch;
}
/**
 * Scans a bare word: every character up to (but not including) the next
 * delimiter, whitespace character or end of input is appended to the token
 * buffer, followed by a terminating NUL byte.
 *
 * The caller is expected to have stored the word's kind in lx->kind before
 * calling; it is only overridden with TOK_EOF when the input ended before a
 * single word byte could be read.
 *
 * @param lx lexer to read from and append to
 * @return lx->kind after scanning
 */
static int scanword(Lx *lx) {
  /* Tracks whether this call consumed any word byte. A word that runs up to
     end of input must still be reported as a word; only a completely empty
     scan means end of input. */
  int got_any = 0;

  for (;;) {
    int next = ST_GETC(lx->stream);

    if (next == -1) {
      if (!got_any)
        lx->kind = TOK_EOF;
      break;
    }
    if (is_delimiter(next) || isspace(next)) {
      /* Leave the terminator in the stream for the next token. */
      ST_UNGETC(next, lx->stream);
      break;
    }
    appendbyte(lx, next);
    got_any = 1;
  }

  appendbyte(lx, 0);
  return lx->kind;
}
/**
 * Scans the body of a `\x` string escape: one to six hexadecimal digits
 * terminated by ';'. The digits are converted to a rune, which is appended
 * to the token buffer in encoded form.
 *
 * Terminates the program via errx() when the input ends before ';', when a
 * non-hex character is met, when more than six digits are given, or when no
 * digit is given at all.
 *
 * @param lx lexer to read from and append to
 */
static void scanescape(Lx *lx) {
  /* Zero-filled so the buffer is a valid C string at every point, including
     when it is printed with %s from the error paths below. */
  char escbuf[7] = {0};
  char *escptr = escbuf;
  char *end;
  int next;
  Rune tmp;

  for (;;) {
    next = ST_GETC(lx->stream);
    if (next == -1) {
      errx(1, "unterminated hex sequence '%s'", escbuf);
    } else if (next == ';') {
      break;
    } else if (!isxdigit(next)) {
      errx(1, "invalid hex digit '%c'", next);
    }
    /* Six digits plus the terminating NUL fill escbuf exactly. */
    if (escptr - escbuf >= 6) {
      errx(1, "hex sequence too long (6 chars max.)");
    }
    *escptr++ = next;
  }

  /* strtol() reports "no digits" by leaving end == start; reject that case
     instead of silently producing rune 0 for an empty `\x;`. */
  tmp = strtol(escbuf, &end, 16);
  if (end == escbuf || *end != '\0')
    errx(1, "invalid hex sequence '%s'", escbuf);

  appendrune(lx, tmp);
}
/**
 * Scans the remainder of a string literal (the opening quote has already
 * been consumed), appending the decoded bytes to the token buffer followed
 * by a terminating NUL byte.
 *
 * Recognised escapes: \t \n \r \b \v \f \0 \e \\ \" and \x<hex>;.
 * An unknown escape prints a message to stderr and aborts; input that ends
 * before the closing quote terminates the program via errx().
 *
 * @param lx lexer to read from and append to
 * @return TOK_STRING, which is also stored in lx->kind
 */
static int scanstring(Lx *lx) {
  for (;;) {
    int ch = ST_GETC(lx->stream);
    int esc;
    int out;

    if (ch == -1)
      break;

    if (ch == '"') {
      /* Closing quote: terminate the buffer and report the token. */
      appendbyte(lx, 0);
      lx->kind = TOK_STRING;
      return lx->kind;
    }

    if (ch != '\\') {
      /* Ordinary character: copied verbatim. */
      appendbyte(lx, ch);
      continue;
    }

    esc = ST_GETC(lx->stream);
    if (esc == -1)
      break;

    switch (esc) {
    case 't':
      out = '\t';
      break;
    case 'n':
      out = '\n';
      break;
    case 'r':
      out = '\r';
      break;
    case 'b':
      out = '\b';
      break;
    case 'v':
      out = '\v';
      break;
    case 'f':
      out = '\f';
      break;
    case '0':
      out = '\0';
      break;
    case 'e':
      out = '\x1b';
      break;
    case '\\':
    case '"':
      out = esc;
      break;
    case 'x':
      /* The hex escape appends its own bytes; go on with the next char. */
      scanescape(lx);
      continue;
    default:
      fprintf(stderr, "unknown escape sequence '\\%c'\n", esc);
      abort();
    }
    appendbyte(lx, out);
  }

  /* Only reached when the input ended inside the literal. */
  errx(1, "unterminated string literal");
  return 0;
}
/**
 * Reads the next token from the lexer's stream.
 *
 * Leading whitespace and backslash-introduced line comments are skipped.
 * Punctuation tokens use their own character code as kind; string literals
 * yield TOK_STRING; anything else is scanned as a TOK_WORD. The token kind
 * is also stored in lx->kind, and the text of word and string tokens is
 * appended to the lexer's buffer.
 *
 * NOTE(review): only lx->cursor is reset here; lx->count is left alone, so
 * token text presumably accumulates in lx->items across calls unless the
 * caller resets it — confirm the intended ownership of the buffer.
 *
 * @param lx lexer state
 * @return the kind of the token read, 0 (TOK_EOF) at end of input
 */
I lexer_next(Lx *lx) {
  lx->cursor = 0;

  for (;;) {
    int next;

    if (ST_EOF(lx->stream)) {
      lx->kind = TOK_EOF;
      return 0;
    }

    next = getc_ws(lx);
    switch (next) {
    case '\\':
      /* Line comment: discard through the end of the line. Also stop at end
         of input, otherwise a comment on an unterminated last line would
         spin forever on the -1 sentinel. */
      while (next != '\n' && next != -1)
        next = ST_GETC(lx->stream);
      continue; /* rescan from the top of the enclosing for loop */
    case '(':
    case ')':
    case '[':
    case ']':
    case '{':
    case '}':
    case ';':
      return (lx->kind = next);
    case '"':
      return scanstring(lx);
    default:
      /* Hand the character back so scanword() sees the whole word. */
      ST_UNGETC(next, lx->stream);
      lx->kind = TOK_WORD;
      return scanword(lx);
    }
  }
}

33
src/lexer.h Normal file
View file

@ -0,0 +1,33 @@
#ifndef LEXER_H
#define LEXER_H
#include "common.h"
#include "stream.h"
/**
 * Token kinds. Punctuation kinds are the character codes themselves, which
 * lets the lexer use the character it just read directly as the kind.
 */
enum {
  /* Sentinels */
  TOK_EOF = 0,
  TOK_INVALID = -1,

  /* Tokens that carry text */
  TOK_WORD = 'a',
  TOK_STRING = '"',

  /* Bracketing punctuation */
  TOK_LPAREN = '(',
  TOK_RPAREN = ')',
  TOK_LBRACKET = '[',
  TOK_RBRACKET = ']',
  TOK_LBRACE = '{',
  TOK_RBRACE = '}',

  /* Other single-character kinds */
  TOK_SEMICOLON = ';',
  TOK_COMMENT = '\\',
};
/** Lexer state: the input stream plus the most recently scanned token. */
typedef struct Lx {
// Kind of the most recent token (one of the TOK_* values); written by
// lexer_next() and the scan helpers it calls.
I kind;
// Reset to 0 at the start of every lexer_next() call.
// NOTE(review): nothing in the lexer advances it yet — confirm intended use.
I cursor;
// Input source, read through the ST_GETC / ST_UNGETC / ST_EOF macros.
Stream *stream;
// Token text buffer; presumably the items/count/capacity triple expected by
// the yar_append* helpers from vendor/yar.h — verify against that header.
char *items;
Z count, capacity;
} Lx;
/**
 * Creates a lexer for the given stream.
 * NOTE(review): the definition is not part of this change set; ownership of
 * the returned pointer and of the stream should be documented once it exists.
 */
Lx *lexer_make(Stream *);
/**
 * Scans the next token, stores its kind in the lexer's `kind` field and
 * returns that kind; returns TOK_EOF (0) once the input is exhausted.
 */
I lexer_next(Lx *);
#endif

39
src/stream.c Normal file
View file

@ -0,0 +1,39 @@
#include "stream.h"
#include <stdio.h>
/** Stream getc callback for stdio: reads one character from the FILE in f. */
static int filestream_getc(void *f) {
  FILE *fp = f;

  return fgetc(fp);
}
/** Stream ungetc callback for stdio: pushes c back onto the FILE in f. */
static int filestream_ungetc(int c, void *f) {
  FILE *fp = f;

  return ungetc(c, fp);
}
/** Stream eof callback for stdio: reports the FILE's end-of-file indicator. */
static int filestream_eof(void *f) {
  FILE *fp = f;

  return feof(fp);
}
/**
 * Stream getc callback for in-memory buffers.
 *
 * Returns the pushed-back character if one is pending, otherwise the next
 * byte of the buffer, or -1 once the buffer is exhausted.
 *
 * @param f pointer to the Buf being read
 * @return the byte as a non-negative value, or -1 at end of input
 */
static int bufstream_getc(void *f) {
  Buf *b = f;

  if (b->unread != -1) {
    int c = b->unread;
    b->unread = -1;
    return c;
  }
  if (b->pos >= b->len)
    return -1;

  /* Return the byte as unsigned char, like fgetc() does: with a signed
     plain char, bytes >= 0x80 would otherwise come back negative, 0xFF would
     be indistinguishable from the -1 end marker, and callers would pass
     negative values to the <ctype.h> classifiers. */
  return (unsigned char)b->data[b->pos++];
}
static int bufstream_ungetc(int c, void *f) { return ((Buf *)f)->unread = c; }
/**
 * Stream eof callback for in-memory buffers.
 *
 * @return 1 when no pushed-back character is pending and the read position
 *         has reached the end of the data, 0 otherwise
 */
static int bufstream_eof(void *f) {
  Buf *buf = f;
  int pending = buf->unread != -1;

  /* A pushed-back character still counts as readable input. */
  return !pending && buf->pos >= buf->len;
}
// clang-format off
static const StreamVtable _filestream_vtable = {
filestream_getc, filestream_ungetc, filestream_eof
};
const StreamVtable *filestream_vtable = &_filestream_vtable;
static const StreamVtable _bufstream_vtable = {
bufstream_getc, bufstream_ungetc, bufstream_eof
};
const StreamVtable *bufstream_vtable = &_bufstream_vtable;
// clang-format on

30
src/stream.h Normal file
View file

@ -0,0 +1,30 @@
#ifndef STREAM_H
#define STREAM_H
/**
 * Table of callbacks implementing a character stream. Each callback receives
 * the stream's opaque data pointer as its void * argument.
 *
 * NOTE(review): identifiers containing a double underscore are reserved for
 * the implementation in C; consider renaming the members together with the
 * ST_* macros that reference them.
 */
typedef struct StreamVtable {
// Read the next character; the lexer treats -1 as end of input.
int (*__sgetc)(void *);
// Push one character back so the next read returns it.
int (*__sungetc)(int, void *);
// Non-zero when the stream has no more input.
int (*__seof)(void *);
} StreamVtable;
/** A character stream: a callback table plus the state those callbacks use. */
typedef struct Stream {
// Callbacks used by the ST_GETC / ST_UNGETC / ST_EOF macros.
const StreamVtable *vtable;
// Opaque state handed to every callback (a FILE * for filestream_vtable,
// a Buf * for bufstream_vtable).
void *data;
} Stream;
/** State of an in-memory stream, for use with bufstream_vtable. */
typedef struct Buf {
// Bytes to read; not modified by the stream.
const char *data;
// Number of readable bytes in data, and index of the next byte to read.
int len, pos;
// Single pushed-back character, or -1 when none is pending.
int unread;
} Buf;
/*
 * Stream dispatch helpers: call the matching callback of stream R (a
 * Stream *) with the stream's own data pointer.
 * R is expanded twice in each macro, so it must not have side effects.
 */
#define ST_GETC(R) ((R)->vtable->__sgetc((R)->data))
#define ST_UNGETC(C, R) ((R)->vtable->__sungetc((C), (R)->data))
#define ST_EOF(R) ((R)->vtable->__seof((R)->data))
/*
 * Builds a Buf positioned at the start of s, with no pushed-back character.
 * s must be a string literal or char array: the length comes from sizeof(s)
 * minus the terminating NUL, which is wrong for a plain pointer.
 */
#define BUF(s) ((Buf){(s), sizeof(s) - 1, 0, -1})
/* Callback table for streams whose data pointer is a stdio FILE *. */
extern const StreamVtable *filestream_vtable;
/* Callback table for streams whose data pointer is a Buf *. */
extern const StreamVtable *bufstream_vtable;
#endif