new compiler/parser

This commit is contained in:
Lobo 2026-01-28 11:57:33 -03:00
parent 35bad08a0d
commit 7efa99d064
16 changed files with 419 additions and 302 deletions

View file

@ -27,7 +27,6 @@ sources = [
'src/userdata.c',
'src/vm.c',
'src/vendor/linenoise.c',
'src/vendor/mpc.c',
'src/vendor/yar.c',
]

View file

@ -7,12 +7,11 @@
#include "debug.h"
#include "gc.h"
#include "object.h"
#include "parser.h"
#include "src/primitive.h"
#include "string.h"
#include "vm.h"
#include "vendor/mpc.h"
#include "vendor/yar.h"
#include "vm.h"
// clang-format off
struct {
@ -26,6 +25,7 @@ struct {
{"2dup", {OP_2DUP, 0}},
{"2drop", {OP_2DROP, 0}},
{"2swap", {OP_2SWAP, 0}},
{"2over", {OP_2TOR, OP_2DUP, OP_2FROMR, OP_2SWAP, 0}},
{"over", {OP_OVER, 0}},
{"nip", {OP_NIP, 0}},
{"bury", {OP_BURY, 0}},
@ -118,8 +118,7 @@ static V optim_tailcall(Bc *chunk) {
}
}
static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next);
static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next);
static I compile_expr(Cm *cm, Ast *node);
static I compile_constant(Cm *cm, O value, I line, I col) {
I idx = chunk_add_constant(cm->chunk, value);
@ -169,31 +168,16 @@ static I compile_call(Cm *cm, const char *name, I line, I col) {
return 1;
}
static I compile_command(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
curr = mpc_ast_traverse_next(next);
const char *name = curr->contents;
I name_line = curr->state.row;
I name_col = curr->state.col;
(void)mpc_ast_traverse_next(next);
curr = mpc_ast_traverse_next(next);
while (curr != NULL) {
if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, ";") == 0)
break;
I res = compile_expr(cm, curr, next);
if (!res)
static I compile_command(Cm *cm, Ast *node) {
for (size_t i = 0; i < node->children.count; i++) {
if (!compile_expr(cm, node->children.items[i]))
return 0;
curr = mpc_ast_traverse_next(next);
}
compile_call(cm, name, name_line, name_col);
return 1;
return compile_call(cm, node->name, node->line, node->col);
}
static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
(void)mpc_ast_traverse_next(next); // skip 'def'
curr = mpc_ast_traverse_next(next);
const char *name = arena_strdup(cm->arena, curr->contents);
(void)mpc_ast_traverse_next(next); // skip '{'
static I compile_definition(Cm *cm, Ast *node) {
const char *name = arena_strdup(cm->arena, node->name);
Dt *entry = upsert(cm->dictionary, name, cm->arena);
Cm inner = {0};
@ -202,19 +186,14 @@ static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
inner.vm = cm->vm;
inner.dictionary = cm->dictionary;
curr = mpc_ast_traverse_next(next);
while (curr != NULL) {
if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "}") == 0)
break;
if (!compile_expr(&inner, curr, next)) {
for (size_t i = 0; i < node->children.count; i++) {
if (!compile_expr(&inner, node->children.items[i])) {
chunk_release(inner.chunk);
return 0;
}
curr = mpc_ast_traverse_next(next);
}
chunk_emit_byte_with_line(inner.chunk, OP_RETURN, curr->state.row,
curr->state.col);
chunk_emit_byte_with_line(inner.chunk, OP_RETURN, node->line, node->col);
optim_tailcall(inner.chunk);
entry->chunk = inner.chunk;
@ -226,7 +205,7 @@ static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
return 1;
}
static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
static O compile_quotation_obj(Cm *cm, Ast *node) {
Cm inner = {0};
inner.arena = cm->arena;
@ -234,20 +213,13 @@ static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
inner.vm = cm->vm;
inner.dictionary = cm->dictionary;
(void)mpc_ast_traverse_next(next);
curr = mpc_ast_traverse_next(next);
while (curr != NULL) {
if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "]") == 0)
break;
I res = compile_expr(&inner, curr, next);
if (!res) {
for (size_t i = 0; i < node->children.count; i++) {
if (!compile_expr(&inner, node->children.items[i])) {
chunk_release(inner.chunk);
return res;
return NIL;
}
curr = mpc_ast_traverse_next(next);
}
chunk_emit_byte_with_line(inner.chunk, OP_RETURN, curr->state.row,
curr->state.col);
chunk_emit_byte_with_line(inner.chunk, OP_RETURN, node->line, node->col);
optim_tailcall(inner.chunk);
Hd *hd = gc_alloc(cm->vm, sizeof(Hd) + sizeof(Bc *));
@ -258,158 +230,103 @@ static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
return BOX(hd);
}
static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next,
I line, I col) {
return compile_constant(cm, compile_quotation_obj(cm, curr, next), line, col);
static I compile_quotation(Cm *cm, Ast *node) {
O obj = compile_quotation_obj(cm, node);
if (obj == NIL)
return 0;
return compile_constant(cm, obj, node->line, node->col);
}
static I compile_pragma(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
(void)mpc_ast_traverse_next(next);
curr = mpc_ast_traverse_next(next);
const char *name = curr->contents;
I line = curr->state.row;
I col = curr->state.col;
curr = mpc_ast_traverse_next(next);
I has_args = 0;
if (curr != NULL && strcmp(curr->tag, "char") == 0 &&
strcmp(curr->contents, "(") == 0) {
has_args = 1;
curr = mpc_ast_traverse_next(next); // Skip '('
}
if (strcmp(name, "load") == 0) {
if (!has_args) {
fprintf(stderr,
"compiler error at %ld:%ld: #load requires a filename argument\n",
line + 1, col + 1);
static I compile_pragma(Cm *cm, Ast *node) {
if (strcmp(node->name, "#load") == 0) {
if (node->children.count == 0) {
fprintf(stderr, "compiler error: #load requires argument\n");
return 0;
}
if (!strstr(curr->tag, "expr|string")) {
fprintf(stderr,
"compiler error at %ld:%ld: #load requires a string argument\n",
line + 1, col + 1);
Ast *arg = node->children.items[0];
if (arg->type != AST_STR) {
fprintf(stderr, "compiler error: #load requires string\n");
return 0;
}
char *fname_raw = curr->contents;
Z len = strlen(fname_raw);
char *fname = malloc(len + 1);
memcpy(fname, fname_raw + 1, len - 2);
fname[len - 2] = '\0';
fname = mpcf_unescape(fname);
mpc_result_t res;
extern mpc_parser_t *Program;
if (!mpc_parse_contents(fname, Program, &res)) {
fprintf(stderr, "compiler error at %ld:%ld: failed to parse file '%s':\n",
line + 1, col + 1, fname);
mpc_err_print_to(res.error, stderr);
mpc_err_delete(res.error);
free(fname);
char *fname = arg->name;
FILE *f = fopen(fname, "rb");
if (!f) {
fprintf(stderr, "compiler error: cannot open file '%s'\n", fname);
return 0;
}
mpc_ast_trav_t *inner_next =
mpc_ast_traverse_start(res.output, mpc_ast_trav_order_pre);
mpc_ast_t *inner_curr = mpc_ast_traverse_next(&inner_next);
Stream s = {filestream_vtable, f};
Lx *lx = lexer_make(&s);
Ast *root = parser_parse(lx);
I success = compile_ast(cm, inner_curr, &inner_next);
mpc_ast_delete(res.output);
if (!success) {
fprintf(stderr,
"compiler error at %ld:%ld: failed to compile file '%s'\n",
line + 1, col + 1, fname);
free(fname);
return 0;
}
free(fname);
curr = mpc_ast_traverse_next(next);
while (curr != NULL) {
if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, ")") == 0)
I success = 1;
for (size_t i = 0; i < root->children.count; i++) {
if (!compile_expr(cm, root->children.items[i])) {
success = 0;
break;
curr = mpc_ast_traverse_next(next);
}
} else {
fprintf(stderr, "compiler warning at %ld:%ld: unknown pragma \"%s\"\n",
line + 1, col + 1, name);
}
if (has_args) {
if (curr == NULL || strcmp(curr->contents, ")") != 0) {
fprintf(stderr, "error at %ld:%ld: expected ')' after pragma arguments\n",
line + 1, col + 1);
ast_free(root);
lexer_free(lx);
fclose(f);
return success;
}
fprintf(stderr, "compiler warning: unknown pragma \"%s\"\n", node->name);
return 1;
}
static I compile_expr(Cm *cm, Ast *node) {
if (!node)
return 0;
switch (node->type) {
case AST_INT: {
O num = NUM(node->int_val);
return compile_constant(cm, num, node->line, node->col);
}
case AST_STR: {
O obj = string_make(cm->vm, node->name, -1);
return compile_constant(cm, obj, node->line, node->col);
}
case AST_WORD:
return compile_call(cm, node->name, node->line, node->col);
case AST_QUOTE:
return compile_quotation(cm, node);
case AST_DEF:
return compile_definition(cm, node);
case AST_CMD:
return compile_command(cm, node);
case AST_PRAGMA:
return compile_pragma(cm, node);
case AST_PROGRAM:
for (size_t i = 0; i < node->children.count; i++) {
if (!compile_expr(cm, node->children.items[i]))
return 0;
}
return 1;
default:
fprintf(stderr, "compiler error: nyi ast type %d\n", (int)node->type);
return 0;
}
}
return 1;
}
static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
I line = curr->state.row;
I col = curr->state.col;
if (strstr(curr->tag, "expr|number") != NULL) {
I num = strtol(curr->contents, NULL, 0);
return compile_constant(cm, NUM(num), line, col);
} else if (strstr(curr->tag, "expr|string") != NULL) {
curr->contents[strlen(curr->contents) - 1] = '\0';
char *unescaped = malloc(strlen(curr->contents + 1) + 1);
strcpy(unescaped, curr->contents + 1);
unescaped = mpcf_unescape(unescaped);
O obj = string_make(cm->vm, unescaped, -1);
free(unescaped);
return compile_constant(cm, obj, line, col);
} else if (strstr(curr->tag, "expr|word") != NULL) {
return compile_call(cm, curr->contents, line, col);
} else if (strstr(curr->tag, "expr|quotation") != NULL) {
return compile_quotation(cm, curr, next, line, col);
} else if (strstr(curr->tag, "expr|def") != NULL) {
return compile_definition(cm, curr, next);
} else if (strstr(curr->tag, "expr|command") != NULL) {
return compile_command(cm, curr, next);
} else if (strstr(curr->tag, "expr|pragma") != NULL) {
return compile_pragma(cm, curr, next);
} else if (strstr(curr->tag, "expr|comment") != NULL) {
return 1;
} else {
fprintf(stderr, "compiler error at %ld:%ld: \"%s\" nyi\n", line + 1,
col + 1, curr->tag);
return 0;
}
}
static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
(void)mpc_ast_traverse_next(next);
curr = mpc_ast_traverse_next(next);
while (curr != NULL) {
if (strcmp(curr->tag, "regex") == 0 && strcmp(curr->contents, "") == 0)
break;
I res = compile_expr(cm, curr, next);
if (!res)
return res;
curr = mpc_ast_traverse_next(next);
}
return 1;
}
Bc *compile_program(Cm *cm, mpc_ast_t *ast) {
mpc_ast_trav_t *next = mpc_ast_traverse_start(ast, mpc_ast_trav_order_pre);
mpc_ast_t *curr = mpc_ast_traverse_next(&next); // Begin traversal
if (!compile_ast(cm, curr, &next)) {
Bc *compile_program(Cm *cm, Ast *ast) {
if (ast->type == AST_PROGRAM) {
for (size_t i = 0; i < ast->children.count; i++) {
if (!compile_expr(cm, ast->children.items[i])) {
chunk_release(cm->chunk);
return NULL;
}
Bc *chunk = cm->chunk;
chunk_emit_byte(chunk, OP_RETURN);
optim_tailcall(chunk);
return chunk;
}
} else {
if (!compile_expr(cm, ast)) {
chunk_release(cm->chunk);
return NULL;
}
}
chunk_emit_byte(cm->chunk, OP_RETURN);
optim_tailcall(cm->chunk);
return cm->chunk;
}

View file

@ -4,10 +4,9 @@
#include "chunk.h"
#include "gc.h"
#include "vm.h"
#include "parser.h"
#include "vendor/mpc.h"
#define COMPILER_DEBUG DEBUG
#define COMPILER_DEBUG 0
/** Compiler context */
typedef struct Cm {
@ -19,4 +18,4 @@ typedef struct Cm {
V compiler_init(Cm *, Vm *, const char *);
V compiler_deinit(Cm *);
Bc *compile_program(Cm *, mpc_ast_t *);
Bc *compile_program(Cm *, Ast *);

View file

@ -3,8 +3,8 @@
#include "chunk.h"
#include "debug.h"
#include "dictionary.h"
#include "primitive.h"
#include "print.h"
#include "src/primitive.h"
#include "vm.h"
static I decode_sleb128(U8 *ptr, Z *bytes_read) {
@ -70,7 +70,7 @@ static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent) {
print(obj);
printf(")");
if (!IMM(obj) && obj != NIL && type(obj) == TYPE_QUOT) {
if (!IMM(obj) && obj != NIL && type(obj) == OBJ_QUOT) {
putchar('\n');
Hd *hdr = UNBOX(obj);
Bc **chunk_ptr = (Bc **)(hdr + 1);

View file

@ -4,7 +4,7 @@
#include "common.h"
#include "object.h"
#define GC_DEBUG 0
#define GC_DEBUG 1
#if GC_DEBUG
#define HEAP_BYTES (8 * 1024)
#else

View file

@ -1,12 +1,42 @@
#include <ctype.h>
#include <err.h>
#include <stdio.h>
#include <stdlib.h>
#include <utf.h>
#include "lexer.h"
#include "vendor/yar.h"
Lx *lexer_make(Stream *s) {
Lx *lx = calloc(1, sizeof(Lx));
lx->stream = s;
return lx;
}
V lexer_free(Lx *lx) {
yar_free(lx);
free(lx);
}
static int lx_getc(Lx *lx) {
int c = ST_GETC(lx->stream);
if (c == '\n') {
lx->curr_line++;
lx->curr_col = 0;
} else if (c != -1) {
lx->curr_col++;
}
return c;
}
static void lx_ungetc(Lx *lx, int c) {
ST_UNGETC(c, lx->stream);
if (c == '\n') {
lx->curr_line--;
} else if (c != -1) {
lx->curr_col--;
}
}
static inline int is_delimiter(int i) {
return i == '(' || i == ')' || i == '[' || i == ']' || i == '{' || i == '}' ||
i == ';' || i == '\\' || i == '"';
@ -24,7 +54,7 @@ static int getc_ws(Lx *lx) {
if (ST_EOF(lx->stream))
return -1;
for (;;) {
int ch = ST_GETC(lx->stream);
int ch = lx_getc(lx);
if (isspace(ch))
continue;
return ch;
@ -32,21 +62,21 @@ static int getc_ws(Lx *lx) {
}
static int scanword(Lx *lx) {
int next = ST_GETC(lx->stream);
int next = lx_getc(lx);
for (;;) {
if (next == -1) {
if (lx->cursor == 0)
if (lx->count == 0)
lx->kind = TOK_EOF;
appendbyte(lx, 0);
return lx->kind;
} else if (is_delimiter(next) || isspace(next)) {
ST_UNGETC(next, lx->stream);
lx_ungetc(lx, next);
appendbyte(lx, 0);
return lx->kind;
} else {
appendbyte(lx, next);
next = ST_GETC(lx->stream);
next = lx_getc(lx);
continue;
}
}
@ -58,7 +88,7 @@ static void scanescape(Lx *lx) {
Rune tmp;
for (;;) {
next = ST_GETC(lx->stream);
next = lx_getc(lx);
if (next == -1) {
errx(1, "unterminated hex sequence '%s'", escbuf);
@ -77,22 +107,28 @@ static void scanescape(Lx *lx) {
}
tmp = strtol(escbuf, &escptr, 16);
if (*escptr == '\0')
if (*escptr == '\0') {
if (tmp < 256) {
appendbyte(lx, (U8)(tmp & 255));
} else {
appendrune(lx, tmp);
else
}
} else {
errx(1, "invalid hex sequence '%s'", escbuf);
}
}
static int scanstring(Lx *lx) {
int next;
for (;;) {
next = ST_GETC(lx->stream);
next = lx_getc(lx);
switch (next) {
case -1:
goto eof;
case '\\':
next = ST_GETC(lx->stream);
next = lx_getc(lx);
if (next == -1)
goto eof;
switch (next) {
@ -128,8 +164,7 @@ static int scanstring(Lx *lx) {
scanescape(lx);
break;
default:
fprintf(stderr, "unknown escape sequence '\\%c'\n", next);
abort();
return (lx->kind = TOK_INVALID);
}
break;
case '"':
@ -141,13 +176,13 @@ static int scanstring(Lx *lx) {
}
eof:
errx(1, "unterminated string literal");
return 0;
return (lx->kind = TOK_INVALID);
}
I lexer_next(Lx *lx) {
int next;
lx->cursor = 0;
lx->count = 0;
if (ST_EOF(lx->stream)) {
lx->kind = TOK_EOF;
@ -156,9 +191,12 @@ I lexer_next(Lx *lx) {
next = getc_ws(lx);
lx->start_line = lx->curr_line;
lx->start_col = (lx->curr_col > 0) ? lx->curr_col - 1 : 0;
switch (next) {
case '\\':
for (; next != '\n'; next = ST_GETC(lx->stream))
for (; next != '\n'; next = lx_getc(lx))
;
return lexer_next(lx);
case '(':
@ -172,7 +210,7 @@ I lexer_next(Lx *lx) {
case '"':
return scanstring(lx);
default:
ST_UNGETC(next, lx->stream);
lx_ungetc(lx, next);
lx->kind = TOK_WORD;
return scanword(lx);
};

View file

@ -22,12 +22,15 @@ enum {
typedef struct Lx {
I kind;
I cursor;
I curr_line, curr_col;
I start_line, start_col;
Stream *stream;
char *items;
Z count, capacity;
} Lx;
Lx *lexer_make(Stream *);
V lexer_free(Lx *lx);
I lexer_next(Lx *);
#endif

View file

@ -1,5 +1,6 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "chunk.h"
#include "compile.h"
@ -8,7 +9,6 @@
#include "vm.h"
#include "vendor/linenoise.h"
#include "vendor/mpc.h"
#define REPL_BUFFER_SIZE 4096
@ -18,16 +18,18 @@ I repl(void) {
char *line;
while ((line = linenoise("growl> ")) != NULL) {
mpc_result_t res;
if (!mpc_parse("<repl>", line, Program, &res)) {
mpc_err_print_to(res.error, stderr);
mpc_err_delete(res.error);
continue;
}
Buf b = { line, (int)strlen(line), 0, -1 };
Stream s = { bufstream_vtable, &b };
Lx *lx = lexer_make(&s);
Ast *root = parser_parse(lx);
Cm cm = {0};
compiler_init(&cm, &vm, "<repl>");
Bc *chunk = compile_program(&cm, res.output);
mpc_ast_delete(res.output);
Bc *chunk = compile_program(&cm, root);
ast_free(root);
lexer_free(lx);
if (chunk != NULL) {
vm_run(&vm, chunk, 0);
chunk_release(chunk);
@ -44,18 +46,23 @@ I loadfile(const char *fname) {
Vm vm = {0};
vm_init(&vm);
mpc_result_t res;
if (!mpc_parse_contents(fname, Program, &res)) {
mpc_err_print_to(res.error, stderr);
mpc_err_delete(res.error);
FILE *f = fopen(fname, "rb");
if (!f) {
fprintf(stderr, "error: cannot open file '%s'\n", fname);
return 1;
}
Stream s = { filestream_vtable, f };
Lx *lx = lexer_make(&s);
Ast *root = parser_parse(lx);
Cm cm = {0};
compiler_init(&cm, &vm, fname);
Bc *chunk = compile_program(&cm, res.output);
mpc_ast_delete(res.output);
Bc *chunk = compile_program(&cm, root);
ast_free(root);
lexer_free(lx);
fclose(f);
if (chunk != NULL) {
#if COMPILER_DEBUG
@ -72,9 +79,6 @@ I loadfile(const char *fname) {
}
int main(int argc, const char *argv[]) {
parser_init();
atexit(parser_deinit);
switch (argc) {
case 1:
return repl();

View file

@ -2,9 +2,9 @@
I type(O o) {
if (o == NIL)
return TYPE_NIL;
return OBJ_NIL;
if (IMM(o))
return TYPE_NUM;
return OBJ_NUM;
Hd *h = UNBOX(o);
return h->type;
}

View file

@ -11,25 +11,17 @@
#define ORD(x) ((intptr_t)(x) >> 1)
enum {
OBJ_NIL = 0,
OBJ_NUM = 1,
OBJ_FWD = 2,
OBJ_QUOT,
OBJ_COMPOSE,
OBJ_CURRY,
OBJ_STR,
OBJ_ARRAY,
OBJ_USERDATA,
};
enum {
TYPE_NIL = 0,
TYPE_NUM = 1,
TYPE_FWD = OBJ_FWD,
TYPE_QUOT = OBJ_QUOT,
TYPE_COMPOSE = OBJ_COMPOSE,
TYPE_CURRY = OBJ_CURRY,
TYPE_STR = OBJ_STR,
TYPE_USERDATA = OBJ_USERDATA,
};
typedef uintptr_t O;
/** Object header */
@ -50,7 +42,7 @@ typedef struct Qc {
I type(O);
static inline I callable(O o) {
I t = type(o);
return t == TYPE_QUOT || t == TYPE_COMPOSE || t == TYPE_CURRY;
return t == OBJ_QUOT || t == OBJ_COMPOSE || t == OBJ_CURRY;
}
#endif

View file

@ -1,51 +1,156 @@
#include "parser.h"
#include "vendor/mpc.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
mpc_parser_t *Pragma, *Comment, *Expr, *Number, *String, *Word, *Definition,
*Command, *List, *Table, *Quotation, *Program;
static Ast *ast_new(I type, I line, I col) {
Ast *node = calloc(1, sizeof(Ast));
node->type = type;
node->line = line;
node->col = col;
return node;
}
V parser_init(V) {
Pragma = mpc_new("pragma");
Comment = mpc_new("comment");
Expr = mpc_new("expr");
Number = mpc_new("number");
String = mpc_new("string");
Word = mpc_new("word");
Definition = mpc_new("def");
Command = mpc_new("command");
List = mpc_new("list");
Table = mpc_new("table");
Quotation = mpc_new("quotation");
Program = mpc_new("program");
void ast_free(Ast *ast) {
if (!ast)
return;
if (ast->name)
free(ast->name);
for (size_t i = 0; i < ast->children.count; i++) {
ast_free(ast->children.items[i]);
}
yar_free(&ast->children);
free(ast);
}
mpc_err_t *err = mpca_lang(
MPCA_LANG_DEFAULT,
" pragma : '#' <word> ('(' <expr>* ')')? ; "
" comment : /\\\\[^\\n]*/ ; "
" expr : ( <pragma> | <def> | <command> | <quotation> "
" | <number> | <list> | <table> | <string> "
" | <word> | <comment> ) ; "
" number : ( /0x[0-9A-Fa-f]+/ | /-?[0-9]+/ ) ; "
" string : /\"(\\\\.|[^\"])*\"/ ; "
" word : /[a-zA-Z0-9_!?.,@#$%^&*_+\\-=><|\\/]+/ ; "
" def : \"def\" <word> '{' <expr>* '}' ; "
" command : <word> ':' <expr>+ ';' ; "
" list : '(' <expr>* ')' ; "
" table : '{' <expr>* '}' ; "
" quotation : '[' <expr>* ']' ; "
" program : /^/ <expr>* /$/ ; ",
Pragma, Comment, Expr, Number, String, Word, Definition, Command, List,
Table, Quotation, Program, NULL);
static Ast *parse_expr_at(Lx *lx);
// crash if i do a woopsie
if (err != NULL) {
mpc_err_print(err);
mpc_err_delete(err);
abort();
static void parse_block(Lx *lx, Ast *parent, int close_token) {
while (1) {
if (lx->kind == TOK_EOF) {
if (close_token != TOK_EOF)
fprintf(stderr, "syntax error: unexpected EOF, expected '%c'\n",
close_token);
break;
}
if (lx->kind == close_token) {
lexer_next(lx);
break;
}
Ast *expr = parse_expr_at(lx);
*yar_append(&parent->children) = expr;
}
}
V parser_deinit(V) {
mpc_cleanup(12, Pragma, Comment, Expr, Number, String, Word, Definition,
Command, List, Table, Quotation, Program);
static Ast *parse_expr_at(Lx *lx) {
int kind = lx->kind;
I line = lx->start_line;
I col = lx->start_col;
if (kind == TOK_WORD) {
char *text = lx->items;
if (strcmp(text, "def") == 0) {
Ast *node = ast_new(AST_DEF, line, col);
lexer_next(lx);
if (lx->kind != TOK_WORD) {
fprintf(stderr, "syntax error: expected word after 'def' at %ld:%ld\n",
(long)line + 1, (long)col + 1);
return node;
}
node->name = strdup(lx->items);
lexer_next(lx);
if (lx->kind != '{') {
fprintf(stderr,
"syntax error: expected '{' after def name at %ld:%ld\n",
(long)lx->start_line + 1, (long)lx->start_col + 1);
return node;
}
lexer_next(lx);
parse_block(lx, node, '}');
return node;
}
size_t len = strlen(text);
if (len > 0 && text[len - 1] == ':') {
Ast *node = ast_new(AST_CMD, line, col);
node->name = strndup(text, len - 1);
lexer_next(lx);
parse_block(lx, node, ';');
return node;
}
if (text[0] == '#') {
Ast *node = ast_new(AST_PRAGMA, line, col);
node->name = strdup(text);
lexer_next(lx);
if (lx->kind == '(') {
lexer_next(lx);
parse_block(lx, node, ')');
}
return node;
}
char *end;
long val = strtol(text, &end, 0);
if (*end == '\0') {
Ast *node = ast_new(AST_INT, line, col);
node->int_val = val;
lexer_next(lx);
return node;
}
Ast *node = ast_new(AST_WORD, line, col);
node->name = strdup(text);
lexer_next(lx);
return node;
}
if (kind == TOK_STRING) {
Ast *node = ast_new(AST_STR, line, col);
node->name = strdup(lx->items);
lexer_next(lx);
return node;
}
if (kind == '[') {
Ast *node = ast_new(AST_QUOTE, line, col);
lexer_next(lx);
parse_block(lx, node, ']');
return node;
}
if (kind == '{') {
Ast *node = ast_new(AST_TABLE, line, col);
lexer_next(lx);
parse_block(lx, node, '}');
return node;
}
if (kind == '(') {
Ast *node = ast_new(AST_LIST, line, col);
lexer_next(lx);
parse_block(lx, node, ')');
return node;
}
if (kind == TOK_INVALID) {
fprintf(stderr, "syntax error: invalid token at %ld:%ld\n", (long)line + 1,
(long)col + 1);
} else {
fprintf(stderr, "syntax error: unexpected token '%c' (%d) at %ld:%ld\n",
kind, kind, (long)line + 1, (long)col + 1);
}
lexer_next(lx);
return NULL;
}
Ast *parser_parse(Lx *lx) {
Ast *root = ast_new(AST_PROGRAM, 0, 0);
lexer_next(lx);
parse_block(lx, root, TOK_EOF);
return root;
}

View file

@ -2,11 +2,34 @@
#define PARSER_H
#include "common.h"
#include "vendor/mpc.h"
#include "lexer.h"
#include "vendor/yar.h"
V parser_init(V);
V parser_deinit(V);
enum {
AST_PROGRAM,
AST_INT,
AST_STR,
AST_WORD,
AST_LIST,
AST_TABLE,
AST_QUOTE,
AST_DEF,
AST_CMD,
AST_PRAGMA,
};
extern mpc_parser_t *Program;
typedef struct Ast {
I type;
char *name;
I int_val;
struct {
struct Ast **items;
Z count, capacity;
} children;
I line, col;
} Ast;
Ast *parser_parse(Lx *lx);
void ast_free(Ast *ast);
#endif

View file

@ -1,7 +1,6 @@
#ifndef PRIMITIVE_H
#define PRIMITIVE_H
#include "common.h"
#include "vm.h"
typedef struct Pr {

View file

@ -1,13 +1,56 @@
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "object.h"
#include "print.h"
#include "string.h"
#include "userdata.h"
#include "vendor/mpc.h"
static V print_string(Str *s) {
putchar('"');
for (Z i = 0; i < s->len; i++) {
unsigned char c = s->data[i];
switch (c) {
case '\t':
printf("\\t");
break;
case '\n':
printf("\\n");
break;
case '\r':
printf("\\r");
break;
case '\b':
printf("\\b");
break;
case '\v':
printf("\\v");
break;
case '\f':
printf("\\f");
break;
case '\0':
printf("\\0");
break;
case '\x1b':
printf("\\e");
break;
case '\\':
printf("\\\\");
break;
case '\"':
printf("\\\"");
break;
default:
if (c < 32 || c > 126) {
printf("\\x%02x;", c);
} else {
putchar(c);
}
}
}
putchar('"');
}
V print(O o) {
if (o == NIL) {
@ -27,14 +70,8 @@ V print(O o) {
printf("<curried>");
break;
case OBJ_STR: {
// TODO: make this binary safe
Str *s = string_unwrap(o);
char *escaped = malloc(s->len + 1);
memcpy(escaped, s->data, s->len);
escaped[s->len] = 0;
escaped = mpcf_escape(escaped);
printf("\"%s\"", escaped);
free(escaped);
print_string(s);
break;
}
case OBJ_USERDATA: {

View file

@ -206,8 +206,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
O obj2 = vm_pop(vm);
O obj1 = vm_pop(vm);
vm_push(vm, obj1);
vm_push(vm, obj1);
vm_push(vm, obj2);
vm_push(vm, obj1);
vm_push(vm, obj2);
break;
}
@ -223,10 +223,10 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
O c = vm_pop(vm);
O b = vm_pop(vm);
O a = vm_pop(vm);
vm_push(vm, d);
vm_push(vm, c);
vm_push(vm, b);
vm_push(vm, d);
vm_push(vm, a);
vm_push(vm, b);
break;
}
case OP_NIP: {
@ -302,21 +302,21 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
vm_rpush(vm, vm->chunk, vm->ip);
do_call:
switch (type(quot)) {
case TYPE_QUOT: {
case OBJ_QUOT: {
Bc **ptr = (Bc **)(UNBOX(quot) + 1);
Bc *chunk = *ptr;
vm->chunk = chunk;
vm->ip = chunk->items;
break;
}
case TYPE_COMPOSE: {
case OBJ_COMPOSE: {
Qo *comp = (Qo *)(UNBOX(quot) + 1);
vm_rpush(vm, vm->trampoline, vm->trampoline->items);
vm->rsp[-1].obj = comp->second;
quot = comp->first;
goto do_call;
}
case TYPE_CURRY: {
case OBJ_CURRY: {
Qc *curry = (Qc *)(UNBOX(quot) + 1);
vm_push(vm, curry->value);
quot = curry->callable;
@ -345,21 +345,21 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
O quot = vm_pop(vm);
do_tail_call:
switch (type(quot)) {
case TYPE_QUOT: {
case OBJ_QUOT: {
Bc **ptr = (Bc **)(UNBOX(quot) + 1);
Bc *chunk = *ptr;
vm->chunk = chunk;
vm->ip = chunk->items;
break;
}
case TYPE_COMPOSE: {
case OBJ_COMPOSE: {
Qo *comp = (Qo *)(UNBOX(quot) + 1);
vm_rpush(vm, vm->trampoline, vm->trampoline->items);
vm->rsp[-1].obj = comp->second;
quot = comp->first;
goto do_tail_call;
}
case TYPE_CURRY: {
case OBJ_CURRY: {
Qc *curry = (Qc *)(UNBOX(quot) + 1);
vm_push(vm, curry->value);
quot = curry->callable;
@ -491,10 +491,10 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
}
case OP_CONCAT: {
O b = vm_pop(vm);
if (type(b) != TYPE_STR)
if (type(b) != OBJ_STR)
vm_error(vm, VM_ERR_TYPE, "expected string");
O a = vm_pop(vm);
if (type(a) != TYPE_STR)
if (type(a) != OBJ_STR)
vm_error(vm, VM_ERR_TYPE, "expected string");
vm_push(vm, string_concat(vm, a, b));
break;

View file

@ -14,6 +14,7 @@ def 3dip { swap [2dip] dip }
def keep { over [call] dip }
def 2keep { [2dup] dip 2dip }
def 3keep { [dup 2over dig] dip 3dip }
def bi { [keep] dip call }
def tri { [[keep] dip keep] dip call }