new compiler/parser

This commit is contained in:
Lobo 2026-01-28 11:57:33 -03:00
parent 35bad08a0d
commit 7efa99d064
16 changed files with 419 additions and 302 deletions

View file

@ -27,7 +27,6 @@ sources = [
'src/userdata.c', 'src/userdata.c',
'src/vm.c', 'src/vm.c',
'src/vendor/linenoise.c', 'src/vendor/linenoise.c',
'src/vendor/mpc.c',
'src/vendor/yar.c', 'src/vendor/yar.c',
] ]

View file

@ -7,12 +7,11 @@
#include "debug.h" #include "debug.h"
#include "gc.h" #include "gc.h"
#include "object.h" #include "object.h"
#include "parser.h"
#include "src/primitive.h" #include "src/primitive.h"
#include "string.h" #include "string.h"
#include "vm.h"
#include "vendor/mpc.h"
#include "vendor/yar.h" #include "vendor/yar.h"
#include "vm.h"
// clang-format off // clang-format off
struct { struct {
@ -26,6 +25,7 @@ struct {
{"2dup", {OP_2DUP, 0}}, {"2dup", {OP_2DUP, 0}},
{"2drop", {OP_2DROP, 0}}, {"2drop", {OP_2DROP, 0}},
{"2swap", {OP_2SWAP, 0}}, {"2swap", {OP_2SWAP, 0}},
{"2over", {OP_2TOR, OP_2DUP, OP_2FROMR, OP_2SWAP, 0}},
{"over", {OP_OVER, 0}}, {"over", {OP_OVER, 0}},
{"nip", {OP_NIP, 0}}, {"nip", {OP_NIP, 0}},
{"bury", {OP_BURY, 0}}, {"bury", {OP_BURY, 0}},
@ -118,8 +118,7 @@ static V optim_tailcall(Bc *chunk) {
} }
} }
static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); static I compile_expr(Cm *cm, Ast *node);
static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next);
static I compile_constant(Cm *cm, O value, I line, I col) { static I compile_constant(Cm *cm, O value, I line, I col) {
I idx = chunk_add_constant(cm->chunk, value); I idx = chunk_add_constant(cm->chunk, value);
@ -169,31 +168,16 @@ static I compile_call(Cm *cm, const char *name, I line, I col) {
return 1; return 1;
} }
static I compile_command(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { static I compile_command(Cm *cm, Ast *node) {
curr = mpc_ast_traverse_next(next); for (size_t i = 0; i < node->children.count; i++) {
const char *name = curr->contents; if (!compile_expr(cm, node->children.items[i]))
I name_line = curr->state.row;
I name_col = curr->state.col;
(void)mpc_ast_traverse_next(next);
curr = mpc_ast_traverse_next(next);
while (curr != NULL) {
if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, ";") == 0)
break;
I res = compile_expr(cm, curr, next);
if (!res)
return 0; return 0;
curr = mpc_ast_traverse_next(next);
} }
compile_call(cm, name, name_line, name_col); return compile_call(cm, node->name, node->line, node->col);
return 1;
} }
static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { static I compile_definition(Cm *cm, Ast *node) {
(void)mpc_ast_traverse_next(next); // skip 'def' const char *name = arena_strdup(cm->arena, node->name);
curr = mpc_ast_traverse_next(next);
const char *name = arena_strdup(cm->arena, curr->contents);
(void)mpc_ast_traverse_next(next); // skip '{'
Dt *entry = upsert(cm->dictionary, name, cm->arena); Dt *entry = upsert(cm->dictionary, name, cm->arena);
Cm inner = {0}; Cm inner = {0};
@ -202,19 +186,14 @@ static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
inner.vm = cm->vm; inner.vm = cm->vm;
inner.dictionary = cm->dictionary; inner.dictionary = cm->dictionary;
curr = mpc_ast_traverse_next(next); for (size_t i = 0; i < node->children.count; i++) {
while (curr != NULL) { if (!compile_expr(&inner, node->children.items[i])) {
if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "}") == 0)
break;
if (!compile_expr(&inner, curr, next)) {
chunk_release(inner.chunk); chunk_release(inner.chunk);
return 0; return 0;
} }
curr = mpc_ast_traverse_next(next);
} }
chunk_emit_byte_with_line(inner.chunk, OP_RETURN, curr->state.row, chunk_emit_byte_with_line(inner.chunk, OP_RETURN, node->line, node->col);
curr->state.col);
optim_tailcall(inner.chunk); optim_tailcall(inner.chunk);
entry->chunk = inner.chunk; entry->chunk = inner.chunk;
@ -226,7 +205,7 @@ static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
return 1; return 1;
} }
static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { static O compile_quotation_obj(Cm *cm, Ast *node) {
Cm inner = {0}; Cm inner = {0};
inner.arena = cm->arena; inner.arena = cm->arena;
@ -234,20 +213,13 @@ static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
inner.vm = cm->vm; inner.vm = cm->vm;
inner.dictionary = cm->dictionary; inner.dictionary = cm->dictionary;
(void)mpc_ast_traverse_next(next); for (size_t i = 0; i < node->children.count; i++) {
curr = mpc_ast_traverse_next(next); if (!compile_expr(&inner, node->children.items[i])) {
while (curr != NULL) {
if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "]") == 0)
break;
I res = compile_expr(&inner, curr, next);
if (!res) {
chunk_release(inner.chunk); chunk_release(inner.chunk);
return res; return NIL;
} }
curr = mpc_ast_traverse_next(next);
} }
chunk_emit_byte_with_line(inner.chunk, OP_RETURN, curr->state.row, chunk_emit_byte_with_line(inner.chunk, OP_RETURN, node->line, node->col);
curr->state.col);
optim_tailcall(inner.chunk); optim_tailcall(inner.chunk);
Hd *hd = gc_alloc(cm->vm, sizeof(Hd) + sizeof(Bc *)); Hd *hd = gc_alloc(cm->vm, sizeof(Hd) + sizeof(Bc *));
@ -258,158 +230,103 @@ static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
return BOX(hd); return BOX(hd);
} }
static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next, static I compile_quotation(Cm *cm, Ast *node) {
I line, I col) { O obj = compile_quotation_obj(cm, node);
return compile_constant(cm, compile_quotation_obj(cm, curr, next), line, col); if (obj == NIL)
return 0;
return compile_constant(cm, obj, node->line, node->col);
} }
static I compile_pragma(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { static I compile_pragma(Cm *cm, Ast *node) {
(void)mpc_ast_traverse_next(next); if (strcmp(node->name, "#load") == 0) {
curr = mpc_ast_traverse_next(next); if (node->children.count == 0) {
const char *name = curr->contents; fprintf(stderr, "compiler error: #load requires argument\n");
I line = curr->state.row;
I col = curr->state.col;
curr = mpc_ast_traverse_next(next);
I has_args = 0;
if (curr != NULL && strcmp(curr->tag, "char") == 0 &&
strcmp(curr->contents, "(") == 0) {
has_args = 1;
curr = mpc_ast_traverse_next(next); // Skip '('
}
if (strcmp(name, "load") == 0) {
if (!has_args) {
fprintf(stderr,
"compiler error at %ld:%ld: #load requires a filename argument\n",
line + 1, col + 1);
return 0; return 0;
} }
if (!strstr(curr->tag, "expr|string")) { Ast *arg = node->children.items[0];
fprintf(stderr, if (arg->type != AST_STR) {
"compiler error at %ld:%ld: #load requires a string argument\n", fprintf(stderr, "compiler error: #load requires string\n");
line + 1, col + 1);
return 0; return 0;
} }
char *fname_raw = curr->contents; char *fname = arg->name;
Z len = strlen(fname_raw); FILE *f = fopen(fname, "rb");
char *fname = malloc(len + 1); if (!f) {
memcpy(fname, fname_raw + 1, len - 2); fprintf(stderr, "compiler error: cannot open file '%s'\n", fname);
fname[len - 2] = '\0';
fname = mpcf_unescape(fname);
mpc_result_t res;
extern mpc_parser_t *Program;
if (!mpc_parse_contents(fname, Program, &res)) {
fprintf(stderr, "compiler error at %ld:%ld: failed to parse file '%s':\n",
line + 1, col + 1, fname);
mpc_err_print_to(res.error, stderr);
mpc_err_delete(res.error);
free(fname);
return 0; return 0;
} }
mpc_ast_trav_t *inner_next = Stream s = {filestream_vtable, f};
mpc_ast_traverse_start(res.output, mpc_ast_trav_order_pre); Lx *lx = lexer_make(&s);
mpc_ast_t *inner_curr = mpc_ast_traverse_next(&inner_next); Ast *root = parser_parse(lx);
I success = compile_ast(cm, inner_curr, &inner_next); I success = 1;
for (size_t i = 0; i < root->children.count; i++) {
mpc_ast_delete(res.output); if (!compile_expr(cm, root->children.items[i])) {
success = 0;
if (!success) {
fprintf(stderr,
"compiler error at %ld:%ld: failed to compile file '%s'\n",
line + 1, col + 1, fname);
free(fname);
return 0;
}
free(fname);
curr = mpc_ast_traverse_next(next);
while (curr != NULL) {
if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, ")") == 0)
break; break;
curr = mpc_ast_traverse_next(next);
} }
} else {
fprintf(stderr, "compiler warning at %ld:%ld: unknown pragma \"%s\"\n",
line + 1, col + 1, name);
} }
if (has_args) { ast_free(root);
if (curr == NULL || strcmp(curr->contents, ")") != 0) { lexer_free(lx);
fprintf(stderr, "error at %ld:%ld: expected ')' after pragma arguments\n", fclose(f);
line + 1, col + 1); return success;
}
fprintf(stderr, "compiler warning: unknown pragma \"%s\"\n", node->name);
return 1;
}
static I compile_expr(Cm *cm, Ast *node) {
if (!node)
return 0;
switch (node->type) {
case AST_INT: {
O num = NUM(node->int_val);
return compile_constant(cm, num, node->line, node->col);
}
case AST_STR: {
O obj = string_make(cm->vm, node->name, -1);
return compile_constant(cm, obj, node->line, node->col);
}
case AST_WORD:
return compile_call(cm, node->name, node->line, node->col);
case AST_QUOTE:
return compile_quotation(cm, node);
case AST_DEF:
return compile_definition(cm, node);
case AST_CMD:
return compile_command(cm, node);
case AST_PRAGMA:
return compile_pragma(cm, node);
case AST_PROGRAM:
for (size_t i = 0; i < node->children.count; i++) {
if (!compile_expr(cm, node->children.items[i]))
return 0;
}
return 1;
default:
fprintf(stderr, "compiler error: nyi ast type %d\n", (int)node->type);
return 0; return 0;
} }
} }
return 1; Bc *compile_program(Cm *cm, Ast *ast) {
} if (ast->type == AST_PROGRAM) {
for (size_t i = 0; i < ast->children.count; i++) {
static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { if (!compile_expr(cm, ast->children.items[i])) {
I line = curr->state.row;
I col = curr->state.col;
if (strstr(curr->tag, "expr|number") != NULL) {
I num = strtol(curr->contents, NULL, 0);
return compile_constant(cm, NUM(num), line, col);
} else if (strstr(curr->tag, "expr|string") != NULL) {
curr->contents[strlen(curr->contents) - 1] = '\0';
char *unescaped = malloc(strlen(curr->contents + 1) + 1);
strcpy(unescaped, curr->contents + 1);
unescaped = mpcf_unescape(unescaped);
O obj = string_make(cm->vm, unescaped, -1);
free(unescaped);
return compile_constant(cm, obj, line, col);
} else if (strstr(curr->tag, "expr|word") != NULL) {
return compile_call(cm, curr->contents, line, col);
} else if (strstr(curr->tag, "expr|quotation") != NULL) {
return compile_quotation(cm, curr, next, line, col);
} else if (strstr(curr->tag, "expr|def") != NULL) {
return compile_definition(cm, curr, next);
} else if (strstr(curr->tag, "expr|command") != NULL) {
return compile_command(cm, curr, next);
} else if (strstr(curr->tag, "expr|pragma") != NULL) {
return compile_pragma(cm, curr, next);
} else if (strstr(curr->tag, "expr|comment") != NULL) {
return 1;
} else {
fprintf(stderr, "compiler error at %ld:%ld: \"%s\" nyi\n", line + 1,
col + 1, curr->tag);
return 0;
}
}
static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
(void)mpc_ast_traverse_next(next);
curr = mpc_ast_traverse_next(next);
while (curr != NULL) {
if (strcmp(curr->tag, "regex") == 0 && strcmp(curr->contents, "") == 0)
break;
I res = compile_expr(cm, curr, next);
if (!res)
return res;
curr = mpc_ast_traverse_next(next);
}
return 1;
}
Bc *compile_program(Cm *cm, mpc_ast_t *ast) {
mpc_ast_trav_t *next = mpc_ast_traverse_start(ast, mpc_ast_trav_order_pre);
mpc_ast_t *curr = mpc_ast_traverse_next(&next); // Begin traversal
if (!compile_ast(cm, curr, &next)) {
chunk_release(cm->chunk); chunk_release(cm->chunk);
return NULL; return NULL;
} }
}
Bc *chunk = cm->chunk; } else {
chunk_emit_byte(chunk, OP_RETURN); if (!compile_expr(cm, ast)) {
optim_tailcall(chunk); chunk_release(cm->chunk);
return chunk; return NULL;
}
}
chunk_emit_byte(cm->chunk, OP_RETURN);
optim_tailcall(cm->chunk);
return cm->chunk;
} }

View file

@ -4,10 +4,9 @@
#include "chunk.h" #include "chunk.h"
#include "gc.h" #include "gc.h"
#include "vm.h" #include "vm.h"
#include "parser.h"
#include "vendor/mpc.h" #define COMPILER_DEBUG 0
#define COMPILER_DEBUG DEBUG
/** Compiler context */ /** Compiler context */
typedef struct Cm { typedef struct Cm {
@ -19,4 +18,4 @@ typedef struct Cm {
V compiler_init(Cm *, Vm *, const char *); V compiler_init(Cm *, Vm *, const char *);
V compiler_deinit(Cm *); V compiler_deinit(Cm *);
Bc *compile_program(Cm *, mpc_ast_t *); Bc *compile_program(Cm *, Ast *);

View file

@ -3,8 +3,8 @@
#include "chunk.h" #include "chunk.h"
#include "debug.h" #include "debug.h"
#include "dictionary.h" #include "dictionary.h"
#include "primitive.h"
#include "print.h" #include "print.h"
#include "src/primitive.h"
#include "vm.h" #include "vm.h"
static I decode_sleb128(U8 *ptr, Z *bytes_read) { static I decode_sleb128(U8 *ptr, Z *bytes_read) {
@ -70,7 +70,7 @@ static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent) {
print(obj); print(obj);
printf(")"); printf(")");
if (!IMM(obj) && obj != NIL && type(obj) == TYPE_QUOT) { if (!IMM(obj) && obj != NIL && type(obj) == OBJ_QUOT) {
putchar('\n'); putchar('\n');
Hd *hdr = UNBOX(obj); Hd *hdr = UNBOX(obj);
Bc **chunk_ptr = (Bc **)(hdr + 1); Bc **chunk_ptr = (Bc **)(hdr + 1);

View file

@ -4,7 +4,7 @@
#include "common.h" #include "common.h"
#include "object.h" #include "object.h"
#define GC_DEBUG 0 #define GC_DEBUG 1
#if GC_DEBUG #if GC_DEBUG
#define HEAP_BYTES (8 * 1024) #define HEAP_BYTES (8 * 1024)
#else #else

View file

@ -1,12 +1,42 @@
#include <ctype.h> #include <ctype.h>
#include <err.h> #include <err.h>
#include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <utf.h> #include <utf.h>
#include "lexer.h" #include "lexer.h"
#include "vendor/yar.h" #include "vendor/yar.h"
Lx *lexer_make(Stream *s) {
Lx *lx = calloc(1, sizeof(Lx));
lx->stream = s;
return lx;
}
V lexer_free(Lx *lx) {
yar_free(lx);
free(lx);
}
static int lx_getc(Lx *lx) {
int c = ST_GETC(lx->stream);
if (c == '\n') {
lx->curr_line++;
lx->curr_col = 0;
} else if (c != -1) {
lx->curr_col++;
}
return c;
}
static void lx_ungetc(Lx *lx, int c) {
ST_UNGETC(c, lx->stream);
if (c == '\n') {
lx->curr_line--;
} else if (c != -1) {
lx->curr_col--;
}
}
static inline int is_delimiter(int i) { static inline int is_delimiter(int i) {
return i == '(' || i == ')' || i == '[' || i == ']' || i == '{' || i == '}' || return i == '(' || i == ')' || i == '[' || i == ']' || i == '{' || i == '}' ||
i == ';' || i == '\\' || i == '"'; i == ';' || i == '\\' || i == '"';
@ -24,7 +54,7 @@ static int getc_ws(Lx *lx) {
if (ST_EOF(lx->stream)) if (ST_EOF(lx->stream))
return -1; return -1;
for (;;) { for (;;) {
int ch = ST_GETC(lx->stream); int ch = lx_getc(lx);
if (isspace(ch)) if (isspace(ch))
continue; continue;
return ch; return ch;
@ -32,21 +62,21 @@ static int getc_ws(Lx *lx) {
} }
static int scanword(Lx *lx) { static int scanword(Lx *lx) {
int next = ST_GETC(lx->stream); int next = lx_getc(lx);
for (;;) { for (;;) {
if (next == -1) { if (next == -1) {
if (lx->cursor == 0) if (lx->count == 0)
lx->kind = TOK_EOF; lx->kind = TOK_EOF;
appendbyte(lx, 0); appendbyte(lx, 0);
return lx->kind; return lx->kind;
} else if (is_delimiter(next) || isspace(next)) { } else if (is_delimiter(next) || isspace(next)) {
ST_UNGETC(next, lx->stream); lx_ungetc(lx, next);
appendbyte(lx, 0); appendbyte(lx, 0);
return lx->kind; return lx->kind;
} else { } else {
appendbyte(lx, next); appendbyte(lx, next);
next = ST_GETC(lx->stream); next = lx_getc(lx);
continue; continue;
} }
} }
@ -58,7 +88,7 @@ static void scanescape(Lx *lx) {
Rune tmp; Rune tmp;
for (;;) { for (;;) {
next = ST_GETC(lx->stream); next = lx_getc(lx);
if (next == -1) { if (next == -1) {
errx(1, "unterminated hex sequence '%s'", escbuf); errx(1, "unterminated hex sequence '%s'", escbuf);
@ -77,22 +107,28 @@ static void scanescape(Lx *lx) {
} }
tmp = strtol(escbuf, &escptr, 16); tmp = strtol(escbuf, &escptr, 16);
if (*escptr == '\0') if (*escptr == '\0') {
if (tmp < 256) {
appendbyte(lx, (U8)(tmp & 255));
} else {
appendrune(lx, tmp); appendrune(lx, tmp);
else }
} else {
errx(1, "invalid hex sequence '%s'", escbuf); errx(1, "invalid hex sequence '%s'", escbuf);
} }
}
static int scanstring(Lx *lx) { static int scanstring(Lx *lx) {
int next; int next;
for (;;) { for (;;) {
next = ST_GETC(lx->stream); next = lx_getc(lx);
switch (next) { switch (next) {
case -1: case -1:
goto eof; goto eof;
case '\\': case '\\':
next = ST_GETC(lx->stream); next = lx_getc(lx);
if (next == -1) if (next == -1)
goto eof; goto eof;
switch (next) { switch (next) {
@ -128,8 +164,7 @@ static int scanstring(Lx *lx) {
scanescape(lx); scanescape(lx);
break; break;
default: default:
fprintf(stderr, "unknown escape sequence '\\%c'\n", next); return (lx->kind = TOK_INVALID);
abort();
} }
break; break;
case '"': case '"':
@ -141,13 +176,13 @@ static int scanstring(Lx *lx) {
} }
eof: eof:
errx(1, "unterminated string literal"); return (lx->kind = TOK_INVALID);
return 0;
} }
I lexer_next(Lx *lx) { I lexer_next(Lx *lx) {
int next; int next;
lx->cursor = 0; lx->cursor = 0;
lx->count = 0;
if (ST_EOF(lx->stream)) { if (ST_EOF(lx->stream)) {
lx->kind = TOK_EOF; lx->kind = TOK_EOF;
@ -156,9 +191,12 @@ I lexer_next(Lx *lx) {
next = getc_ws(lx); next = getc_ws(lx);
lx->start_line = lx->curr_line;
lx->start_col = (lx->curr_col > 0) ? lx->curr_col - 1 : 0;
switch (next) { switch (next) {
case '\\': case '\\':
for (; next != '\n'; next = ST_GETC(lx->stream)) for (; next != '\n'; next = lx_getc(lx))
; ;
return lexer_next(lx); return lexer_next(lx);
case '(': case '(':
@ -172,7 +210,7 @@ I lexer_next(Lx *lx) {
case '"': case '"':
return scanstring(lx); return scanstring(lx);
default: default:
ST_UNGETC(next, lx->stream); lx_ungetc(lx, next);
lx->kind = TOK_WORD; lx->kind = TOK_WORD;
return scanword(lx); return scanword(lx);
}; };

View file

@ -22,12 +22,15 @@ enum {
typedef struct Lx { typedef struct Lx {
I kind; I kind;
I cursor; I cursor;
I curr_line, curr_col;
I start_line, start_col;
Stream *stream; Stream *stream;
char *items; char *items;
Z count, capacity; Z count, capacity;
} Lx; } Lx;
Lx *lexer_make(Stream *); Lx *lexer_make(Stream *);
V lexer_free(Lx *lx);
I lexer_next(Lx *); I lexer_next(Lx *);
#endif #endif

View file

@ -1,5 +1,6 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include "chunk.h" #include "chunk.h"
#include "compile.h" #include "compile.h"
@ -8,7 +9,6 @@
#include "vm.h" #include "vm.h"
#include "vendor/linenoise.h" #include "vendor/linenoise.h"
#include "vendor/mpc.h"
#define REPL_BUFFER_SIZE 4096 #define REPL_BUFFER_SIZE 4096
@ -18,16 +18,18 @@ I repl(void) {
char *line; char *line;
while ((line = linenoise("growl> ")) != NULL) { while ((line = linenoise("growl> ")) != NULL) {
mpc_result_t res; Buf b = { line, (int)strlen(line), 0, -1 };
if (!mpc_parse("<repl>", line, Program, &res)) { Stream s = { bufstream_vtable, &b };
mpc_err_print_to(res.error, stderr);
mpc_err_delete(res.error); Lx *lx = lexer_make(&s);
continue; Ast *root = parser_parse(lx);
}
Cm cm = {0}; Cm cm = {0};
compiler_init(&cm, &vm, "<repl>"); compiler_init(&cm, &vm, "<repl>");
Bc *chunk = compile_program(&cm, res.output); Bc *chunk = compile_program(&cm, root);
mpc_ast_delete(res.output); ast_free(root);
lexer_free(lx);
if (chunk != NULL) { if (chunk != NULL) {
vm_run(&vm, chunk, 0); vm_run(&vm, chunk, 0);
chunk_release(chunk); chunk_release(chunk);
@ -44,18 +46,23 @@ I loadfile(const char *fname) {
Vm vm = {0}; Vm vm = {0};
vm_init(&vm); vm_init(&vm);
mpc_result_t res; FILE *f = fopen(fname, "rb");
if (!mpc_parse_contents(fname, Program, &res)) { if (!f) {
mpc_err_print_to(res.error, stderr); fprintf(stderr, "error: cannot open file '%s'\n", fname);
mpc_err_delete(res.error);
return 1; return 1;
} }
Stream s = { filestream_vtable, f };
Lx *lx = lexer_make(&s);
Ast *root = parser_parse(lx);
Cm cm = {0}; Cm cm = {0};
compiler_init(&cm, &vm, fname); compiler_init(&cm, &vm, fname);
Bc *chunk = compile_program(&cm, res.output); Bc *chunk = compile_program(&cm, root);
mpc_ast_delete(res.output); ast_free(root);
lexer_free(lx);
fclose(f);
if (chunk != NULL) { if (chunk != NULL) {
#if COMPILER_DEBUG #if COMPILER_DEBUG
@ -72,9 +79,6 @@ I loadfile(const char *fname) {
} }
int main(int argc, const char *argv[]) { int main(int argc, const char *argv[]) {
parser_init();
atexit(parser_deinit);
switch (argc) { switch (argc) {
case 1: case 1:
return repl(); return repl();

View file

@ -2,9 +2,9 @@
I type(O o) { I type(O o) {
if (o == NIL) if (o == NIL)
return TYPE_NIL; return OBJ_NIL;
if (IMM(o)) if (IMM(o))
return TYPE_NUM; return OBJ_NUM;
Hd *h = UNBOX(o); Hd *h = UNBOX(o);
return h->type; return h->type;
} }

View file

@ -11,25 +11,17 @@
#define ORD(x) ((intptr_t)(x) >> 1) #define ORD(x) ((intptr_t)(x) >> 1)
enum { enum {
OBJ_NIL = 0,
OBJ_NUM = 1,
OBJ_FWD = 2, OBJ_FWD = 2,
OBJ_QUOT, OBJ_QUOT,
OBJ_COMPOSE, OBJ_COMPOSE,
OBJ_CURRY, OBJ_CURRY,
OBJ_STR, OBJ_STR,
OBJ_ARRAY,
OBJ_USERDATA, OBJ_USERDATA,
}; };
enum {
TYPE_NIL = 0,
TYPE_NUM = 1,
TYPE_FWD = OBJ_FWD,
TYPE_QUOT = OBJ_QUOT,
TYPE_COMPOSE = OBJ_COMPOSE,
TYPE_CURRY = OBJ_CURRY,
TYPE_STR = OBJ_STR,
TYPE_USERDATA = OBJ_USERDATA,
};
typedef uintptr_t O; typedef uintptr_t O;
/** Object header */ /** Object header */
@ -50,7 +42,7 @@ typedef struct Qc {
I type(O); I type(O);
static inline I callable(O o) { static inline I callable(O o) {
I t = type(o); I t = type(o);
return t == TYPE_QUOT || t == TYPE_COMPOSE || t == TYPE_CURRY; return t == OBJ_QUOT || t == OBJ_COMPOSE || t == OBJ_CURRY;
} }
#endif #endif

View file

@ -1,51 +1,156 @@
#include "parser.h" #include "parser.h"
#include "vendor/mpc.h" #include <stdio.h>
#include <stdlib.h>
#include <string.h>
mpc_parser_t *Pragma, *Comment, *Expr, *Number, *String, *Word, *Definition, static Ast *ast_new(I type, I line, I col) {
*Command, *List, *Table, *Quotation, *Program; Ast *node = calloc(1, sizeof(Ast));
node->type = type;
node->line = line;
node->col = col;
return node;
}
V parser_init(V) { void ast_free(Ast *ast) {
Pragma = mpc_new("pragma"); if (!ast)
Comment = mpc_new("comment"); return;
Expr = mpc_new("expr"); if (ast->name)
Number = mpc_new("number"); free(ast->name);
String = mpc_new("string"); for (size_t i = 0; i < ast->children.count; i++) {
Word = mpc_new("word"); ast_free(ast->children.items[i]);
Definition = mpc_new("def"); }
Command = mpc_new("command"); yar_free(&ast->children);
List = mpc_new("list"); free(ast);
Table = mpc_new("table"); }
Quotation = mpc_new("quotation");
Program = mpc_new("program");
mpc_err_t *err = mpca_lang( static Ast *parse_expr_at(Lx *lx);
MPCA_LANG_DEFAULT,
" pragma : '#' <word> ('(' <expr>* ')')? ; "
" comment : /\\\\[^\\n]*/ ; "
" expr : ( <pragma> | <def> | <command> | <quotation> "
" | <number> | <list> | <table> | <string> "
" | <word> | <comment> ) ; "
" number : ( /0x[0-9A-Fa-f]+/ | /-?[0-9]+/ ) ; "
" string : /\"(\\\\.|[^\"])*\"/ ; "
" word : /[a-zA-Z0-9_!?.,@#$%^&*_+\\-=><|\\/]+/ ; "
" def : \"def\" <word> '{' <expr>* '}' ; "
" command : <word> ':' <expr>+ ';' ; "
" list : '(' <expr>* ')' ; "
" table : '{' <expr>* '}' ; "
" quotation : '[' <expr>* ']' ; "
" program : /^/ <expr>* /$/ ; ",
Pragma, Comment, Expr, Number, String, Word, Definition, Command, List,
Table, Quotation, Program, NULL);
// crash if i do a woopsie static void parse_block(Lx *lx, Ast *parent, int close_token) {
if (err != NULL) { while (1) {
mpc_err_print(err); if (lx->kind == TOK_EOF) {
mpc_err_delete(err); if (close_token != TOK_EOF)
abort(); fprintf(stderr, "syntax error: unexpected EOF, expected '%c'\n",
close_token);
break;
}
if (lx->kind == close_token) {
lexer_next(lx);
break;
}
Ast *expr = parse_expr_at(lx);
*yar_append(&parent->children) = expr;
} }
} }
V parser_deinit(V) { static Ast *parse_expr_at(Lx *lx) {
mpc_cleanup(12, Pragma, Comment, Expr, Number, String, Word, Definition, int kind = lx->kind;
Command, List, Table, Quotation, Program); I line = lx->start_line;
I col = lx->start_col;
if (kind == TOK_WORD) {
char *text = lx->items;
if (strcmp(text, "def") == 0) {
Ast *node = ast_new(AST_DEF, line, col);
lexer_next(lx);
if (lx->kind != TOK_WORD) {
fprintf(stderr, "syntax error: expected word after 'def' at %ld:%ld\n",
(long)line + 1, (long)col + 1);
return node;
}
node->name = strdup(lx->items);
lexer_next(lx);
if (lx->kind != '{') {
fprintf(stderr,
"syntax error: expected '{' after def name at %ld:%ld\n",
(long)lx->start_line + 1, (long)lx->start_col + 1);
return node;
}
lexer_next(lx);
parse_block(lx, node, '}');
return node;
}
size_t len = strlen(text);
if (len > 0 && text[len - 1] == ':') {
Ast *node = ast_new(AST_CMD, line, col);
node->name = strndup(text, len - 1);
lexer_next(lx);
parse_block(lx, node, ';');
return node;
}
if (text[0] == '#') {
Ast *node = ast_new(AST_PRAGMA, line, col);
node->name = strdup(text);
lexer_next(lx);
if (lx->kind == '(') {
lexer_next(lx);
parse_block(lx, node, ')');
}
return node;
}
char *end;
long val = strtol(text, &end, 0);
if (*end == '\0') {
Ast *node = ast_new(AST_INT, line, col);
node->int_val = val;
lexer_next(lx);
return node;
}
Ast *node = ast_new(AST_WORD, line, col);
node->name = strdup(text);
lexer_next(lx);
return node;
}
if (kind == TOK_STRING) {
Ast *node = ast_new(AST_STR, line, col);
node->name = strdup(lx->items);
lexer_next(lx);
return node;
}
if (kind == '[') {
Ast *node = ast_new(AST_QUOTE, line, col);
lexer_next(lx);
parse_block(lx, node, ']');
return node;
}
if (kind == '{') {
Ast *node = ast_new(AST_TABLE, line, col);
lexer_next(lx);
parse_block(lx, node, '}');
return node;
}
if (kind == '(') {
Ast *node = ast_new(AST_LIST, line, col);
lexer_next(lx);
parse_block(lx, node, ')');
return node;
}
if (kind == TOK_INVALID) {
fprintf(stderr, "syntax error: invalid token at %ld:%ld\n", (long)line + 1,
(long)col + 1);
} else {
fprintf(stderr, "syntax error: unexpected token '%c' (%d) at %ld:%ld\n",
kind, kind, (long)line + 1, (long)col + 1);
}
lexer_next(lx);
return NULL;
}
Ast *parser_parse(Lx *lx) {
Ast *root = ast_new(AST_PROGRAM, 0, 0);
lexer_next(lx);
parse_block(lx, root, TOK_EOF);
return root;
} }

View file

@ -2,11 +2,34 @@
#define PARSER_H #define PARSER_H
#include "common.h" #include "common.h"
#include "vendor/mpc.h" #include "lexer.h"
#include "vendor/yar.h"
V parser_init(V); enum {
V parser_deinit(V); AST_PROGRAM,
AST_INT,
AST_STR,
AST_WORD,
AST_LIST,
AST_TABLE,
AST_QUOTE,
AST_DEF,
AST_CMD,
AST_PRAGMA,
};
extern mpc_parser_t *Program; typedef struct Ast {
I type;
char *name;
I int_val;
struct {
struct Ast **items;
Z count, capacity;
} children;
I line, col;
} Ast;
Ast *parser_parse(Lx *lx);
void ast_free(Ast *ast);
#endif #endif

View file

@ -1,7 +1,6 @@
#ifndef PRIMITIVE_H #ifndef PRIMITIVE_H
#define PRIMITIVE_H #define PRIMITIVE_H
#include "common.h"
#include "vm.h" #include "vm.h"
typedef struct Pr { typedef struct Pr {

View file

@ -1,13 +1,56 @@
#include <inttypes.h> #include <inttypes.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "object.h" #include "object.h"
#include "print.h" #include "print.h"
#include "string.h" #include "string.h"
#include "userdata.h" #include "userdata.h"
#include "vendor/mpc.h"
static V print_string(Str *s) {
putchar('"');
for (Z i = 0; i < s->len; i++) {
unsigned char c = s->data[i];
switch (c) {
case '\t':
printf("\\t");
break;
case '\n':
printf("\\n");
break;
case '\r':
printf("\\r");
break;
case '\b':
printf("\\b");
break;
case '\v':
printf("\\v");
break;
case '\f':
printf("\\f");
break;
case '\0':
printf("\\0");
break;
case '\x1b':
printf("\\e");
break;
case '\\':
printf("\\\\");
break;
case '\"':
printf("\\\"");
break;
default:
if (c < 32 || c > 126) {
printf("\\x%02x;", c);
} else {
putchar(c);
}
}
}
putchar('"');
}
V print(O o) { V print(O o) {
if (o == NIL) { if (o == NIL) {
@ -27,14 +70,8 @@ V print(O o) {
printf("<curried>"); printf("<curried>");
break; break;
case OBJ_STR: { case OBJ_STR: {
// TODO: make this binary safe
Str *s = string_unwrap(o); Str *s = string_unwrap(o);
char *escaped = malloc(s->len + 1); print_string(s);
memcpy(escaped, s->data, s->len);
escaped[s->len] = 0;
escaped = mpcf_escape(escaped);
printf("\"%s\"", escaped);
free(escaped);
break; break;
} }
case OBJ_USERDATA: { case OBJ_USERDATA: {

View file

@ -206,8 +206,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
O obj2 = vm_pop(vm); O obj2 = vm_pop(vm);
O obj1 = vm_pop(vm); O obj1 = vm_pop(vm);
vm_push(vm, obj1); vm_push(vm, obj1);
vm_push(vm, obj1);
vm_push(vm, obj2); vm_push(vm, obj2);
vm_push(vm, obj1);
vm_push(vm, obj2); vm_push(vm, obj2);
break; break;
} }
@ -223,10 +223,10 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
O c = vm_pop(vm); O c = vm_pop(vm);
O b = vm_pop(vm); O b = vm_pop(vm);
O a = vm_pop(vm); O a = vm_pop(vm);
vm_push(vm, d);
vm_push(vm, c); vm_push(vm, c);
vm_push(vm, b); vm_push(vm, d);
vm_push(vm, a); vm_push(vm, a);
vm_push(vm, b);
break; break;
} }
case OP_NIP: { case OP_NIP: {
@ -302,21 +302,21 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
vm_rpush(vm, vm->chunk, vm->ip); vm_rpush(vm, vm->chunk, vm->ip);
do_call: do_call:
switch (type(quot)) { switch (type(quot)) {
case TYPE_QUOT: { case OBJ_QUOT: {
Bc **ptr = (Bc **)(UNBOX(quot) + 1); Bc **ptr = (Bc **)(UNBOX(quot) + 1);
Bc *chunk = *ptr; Bc *chunk = *ptr;
vm->chunk = chunk; vm->chunk = chunk;
vm->ip = chunk->items; vm->ip = chunk->items;
break; break;
} }
case TYPE_COMPOSE: { case OBJ_COMPOSE: {
Qo *comp = (Qo *)(UNBOX(quot) + 1); Qo *comp = (Qo *)(UNBOX(quot) + 1);
vm_rpush(vm, vm->trampoline, vm->trampoline->items); vm_rpush(vm, vm->trampoline, vm->trampoline->items);
vm->rsp[-1].obj = comp->second; vm->rsp[-1].obj = comp->second;
quot = comp->first; quot = comp->first;
goto do_call; goto do_call;
} }
case TYPE_CURRY: { case OBJ_CURRY: {
Qc *curry = (Qc *)(UNBOX(quot) + 1); Qc *curry = (Qc *)(UNBOX(quot) + 1);
vm_push(vm, curry->value); vm_push(vm, curry->value);
quot = curry->callable; quot = curry->callable;
@ -345,21 +345,21 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
O quot = vm_pop(vm); O quot = vm_pop(vm);
do_tail_call: do_tail_call:
switch (type(quot)) { switch (type(quot)) {
case TYPE_QUOT: { case OBJ_QUOT: {
Bc **ptr = (Bc **)(UNBOX(quot) + 1); Bc **ptr = (Bc **)(UNBOX(quot) + 1);
Bc *chunk = *ptr; Bc *chunk = *ptr;
vm->chunk = chunk; vm->chunk = chunk;
vm->ip = chunk->items; vm->ip = chunk->items;
break; break;
} }
case TYPE_COMPOSE: { case OBJ_COMPOSE: {
Qo *comp = (Qo *)(UNBOX(quot) + 1); Qo *comp = (Qo *)(UNBOX(quot) + 1);
vm_rpush(vm, vm->trampoline, vm->trampoline->items); vm_rpush(vm, vm->trampoline, vm->trampoline->items);
vm->rsp[-1].obj = comp->second; vm->rsp[-1].obj = comp->second;
quot = comp->first; quot = comp->first;
goto do_tail_call; goto do_tail_call;
} }
case TYPE_CURRY: { case OBJ_CURRY: {
Qc *curry = (Qc *)(UNBOX(quot) + 1); Qc *curry = (Qc *)(UNBOX(quot) + 1);
vm_push(vm, curry->value); vm_push(vm, curry->value);
quot = curry->callable; quot = curry->callable;
@ -491,10 +491,10 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
} }
case OP_CONCAT: { case OP_CONCAT: {
O b = vm_pop(vm); O b = vm_pop(vm);
if (type(b) != TYPE_STR) if (type(b) != OBJ_STR)
vm_error(vm, VM_ERR_TYPE, "expected string"); vm_error(vm, VM_ERR_TYPE, "expected string");
O a = vm_pop(vm); O a = vm_pop(vm);
if (type(a) != TYPE_STR) if (type(a) != OBJ_STR)
vm_error(vm, VM_ERR_TYPE, "expected string"); vm_error(vm, VM_ERR_TYPE, "expected string");
vm_push(vm, string_concat(vm, a, b)); vm_push(vm, string_concat(vm, a, b));
break; break;

View file

@ -14,6 +14,7 @@ def 3dip { swap [2dip] dip }
def keep { over [call] dip } def keep { over [call] dip }
def 2keep { [2dup] dip 2dip } def 2keep { [2dup] dip 2dip }
def 3keep { [dup 2over dig] dip 3dip }
def bi { [keep] dip call } def bi { [keep] dip call }
def tri { [[keep] dip keep] dip call } def tri { [[keep] dip keep] dip call }