From 7efa99d0648a231766a2000d97d084ad61264d2b Mon Sep 17 00:00:00 2001 From: "Javier B. Torres" Date: Wed, 28 Jan 2026 11:57:33 -0300 Subject: [PATCH] new compiler/parser --- meson.build | 1 - src/compile.c | 267 +++++++++++++++++------------------------------- src/compile.h | 7 +- src/debug.c | 4 +- src/gc.h | 2 +- src/lexer.c | 74 ++++++++++---- src/lexer.h | 3 + src/main.c | 42 ++++---- src/object.c | 4 +- src/object.h | 16 +-- src/parser.c | 189 ++++++++++++++++++++++++++-------- src/parser.h | 31 +++++- src/primitive.h | 1 - src/print.c | 57 +++++++++-- src/vm.c | 22 ++-- std.grr | 1 + 16 files changed, 419 insertions(+), 302 deletions(-) diff --git a/meson.build b/meson.build index 39c778e..d3141f1 100644 --- a/meson.build +++ b/meson.build @@ -27,7 +27,6 @@ sources = [ 'src/userdata.c', 'src/vm.c', 'src/vendor/linenoise.c', - 'src/vendor/mpc.c', 'src/vendor/yar.c', ] diff --git a/src/compile.c b/src/compile.c index 7542684..747f048 100644 --- a/src/compile.c +++ b/src/compile.c @@ -7,12 +7,11 @@ #include "debug.h" #include "gc.h" #include "object.h" +#include "parser.h" #include "src/primitive.h" #include "string.h" -#include "vm.h" - -#include "vendor/mpc.h" #include "vendor/yar.h" +#include "vm.h" // clang-format off struct { @@ -26,6 +25,7 @@ struct { {"2dup", {OP_2DUP, 0}}, {"2drop", {OP_2DROP, 0}}, {"2swap", {OP_2SWAP, 0}}, + {"2over", {OP_2TOR, OP_2DUP, OP_2FROMR, OP_2SWAP, 0}}, {"over", {OP_OVER, 0}}, {"nip", {OP_NIP, 0}}, {"bury", {OP_BURY, 0}}, @@ -118,8 +118,7 @@ static V optim_tailcall(Bc *chunk) { } } -static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); -static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); +static I compile_expr(Cm *cm, Ast *node); static I compile_constant(Cm *cm, O value, I line, I col) { I idx = chunk_add_constant(cm->chunk, value); @@ -169,31 +168,16 @@ static I compile_call(Cm *cm, const char *name, I line, I col) { return 1; } -static I compile_command(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { - curr = mpc_ast_traverse_next(next); - const char *name = curr->contents; - I name_line = curr->state.row; - I name_col = curr->state.col; - (void)mpc_ast_traverse_next(next); - curr = mpc_ast_traverse_next(next); - while (curr != NULL) { - if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, ";") == 0) - break; - I res = compile_expr(cm, curr, next); - if (!res) +static I compile_command(Cm *cm, Ast *node) { + for (size_t i = 0; i < node->children.count; i++) { + if (!compile_expr(cm, node->children.items[i])) return 0; - curr = mpc_ast_traverse_next(next); } - compile_call(cm, name, name_line, name_col); - return 1; + return compile_call(cm, node->name, node->line, node->col); } -static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { - (void)mpc_ast_traverse_next(next); // skip 'def' - curr = mpc_ast_traverse_next(next); - const char *name = arena_strdup(cm->arena, curr->contents); - (void)mpc_ast_traverse_next(next); // skip '{' - +static I compile_definition(Cm *cm, Ast *node) { + const char *name = arena_strdup(cm->arena, node->name); Dt *entry = upsert(cm->dictionary, name, cm->arena); Cm inner = {0}; @@ -202,19 +186,14 @@ static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { inner.vm = cm->vm; inner.dictionary = cm->dictionary; - curr = mpc_ast_traverse_next(next); - while (curr != NULL) { - if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "}") == 0) - break; - if (!compile_expr(&inner, curr, next)) { + for (size_t i = 0; i < node->children.count; i++) { + if (!compile_expr(&inner, node->children.items[i])) { chunk_release(inner.chunk); return 0; } - curr = mpc_ast_traverse_next(next); } - chunk_emit_byte_with_line(inner.chunk, OP_RETURN, curr->state.row, - curr->state.col); + chunk_emit_byte_with_line(inner.chunk, OP_RETURN, node->line, node->col); optim_tailcall(inner.chunk); entry->chunk = inner.chunk; @@ -226,7 +205,7 @@ static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { return 1; } -static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { +static O compile_quotation_obj(Cm *cm, Ast *node) { Cm inner = {0}; inner.arena = cm->arena; @@ -234,20 +213,13 @@ static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { inner.vm = cm->vm; inner.dictionary = cm->dictionary; - (void)mpc_ast_traverse_next(next); - curr = mpc_ast_traverse_next(next); - while (curr != NULL) { - if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "]") == 0) - break; - I res = compile_expr(&inner, curr, next); - if (!res) { + for (size_t i = 0; i < node->children.count; i++) { + if (!compile_expr(&inner, node->children.items[i])) { chunk_release(inner.chunk); - return res; + return NIL; } - curr = mpc_ast_traverse_next(next); } - chunk_emit_byte_with_line(inner.chunk, OP_RETURN, curr->state.row, - curr->state.col); + chunk_emit_byte_with_line(inner.chunk, OP_RETURN, node->line, node->col); optim_tailcall(inner.chunk); Hd *hd = gc_alloc(cm->vm, sizeof(Hd) + sizeof(Bc *)); @@ -258,158 +230,103 @@ static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { return BOX(hd); } -static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next, - I line, I col) { - return compile_constant(cm, compile_quotation_obj(cm, curr, next), line, col); +static I compile_quotation(Cm *cm, Ast *node) { + O obj = compile_quotation_obj(cm, node); + if (obj == NIL) + return 0; + return compile_constant(cm, obj, node->line, node->col); } -static I compile_pragma(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { - (void)mpc_ast_traverse_next(next); - curr = mpc_ast_traverse_next(next); - const char *name = curr->contents; - I line = curr->state.row; - I col = curr->state.col; - curr = mpc_ast_traverse_next(next); - I has_args = 0; - - if (curr != NULL && strcmp(curr->tag, "char") == 0 && - strcmp(curr->contents, "(") == 0) { - has_args = 1; - curr = mpc_ast_traverse_next(next); // Skip '(' - } - - if (strcmp(name, "load") == 0) { - if (!has_args) { - fprintf(stderr, - "compiler error at %ld:%ld: #load requires a filename argument\n", - line + 1, col + 1); +static I compile_pragma(Cm *cm, Ast *node) { + if (strcmp(node->name, "#load") == 0) { + if (node->children.count == 0) { + fprintf(stderr, "compiler error: #load requires argument\n"); return 0; } - if (!strstr(curr->tag, "expr|string")) { - fprintf(stderr, - "compiler error at %ld:%ld: #load requires a string argument\n", - line + 1, col + 1); + Ast *arg = node->children.items[0]; + if (arg->type != AST_STR) { + fprintf(stderr, "compiler error: #load requires string\n"); return 0; } - char *fname_raw = curr->contents; - Z len = strlen(fname_raw); - char *fname = malloc(len + 1); - memcpy(fname, fname_raw + 1, len - 2); - fname[len - 2] = '\0'; - fname = mpcf_unescape(fname); - - mpc_result_t res; - extern mpc_parser_t *Program; - - if (!mpc_parse_contents(fname, Program, &res)) { - fprintf(stderr, "compiler error at %ld:%ld: failed to parse file '%s':\n", - line + 1, col + 1, fname); - mpc_err_print_to(res.error, stderr); - mpc_err_delete(res.error); - free(fname); + char *fname = arg->name; + FILE *f = fopen(fname, "rb"); + if (!f) { + fprintf(stderr, "compiler error: cannot open file '%s'\n", fname); return 0; } - mpc_ast_trav_t *inner_next = - mpc_ast_traverse_start(res.output, mpc_ast_trav_order_pre); - mpc_ast_t *inner_curr = mpc_ast_traverse_next(&inner_next); + Stream s = {filestream_vtable, f}; + Lx *lx = lexer_make(&s); + Ast *root = parser_parse(lx); - I success = compile_ast(cm, inner_curr, &inner_next); - - mpc_ast_delete(res.output); - - if (!success) { - fprintf(stderr, - "compiler error at %ld:%ld: failed to compile file '%s'\n", - line + 1, col + 1, fname); - free(fname); - return 0; - } - - free(fname); - - curr = mpc_ast_traverse_next(next); - while (curr != NULL) { - if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, ")") == 0) + I success = 1; + for (size_t i = 0; i < root->children.count; i++) { + if (!compile_expr(cm, root->children.items[i])) { + success = 0; break; - curr = mpc_ast_traverse_next(next); + } } - } else { - fprintf(stderr, "compiler warning at %ld:%ld: unknown pragma \"%s\"\n", - line + 1, col + 1, name); - } - if (has_args) { - if (curr == NULL || strcmp(curr->contents, ")") != 0) { - fprintf(stderr, "error at %ld:%ld: expected ')' after pragma arguments\n", - line + 1, col + 1); - return 0; - } + ast_free(root); + lexer_free(lx); + fclose(f); + return success; } - + fprintf(stderr, "compiler warning: unknown pragma \"%s\"\n", node->name); return 1; } -static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { - I line = curr->state.row; - I col = curr->state.col; - if (strstr(curr->tag, "expr|number") != NULL) { - I num = strtol(curr->contents, NULL, 0); - return compile_constant(cm, NUM(num), line, col); - } else if (strstr(curr->tag, "expr|string") != NULL) { - curr->contents[strlen(curr->contents) - 1] = '\0'; - char *unescaped = malloc(strlen(curr->contents + 1) + 1); - strcpy(unescaped, curr->contents + 1); - unescaped = mpcf_unescape(unescaped); - O obj = string_make(cm->vm, unescaped, -1); - free(unescaped); - return compile_constant(cm, obj, line, col); - } else if (strstr(curr->tag, "expr|word") != NULL) { - return compile_call(cm, curr->contents, line, col); - } else if (strstr(curr->tag, "expr|quotation") != NULL) { - return compile_quotation(cm, curr, next, line, col); - } else if (strstr(curr->tag, "expr|def") != NULL) { - return compile_definition(cm, curr, next); - } else if (strstr(curr->tag, "expr|command") != NULL) { - return compile_command(cm, curr, next); - } else if (strstr(curr->tag, "expr|pragma") != NULL) { - return compile_pragma(cm, curr, next); - } else if (strstr(curr->tag, "expr|comment") != NULL) { +static I compile_expr(Cm *cm, Ast *node) { + if (!node) + return 0; + switch (node->type) { + case AST_INT: { + O num = NUM(node->int_val); + return compile_constant(cm, num, node->line, node->col); + } + case AST_STR: { + O obj = string_make(cm->vm, node->name, -1); + return compile_constant(cm, obj, node->line, node->col); + } + case AST_WORD: + return compile_call(cm, node->name, node->line, node->col); + case AST_QUOTE: + return compile_quotation(cm, node); + case AST_DEF: + return compile_definition(cm, node); + case AST_CMD: + return compile_command(cm, node); + case AST_PRAGMA: + return compile_pragma(cm, node); + case AST_PROGRAM: + for (size_t i = 0; i < node->children.count; i++) { + if (!compile_expr(cm, node->children.items[i])) + return 0; + } return 1; - } else { - fprintf(stderr, "compiler error at %ld:%ld: \"%s\" nyi\n", line + 1, - col + 1, curr->tag); + default: + fprintf(stderr, "compiler error: nyi ast type %d\n", (int)node->type); return 0; } } -static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { - (void)mpc_ast_traverse_next(next); - curr = mpc_ast_traverse_next(next); - while (curr != NULL) { - if (strcmp(curr->tag, "regex") == 0 && strcmp(curr->contents, "") == 0) - break; - I res = compile_expr(cm, curr, next); - if (!res) - return res; - curr = mpc_ast_traverse_next(next); +Bc *compile_program(Cm *cm, Ast *ast) { + if (ast->type == AST_PROGRAM) { + for (size_t i = 0; i < ast->children.count; i++) { + if (!compile_expr(cm, ast->children.items[i])) { + chunk_release(cm->chunk); + return NULL; + } + } + } else { + if (!compile_expr(cm, ast)) { + chunk_release(cm->chunk); + return NULL; + } } - return 1; -} - -Bc *compile_program(Cm *cm, mpc_ast_t *ast) { - mpc_ast_trav_t *next = mpc_ast_traverse_start(ast, mpc_ast_trav_order_pre); - mpc_ast_t *curr = mpc_ast_traverse_next(&next); // Begin traversal - - if (!compile_ast(cm, curr, &next)) { - chunk_release(cm->chunk); - return NULL; - } - - Bc *chunk = cm->chunk; - chunk_emit_byte(chunk, OP_RETURN); - optim_tailcall(chunk); - return chunk; + chunk_emit_byte(cm->chunk, OP_RETURN); + optim_tailcall(cm->chunk); + return cm->chunk; } diff --git a/src/compile.h b/src/compile.h index 3db8ce2..51cea45 100644 --- a/src/compile.h +++ b/src/compile.h @@ -4,10 +4,9 @@ #include "chunk.h" #include "gc.h" #include "vm.h" +#include "parser.h" -#include "vendor/mpc.h" - -#define COMPILER_DEBUG DEBUG +#define COMPILER_DEBUG 0 /** Compiler context */ typedef struct Cm { @@ -19,4 +18,4 @@ typedef struct Cm { V compiler_init(Cm *, Vm *, const char *); V compiler_deinit(Cm *); -Bc *compile_program(Cm *, mpc_ast_t *); +Bc *compile_program(Cm *, Ast *); diff --git a/src/debug.c b/src/debug.c index eda0ee6..6374c16 100644 --- a/src/debug.c +++ b/src/debug.c @@ -3,8 +3,8 @@ #include "chunk.h" #include "debug.h" #include "dictionary.h" +#include "primitive.h" #include "print.h" -#include "src/primitive.h" #include "vm.h" static I decode_sleb128(U8 *ptr, Z *bytes_read) { @@ -70,7 +70,7 @@ static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent) { print(obj); printf(")"); - if (!IMM(obj) && obj != NIL && type(obj) == TYPE_QUOT) { + if (!IMM(obj) && obj != NIL && type(obj) == OBJ_QUOT) { putchar('\n'); Hd *hdr = UNBOX(obj); Bc **chunk_ptr = (Bc **)(hdr + 1); diff --git a/src/gc.h b/src/gc.h index 2f28ba5..c3bb177 100644 --- a/src/gc.h +++ b/src/gc.h @@ -4,7 +4,7 @@ #include "common.h" #include "object.h" -#define GC_DEBUG 0 +#define GC_DEBUG 1 #if GC_DEBUG #define HEAP_BYTES (8 * 1024) #else diff --git a/src/lexer.c b/src/lexer.c index 5d851d7..9b45afd 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,12 +1,42 @@ #include #include -#include #include #include #include "lexer.h" #include "vendor/yar.h" +Lx *lexer_make(Stream *s) { + Lx *lx = calloc(1, sizeof(Lx)); + lx->stream = s; + return lx; +} + +V lexer_free(Lx *lx) { + yar_free(lx); + free(lx); +} + +static int lx_getc(Lx *lx) { + int c = ST_GETC(lx->stream); + if (c == '\n') { + lx->curr_line++; + lx->curr_col = 0; + } else if (c != -1) { + lx->curr_col++; + } + return c; +} + +static void lx_ungetc(Lx *lx, int c) { + ST_UNGETC(c, lx->stream); + if (c == '\n') { + lx->curr_line--; + } else if (c != -1) { + lx->curr_col--; + } +} + static inline int is_delimiter(int i) { return i == '(' || i == ')' || i == '[' || i == ']' || i == '{' || i == '}' || i == ';' || i == '\\' || i == '"'; @@ -24,7 +54,7 @@ static int getc_ws(Lx *lx) { if (ST_EOF(lx->stream)) return -1; for (;;) { - int ch = ST_GETC(lx->stream); + int ch = lx_getc(lx); if (isspace(ch)) continue; return ch; @@ -32,21 +62,21 @@ static int getc_ws(Lx *lx) { } static int scanword(Lx *lx) { - int next = ST_GETC(lx->stream); + int next = lx_getc(lx); for (;;) { if (next == -1) { - if (lx->cursor == 0) + if (lx->count == 0) lx->kind = TOK_EOF; appendbyte(lx, 0); return lx->kind; } else if (is_delimiter(next) || isspace(next)) { - ST_UNGETC(next, lx->stream); + lx_ungetc(lx, next); appendbyte(lx, 0); return lx->kind; } else { appendbyte(lx, next); - next = ST_GETC(lx->stream); + next = lx_getc(lx); continue; } } @@ -58,7 +88,7 @@ static void scanescape(Lx *lx) { Rune tmp; for (;;) { - next = ST_GETC(lx->stream); + next = lx_getc(lx); if (next == -1) { errx(1, "unterminated hex sequence '%s'", escbuf); @@ -77,22 +107,28 @@ static void scanescape(Lx *lx) { } tmp = strtol(escbuf, &escptr, 16); - if (*escptr == '\0') - appendrune(lx, tmp); - else + if (*escptr == '\0') { + if (tmp < 256) { + appendbyte(lx, (U8)(tmp & 255)); + } else { + appendrune(lx, tmp); + } + + } else { errx(1, "invalid hex sequence '%s'", escbuf); + } } static int scanstring(Lx *lx) { int next; for (;;) { - next = ST_GETC(lx->stream); + next = lx_getc(lx); switch (next) { case -1: goto eof; case '\\': - next = ST_GETC(lx->stream); + next = lx_getc(lx); if (next == -1) goto eof; switch (next) { @@ -128,8 +164,7 @@ static int scanstring(Lx *lx) { scanescape(lx); break; default: - fprintf(stderr, "unknown escape sequence '\\%c'\n", next); - abort(); + return (lx->kind = TOK_INVALID); } break; case '"': @@ -141,13 +176,13 @@ static int scanstring(Lx *lx) { } eof: - errx(1, "unterminated string literal"); - return 0; + return (lx->kind = TOK_INVALID); } I lexer_next(Lx *lx) { int next; lx->cursor = 0; + lx->count = 0; if (ST_EOF(lx->stream)) { lx->kind = TOK_EOF; @@ -156,9 +191,12 @@ I lexer_next(Lx *lx) { next = getc_ws(lx); + lx->start_line = lx->curr_line; + lx->start_col = (lx->curr_col > 0) ? lx->curr_col - 1 : 0; + switch (next) { case '\\': - for (; next != '\n'; next = ST_GETC(lx->stream)) + for (; next != '\n'; next = lx_getc(lx)) ; return lexer_next(lx); case '(': @@ -172,7 +210,7 @@ I lexer_next(Lx *lx) { case '"': return scanstring(lx); default: - ST_UNGETC(next, lx->stream); + lx_ungetc(lx, next); lx->kind = TOK_WORD; return scanword(lx); }; diff --git a/src/lexer.h b/src/lexer.h index 217beef..f3fa2de 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -22,12 +22,15 @@ enum { typedef struct Lx { I kind; I cursor; + I curr_line, curr_col; + I start_line, start_col; Stream *stream; char *items; Z count, capacity; } Lx; Lx *lexer_make(Stream *); +V lexer_free(Lx *lx); I lexer_next(Lx *); #endif diff --git a/src/main.c b/src/main.c index 76ab02a..5d292cc 100644 --- a/src/main.c +++ b/src/main.c @@ -1,5 +1,6 @@ #include #include +#include #include "chunk.h" #include "compile.h" @@ -8,7 +9,6 @@ #include "vm.h" #include "vendor/linenoise.h" -#include "vendor/mpc.h" #define REPL_BUFFER_SIZE 4096 @@ -18,16 +18,18 @@ I repl(void) { char *line; while ((line = linenoise("growl> ")) != NULL) { - mpc_result_t res; - if (!mpc_parse("", line, Program, &res)) { - mpc_err_print_to(res.error, stderr); - mpc_err_delete(res.error); - continue; - } + Buf b = { line, (int)strlen(line), 0, -1 }; + Stream s = { bufstream_vtable, &b }; + + Lx *lx = lexer_make(&s); + Ast *root = parser_parse(lx); + Cm cm = {0}; compiler_init(&cm, &vm, ""); - Bc *chunk = compile_program(&cm, res.output); - mpc_ast_delete(res.output); + Bc *chunk = compile_program(&cm, root); + ast_free(root); + lexer_free(lx); + if (chunk != NULL) { vm_run(&vm, chunk, 0); chunk_release(chunk); @@ -44,18 +46,23 @@ I loadfile(const char *fname) { Vm vm = {0}; vm_init(&vm); - mpc_result_t res; - if (!mpc_parse_contents(fname, Program, &res)) { - mpc_err_print_to(res.error, stderr); - mpc_err_delete(res.error); - return 1; + FILE *f = fopen(fname, "rb"); + if (!f) { + fprintf(stderr, "error: cannot open file '%s'\n", fname); + return 1; } + Stream s = { filestream_vtable, f }; + Lx *lx = lexer_make(&s); + Ast *root = parser_parse(lx); + Cm cm = {0}; compiler_init(&cm, &vm, fname); - Bc *chunk = compile_program(&cm, res.output); - mpc_ast_delete(res.output); + Bc *chunk = compile_program(&cm, root); + ast_free(root); + lexer_free(lx); + fclose(f); if (chunk != NULL) { #if COMPILER_DEBUG @@ -72,9 +79,6 @@ I loadfile(const char *fname) { } int main(int argc, const char *argv[]) { - parser_init(); - atexit(parser_deinit); - switch (argc) { case 1: return repl(); diff --git a/src/object.c b/src/object.c index 6a6de05..c947ee0 100644 --- a/src/object.c +++ b/src/object.c @@ -2,9 +2,9 @@ I type(O o) { if (o == NIL) - return TYPE_NIL; + return OBJ_NIL; if (IMM(o)) - return TYPE_NUM; + return OBJ_NUM; Hd *h = UNBOX(o); return h->type; } diff --git a/src/object.h b/src/object.h index f987b5f..3233892 100644 --- a/src/object.h +++ b/src/object.h @@ -11,25 +11,17 @@ #define ORD(x) ((intptr_t)(x) >> 1) enum { + OBJ_NIL = 0, + OBJ_NUM = 1, OBJ_FWD = 2, OBJ_QUOT, OBJ_COMPOSE, OBJ_CURRY, OBJ_STR, + OBJ_ARRAY, OBJ_USERDATA, }; -enum { - TYPE_NIL = 0, - TYPE_NUM = 1, - TYPE_FWD = OBJ_FWD, - TYPE_QUOT = OBJ_QUOT, - TYPE_COMPOSE = OBJ_COMPOSE, - TYPE_CURRY = OBJ_CURRY, - TYPE_STR = OBJ_STR, - TYPE_USERDATA = OBJ_USERDATA, -}; - typedef uintptr_t O; /** Object header */ @@ -50,7 +42,7 @@ typedef struct Qc { I type(O); static inline I callable(O o) { I t = type(o); - return t == TYPE_QUOT || t == TYPE_COMPOSE || t == TYPE_CURRY; + return t == OBJ_QUOT || t == OBJ_COMPOSE || t == OBJ_CURRY; } #endif diff --git a/src/parser.c b/src/parser.c index 0723efc..724bea2 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,51 +1,156 @@ #include "parser.h" -#include "vendor/mpc.h" +#include +#include +#include -mpc_parser_t *Pragma, *Comment, *Expr, *Number, *String, *Word, *Definition, - *Command, *List, *Table, *Quotation, *Program; +static Ast *ast_new(I type, I line, I col) { + Ast *node = calloc(1, sizeof(Ast)); + node->type = type; + node->line = line; + node->col = col; + return node; +} -V parser_init(V) { - Pragma = mpc_new("pragma"); - Comment = mpc_new("comment"); - Expr = mpc_new("expr"); - Number = mpc_new("number"); - String = mpc_new("string"); - Word = mpc_new("word"); - Definition = mpc_new("def"); - Command = mpc_new("command"); - List = mpc_new("list"); - Table = mpc_new("table"); - Quotation = mpc_new("quotation"); - Program = mpc_new("program"); +void ast_free(Ast *ast) { + if (!ast) + return; + if (ast->name) + free(ast->name); + for (size_t i = 0; i < ast->children.count; i++) { + ast_free(ast->children.items[i]); + } + yar_free(&ast->children); + free(ast); +} - mpc_err_t *err = mpca_lang( - MPCA_LANG_DEFAULT, - " pragma : '#' ('(' * ')')? ; " - " comment : /\\\\[^\\n]*/ ; " - " expr : ( | | | " - " | | | | " - " | | ) ; " - " number : ( /0x[0-9A-Fa-f]+/ | /-?[0-9]+/ ) ; " - " string : /\"(\\\\.|[^\"])*\"/ ; " - " word : /[a-zA-Z0-9_!?.,@#$%^&*_+\\-=><|\\/]+/ ; " - " def : \"def\" '{' * '}' ; " - " command : ':' + ';' ; " - " list : '(' * ')' ; " - " table : '{' * '}' ; " - " quotation : '[' * ']' ; " - " program : /^/ * /$/ ; ", - Pragma, Comment, Expr, Number, String, Word, Definition, Command, List, - Table, Quotation, Program, NULL); +static Ast *parse_expr_at(Lx *lx); - // crash if i do a woopsie - if (err != NULL) { - mpc_err_print(err); - mpc_err_delete(err); - abort(); +static void parse_block(Lx *lx, Ast *parent, int close_token) { + while (1) { + if (lx->kind == TOK_EOF) { + if (close_token != TOK_EOF) + fprintf(stderr, "syntax error: unexpected EOF, expected '%c'\n", + close_token); + break; + } + if (lx->kind == close_token) { + lexer_next(lx); + break; + } + Ast *expr = parse_expr_at(lx); + *yar_append(&parent->children) = expr; } } -V parser_deinit(V) { - mpc_cleanup(12, Pragma, Comment, Expr, Number, String, Word, Definition, - Command, List, Table, Quotation, Program); +static Ast *parse_expr_at(Lx *lx) { + int kind = lx->kind; + I line = lx->start_line; + I col = lx->start_col; + + if (kind == TOK_WORD) { + char *text = lx->items; + + if (strcmp(text, "def") == 0) { + Ast *node = ast_new(AST_DEF, line, col); + lexer_next(lx); + + if (lx->kind != TOK_WORD) { + fprintf(stderr, "syntax error: expected word after 'def' at %ld:%ld\n", + (long)line + 1, (long)col + 1); + return node; + } + node->name = strdup(lx->items); + lexer_next(lx); + + if (lx->kind != '{') { + fprintf(stderr, + "syntax error: expected '{' after def name at %ld:%ld\n", + (long)lx->start_line + 1, (long)lx->start_col + 1); + return node; + } + lexer_next(lx); + parse_block(lx, node, '}'); + return node; + } + + size_t len = strlen(text); + if (len > 0 && text[len - 1] == ':') { + Ast *node = ast_new(AST_CMD, line, col); + node->name = strndup(text, len - 1); + lexer_next(lx); + parse_block(lx, node, ';'); + return node; + } + + if (text[0] == '#') { + Ast *node = ast_new(AST_PRAGMA, line, col); + node->name = strdup(text); + lexer_next(lx); + if (lx->kind == '(') { + lexer_next(lx); + parse_block(lx, node, ')'); + } + return node; + } + + char *end; + long val = strtol(text, &end, 0); + if (*end == '\0') { + Ast *node = ast_new(AST_INT, line, col); + node->int_val = val; + lexer_next(lx); + return node; + } + + Ast *node = ast_new(AST_WORD, line, col); + node->name = strdup(text); + lexer_next(lx); + return node; + } + + if (kind == TOK_STRING) { + Ast *node = ast_new(AST_STR, line, col); + node->name = strdup(lx->items); + lexer_next(lx); + return node; + } + + if (kind == '[') { + Ast *node = ast_new(AST_QUOTE, line, col); + lexer_next(lx); + parse_block(lx, node, ']'); + return node; + } + + if (kind == '{') { + Ast *node = ast_new(AST_TABLE, line, col); + lexer_next(lx); + parse_block(lx, node, '}'); + return node; + } + + if (kind == '(') { + Ast *node = ast_new(AST_LIST, line, col); + lexer_next(lx); + parse_block(lx, node, ')'); + return node; + } + + if (kind == TOK_INVALID) { + fprintf(stderr, "syntax error: invalid token at %ld:%ld\n", (long)line + 1, + (long)col + 1); + } else { + fprintf(stderr, "syntax error: unexpected token '%c' (%d) at %ld:%ld\n", + kind, kind, (long)line + 1, (long)col + 1); + } + lexer_next(lx); + + return NULL; +} + +Ast *parser_parse(Lx *lx) { + Ast *root = ast_new(AST_PROGRAM, 0, 0); + lexer_next(lx); + parse_block(lx, root, TOK_EOF); + return root; } diff --git a/src/parser.h b/src/parser.h index c991dd4..ea8ddda 100644 --- a/src/parser.h +++ b/src/parser.h @@ -2,11 +2,34 @@ #define PARSER_H #include "common.h" -#include "vendor/mpc.h" +#include "lexer.h" +#include "vendor/yar.h" -V parser_init(V); -V parser_deinit(V); +enum { + AST_PROGRAM, + AST_INT, + AST_STR, + AST_WORD, + AST_LIST, + AST_TABLE, + AST_QUOTE, + AST_DEF, + AST_CMD, + AST_PRAGMA, +}; -extern mpc_parser_t *Program; +typedef struct Ast { + I type; + char *name; + I int_val; + struct { + struct Ast **items; + Z count, capacity; + } children; + I line, col; +} Ast; + +Ast *parser_parse(Lx *lx); +void ast_free(Ast *ast); #endif diff --git a/src/primitive.h b/src/primitive.h index ab58696..2e6ca97 100644 --- a/src/primitive.h +++ b/src/primitive.h @@ -1,7 +1,6 @@ #ifndef PRIMITIVE_H #define PRIMITIVE_H -#include "common.h" #include "vm.h" typedef struct Pr { diff --git a/src/print.c b/src/print.c index 8549e5d..ffb6037 100644 --- a/src/print.c +++ b/src/print.c @@ -1,13 +1,56 @@ #include #include -#include -#include #include "object.h" #include "print.h" #include "string.h" #include "userdata.h" -#include "vendor/mpc.h" + +static V print_string(Str *s) { + putchar('"'); + for (Z i = 0; i < s->len; i++) { + unsigned char c = s->data[i]; + switch (c) { + case '\t': + printf("\\t"); + break; + case '\n': + printf("\\n"); + break; + case '\r': + printf("\\r"); + break; + case '\b': + printf("\\b"); + break; + case '\v': + printf("\\v"); + break; + case '\f': + printf("\\f"); + break; + case '\0': + printf("\\0"); + break; + case '\x1b': + printf("\\e"); + break; + case '\\': + printf("\\\\"); + break; + case '\"': + printf("\\\""); + break; + default: + if (c < 32 || c > 126) { + printf("\\x%02x;", c); + } else { + putchar(c); + } + } + } + putchar('"'); +} V print(O o) { if (o == NIL) { @@ -27,14 +70,8 @@ V print(O o) { printf(""); break; case OBJ_STR: { - // TODO: make this binary safe Str *s = string_unwrap(o); - char *escaped = malloc(s->len + 1); - memcpy(escaped, s->data, s->len); - escaped[s->len] = 0; - escaped = mpcf_escape(escaped); - printf("\"%s\"", escaped); - free(escaped); + print_string(s); break; } case OBJ_USERDATA: { diff --git a/src/vm.c b/src/vm.c index 8394e6c..f90c68e 100644 --- a/src/vm.c +++ b/src/vm.c @@ -206,8 +206,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { O obj2 = vm_pop(vm); O obj1 = vm_pop(vm); vm_push(vm, obj1); - vm_push(vm, obj1); vm_push(vm, obj2); + vm_push(vm, obj1); vm_push(vm, obj2); break; } @@ -223,10 +223,10 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { O c = vm_pop(vm); O b = vm_pop(vm); O a = vm_pop(vm); - vm_push(vm, d); vm_push(vm, c); - vm_push(vm, b); + vm_push(vm, d); vm_push(vm, a); + vm_push(vm, b); break; } case OP_NIP: { @@ -302,21 +302,21 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { vm_rpush(vm, vm->chunk, vm->ip); do_call: switch (type(quot)) { - case TYPE_QUOT: { + case OBJ_QUOT: { Bc **ptr = (Bc **)(UNBOX(quot) + 1); Bc *chunk = *ptr; vm->chunk = chunk; vm->ip = chunk->items; break; } - case TYPE_COMPOSE: { + case OBJ_COMPOSE: { Qo *comp = (Qo *)(UNBOX(quot) + 1); vm_rpush(vm, vm->trampoline, vm->trampoline->items); vm->rsp[-1].obj = comp->second; quot = comp->first; goto do_call; } - case TYPE_CURRY: { + case OBJ_CURRY: { Qc *curry = (Qc *)(UNBOX(quot) + 1); vm_push(vm, curry->value); quot = curry->callable; @@ -345,21 +345,21 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { O quot = vm_pop(vm); do_tail_call: switch (type(quot)) { - case TYPE_QUOT: { + case OBJ_QUOT: { Bc **ptr = (Bc **)(UNBOX(quot) + 1); Bc *chunk = *ptr; vm->chunk = chunk; vm->ip = chunk->items; break; } - case TYPE_COMPOSE: { + case OBJ_COMPOSE: { Qo *comp = (Qo *)(UNBOX(quot) + 1); vm_rpush(vm, vm->trampoline, vm->trampoline->items); vm->rsp[-1].obj = comp->second; quot = comp->first; goto do_tail_call; } - case TYPE_CURRY: { + case OBJ_CURRY: { Qc *curry = (Qc *)(UNBOX(quot) + 1); vm_push(vm, curry->value); quot = curry->callable; @@ -491,10 +491,10 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { } case OP_CONCAT: { O b = vm_pop(vm); - if (type(b) != TYPE_STR) + if (type(b) != OBJ_STR) vm_error(vm, VM_ERR_TYPE, "expected string"); O a = vm_pop(vm); - if (type(a) != TYPE_STR) + if (type(a) != OBJ_STR) vm_error(vm, VM_ERR_TYPE, "expected string"); vm_push(vm, string_concat(vm, a, b)); break; diff --git a/std.grr b/std.grr index 805ccc7..3bcce87 100644 --- a/std.grr +++ b/std.grr @@ -14,6 +14,7 @@ def 3dip { swap [2dip] dip } def keep { over [call] dip } def 2keep { [2dup] dip 2dip } +def 3keep { [dup 2over dig] dip 3dip } def bi { [keep] dip call } def tri { [[keep] dip keep] dip call }