From 1185690ce6a6a0b13d55d6fbbf84e381b5c388c3 Mon Sep 17 00:00:00 2001 From: "Javier B. Torres" Date: Tue, 20 Jan 2026 11:05:59 -0300 Subject: [PATCH] * --- meson.build | 6 +- shell.nix | 2 +- src/arena.c | 31 ++++++++++ src/arena.h | 18 ++++++ src/chunk.c | 18 ++++-- src/chunk.h | 3 +- src/common.h | 1 + src/compile.c | 146 ++++++++++++++++++++++++++++++++++++++--------- src/compile.h | 27 +++++---- src/debug.c | 97 +++++++++++++++++++++++++++++-- src/debug.h | 5 +- src/dictionary.c | 39 +++++++++++++ src/dictionary.h | 19 ++++++ src/gc.c | 33 +++++++++-- src/gc.h | 9 ++- src/main.c | 59 +++++++++++++------ src/object.h | 2 +- src/parser.c | 4 +- src/print.c | 8 ++- src/table.c | 0 src/table.h | 0 src/vm.c | 113 +++++++++++++++++++++++++++++++++++- src/vm.h | 27 ++++++++- test.grr | 16 +++++- 24 files changed, 597 insertions(+), 86 deletions(-) create mode 100644 src/arena.c create mode 100644 src/arena.h create mode 100644 src/dictionary.c create mode 100644 src/dictionary.h create mode 100644 src/table.c create mode 100644 src/table.h diff --git a/meson.build b/meson.build index 241e96a..3d5bd45 100644 --- a/meson.build +++ b/meson.build @@ -3,15 +3,17 @@ project( 'c', meson_version : '>= 1.3.0', version : '0.1', - default_options : ['buildtype=debugoptimized', 'c_std=c99', 'warning_level=3'], + default_options : ['buildtype=debugoptimized', 'c_std=c11', 'warning_level=3'], ) sources = [ -'src/gc.c', + 'src/arena.c', 'src/chunk.c', 'src/compile.c', 'src/debug.c', + 'src/dictionary.c', 'src/object.c', + 'src/gc.c', 'src/parser.c', 'src/print.c', 'src/vm.c', diff --git a/shell.nix b/shell.nix index 873dc3e..a903be6 100644 --- a/shell.nix +++ b/shell.nix @@ -3,6 +3,6 @@ pkgs.mkShell { buildInputs = with pkgs; [ clang-tools bear gdb tinycc - meson ninja + meson ninja rlwrap hyperfine ]; } diff --git a/src/arena.c b/src/arena.c new file mode 100644 index 0000000..816de2f --- /dev/null +++ b/src/arena.c @@ -0,0 +1,31 @@ +#include "arena.h" + +#include +#include +#include + +V *_arena_alloc(Ar *ar, I count, I size, I align) { + I pad = -(U)ar->start & (align - 1); + assert(count < (ar->end - ar->start - pad) / size); + V *r = ar->start + pad; + ar->start += pad + count * size; + return memset(r, 0, count * size); +} + +V arena_init(Ar *ar, Z size) { + ar->data = malloc(size); + ar->start = ar->data; + ar->end = ar->start + size; +} + +V arena_free(Ar *ar) { + free(ar->data); + ar->data = ar->start = ar->end = NULL; +} + +char *arena_strdup(Ar *ar, const char *str) { + Z len = strlen(str) + 1; + char *copy = arena_alloc(ar, len, char); + memcpy(copy, str, len); + return copy; +} diff --git a/src/arena.h b/src/arena.h new file mode 100644 index 0000000..5ce565d --- /dev/null +++ b/src/arena.h @@ -0,0 +1,18 @@ +#ifndef ARENA_H +#define ARENA_H + +#include "common.h" + +typedef struct Ar { + U8 *data; + U8 *start, *end; +} Ar; + +#define arena_alloc(a, n, t) (t *)_arena_alloc(a, n, sizeof(t), _Alignof(t)) +V *_arena_alloc(Ar *, ptrdiff_t, ptrdiff_t, ptrdiff_t); + +V arena_init(Ar *, Z); +V arena_free(Ar *); +char *arena_strdup(Ar *, const char *); + +#endif diff --git a/src/chunk.c b/src/chunk.c index 42e8358..2d79899 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -1,30 +1,36 @@ -#include #include #include "chunk.h" - #include "vendor/yar.h" -Bc *chunk_new(V) { +#if CHUNK_DEBUG +#include +#endif + +Bc *chunk_new(const char *name) { Bc *chunk = calloc(1, sizeof(Bc)); + chunk->name = name; chunk->ref = 1; +#if CHUNK_DEBUG + fprintf(stderr, "DEBUG: created chunk %s at %p\n", chunk->name, (V *)chunk); +#endif return chunk; } V chunk_acquire(Bc *chunk) { #if CHUNK_DEBUG - fprintf(stderr, "DEBUG: acquiring chunk at %p\n", (V *)chunk); + fprintf(stderr, "DEBUG: acquiring chunk %s at %p\n", chunk->name, (V *)chunk); #endif chunk->ref++; } V chunk_release(Bc *chunk) { #if CHUNK_DEBUG - fprintf(stderr, "DEBUG: releasing chunk at %p\n", (V *)chunk); + fprintf(stderr, "DEBUG: releasing chunk %s at %p\n", chunk->name, (V *)chunk); #endif if (--chunk->ref == 0) { #if CHUNK_DEBUG - fprintf(stderr, "DEBUG: freeing chunk at %p\n", (V *)chunk); + fprintf(stderr, "DEBUG: freeing chunk %s at %p\n", chunk->name, (V *)chunk); #endif yar_free(&chunk->constants); yar_free(chunk); diff --git a/src/chunk.h b/src/chunk.h index 74b2452..e70a8c7 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -9,6 +9,7 @@ /** Bytecode chunk */ typedef struct Bc { I ref; + const char *name; U8 *items; Z count, capacity; struct { @@ -17,7 +18,7 @@ typedef struct Bc { } constants; } Bc; -Bc *chunk_new(V); +Bc *chunk_new(const char *); V chunk_acquire(Bc *); V chunk_release(Bc *); diff --git a/src/common.h b/src/common.h index b71a897..a8c64ba 100644 --- a/src/common.h +++ b/src/common.h @@ -11,5 +11,6 @@ typedef double F; typedef size_t Z; typedef uint8_t U8; typedef uint32_t U32; +typedef uint64_t U64; #endif diff --git a/src/compile.c b/src/compile.c index 1a6041a..a01cba3 100644 --- a/src/compile.c +++ b/src/compile.c @@ -16,13 +16,41 @@ struct { const char *name; U8 opcode; } primitives[] = { - {"+", OP_ADD}, + {"nil", OP_NIL}, + {"dup", OP_DUP}, + {"drop", OP_DROP}, + {"swap", OP_SWAP}, + {">r", OP_TOR}, + {"r>", OP_FROMR}, {"call", OP_APPLY}, + {"?", OP_CHOOSE}, + {"+", OP_ADD}, + {"-", OP_SUB}, + {"*", OP_MUL}, + {"/", OP_DIV}, + {"%", OP_MOD}, + {"=", OP_EQ}, + {"<>", OP_NEQ}, + {"<", OP_LT}, + {">", OP_GT}, + {"<=", OP_LTE}, + {">=", OP_GTE}, {NULL, 0}, }; // clang-format on +V compiler_init(Cm *cm, Vm *vm, const char *name) { + cm->vm = vm; + cm->arena = &vm->arena; + cm->dictionary = &vm->dictionary; + cm->chunk = chunk_new(name); +} + +V compiler_deinit(Cm *cm) { cm->dictionary = NULL; } + static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); +static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); + static I compile_constant(Cm *cm, O value) { I idx = chunk_add_constant(cm->chunk, value); chunk_emit_byte(cm->chunk, OP_CONST); @@ -30,33 +58,104 @@ static I compile_constant(Cm *cm, O value) { return 1; } -static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { +static I compile_call(Cm *cm, const char *name) { + for (Z i = 0; primitives[i].name != NULL; i++) { + if (strcmp(name, primitives[i].name) == 0) { + chunk_emit_byte(cm->chunk, primitives[i].opcode); + return 1; + } + } + Dt *word = upsert(cm->dictionary, name, NULL); + if (!word) { + fprintf(stderr, "compiler: undefined word '%s'\n", name); + return 0; + } + chunk_emit_byte(cm->chunk, OP_DOWORD); + chunk_emit_sleb128(cm->chunk, (I)word->hash); + return 1; +} + +static I compile_command(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { + curr = mpc_ast_traverse_next(next); + const char *name = curr->contents; + (void)mpc_ast_traverse_next(next); + curr = mpc_ast_traverse_next(next); + while (curr != NULL) { + if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, ";") == 0) + break; + I res = compile_expr(cm, curr, next); + if (!res) + return 0; + curr = mpc_ast_traverse_next(next); + } + compile_call(cm, name); + return 1; +} + +static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { + (void)mpc_ast_traverse_next(next); // skip 'def' + curr = mpc_ast_traverse_next(next); + const char *name = arena_strdup(cm->arena, curr->contents); + (void)mpc_ast_traverse_next(next); // skip '{' + + Dt *entry = upsert(cm->dictionary, name, cm->arena); + Cm inner = {0}; - inner.chunk = chunk_new(); - inner.gc = cm->gc; + inner.arena = cm->arena; + inner.chunk = chunk_new(name); + inner.vm = cm->vm; inner.dictionary = cm->dictionary; - (void)mpc_ast_traverse_next(next); // skip opening bracket + curr = mpc_ast_traverse_next(next); + while (curr != NULL) { + if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "}") == 0) + break; + I res = compile_expr(&inner, curr, next); + if (!res) { + chunk_release(inner.chunk); + return 0; + } + curr = mpc_ast_traverse_next(next); + } + + chunk_emit_byte(inner.chunk, OP_RETURN); + entry->chunk = inner.chunk; + // disassemble(inner.chunk, name, cm->dictionary); + + return 1; +} + +static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { + Cm inner = {0}; + inner.arena = cm->arena; + inner.chunk = chunk_new(""); + inner.vm = cm->vm; + inner.dictionary = cm->dictionary; + + (void)mpc_ast_traverse_next(next); curr = mpc_ast_traverse_next(next); while (curr != NULL) { if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "]") == 0) break; I res = compile_expr(&inner, curr, next); - if (!res) + if (!res) { + chunk_release(inner.chunk); return res; + } curr = mpc_ast_traverse_next(next); } chunk_emit_byte(inner.chunk, OP_RETURN); - Hd *hd = gc_alloc(cm->gc, sizeof(Hd) + sizeof(Bc *)); + Hd *hd = gc_alloc(cm->vm, sizeof(Hd) + sizeof(Bc *)); hd->type = OBJ_QUOT; Bc **chunk_ptr = (Bc **)(hd + 1); *chunk_ptr = inner.chunk; - O quot = BOX(hd); - compile_constant(cm, quot); + return BOX(hd); +} - return 1; +static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { + return compile_constant(cm, compile_quotation_obj(cm, curr, next)); } static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { @@ -64,16 +163,15 @@ static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { I num = strtol(curr->contents, NULL, 0); return compile_constant(cm, NUM(num)); } else if (strstr(curr->tag, "expr|word") != NULL) { - for (Z i = 0; primitives[i].name != NULL; i++) { - if (strcmp(curr->contents, primitives[i].name) == 0) { - chunk_emit_byte(cm->chunk, primitives[i].opcode); - return 1; - } - } - fprintf(stderr, "compiler: dictionary nyi\n"); - return 0; + return compile_call(cm, curr->contents); } else if (strstr(curr->tag, "expr|quotation") != NULL) { return compile_quotation(cm, curr, next); + } else if (strstr(curr->tag, "expr|def") != NULL) { + return compile_definition(cm, curr, next); + } else if (strstr(curr->tag, "expr|command") != NULL) { + return compile_command(cm, curr, next); + } else if (strstr(curr->tag, "expr|comment") != NULL) { + return 1; } else { fprintf(stderr, "compiler: \"%s\" nyi\n", curr->tag); return 0; @@ -97,20 +195,16 @@ static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { return 1; } -Bc *compile_program(Gc *gc, mpc_ast_t *ast) { - Cm cm = {0}; - cm.chunk = chunk_new(); - cm.gc = gc; - +Bc *compile_program(Cm *cm, mpc_ast_t *ast) { mpc_ast_trav_t *next = mpc_ast_traverse_start(ast, mpc_ast_trav_order_pre); mpc_ast_t *curr = mpc_ast_traverse_next(&next); // Begin traversal - if (!compile_ast(&cm, curr, &next)) { - chunk_release(cm.chunk); + if (!compile_ast(cm, curr, &next)) { + chunk_release(cm->chunk); return NULL; } - Bc *chunk = cm.chunk; + Bc *chunk = cm->chunk; chunk_emit_byte(chunk, OP_RETURN); return chunk; } diff --git a/src/compile.h b/src/compile.h index 8895038..919bf19 100644 --- a/src/compile.h +++ b/src/compile.h @@ -1,24 +1,29 @@ #include "common.h" +#include "arena.h" #include "chunk.h" #include "gc.h" +#include "vm.h" #include "vendor/mpc.h" -/** Compiler dictionary */ -typedef struct Cd Cd; -struct Cd { - Cd *child[4]; - const char *name; - Z offset; -}; - +// Forward declaration /** Compiler context */ typedef struct Cm { - Gc *gc; + Vm *vm; // Parent context + Ar *arena; Bc *chunk; - Cd *dictionary; + Dt **dictionary; } Cm; +V compiler_init(Cm *, Vm *, const char *); +V compiler_deinit(Cm *); + +// Hash function for word names +U64 hash64(const char *); + +// Dictionary lookup +Dt *upsert(Dt **, const char *, Ar *); + // The chunk returned by `compile_program` is owned by the caller. -Bc *compile_program(Gc *, mpc_ast_t *); +Bc *compile_program(Cm *, mpc_ast_t *); diff --git a/src/debug.c b/src/debug.c index 811483f..0f76451 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1,6 +1,7 @@ #include #include "debug.h" +#include "dictionary.h" #include "print.h" #include "vm.h" @@ -20,33 +21,70 @@ static I decode_sleb128(U8 *ptr, Z *bytes_read) { return result; } -V disassemble(Bc *chunk, const char *name) { +V disassemble(Bc *chunk, const char *name, Dt **dictionary) { printf("=== %s ===\n", name); Z offset = 0; while (offset < chunk->count) { - offset = disassemble_instruction(chunk, offset); + offset = disassemble_instruction(chunk, offset, dictionary); } } -Z disassemble_instruction(Bc *chunk, Z offset) { +Z disassemble_instruction(Bc *chunk, Z offset, Dt **dictionary) { printf("%04zu ", offset); U8 opcode = chunk->items[offset++]; switch (opcode) { case OP_NOP: printf("NOP\n"); return offset; + case OP_NIL: + printf("NIL\n"); + return offset; case OP_CONST: { Z bytes_read; I idx = decode_sleb128(&chunk->items[offset], &bytes_read); printf("CONST %ld", idx); if (idx >= 0 && idx < (I)chunk->constants.count) { + O obj = chunk->constants.items[idx]; printf(" ("); - print(chunk->constants.items[idx]); + print(obj); printf(")"); + + // If it's a quotation, disassemble it inline + if (!IMM(obj) && obj != NIL && type(obj) == TYPE_QUOT) { + Hd *hdr = UNBOX(obj); + Bc **chunk_ptr = (Bc **)(hdr + 1); + Bc *quot_chunk = *chunk_ptr; + printf("\n"); + + // Disassemble quotation with indentation + for (Z i = 0; i < quot_chunk->count; ) { + printf(" "); + i = disassemble_instruction(quot_chunk, i, dictionary); + } + return offset + bytes_read; + } } printf("\n"); return offset + bytes_read; } + case OP_DROP: { + printf("DROP\n"); + return offset; + } + case OP_DUP: { + printf("DUP\n"); + return offset; + } + case OP_SWAP: { + printf("SWAP\n"); + return offset; + } + case OP_TOR: + printf("TOR\n"); + return offset; + case OP_FROMR: + printf("FROMR\n"); + return offset; case OP_JUMP: { Z bytes_read; I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); @@ -65,15 +103,66 @@ Z disassemble_instruction(Bc *chunk, Z offset) { printf("CALL %ld\n", ofs); return offset + bytes_read; } + case OP_DOWORD: { + Z bytes_read; + I hash = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("DOWORD"); + + if (dictionary && *dictionary) { + Dt *entry = lookup_hash(dictionary, hash); + if (entry != NULL) { + printf(" %s", entry->name); + } else { + printf(" ???"); + } + } else { + printf(" 0x%lx", hash); + } + printf("\n"); + return offset + bytes_read; + } case OP_APPLY: printf("APPLY\n"); return offset; case OP_RETURN: printf("RETURN\n"); return offset; + case OP_CHOOSE: + printf("CHOOSE\n"); + return offset; case OP_ADD: printf("ADD\n"); return offset; + case OP_SUB: + printf("SUB\n"); + return offset; + case OP_MUL: + printf("MUL\n"); + return offset; + case OP_DIV: + printf("DIV\n"); + return offset; + case OP_MOD: + printf("MOD\n"); + return offset; + case OP_EQ: + printf("EQ\n"); + return offset; + case OP_NEQ: + printf("NEQ\n"); + return offset; + case OP_LT: + printf("LT\n"); + return offset; + case OP_GT: + printf("GT\n"); + return offset; + case OP_LTE: + printf("LTE\n"); + return offset; + case OP_GTE: + printf("GTE\n"); + return offset; default: printf("? (%d)\n", opcode); return offset; diff --git a/src/debug.h b/src/debug.h index 3ab1c05..0643fd8 100644 --- a/src/debug.h +++ b/src/debug.h @@ -1,5 +1,6 @@ #include "chunk.h" #include "common.h" +#include "dictionary.h" -V disassemble(Bc *, const char *); -Z disassemble_instruction(Bc *, Z); +V disassemble(Bc *, const char *, Dt **); +Z disassemble_instruction(Bc *, Z, Dt **); diff --git a/src/dictionary.c b/src/dictionary.c new file mode 100644 index 0000000..7771780 --- /dev/null +++ b/src/dictionary.c @@ -0,0 +1,39 @@ +#include + +#include "arena.h" +#include "common.h" +#include "dictionary.h" + +U64 hash64(const char *str) { + I len = strlen(str); + U64 h = 0x100; + for (I i = 0; i < len; i++) { + h ^= str[i] & 255; + h *= 1111111111111111111; + } + return h; +} + +Dt *upsert(Dt **env, const char *key, Ar *a) { + U64 hash = hash64(key); + for (U64 h = hash; *env; h <<= 2) { + if (hash == (*env)->hash) + return *env; + env = &(*env)->child[h >> 62]; + } + if (!a) + return 0; + *env = arena_alloc(a, 1, Dt); + (*env)->name = key; + (*env)->hash = hash; + return *env; +} + +Dt *lookup_hash(Dt **env, U64 hash) { + for (U64 h = hash; *env; h <<= 2) { + if ((*env)->hash == hash) + return *env; + env = &(*env)->child[h >> 62]; + } + return NULL; +} diff --git a/src/dictionary.h b/src/dictionary.h new file mode 100644 index 0000000..8a6e502 --- /dev/null +++ b/src/dictionary.h @@ -0,0 +1,19 @@ +#ifndef DICTIONARY_H +#define DICTIONARY_H + +#include "arena.h" +#include "chunk.h" + +typedef struct Dt Dt; +struct Dt { + Dt *child[4]; + const char *name; + U64 hash; + Bc *chunk; +}; + +U64 hash64(const char *); +Dt *upsert(Dt **, const char *, Ar *); +Dt *lookup_hash(Dt **, U64); + +#endif diff --git a/src/gc.c b/src/gc.c index 6ca3d3e..d032703 100644 --- a/src/gc.c +++ b/src/gc.c @@ -6,6 +6,8 @@ #include "chunk.h" #include "gc.h" #include "object.h" +#include "src/print.h" +#include "src/vm.h" #include "vendor/yar.h" #define ALIGN(n) (((n) + 7) & ~7) @@ -58,18 +60,41 @@ static V printstats(Gc *gc, const char *label) { } #endif -V gc_collect(Gc *gc) { +V gc_collect(Vm *vm) { + Gc *gc = &vm->gc; uint8_t *scan = gc->to.free; #if GC_DEBUG printstats(gc, "before GC"); #endif + // Forward roots for (Z i = 0; i < gc->roots.count; i++) { O *o = gc->roots.items[i]; *o = forward(gc, *o); } + Dt *dstack[256]; + Dt **dsp = dstack; + *dsp++ = vm->dictionary; + + // Forward constants referenced by dictionary entries + while (dsp > dstack) { + Dt *node = *--dsp; + if (!node) + continue; + if (node->name != NULL) { + for (Z i = 0; i < node->chunk->constants.count; i++) { + node->chunk->constants.items[i] = + forward(gc, node->chunk->constants.items[i]); + } + } + for (I i = 0; i < 4; i++) { + if (node->child[i] != NULL) + *dsp++ = node->child[i]; + } + } + while (scan < gc->to.free) { if (scan >= gc->to.end) { fprintf(stderr, "fatal GC error: out of memory\n"); @@ -121,10 +146,11 @@ V gc_collect(Gc *gc) { #endif } -Hd *gc_alloc(Gc *gc, Z sz) { +Hd *gc_alloc(Vm *vm, Z sz) { + Gc *gc = &vm->gc; sz = ALIGN(sz); if (gc->from.free + sz > gc->from.end) { - gc_collect(gc); + gc_collect(vm); if (gc->from.free + sz > gc->from.end) { fprintf(stderr, "out of memory (requested %" PRIdPTR "bytes\n", sz); abort(); @@ -160,7 +186,6 @@ fatal: } V gc_deinit(Gc *gc) { - gc_collect(gc); free(gc->from.start); free(gc->to.start); yar_free(&gc->roots); diff --git a/src/gc.h b/src/gc.h index 386f739..447f706 100644 --- a/src/gc.h +++ b/src/gc.h @@ -4,7 +4,7 @@ #include "common.h" #include "object.h" -#define GC_DEBUG 1 +#define GC_DEBUG 0 #define HEAP_BYTES (4 * 1024 * 1024) typedef struct Gs { @@ -23,9 +23,12 @@ typedef struct Gc { V gc_addroot(Gc *, O *); I gc_mark(Gc *); V gc_reset(Gc *, I); -V gc_collect(Gc *); -Hd *gc_alloc(Gc *, Z); V gc_init(Gc *); V gc_deinit(Gc *); +typedef struct Vm Vm; + +V gc_collect(Vm *); +Hd *gc_alloc(Vm *, Z); + #endif diff --git a/src/main.c b/src/main.c index 55f4fb2..876206a 100644 --- a/src/main.c +++ b/src/main.c @@ -11,26 +11,50 @@ #include "vendor/mpc.h" +#define REPL_BUFFER_SIZE 4096 + I repl(void) { Vm vm = {0}; vm_init(&vm); - Bc *chunk = chunk_new(); + char input[REPL_BUFFER_SIZE]; - I idx = chunk_add_constant(chunk, NUM(10)); - chunk_emit_byte(chunk, OP_CONST); - chunk_emit_sleb128(chunk, idx); - chunk_emit_byte(chunk, OP_CONST); - chunk_emit_sleb128(chunk, idx); - chunk_emit_byte(chunk, OP_ADD); - chunk_emit_byte(chunk, OP_RETURN); - - disassemble(chunk, "test chunk"); - I res = vm_run(&vm, chunk, 0); - - chunk_release(chunk); + for (;;) { + printf("> "); + fflush(stdout); + if (fgets(input, REPL_BUFFER_SIZE, stdin) == NULL) { + printf("\n"); + break; + } + I is_empty = 1; + for (char *p = input; *p; p++) { + if (*p != ' ' && *p != '\t' && *p != '\n' && *p != '\r') { + is_empty = 0; + break; + } + } + if (is_empty) + continue; + if (strncmp(input, "bye", 3) == 0 || strncmp(input, "quit", 4) == 0) + break; + mpc_result_t res; + if (!mpc_parse("", input, Program, &res)) { + mpc_err_print(res.error); + mpc_err_delete(res.error); + continue; + } + Cm cm = {0}; + compiler_init(&cm, &vm, ""); + Bc *chunk = compile_program(&cm, res.output); + mpc_ast_delete(res.output); + if (chunk != NULL) { + vm_run(&vm, chunk, 0); + chunk_release(chunk); + } + compiler_deinit(&cm); + } vm_deinit(&vm); - return !res; + return 0; } I loadfile(const char *fname) { @@ -44,11 +68,14 @@ I loadfile(const char *fname) { return 1; } - Bc *chunk = compile_program(&vm.gc, res.output); + Cm cm = {0}; + compiler_init(&cm, &vm, fname); + + Bc *chunk = compile_program(&cm, res.output); mpc_ast_delete(res.output); if (chunk != NULL) { - disassemble(chunk, fname); + // disassemble(chunk, fname, &vm.dictionary); I res = vm_run(&vm, chunk, 0); chunk_release(chunk); vm_deinit(&vm); diff --git a/src/object.h b/src/object.h index 171bff3..4b231a7 100644 --- a/src/object.h +++ b/src/object.h @@ -8,7 +8,7 @@ #define UNBOX(x) ((Hd *)(x)) #define IMM(x) ((O)(x) & (O)1) #define NUM(x) (((O)((intptr_t)(x) << 1)) | (O)1) -#define ORD(x) ((O)(x) >> 1) +#define ORD(x) ((intptr_t)(x) >> 1) enum { OBJ_FWD = 2, diff --git a/src/parser.c b/src/parser.c index f53b8ba..0723efc 100644 --- a/src/parser.c +++ b/src/parser.c @@ -27,8 +27,8 @@ V parser_init(V) { " | | ) ; " " number : ( /0x[0-9A-Fa-f]+/ | /-?[0-9]+/ ) ; " " string : /\"(\\\\.|[^\"])*\"/ ; " - " word : /[a-zA-Z0-9_!.,@#$%^&*_+\\-=><|\\/]+/ ; " - " def : ':' * ';' ; " + " word : /[a-zA-Z0-9_!?.,@#$%^&*_+\\-=><|\\/]+/ ; " + " def : \"def\" '{' * '}' ; " " command : ':' + ';' ; " " list : '(' * ')' ; " " table : '{' * '}' ; " diff --git a/src/print.c b/src/print.c index c55cafe..ccdaf1f 100644 --- a/src/print.c +++ b/src/print.c @@ -10,7 +10,13 @@ V print(O o) { } else if (IMM(o)) { printf("%" PRIdPTR, ORD(o)); } else { - printf("", type(o), (void *)o); + switch (type(o)) { + case TYPE_QUOT: + printf(""); + break; + default: + printf("", type(o), (void *)o); + } } } diff --git a/src/table.c b/src/table.c new file mode 100644 index 0000000..e69de29 diff --git a/src/table.h b/src/table.h new file mode 100644 index 0000000..e69de29 diff --git a/src/vm.c b/src/vm.c index 7239f97..fcf64dc 100644 --- a/src/vm.c +++ b/src/vm.c @@ -1,5 +1,9 @@ #include +#include "arena.h" +#include "chunk.h" +#include "compile.h" +#include "dictionary.h" #include "gc.h" #include "object.h" #include "print.h" @@ -27,21 +31,43 @@ static I decode_sleb128(U8 **ptr) { V vm_init(Vm *vm) { vm->sp = vm->stack; vm->rsp = vm->rstack; + vm->rtsp = vm->rtstack; vm->chunk = NULL; + vm->dictionary = NULL; + gc_init(&vm->gc); + arena_init(&vm->arena, 1024 * 1024); for (Z i = 0; i < STACK_SIZE; i++) { vm->stack[i] = NIL; + vm->rtstack[i] = NIL; gc_addroot(&vm->gc, &vm->stack[i]); + gc_addroot(&vm->gc, &vm->rtstack[i]); } } -V vm_deinit(Vm *vm) { gc_deinit(&vm->gc); } +V vm_deinit(Vm *vm) { + gc_collect(vm); + gc_deinit(&vm->gc); + arena_free(&vm->arena); + vm->dictionary = NULL; +} V vm_push(Vm *vm, O o) { *vm->sp++ = o; } -O vm_pop(Vm *vm) { return *--vm->sp; } +O vm_pop(Vm *vm) { + O o = *--vm->sp; + *vm->sp = NIL; + return o; +} O vm_peek(Vm *vm) { return *(vm->sp - 1); } +V vm_rtpush(Vm *vm, O o) { *vm->rtsp++ = o; } +O vm_rtpop(Vm *vm) { + O o = *--vm->rtsp; + *vm->rtsp = NIL; + return o; +} + V vm_rpush(Vm *vm, Bc *chunk, U8 *ip) { vm->rsp->chunk = chunk; vm->rsp->ip = ip; @@ -66,6 +92,18 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { break; \ } +#define CMPOP(op) \ + { \ + O b = vm_pop(vm); \ + O a = vm_pop(vm); \ + if (!IMM(a) || !IMM(b)) { \ + fprintf(stderr, "vm: arithmetic on non-number objects\n"); \ + return 0; \ + } \ + vm_push(vm, (ORD(a) op ORD(b)) ? NUM(1) : NIL); \ + break; \ + } + vm->ip = chunk->items + offset; vm->chunk = chunk; @@ -74,11 +112,39 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { switch (opcode = *vm->ip++) { case OP_NOP: continue; + case OP_NIL: + vm_push(vm, NIL); + break; case OP_CONST: { I idx = decode_sleb128(&vm->ip); vm_push(vm, vm->chunk->constants.items[idx]); break; } + case OP_DROP: { + (void)vm_pop(vm); + break; + } + case OP_DUP: { + O obj = vm_pop(vm); + vm_push(vm, obj); + vm_push(vm, obj); + break; + } + case OP_SWAP: { + O b = vm_pop(vm); + O a = vm_pop(vm); + vm_push(vm, b); + vm_push(vm, a); + break; + } + case OP_TOR: { + vm_rtpush(vm, vm_pop(vm)); + break; + } + case OP_FROMR: { + vm_push(vm, vm_rtpop(vm)); + break; + } case OP_JUMP: { I ofs = decode_sleb128(&vm->ip); vm->ip += ofs; @@ -96,6 +162,18 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { vm->ip = chunk->items + ofs; break; } + case OP_DOWORD: { + I hash = decode_sleb128(&vm->ip); + Dt *word = lookup_hash(&vm->dictionary, hash); + if (!word) { + fprintf(stderr, "vm: word not found (hash = %lx)\n", hash); + return 0; + } + vm_rpush(vm, vm->chunk, vm->ip); + vm->chunk = word->chunk; + vm->ip = word->chunk->items; + break; + } case OP_APPLY: { O quot = vm_pop(vm); if (type(quot) == TYPE_QUOT) { @@ -119,8 +197,39 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { goto done; } break; + case OP_CHOOSE: { + O fals = vm_pop(vm); + O tru = vm_pop(vm); + O cond = vm_pop(vm); + if (cond == NIL) { + vm_push(vm, fals); + } else { + vm_push(vm, tru); + } + break; + } case OP_ADD: BINOP(+); + case OP_SUB: + BINOP(-); + case OP_MUL: + BINOP(*); + case OP_DIV: + BINOP(/); + case OP_MOD: + BINOP(%); + case OP_EQ: + CMPOP(==); + case OP_NEQ: + CMPOP(!=); + case OP_LT: + CMPOP(<); + case OP_GT: + CMPOP(>); + case OP_LTE: + CMPOP(<=); + case OP_GTE: + CMPOP(>=); default: fprintf(stderr, "unknown opcode %d\n", opcode); return 0; diff --git a/src/vm.h b/src/vm.h index b4f0916..c745538 100644 --- a/src/vm.h +++ b/src/vm.h @@ -3,22 +3,40 @@ #include "common.h" +#include "arena.h" #include "chunk.h" +#include "dictionary.h" #include "gc.h" #include "object.h" enum { OP_NOP = 0, - OP_CONST, // Push constant to stack + OP_CONST, // Push constant to stack + OP_NIL, // Push constant to stack OP_DROP, OP_DUP, OP_SWAP, + OP_TOR, // Push from stack to retain stack + OP_FROMR, // Push from retain stack to stack OP_JUMP, // Relative jump OP_JUMP_IF_NIL, // Relative jump if top-of-stack is nil OP_CALL, + OP_DOWORD, // Call word from dictionary by name hash OP_APPLY, OP_RETURN, + OP_CHOOSE, OP_ADD, + OP_SUB, + OP_MUL, + OP_DIV, + OP_MOD, + OP_EQ, + OP_NEQ, + OP_LT, + OP_GT, + OP_LTE, + OP_GTE, + OP_PPRINT, }; #define STACK_SIZE 256 @@ -30,10 +48,13 @@ typedef struct Fr { typedef struct Vm { Gc gc; - O stack[256], *sp; - Fr rstack[256], *rsp; + O stack[STACK_SIZE], *sp; + O rtstack[STACK_SIZE], *rtsp; + Fr rstack[STACK_SIZE], *rsp; // Return stack U8 *ip; Bc *chunk; + Dt *dictionary; + Ar arena; } Vm; V vm_init(Vm *); diff --git a/test.grr b/test.grr index d176e24..2ce175f 100644 --- a/test.grr +++ b/test.grr @@ -1 +1,15 @@ -[ 1 2 + ] call 3 + +def over { swap dup >r swap r> } +def dip { swap >r call r> } +def keep { over >r call r> } +def if { ? call } + +def fac { + dup if: 1 <= [drop 1] [dup 1 - fac *]; +} + +def fib { + dup if: 1 <= [] [dup 1 - fib swap 2 - fib +]; +} + +[ 20 fib ] call \=> 6765 +[ 10 fac ] call \=> 3628800