diff --git a/meson.build b/meson.build index ebe2b98..241e96a 100644 --- a/meson.build +++ b/meson.build @@ -7,9 +7,10 @@ project( ) sources = [ +'src/gc.c', 'src/chunk.c', 'src/compile.c', - 'src/gc.c', + 'src/debug.c', 'src/object.c', 'src/parser.c', 'src/print.c', diff --git a/src/chunk.c b/src/chunk.c index d16c2db..42e8358 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -1,6 +1,37 @@ +#include +#include + #include "chunk.h" + #include "vendor/yar.h" +Bc *chunk_new(V) { + Bc *chunk = calloc(1, sizeof(Bc)); + chunk->ref = 1; + return chunk; +} + +V chunk_acquire(Bc *chunk) { +#if CHUNK_DEBUG + fprintf(stderr, "DEBUG: acquiring chunk at %p\n", (V *)chunk); +#endif + chunk->ref++; +} +V chunk_release(Bc *chunk) { +#if CHUNK_DEBUG + fprintf(stderr, "DEBUG: releasing chunk at %p\n", (V *)chunk); +#endif + + if (--chunk->ref == 0) { +#if CHUNK_DEBUG + fprintf(stderr, "DEBUG: freeing chunk at %p\n", (V *)chunk); +#endif + yar_free(&chunk->constants); + yar_free(chunk); + free(chunk); + } +} + V chunk_emit_byte(Bc *chunk, U8 byte) { *yar_append(chunk) = byte; } V chunk_emit_sleb128(Bc *chunk, I num) { @@ -22,8 +53,3 @@ I chunk_add_constant(Bc *chunk, O value) { *yar_append(&chunk->constants) = value; return mark; } - -V chunk_free(Bc *chunk) { - yar_free(&chunk->constants); - yar_free(chunk); -} diff --git a/src/chunk.h b/src/chunk.h index e30bce1..74b2452 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -1,11 +1,14 @@ #ifndef CHUNK_H #define CHUNK_H +#define CHUNK_DEBUG 0 + #include "common.h" #include "object.h" /** Bytecode chunk */ typedef struct Bc { + I ref; U8 *items; Z count, capacity; struct { @@ -14,9 +17,12 @@ typedef struct Bc { } constants; } Bc; +Bc *chunk_new(V); +V chunk_acquire(Bc *); +V chunk_release(Bc *); + V chunk_emit_byte(Bc *, U8); V chunk_emit_sleb128(Bc *, I); I chunk_add_constant(Bc *, O); -V chunk_free(Bc *); #endif diff --git a/src/compile.c b/src/compile.c index a510f7d..1a6041a 100644 --- a/src/compile.c +++ b/src/compile.c @@ -1,6 +1,116 @@ +#include +#include +#include + +#include "chunk.h" #include "compile.h" +#include "debug.h" +#include "gc.h" +#include "object.h" +#include "vm.h" +#include "vendor/mpc.h" -I compile_program(mpc_ast_t *ast) { +// clang-format off +struct { + const char *name; + U8 opcode; +} primitives[] = { + {"+", OP_ADD}, + {"call", OP_APPLY}, + {NULL, 0}, +}; +// clang-format on +static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); +static I compile_constant(Cm *cm, O value) { + I idx = chunk_add_constant(cm->chunk, value); + chunk_emit_byte(cm->chunk, OP_CONST); + chunk_emit_sleb128(cm->chunk, idx); + return 1; +} + +static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { + Cm inner = {0}; + inner.chunk = chunk_new(); + inner.gc = cm->gc; + inner.dictionary = cm->dictionary; + + (void)mpc_ast_traverse_next(next); // skip opening bracket + curr = mpc_ast_traverse_next(next); + while (curr != NULL) { + if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "]") == 0) + break; + I res = compile_expr(&inner, curr, next); + if (!res) + return res; + curr = mpc_ast_traverse_next(next); + } + chunk_emit_byte(inner.chunk, OP_RETURN); + + Hd *hd = gc_alloc(cm->gc, sizeof(Hd) + sizeof(Bc *)); + hd->type = OBJ_QUOT; + Bc **chunk_ptr = (Bc **)(hd + 1); + *chunk_ptr = inner.chunk; + + O quot = BOX(hd); + compile_constant(cm, quot); + + return 1; +} + +static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { + if (strstr(curr->tag, "expr|number") != NULL) { + I num = strtol(curr->contents, NULL, 0); + return compile_constant(cm, NUM(num)); + } else if (strstr(curr->tag, "expr|word") != NULL) { + for (Z i = 0; primitives[i].name != NULL; i++) { + if (strcmp(curr->contents, primitives[i].name) == 0) { + chunk_emit_byte(cm->chunk, primitives[i].opcode); + return 1; + } + } + fprintf(stderr, "compiler: dictionary nyi\n"); + return 0; + } else if (strstr(curr->tag, "expr|quotation") != NULL) { + return compile_quotation(cm, curr, next); + } else { + fprintf(stderr, "compiler: \"%s\" nyi\n", curr->tag); + return 0; + } + + return 1; +} + +static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { + (void)mpc_ast_traverse_next(next); + curr = mpc_ast_traverse_next(next); + while (curr != NULL) { + if (strcmp(curr->tag, "regex") == 0 && strcmp(curr->contents, "") == 0) + break; + I res = compile_expr(cm, curr, next); + if (!res) + return res; + curr = mpc_ast_traverse_next(next); + } + + return 1; +} + +Bc *compile_program(Gc *gc, mpc_ast_t *ast) { + Cm cm = {0}; + cm.chunk = chunk_new(); + cm.gc = gc; + + mpc_ast_trav_t *next = mpc_ast_traverse_start(ast, mpc_ast_trav_order_pre); + mpc_ast_t *curr = mpc_ast_traverse_next(&next); // Begin traversal + + if (!compile_ast(&cm, curr, &next)) { + chunk_release(cm.chunk); + return NULL; + } + + Bc *chunk = cm.chunk; + chunk_emit_byte(chunk, OP_RETURN); + return chunk; } diff --git a/src/compile.h b/src/compile.h index 2aa3751..8895038 100644 --- a/src/compile.h +++ b/src/compile.h @@ -1,4 +1,24 @@ #include "common.h" + +#include "chunk.h" +#include "gc.h" + #include "vendor/mpc.h" -I compile_program(mpc_ast_t *); +/** Compiler dictionary */ +typedef struct Cd Cd; +struct Cd { + Cd *child[4]; + const char *name; + Z offset; +}; + +/** Compiler context */ +typedef struct Cm { + Gc *gc; + Bc *chunk; + Cd *dictionary; +} Cm; + +// The chunk returned by `compile_program` is owned by the caller. +Bc *compile_program(Gc *, mpc_ast_t *); diff --git a/src/debug.c b/src/debug.c new file mode 100644 index 0000000..811483f --- /dev/null +++ b/src/debug.c @@ -0,0 +1,81 @@ +#include + +#include "debug.h" +#include "print.h" +#include "vm.h" + +static I decode_sleb128(U8 *ptr, Z *bytes_read) { + I result = 0; + I shift = 0; + U8 byte; + Z count = 0; + do { + byte = ptr[count++]; + result |= (I)(byte & 0x7F) << shift; + shift += 7; + } while (byte & 0x80); + if ((shift < 64) && (byte & 0x40)) + result |= -(1LL << shift); + *bytes_read = count; + return result; +} + +V disassemble(Bc *chunk, const char *name) { + printf("=== %s ===\n", name); + Z offset = 0; + while (offset < chunk->count) { + offset = disassemble_instruction(chunk, offset); + } +} + +Z disassemble_instruction(Bc *chunk, Z offset) { + printf("%04zu ", offset); + U8 opcode = chunk->items[offset++]; + switch (opcode) { + case OP_NOP: + printf("NOP\n"); + return offset; + case OP_CONST: { + Z bytes_read; + I idx = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("CONST %ld", idx); + if (idx >= 0 && idx < (I)chunk->constants.count) { + printf(" ("); + print(chunk->constants.items[idx]); + printf(")"); + } + printf("\n"); + return offset + bytes_read; + } + case OP_JUMP: { + Z bytes_read; + I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("JUMP %ld -> %zu\n", ofs, offset + bytes_read + ofs); + return offset + bytes_read; + } + case OP_JUMP_IF_NIL: { + Z bytes_read; + I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("JUMP_IF_NIL %ld -> %zu\n", ofs, offset + bytes_read + ofs); + return offset + bytes_read; + } + case OP_CALL: { + Z bytes_read; + I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("CALL %ld\n", ofs); + return offset + bytes_read; + } + case OP_APPLY: + printf("APPLY\n"); + return offset; + case OP_RETURN: + printf("RETURN\n"); + return offset; + case OP_ADD: + printf("ADD\n"); + return offset; + default: + printf("? (%d)\n", opcode); + return offset; + } +} diff --git a/src/debug.h b/src/debug.h new file mode 100644 index 0000000..3ab1c05 --- /dev/null +++ b/src/debug.h @@ -0,0 +1,5 @@ +#include "chunk.h" +#include "common.h" + +V disassemble(Bc *, const char *); +Z disassemble_instruction(Bc *, Z); diff --git a/src/gc.c b/src/gc.c index 05043ce..6ca3d3e 100644 --- a/src/gc.c +++ b/src/gc.c @@ -79,7 +79,8 @@ V gc_collect(Gc *gc) { switch (hdr->type) { // TODO: the rest of the owl case OBJ_QUOT: { - Bc *chunk = (Bc *)(hdr + 1); + Bc **chunk_ptr = (Bc **)(hdr + 1); + Bc *chunk = *chunk_ptr; for (Z i = 0; i < chunk->constants.count; i++) chunk->constants.items[i] = forward(gc, chunk->constants.items[i]); break; @@ -93,6 +94,23 @@ V gc_collect(Gc *gc) { scan += ALIGN(hdr->size); } + scan = gc->from.start; + while (scan < gc->from.free) { + Hd *hdr = (Hd *)scan; + if (hdr->type != OBJ_FWD) { + switch (hdr->type) { + case OBJ_QUOT: { + Bc **chunk_ptr = (Bc **)(hdr + 1); + chunk_release(*chunk_ptr); + break; + } + default: + break; + } + } + scan += ALIGN(hdr->size); + } + Gs tmp = gc->from; gc->from = gc->to; gc->to = tmp; @@ -103,7 +121,22 @@ V gc_collect(Gc *gc) { #endif } -void gc_init(Gc *gc) { +Hd *gc_alloc(Gc *gc, Z sz) { + sz = ALIGN(sz); + if (gc->from.free + sz > gc->from.end) { + gc_collect(gc); + if (gc->from.free + sz > gc->from.end) { + fprintf(stderr, "out of memory (requested %" PRIdPTR "bytes\n", sz); + abort(); + } + } + Hd *hdr = (Hd *)gc->from.free; + gc->from.free += sz; + hdr->size = sz; + return hdr; +} + +V gc_init(Gc *gc) { gc->from.start = malloc(HEAP_BYTES); if (!gc->from.start) goto fatal; @@ -126,7 +159,7 @@ fatal: abort(); } -void gc_deinit(Gc *gc) { +V gc_deinit(Gc *gc) { gc_collect(gc); free(gc->from.start); free(gc->to.start); diff --git a/src/main.c b/src/main.c index dce95f9..55f4fb2 100644 --- a/src/main.c +++ b/src/main.c @@ -4,73 +4,59 @@ #include "common.h" #include "chunk.h" -#include "gc.h" +#include "compile.h" +#include "debug.h" #include "parser.h" -#include "vendor/mpc.h" #include "vm.h" -void dump(const V *data, Z size) { - char ascii[17]; - Z i, j; - ascii[16] = '\0'; - for (i = 0; i < size; ++i) { - printf("%02X ", ((unsigned char *)data)[i]); - if (((unsigned char *)data)[i] >= ' ' && - ((unsigned char *)data)[i] <= '~') { - ascii[i % 16] = ((unsigned char *)data)[i]; - } else { - ascii[i % 16] = '.'; - } - if ((i + 1) % 8 == 0 || i + 1 == size) { - printf(" "); - if ((i + 1) % 16 == 0) { - printf("| %s \n", ascii); - } else if (i + 1 == size) { - ascii[(i + 1) % 16] = '\0'; - if ((i + 1) % 16 <= 8) { - printf(" "); - } - for (j = (i + 1) % 16; j < 16; ++j) { - printf(" "); - } - printf("| %s \n", ascii); - } - } - } -} +#include "vendor/mpc.h" I repl(void) { - Bc chunk = {0}; Vm vm = {0}; - vm_init(&vm); - I idx = chunk_add_constant(&chunk, NUM(10)); - chunk_emit_byte(&chunk, OP_CONST); - chunk_emit_sleb128(&chunk, idx); - chunk_emit_byte(&chunk, OP_RETURN); + Bc *chunk = chunk_new(); - vm_run(&vm, &chunk, 0); + I idx = chunk_add_constant(chunk, NUM(10)); + chunk_emit_byte(chunk, OP_CONST); + chunk_emit_sleb128(chunk, idx); + chunk_emit_byte(chunk, OP_CONST); + chunk_emit_sleb128(chunk, idx); + chunk_emit_byte(chunk, OP_ADD); + chunk_emit_byte(chunk, OP_RETURN); - return 0; + disassemble(chunk, "test chunk"); + I res = vm_run(&vm, chunk, 0); + + chunk_release(chunk); + vm_deinit(&vm); + return !res; } I loadfile(const char *fname) { - Gc gc = {0}; - gc_init(&gc); + Vm vm = {0}; + vm_init(&vm); mpc_result_t res; if (!mpc_parse_contents(fname, Program, &res)) { mpc_err_print_to(res.error, stderr); mpc_err_delete(res.error); - gc_deinit(&gc); return 1; } - mpc_ast_print(res.output); + Bc *chunk = compile_program(&vm.gc, res.output); mpc_ast_delete(res.output); - gc_deinit(&gc); - return 0; + + if (chunk != NULL) { + disassemble(chunk, fname); + I res = vm_run(&vm, chunk, 0); + chunk_release(chunk); + vm_deinit(&vm); + return !res; + } else { + vm_deinit(&vm); + return 1; + } } int main(int argc, const char *argv[]) { diff --git a/src/object.h b/src/object.h index 45d2239..171bff3 100644 --- a/src/object.h +++ b/src/object.h @@ -19,6 +19,7 @@ enum { TYPE_NIL = 0, TYPE_NUM = 1, TYPE_FWD = OBJ_FWD, + TYPE_QUOT = OBJ_QUOT, }; typedef uintptr_t O; diff --git a/src/vm.c b/src/vm.c index accbe82..7239f97 100644 --- a/src/vm.c +++ b/src/vm.c @@ -1,8 +1,10 @@ -#include "vm.h" -#include "gc.h" -#include "print.h" #include +#include "gc.h" +#include "object.h" +#include "print.h" +#include "vm.h" + static I decode_sleb128(U8 **ptr) { I result = 0; I shift = 0; @@ -25,6 +27,7 @@ static I decode_sleb128(U8 **ptr) { V vm_init(Vm *vm) { vm->sp = vm->stack; vm->rsp = vm->rstack; + vm->chunk = NULL; gc_init(&vm->gc); for (Z i = 0; i < STACK_SIZE; i++) { @@ -33,16 +36,39 @@ V vm_init(Vm *vm) { } } +V vm_deinit(Vm *vm) { gc_deinit(&vm->gc); } + V vm_push(Vm *vm, O o) { *vm->sp++ = o; } O vm_pop(Vm *vm) { return *--vm->sp; } O vm_peek(Vm *vm) { return *(vm->sp - 1); } -V vm_run(Vm *vm, Bc *chunk, I offset) { +V vm_rpush(Vm *vm, Bc *chunk, U8 *ip) { + vm->rsp->chunk = chunk; + vm->rsp->ip = ip; + vm->rsp++; +} +Fr vm_rpop(Vm *vm) { return *--vm->rsp; } + +I vm_run(Vm *vm, Bc *chunk, I offset) { I mark = gc_mark(&vm->gc); for (Z i = 0; i < chunk->constants.count; i++) gc_addroot(&vm->gc, &chunk->constants.items[i]); +#define BINOP(op) \ + { \ + O b = vm_pop(vm); \ + O a = vm_pop(vm); \ + if (!IMM(a) || !IMM(b)) { \ + fprintf(stderr, "vm: arithmetic on non-number objects\n"); \ + return 0; \ + } \ + vm_push(vm, NUM(ORD(a) op ORD(b))); \ + break; \ + } + vm->ip = chunk->items + offset; + vm->chunk = chunk; + for (;;) { U8 opcode; switch (opcode = *vm->ip++) { @@ -50,20 +76,65 @@ V vm_run(Vm *vm, Bc *chunk, I offset) { continue; case OP_CONST: { I idx = decode_sleb128(&vm->ip); - vm_push(vm, chunk->constants.items[idx]); + vm_push(vm, vm->chunk->constants.items[idx]); + break; + } + case OP_JUMP: { + I ofs = decode_sleb128(&vm->ip); + vm->ip += ofs; + break; + } + case OP_JUMP_IF_NIL: { + I ofs = decode_sleb128(&vm->ip); + if (vm_pop(vm) == NIL) + vm->ip += ofs; + break; + } + case OP_CALL: { + I ofs = decode_sleb128(&vm->ip); + vm_rpush(vm, vm->chunk, vm->ip); + vm->ip = chunk->items + ofs; + break; + } + case OP_APPLY: { + O quot = vm_pop(vm); + if (type(quot) == TYPE_QUOT) { + Bc **ptr = (Bc **)(UNBOX(quot) + 1); + Bc *chunk = *ptr; + vm_rpush(vm, vm->chunk, vm->ip); + vm->chunk = chunk; + vm->ip = chunk->items; + } else { + fprintf(stderr, "vm: attempt to apply non-quotation object\n"); + return 0; + } break; } case OP_RETURN: - goto done; + if (vm->rsp != vm->rstack) { + Fr frame = vm_rpop(vm); + vm->chunk = frame.chunk; + vm->ip = frame.ip; + } else { + goto done; + } + break; + case OP_ADD: + BINOP(+); + default: + fprintf(stderr, "unknown opcode %d\n", opcode); + return 0; } } done: gc_reset(&vm->gc, mark); - // print stack :3 - for (O *i = vm->stack; i < vm->sp; i++) { - print(*i); - putchar(' '); + if (vm->sp != vm->stack) { + for (O *i = vm->stack; i < vm->sp; i++) { + print(*i); + putchar(' '); + } + putchar('\n'); } - putchar('\n'); + return 1; } diff --git a/src/vm.h b/src/vm.h index a122267..b4f0916 100644 --- a/src/vm.h +++ b/src/vm.h @@ -10,25 +10,38 @@ enum { OP_NOP = 0, OP_CONST, // Push constant to stack + OP_DROP, + OP_DUP, + OP_SWAP, OP_JUMP, // Relative jump OP_JUMP_IF_NIL, // Relative jump if top-of-stack is nil - OP_DOWORD, OP_CALL, + OP_APPLY, OP_RETURN, + OP_ADD, }; #define STACK_SIZE 256 +typedef struct Fr { + Bc *chunk; + U8 *ip; +} Fr; + typedef struct Vm { Gc gc; O stack[256], *sp; - U rstack[256], *rsp; + Fr rstack[256], *rsp; U8 *ip; + Bc *chunk; } Vm; V vm_init(Vm *); +V vm_deinit(Vm *); + V vm_push(Vm *, O); O vm_pop(Vm *); O vm_peek(Vm *); -V vm_run(Vm *, Bc *, I); + +I vm_run(Vm *, Bc *, I); #endif diff --git a/test.grr b/test.grr index ff43c22..d176e24 100644 --- a/test.grr +++ b/test.grr @@ -1,3 +1 @@ -\ This is a comment. - -: when [] if ; +[ 1 2 + ] call 3 +