From aebe586a05f5e02cf48b241a6e951d1f16d1226d Mon Sep 17 00:00:00 2001 From: "Javier B. Torres" Date: Wed, 21 Jan 2026 10:48:06 -0300 Subject: [PATCH] source tracking and composite primitives --- src/chunk.c | 28 ++++++ src/chunk.h | 14 ++- src/compile.c | 89 +++++++++-------- src/compile.h | 2 +- src/debug.c | 272 ++++++++++++++++++++++---------------------------- src/debug.h | 3 +- src/main.c | 3 + src/vm.c | 40 ++++---- test.grr | 6 +- 9 files changed, 237 insertions(+), 220 deletions(-) diff --git a/src/chunk.c b/src/chunk.c index 2d79899..02847cb 100644 --- a/src/chunk.c +++ b/src/chunk.c @@ -59,3 +59,31 @@ I chunk_add_constant(Bc *chunk, O value) { *yar_append(&chunk->constants) = value; return mark; } + +V chunk_emit_byte_with_line(Bc *chunk, U8 byte, I line, I col) { + *yar_append(chunk) = byte; + if (chunk->lines.count == 0 || + chunk->lines.items[chunk->lines.count - 1].row != line || + chunk->lines.items[chunk->lines.count - 1].col != col) { + Bl *entry = yar_append(&chunk->lines); + entry->offset = chunk->count - 1; + entry->row = line; + entry->col = col; + } +} + +I chunk_get_line(Bc *chunk, Z offset, I *out_col) { + if (chunk->lines.count == 0) + return -1; + Z left = 0, right = chunk->lines.count - 1; + while (left < right) { + Z mid = left + (right - left + 1) / 2; + if (chunk->lines.items[mid].offset <= offset) + left = mid; + else + right = mid - 1; + } + if (out_col) + *out_col = chunk->lines.items[left].col; + return chunk->lines.items[left].row; +} diff --git a/src/chunk.h b/src/chunk.h index e70a8c7..a93ab50 100644 --- a/src/chunk.h +++ b/src/chunk.h @@ -6,7 +6,12 @@ #include "common.h" #include "object.h" -/** Bytecode chunk */ +typedef struct Bl { + Z offset; + I row; + I col; +} Bl; + typedef struct Bc { I ref; const char *name; @@ -16,6 +21,10 @@ typedef struct Bc { O *items; Z count, capacity; } constants; + struct { + Bl *items; + Z count, capacity; + } lines; } Bc; Bc *chunk_new(const char *); @@ -26,4 +35,7 @@ V chunk_emit_byte(Bc *, U8); V chunk_emit_sleb128(Bc *, I); I chunk_add_constant(Bc *, O); +V chunk_emit_byte_with_line(Bc *, U8, I, I); +I chunk_get_line(Bc *, Z, I*); + #endif diff --git a/src/compile.c b/src/compile.c index 2995d45..7f2f03f 100644 --- a/src/compile.c +++ b/src/compile.c @@ -14,32 +14,35 @@ // clang-format off struct { const char *name; - U8 opcode; + U8 opcode[8]; } primitives[] = { - {"nil", OP_NIL}, - {"dup", OP_DUP}, - {"drop", OP_DROP}, - {"swap", OP_SWAP}, - {"over", OP_OVER}, - {"nip", OP_NIP}, - {"bury", OP_BURY}, - {"dig", OP_DIG}, - {">r", OP_TOR}, - {"r>", OP_FROMR}, - {"call", OP_APPLY}, - {"?", OP_CHOOSE}, - {"+", OP_ADD}, - {"-", OP_SUB}, - {"*", OP_MUL}, - {"/", OP_DIV}, - {"%", OP_MOD}, - {"=", OP_EQ}, - {"<>", OP_NEQ}, - {"<", OP_LT}, - {">", OP_GT}, - {"<=", OP_LTE}, - {">=", OP_GTE}, - {NULL, 0}, + {"nil", {OP_NIL, 0}}, + {"dup", {OP_DUP, 0}}, + {"drop", {OP_DROP, 0}}, + {"swap", {OP_SWAP, 0}}, + {"over", {OP_OVER, 0}}, + {"nip", {OP_NIP, 0}}, + {"bury", {OP_BURY, 0}}, + {"dig", {OP_DIG, 0}}, + {">r", {OP_TOR, 0}}, + {"r>", {OP_FROMR, 0}}, + {"dip", {OP_SWAP, OP_TOR, OP_APPLY, OP_FROMR, 0}}, + {"keep", {OP_OVER, OP_TOR, OP_APPLY, OP_FROMR, 0}}, + {"if", {OP_CHOOSE, OP_APPLY, 0}}, + {"call", {OP_APPLY, 0}}, + {"?", {OP_CHOOSE, 0}}, + {"+", {OP_ADD, 0}}, + {"-", {OP_SUB, 0}}, + {"*", {OP_MUL, 0}}, + {"/", {OP_DIV, 0}}, + {"%", {OP_MOD, 0}}, + {"=", {OP_EQ, 0}}, + {"<>", {OP_NEQ, 0}}, + {"<", {OP_LT, 0}}, + {">", {OP_GT, 0}}, + {"<=", {OP_LTE, 0}}, + {">=", {OP_GTE, 0}}, + {NULL, {0}}, }; // clang-format on @@ -111,26 +114,29 @@ static V optim_tailcall(Bc *chunk) { static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); -static I compile_constant(Cm *cm, O value) { +static I compile_constant(Cm *cm, O value, I line, I col) { I idx = chunk_add_constant(cm->chunk, value); - chunk_emit_byte(cm->chunk, OP_CONST); + chunk_emit_byte_with_line(cm->chunk, OP_CONST, line, col); chunk_emit_sleb128(cm->chunk, idx); return 1; } -static I compile_call(Cm *cm, const char *name) { +static I compile_call(Cm *cm, const char *name, I line, I col) { for (Z i = 0; primitives[i].name != NULL; i++) { if (strcmp(name, primitives[i].name) == 0) { - chunk_emit_byte(cm->chunk, primitives[i].opcode); + for (Z j = 0; primitives[i].opcode[j] != 0; j++) + chunk_emit_byte_with_line(cm->chunk, primitives[i].opcode[j], line, + col); return 1; } } Dt *word = upsert(cm->dictionary, name, NULL); if (!word) { - fprintf(stderr, "compiler: undefined word '%s'\n", name); + fprintf(stderr, "compiler error at %ld:%ld: undefined word '%s'\n", + line + 1, col + 1, name); return 0; } - chunk_emit_byte(cm->chunk, OP_DOWORD); + chunk_emit_byte_with_line(cm->chunk, OP_DOWORD, line, col); chunk_emit_sleb128(cm->chunk, (I)word->hash); return 1; } @@ -148,7 +154,7 @@ static I compile_command(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { return 0; curr = mpc_ast_traverse_next(next); } - compile_call(cm, name); + compile_call(cm, name, curr->state.row, curr->state.col); return 1; } @@ -177,7 +183,8 @@ static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { curr = mpc_ast_traverse_next(next); } - chunk_emit_byte(inner.chunk, OP_RETURN); + chunk_emit_byte_with_line(inner.chunk, OP_RETURN, curr->state.row, + curr->state.col); optim_tailcall(inner.chunk); entry->chunk = inner.chunk; @@ -219,18 +226,21 @@ static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { return BOX(hd); } -static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { - return compile_constant(cm, compile_quotation_obj(cm, curr, next)); +static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next, + I line, I col) { + return compile_constant(cm, compile_quotation_obj(cm, curr, next), line, col); } static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { + I line = curr->state.row; + I col = curr->state.col; if (strstr(curr->tag, "expr|number") != NULL) { I num = strtol(curr->contents, NULL, 0); - return compile_constant(cm, NUM(num)); + return compile_constant(cm, NUM(num), line, col); } else if (strstr(curr->tag, "expr|word") != NULL) { - return compile_call(cm, curr->contents); + return compile_call(cm, curr->contents, line, col); } else if (strstr(curr->tag, "expr|quotation") != NULL) { - return compile_quotation(cm, curr, next); + return compile_quotation(cm, curr, next, line, col); } else if (strstr(curr->tag, "expr|def") != NULL) { return compile_definition(cm, curr, next); } else if (strstr(curr->tag, "expr|command") != NULL) { @@ -238,7 +248,8 @@ static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { } else if (strstr(curr->tag, "expr|comment") != NULL) { return 1; } else { - fprintf(stderr, "compiler: \"%s\" nyi\n", curr->tag); + fprintf(stderr, "compiler error at %ld:%ld: \"%s\" nyi\n", line + 1, + col + 1, curr->tag); return 0; } diff --git a/src/compile.h b/src/compile.h index 6dd6bb1..b851552 100644 --- a/src/compile.h +++ b/src/compile.h @@ -7,7 +7,7 @@ #include "vendor/mpc.h" -#define COMPILER_DEBUG 1 +#define COMPILER_DEBUG 0 /** Compiler context */ typedef struct Cm { diff --git a/src/debug.c b/src/debug.c index 0d25c78..d936684 100644 --- a/src/debug.c +++ b/src/debug.c @@ -1,5 +1,6 @@ #include +#include "chunk.h" #include "debug.h" #include "dictionary.h" #include "print.h" @@ -21,177 +22,146 @@ static I decode_sleb128(U8 *ptr, Z *bytes_read) { return result; } +static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent); + +static V dis(Bc *chunk, Dt **dictionary, I indent) { + Z offset = 0; + while (offset < chunk->count) + offset = dis_instr(chunk, offset, dictionary, indent); +} + V disassemble(Bc *chunk, const char *name, Dt **dictionary) { printf("=== %s ===\n", name); - Z offset = 0; - while (offset < chunk->count) { - offset = disassemble_instruction(chunk, offset, dictionary); - } + dis(chunk, dictionary, 0); } -Z disassemble_instruction(Bc *chunk, Z offset, Dt **dictionary) { +static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent) { + for (I i = 0; i < indent * 2; i++) + putchar(' '); + fflush(stdout); printf("%04zu ", offset); U8 opcode = chunk->items[offset++]; + +#define CASE(name) case OP_##name: +#define SIMPLE(name) \ + case OP_##name: \ + printf(#name "\n"); \ + return offset; + switch (opcode) { - case OP_NOP: - printf("NOP\n"); - return offset; - case OP_NIL: - printf("NIL\n"); - return offset; - case OP_CONST: { - Z bytes_read; - I idx = decode_sleb128(&chunk->items[offset], &bytes_read); - printf("CONST %ld", idx); - if (idx >= 0 && idx < (I)chunk->constants.count) { - O obj = chunk->constants.items[idx]; - printf(" ("); - print(obj); - printf(")"); + SIMPLE(NOP); + SIMPLE(NIL); + CASE(CONST) { + Z bytes_read; + I idx = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("CONST %ld", idx); + if (idx >= 0 && idx < (I)chunk->constants.count) { + O obj = chunk->constants.items[idx]; + printf(" ("); + print(obj); + printf(")"); - // If it's a quotation, disassemble it inline - if (!IMM(obj) && obj != NIL && type(obj) == TYPE_QUOT) { - Hd *hdr = UNBOX(obj); - Bc **chunk_ptr = (Bc **)(hdr + 1); - Bc *quot_chunk = *chunk_ptr; - printf("\n"); - - // Disassemble quotation with indentation - for (Z i = 0; i < quot_chunk->count; ) { - printf(" "); - i = disassemble_instruction(quot_chunk, i, dictionary); + if (!IMM(obj) && obj != NIL && type(obj) == TYPE_QUOT) { + putchar('\n'); + Hd *hdr = UNBOX(obj); + Bc **chunk_ptr = (Bc **)(hdr + 1); + Bc *quot_chunk = *chunk_ptr; + dis(quot_chunk, dictionary, indent + 1); + return offset + bytes_read; } - return offset + bytes_read; } + printf("\n"); + return offset + bytes_read; } - printf("\n"); - return offset + bytes_read; - } - case OP_DROP: { - printf("DROP\n"); - return offset; - } - case OP_DUP: { - printf("DUP\n"); - return offset; - } - case OP_SWAP: { - printf("SWAP\n"); - return offset; - } - case OP_TOR: - printf("TOR\n"); - return offset; - case OP_FROMR: - printf("FROMR\n"); - return offset; - case OP_JUMP: { - Z bytes_read; - I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); - printf("JUMP %ld -> %zu\n", ofs, offset + bytes_read + ofs); - return offset + bytes_read; - } - case OP_JUMP_IF_NIL: { - Z bytes_read; - I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); - printf("JUMP_IF_NIL %ld -> %zu\n", ofs, offset + bytes_read + ofs); - return offset + bytes_read; - } - case OP_CALL: { - Z bytes_read; - I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); - printf("CALL %ld\n", ofs); - return offset + bytes_read; - } - case OP_DOWORD: { - Z bytes_read; - I hash = decode_sleb128(&chunk->items[offset], &bytes_read); - printf("DOWORD"); + SIMPLE(DROP); + SIMPLE(DUP); + SIMPLE(SWAP); + SIMPLE(NIP); + SIMPLE(OVER); + SIMPLE(BURY); + SIMPLE(DIG); + SIMPLE(TOR); + SIMPLE(FROMR); + CASE(JUMP) { + Z bytes_read; + I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("JUMP %ld -> %zu\n", ofs, offset + bytes_read + ofs); + return offset + bytes_read; + } + CASE(JUMP_IF_NIL) { + Z bytes_read; + I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("JUMP_IF_NIL %ld -> %zu\n", ofs, offset + bytes_read + ofs); + return offset + bytes_read; + } + CASE(CALL) { + Z bytes_read; + I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("CALL %ld\n", ofs); + return offset + bytes_read; + } + CASE(DOWORD) { + Z bytes_read; + I hash = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("DOWORD"); - if (dictionary && *dictionary) { - Dt *entry = lookup_hash(dictionary, hash); - if (entry != NULL) { - printf(" %s", entry->name); + if (dictionary && *dictionary) { + Dt *entry = lookup_hash(dictionary, hash); + if (entry != NULL) { + printf(" %s", entry->name); + } else { + printf(" ???"); + } } else { - printf(" ???"); + printf(" 0x%lx", hash); } - } else { - printf(" 0x%lx", hash); + printf("\n"); + return offset + bytes_read; } - printf("\n"); - return offset + bytes_read; - } - case OP_APPLY: - printf("APPLY\n"); - return offset; - case OP_TAIL_CALL: { - Z bytes_read; - I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); - printf("TAIL_CALL %ld\n", ofs); - return offset + bytes_read; - } - case OP_TAIL_DOWORD: { - Z bytes_read; - I hash = decode_sleb128(&chunk->items[offset], &bytes_read); - printf("TAIL_DOWORD"); + SIMPLE(APPLY); + CASE(TAIL_CALL) { + Z bytes_read; + I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("TAIL_CALL %ld\n", ofs); + return offset + bytes_read; + } + CASE(TAIL_DOWORD) { + Z bytes_read; + I hash = decode_sleb128(&chunk->items[offset], &bytes_read); + printf("TAIL_DOWORD"); - if (dictionary && *dictionary) { - Dt *entry = lookup_hash(dictionary, hash); - if (entry != NULL) { - printf(" %s", entry->name); + if (dictionary && *dictionary) { + Dt *entry = lookup_hash(dictionary, hash); + if (entry != NULL) { + printf(" %s", entry->name); + } else { + printf(" ???"); + } } else { - printf(" ???"); + printf(" 0x%lx", hash); } - } else { - printf(" 0x%lx", hash); + printf("\n"); + return offset + bytes_read; } - printf("\n"); - return offset + bytes_read; - } - case OP_TAIL_APPLY: - printf("TAIL_APPLY\n"); - return offset; - case OP_RETURN: - printf("RETURN\n"); - return offset; - case OP_CHOOSE: - printf("CHOOSE\n"); - return offset; - case OP_ADD: - printf("ADD\n"); - return offset; - case OP_SUB: - printf("SUB\n"); - return offset; - case OP_MUL: - printf("MUL\n"); - return offset; - case OP_DIV: - printf("DIV\n"); - return offset; - case OP_MOD: - printf("MOD\n"); - return offset; - case OP_EQ: - printf("EQ\n"); - return offset; - case OP_NEQ: - printf("NEQ\n"); - return offset; - case OP_LT: - printf("LT\n"); - return offset; - case OP_GT: - printf("GT\n"); - return offset; - case OP_LTE: - printf("LTE\n"); - return offset; - case OP_GTE: - printf("GTE\n"); - return offset; + SIMPLE(TAIL_APPLY); + SIMPLE(RETURN); + SIMPLE(CHOOSE); + SIMPLE(ADD); + SIMPLE(SUB); + SIMPLE(MUL); + SIMPLE(DIV); + SIMPLE(MOD); + SIMPLE(EQ); + SIMPLE(NEQ); + SIMPLE(LT); + SIMPLE(GT); + SIMPLE(LTE); + SIMPLE(GTE); default: - printf("? (%d)\n", opcode); + printf("??? (%d)\n", opcode); return offset; } + +#undef SIMPLE +#undef CASE } diff --git a/src/debug.h b/src/debug.h index 0643fd8..2a1251c 100644 --- a/src/debug.h +++ b/src/debug.h @@ -2,5 +2,4 @@ #include "common.h" #include "dictionary.h" -V disassemble(Bc *, const char *, Dt **); -Z disassemble_instruction(Bc *, Z, Dt **); +V disassemble(Bc *, const char*, Dt **); diff --git a/src/main.c b/src/main.c index 05d5b18..395784d 100644 --- a/src/main.c +++ b/src/main.c @@ -73,6 +73,9 @@ I loadfile(const char *fname) { mpc_ast_delete(res.output); if (chunk != NULL) { +#if COMPILER_DEBUG + disassemble(chunk, fname, &vm.dictionary); +#endif I res = vm_run(&vm, chunk, 0); chunk_release(chunk); vm_deinit(&vm); diff --git a/src/vm.c b/src/vm.c index f33703c..9fef97e 100644 --- a/src/vm.c +++ b/src/vm.c @@ -75,6 +75,14 @@ V vm_rpush(Vm *vm, Bc *chunk, U8 *ip) { } Fr vm_rpop(Vm *vm) { return *--vm->rsp; } +static I vm_error(Vm *vm, const char *message) { + I col = -1; + I line = chunk_get_line(vm->chunk, vm->ip - vm->chunk->items, &col); + + fprintf(stderr, "error at %ld:%ld: %s\n", line + 1, col + 1, message); + return 0; +} + I vm_run(Vm *vm, Bc *chunk, I offset) { I mark = gc_mark(&vm->gc); for (Z i = 0; i < chunk->constants.count; i++) @@ -84,10 +92,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { { \ O b = vm_pop(vm); \ O a = vm_pop(vm); \ - if (!IMM(a) || !IMM(b)) { \ - fprintf(stderr, "vm: arithmetic on non-number objects\n"); \ - return 0; \ - } \ + if (!IMM(a) || !IMM(b)) \ + return vm_error(vm, "arithmetic on non-numeric objects"); \ vm_push(vm, NUM(ORD(a) op ORD(b))); \ break; \ } @@ -96,10 +102,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { { \ O b = vm_pop(vm); \ O a = vm_pop(vm); \ - if (!IMM(a) || !IMM(b)) { \ - fprintf(stderr, "vm: arithmetic on non-number objects\n"); \ - return 0; \ - } \ + if (!IMM(a) || !IMM(b)) \ + return vm_error(vm, "comparison on non-numeric objects"); \ vm_push(vm, (ORD(a) op ORD(b)) ? NUM(1) : NIL); \ break; \ } @@ -201,10 +205,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { case OP_DOWORD: { I hash = decode_sleb128(&vm->ip); Dt *word = lookup_hash(&vm->dictionary, hash); - if (!word) { - fprintf(stderr, "vm: word not found (hash = %lx)\n", hash); - return 0; - } + if (!word) + return vm_error(vm, "word not found"); vm_rpush(vm, vm->chunk, vm->ip); vm->chunk = word->chunk; vm->ip = word->chunk->items; @@ -219,8 +221,7 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { vm->chunk = chunk; vm->ip = chunk->items; } else { - fprintf(stderr, "vm: attempt to apply non-quotation object\n"); - return 0; + return vm_error(vm, "attempt to apply non-quotation object"); } break; } @@ -233,10 +234,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { case OP_TAIL_DOWORD: { I hash = decode_sleb128(&vm->ip); Dt *word = lookup_hash(&vm->dictionary, hash); - if (!word) { - fprintf(stderr, "vm: word not found (hash = %lx)\n", hash); - return 0; - } + if (!word) + return vm_error(vm, "word not found"); // Tail call: reuse current frame vm->chunk = word->chunk; vm->ip = word->chunk->items; @@ -251,8 +250,7 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { vm->chunk = chunk; vm->ip = chunk->items; } else { - fprintf(stderr, "vm: attempt to apply non-quotation object\n"); - return 0; + return vm_error(vm, "attempt to apply non-quotation object\n"); } break; } @@ -299,7 +297,7 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { case OP_GTE: CMPOP(>=); default: - fprintf(stderr, "unknown opcode %d\n", opcode); + vm_error(vm, "unknown opcode"); return 0; } } diff --git a/test.grr b/test.grr index 9220147..9c53cf2 100644 --- a/test.grr +++ b/test.grr @@ -1,7 +1,3 @@ -def dip { swap >r call r> } -def keep { over >r call r> } -def if { ? call } - def fib/aux { if: dig dup 0 = [drop drop] @@ -10,4 +6,4 @@ def fib/aux { } def fib { 0 1 fib/aux } -[ 50 fib ] call \=> 12586269025 +[ 50 fib ] call