source tracking and composite primitives

This commit is contained in:
Lobo 2026-01-21 10:48:06 -03:00
parent b9a5bc5e63
commit aebe586a05
9 changed files with 237 additions and 220 deletions

View file

@ -59,3 +59,31 @@ I chunk_add_constant(Bc *chunk, O value) {
*yar_append(&chunk->constants) = value; *yar_append(&chunk->constants) = value;
return mark; return mark;
} }
V chunk_emit_byte_with_line(Bc *chunk, U8 byte, I line, I col) {
*yar_append(chunk) = byte;
if (chunk->lines.count == 0 ||
chunk->lines.items[chunk->lines.count - 1].row != line ||
chunk->lines.items[chunk->lines.count - 1].col != col) {
Bl *entry = yar_append(&chunk->lines);
entry->offset = chunk->count - 1;
entry->row = line;
entry->col = col;
}
}
I chunk_get_line(Bc *chunk, Z offset, I *out_col) {
if (chunk->lines.count == 0)
return -1;
Z left = 0, right = chunk->lines.count - 1;
while (left < right) {
Z mid = left + (right - left + 1) / 2;
if (chunk->lines.items[mid].offset <= offset)
left = mid;
else
right = mid - 1;
}
if (out_col)
*out_col = chunk->lines.items[left].col;
return chunk->lines.items[left].row;
}

View file

@ -6,7 +6,12 @@
#include "common.h" #include "common.h"
#include "object.h" #include "object.h"
/** Bytecode chunk */ typedef struct Bl {
Z offset;
I row;
I col;
} Bl;
typedef struct Bc { typedef struct Bc {
I ref; I ref;
const char *name; const char *name;
@ -16,6 +21,10 @@ typedef struct Bc {
O *items; O *items;
Z count, capacity; Z count, capacity;
} constants; } constants;
struct {
Bl *items;
Z count, capacity;
} lines;
} Bc; } Bc;
Bc *chunk_new(const char *); Bc *chunk_new(const char *);
@ -26,4 +35,7 @@ V chunk_emit_byte(Bc *, U8);
V chunk_emit_sleb128(Bc *, I); V chunk_emit_sleb128(Bc *, I);
I chunk_add_constant(Bc *, O); I chunk_add_constant(Bc *, O);
V chunk_emit_byte_with_line(Bc *, U8, I, I);
I chunk_get_line(Bc *, Z, I*);
#endif #endif

View file

@ -14,32 +14,35 @@
// clang-format off // clang-format off
struct { struct {
const char *name; const char *name;
U8 opcode; U8 opcode[8];
} primitives[] = { } primitives[] = {
{"nil", OP_NIL}, {"nil", {OP_NIL, 0}},
{"dup", OP_DUP}, {"dup", {OP_DUP, 0}},
{"drop", OP_DROP}, {"drop", {OP_DROP, 0}},
{"swap", OP_SWAP}, {"swap", {OP_SWAP, 0}},
{"over", OP_OVER}, {"over", {OP_OVER, 0}},
{"nip", OP_NIP}, {"nip", {OP_NIP, 0}},
{"bury", OP_BURY}, {"bury", {OP_BURY, 0}},
{"dig", OP_DIG}, {"dig", {OP_DIG, 0}},
{">r", OP_TOR}, {">r", {OP_TOR, 0}},
{"r>", OP_FROMR}, {"r>", {OP_FROMR, 0}},
{"call", OP_APPLY}, {"dip", {OP_SWAP, OP_TOR, OP_APPLY, OP_FROMR, 0}},
{"?", OP_CHOOSE}, {"keep", {OP_OVER, OP_TOR, OP_APPLY, OP_FROMR, 0}},
{"+", OP_ADD}, {"if", {OP_CHOOSE, OP_APPLY, 0}},
{"-", OP_SUB}, {"call", {OP_APPLY, 0}},
{"*", OP_MUL}, {"?", {OP_CHOOSE, 0}},
{"/", OP_DIV}, {"+", {OP_ADD, 0}},
{"%", OP_MOD}, {"-", {OP_SUB, 0}},
{"=", OP_EQ}, {"*", {OP_MUL, 0}},
{"<>", OP_NEQ}, {"/", {OP_DIV, 0}},
{"<", OP_LT}, {"%", {OP_MOD, 0}},
{">", OP_GT}, {"=", {OP_EQ, 0}},
{"<=", OP_LTE}, {"<>", {OP_NEQ, 0}},
{">=", OP_GTE}, {"<", {OP_LT, 0}},
{NULL, 0}, {">", {OP_GT, 0}},
{"<=", {OP_LTE, 0}},
{">=", {OP_GTE, 0}},
{NULL, {0}},
}; };
// clang-format on // clang-format on
@ -111,26 +114,29 @@ static V optim_tailcall(Bc *chunk) {
static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next);
static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next);
static I compile_constant(Cm *cm, O value) { static I compile_constant(Cm *cm, O value, I line, I col) {
I idx = chunk_add_constant(cm->chunk, value); I idx = chunk_add_constant(cm->chunk, value);
chunk_emit_byte(cm->chunk, OP_CONST); chunk_emit_byte_with_line(cm->chunk, OP_CONST, line, col);
chunk_emit_sleb128(cm->chunk, idx); chunk_emit_sleb128(cm->chunk, idx);
return 1; return 1;
} }
static I compile_call(Cm *cm, const char *name) { static I compile_call(Cm *cm, const char *name, I line, I col) {
for (Z i = 0; primitives[i].name != NULL; i++) { for (Z i = 0; primitives[i].name != NULL; i++) {
if (strcmp(name, primitives[i].name) == 0) { if (strcmp(name, primitives[i].name) == 0) {
chunk_emit_byte(cm->chunk, primitives[i].opcode); for (Z j = 0; primitives[i].opcode[j] != 0; j++)
chunk_emit_byte_with_line(cm->chunk, primitives[i].opcode[j], line,
col);
return 1; return 1;
} }
} }
Dt *word = upsert(cm->dictionary, name, NULL); Dt *word = upsert(cm->dictionary, name, NULL);
if (!word) { if (!word) {
fprintf(stderr, "compiler: undefined word '%s'\n", name); fprintf(stderr, "compiler error at %ld:%ld: undefined word '%s'\n",
line + 1, col + 1, name);
return 0; return 0;
} }
chunk_emit_byte(cm->chunk, OP_DOWORD); chunk_emit_byte_with_line(cm->chunk, OP_DOWORD, line, col);
chunk_emit_sleb128(cm->chunk, (I)word->hash); chunk_emit_sleb128(cm->chunk, (I)word->hash);
return 1; return 1;
} }
@ -148,7 +154,7 @@ static I compile_command(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
return 0; return 0;
curr = mpc_ast_traverse_next(next); curr = mpc_ast_traverse_next(next);
} }
compile_call(cm, name); compile_call(cm, name, curr->state.row, curr->state.col);
return 1; return 1;
} }
@ -177,7 +183,8 @@ static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
curr = mpc_ast_traverse_next(next); curr = mpc_ast_traverse_next(next);
} }
chunk_emit_byte(inner.chunk, OP_RETURN); chunk_emit_byte_with_line(inner.chunk, OP_RETURN, curr->state.row,
curr->state.col);
optim_tailcall(inner.chunk); optim_tailcall(inner.chunk);
entry->chunk = inner.chunk; entry->chunk = inner.chunk;
@ -219,18 +226,21 @@ static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
return BOX(hd); return BOX(hd);
} }
static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next,
return compile_constant(cm, compile_quotation_obj(cm, curr, next)); I line, I col) {
return compile_constant(cm, compile_quotation_obj(cm, curr, next), line, col);
} }
static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
I line = curr->state.row;
I col = curr->state.col;
if (strstr(curr->tag, "expr|number") != NULL) { if (strstr(curr->tag, "expr|number") != NULL) {
I num = strtol(curr->contents, NULL, 0); I num = strtol(curr->contents, NULL, 0);
return compile_constant(cm, NUM(num)); return compile_constant(cm, NUM(num), line, col);
} else if (strstr(curr->tag, "expr|word") != NULL) { } else if (strstr(curr->tag, "expr|word") != NULL) {
return compile_call(cm, curr->contents); return compile_call(cm, curr->contents, line, col);
} else if (strstr(curr->tag, "expr|quotation") != NULL) { } else if (strstr(curr->tag, "expr|quotation") != NULL) {
return compile_quotation(cm, curr, next); return compile_quotation(cm, curr, next, line, col);
} else if (strstr(curr->tag, "expr|def") != NULL) { } else if (strstr(curr->tag, "expr|def") != NULL) {
return compile_definition(cm, curr, next); return compile_definition(cm, curr, next);
} else if (strstr(curr->tag, "expr|command") != NULL) { } else if (strstr(curr->tag, "expr|command") != NULL) {
@ -238,7 +248,8 @@ static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
} else if (strstr(curr->tag, "expr|comment") != NULL) { } else if (strstr(curr->tag, "expr|comment") != NULL) {
return 1; return 1;
} else { } else {
fprintf(stderr, "compiler: \"%s\" nyi\n", curr->tag); fprintf(stderr, "compiler error at %ld:%ld: \"%s\" nyi\n", line + 1,
col + 1, curr->tag);
return 0; return 0;
} }

View file

@ -7,7 +7,7 @@
#include "vendor/mpc.h" #include "vendor/mpc.h"
#define COMPILER_DEBUG 1 #define COMPILER_DEBUG 0
/** Compiler context */ /** Compiler context */
typedef struct Cm { typedef struct Cm {

View file

@ -1,5 +1,6 @@
#include <stdio.h> #include <stdio.h>
#include "chunk.h"
#include "debug.h" #include "debug.h"
#include "dictionary.h" #include "dictionary.h"
#include "print.h" #include "print.h"
@ -21,25 +22,36 @@ static I decode_sleb128(U8 *ptr, Z *bytes_read) {
return result; return result;
} }
V disassemble(Bc *chunk, const char *name, Dt **dictionary) { static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent);
printf("=== %s ===\n", name);
static V dis(Bc *chunk, Dt **dictionary, I indent) {
Z offset = 0; Z offset = 0;
while (offset < chunk->count) { while (offset < chunk->count)
offset = disassemble_instruction(chunk, offset, dictionary); offset = dis_instr(chunk, offset, dictionary, indent);
}
} }
Z disassemble_instruction(Bc *chunk, Z offset, Dt **dictionary) { V disassemble(Bc *chunk, const char *name, Dt **dictionary) {
printf("=== %s ===\n", name);
dis(chunk, dictionary, 0);
}
static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent) {
for (I i = 0; i < indent * 2; i++)
putchar(' ');
fflush(stdout);
printf("%04zu ", offset); printf("%04zu ", offset);
U8 opcode = chunk->items[offset++]; U8 opcode = chunk->items[offset++];
#define CASE(name) case OP_##name:
#define SIMPLE(name) \
case OP_##name: \
printf(#name "\n"); \
return offset;
switch (opcode) { switch (opcode) {
case OP_NOP: SIMPLE(NOP);
printf("NOP\n"); SIMPLE(NIL);
return offset; CASE(CONST) {
case OP_NIL:
printf("NIL\n");
return offset;
case OP_CONST: {
Z bytes_read; Z bytes_read;
I idx = decode_sleb128(&chunk->items[offset], &bytes_read); I idx = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("CONST %ld", idx); printf("CONST %ld", idx);
@ -49,61 +61,46 @@ Z disassemble_instruction(Bc *chunk, Z offset, Dt **dictionary) {
print(obj); print(obj);
printf(")"); printf(")");
// If it's a quotation, disassemble it inline
if (!IMM(obj) && obj != NIL && type(obj) == TYPE_QUOT) { if (!IMM(obj) && obj != NIL && type(obj) == TYPE_QUOT) {
putchar('\n');
Hd *hdr = UNBOX(obj); Hd *hdr = UNBOX(obj);
Bc **chunk_ptr = (Bc **)(hdr + 1); Bc **chunk_ptr = (Bc **)(hdr + 1);
Bc *quot_chunk = *chunk_ptr; Bc *quot_chunk = *chunk_ptr;
printf("\n"); dis(quot_chunk, dictionary, indent + 1);
// Disassemble quotation with indentation
for (Z i = 0; i < quot_chunk->count; ) {
printf(" ");
i = disassemble_instruction(quot_chunk, i, dictionary);
}
return offset + bytes_read; return offset + bytes_read;
} }
} }
printf("\n"); printf("\n");
return offset + bytes_read; return offset + bytes_read;
} }
case OP_DROP: { SIMPLE(DROP);
printf("DROP\n"); SIMPLE(DUP);
return offset; SIMPLE(SWAP);
} SIMPLE(NIP);
case OP_DUP: { SIMPLE(OVER);
printf("DUP\n"); SIMPLE(BURY);
return offset; SIMPLE(DIG);
} SIMPLE(TOR);
case OP_SWAP: { SIMPLE(FROMR);
printf("SWAP\n"); CASE(JUMP) {
return offset;
}
case OP_TOR:
printf("TOR\n");
return offset;
case OP_FROMR:
printf("FROMR\n");
return offset;
case OP_JUMP: {
Z bytes_read; Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("JUMP %ld -> %zu\n", ofs, offset + bytes_read + ofs); printf("JUMP %ld -> %zu\n", ofs, offset + bytes_read + ofs);
return offset + bytes_read; return offset + bytes_read;
} }
case OP_JUMP_IF_NIL: { CASE(JUMP_IF_NIL) {
Z bytes_read; Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("JUMP_IF_NIL %ld -> %zu\n", ofs, offset + bytes_read + ofs); printf("JUMP_IF_NIL %ld -> %zu\n", ofs, offset + bytes_read + ofs);
return offset + bytes_read; return offset + bytes_read;
} }
case OP_CALL: { CASE(CALL) {
Z bytes_read; Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("CALL %ld\n", ofs); printf("CALL %ld\n", ofs);
return offset + bytes_read; return offset + bytes_read;
} }
case OP_DOWORD: { CASE(DOWORD) {
Z bytes_read; Z bytes_read;
I hash = decode_sleb128(&chunk->items[offset], &bytes_read); I hash = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("DOWORD"); printf("DOWORD");
@ -121,16 +118,14 @@ Z disassemble_instruction(Bc *chunk, Z offset, Dt **dictionary) {
printf("\n"); printf("\n");
return offset + bytes_read; return offset + bytes_read;
} }
case OP_APPLY: SIMPLE(APPLY);
printf("APPLY\n"); CASE(TAIL_CALL) {
return offset;
case OP_TAIL_CALL: {
Z bytes_read; Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read); I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("TAIL_CALL %ld\n", ofs); printf("TAIL_CALL %ld\n", ofs);
return offset + bytes_read; return offset + bytes_read;
} }
case OP_TAIL_DOWORD: { CASE(TAIL_DOWORD) {
Z bytes_read; Z bytes_read;
I hash = decode_sleb128(&chunk->items[offset], &bytes_read); I hash = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("TAIL_DOWORD"); printf("TAIL_DOWORD");
@ -148,50 +143,25 @@ Z disassemble_instruction(Bc *chunk, Z offset, Dt **dictionary) {
printf("\n"); printf("\n");
return offset + bytes_read; return offset + bytes_read;
} }
case OP_TAIL_APPLY: SIMPLE(TAIL_APPLY);
printf("TAIL_APPLY\n"); SIMPLE(RETURN);
return offset; SIMPLE(CHOOSE);
case OP_RETURN: SIMPLE(ADD);
printf("RETURN\n"); SIMPLE(SUB);
return offset; SIMPLE(MUL);
case OP_CHOOSE: SIMPLE(DIV);
printf("CHOOSE\n"); SIMPLE(MOD);
return offset; SIMPLE(EQ);
case OP_ADD: SIMPLE(NEQ);
printf("ADD\n"); SIMPLE(LT);
return offset; SIMPLE(GT);
case OP_SUB: SIMPLE(LTE);
printf("SUB\n"); SIMPLE(GTE);
return offset;
case OP_MUL:
printf("MUL\n");
return offset;
case OP_DIV:
printf("DIV\n");
return offset;
case OP_MOD:
printf("MOD\n");
return offset;
case OP_EQ:
printf("EQ\n");
return offset;
case OP_NEQ:
printf("NEQ\n");
return offset;
case OP_LT:
printf("LT\n");
return offset;
case OP_GT:
printf("GT\n");
return offset;
case OP_LTE:
printf("LTE\n");
return offset;
case OP_GTE:
printf("GTE\n");
return offset;
default: default:
printf("? (%d)\n", opcode); printf("??? (%d)\n", opcode);
return offset; return offset;
} }
#undef SIMPLE
#undef CASE
} }

View file

@ -3,4 +3,3 @@
#include "dictionary.h" #include "dictionary.h"
V disassemble(Bc *, const char*, Dt **); V disassemble(Bc *, const char*, Dt **);
Z disassemble_instruction(Bc *, Z, Dt **);

View file

@ -73,6 +73,9 @@ I loadfile(const char *fname) {
mpc_ast_delete(res.output); mpc_ast_delete(res.output);
if (chunk != NULL) { if (chunk != NULL) {
#if COMPILER_DEBUG
disassemble(chunk, fname, &vm.dictionary);
#endif
I res = vm_run(&vm, chunk, 0); I res = vm_run(&vm, chunk, 0);
chunk_release(chunk); chunk_release(chunk);
vm_deinit(&vm); vm_deinit(&vm);

View file

@ -75,6 +75,14 @@ V vm_rpush(Vm *vm, Bc *chunk, U8 *ip) {
} }
Fr vm_rpop(Vm *vm) { return *--vm->rsp; } Fr vm_rpop(Vm *vm) { return *--vm->rsp; }
static I vm_error(Vm *vm, const char *message) {
I col = -1;
I line = chunk_get_line(vm->chunk, vm->ip - vm->chunk->items, &col);
fprintf(stderr, "error at %ld:%ld: %s\n", line + 1, col + 1, message);
return 0;
}
I vm_run(Vm *vm, Bc *chunk, I offset) { I vm_run(Vm *vm, Bc *chunk, I offset) {
I mark = gc_mark(&vm->gc); I mark = gc_mark(&vm->gc);
for (Z i = 0; i < chunk->constants.count; i++) for (Z i = 0; i < chunk->constants.count; i++)
@ -84,10 +92,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
{ \ { \
O b = vm_pop(vm); \ O b = vm_pop(vm); \
O a = vm_pop(vm); \ O a = vm_pop(vm); \
if (!IMM(a) || !IMM(b)) { \ if (!IMM(a) || !IMM(b)) \
fprintf(stderr, "vm: arithmetic on non-number objects\n"); \ return vm_error(vm, "arithmetic on non-numeric objects"); \
return 0; \
} \
vm_push(vm, NUM(ORD(a) op ORD(b))); \ vm_push(vm, NUM(ORD(a) op ORD(b))); \
break; \ break; \
} }
@ -96,10 +102,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
{ \ { \
O b = vm_pop(vm); \ O b = vm_pop(vm); \
O a = vm_pop(vm); \ O a = vm_pop(vm); \
if (!IMM(a) || !IMM(b)) { \ if (!IMM(a) || !IMM(b)) \
fprintf(stderr, "vm: arithmetic on non-number objects\n"); \ return vm_error(vm, "comparison on non-numeric objects"); \
return 0; \
} \
vm_push(vm, (ORD(a) op ORD(b)) ? NUM(1) : NIL); \ vm_push(vm, (ORD(a) op ORD(b)) ? NUM(1) : NIL); \
break; \ break; \
} }
@ -201,10 +205,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
case OP_DOWORD: { case OP_DOWORD: {
I hash = decode_sleb128(&vm->ip); I hash = decode_sleb128(&vm->ip);
Dt *word = lookup_hash(&vm->dictionary, hash); Dt *word = lookup_hash(&vm->dictionary, hash);
if (!word) { if (!word)
fprintf(stderr, "vm: word not found (hash = %lx)\n", hash); return vm_error(vm, "word not found");
return 0;
}
vm_rpush(vm, vm->chunk, vm->ip); vm_rpush(vm, vm->chunk, vm->ip);
vm->chunk = word->chunk; vm->chunk = word->chunk;
vm->ip = word->chunk->items; vm->ip = word->chunk->items;
@ -219,8 +221,7 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
vm->chunk = chunk; vm->chunk = chunk;
vm->ip = chunk->items; vm->ip = chunk->items;
} else { } else {
fprintf(stderr, "vm: attempt to apply non-quotation object\n"); return vm_error(vm, "attempt to apply non-quotation object");
return 0;
} }
break; break;
} }
@ -233,10 +234,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
case OP_TAIL_DOWORD: { case OP_TAIL_DOWORD: {
I hash = decode_sleb128(&vm->ip); I hash = decode_sleb128(&vm->ip);
Dt *word = lookup_hash(&vm->dictionary, hash); Dt *word = lookup_hash(&vm->dictionary, hash);
if (!word) { if (!word)
fprintf(stderr, "vm: word not found (hash = %lx)\n", hash); return vm_error(vm, "word not found");
return 0;
}
// Tail call: reuse current frame // Tail call: reuse current frame
vm->chunk = word->chunk; vm->chunk = word->chunk;
vm->ip = word->chunk->items; vm->ip = word->chunk->items;
@ -251,8 +250,7 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
vm->chunk = chunk; vm->chunk = chunk;
vm->ip = chunk->items; vm->ip = chunk->items;
} else { } else {
fprintf(stderr, "vm: attempt to apply non-quotation object\n"); return vm_error(vm, "attempt to apply non-quotation object\n");
return 0;
} }
break; break;
} }
@ -299,7 +297,7 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
case OP_GTE: case OP_GTE:
CMPOP(>=); CMPOP(>=);
default: default:
fprintf(stderr, "unknown opcode %d\n", opcode); vm_error(vm, "unknown opcode");
return 0; return 0;
} }
} }

View file

@ -1,7 +1,3 @@
def dip { swap >r call r> }
def keep { over >r call r> }
def if { ? call }
def fib/aux { def fib/aux {
if: dig dup 0 = if: dig dup 0 =
[drop drop] [drop drop]
@ -10,4 +6,4 @@ def fib/aux {
} }
def fib { 0 1 fib/aux } def fib { 0 1 fib/aux }
[ 50 fib ] call \=> 12586269025 [ 50 fib ] call