source tracking and composite primitives

This commit is contained in:
Lobo 2026-01-21 10:48:06 -03:00
parent b9a5bc5e63
commit aebe586a05
9 changed files with 237 additions and 220 deletions

View file

@ -59,3 +59,31 @@ I chunk_add_constant(Bc *chunk, O value) {
*yar_append(&chunk->constants) = value;
return mark;
}
V chunk_emit_byte_with_line(Bc *chunk, U8 byte, I line, I col) {
*yar_append(chunk) = byte;
if (chunk->lines.count == 0 ||
chunk->lines.items[chunk->lines.count - 1].row != line ||
chunk->lines.items[chunk->lines.count - 1].col != col) {
Bl *entry = yar_append(&chunk->lines);
entry->offset = chunk->count - 1;
entry->row = line;
entry->col = col;
}
}
I chunk_get_line(Bc *chunk, Z offset, I *out_col) {
if (chunk->lines.count == 0)
return -1;
Z left = 0, right = chunk->lines.count - 1;
while (left < right) {
Z mid = left + (right - left + 1) / 2;
if (chunk->lines.items[mid].offset <= offset)
left = mid;
else
right = mid - 1;
}
if (out_col)
*out_col = chunk->lines.items[left].col;
return chunk->lines.items[left].row;
}

View file

@ -6,7 +6,12 @@
#include "common.h"
#include "object.h"
/** Bytecode chunk */
typedef struct Bl {
Z offset;
I row;
I col;
} Bl;
typedef struct Bc {
I ref;
const char *name;
@ -16,6 +21,10 @@ typedef struct Bc {
O *items;
Z count, capacity;
} constants;
struct {
Bl *items;
Z count, capacity;
} lines;
} Bc;
Bc *chunk_new(const char *);
@ -26,4 +35,7 @@ V chunk_emit_byte(Bc *, U8);
V chunk_emit_sleb128(Bc *, I);
I chunk_add_constant(Bc *, O);
V chunk_emit_byte_with_line(Bc *, U8, I, I);
I chunk_get_line(Bc *, Z, I*);
#endif

View file

@ -14,32 +14,35 @@
// clang-format off
struct {
const char *name;
U8 opcode;
U8 opcode[8];
} primitives[] = {
{"nil", OP_NIL},
{"dup", OP_DUP},
{"drop", OP_DROP},
{"swap", OP_SWAP},
{"over", OP_OVER},
{"nip", OP_NIP},
{"bury", OP_BURY},
{"dig", OP_DIG},
{">r", OP_TOR},
{"r>", OP_FROMR},
{"call", OP_APPLY},
{"?", OP_CHOOSE},
{"+", OP_ADD},
{"-", OP_SUB},
{"*", OP_MUL},
{"/", OP_DIV},
{"%", OP_MOD},
{"=", OP_EQ},
{"<>", OP_NEQ},
{"<", OP_LT},
{">", OP_GT},
{"<=", OP_LTE},
{">=", OP_GTE},
{NULL, 0},
{"nil", {OP_NIL, 0}},
{"dup", {OP_DUP, 0}},
{"drop", {OP_DROP, 0}},
{"swap", {OP_SWAP, 0}},
{"over", {OP_OVER, 0}},
{"nip", {OP_NIP, 0}},
{"bury", {OP_BURY, 0}},
{"dig", {OP_DIG, 0}},
{">r", {OP_TOR, 0}},
{"r>", {OP_FROMR, 0}},
{"dip", {OP_SWAP, OP_TOR, OP_APPLY, OP_FROMR, 0}},
{"keep", {OP_OVER, OP_TOR, OP_APPLY, OP_FROMR, 0}},
{"if", {OP_CHOOSE, OP_APPLY, 0}},
{"call", {OP_APPLY, 0}},
{"?", {OP_CHOOSE, 0}},
{"+", {OP_ADD, 0}},
{"-", {OP_SUB, 0}},
{"*", {OP_MUL, 0}},
{"/", {OP_DIV, 0}},
{"%", {OP_MOD, 0}},
{"=", {OP_EQ, 0}},
{"<>", {OP_NEQ, 0}},
{"<", {OP_LT, 0}},
{">", {OP_GT, 0}},
{"<=", {OP_LTE, 0}},
{">=", {OP_GTE, 0}},
{NULL, {0}},
};
// clang-format on
@ -111,26 +114,29 @@ static V optim_tailcall(Bc *chunk) {
static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next);
static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next);
static I compile_constant(Cm *cm, O value) {
static I compile_constant(Cm *cm, O value, I line, I col) {
I idx = chunk_add_constant(cm->chunk, value);
chunk_emit_byte(cm->chunk, OP_CONST);
chunk_emit_byte_with_line(cm->chunk, OP_CONST, line, col);
chunk_emit_sleb128(cm->chunk, idx);
return 1;
}
static I compile_call(Cm *cm, const char *name) {
static I compile_call(Cm *cm, const char *name, I line, I col) {
for (Z i = 0; primitives[i].name != NULL; i++) {
if (strcmp(name, primitives[i].name) == 0) {
chunk_emit_byte(cm->chunk, primitives[i].opcode);
for (Z j = 0; primitives[i].opcode[j] != 0; j++)
chunk_emit_byte_with_line(cm->chunk, primitives[i].opcode[j], line,
col);
return 1;
}
}
Dt *word = upsert(cm->dictionary, name, NULL);
if (!word) {
fprintf(stderr, "compiler: undefined word '%s'\n", name);
fprintf(stderr, "compiler error at %ld:%ld: undefined word '%s'\n",
line + 1, col + 1, name);
return 0;
}
chunk_emit_byte(cm->chunk, OP_DOWORD);
chunk_emit_byte_with_line(cm->chunk, OP_DOWORD, line, col);
chunk_emit_sleb128(cm->chunk, (I)word->hash);
return 1;
}
@ -148,7 +154,7 @@ static I compile_command(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
return 0;
curr = mpc_ast_traverse_next(next);
}
compile_call(cm, name);
compile_call(cm, name, curr->state.row, curr->state.col);
return 1;
}
@ -177,7 +183,8 @@ static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
curr = mpc_ast_traverse_next(next);
}
chunk_emit_byte(inner.chunk, OP_RETURN);
chunk_emit_byte_with_line(inner.chunk, OP_RETURN, curr->state.row,
curr->state.col);
optim_tailcall(inner.chunk);
entry->chunk = inner.chunk;
@ -219,18 +226,21 @@ static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
return BOX(hd);
}
static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
return compile_constant(cm, compile_quotation_obj(cm, curr, next));
static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next,
I line, I col) {
return compile_constant(cm, compile_quotation_obj(cm, curr, next), line, col);
}
static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
I line = curr->state.row;
I col = curr->state.col;
if (strstr(curr->tag, "expr|number") != NULL) {
I num = strtol(curr->contents, NULL, 0);
return compile_constant(cm, NUM(num));
return compile_constant(cm, NUM(num), line, col);
} else if (strstr(curr->tag, "expr|word") != NULL) {
return compile_call(cm, curr->contents);
return compile_call(cm, curr->contents, line, col);
} else if (strstr(curr->tag, "expr|quotation") != NULL) {
return compile_quotation(cm, curr, next);
return compile_quotation(cm, curr, next, line, col);
} else if (strstr(curr->tag, "expr|def") != NULL) {
return compile_definition(cm, curr, next);
} else if (strstr(curr->tag, "expr|command") != NULL) {
@ -238,7 +248,8 @@ static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
} else if (strstr(curr->tag, "expr|comment") != NULL) {
return 1;
} else {
fprintf(stderr, "compiler: \"%s\" nyi\n", curr->tag);
fprintf(stderr, "compiler error at %ld:%ld: \"%s\" nyi\n", line + 1,
col + 1, curr->tag);
return 0;
}

View file

@ -7,7 +7,7 @@
#include "vendor/mpc.h"
#define COMPILER_DEBUG 1
#define COMPILER_DEBUG 0
/** Compiler context */
typedef struct Cm {

View file

@ -1,5 +1,6 @@
#include <stdio.h>
#include "chunk.h"
#include "debug.h"
#include "dictionary.h"
#include "print.h"
@ -21,177 +22,146 @@ static I decode_sleb128(U8 *ptr, Z *bytes_read) {
return result;
}
static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent);
static V dis(Bc *chunk, Dt **dictionary, I indent) {
Z offset = 0;
while (offset < chunk->count)
offset = dis_instr(chunk, offset, dictionary, indent);
}
V disassemble(Bc *chunk, const char *name, Dt **dictionary) {
printf("=== %s ===\n", name);
Z offset = 0;
while (offset < chunk->count) {
offset = disassemble_instruction(chunk, offset, dictionary);
}
dis(chunk, dictionary, 0);
}
Z disassemble_instruction(Bc *chunk, Z offset, Dt **dictionary) {
static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent) {
for (I i = 0; i < indent * 2; i++)
putchar(' ');
fflush(stdout);
printf("%04zu ", offset);
U8 opcode = chunk->items[offset++];
#define CASE(name) case OP_##name:
#define SIMPLE(name) \
case OP_##name: \
printf(#name "\n"); \
return offset;
switch (opcode) {
case OP_NOP:
printf("NOP\n");
return offset;
case OP_NIL:
printf("NIL\n");
return offset;
case OP_CONST: {
Z bytes_read;
I idx = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("CONST %ld", idx);
if (idx >= 0 && idx < (I)chunk->constants.count) {
O obj = chunk->constants.items[idx];
printf(" (");
print(obj);
printf(")");
SIMPLE(NOP);
SIMPLE(NIL);
CASE(CONST) {
Z bytes_read;
I idx = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("CONST %ld", idx);
if (idx >= 0 && idx < (I)chunk->constants.count) {
O obj = chunk->constants.items[idx];
printf(" (");
print(obj);
printf(")");
// If it's a quotation, disassemble it inline
if (!IMM(obj) && obj != NIL && type(obj) == TYPE_QUOT) {
Hd *hdr = UNBOX(obj);
Bc **chunk_ptr = (Bc **)(hdr + 1);
Bc *quot_chunk = *chunk_ptr;
printf("\n");
// Disassemble quotation with indentation
for (Z i = 0; i < quot_chunk->count; ) {
printf(" ");
i = disassemble_instruction(quot_chunk, i, dictionary);
if (!IMM(obj) && obj != NIL && type(obj) == TYPE_QUOT) {
putchar('\n');
Hd *hdr = UNBOX(obj);
Bc **chunk_ptr = (Bc **)(hdr + 1);
Bc *quot_chunk = *chunk_ptr;
dis(quot_chunk, dictionary, indent + 1);
return offset + bytes_read;
}
return offset + bytes_read;
}
printf("\n");
return offset + bytes_read;
}
printf("\n");
return offset + bytes_read;
}
case OP_DROP: {
printf("DROP\n");
return offset;
}
case OP_DUP: {
printf("DUP\n");
return offset;
}
case OP_SWAP: {
printf("SWAP\n");
return offset;
}
case OP_TOR:
printf("TOR\n");
return offset;
case OP_FROMR:
printf("FROMR\n");
return offset;
case OP_JUMP: {
Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("JUMP %ld -> %zu\n", ofs, offset + bytes_read + ofs);
return offset + bytes_read;
}
case OP_JUMP_IF_NIL: {
Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("JUMP_IF_NIL %ld -> %zu\n", ofs, offset + bytes_read + ofs);
return offset + bytes_read;
}
case OP_CALL: {
Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("CALL %ld\n", ofs);
return offset + bytes_read;
}
case OP_DOWORD: {
Z bytes_read;
I hash = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("DOWORD");
SIMPLE(DROP);
SIMPLE(DUP);
SIMPLE(SWAP);
SIMPLE(NIP);
SIMPLE(OVER);
SIMPLE(BURY);
SIMPLE(DIG);
SIMPLE(TOR);
SIMPLE(FROMR);
CASE(JUMP) {
Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("JUMP %ld -> %zu\n", ofs, offset + bytes_read + ofs);
return offset + bytes_read;
}
CASE(JUMP_IF_NIL) {
Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("JUMP_IF_NIL %ld -> %zu\n", ofs, offset + bytes_read + ofs);
return offset + bytes_read;
}
CASE(CALL) {
Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("CALL %ld\n", ofs);
return offset + bytes_read;
}
CASE(DOWORD) {
Z bytes_read;
I hash = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("DOWORD");
if (dictionary && *dictionary) {
Dt *entry = lookup_hash(dictionary, hash);
if (entry != NULL) {
printf(" %s", entry->name);
if (dictionary && *dictionary) {
Dt *entry = lookup_hash(dictionary, hash);
if (entry != NULL) {
printf(" %s", entry->name);
} else {
printf(" ???");
}
} else {
printf(" ???");
printf(" 0x%lx", hash);
}
} else {
printf(" 0x%lx", hash);
printf("\n");
return offset + bytes_read;
}
printf("\n");
return offset + bytes_read;
}
case OP_APPLY:
printf("APPLY\n");
return offset;
case OP_TAIL_CALL: {
Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("TAIL_CALL %ld\n", ofs);
return offset + bytes_read;
}
case OP_TAIL_DOWORD: {
Z bytes_read;
I hash = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("TAIL_DOWORD");
SIMPLE(APPLY);
CASE(TAIL_CALL) {
Z bytes_read;
I ofs = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("TAIL_CALL %ld\n", ofs);
return offset + bytes_read;
}
CASE(TAIL_DOWORD) {
Z bytes_read;
I hash = decode_sleb128(&chunk->items[offset], &bytes_read);
printf("TAIL_DOWORD");
if (dictionary && *dictionary) {
Dt *entry = lookup_hash(dictionary, hash);
if (entry != NULL) {
printf(" %s", entry->name);
if (dictionary && *dictionary) {
Dt *entry = lookup_hash(dictionary, hash);
if (entry != NULL) {
printf(" %s", entry->name);
} else {
printf(" ???");
}
} else {
printf(" ???");
printf(" 0x%lx", hash);
}
} else {
printf(" 0x%lx", hash);
printf("\n");
return offset + bytes_read;
}
printf("\n");
return offset + bytes_read;
}
case OP_TAIL_APPLY:
printf("TAIL_APPLY\n");
return offset;
case OP_RETURN:
printf("RETURN\n");
return offset;
case OP_CHOOSE:
printf("CHOOSE\n");
return offset;
case OP_ADD:
printf("ADD\n");
return offset;
case OP_SUB:
printf("SUB\n");
return offset;
case OP_MUL:
printf("MUL\n");
return offset;
case OP_DIV:
printf("DIV\n");
return offset;
case OP_MOD:
printf("MOD\n");
return offset;
case OP_EQ:
printf("EQ\n");
return offset;
case OP_NEQ:
printf("NEQ\n");
return offset;
case OP_LT:
printf("LT\n");
return offset;
case OP_GT:
printf("GT\n");
return offset;
case OP_LTE:
printf("LTE\n");
return offset;
case OP_GTE:
printf("GTE\n");
return offset;
SIMPLE(TAIL_APPLY);
SIMPLE(RETURN);
SIMPLE(CHOOSE);
SIMPLE(ADD);
SIMPLE(SUB);
SIMPLE(MUL);
SIMPLE(DIV);
SIMPLE(MOD);
SIMPLE(EQ);
SIMPLE(NEQ);
SIMPLE(LT);
SIMPLE(GT);
SIMPLE(LTE);
SIMPLE(GTE);
default:
printf("? (%d)\n", opcode);
printf("??? (%d)\n", opcode);
return offset;
}
#undef SIMPLE
#undef CASE
}

View file

@ -2,5 +2,4 @@
#include "common.h"
#include "dictionary.h"
V disassemble(Bc *, const char *, Dt **);
Z disassemble_instruction(Bc *, Z, Dt **);
V disassemble(Bc *, const char*, Dt **);

View file

@ -73,6 +73,9 @@ I loadfile(const char *fname) {
mpc_ast_delete(res.output);
if (chunk != NULL) {
#if COMPILER_DEBUG
disassemble(chunk, fname, &vm.dictionary);
#endif
I res = vm_run(&vm, chunk, 0);
chunk_release(chunk);
vm_deinit(&vm);

View file

@ -75,6 +75,14 @@ V vm_rpush(Vm *vm, Bc *chunk, U8 *ip) {
}
Fr vm_rpop(Vm *vm) { return *--vm->rsp; }
static I vm_error(Vm *vm, const char *message) {
I col = -1;
I line = chunk_get_line(vm->chunk, vm->ip - vm->chunk->items, &col);
fprintf(stderr, "error at %ld:%ld: %s\n", line + 1, col + 1, message);
return 0;
}
I vm_run(Vm *vm, Bc *chunk, I offset) {
I mark = gc_mark(&vm->gc);
for (Z i = 0; i < chunk->constants.count; i++)
@ -84,10 +92,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
{ \
O b = vm_pop(vm); \
O a = vm_pop(vm); \
if (!IMM(a) || !IMM(b)) { \
fprintf(stderr, "vm: arithmetic on non-number objects\n"); \
return 0; \
} \
if (!IMM(a) || !IMM(b)) \
return vm_error(vm, "arithmetic on non-numeric objects"); \
vm_push(vm, NUM(ORD(a) op ORD(b))); \
break; \
}
@ -96,10 +102,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
{ \
O b = vm_pop(vm); \
O a = vm_pop(vm); \
if (!IMM(a) || !IMM(b)) { \
fprintf(stderr, "vm: arithmetic on non-number objects\n"); \
return 0; \
} \
if (!IMM(a) || !IMM(b)) \
return vm_error(vm, "comparison on non-numeric objects"); \
vm_push(vm, (ORD(a) op ORD(b)) ? NUM(1) : NIL); \
break; \
}
@ -201,10 +205,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
case OP_DOWORD: {
I hash = decode_sleb128(&vm->ip);
Dt *word = lookup_hash(&vm->dictionary, hash);
if (!word) {
fprintf(stderr, "vm: word not found (hash = %lx)\n", hash);
return 0;
}
if (!word)
return vm_error(vm, "word not found");
vm_rpush(vm, vm->chunk, vm->ip);
vm->chunk = word->chunk;
vm->ip = word->chunk->items;
@ -219,8 +221,7 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
vm->chunk = chunk;
vm->ip = chunk->items;
} else {
fprintf(stderr, "vm: attempt to apply non-quotation object\n");
return 0;
return vm_error(vm, "attempt to apply non-quotation object");
}
break;
}
@ -233,10 +234,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
case OP_TAIL_DOWORD: {
I hash = decode_sleb128(&vm->ip);
Dt *word = lookup_hash(&vm->dictionary, hash);
if (!word) {
fprintf(stderr, "vm: word not found (hash = %lx)\n", hash);
return 0;
}
if (!word)
return vm_error(vm, "word not found");
// Tail call: reuse current frame
vm->chunk = word->chunk;
vm->ip = word->chunk->items;
@ -251,8 +250,7 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
vm->chunk = chunk;
vm->ip = chunk->items;
} else {
fprintf(stderr, "vm: attempt to apply non-quotation object\n");
return 0;
return vm_error(vm, "attempt to apply non-quotation object\n");
}
break;
}
@ -299,7 +297,7 @@ I vm_run(Vm *vm, Bc *chunk, I offset) {
case OP_GTE:
CMPOP(>=);
default:
fprintf(stderr, "unknown opcode %d\n", opcode);
vm_error(vm, "unknown opcode");
return 0;
}
}

View file

@ -1,7 +1,3 @@
def dip { swap >r call r> }
def keep { over >r call r> }
def if { ? call }
def fib/aux {
if: dig dup 0 =
[drop drop]
@ -10,4 +6,4 @@ def fib/aux {
}
def fib { 0 1 fib/aux }
[ 50 fib ] call \=> 12586269025
[ 50 fib ] call