compiler now compiles some things

This commit is contained in:
Lobo 2026-01-19 12:50:54 -03:00
parent 9616fb616e
commit ce345f2440
13 changed files with 425 additions and 74 deletions

View file

@ -1,6 +1,37 @@
#include <stdio.h>
#include <stdlib.h>
#include "chunk.h"
#include "vendor/yar.h"
/**
 * Allocate a zero-initialised bytecode chunk holding one reference.
 *
 * The caller owns that initial reference and gives it up with
 * chunk_release(). Running out of memory is fatal: the process aborts
 * rather than writing the reference count through a NULL pointer.
 *
 * @return a new chunk with `ref == 1`; never NULL.
 */
Bc *chunk_new(V) {
  Bc *chunk = calloc(1, sizeof *chunk);
  if (chunk == NULL) {
    fprintf(stderr, "chunk: out of memory\n");
    abort();
  }
  chunk->ref = 1;
  return chunk;
}
/**
 * Take one more reference on `chunk`.
 *
 * Every acquire must eventually be paired with a chunk_release().
 * When CHUNK_DEBUG is non-zero the chunk's address is traced to stderr.
 */
V chunk_acquire(Bc *chunk) {
#if CHUNK_DEBUG
  fprintf(stderr, "DEBUG: acquiring chunk at %p\n", (V *)chunk);
#endif
  chunk->ref += 1;
}
/**
 * Drop one reference on `chunk`, destroying it when the count reaches zero.
 *
 * Destruction hands the constant pool and the chunk's own byte array to
 * yar_free() and then frees the Bc allocation itself, so the pointer must
 * not be used after the final release.
 * When CHUNK_DEBUG is non-zero each step is traced to stderr.
 */
V chunk_release(Bc *chunk) {
#if CHUNK_DEBUG
  fprintf(stderr, "DEBUG: releasing chunk at %p\n", (V *)chunk);
#endif
  chunk->ref -= 1;
  if (chunk->ref != 0)
    return; // still referenced elsewhere

#if CHUNK_DEBUG
  fprintf(stderr, "DEBUG: freeing chunk at %p\n", (V *)chunk);
#endif
  yar_free(&chunk->constants);
  yar_free(chunk);
  free(chunk);
}
/** Append a single raw byte to the end of the chunk's code array. */
V chunk_emit_byte(Bc *chunk, U8 byte) {
  U8 *slot = yar_append(chunk); // grows the array and yields the new last slot
  *slot = byte;
}
V chunk_emit_sleb128(Bc *chunk, I num) {
@ -22,8 +53,3 @@ I chunk_add_constant(Bc *chunk, O value) {
*yar_append(&chunk->constants) = value;
return mark;
}
/**
 * Tear down a chunk's dynamic arrays: yar_free() is applied first to the
 * constant pool and then to the chunk's own byte array.
 * The Bc object itself is not passed to free() here.
 */
V chunk_free(Bc *chunk) {
yar_free(&chunk->constants);
yar_free(chunk);
}

View file

@ -1,11 +1,14 @@
#ifndef CHUNK_H
#define CHUNK_H
#define CHUNK_DEBUG 0
#include "common.h"
#include "object.h"
/** Bytecode chunk */
typedef struct Bc {
I ref;
U8 *items;
Z count, capacity;
struct {
@ -14,9 +17,12 @@ typedef struct Bc {
} constants;
} Bc;
Bc *chunk_new(V);
V chunk_acquire(Bc *);
V chunk_release(Bc *);
V chunk_emit_byte(Bc *, U8);
V chunk_emit_sleb128(Bc *, I);
I chunk_add_constant(Bc *, O);
V chunk_free(Bc *);
#endif

View file

@ -1,6 +1,116 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "chunk.h"
#include "compile.h"
#include "debug.h"
#include "gc.h"
#include "object.h"
#include "vm.h"
#include "vendor/mpc.h"
I compile_program(mpc_ast_t *ast) {
// clang-format off
/**
 * Words that compile straight to a single opcode.
 *
 * The table is read-only and private to this translation unit. It is
 * terminated by an entry whose `name` is NULL, which is what the lookup
 * loop in compile_expr() tests for.
 */
static const struct {
  const char *name;
  U8 opcode;
} primitives[] = {
  {"+",    OP_ADD},
  {"call", OP_APPLY},
  {NULL,   0},
};
// clang-format on
static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next);
/**
 * Emit code that pushes `value` at run time.
 *
 * The value is added to the current chunk's constant pool, followed by an
 * OP_CONST instruction whose SLEB128 operand is the index returned by
 * chunk_add_constant().
 *
 * @return always 1 (success).
 */
static I compile_constant(Cm *cm, O value) {
  Bc *out = cm->chunk;
  const I slot = chunk_add_constant(out, value);

  chunk_emit_byte(out, OP_CONST);
  chunk_emit_sleb128(out, slot);
  return 1;
}
/**
 * Compile a bracketed quotation into its own chunk and push it as a constant.
 *
 * The body is compiled into a fresh chunk with a nested compiler context
 * that shares the outer heap and dictionary. On success the chunk pointer is
 * stored in a newly allocated OBJ_QUOT heap object, which takes over the
 * chunk's single reference, and an OP_CONST referring to that object is
 * emitted into the enclosing chunk.
 *
 * @param cm   enclosing compiler context.
 * @param curr node the traversal is currently on; it is overwritten as the
 *             traversal advances.
 * @param next traversal cursor, advanced past the quotation body.
 * @return 1 on success; the failing result of compile_expr() otherwise, in
 *         which case the partially built inner chunk is released.
 */
static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
  Cm inner = {0};
  inner.chunk = chunk_new();
  inner.gc = cm->gc;
  inner.dictionary = cm->dictionary;

  (void)mpc_ast_traverse_next(next); // skip opening bracket
  curr = mpc_ast_traverse_next(next);
  while (curr != NULL) {
    if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "]") == 0)
      break;
    I res = compile_expr(&inner, curr, next);
    if (!res) {
      // Nothing else refers to the inner chunk yet, so drop it here
      // instead of leaking it.
      chunk_release(inner.chunk);
      return res;
    }
    curr = mpc_ast_traverse_next(next);
  }
  chunk_emit_byte(inner.chunk, OP_RETURN);

  // The heap object's payload is one pointer to the compiled chunk.
  Hd *hd = gc_alloc(cm->gc, sizeof(Hd) + sizeof(Bc *));
  hd->type = OBJ_QUOT;
  Bc **chunk_ptr = (Bc **)(hd + 1);
  *chunk_ptr = inner.chunk;

  O quot = BOX(hd);
  compile_constant(cm, quot);
  return 1;
}
/**
 * Compile one expression node into the current chunk.
 *
 * The node is dispatched on its tag:
 *  - "expr|number"    parsed with strtol (base auto-detected) and pushed as a
 *                     constant;
 *  - "expr|word"      looked up in the `primitives` table and emitted as a
 *                     single opcode; unknown words are an error for now;
 *  - "expr|quotation" handed to compile_quotation().
 * Any other tag is reported as not yet implemented.
 *
 * @return 1 on success, 0 after printing a diagnostic to stderr.
 */
static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
  const char *tag = curr->tag;

  if (strstr(tag, "expr|number") != NULL) {
    I num = strtol(curr->contents, NULL, 0);
    return compile_constant(cm, NUM(num));
  }

  if (strstr(tag, "expr|word") != NULL) {
    const char *word = curr->contents;
    Z i = 0;
    while (primitives[i].name != NULL) {
      if (strcmp(word, primitives[i].name) == 0) {
        chunk_emit_byte(cm->chunk, primitives[i].opcode);
        return 1;
      }
      i++;
    }
    fprintf(stderr, "compiler: dictionary nyi\n");
    return 0;
  }

  if (strstr(tag, "expr|quotation") != NULL)
    return compile_quotation(cm, curr, next);

  fprintf(stderr, "compiler: \"%s\" nyi\n", tag);
  return 0;
}
/**
 * Compile every top-level expression reachable from the traversal cursor.
 *
 * One node is discarded first, then expressions are compiled until the
 * traversal is exhausted or a node tagged exactly "regex" with empty
 * contents is reached.
 * NOTE(review): the discarded node and the empty "regex" node look like the
 * start/end anchors of the grammar's program rule; confirm against the
 * parser definition.
 *
 * @param curr ignored on entry; reused as the loop variable.
 * @return 1 on success, or the failing result of compile_expr().
 */
static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
  (void)mpc_ast_traverse_next(next);

  for (curr = mpc_ast_traverse_next(next); curr != NULL;
       curr = mpc_ast_traverse_next(next)) {
    if (strcmp(curr->tag, "regex") == 0 && curr->contents[0] == '\0')
      break;

    I ok = compile_expr(cm, curr, next);
    if (!ok)
      return ok;
  }
  return 1;
}
/**
 * Compile a parsed program into a new bytecode chunk.
 *
 * @param gc  heap used for objects created at compile time (quotations).
 * @param ast root of the mpc parse tree; not modified or freed here.
 * @return a chunk ending in OP_RETURN whose single reference belongs to the
 *         caller (release it with chunk_release()), or NULL if compilation
 *         failed, in which case the partial chunk has already been released.
 */
Bc *compile_program(Gc *gc, mpc_ast_t *ast) {
  Cm cm = {0};
  cm.chunk = chunk_new();
  cm.gc = gc;

  mpc_ast_trav_t *next = mpc_ast_traverse_start(ast, mpc_ast_trav_order_pre);
  mpc_ast_t *curr = mpc_ast_traverse_next(&next); // Begin traversal
  I ok = compile_ast(&cm, curr, &next);

  // Compilation stops before the traversal is exhausted (on the end marker
  // or on an error), so the remaining traversal state has to be freed
  // explicitly. This is a no-op when `next` is already NULL.
  mpc_ast_traverse_free(&next);

  if (!ok) {
    chunk_release(cm.chunk);
    return NULL;
  }

  chunk_emit_byte(cm.chunk, OP_RETURN);
  return cm.chunk;
}

View file

@ -1,4 +1,24 @@
#include "common.h"
#include "chunk.h"
#include "gc.h"
#include "vendor/mpc.h"
I compile_program(mpc_ast_t *);
/**
 * Compiler dictionary node.
 *
 * Each node carries a word name, an offset and up to four child links.
 * NOTE(review): nothing in the visible compiler reads or builds this
 * structure yet, so the meaning of `offset` and the role of the four
 * children are not established here; confirm once the dictionary lands.
 */
typedef struct Cd {
  struct Cd *child[4];
  const char *name;
  Z offset;
} Cd;
/**
 * Compiler context: the state threaded through the compile_* functions
 * while one chunk is being emitted.
 */
typedef struct Cm {
Gc *gc; // heap that compile-time objects (quotations) are allocated from
Bc *chunk; // chunk currently receiving bytecode and constants
Cd *dictionary; // word dictionary; passed on to nested contexts
} Cm;
// The chunk returned by `compile_program` is owned by the caller.
Bc *compile_program(Gc *, mpc_ast_t *);

81
src/debug.c Normal file
View file

@ -0,0 +1,81 @@
#include <stdio.h>
#include "debug.h"
#include "print.h"
#include "vm.h"
/**
 * Decode one signed LEB128 integer.
 *
 * Bits are accumulated in an unsigned 64-bit-or-wider value so that neither
 * the payload shift nor the sign extension performs a signed left shift.
 * Such shifts are undefined once they reach the sign bit, and at shift
 * counts of 64 or more. Payload groups beyond the 64th bit are consumed
 * but ignored.
 *
 * The encoding carries no length, so the caller must guarantee that a byte
 * with the continuation bit (0x80) clear is reachable from `ptr`.
 *
 * @param ptr        first byte of the encoded value.
 * @param bytes_read receives the number of bytes consumed.
 * @return the decoded value.
 */
static I decode_sleb128(U8 *ptr, Z *bytes_read) {
  enum { ACC_BITS = 64 };
  unsigned long long bits = 0;
  unsigned shift = 0;
  U8 byte;
  Z count = 0;

  do {
    byte = ptr[count++];
    if (shift < ACC_BITS) {
      bits |= (unsigned long long)(byte & 0x7F) << shift;
      shift += 7;
    }
  } while (byte & 0x80);

  // Bit 0x40 of the final byte is the sign; extend it through the top.
  if (shift < ACC_BITS && (byte & 0x40))
    bits |= ~0ULL << shift;

  *bytes_read = count;
  return (I)bits;
}
/**
 * Print a human-readable listing of every instruction in `chunk` to stdout,
 * preceded by a "=== name ===" banner.
 */
V disassemble(Bc *chunk, const char *name) {
  printf("=== %s ===\n", name);
  // Each call reports where the next instruction starts.
  for (Z at = 0; at < chunk->count;)
    at = disassemble_instruction(chunk, at);
}
/**
 * Print the instruction starting at `offset` and report where the next one
 * begins.
 *
 * The line starts with the zero-padded offset, followed by the mnemonic and,
 * for CONST/JUMP/JUMP_IF_NIL/CALL, the decoded SLEB128 operand. CONST also
 * prints the referenced constant when the index is in range. The jumps print
 * the target, computed relative to the end of the instruction. Opcodes
 * without a case here are printed as "? (n)".
 *
 * @return offset of the byte following this instruction.
 */
Z disassemble_instruction(Bc *chunk, Z offset) {
  printf("%04zu ", offset);

  const U8 opcode = chunk->items[offset];
  Z next = offset + 1; // first byte after the opcode
  I operand = 0;

  // Only these opcodes carry an operand; decode it once, up front.
  if (opcode == OP_CONST || opcode == OP_JUMP || opcode == OP_JUMP_IF_NIL ||
      opcode == OP_CALL) {
    Z width;
    operand = decode_sleb128(&chunk->items[next], &width);
    next += width;
  }

  switch (opcode) {
  case OP_NOP:
    printf("NOP\n");
    break;
  case OP_CONST:
    printf("CONST %ld", operand);
    if (operand >= 0 && operand < (I)chunk->constants.count) {
      printf(" (");
      print(chunk->constants.items[operand]);
      printf(")");
    }
    printf("\n");
    break;
  case OP_JUMP:
    printf("JUMP %ld -> %zu\n", operand, next + operand);
    break;
  case OP_JUMP_IF_NIL:
    printf("JUMP_IF_NIL %ld -> %zu\n", operand, next + operand);
    break;
  case OP_CALL:
    printf("CALL %ld\n", operand);
    break;
  case OP_APPLY:
    printf("APPLY\n");
    break;
  case OP_RETURN:
    printf("RETURN\n");
    break;
  case OP_ADD:
    printf("ADD\n");
    break;
  default:
    printf("? (%d)\n", opcode);
    break;
  }
  return next;
}

5
src/debug.h Normal file
View file

@ -0,0 +1,5 @@
#ifndef DEBUG_H
#define DEBUG_H

#include "chunk.h"
#include "common.h"

/** Print a banner with `name` followed by a listing of the whole chunk. */
V disassemble(Bc *chunk, const char *name);

/**
 * Print the instruction at `offset` and return the offset of the next one.
 */
Z disassemble_instruction(Bc *chunk, Z offset);

#endif

View file

@ -79,7 +79,8 @@ V gc_collect(Gc *gc) {
switch (hdr->type) {
// TODO: the rest of the owl
case OBJ_QUOT: {
Bc *chunk = (Bc *)(hdr + 1);
Bc **chunk_ptr = (Bc **)(hdr + 1);
Bc *chunk = *chunk_ptr;
for (Z i = 0; i < chunk->constants.count; i++)
chunk->constants.items[i] = forward(gc, chunk->constants.items[i]);
break;
@ -93,6 +94,23 @@ V gc_collect(Gc *gc) {
scan += ALIGN(hdr->size);
}
scan = gc->from.start;
while (scan < gc->from.free) {
Hd *hdr = (Hd *)scan;
if (hdr->type != OBJ_FWD) {
switch (hdr->type) {
case OBJ_QUOT: {
Bc **chunk_ptr = (Bc **)(hdr + 1);
chunk_release(*chunk_ptr);
break;
}
default:
break;
}
}
scan += ALIGN(hdr->size);
}
Gs tmp = gc->from;
gc->from = gc->to;
gc->to = tmp;
@ -103,7 +121,22 @@ V gc_collect(Gc *gc) {
#endif
}
void gc_init(Gc *gc) {
/**
 * Allocate `sz` bytes (rounded up by ALIGN) from the current from-space.
 *
 * If the request does not fit, one collection is run and the check is
 * repeated against the space that is current afterwards. If it still does
 * not fit, the process aborts with a diagnostic.
 *
 * @param sz total object size in bytes, header included.
 * @return pointer to the new object's header with `size` set to the aligned
 *         size; the caller still has to set `type` and fill in the payload.
 */
Hd *gc_alloc(Gc *gc, Z sz) {
  sz = ALIGN(sz);
  if (gc->from.free + sz > gc->from.end) {
    gc_collect(gc);
    if (gc->from.free + sz > gc->from.end) {
      fprintf(stderr, "out of memory (requested %zu bytes)\n", sz);
      abort();
    }
  }

  // Bump allocation: the object starts at the old free pointer.
  Hd *hdr = (Hd *)gc->from.free;
  gc->from.free += sz;
  hdr->size = sz;
  return hdr;
}
V gc_init(Gc *gc) {
gc->from.start = malloc(HEAP_BYTES);
if (!gc->from.start)
goto fatal;
@ -126,7 +159,7 @@ fatal:
abort();
}
void gc_deinit(Gc *gc) {
V gc_deinit(Gc *gc) {
gc_collect(gc);
free(gc->from.start);
free(gc->to.start);

View file

@ -4,73 +4,59 @@
#include "common.h"
#include "chunk.h"
#include "gc.h"
#include "compile.h"
#include "debug.h"
#include "parser.h"
#include "vendor/mpc.h"
#include "vm.h"
/**
 * Hex-dump `size` bytes starting at `data` to stdout.
 *
 * Bytes are printed as two-digit uppercase hex, 16 per row with an extra
 * gap after the 8th, followed by "| " and the same bytes as text, where
 * anything outside ' '..'~' is shown as '.'. A final partial row is padded
 * before its text column.
 */
void dump(const V *data, Z size) {
  const unsigned char *bytes = (const unsigned char *)data;
  char ascii[17];
  ascii[16] = '\0';

  for (Z i = 0; i < size; ++i) {
    const unsigned char ch = bytes[i];
    const Z done = i + 1; // number of bytes printed so far

    printf("%02X ", ch);
    ascii[i % 16] = (ch >= ' ' && ch <= '~') ? ch : '.';

    // Nothing more to do unless a group of 8 or the whole input just ended.
    if (done % 8 != 0 && done != size)
      continue;

    printf(" ");
    if (done % 16 == 0) {
      printf("| %s \n", ascii);
      continue;
    }
    if (done != size)
      continue;

    // Final, partial row: terminate the text and pad the hex columns.
    ascii[done % 16] = '\0';
    if (done % 16 <= 8)
      printf(" ");
    for (Z pad = done % 16; pad < 16; ++pad)
      printf(" ");
    printf("| %s \n", ascii);
  }
}
#include "vendor/mpc.h"
/**
 * Placeholder REPL entry point: instead of reading input it assembles a
 * fixed test chunk and runs it on a freshly initialised VM.
 *
 * NOTE(review): as listed here the body contains two variants of the setup
 * (a stack-allocated `Bc chunk` and a `chunk_new()` one), including
 * duplicate declarations of `chunk` and `idx` and an early `return 0;`.
 * Confirm which lines belong to the current revision before relying on the
 * notes below.
 */
I repl(void) {
Bc chunk = {0};
Vm vm = {0};
vm_init(&vm);
I idx = chunk_add_constant(&chunk, NUM(10));
chunk_emit_byte(&chunk, OP_CONST);
chunk_emit_sleb128(&chunk, idx);
chunk_emit_byte(&chunk, OP_RETURN);
Bc *chunk = chunk_new();
vm_run(&vm, &chunk, 0);
// Test program: push constant 10 twice, add, return.
I idx = chunk_add_constant(chunk, NUM(10));
chunk_emit_byte(chunk, OP_CONST);
chunk_emit_sleb128(chunk, idx);
chunk_emit_byte(chunk, OP_CONST);
chunk_emit_sleb128(chunk, idx);
chunk_emit_byte(chunk, OP_ADD);
chunk_emit_byte(chunk, OP_RETURN);
return 0;
disassemble(chunk, "test chunk");
I res = vm_run(&vm, chunk, 0);
chunk_release(chunk);
vm_deinit(&vm);
// vm_run yields non-zero on success; the value returned here is inverted.
return !res;
}
/**
 * Parse the file `fname` with the `Program` grammar, compile the resulting
 * AST and run the chunk on a fresh VM.
 *
 * Returns 1 when parsing or compilation fails, otherwise the inverted
 * result of vm_run().
 *
 * NOTE(review): as listed here the body mixes a standalone `Gc gc` with the
 * VM-owned heap (`vm.gc`) and contains an early `return 0;`. The
 * parse-failure path calls gc_deinit(&gc) but not vm_deinit(&vm). Confirm
 * which lines belong to the current revision.
 */
I loadfile(const char *fname) {
Gc gc = {0};
gc_init(&gc);
Vm vm = {0};
vm_init(&vm);
mpc_result_t res;
if (!mpc_parse_contents(fname, Program, &res)) {
mpc_err_print_to(res.error, stderr);
mpc_err_delete(res.error);
gc_deinit(&gc);
return 1;
}
mpc_ast_print(res.output);
// Compile-time objects are allocated from the VM's own heap.
Bc *chunk = compile_program(&vm.gc, res.output);
mpc_ast_delete(res.output);
gc_deinit(&gc);
return 0;
if (chunk != NULL) {
disassemble(chunk, fname);
// This `res` shadows the mpc_result_t declared above.
I res = vm_run(&vm, chunk, 0);
chunk_release(chunk);
vm_deinit(&vm);
return !res;
} else {
vm_deinit(&vm);
return 1;
}
}
int main(int argc, const char *argv[]) {

View file

@ -19,6 +19,7 @@ enum {
TYPE_NIL = 0,
TYPE_NUM = 1,
TYPE_FWD = OBJ_FWD,
TYPE_QUOT = OBJ_QUOT,
};
typedef uintptr_t O;

View file

@ -1,8 +1,10 @@
#include "vm.h"
#include "gc.h"
#include "print.h"
#include <stdio.h>
#include "gc.h"
#include "object.h"
#include "print.h"
#include "vm.h"
static I decode_sleb128(U8 **ptr) {
I result = 0;
I shift = 0;
@ -25,6 +27,7 @@ static I decode_sleb128(U8 **ptr) {
V vm_init(Vm *vm) {
vm->sp = vm->stack;
vm->rsp = vm->rstack;
vm->chunk = NULL;
gc_init(&vm->gc);
for (Z i = 0; i < STACK_SIZE; i++) {
@ -33,16 +36,39 @@ V vm_init(Vm *vm) {
}
}
/** Tear down the VM by shutting down the heap it owns. */
V vm_deinit(Vm *vm) {
  Gc *heap = &vm->gc;
  gc_deinit(heap);
}
/** Push `o` onto the data stack. No overflow check is performed. */
V vm_push(Vm *vm, O o) {
  *vm->sp = o;
  vm->sp += 1;
}
/** Pop and return the top of the data stack. No underflow check is performed. */
O vm_pop(Vm *vm) {
  vm->sp -= 1;
  return *vm->sp;
}
/** Return the top of the data stack without removing it. */
O vm_peek(Vm *vm) {
  return vm->sp[-1];
}
V vm_run(Vm *vm, Bc *chunk, I offset) {
/**
 * Push a return frame recording the chunk and instruction pointer at which
 * execution should resume. No overflow check is performed.
 */
V vm_rpush(Vm *vm, Bc *chunk, U8 *ip) {
  Fr *frame = vm->rsp++;
  frame->chunk = chunk;
  frame->ip = ip;
}
/** Pop and return the most recent return frame. No underflow check is performed. */
Fr vm_rpop(Vm *vm) {
  vm->rsp -= 1;
  return *vm->rsp;
}
I vm_run(Vm *vm, Bc *chunk, I offset) {
I mark = gc_mark(&vm->gc);
for (Z i = 0; i < chunk->constants.count; i++)
gc_addroot(&vm->gc, &chunk->constants.items[i]);
#define BINOP(op) \
{ \
O b = vm_pop(vm); \
O a = vm_pop(vm); \
if (!IMM(a) || !IMM(b)) { \
fprintf(stderr, "vm: arithmetic on non-number objects\n"); \
return 0; \
} \
vm_push(vm, NUM(ORD(a) op ORD(b))); \
break; \
}
vm->ip = chunk->items + offset;
vm->chunk = chunk;
for (;;) {
U8 opcode;
switch (opcode = *vm->ip++) {
@ -50,20 +76,65 @@ V vm_run(Vm *vm, Bc *chunk, I offset) {
continue;
case OP_CONST: {
I idx = decode_sleb128(&vm->ip);
vm_push(vm, chunk->constants.items[idx]);
vm_push(vm, vm->chunk->constants.items[idx]);
break;
}
case OP_JUMP: {
I ofs = decode_sleb128(&vm->ip);
vm->ip += ofs;
break;
}
case OP_JUMP_IF_NIL: {
I ofs = decode_sleb128(&vm->ip);
if (vm_pop(vm) == NIL)
vm->ip += ofs;
break;
}
case OP_CALL: {
I ofs = decode_sleb128(&vm->ip);
vm_rpush(vm, vm->chunk, vm->ip);
vm->ip = chunk->items + ofs;
break;
}
case OP_APPLY: {
O quot = vm_pop(vm);
if (type(quot) == TYPE_QUOT) {
Bc **ptr = (Bc **)(UNBOX(quot) + 1);
Bc *chunk = *ptr;
vm_rpush(vm, vm->chunk, vm->ip);
vm->chunk = chunk;
vm->ip = chunk->items;
} else {
fprintf(stderr, "vm: attempt to apply non-quotation object\n");
return 0;
}
break;
}
case OP_RETURN:
goto done;
if (vm->rsp != vm->rstack) {
Fr frame = vm_rpop(vm);
vm->chunk = frame.chunk;
vm->ip = frame.ip;
} else {
goto done;
}
break;
case OP_ADD:
BINOP(+);
default:
fprintf(stderr, "unknown opcode %d\n", opcode);
return 0;
}
}
done:
gc_reset(&vm->gc, mark);
// print stack :3
for (O *i = vm->stack; i < vm->sp; i++) {
print(*i);
putchar(' ');
if (vm->sp != vm->stack) {
for (O *i = vm->stack; i < vm->sp; i++) {
print(*i);
putchar(' ');
}
putchar('\n');
}
putchar('\n');
return 1;
}

View file

@ -10,25 +10,38 @@
/**
 * Bytecode opcodes. Each is one byte; where an operand is mentioned it is a
 * signed LEB128 integer that follows the opcode directly.
 * The numeric values are the bytecode encoding, so the order matters.
 */
enum {
OP_NOP = 0, // No operand
OP_CONST, // Push constant to stack; operand is the constant-pool index
OP_DROP, // NOTE(review): no handler visible in the VM or disassembler yet
OP_DUP, // NOTE(review): no handler visible in the VM or disassembler yet
OP_SWAP, // NOTE(review): no handler visible in the VM or disassembler yet
OP_JUMP, // Relative jump; operand is added to the instruction pointer
OP_JUMP_IF_NIL, // Pop a value; relative jump if it is nil
OP_DOWORD, // NOTE(review): no handler visible in the VM or disassembler yet
OP_CALL, // Push a return frame, then jump to the chunk offset in the operand
OP_APPLY, // Pop a quotation, push a return frame and run the quotation's chunk
OP_RETURN, // Resume the most recent return frame, or stop if there is none
OP_ADD, // Pop two numbers and push their sum
};
#define STACK_SIZE 256
/** Return-stack frame: where execution resumes when OP_RETURN is executed. */
typedef struct Fr {
Bc *chunk; // chunk that was executing when the frame was pushed
U8 *ip; // instruction pointer to resume at
} Fr;
/**
 * Virtual machine state.
 *
 * NOTE(review): `rstack`/`rsp` appear twice in this listing (once as `U`,
 * once as `Fr`); vm_rpush()/vm_rpop() use the `Fr` form. Confirm which
 * declaration belongs to the current revision.
 */
typedef struct Vm {
Gc gc; // heap owned by the VM (set up in vm_init, torn down in vm_deinit)
O stack[256], *sp; // data stack; sp points one past the top element
U rstack[256], *rsp;
Fr rstack[256], *rsp; // return stack; rsp points one past the newest frame
U8 *ip; // next bytecode byte to execute
Bc *chunk; // chunk currently being executed
} Vm;
V vm_init(Vm *);
V vm_deinit(Vm *);
V vm_push(Vm *, O);
O vm_pop(Vm *);
O vm_peek(Vm *);
V vm_run(Vm *, Bc *, I);
I vm_run(Vm *, Bc *, I);
#endif