diff --git a/.editorconfig b/.editorconfig index 7ecbc3d..7c14f37 100644 --- a/.editorconfig +++ b/.editorconfig @@ -10,4 +10,4 @@ indent_size = 2 [meson.build] indent_style = space -indent_size = 2 +indent_size = 4 diff --git a/meson.build b/meson.build index 39c778e..90cb202 100644 --- a/meson.build +++ b/meson.build @@ -1,39 +1,65 @@ project( - 'growl', - 'c', - meson_version : '>= 1.3.0', - version : '0.1', - default_options : ['buildtype=debugoptimized', 'c_std=gnu11', 'warning_level=3'], + 'growl', + 'c', + 'cpp', + meson_version: '>= 1.3.0', + version: '0.1', + default_options: [ + 'buildtype=debugoptimized', + 'c_std=gnu11', + 'cpp_std=c++20', + 'warning_level=3', + ], ) libutf = subproject('libutf') libutf_dep = libutf.get_variable('libutf_dep') -sources = [ - 'src/arena.c', - 'src/chunk.c', - 'src/compile.c', - 'src/debug.c', - 'src/dictionary.c', - 'src/file.c', - 'src/lexer.c', - 'src/object.c', - 'src/gc.c', - 'src/parser.c', - 'src/primitive.c', - 'src/print.c', - 'src/stream.c', - 'src/string.c', - 'src/userdata.c', - 'src/vm.c', - 'src/vendor/linenoise.c', - 'src/vendor/mpc.c', - 'src/vendor/yar.c', +growl_sources = [ + 'src/arena.c', + 'src/chunk.c', + 'src/compile.c', + 'src/debug.c', + 'src/dictionary.c', + 'src/file.c', + 'src/lexer.c', + 'src/object.c', + 'src/gc.c', + 'src/parser.c', + 'src/primitive.c', + 'src/print.c', + 'src/stream.c', + 'src/string.c', + 'src/userdata.c', + 'src/vm.c', + 'src/vendor/linenoise.c', + 'src/vendor/yar.c', ] -exe = executable( - 'growl', - 'src/main.c', sources, - dependencies : [libutf_dep], - install : true, +growl = executable( + 'growl', + 'src/main.c', + growl_sources, + dependencies: [libutf_dep], + install: true, +) + +growlnext_sources = [ + 'next/core/arena.c', + 'next/core/callable.c', + 'next/core/compiler.c', + 'next/core/gc.c', + 'next/core/list.c', + 'next/core/sleb128.c', + 'next/core/string.c', + 'next/core/tuple.c', + 'next/core/vm.c', + 'next/main.c', +] + +growlnext = executable( + 'growlnext', + growlnext_sources, + include_directories: ['next/include'], + install: true, ) diff --git a/next/core/arena.c b/next/core/arena.c new file mode 100644 index 0000000..c7b6247 --- /dev/null +++ b/next/core/arena.c @@ -0,0 +1,26 @@ +#include +#include +#include + +void growl_arena_init(GrowlGCArena *arena, size_t size) { + arena->start = arena->free = malloc(size); + if (arena->start == NULL) + abort(); + arena->end = arena->start + size; +} + +void growl_arena_free(GrowlGCArena *arena) { + free(arena->start); + arena->start = arena->end = arena->free = NULL; +} + +void *growl_arena_alloc(GrowlGCArena *arena, size_t size, size_t align, + size_t count) { + ptrdiff_t padding = -(uintptr_t)arena->start & (align - 1); + ptrdiff_t available = arena->end - arena->start - padding; + if (available < 0 || count > available / size) + abort(); + void *p = arena->start + padding; + arena->start += padding + count * size; + return memset(p, 0, count * size); +} diff --git a/next/core/callable.c b/next/core/callable.c new file mode 100644 index 0000000..71052ba --- /dev/null +++ b/next/core/callable.c @@ -0,0 +1,97 @@ +#include +#include + +int growl_callable(Growl obj) { + if (obj == GROWL_NIL || GROWL_IMM(obj)) + return 0; + GrowlObjectHeader *hdr = GROWL_UNBOX(obj); + switch (hdr->type) { + case GROWL_QUOTATION: + case GROWL_COMPOSE: + case GROWL_CURRY: + return 1; + default: + return 0; + } +} + +Growl growl_make_quotation(GrowlVM *vm, const uint8_t *code, size_t code_size, + const Growl *constants, size_t constants_size) { + size_t constants_obj_size = sizeof(GrowlObjectHeader) + sizeof(GrowlTuple) + + constants_size * sizeof(Growl); + GrowlObjectHeader *constants_hdr = + growl_gc_alloc_tenured(vm, constants_obj_size); + constants_hdr->type = GROWL_TUPLE; + GrowlTuple *constants_tuple = (GrowlTuple *)(constants_hdr + 1); + + constants_tuple->count = constants_size; + for (size_t i = 0; i < constants_size; ++i) { + constants_tuple->data[i] = constants[i]; + } + + size_t quotation_obj_size = + sizeof(GrowlObjectHeader) + sizeof(GrowlQuotation) + code_size; + GrowlObjectHeader *quotation_hdr = + growl_gc_alloc_tenured(vm, quotation_obj_size); + quotation_hdr->type = GROWL_QUOTATION; + GrowlQuotation *quotation = (GrowlQuotation *)(quotation_hdr + 1); + + quotation->constants = GROWL_BOX(constants_hdr); + quotation->count = code_size; + memcpy(quotation->data, code, code_size); + + return GROWL_BOX(quotation_hdr); +} + +GrowlQuotation *growl_unwrap_quotation(Growl obj) { + if (obj == GROWL_NIL || GROWL_IMM(obj)) + return NULL; + GrowlObjectHeader *hdr = GROWL_UNBOX(obj); + if (hdr->type != GROWL_QUOTATION) + return NULL; + return (GrowlQuotation *)(hdr + 1); +} + +Growl growl_compose(GrowlVM *vm, Growl first, Growl second) { + if (!growl_callable(first)) + return GROWL_NIL; + if (!growl_callable(second)) + return GROWL_NIL; + size_t size = sizeof(GrowlObjectHeader) + sizeof(GrowlCompose); + GrowlObjectHeader *hdr = growl_gc_alloc(vm, size); + hdr->type = GROWL_COMPOSE; + GrowlCompose *comp = (GrowlCompose *)(hdr + 1); + comp->first = first; + comp->second = second; + return GROWL_BOX(hdr); +} + +GrowlCompose *growl_unwrap_compose(Growl obj) { + if (obj == GROWL_NIL || GROWL_IMM(obj)) + return NULL; + GrowlObjectHeader *hdr = GROWL_UNBOX(obj); + if (hdr->type != GROWL_COMPOSE) + return NULL; + return (GrowlCompose *)(hdr + 1); +} + +Growl growl_curry(GrowlVM *vm, Growl value, Growl callable) { + if (!growl_callable(callable)) + return GROWL_NIL; + size_t size = sizeof(GrowlObjectHeader) + sizeof(GrowlCurry); + GrowlObjectHeader *hdr = growl_gc_alloc(vm, size); + hdr->type = GROWL_CURRY; + GrowlCurry *comp = (GrowlCurry *)(hdr + 1); + comp->value = value; + comp->callable = callable; + return GROWL_BOX(hdr); +} + +GrowlCurry *growl_unwrap_curry(Growl obj) { + if (obj == GROWL_NIL || GROWL_IMM(obj)) + return NULL; + GrowlObjectHeader *hdr = GROWL_UNBOX(obj); + if (hdr->type != GROWL_CURRY) + return NULL; + return (GrowlCurry *)(hdr + 1); +} diff --git a/next/core/compiler.c b/next/core/compiler.c new file mode 100644 index 0000000..db7f5ca --- /dev/null +++ b/next/core/compiler.c @@ -0,0 +1,2 @@ +#include + diff --git a/next/core/gc.c b/next/core/gc.c new file mode 100644 index 0000000..9e17c1f --- /dev/null +++ b/next/core/gc.c @@ -0,0 +1,169 @@ +// +// Created by lobo on 2/5/26. +// + +#include +#include +#include +#include +#include +#include + +#define ALIGN(n) (((n) + 7) & ~7) + +static int in_from(GrowlVM *vm, void *ptr) { + const uint8_t *x = ptr; + return (x >= vm->from.start && x < vm->from.end); +} + +static Growl copy(GrowlVM *vm, GrowlObjectHeader *hdr) { + assert(in_from(vm, hdr)); + assert(hdr->type != UINT32_MAX); + size_t size = ALIGN(hdr->size); + GrowlObjectHeader *new = (GrowlObjectHeader *)vm->to.free; + vm->to.free += size; + memcpy(new, hdr, size); + hdr->type = UINT32_MAX; + Growl *obj = (Growl *)(hdr + 1); + *obj = (Growl)(new); + return *obj; +} + +static Growl forward(GrowlVM *vm, Growl obj) { + if (obj == 0) + return 0; + if (!in_from(vm, (void *)obj)) + return obj; + + GrowlObjectHeader *hdr = (GrowlObjectHeader *)obj; + if (hdr->type == UINT32_MAX) { + Growl *fwd = (Growl *)(hdr + 1); + return *fwd; + } + return copy(vm, hdr); +} + +GrowlObjectHeader *growl_gc_alloc(GrowlVM *vm, size_t size) { + size = ALIGN(size); + if (vm->from.free + size > vm->from.end) { + growl_gc_collect(vm); + if (vm->from.free + size > vm->from.end) { + fprintf(stderr, "gc: oom (requested %" PRIdPTR " bytes)\n", size); + abort(); + } + } + GrowlObjectHeader *hdr = (GrowlObjectHeader *)vm->from.free; + vm->from.free += size; + hdr->size = size; + return hdr; +} + +GrowlObjectHeader *growl_gc_alloc_tenured(GrowlVM *vm, size_t size) { + size = ALIGN(size); + GrowlObjectHeader *hdr = growl_arena_alloc(&vm->arena, size, 8, 1); + hdr->size = size; + return hdr; +} + +static void scan(GrowlVM *vm, GrowlObjectHeader *hdr) { + switch (hdr->type) { + case GROWL_STRING: + break; + case GROWL_LIST: { + GrowlList *list = (GrowlList *)(hdr + 1); + list->head = forward(vm, list->head); + list->tail = forward(vm, list->tail); + break; + } + case GROWL_TUPLE: { + GrowlTuple *tuple = (GrowlTuple *)(hdr + 1); + for (size_t i = 0; i < tuple->count; ++i) { + tuple->data[i] = forward(vm, tuple->data[i]); + } + break; + } + case GROWL_QUOTATION: { + GrowlQuotation *quot = (GrowlQuotation *)(hdr + 1); + quot->constants = forward(vm, quot->constants); + break; + } + case GROWL_COMPOSE: { + GrowlCompose *comp = (GrowlCompose *)(hdr + 1); + comp->first = forward(vm, comp->first); + comp->second = forward(vm, comp->second); + break; + } + case GROWL_CURRY: { + GrowlCurry *comp = (GrowlCurry *)(hdr + 1); + comp->value = forward(vm, comp->value); + comp->callable = forward(vm, comp->callable); + break; + } + case UINT32_MAX: + fprintf(stderr, "gc: fwd pointer during scan\n"); + abort(); + default: + fprintf(stderr, "gc: junk object type %" PRIu32 "\n", hdr->type); + abort(); + } +} + +static void gc_print_stats(GrowlVM *vm, const char *label) { + size_t used = vm->from.free - vm->from.start; + size_t total = vm->from.end - vm->from.start; + fprintf(stderr, "[%s] used=%zu/%zu bytes (%.1f%%)\n", label, used, total, + (double)used / (double)total * 100.0); +} + +void growl_gc_collect(GrowlVM *vm) { + uint8_t *gc_scan = vm->to.free; + + gc_print_stats(vm, "before GC"); + + for (size_t i = 0; i < GROWL_STACK_SIZE; ++i) { + vm->wst[i] = forward(vm, vm->wst[i]); + } + + for (size_t i = 0; i < vm->root_count; ++i) { + *vm->roots[i] = forward(vm, *vm->roots[i]); + } + + uint8_t *arena_scan = vm->arena.start; + while (arena_scan < vm->arena.free) { + GrowlObjectHeader *hdr = (GrowlObjectHeader *)arena_scan; + scan(vm, hdr); + arena_scan += ALIGN(hdr->size); + } + + while (gc_scan < vm->to.free) { + GrowlObjectHeader *hdr = (GrowlObjectHeader *)gc_scan; + scan(vm, hdr); + gc_scan += ALIGN(hdr->size); + } + + GrowlGCArena tmp = vm->from; + vm->from = vm->to; + vm->to = tmp; + vm->to.free = vm->to.start; + vm->scratch.free = vm->scratch.start; + + gc_print_stats(vm, "after GC"); +} + +void growl_gc_root(GrowlVM *vm, Growl *ptr) { + if (vm->root_count >= vm->root_capacity) { + size_t cap = vm->root_capacity == 0 ? 16 : vm->root_capacity * 2; + Growl **data = realloc(vm->roots, cap * sizeof(Growl *)); + if (!data) { + fprintf(stderr, "expanding roots array: oom\n"); + abort(); + } + vm->root_capacity = cap; + vm->roots = data; + } + vm->roots[vm->root_count++] = ptr; +} + +size_t growl_gc_mark(GrowlVM *vm) { return vm->root_count; } + +void growl_gc_reset(GrowlVM *vm, size_t mark) { vm->root_count = mark; } diff --git a/next/core/list.c b/next/core/list.c new file mode 100644 index 0000000..db7f5ca --- /dev/null +++ b/next/core/list.c @@ -0,0 +1,2 @@ +#include + diff --git a/next/core/opcodes.h b/next/core/opcodes.h new file mode 100644 index 0000000..0d3eca1 --- /dev/null +++ b/next/core/opcodes.h @@ -0,0 +1,12 @@ +#ifndef GROWL_OPCODES_H +#define GROWL_OPCODES_H + +enum { + GOP_NOP = 0, + GOP_PUSH_NIL, + GOP_PUSH_CONSTANT, + GOP_CALL, + GOP_RETURN, +}; + +#endif // GROWL_OPCODES_H diff --git a/next/core/sleb128.c b/next/core/sleb128.c new file mode 100644 index 0000000..0f41aad --- /dev/null +++ b/next/core/sleb128.c @@ -0,0 +1,45 @@ +// +// Created by lobo on 2/5/26. +// + +#include "sleb128.h" + +intptr_t growl_sleb128_decode(uint8_t **ptr) { + intptr_t result = 0; + intptr_t shift = 0; + uint8_t byte; + + do { + byte = **ptr; + (*ptr)++; + result |= (intptr_t)(byte & 0x7F) << shift; + shift += 7; + } while (byte & 0x80); + + if ((shift < 64) && (byte & 0x40)) { + result |= -(1LL << shift); + } + + return result; +} + +size_t growl_sleb128_peek(const uint8_t *ptr, intptr_t *out) { + intptr_t result = 0, shift = 0; + size_t bytes = 0; + uint8_t byte; + + do { + byte = ptr[bytes]; + bytes++; + result |= (intptr_t)(byte & 0x7f) << shift; + shift += 7; + } while (byte & 0x80); + + if (shift < 64 && byte & 0x40) { + result |= -(1LL << shift); + } + + if (out) + *out = result; + return bytes; +} diff --git a/next/core/sleb128.h b/next/core/sleb128.h new file mode 100644 index 0000000..b866f6d --- /dev/null +++ b/next/core/sleb128.h @@ -0,0 +1,10 @@ +#ifndef GROWL_SLEB128_H +#define GROWL_SLEB128_H + +#include +#include + +intptr_t growl_sleb128_decode(uint8_t **ptr); +size_t growl_sleb128_peek(const uint8_t *ptr, intptr_t *out); + +#endif // GROWL_SLEB128_H diff --git a/next/core/string.c b/next/core/string.c new file mode 100644 index 0000000..41bf42a --- /dev/null +++ b/next/core/string.c @@ -0,0 +1,33 @@ +#include +#include + +Growl growl_make_string(GrowlVM *vm, size_t len) { + size_t size = sizeof(GrowlObjectHeader) + sizeof(GrowlString) + len; + GrowlObjectHeader *hdr = growl_gc_alloc(vm, size); + hdr->type = GROWL_STRING; + GrowlString *str = (GrowlString *)(hdr + 1); + str->len = len; + memset(str->data, 0, len); + return GROWL_BOX(hdr); +} + +Growl growl_wrap_string(GrowlVM *vm, const char *cstr) { + size_t len = strlen(cstr); + size_t size = sizeof(GrowlObjectHeader) + sizeof(GrowlString) + len + 1; + GrowlObjectHeader *hdr = growl_gc_alloc(vm, size); + hdr->type = GROWL_STRING; + GrowlString *str = (GrowlString *)(hdr + 1); + str->len = len; + memcpy(str->data, cstr, len); + str->data[len] = 0; + return GROWL_BOX(hdr); +} + +GrowlString *growl_unwrap_string(Growl obj) { + if (obj == 0 || GROWL_IMM(obj)) + return NULL; + GrowlObjectHeader *hdr = GROWL_UNBOX(obj); + if (hdr->type != GROWL_STRING) + return NULL; + return (GrowlString *)(hdr + 1); +} diff --git a/next/core/tuple.c b/next/core/tuple.c new file mode 100644 index 0000000..f38e0b5 --- /dev/null +++ b/next/core/tuple.c @@ -0,0 +1,10 @@ +#include + +GrowlTuple *growl_unwrap_tuple(Growl obj) { + if (obj == 0 || GROWL_IMM(obj)) + return NULL; + GrowlObjectHeader *hdr = GROWL_UNBOX(obj); + if (hdr->type != GROWL_TUPLE) + return NULL; + return (GrowlTuple *)(hdr + 1); +} diff --git a/next/core/vm.c b/next/core/vm.c new file mode 100644 index 0000000..49d94d7 --- /dev/null +++ b/next/core/vm.c @@ -0,0 +1,164 @@ +#include +#include +#include +#include + +#include "opcodes.h" +#include "sleb128.h" + +#include +#include + +GrowlVM *growl_vm_init(void) { + GrowlVM *mem = malloc(sizeof(GrowlVM)); + if (mem == NULL) { + abort(); + } + + growl_arena_init(&mem->from, GROWL_HEAP_SIZE); + growl_arena_init(&mem->to, GROWL_HEAP_SIZE); + growl_arena_init(&mem->arena, GROWL_ARENA_SIZE); + growl_arena_init(&mem->scratch, GROWL_SCRATCH_SIZE); + + mem->sp = mem->wst; + mem->rsp = mem->rst; + mem->csp = mem->cst; + + for (size_t i = 0; i < GROWL_STACK_SIZE; ++i) { + mem->wst[i] = 0; + mem->rst[i] = 0; + } + + mem->roots = NULL; + mem->root_count = 0; + mem->root_capacity = 0; + + return mem; +} + +void growl_vm_free(GrowlVM *vm) { + growl_arena_free(&vm->from); + growl_arena_free(&vm->to); + growl_arena_free(&vm->arena); + growl_arena_free(&vm->scratch); + if (vm->roots != NULL) + free(vm->roots); + free(vm); +} + +__attribute__((format(printf, 2, 3))) static noreturn void +vm_error(GrowlVM *vm, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + fprintf(stderr, "vm: "); + vfprintf(stderr, fmt, args); + fprintf(stderr, "\n"); + va_end(args); + longjmp(vm->error, -1); +} + +void growl_push(GrowlVM *vm, Growl obj) { + if (vm->sp >= vm->wst + GROWL_STACK_SIZE) + vm_error(vm, "work stack overflow"); + *vm->sp++ = obj; +} + +Growl growl_pop(GrowlVM *vm) { + if (vm->sp <= vm->wst) + vm_error(vm, "work stack underflow"); + Growl obj = *--vm->sp; + *vm->sp = GROWL_NIL; + return obj; +} + +void growl_rpush(GrowlVM *vm, Growl obj) { + if (vm->rsp >= vm->rst + GROWL_STACK_SIZE) + vm_error(vm, "work stack overflow"); + *vm->rsp++ = obj; +} + +Growl growl_rpop(GrowlVM *vm) { + if (vm->rsp <= vm->rst) + vm_error(vm, "work stack underflow"); + Growl obj = *--vm->rsp; + *vm->rsp = GROWL_NIL; + return obj; +} + +static void push_call(GrowlVM *vm, GrowlQuotation *q, uint8_t *ip) { + if (vm->csp >= vm->cst + GROWL_CALL_STACK_SIZE) + vm_error(vm, "call stack overflow"); + vm->csp->quot = q; + vm->csp->ip = ip; + vm->csp++; +} +static GrowlFrame pop_call(GrowlVM *vm) { + if (vm->csp <= vm->cst) + vm_error(vm, "call stack underflow"); + return *--vm->csp; +} + +int vm_doquot(GrowlVM *vm, GrowlQuotation *quot) { + size_t gc_mark = growl_gc_mark(vm); + int result = setjmp(vm->error); + + if (result != 0) { + growl_gc_reset(vm, gc_mark); + return result; + } + + GrowlTuple *constants = growl_unwrap_tuple(quot->constants); + if (constants != NULL) { + for (size_t i = 0; i < constants->count; ++i) { + growl_gc_root(vm, &constants->data[i]); + } + } + + vm->ip = quot->data; + vm->quotation = quot; + + for (;;) { + uint8_t opcode; + switch (opcode = *vm->ip++) { + case GOP_NOP: + break; + case GOP_PUSH_NIL: + growl_push(vm, GROWL_NIL); + break; + case GOP_PUSH_CONSTANT: { + intptr_t idx = growl_sleb128_decode(&vm->ip); + if (constants != NULL) { + growl_push(vm, constants->data[idx]); + } else { + vm_error(vm, "constant index %" PRIdPTR " out of bounds", idx); + } + break; + case GOP_CALL: { // TODO: compose and curry + Growl obj = growl_pop(vm); + push_call(vm, vm->quotation, vm->ip); + GrowlQuotation *obj_quot = growl_unwrap_quotation(obj); + if (obj_quot == NULL) + vm_error(vm, "attempt to call non-callable"); + vm->quotation = obj_quot; + vm->ip = obj_quot->data; + break; + } + case GOP_RETURN: + if (vm->csp != vm->cst) { + GrowlFrame frame = pop_call(vm); + vm->quotation = frame.quot; + vm->ip = frame.ip; + } else { + goto done; + } + break; + } + default: + vm_error(vm, "unknown opcode %d", opcode); + } + } + +done: + growl_gc_reset(vm, gc_mark); + return 0; +} diff --git a/next/include/growl.h b/next/include/growl.h new file mode 100644 index 0000000..0bda72a --- /dev/null +++ b/next/include/growl.h @@ -0,0 +1,135 @@ +#ifndef GROWL_H +#define GROWL_H + +#include +#include +#include + +typedef uintptr_t Growl; + +#define GROWL_NIL ((Growl)(0)) +#define GROWL_BOX(x) ((Growl)(x)) +#define GROWL_UNBOX(x) ((GrowlObjectHeader *)(x)) +#define GROWL_IMM(x) ((Growl)(x) & (Growl)1) +#define GROWL_NUM(x) (((Growl)((intptr_t)(x) << 1)) | (Growl)1) +#define GROWL_ORD(x) ((intptr_t)(x) >> 1) + +typedef struct GrowlObjectHeader GrowlObjectHeader; +typedef struct GrowlString GrowlString; +typedef struct GrowlList GrowlList; +typedef struct GrowlTuple GrowlTuple; +typedef struct GrowlQuotation GrowlQuotation; +typedef struct GrowlCompose GrowlCompose; +typedef struct GrowlCurry GrowlCurry; +typedef struct GrowlGCArena GrowlGCArena; +typedef struct GrowlFrame GrowlFrame; +typedef struct GrowlVM GrowlVM; + +enum { + GROWL_STRING, + GROWL_LIST, + GROWL_TUPLE, + GROWL_QUOTATION, + GROWL_COMPOSE, + GROWL_CURRY, +}; + +struct GrowlObjectHeader { + size_t size; + uint32_t type; +}; + +struct GrowlString { + size_t len; + char data[]; +}; + +Growl growl_make_string(GrowlVM *vm, size_t len); +Growl growl_wrap_string(GrowlVM *vm, const char *cstr); +GrowlString *growl_unwrap_string(Growl obj); + +struct GrowlList { + Growl head, tail; +}; + +struct GrowlTuple { + size_t count; + Growl data[]; +}; + +GrowlTuple *growl_unwrap_tuple(Growl obj); + +struct GrowlQuotation { + size_t count; + Growl constants; + uint8_t data[]; +}; + +struct GrowlCompose { + Growl first, second; +}; + +struct GrowlCurry { + Growl value, callable; +}; + +int growl_callable(Growl obj); +Growl growl_make_quotation(GrowlVM *vm, const uint8_t *code, size_t code_size, + const Growl *constants, size_t constants_size); +GrowlQuotation *growl_unwrap_quotation(Growl obj); +Growl growl_compose(GrowlVM *vm, Growl first, Growl second); +GrowlCompose *growl_unwrap_compose(Growl obj); +Growl growl_curry(GrowlVM *vm, Growl value, Growl callable); +GrowlCurry *growl_unwrap_curry(Growl obj); + +struct GrowlGCArena { + uint8_t *start, *end; + uint8_t *free; +}; + +void growl_arena_init(GrowlGCArena *arena, size_t size); +void growl_arena_free(GrowlGCArena *arena); +void *growl_arena_alloc(GrowlGCArena *arena, size_t size, size_t align, + size_t count); +#define growl_arena_new(a, t, n) \ + (t *)growl_arena_alloc(a, sizeof(t), _Alignof(t), n) + +#define GROWL_STACK_SIZE 128 +#define GROWL_CALL_STACK_SIZE 64 +#define GROWL_HEAP_SIZE (4 * 1024 * 1024) +#define GROWL_ARENA_SIZE (2 * 1024 * 1024) +#define GROWL_SCRATCH_SIZE (1024 * 1024) + +struct GrowlFrame { + GrowlQuotation *quot; + uint8_t *ip; +}; + +struct GrowlVM { + GrowlGCArena from, to; + GrowlGCArena arena; + GrowlGCArena scratch; + + GrowlQuotation *quotation; + uint8_t *ip; + Growl wst[GROWL_STACK_SIZE], *sp; + Growl rst[GROWL_STACK_SIZE], *rsp; + GrowlFrame cst[GROWL_CALL_STACK_SIZE], *csp; + + Growl **roots; + size_t root_count, root_capacity; + + jmp_buf error; +}; + +GrowlVM *growl_vm_init(void); +void growl_vm_free(GrowlVM *vm); +GrowlObjectHeader *growl_gc_alloc(GrowlVM *vm, size_t size); +GrowlObjectHeader *growl_gc_alloc_tenured(GrowlVM *vm, size_t size); +void growl_gc_collect(GrowlVM *vm); +void growl_gc_root(GrowlVM *vm, Growl *ptr); +size_t growl_gc_mark(GrowlVM *vm); +void growl_gc_reset(GrowlVM *vm, size_t mark); +int vm_doquot(GrowlVM *vm, GrowlQuotation *quot); + +#endif // GROWL_H diff --git a/next/main.c b/next/main.c new file mode 100644 index 0000000..195abf3 --- /dev/null +++ b/next/main.c @@ -0,0 +1,18 @@ +#include "core/opcodes.h" +#include + +static uint8_t code[] = { + GOP_PUSH_NIL, + GOP_RETURN, +}; + +int main(void) { + GrowlVM *vm = growl_vm_init(); + + Growl quot_obj = growl_make_quotation(vm, code, sizeof(code), NULL, 0); + GrowlQuotation *quot = (GrowlQuotation *)(GROWL_UNBOX(quot_obj) + 1); + vm_doquot(vm, quot); + + growl_gc_collect(vm); + growl_vm_free(vm); +} diff --git a/src/compile.c b/src/compile.c index 7542684..747f048 100644 --- a/src/compile.c +++ b/src/compile.c @@ -7,12 +7,11 @@ #include "debug.h" #include "gc.h" #include "object.h" +#include "parser.h" #include "src/primitive.h" #include "string.h" -#include "vm.h" - -#include "vendor/mpc.h" #include "vendor/yar.h" +#include "vm.h" // clang-format off struct { @@ -26,6 +25,7 @@ struct { {"2dup", {OP_2DUP, 0}}, {"2drop", {OP_2DROP, 0}}, {"2swap", {OP_2SWAP, 0}}, + {"2over", {OP_2TOR, OP_2DUP, OP_2FROMR, OP_2SWAP, 0}}, {"over", {OP_OVER, 0}}, {"nip", {OP_NIP, 0}}, {"bury", {OP_BURY, 0}}, @@ -118,8 +118,7 @@ static V optim_tailcall(Bc *chunk) { } } -static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); -static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next); +static I compile_expr(Cm *cm, Ast *node); static I compile_constant(Cm *cm, O value, I line, I col) { I idx = chunk_add_constant(cm->chunk, value); @@ -169,31 +168,16 @@ static I compile_call(Cm *cm, const char *name, I line, I col) { return 1; } -static I compile_command(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { - curr = mpc_ast_traverse_next(next); - const char *name = curr->contents; - I name_line = curr->state.row; - I name_col = curr->state.col; - (void)mpc_ast_traverse_next(next); - curr = mpc_ast_traverse_next(next); - while (curr != NULL) { - if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, ";") == 0) - break; - I res = compile_expr(cm, curr, next); - if (!res) +static I compile_command(Cm *cm, Ast *node) { + for (size_t i = 0; i < node->children.count; i++) { + if (!compile_expr(cm, node->children.items[i])) return 0; - curr = mpc_ast_traverse_next(next); } - compile_call(cm, name, name_line, name_col); - return 1; + return compile_call(cm, node->name, node->line, node->col); } -static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { - (void)mpc_ast_traverse_next(next); // skip 'def' - curr = mpc_ast_traverse_next(next); - const char *name = arena_strdup(cm->arena, curr->contents); - (void)mpc_ast_traverse_next(next); // skip '{' - +static I compile_definition(Cm *cm, Ast *node) { + const char *name = arena_strdup(cm->arena, node->name); Dt *entry = upsert(cm->dictionary, name, cm->arena); Cm inner = {0}; @@ -202,19 +186,14 @@ static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { inner.vm = cm->vm; inner.dictionary = cm->dictionary; - curr = mpc_ast_traverse_next(next); - while (curr != NULL) { - if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "}") == 0) - break; - if (!compile_expr(&inner, curr, next)) { + for (size_t i = 0; i < node->children.count; i++) { + if (!compile_expr(&inner, node->children.items[i])) { chunk_release(inner.chunk); return 0; } - curr = mpc_ast_traverse_next(next); } - chunk_emit_byte_with_line(inner.chunk, OP_RETURN, curr->state.row, - curr->state.col); + chunk_emit_byte_with_line(inner.chunk, OP_RETURN, node->line, node->col); optim_tailcall(inner.chunk); entry->chunk = inner.chunk; @@ -226,7 +205,7 @@ static I compile_definition(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { return 1; } -static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { +static O compile_quotation_obj(Cm *cm, Ast *node) { Cm inner = {0}; inner.arena = cm->arena; @@ -234,20 +213,13 @@ static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { inner.vm = cm->vm; inner.dictionary = cm->dictionary; - (void)mpc_ast_traverse_next(next); - curr = mpc_ast_traverse_next(next); - while (curr != NULL) { - if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, "]") == 0) - break; - I res = compile_expr(&inner, curr, next); - if (!res) { + for (size_t i = 0; i < node->children.count; i++) { + if (!compile_expr(&inner, node->children.items[i])) { chunk_release(inner.chunk); - return res; + return NIL; } - curr = mpc_ast_traverse_next(next); } - chunk_emit_byte_with_line(inner.chunk, OP_RETURN, curr->state.row, - curr->state.col); + chunk_emit_byte_with_line(inner.chunk, OP_RETURN, node->line, node->col); optim_tailcall(inner.chunk); Hd *hd = gc_alloc(cm->vm, sizeof(Hd) + sizeof(Bc *)); @@ -258,158 +230,103 @@ static O compile_quotation_obj(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { return BOX(hd); } -static I compile_quotation(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next, - I line, I col) { - return compile_constant(cm, compile_quotation_obj(cm, curr, next), line, col); +static I compile_quotation(Cm *cm, Ast *node) { + O obj = compile_quotation_obj(cm, node); + if (obj == NIL) + return 0; + return compile_constant(cm, obj, node->line, node->col); } -static I compile_pragma(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { - (void)mpc_ast_traverse_next(next); - curr = mpc_ast_traverse_next(next); - const char *name = curr->contents; - I line = curr->state.row; - I col = curr->state.col; - curr = mpc_ast_traverse_next(next); - I has_args = 0; - - if (curr != NULL && strcmp(curr->tag, "char") == 0 && - strcmp(curr->contents, "(") == 0) { - has_args = 1; - curr = mpc_ast_traverse_next(next); // Skip '(' - } - - if (strcmp(name, "load") == 0) { - if (!has_args) { - fprintf(stderr, - "compiler error at %ld:%ld: #load requires a filename argument\n", - line + 1, col + 1); +static I compile_pragma(Cm *cm, Ast *node) { + if (strcmp(node->name, "#load") == 0) { + if (node->children.count == 0) { + fprintf(stderr, "compiler error: #load requires argument\n"); return 0; } - if (!strstr(curr->tag, "expr|string")) { - fprintf(stderr, - "compiler error at %ld:%ld: #load requires a string argument\n", - line + 1, col + 1); + Ast *arg = node->children.items[0]; + if (arg->type != AST_STR) { + fprintf(stderr, "compiler error: #load requires string\n"); return 0; } - char *fname_raw = curr->contents; - Z len = strlen(fname_raw); - char *fname = malloc(len + 1); - memcpy(fname, fname_raw + 1, len - 2); - fname[len - 2] = '\0'; - fname = mpcf_unescape(fname); - - mpc_result_t res; - extern mpc_parser_t *Program; - - if (!mpc_parse_contents(fname, Program, &res)) { - fprintf(stderr, "compiler error at %ld:%ld: failed to parse file '%s':\n", - line + 1, col + 1, fname); - mpc_err_print_to(res.error, stderr); - mpc_err_delete(res.error); - free(fname); + char *fname = arg->name; + FILE *f = fopen(fname, "rb"); + if (!f) { + fprintf(stderr, "compiler error: cannot open file '%s'\n", fname); return 0; } - mpc_ast_trav_t *inner_next = - mpc_ast_traverse_start(res.output, mpc_ast_trav_order_pre); - mpc_ast_t *inner_curr = mpc_ast_traverse_next(&inner_next); + Stream s = {filestream_vtable, f}; + Lx *lx = lexer_make(&s); + Ast *root = parser_parse(lx); - I success = compile_ast(cm, inner_curr, &inner_next); - - mpc_ast_delete(res.output); - - if (!success) { - fprintf(stderr, - "compiler error at %ld:%ld: failed to compile file '%s'\n", - line + 1, col + 1, fname); - free(fname); - return 0; - } - - free(fname); - - curr = mpc_ast_traverse_next(next); - while (curr != NULL) { - if (strcmp(curr->tag, "char") == 0 && strcmp(curr->contents, ")") == 0) + I success = 1; + for (size_t i = 0; i < root->children.count; i++) { + if (!compile_expr(cm, root->children.items[i])) { + success = 0; break; - curr = mpc_ast_traverse_next(next); + } } - } else { - fprintf(stderr, "compiler warning at %ld:%ld: unknown pragma \"%s\"\n", - line + 1, col + 1, name); - } - if (has_args) { - if (curr == NULL || strcmp(curr->contents, ")") != 0) { - fprintf(stderr, "error at %ld:%ld: expected ')' after pragma arguments\n", - line + 1, col + 1); - return 0; - } + ast_free(root); + lexer_free(lx); + fclose(f); + return success; } - + fprintf(stderr, "compiler warning: unknown pragma \"%s\"\n", node->name); return 1; } -static I compile_expr(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { - I line = curr->state.row; - I col = curr->state.col; - if (strstr(curr->tag, "expr|number") != NULL) { - I num = strtol(curr->contents, NULL, 0); - return compile_constant(cm, NUM(num), line, col); - } else if (strstr(curr->tag, "expr|string") != NULL) { - curr->contents[strlen(curr->contents) - 1] = '\0'; - char *unescaped = malloc(strlen(curr->contents + 1) + 1); - strcpy(unescaped, curr->contents + 1); - unescaped = mpcf_unescape(unescaped); - O obj = string_make(cm->vm, unescaped, -1); - free(unescaped); - return compile_constant(cm, obj, line, col); - } else if (strstr(curr->tag, "expr|word") != NULL) { - return compile_call(cm, curr->contents, line, col); - } else if (strstr(curr->tag, "expr|quotation") != NULL) { - return compile_quotation(cm, curr, next, line, col); - } else if (strstr(curr->tag, "expr|def") != NULL) { - return compile_definition(cm, curr, next); - } else if (strstr(curr->tag, "expr|command") != NULL) { - return compile_command(cm, curr, next); - } else if (strstr(curr->tag, "expr|pragma") != NULL) { - return compile_pragma(cm, curr, next); - } else if (strstr(curr->tag, "expr|comment") != NULL) { +static I compile_expr(Cm *cm, Ast *node) { + if (!node) + return 0; + switch (node->type) { + case AST_INT: { + O num = NUM(node->int_val); + return compile_constant(cm, num, node->line, node->col); + } + case AST_STR: { + O obj = string_make(cm->vm, node->name, -1); + return compile_constant(cm, obj, node->line, node->col); + } + case AST_WORD: + return compile_call(cm, node->name, node->line, node->col); + case AST_QUOTE: + return compile_quotation(cm, node); + case AST_DEF: + return compile_definition(cm, node); + case AST_CMD: + return compile_command(cm, node); + case AST_PRAGMA: + return compile_pragma(cm, node); + case AST_PROGRAM: + for (size_t i = 0; i < node->children.count; i++) { + if (!compile_expr(cm, node->children.items[i])) + return 0; + } return 1; - } else { - fprintf(stderr, "compiler error at %ld:%ld: \"%s\" nyi\n", line + 1, - col + 1, curr->tag); + default: + fprintf(stderr, "compiler error: nyi ast type %d\n", (int)node->type); return 0; } } -static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) { - (void)mpc_ast_traverse_next(next); - curr = mpc_ast_traverse_next(next); - while (curr != NULL) { - if (strcmp(curr->tag, "regex") == 0 && strcmp(curr->contents, "") == 0) - break; - I res = compile_expr(cm, curr, next); - if (!res) - return res; - curr = mpc_ast_traverse_next(next); +Bc *compile_program(Cm *cm, Ast *ast) { + if (ast->type == AST_PROGRAM) { + for (size_t i = 0; i < ast->children.count; i++) { + if (!compile_expr(cm, ast->children.items[i])) { + chunk_release(cm->chunk); + return NULL; + } + } + } else { + if (!compile_expr(cm, ast)) { + chunk_release(cm->chunk); + return NULL; + } } - return 1; -} - -Bc *compile_program(Cm *cm, mpc_ast_t *ast) { - mpc_ast_trav_t *next = mpc_ast_traverse_start(ast, mpc_ast_trav_order_pre); - mpc_ast_t *curr = mpc_ast_traverse_next(&next); // Begin traversal - - if (!compile_ast(cm, curr, &next)) { - chunk_release(cm->chunk); - return NULL; - } - - Bc *chunk = cm->chunk; - chunk_emit_byte(chunk, OP_RETURN); - optim_tailcall(chunk); - return chunk; + chunk_emit_byte(cm->chunk, OP_RETURN); + optim_tailcall(cm->chunk); + return cm->chunk; } diff --git a/src/compile.h b/src/compile.h index 3db8ce2..51cea45 100644 --- a/src/compile.h +++ b/src/compile.h @@ -4,10 +4,9 @@ #include "chunk.h" #include "gc.h" #include "vm.h" +#include "parser.h" -#include "vendor/mpc.h" - -#define COMPILER_DEBUG DEBUG +#define COMPILER_DEBUG 0 /** Compiler context */ typedef struct Cm { @@ -19,4 +18,4 @@ typedef struct Cm { V compiler_init(Cm *, Vm *, const char *); V compiler_deinit(Cm *); -Bc *compile_program(Cm *, mpc_ast_t *); +Bc *compile_program(Cm *, Ast *); diff --git a/src/debug.c b/src/debug.c index eda0ee6..6374c16 100644 --- a/src/debug.c +++ b/src/debug.c @@ -3,8 +3,8 @@ #include "chunk.h" #include "debug.h" #include "dictionary.h" +#include "primitive.h" #include "print.h" -#include "src/primitive.h" #include "vm.h" static I decode_sleb128(U8 *ptr, Z *bytes_read) { @@ -70,7 +70,7 @@ static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent) { print(obj); printf(")"); - if (!IMM(obj) && obj != NIL && type(obj) == TYPE_QUOT) { + if (!IMM(obj) && obj != NIL && type(obj) == OBJ_QUOT) { putchar('\n'); Hd *hdr = UNBOX(obj); Bc **chunk_ptr = (Bc **)(hdr + 1); diff --git a/src/gc.h b/src/gc.h index 2f28ba5..c3bb177 100644 --- a/src/gc.h +++ b/src/gc.h @@ -4,7 +4,7 @@ #include "common.h" #include "object.h" -#define GC_DEBUG 0 +#define GC_DEBUG 1 #if GC_DEBUG #define HEAP_BYTES (8 * 1024) #else diff --git a/src/lexer.c b/src/lexer.c index 5d851d7..9b45afd 100644 --- a/src/lexer.c +++ b/src/lexer.c @@ -1,12 +1,42 @@ #include #include -#include #include #include #include "lexer.h" #include "vendor/yar.h" +Lx *lexer_make(Stream *s) { + Lx *lx = calloc(1, sizeof(Lx)); + lx->stream = s; + return lx; +} + +V lexer_free(Lx *lx) { + yar_free(lx); + free(lx); +} + +static int lx_getc(Lx *lx) { + int c = ST_GETC(lx->stream); + if (c == '\n') { + lx->curr_line++; + lx->curr_col = 0; + } else if (c != -1) { + lx->curr_col++; + } + return c; +} + +static void lx_ungetc(Lx *lx, int c) { + ST_UNGETC(c, lx->stream); + if (c == '\n') { + lx->curr_line--; + } else if (c != -1) { + lx->curr_col--; + } +} + static inline int is_delimiter(int i) { return i == '(' || i == ')' || i == '[' || i == ']' || i == '{' || i == '}' || i == ';' || i == '\\' || i == '"'; @@ -24,7 +54,7 @@ static int getc_ws(Lx *lx) { if (ST_EOF(lx->stream)) return -1; for (;;) { - int ch = ST_GETC(lx->stream); + int ch = lx_getc(lx); if (isspace(ch)) continue; return ch; @@ -32,21 +62,21 @@ static int getc_ws(Lx *lx) { } static int scanword(Lx *lx) { - int next = ST_GETC(lx->stream); + int next = lx_getc(lx); for (;;) { if (next == -1) { - if (lx->cursor == 0) + if (lx->count == 0) lx->kind = TOK_EOF; appendbyte(lx, 0); return lx->kind; } else if (is_delimiter(next) || isspace(next)) { - ST_UNGETC(next, lx->stream); + lx_ungetc(lx, next); appendbyte(lx, 0); return lx->kind; } else { appendbyte(lx, next); - next = ST_GETC(lx->stream); + next = lx_getc(lx); continue; } } @@ -58,7 +88,7 @@ static void scanescape(Lx *lx) { Rune tmp; for (;;) { - next = ST_GETC(lx->stream); + next = lx_getc(lx); if (next == -1) { errx(1, "unterminated hex sequence '%s'", escbuf); @@ -77,22 +107,28 @@ static void scanescape(Lx *lx) { } tmp = strtol(escbuf, &escptr, 16); - if (*escptr == '\0') - appendrune(lx, tmp); - else + if (*escptr == '\0') { + if (tmp < 256) { + appendbyte(lx, (U8)(tmp & 255)); + } else { + appendrune(lx, tmp); + } + + } else { errx(1, "invalid hex sequence '%s'", escbuf); + } } static int scanstring(Lx *lx) { int next; for (;;) { - next = ST_GETC(lx->stream); + next = lx_getc(lx); switch (next) { case -1: goto eof; case '\\': - next = ST_GETC(lx->stream); + next = lx_getc(lx); if (next == -1) goto eof; switch (next) { @@ -128,8 +164,7 @@ static int scanstring(Lx *lx) { scanescape(lx); break; default: - fprintf(stderr, "unknown escape sequence '\\%c'\n", next); - abort(); + return (lx->kind = TOK_INVALID); } break; case '"': @@ -141,13 +176,13 @@ static int scanstring(Lx *lx) { } eof: - errx(1, "unterminated string literal"); - return 0; + return (lx->kind = TOK_INVALID); } I lexer_next(Lx *lx) { int next; lx->cursor = 0; + lx->count = 0; if (ST_EOF(lx->stream)) { lx->kind = TOK_EOF; @@ -156,9 +191,12 @@ I lexer_next(Lx *lx) { next = getc_ws(lx); + lx->start_line = lx->curr_line; + lx->start_col = (lx->curr_col > 0) ? lx->curr_col - 1 : 0; + switch (next) { case '\\': - for (; next != '\n'; next = ST_GETC(lx->stream)) + for (; next != '\n'; next = lx_getc(lx)) ; return lexer_next(lx); case '(': @@ -172,7 +210,7 @@ I lexer_next(Lx *lx) { case '"': return scanstring(lx); default: - ST_UNGETC(next, lx->stream); + lx_ungetc(lx, next); lx->kind = TOK_WORD; return scanword(lx); }; diff --git a/src/lexer.h b/src/lexer.h index 217beef..f3fa2de 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -22,12 +22,15 @@ enum { typedef struct Lx { I kind; I cursor; + I curr_line, curr_col; + I start_line, start_col; Stream *stream; char *items; Z count, capacity; } Lx; Lx *lexer_make(Stream *); +V lexer_free(Lx *lx); I lexer_next(Lx *); #endif diff --git a/src/main.c b/src/main.c index 76ab02a..5d292cc 100644 --- a/src/main.c +++ b/src/main.c @@ -1,5 +1,6 @@ #include #include +#include #include "chunk.h" #include "compile.h" @@ -8,7 +9,6 @@ #include "vm.h" #include "vendor/linenoise.h" -#include "vendor/mpc.h" #define REPL_BUFFER_SIZE 4096 @@ -18,16 +18,18 @@ I repl(void) { char *line; while ((line = linenoise("growl> ")) != NULL) { - mpc_result_t res; - if (!mpc_parse("", line, Program, &res)) { - mpc_err_print_to(res.error, stderr); - mpc_err_delete(res.error); - continue; - } + Buf b = { line, (int)strlen(line), 0, -1 }; + Stream s = { bufstream_vtable, &b }; + + Lx *lx = lexer_make(&s); + Ast *root = parser_parse(lx); + Cm cm = {0}; compiler_init(&cm, &vm, ""); - Bc *chunk = compile_program(&cm, res.output); - mpc_ast_delete(res.output); + Bc *chunk = compile_program(&cm, root); + ast_free(root); + lexer_free(lx); + if (chunk != NULL) { vm_run(&vm, chunk, 0); chunk_release(chunk); @@ -44,18 +46,23 @@ I loadfile(const char *fname) { Vm vm = {0}; vm_init(&vm); - mpc_result_t res; - if (!mpc_parse_contents(fname, Program, &res)) { - mpc_err_print_to(res.error, stderr); - mpc_err_delete(res.error); - return 1; + FILE *f = fopen(fname, "rb"); + if (!f) { + fprintf(stderr, "error: cannot open file '%s'\n", fname); + return 1; } + Stream s = { filestream_vtable, f }; + Lx *lx = lexer_make(&s); + Ast *root = parser_parse(lx); + Cm cm = {0}; compiler_init(&cm, &vm, fname); - Bc *chunk = compile_program(&cm, res.output); - mpc_ast_delete(res.output); + Bc *chunk = compile_program(&cm, root); + ast_free(root); + lexer_free(lx); + fclose(f); if (chunk != NULL) { #if COMPILER_DEBUG @@ -72,9 +79,6 @@ I loadfile(const char *fname) { } int main(int argc, const char *argv[]) { - parser_init(); - atexit(parser_deinit); - switch (argc) { case 1: return repl(); diff --git a/src/object.c b/src/object.c index 6a6de05..c947ee0 100644 --- a/src/object.c +++ b/src/object.c @@ -2,9 +2,9 @@ I type(O o) { if (o == NIL) - return TYPE_NIL; + return OBJ_NIL; if (IMM(o)) - return TYPE_NUM; + return OBJ_NUM; Hd *h = UNBOX(o); return h->type; } diff --git a/src/object.h b/src/object.h index f987b5f..3233892 100644 --- a/src/object.h +++ b/src/object.h @@ -11,25 +11,17 @@ #define ORD(x) ((intptr_t)(x) >> 1) enum { + OBJ_NIL = 0, + OBJ_NUM = 1, OBJ_FWD = 2, OBJ_QUOT, OBJ_COMPOSE, OBJ_CURRY, OBJ_STR, + OBJ_ARRAY, OBJ_USERDATA, }; -enum { - TYPE_NIL = 0, - TYPE_NUM = 1, - TYPE_FWD = OBJ_FWD, - TYPE_QUOT = OBJ_QUOT, - TYPE_COMPOSE = OBJ_COMPOSE, - TYPE_CURRY = OBJ_CURRY, - TYPE_STR = OBJ_STR, - TYPE_USERDATA = OBJ_USERDATA, -}; - typedef uintptr_t O; /** Object header */ @@ -50,7 +42,7 @@ typedef struct Qc { I type(O); static inline I callable(O o) { I t = type(o); - return t == TYPE_QUOT || t == TYPE_COMPOSE || t == TYPE_CURRY; + return t == OBJ_QUOT || t == OBJ_COMPOSE || t == OBJ_CURRY; } #endif diff --git a/src/parser.c b/src/parser.c index 0723efc..724bea2 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1,51 +1,156 @@ #include "parser.h" -#include "vendor/mpc.h" +#include +#include +#include -mpc_parser_t *Pragma, *Comment, *Expr, *Number, *String, *Word, *Definition, - *Command, *List, *Table, *Quotation, *Program; +static Ast *ast_new(I type, I line, I col) { + Ast *node = calloc(1, sizeof(Ast)); + node->type = type; + node->line = line; + node->col = col; + return node; +} -V parser_init(V) { - Pragma = mpc_new("pragma"); - Comment = mpc_new("comment"); - Expr = mpc_new("expr"); - Number = mpc_new("number"); - String = mpc_new("string"); - Word = mpc_new("word"); - Definition = mpc_new("def"); - Command = mpc_new("command"); - List = mpc_new("list"); - Table = mpc_new("table"); - Quotation = mpc_new("quotation"); - Program = mpc_new("program"); +void ast_free(Ast *ast) { + if (!ast) + return; + if (ast->name) + free(ast->name); + for (size_t i = 0; i < ast->children.count; i++) { + ast_free(ast->children.items[i]); + } + yar_free(&ast->children); + free(ast); +} - mpc_err_t *err = mpca_lang( - MPCA_LANG_DEFAULT, - " pragma : '#' ('(' * ')')? ; " - " comment : /\\\\[^\\n]*/ ; " - " expr : ( | | | " - " | | | | " - " | | ) ; " - " number : ( /0x[0-9A-Fa-f]+/ | /-?[0-9]+/ ) ; " - " string : /\"(\\\\.|[^\"])*\"/ ; " - " word : /[a-zA-Z0-9_!?.,@#$%^&*_+\\-=><|\\/]+/ ; " - " def : \"def\" '{' * '}' ; " - " command : ':' + ';' ; " - " list : '(' * ')' ; " - " table : '{' * '}' ; " - " quotation : '[' * ']' ; " - " program : /^/ * /$/ ; ", - Pragma, Comment, Expr, Number, String, Word, Definition, Command, List, - Table, Quotation, Program, NULL); +static Ast *parse_expr_at(Lx *lx); - // crash if i do a woopsie - if (err != NULL) { - mpc_err_print(err); - mpc_err_delete(err); - abort(); +static void parse_block(Lx *lx, Ast *parent, int close_token) { + while (1) { + if (lx->kind == TOK_EOF) { + if (close_token != TOK_EOF) + fprintf(stderr, "syntax error: unexpected EOF, expected '%c'\n", + close_token); + break; + } + if (lx->kind == close_token) { + lexer_next(lx); + break; + } + Ast *expr = parse_expr_at(lx); + *yar_append(&parent->children) = expr; } } -V parser_deinit(V) { - mpc_cleanup(12, Pragma, Comment, Expr, Number, String, Word, Definition, - Command, List, Table, Quotation, Program); +static Ast *parse_expr_at(Lx *lx) { + int kind = lx->kind; + I line = lx->start_line; + I col = lx->start_col; + + if (kind == TOK_WORD) { + char *text = lx->items; + + if (strcmp(text, "def") == 0) { + Ast *node = ast_new(AST_DEF, line, col); + lexer_next(lx); + + if (lx->kind != TOK_WORD) { + fprintf(stderr, "syntax error: expected word after 'def' at %ld:%ld\n", + (long)line + 1, (long)col + 1); + return node; + } + node->name = strdup(lx->items); + lexer_next(lx); + + if (lx->kind != '{') { + fprintf(stderr, + "syntax error: expected '{' after def name at %ld:%ld\n", + (long)lx->start_line + 1, (long)lx->start_col + 1); + return node; + } + lexer_next(lx); + parse_block(lx, node, '}'); + return node; + } + + size_t len = strlen(text); + if (len > 0 && text[len - 1] == ':') { + Ast *node = ast_new(AST_CMD, line, col); + node->name = strndup(text, len - 1); + lexer_next(lx); + parse_block(lx, node, ';'); + return node; + } + + if (text[0] == '#') { + Ast *node = ast_new(AST_PRAGMA, line, col); + node->name = strdup(text); + lexer_next(lx); + if (lx->kind == '(') { + lexer_next(lx); + parse_block(lx, node, ')'); + } + return node; + } + + char *end; + long val = strtol(text, &end, 0); + if (*end == '\0') { + Ast *node = ast_new(AST_INT, line, col); + node->int_val = val; + lexer_next(lx); + return node; + } + + Ast *node = ast_new(AST_WORD, line, col); + node->name = strdup(text); + lexer_next(lx); + return node; + } + + if (kind == TOK_STRING) { + Ast *node = ast_new(AST_STR, line, col); + node->name = strdup(lx->items); + lexer_next(lx); + return node; + } + + if (kind == '[') { + Ast *node = ast_new(AST_QUOTE, line, col); + lexer_next(lx); + parse_block(lx, node, ']'); + return node; + } + + if (kind == '{') { + Ast *node = ast_new(AST_TABLE, line, col); + lexer_next(lx); + parse_block(lx, node, '}'); + return node; + } + + if (kind == '(') { + Ast *node = ast_new(AST_LIST, line, col); + lexer_next(lx); + parse_block(lx, node, ')'); + return node; + } + + if (kind == TOK_INVALID) { + fprintf(stderr, "syntax error: invalid token at %ld:%ld\n", (long)line + 1, + (long)col + 1); + } else { + fprintf(stderr, "syntax error: unexpected token '%c' (%d) at %ld:%ld\n", + kind, kind, (long)line + 1, (long)col + 1); + } + lexer_next(lx); + + return NULL; +} + +Ast *parser_parse(Lx *lx) { + Ast *root = ast_new(AST_PROGRAM, 0, 0); + lexer_next(lx); + parse_block(lx, root, TOK_EOF); + return root; } diff --git a/src/parser.h b/src/parser.h index c991dd4..ea8ddda 100644 --- a/src/parser.h +++ b/src/parser.h @@ -2,11 +2,34 @@ #define PARSER_H #include "common.h" -#include "vendor/mpc.h" +#include "lexer.h" +#include "vendor/yar.h" -V parser_init(V); -V parser_deinit(V); +enum { + AST_PROGRAM, + AST_INT, + AST_STR, + AST_WORD, + AST_LIST, + AST_TABLE, + AST_QUOTE, + AST_DEF, + AST_CMD, + AST_PRAGMA, +}; -extern mpc_parser_t *Program; +typedef struct Ast { + I type; + char *name; + I int_val; + struct { + struct Ast **items; + Z count, capacity; + } children; + I line, col; +} Ast; + +Ast *parser_parse(Lx *lx); +void ast_free(Ast *ast); #endif diff --git a/src/primitive.h b/src/primitive.h index ab58696..2e6ca97 100644 --- a/src/primitive.h +++ b/src/primitive.h @@ -1,7 +1,6 @@ #ifndef PRIMITIVE_H #define PRIMITIVE_H -#include "common.h" #include "vm.h" typedef struct Pr { diff --git a/src/print.c b/src/print.c index 8549e5d..ffb6037 100644 --- a/src/print.c +++ b/src/print.c @@ -1,13 +1,56 @@ #include #include -#include -#include #include "object.h" #include "print.h" #include "string.h" #include "userdata.h" -#include "vendor/mpc.h" + +static V print_string(Str *s) { + putchar('"'); + for (Z i = 0; i < s->len; i++) { + unsigned char c = s->data[i]; + switch (c) { + case '\t': + printf("\\t"); + break; + case '\n': + printf("\\n"); + break; + case '\r': + printf("\\r"); + break; + case '\b': + printf("\\b"); + break; + case '\v': + printf("\\v"); + break; + case '\f': + printf("\\f"); + break; + case '\0': + printf("\\0"); + break; + case '\x1b': + printf("\\e"); + break; + case '\\': + printf("\\\\"); + break; + case '\"': + printf("\\\""); + break; + default: + if (c < 32 || c > 126) { + printf("\\x%02x;", c); + } else { + putchar(c); + } + } + } + putchar('"'); +} V print(O o) { if (o == NIL) { @@ -27,14 +70,8 @@ V print(O o) { printf(""); break; case OBJ_STR: { - // TODO: make this binary safe Str *s = string_unwrap(o); - char *escaped = malloc(s->len + 1); - memcpy(escaped, s->data, s->len); - escaped[s->len] = 0; - escaped = mpcf_escape(escaped); - printf("\"%s\"", escaped); - free(escaped); + print_string(s); break; } case OBJ_USERDATA: { diff --git a/src/vm.c b/src/vm.c index 8394e6c..f90c68e 100644 --- a/src/vm.c +++ b/src/vm.c @@ -206,8 +206,8 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { O obj2 = vm_pop(vm); O obj1 = vm_pop(vm); vm_push(vm, obj1); - vm_push(vm, obj1); vm_push(vm, obj2); + vm_push(vm, obj1); vm_push(vm, obj2); break; } @@ -223,10 +223,10 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { O c = vm_pop(vm); O b = vm_pop(vm); O a = vm_pop(vm); - vm_push(vm, d); vm_push(vm, c); - vm_push(vm, b); + vm_push(vm, d); vm_push(vm, a); + vm_push(vm, b); break; } case OP_NIP: { @@ -302,21 +302,21 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { vm_rpush(vm, vm->chunk, vm->ip); do_call: switch (type(quot)) { - case TYPE_QUOT: { + case OBJ_QUOT: { Bc **ptr = (Bc **)(UNBOX(quot) + 1); Bc *chunk = *ptr; vm->chunk = chunk; vm->ip = chunk->items; break; } - case TYPE_COMPOSE: { + case OBJ_COMPOSE: { Qo *comp = (Qo *)(UNBOX(quot) + 1); vm_rpush(vm, vm->trampoline, vm->trampoline->items); vm->rsp[-1].obj = comp->second; quot = comp->first; goto do_call; } - case TYPE_CURRY: { + case OBJ_CURRY: { Qc *curry = (Qc *)(UNBOX(quot) + 1); vm_push(vm, curry->value); quot = curry->callable; @@ -345,21 +345,21 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { O quot = vm_pop(vm); do_tail_call: switch (type(quot)) { - case TYPE_QUOT: { + case OBJ_QUOT: { Bc **ptr = (Bc **)(UNBOX(quot) + 1); Bc *chunk = *ptr; vm->chunk = chunk; vm->ip = chunk->items; break; } - case TYPE_COMPOSE: { + case OBJ_COMPOSE: { Qo *comp = (Qo *)(UNBOX(quot) + 1); vm_rpush(vm, vm->trampoline, vm->trampoline->items); vm->rsp[-1].obj = comp->second; quot = comp->first; goto do_tail_call; } - case TYPE_CURRY: { + case OBJ_CURRY: { Qc *curry = (Qc *)(UNBOX(quot) + 1); vm_push(vm, curry->value); quot = curry->callable; @@ -491,10 +491,10 @@ I vm_run(Vm *vm, Bc *chunk, I offset) { } case OP_CONCAT: { O b = vm_pop(vm); - if (type(b) != TYPE_STR) + if (type(b) != OBJ_STR) vm_error(vm, VM_ERR_TYPE, "expected string"); O a = vm_pop(vm); - if (type(a) != TYPE_STR) + if (type(a) != OBJ_STR) vm_error(vm, VM_ERR_TYPE, "expected string"); vm_push(vm, string_concat(vm, a, b)); break; diff --git a/std.grr b/std.grr index 805ccc7..3bcce87 100644 --- a/std.grr +++ b/std.grr @@ -14,6 +14,7 @@ def 3dip { swap [2dip] dip } def keep { over [call] dip } def 2keep { [2dup] dip 2dip } +def 3keep { [dup 2over dig] dip 3dip } def bi { [keep] dip call } def tri { [[keep] dip keep] dip call }