diff --git a/.editorconfig b/.editorconfig index 0a31b58..1c51663 100644 --- a/.editorconfig +++ b/.editorconfig @@ -8,6 +8,6 @@ insert_final_newline = true indent_style = space indent_size = 2 -[Makefile] -indent_style = tab -indent_size = 4 +[meson.build] +indent_style = space +indent_size = 2 diff --git a/README.md b/README.md index b282a8e..15b4d47 100644 --- a/README.md +++ b/README.md @@ -1,14 +1,3 @@ -# wolfscheme +# wolflisp -An experiment in how quick I can get from zero to a decent Lisp. -Don't know why I called it `wolfscheme`. - -## Day 1: - -- Symbol interning -- Garbage collector and cons pairs -- Object printing - -## Day 2: - -- Tree-walking evaluator with a few primitives +a lisp but wolfy diff --git a/include/wolflisp.h b/include/wolflisp.h new file mode 100644 index 0000000..05dff12 --- /dev/null +++ b/include/wolflisp.h @@ -0,0 +1,167 @@ +#ifndef WOLFLISP_H +#define WOLFLISP_H + +#include +#include + +/// * Behavioral macros +#define GC_DEBUG 1 + +/// * Type declarations +typedef void V; +typedef intptr_t I; +typedef uintptr_t U; +typedef char C; +typedef uint8_t U8; +typedef uint32_t U32; +typedef int32_t I32; +typedef size_t Z; + +// Object +typedef U O; + +#define NIL ((O)0) +#define IMM(x) ((O)(x) & (O)1) +#define NUM(x) (((O)((I)(x) << 1)) | (O)1) +#define ORD(x) ((I)(x) >> 1) + +// Pair +typedef struct Pa { + O head, tail; +} Pa; + +// Symbol +typedef struct Sy { + U32 hash; + Z len; + U8 *data; +} Sy; + +// Closure +typedef struct Cl { + O args, body, env; +} Cl; + +// Primitive +typedef struct In In; +typedef struct Pr { + const char *name; + O (*fn)(In *, O, O); +} Pr; + +// Symbol table +typedef struct St { + Z count; + Z capacity; + Sy **data; +} St; + +#define HEAP_BYTES (1024 * 1024) +#define TYPE_MASK 7 + +enum { + TAG_MAN = 0, // GC-managed object + TAG_IMM = 1, // Immediate number + TAG_SYM = 2, // Pointer to symbol + TAG_PRIM = 4, // Pointer to primitive +}; + +enum { + TYPE_NIL = 0, + TYPE_NUM = 1, + TYPE_SYM = 2, + TYPE_PRIM = 4, + TYPE_PAIR, // = 5, + TYPE_CLOS, // = 6, + TYPE_CODE, // = 7, + TYPE_FWD, // = 8, + TYPE__MAX, +}; + +#define TAG_OF(x) (((U)(x)) & TYPE_MASK) +#define UNTAG(x) (((U)(x)) & ~TYPE_MASK) +#define TAG(x, t) (V *)(((U)(x)) | t) + +// GC-managed header +typedef struct Gh { + U32 type; + U32 size; +} Gh; + +#define BOX(x) ((O)(x)) +#define UNBOX(x) ((Gh *)(x)) + +// GC space +typedef struct Gs { + U8 *start, *end; + U8 *free; +} Gs; + +// GC context +typedef struct Gc { + Gs from, to; + struct { + Z count; + Z capacity; + O **data; + } roots; +} Gc; + +// Interpreter context +typedef struct In { + Gc gc; + St symtab; + O env; +} In; + +/// * Function declarations + +// Get the type of an object +I type(O obj); +// Get the name of a type +const char *typename(I t); + +// Add a root to a GC context. +V gc_addroot(Gc *gc, O *root); +// Mark the current root state in a GC context. +I gc_rootmark(Gc *gc); +// Reset the root state in a GC context to a previously marked state. +V gc_rootreset(Gc *gc, I mark); +// Perform a garbage collection in a GC context. +V gc_collect(Gc *gc); +// Allocate memory in a GC context. +Gh *gc_alloc(Gc *gc, Z sz); +// Initialize a GC context. +V gc_init(Gc *gc); +// Finalize a GC context. +V gc_finalize(Gc *gc); + +// Initialize an interpreter context. +V interp_init(In *in); +// Finalize an interpreter context. +V interp_finalize(In *in); + +// Evaluate a list of values. +O interp_eval_list(In *in, O list, O env); + +// Evaluate an expression. +O interp_eval(In *in, O obj, O env); + +// Intern a string +Sy *intern(St *tab, const char *str, Z len); + +// Create a pair +O pair_make(Gc *gc, O head, O tail); +// Unwrap a pair +Pa *pair_unwrap(O obj); + +V print(O obj); +V println(O obj); + +O symbol_make(In *in, const char *str); +O prim_make(In *in, const char *name, O (*fn)(In *, O, O)); + +O list_assoc(O key, O alist); +O list_reverse(O list); + +#endif diff --git a/meson.build b/meson.build new file mode 100644 index 0000000..7d5f4e7 --- /dev/null +++ b/meson.build @@ -0,0 +1,27 @@ +project( + 'wolflisp', + 'c', + meson_version : '>= 1.3.0', + version : '0.1', + default_options : ['c_std=gnu11', 'buildtype=debugoptimized', 'warning_level=3'], +) + +inc = include_directories('include', 'src') +src = [ + 'src/gc.c', + 'src/interp.c', + 'src/list.c', + 'src/main.c', + 'src/pair.c', + 'src/prim.c', + 'src/print.c', + 'src/symbol.c', + 'src/type.c', +] + +exe = executable( + 'wl', + src, + include_directories : inc, + install : true, +) diff --git a/Makefile b/old/Makefile similarity index 63% rename from Makefile rename to old/Makefile index 026da86..f2dfeb2 100644 --- a/Makefile +++ b/old/Makefile @@ -1,5 +1,5 @@ CFLAGS := -std=gnu99 -Og -g -Wpedantic -Wall -OBJS := symbol.o object.o gc.o print.o eval.o main.o +OBJS := util.o symbol.o object.o gc.o print.o lex.o read.o eval.o main.o wscm: $(OBJS) $(CC) $(OBJS) -o wscm diff --git a/old/README.md b/old/README.md new file mode 100644 index 0000000..b282a8e --- /dev/null +++ b/old/README.md @@ -0,0 +1,14 @@ +# wolfscheme + +An experiment in how quick I can get from zero to a decent Lisp. +Don't know why I called it `wolfscheme`. + +## Day 1: + +- Symbol interning +- Garbage collector and cons pairs +- Object printing + +## Day 2: + +- Tree-walking evaluator with a few primitives diff --git a/eval.c b/old/eval.c similarity index 94% rename from eval.c rename to old/eval.c index c199cf7..58fa416 100644 --- a/eval.c +++ b/old/eval.c @@ -18,7 +18,7 @@ static O listrev(O list) { return prev; } -O assoc(O v, O env) { +O pair_assoc(O v, O env) { while (kind(env) == KIND_CONS) { C *c = uncons(env); O pair = c->car; @@ -164,6 +164,12 @@ O mkprim(const char *name, O (*fn)(O, O)) { } V setupenv(O *env) { + O t = BOX(TAG(intern("t", 1), TAG_SYM)); + *env = cons(cons(t, t), *env); + + O nil = BOX(TAG(intern("nil", 3), TAG_SYM)); + *env = cons(cons(nil, NIL), *env); + *env = cons(mkprim("cons", prim_cons), *env); *env = cons(mkprim("car", prim_car), *env); *env = cons(mkprim("cdr", prim_cdr), *env); @@ -195,7 +201,7 @@ O eval(O obj, O env) { I k = kind(obj); if (k == KIND_SYM) { - O pair = assoc(obj, env); + O pair = pair_assoc(obj, env); if (pair == NIL) { S *s = (S *)UNTAG(obj); fprintf(stderr, "error: undefined symbol '%.*s'\n", (int)s->len, s->data); diff --git a/gc.c b/old/gc.c similarity index 100% rename from gc.c rename to old/gc.c diff --git a/old/lex.c b/old/lex.c new file mode 100644 index 0000000..9e50cd9 --- /dev/null +++ b/old/lex.c @@ -0,0 +1,119 @@ + +#include +#include +#include + +#include "wscm.h" + +// helpers +static int iswhite(int ch) { return ch == ' ' || ch == '\t' || ch == '\n'; } + +static int isdelim(int ch) { + return ch == '(' || ch == ')' || ch == '\'' || ch == ';'; +} + +static inline void appendchar(Lx *lex, char ch) { + if (lex->cursor > LEXER_CAP) { + fprintf(stderr, "lexer buffer overflow"); + abort(); + } + lex->buffer[lex->cursor++] = ch; +} + +static int getcws(Lx *lex) { + if (feof(lex->input)) + return EOF; + for (;;) { + int ch = getc(lex->input); + if (iswhite(ch)) + continue; + return ch; + } +} + +static int scanword(Lx *lex) { + int ch = getc(lex->input); + for (;;) { + if (ch == EOF) { + if (lex->cursor == 0) + lex->kind = TOK_EOF; + appendchar(lex, 0); + return lex->kind; + } else if (iswhite(ch) || isdelim(ch)) { + ungetc(ch, lex->input); + appendchar(lex, 0); + return lex->kind; + } else { + appendchar(lex, ch); + ch = getc(lex->input); + } + } +} + +static int scanstring(Lx *lex) { + int ch; + for (;;) { + ch = getc(lex->input); + switch (ch) { + case EOF: + goto eof; + case '\\': + ch = getc(lex->input); + if (ch == EOF) + goto eof; + switch (ch) { + case 'n': + appendchar(lex, '\n'); + break; + case 't': + appendchar(lex, '\t'); + break; + case '"': + appendchar(lex, '"'); + break; + } + break; + case '"': + appendchar(lex, 0); + return (lex->kind = TOK_STRING); + default: + appendchar(lex, ch); + } + } + +eof: + errx(1, "unterminated string literal"); +} + +int nexttoken(Lx *lex) { + int ch; + lex->cursor = 0; + + if (feof(lex->input)) { + lex->kind = TOK_EOF; + *lex->buffer = 0; + return 0; + } + + ch = getcws(lex); + switch (ch) { + case ';': + for (; ch != '\n'; ch = getc(lex->input)) + appendchar(lex, ch); + appendchar(lex, 0); + return (lex->kind = TOK_COMMENT); + case '(': + case ')': + case '.': + case '\'': + return (lex->kind = ch); + case '"': + return scanstring(lex); + default: + ungetc(ch, lex->input); + lex->kind = TOK_WORD; + return scanword(lex); + } + + return 0; +} diff --git a/main.c b/old/main.c similarity index 77% rename from main.c rename to old/main.c index e4b1b7c..3f0e751 100644 --- a/main.c +++ b/old/main.c @@ -10,7 +10,7 @@ int main(void) { setupenv(&env); // IF - O code = cons(SYM("if"), cons(NIL, cons(NUM(100), cons(NUM(200), NIL)))); + O code = readfile(stdin); addroot(&code); // collect(); diff --git a/object.c b/old/object.c similarity index 100% rename from object.c rename to old/object.c diff --git a/print.c b/old/print.c similarity index 97% rename from print.c rename to old/print.c index 906825e..a22ae71 100644 --- a/print.c +++ b/old/print.c @@ -5,7 +5,7 @@ void print(O obj); -void printcons(O obj) { +static void printcons(O obj) { O c = obj; I f = 1; diff --git a/old/read.c b/old/read.c new file mode 100644 index 0000000..235cdde --- /dev/null +++ b/old/read.c @@ -0,0 +1,156 @@ +#include +#include +#include + +#include "wscm.h" + +static O read(Lx *lex); + +static void skipcomments(Lx *lex) { + while (lex->kind == TOK_COMMENT) { + if (!nexttoken(lex)) + break; + } +} + +static O makeobject(Lx *lex) { + if (lex->kind == TOK_WORD) { + char *tok = lex->buffer; + char *end; + long v = strtol(tok, &end, 10); + if (end != tok && *end == '\0') { + nexttoken(lex); + return NUM((I)v); + } else { + S *s = intern(tok, (I)strlen(tok)); + O sym = BOX(TAG(s, TAG_SYM)); + nexttoken(lex); + return sym; + } + } else if (lex->kind == TOK_STRING) { + // TODO: string type + char *tok = lex->buffer; + S *s = intern(tok, (I)strlen(tok)); + O sym = BOX(TAG(s, TAG_SYM)); + nexttoken(lex); + return sym; + } + return NIL; +} + +static void lastcdr(O list, O cdr_val) { + O curr = list; + while (1) { + C *c = uncons(curr); + if (c->cdr == NIL) { + c->cdr = cdr_val; + return; + } + curr = c->cdr; + } +} + +static O readlist(Lx *lex) { + nexttoken(lex); + skipcomments(lex); + + if (lex->kind == TOK_RPAREN) { + nexttoken(lex); + return NIL; + } + + O head = NIL; + while (lex->kind != TOK_EOF) { + skipcomments(lex); + if (lex->kind == TOK_RPAREN) { + nexttoken(lex); + break; + } + + if (lex->kind == TOK_DOT) { + nexttoken(lex); + skipcomments(lex); + if (lex->kind == TOK_EOF) { + fprintf(stderr, "reader error: unexpected EOF after '.'\n"); + abort(); + } + O cdr_val = read(lex); + skipcomments(lex); + if (lex->kind != TOK_RPAREN) { + fprintf(stderr, "reader error: expected ')' after dotted pair cdr\n"); + abort(); + } + nexttoken(lex); + O normal = listreverse(head); + if (normal == NIL) { + fprintf(stderr, "reader error: '.' with no preceding elements\n"); + abort(); + } + lastcdr(normal, cdr_val); + return normal; + } + O elem = read(lex); + head = cons(elem, head); + skipcomments(lex); + } + + return listreverse(head); +} + +static O readquote(Lx *lex) { + nexttoken(lex); + skipcomments(lex); + O e = read(lex); + O qsym = BOX(TAG(intern("quote", -1), TAG_SYM)); + return cons(qsym, cons(e, NIL)); +} + +static O read(Lx *lex) { + skipcomments(lex); + + switch (lex->kind) { + case TOK_EOF: + return NIL; + case TOK_LPAREN: + return readlist(lex); + case TOK_QUOTE: + return readquote(lex); + case TOK_WORD: + case TOK_STRING: + return makeobject(lex); + case TOK_COMMENT: + nexttoken(lex); + return read(lex); + default: + nexttoken(lex); + return NIL; + } +} + +O readfile(FILE *f) { + if (!f) + return NIL; + + Lx lex; + lex.kind = TOK_EOF; + lex.cursor = 0; + lex.input = f; + lex.buffer[0] = '\0'; + + nexttoken(&lex); + skipcomments(&lex); + + if (lex.kind == TOK_EOF) + return NIL; + return read(&lex); +} + +O readstring(const char *s) { + if (!s) + return NIL; + size_t len = strlen(s); + FILE *f = fmemopen((void *)s, len, "r"); + O res = readfile(f); + fclose(f); + return res; +} diff --git a/symbol.c b/old/symbol.c similarity index 100% rename from symbol.c rename to old/symbol.c diff --git a/old/util.c b/old/util.c new file mode 100644 index 0000000..55fd0ff --- /dev/null +++ b/old/util.c @@ -0,0 +1,16 @@ +#include "wscm.h" + +O listreverse(O list) { + O prev = NIL; + O curr = list; + O next; + + while (curr != NIL) { + C *c = uncons(curr); + next = c->cdr; + c->cdr = prev; + prev = curr; + curr = next; + } + return prev; +} diff --git a/wscm.h b/old/wscm.h similarity index 85% rename from wscm.h rename to old/wscm.h index 351eea7..22b447a 100644 --- a/wscm.h +++ b/old/wscm.h @@ -131,8 +131,36 @@ O mksym(const char *str); O cons(O head, O tail); C *uncons(O obj); +O listreverse(O list); + V setupenv(O *env); O eval(O obj, O env); void print(O obj); void println(O obj); + +enum { + TOK_EOF = 0, + TOK_COMMENT = ';', + TOK_WORD = 'a', + TOK_LPAREN = '(', + TOK_RPAREN = ')', + TOK_STRING = '"', + TOK_QUOTE = '\'', + TOK_DOT = '.', +}; + +#include + +#define LEXER_CAP 1024 + +typedef struct Lx { + int kind; + int cursor; + FILE *input; + char buffer[1024]; +} Lx; + +int nexttoken(Lx *lex); +O readfile(FILE *f); +O readstring(const char *s); diff --git a/shell.nix b/shell.nix index 4870fe9..fcf4e51 100644 --- a/shell.nix +++ b/shell.nix @@ -1,7 +1,10 @@ { pkgs ? import {} }: pkgs.mkShell { + name = "rufus"; packages = with pkgs; [ clang-tools + meson + ninja bear gdb ]; diff --git a/src/gc.c b/src/gc.c new file mode 100644 index 0000000..b6c1342 --- /dev/null +++ b/src/gc.c @@ -0,0 +1,171 @@ +#include +#include +#include +#include +#include + +#include + +#define ALIGN(n) (((n) + 7) & ~7) +static inline I infrom(Gc *gc, V *ptr) { + const U8 *x = (const U8 *)ptr; + return (x >= gc->from.start && x < gc->from.end); +} + +V gc_addroot(Gc *gc, O *root) { + if (gc->roots.count >= gc->roots.capacity) { + Z newcap = gc->roots.capacity == 0 ? 16 : gc->roots.capacity * 2; + O **newdata = realloc(gc->roots.data, newcap * sizeof(O *)); + if (!newdata) { + fprintf(stderr, "fatal error: failed to expand roots array\n"); + abort(); + } + gc->roots.capacity = newcap; + gc->roots.data = newdata; + } + gc->roots.data[gc->roots.count++] = root; +} + +I gc_rootmark(Gc *gc) { return gc->roots.count; } +V gc_rootreset(Gc *gc, I mark) { gc->roots.count = mark; } + +static O copy(Gc *gc, Gh *hdr) { + assert(infrom(gc, hdr)); + assert(hdr->type != TYPE_FWD); + + // Copy the object to the to-space and leave a forwarding pointer behind + Z sz = ALIGN(hdr->size); + Gh *new = (Gh *)gc->to.free; + gc->to.free += sz; + memcpy(new, hdr, sz); + hdr->type = TYPE_FWD; + O *obj = (O *)(hdr + 1); + *obj = BOX(new); + + // Return the new object + return *obj; +} + +static O forward(Gc *gc, O obj) { + if (obj == NIL) + return NIL; + if (IMM(obj)) + return obj; + if (TAG_OF(obj) != TAG_MAN) + return obj; + if (!infrom(gc, (V *)obj)) + return obj; + + Gh *hdr = UNBOX(obj); + + // If the object to copy is already a forwarding pointer, return the object + // pointed to. + if (hdr->type == TYPE_FWD) { + O *o = (O *)(hdr + 1); + return *o; + } else { + return copy(gc, hdr); + } +} + +#if GC_DEBUG +static V printstats(Gc *gc, const char *label) { + size_t used = (Z)(gc->from.free - gc->from.start); + fprintf(stderr, "[%s] used=%zu/%zu bytes (%.1f%%)\n", label, used, + (Z)HEAP_BYTES, (double)used / (double)HEAP_BYTES * 100.0); +} +#endif + +V gc_collect(Gc *gc) { + U8 *scan = gc->to.free; + +#if GC_DEBUG + printstats(gc, "before GC"); +#endif + + // Forward roots to the to-space. + for (Z i = 0; i < gc->roots.count; i++) { + O *o = gc->roots.data[i]; + *o = forward(gc, *o); + } + + // Scan to-space for objects to forward (breadth-first iteration) + while (scan < gc->to.free) { + Gh *hdr = (Gh *)scan; + switch (hdr->type) { + case TYPE_PAIR: { + Pa *obj = (Pa *)(hdr + 1); + obj->head = forward(gc, obj->head); + obj->tail = forward(gc, obj->tail); + break; + } + case TYPE_CLOS: { + Cl *obj = (Cl *)(hdr + 1); + obj->args = forward(gc, obj->args); + obj->body = forward(gc, obj->body); + obj->env = forward(gc, obj->env); + break; + } + case TYPE_FWD: + fprintf(stderr, "fatal GC error: forwarding pointer in to-space\n"); + abort(); + default: + fprintf(stderr, "GC warning: junk object type %" PRId32 "\n", hdr->type); + } + scan += ALIGN(hdr->size); + } + + // Swap to- and from-spaces + Gs tmp = gc->from; + gc->from = gc->to; + gc->to = tmp; + gc->to.free = gc->to.start; + +#if GC_DEBUG + printstats(gc, "after GC"); +#endif +} + +Gh *gc_alloc(Gc *gc, Z sz) { + sz = ALIGN(sz); + if (gc->from.free + sz > gc->from.end) { + gc_collect(gc); + if (gc->from.free + sz > gc->from.end) { + fprintf(stderr, "out of memory (requested %" PRIdPTR "bytes\n", sz); + abort(); + } + } + Gh *hdr = (Gh *)gc->from.free; + gc->from.free += sz; + hdr->size = sz; + return hdr; +} + +V gc_init(Gc *gc) { + gc->from.start = malloc(HEAP_BYTES); + if (!gc->from.start) + goto fatal; + gc->from.end = gc->from.start + HEAP_BYTES; + gc->from.free = gc->from.start; + + gc->to.start = malloc(HEAP_BYTES); + if (!gc->to.start) + goto fatal; + gc->to.end = gc->to.start + HEAP_BYTES; + gc->to.free = gc->to.start; + + gc->roots.capacity = 0; + gc->roots.count = 0; + gc->roots.data = NULL; + return; + +fatal: + fprintf(stderr, "failed to allocate heap space\n"); + abort(); +} + +V gc_finalize(Gc *gc) { + free(gc->from.start); + free(gc->to.start); + free(gc->roots.data); +} diff --git a/src/interp.c b/src/interp.c new file mode 100644 index 0000000..5056d16 --- /dev/null +++ b/src/interp.c @@ -0,0 +1,156 @@ +#include +#include +#include + +#include +#include + +V interp_init(In *in) { + memset(&in->symtab, 0, sizeof(St)); + gc_init(&in->gc); + + in->env = NIL; + in->env = pair_make(&in->gc, prim_make(in, "cons", prim_cons), in->env); + in->env = pair_make(&in->gc, prim_make(in, "head", prim_head), in->env); + in->env = pair_make(&in->gc, prim_make(in, "tail", prim_tail), in->env); + in->env = pair_make(&in->gc, prim_make(in, "quote", prim_quote), in->env); + in->env = pair_make(&in->gc, prim_make(in, "print", prim_print), in->env); + in->env = pair_make(&in->gc, prim_make(in, "println", prim_println), in->env); +} + +V interp_finalize(In *in) { + free(in->symtab.data); + gc_finalize(&in->gc); +} + +static O bind(Gc *gc, O params, O args, O env) { + I mark = gc_rootmark(gc); + gc_addroot(gc, ¶ms); + gc_addroot(gc, &args); + gc_addroot(gc, &env); + + O res = env; + gc_addroot(gc, &res); + + while (params != NIL) { + if (type(params) == TYPE_SYM) { + O pair = pair_make(gc, params, args); + gc_addroot(gc, &pair); + res = pair_make(gc, pair, res); + break; + } + + if (type(params) != TYPE_PAIR) { + fprintf(stderr, "error: expected proper list or symbol for parameters\n"); + abort(); + } + + Pa *p = pair_unwrap(params); + O sym = p->head; + + O val = NIL; + if (args != NIL) { + if (type(args) != TYPE_PAIR) { + fprintf(stderr, "error: too many parameters for arguments\n"); + abort(); + } + Pa *a = pair_unwrap(args); + val = a->head; + args = a->tail; + } + + O pair = pair_make(gc, sym, val); + gc_addroot(gc, &pair); + res = pair_make(gc, pair, res); + + params = p->tail; + } + + gc_rootreset(gc, mark); + return res; +} + +static O eval(In *in, O obj, O env); + +static O eval_list(In *in, O list, O env) { + I mark = gc_rootmark(&in->gc); + gc_addroot(&in->gc, &list); + gc_addroot(&in->gc, &env); + + O head = NIL; + O curr = list; + + gc_addroot(&in->gc, &head); + gc_addroot(&in->gc, &curr); + + while (curr != NIL) { + Pa *p = pair_unwrap(curr); + O obj = eval(in, p->head, env); + head = pair_make(&in->gc, obj, head); + curr = p->tail; + } + + O result = list_reverse(head); + gc_rootreset(&in->gc, mark); + return result; +} + +static O apply(In *in, O fn, O args, O env) { + I ty = type(fn); + switch (ty) { + case TYPE_PRIM: { + Pr *pr = (Pr *)UNTAG(fn); + return pr->fn(in, args, env); + } + case TYPE_CLOS: { + Gh *hdr = UNBOX(fn); + Cl *cl = (Cl *)(hdr + 1); + args = eval_list(in, args, env); + O nenv = bind(&in->gc, cl->args, args, cl->env); + return eval(in, cl->body, nenv); + } + default: + fprintf(stderr, "tried to call non-function value\n"); + abort(); + } +} + +static O eval(In *in, O obj, O env) { + I ty = type(obj); + + if (ty == TYPE_SYM) { + O pair = list_assoc(obj, env); + if (pair == NIL) { + Sy *s = (Sy *)UNTAG(obj); + fprintf(stderr, "error: undefined symbol '%.*s'\n", (int)s->len, s->data); + abort(); + } + return pair_unwrap(pair)->tail; + } else if (ty != TYPE_PAIR) { + return obj; + } + + I mark = gc_rootmark(&in->gc); + gc_addroot(&in->gc, &obj); + gc_addroot(&in->gc, &env); + + Pa *c = pair_unwrap(obj); + O fn = eval(in, c->head, env); + gc_addroot(&in->gc, &fn); + + O res = apply(in, fn, c->tail, env); + gc_rootreset(&in->gc, mark); + return res; +} + +O interp_eval_list(In *in, O list, O env) { + if (env == NIL) + env = in->env; + return eval_list(in, list, env); +} + +O interp_eval(In *in, O obj, O env) { + if (env == NIL) + env = in->env; + return eval(in, obj, in->env); +} diff --git a/src/list.c b/src/list.c new file mode 100644 index 0000000..8870337 --- /dev/null +++ b/src/list.c @@ -0,0 +1,35 @@ +#include + +O list_assoc(O key, O alist) { + while (type(alist) == TYPE_PAIR) { + Pa *c = pair_unwrap(alist); + O pair = c->head; + if (pair == NIL) { + alist = c->tail; + continue; + } + if (type(pair) == TYPE_PAIR) { + Pa *kv = pair_unwrap(pair); + if (kv->head == key) + return pair; + } + alist = c->tail; + } + return NIL; +} + +O list_reverse(O list) { + O prev = NIL; + O curr = list; + O next; + + while (curr != NIL) { + Pa *c = pair_unwrap(curr); + next = c->tail; + c->tail = prev; + prev = curr; + curr = next; + } + + return prev; +} diff --git a/src/main.c b/src/main.c new file mode 100644 index 0000000..d024013 --- /dev/null +++ b/src/main.c @@ -0,0 +1,19 @@ +#include + +int main(void) { + In interp; + interp_init(&interp); + + // Build code + O s_println = symbol_make(&interp, "println"); + O s_quote = symbol_make(&interp, "quote"); + + O data = pair_make( + &interp.gc, s_quote, + pair_make(&interp.gc, pair_make(&interp.gc, NUM(1), NUM(2)), NIL)); + + O code = pair_make(&interp.gc, s_println, pair_make(&interp.gc, data, NIL)); + (void)interp_eval(&interp, code, NIL); + + interp_finalize(&interp); +} diff --git a/src/pair.c b/src/pair.c new file mode 100644 index 0000000..b860b0d --- /dev/null +++ b/src/pair.c @@ -0,0 +1,28 @@ +#include "stdlib.h" +#include +#include + +O pair_make(Gc *gc, O head, O tail) { + I mark = gc_rootmark(gc); + gc_addroot(gc, &head); + gc_addroot(gc, &tail); + + Z size = sizeof(Gh) + sizeof(Pa); + Gh *hdr = gc_alloc(gc, size); + hdr->type = TYPE_PAIR; + + Pa *pair = (Pa *)(hdr + 1); + pair->head = head; + pair->tail = tail; + + gc_rootreset(gc, mark); + return BOX(hdr); +} + +Pa *pair_unwrap(O obj) { + if (type(obj) != TYPE_PAIR) { + fprintf(stderr, "expected pair, got %s\n", typename(type(obj))); + abort(); + } + return (Pa *)(UNBOX(obj) + 1); +} diff --git a/src/prim.c b/src/prim.c new file mode 100644 index 0000000..d51f38f --- /dev/null +++ b/src/prim.c @@ -0,0 +1,53 @@ +#include +#include +#include + +O prim_make(In *in, const char *name, O (*fn)(In *, O, O)) { + Pr *pr = malloc(sizeof(Pr)); + pr->name = name; + pr->fn = fn; + O sym = BOX(TAG(intern(&in->symtab, name, 0), TAG_SYM)); + O prim = BOX(TAG(pr, TAG_PRIM)); + return pair_make(&in->gc, sym, prim); +} + +O prim_cons(In *in, O args, O env) { + args = interp_eval_list(in, args, env); + O head = pair_unwrap(args)->head; + args = pair_unwrap(args)->tail; + O tail = pair_unwrap(args)->head; + return pair_make(&in->gc, head, tail); +} + +O prim_head(In *in, O args, O env) { + args = interp_eval_list(in, args, env); + return pair_unwrap(pair_unwrap(args)->head)->head; +} + +O prim_tail(In *in, O args, O env) { + args = interp_eval_list(in, args, env); + return pair_unwrap(pair_unwrap(args)->head)->tail; +} + +O prim_print(In *in, O args, O env) { + args = interp_eval_list(in, args, env); + O arg = pair_unwrap(args)->head; + print(arg); + return NIL; +} + +O prim_println(In *in, O args, O env) { + args = interp_eval_list(in, args, env); + O arg = pair_unwrap(args)->head; + println(arg); + return NIL; +} + +O prim_quote(In *in, O args, O env) { + (void)in; + (void)env; + + if (args == NIL) + return NIL; + return pair_unwrap(args)->head; +} diff --git a/src/prim.h b/src/prim.h new file mode 100644 index 0000000..093e6e0 --- /dev/null +++ b/src/prim.h @@ -0,0 +1,8 @@ +#include + +O prim_cons(In *in, O args, O env); +O prim_head(In *in, O args, O env); +O prim_tail(In *in, O args, O env); +O prim_quote(In *in, O args, O env); +O prim_print(In *in, O args, O env); +O prim_println(In *in, O args, O env); diff --git a/src/print.c b/src/print.c new file mode 100644 index 0000000..418b653 --- /dev/null +++ b/src/print.c @@ -0,0 +1,70 @@ +#include +#include + +#include + +void print(O obj); + +void print_pair(O obj) { + O c = obj; + I f = 1; + + printf("("); + while (c != NIL && !IMM(c)) { + Gh *h = UNBOX(c); + if (h->type != TYPE_PAIR) { + printf(" . "); + print(c); + printf(")"); + return; + } + Pa *p = (Pa *)(h + 1); + if (!f) + printf(" "); + f = 0; + print(p->head); + c = p->tail; + } + if (c != NIL) { + printf(" . "); + print(c); + } + printf(")"); +} + +void print(O obj) { + if (obj == NIL) { + printf("()"); + } else if (IMM(obj)) { + printf("%" PRIdPTR, ORD(obj)); + } else { + switch (TAG_OF(obj)) { + case TAG_SYM: { + Sy *s = (Sy *)UNTAG(obj); + printf("%.*s", (int)s->len, s->data); + break; + } + case TAG_PRIM: { + Pr *p = (Pr *)UNTAG(obj); + printf("<#primitive %s>", p->name); + break; + } + default: { + Gh *h = UNBOX(obj); + switch (h->type) { + case TYPE_PAIR: + print_pair(obj); + break; + default: + printf("<#obj type=%" PRId32 " @ %p>", h->type, (void *)h); + break; + } + } + } + } +} + +void println(O obj) { + print(obj); + putchar('\n'); +} diff --git a/src/symbol.c b/src/symbol.c new file mode 100644 index 0000000..67d049c --- /dev/null +++ b/src/symbol.c @@ -0,0 +1,84 @@ +#include +#include +#include + +#include + +#define ALIGN(n) (((n) + 7) & ~7) + +static Sy *find(St *tab, U32 hash, Z len) { + if (tab->capacity == 0) + return NULL; + + Z ix = hash % tab->capacity; + for (Z i = 0; i < tab->capacity; i++) { + Sy *s = tab->data[ix]; + if (!s) + return NULL; + if (s->hash == hash && s->len == len) + return s; + ix = (ix + 1) % tab->capacity; + } + return NULL; +} + +static V resize(St *tab) { + Z cap = tab->capacity == 0 ? 16 : tab->capacity * 2; + Sy **nb = calloc(cap, sizeof(Sy *)); + for (Z i = 0; i < tab->capacity; i++) { + if (tab->data[i]) { + Sy *s = tab->data[i]; + Z ix = s->hash % cap; + while (nb[ix]) + ix = (ix + 1) % cap; + nb[ix] = s; + } + } + + if (tab->data != NULL) + free(tab->data); + + tab->capacity = cap; + tab->data = nb; +} + +static U32 hashstr(const char *data, Z len) { + U32 hash = 2166136261u; + for (Z i = 0; i < len; i++) { + hash ^= (uint8_t)data[i]; + hash *= 16777619u; + } + return hash; +} + +Sy *intern(St *tab, const char *str, Z len) { + if (len == 0) + len = strlen(str); + U32 hash = hashstr(str, len); + Sy *sym = find(tab, hash, len); + if (sym) + return sym; + + sym = aligned_alloc(8, ALIGN(sizeof(Sy))); + if (!sym) { + fprintf(stderr, "failed to allocate memory for symbol\n"); + abort(); + } + + sym->data = malloc(len); + memcpy(sym->data, str, len); + sym->len = len; + sym->hash = hash; + if (tab->count + 1 > tab->capacity) + resize(tab); + Z idx = hash % tab->capacity; + while (tab->data[idx] != NULL) + idx = (idx + 1) % tab->capacity; + tab->data[idx] = sym; + tab->count++; + return sym; +} + +O symbol_make(In *in, const char *str) { + return BOX(TAG(intern(&in->symtab, str, 0), TAG_SYM)); +} diff --git a/src/type.c b/src/type.c new file mode 100644 index 0000000..49ab75c --- /dev/null +++ b/src/type.c @@ -0,0 +1,43 @@ +#include +#include +#include +#include + +// clang-format off +static const char *typenames[] = { + [TYPE_NIL] = "nil", + [TYPE_NUM] = "number", + [TYPE_SYM] = "symbol", + [TYPE_PRIM] = "primitive", + [TYPE_PAIR] = "pair", + [TYPE_CLOS] = "closure", +}; +// clang-format on + +I type(O obj) { + if (obj == NIL) { + return TYPE_NIL; + } else if (IMM(obj)) { + return TYPE_NUM; + } else { + switch (TAG_OF(obj)) { + case TAG_SYM: + return TYPE_SYM; + case TAG_PRIM: + return TYPE_PRIM; + case TAG_MAN: { + Gh *hdr = UNBOX(obj); + return hdr->type; + } + default: + fprintf(stderr, "unknown pointer tag %" PRIdPTR "\n", TAG_OF(obj)); + abort(); + } + } +} + +const char *typename(I t) { + if (t >= TYPE__MAX) + return "??"; + return typenames[t]; +}