From 2532dd9f4a7111338332e6444b0419ed57b0071a Mon Sep 17 00:00:00 2001 From: "Javier B. Torres" Date: Wed, 7 Jan 2026 09:40:21 -0300 Subject: [PATCH] first evaluator draft --- Makefile | 2 +- README.md | 12 ++- eval.c | 216 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ gc.c | 20 ++--- main.c | 19 +++-- object.c | 86 +++++++++++++++++++--- print.c | 29 +++++--- wscm.h | 58 ++++++++++++++- 8 files changed, 399 insertions(+), 43 deletions(-) create mode 100644 eval.c diff --git a/Makefile b/Makefile index 5c7a703..fd0ed4e 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CFLAGS := -std=c99 -Og -g -Wpedantic -Wall -OBJS := symbol.o object.o gc.o print.o main.o +OBJS := symbol.o object.o gc.o print.o eval.o main.o wscm: $(OBJS) $(CC) $(OBJS) -o wscm diff --git a/README.md b/README.md index 1c61799..b282a8e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,14 @@ # wolfscheme -An experiment in how quick I can get from zero to a decent Scheme. +An experiment in how quick I can get from zero to a decent Lisp. +Don't know why I called it `wolfscheme`. +## Day 1: + +- Symbol interning +- Garbage collector and cons pairs +- Object printing + +## Day 2: + +- Tree-walking evaluator with a few primitives diff --git a/eval.c b/eval.c new file mode 100644 index 0000000..11a5483 --- /dev/null +++ b/eval.c @@ -0,0 +1,216 @@ +#include +#include + +#include "wscm.h" + +static O listrev(O list) { + O prev = NIL; + O curr = list; + O next; + + while (curr != NIL) { + C *c = uncons(curr); + next = c->cdr; + c->cdr = prev; + prev = curr; + curr = next; + } + return prev; +} + +O assoc(O v, O env) { + while (kind(env) == KIND_CONS) { + C *c = uncons(env); + O pair = c->car; + if (pair == NIL) { + env = c->cdr; + continue; + } + if (kind(pair) == KIND_CONS) { + C *kv = uncons(pair); + if (kv->car == v) + return pair; + } + env = c->cdr; + } + return NIL; +} + +O bind(O params, O args, O env) { + I mark = rootmark(); + addroot(¶ms); + addroot(&args); + addroot(&env); + + O res = env; + addroot(&res); + + while (params != NIL) { + if (kind(params) == KIND_SYM) { + O pair = cons(params, args); + addroot(&pair); + res = cons(pair, res); + break; + } + + if (kind(params) != KIND_CONS) { + fprintf(stderr, "error: expected proper list or symbol for parameters\n"); + abort(); + } + + C *p = uncons(params); + O sym = p->car; + + O val = NIL; + if (args != NIL) { + if (kind(args) != KIND_CONS) { + fprintf(stderr, "error: too many parameters for arguments\n"); + abort(); + } + C *a = uncons(args); + val = a->car; + args = a->cdr; + } + + O pair = cons(sym, val); + addroot(&pair); + res = cons(pair, res); + + params = p->cdr; + } + + rootreset(mark); + return res; +} + +O eval(O obj, O env); + +static O evallist(O list, O env) { + I mark = rootmark(); + addroot(&list); + addroot(&env); + + O head = NIL; + addroot(&head); + + O curr = list; + addroot(&curr); + + while (curr != NIL) { + C *c = uncons(curr); + O val = eval(c->car, env); + head = cons(val, head); + curr = c->cdr; + } + + O res = listrev(head); + + rootreset(mark); + return res; +} + +/* primitives */ + +O prim_cons(O args, O env) { + args = evallist(args, env); + + O x = uncons(args)->car; + args = uncons(args)->cdr; + O y = uncons(args)->car; + return cons(x, y); +} + +O prim_car(O args, O env) { + args = evallist(args, env); + O list = uncons(args)->car; + return uncons(list)->car; +} + +O prim_cdr(O args, O env) { + args = evallist(args, env); + O list = uncons(args)->car; + return uncons(list)->cdr; +} + +O prim_quote(O args, O env) { + if (args == NIL) + return NIL; + return uncons(args)->car; +} + +O prim_if(O args, O env) { + C *ac = uncons(args); + O cond = eval(ac->car, env); + args = ac->cdr; + if (args == NIL) + return NIL; + ac = uncons(args); + if (cond != NIL) { + return eval(ac->car, env); + } else { + if (ac->cdr == NIL) + return NIL; + return eval(uncons(ac->cdr)->car, env); + } +} + +O mkprim(const char *name, O (*fn)(O, O)) { + P *p = malloc(sizeof(P)); + p->name = name; + p->fn = fn; + O sym = BOX(TAG(intern(name, -1), TAG_SYM)); + O prim = BOX(TAG(p, TAG_PRIM)); + return cons(sym, prim); +} + +V setupenv(O *env) { + *env = cons(mkprim("cons", prim_cons), *env); + *env = cons(mkprim("car", prim_car), *env); + *env = cons(mkprim("cdr", prim_cdr), *env); + *env = cons(mkprim("quote", prim_quote), *env); + *env = cons(mkprim("if", prim_if), *env); +} + +O eval(O obj, O env) { + I k = kind(obj); + + if (k == KIND_SYM) { + O pair = assoc(obj, env); + if (pair == NIL) { + S *s = (S *)UNTAG(obj); + fprintf(stderr, "error: undefined symbol '%.*s'\n", (int)s->len, s->data); + abort(); + } + return uncons(pair)->cdr; + } else if (k != KIND_CONS) { + return obj; + } + + I mark = rootmark(); + addroot(&obj); + addroot(&env); + + C *c = uncons(obj); + O fn = eval(c->car, env); + addroot(&fn); + + I fk = kind(fn); + O res = NIL; + + if (fk == KIND_PRIM) { + P *p = (P *)UNTAG(fn); + res = p->fn(c->cdr, env); + } else if (fk == KIND_CLOS) { + H *h = UNBOX(fn); + L *l = (L *)(h + 1); + O args = evallist(c->cdr, env); + O nenv = bind(l->args, args, l->env); + res = eval(l->body, nenv); + } else { + fprintf(stderr, "tried to call non-function value\n"); + abort(); + } + + rootreset(mark); + return res; +} diff --git a/gc.c b/gc.c index c8ae713..2fbaa35 100644 --- a/gc.c +++ b/gc.c @@ -8,10 +8,6 @@ E heap; -#define ALIGN(n) (((n) + 7) & ~7) -#define INFROM(x) \ - (((const U8 *)x) >= heap.from.start && ((const U8 *)x) < heap.from.end) - // roots management void addroot(O *ptr) { if (heap.root_count >= heap.root_capacity) { @@ -48,6 +44,8 @@ static O forward(O obj, U8 **freep) { return NIL; if (IMM(obj)) return obj; + if (TYPE(obj) != TAG_GC) + return obj; H *h = UNBOX(obj); if (!INFROM(h)) @@ -62,6 +60,8 @@ static O forward(O obj, U8 **freep) { } void collect(void) { + return; // DEBUG + U8 *freep = heap.to.start; U8 *scan = freep; @@ -75,12 +75,10 @@ void collect(void) { switch (h->type) { case OBJ_CONS: { C *c = (C *)(h + 1); - c->head = forward(c->head, &freep); - c->tail = forward(c->tail, &freep); + c->car = forward(c->car, &freep); + c->cdr = forward(c->cdr, &freep); break; } - case OBJ_SYM: - break; case OBJ_FWD: fprintf(stderr, "gc internal error: forwarding pointer in to-space\n"); abort(); @@ -122,12 +120,14 @@ H *alloc(Z sz) { void gcinit(void) { heap.from.start = malloc(GC_HEAP_BYTES); - if (!heap.from.start) abort(); + if (!heap.from.start) + abort(); heap.from.free = heap.from.start; heap.from.end = heap.from.start + GC_HEAP_BYTES; heap.to.start = malloc(GC_HEAP_BYTES); - if (!heap.to.start) abort(); + if (!heap.to.start) + abort(); heap.to.free = heap.to.start; heap.to.end = heap.to.start + GC_HEAP_BYTES; } diff --git a/main.c b/main.c index 2db482c..e4b1b7c 100644 --- a/main.c +++ b/main.c @@ -1,15 +1,22 @@ +#include + #include "wscm.h" int main(void) { gcinit(); - const S *hello = intern("hello", -1); - const S *goodbye = intern("goodbye", -1); - O p = cons(BOX(hello), BOX(goodbye)); - addroot(&p); + O env = NIL; + addroot(&env); + setupenv(&env); - collect(); - println(p); + // IF + O code = cons(SYM("if"), cons(NIL, cons(NUM(100), cons(NUM(200), NIL)))); + addroot(&code); + + // collect(); + println(code); + printf("=> "); + println(eval(code, env)); gcfinalize(); return 0; diff --git a/object.c b/object.c index 1ec97c5..b031c17 100644 --- a/object.c +++ b/object.c @@ -1,9 +1,47 @@ +#include #include #include -#include "inttypes.h" #include "wscm.h" +// clang-format off +static const char *kindnames[] = { + [KIND_NIL] = "nil", + [KIND_NUM] = "number", + [KIND_SYM] = "symbol", + [KIND_PRIM] = "primitive", + [KIND_CONS] = "cons", + [KIND_CLOS] = "closure", +}; +// clang-format on + +I kind(O obj) { + if (obj == NIL) + return KIND_NIL; + if (IMM(obj)) + return KIND_NUM; + + switch (TYPE(obj)) { + case TAG_SYM: + return KIND_SYM; + case TAG_PRIM: + return KIND_PRIM; + case TAG_GC: { + H *h = UNBOX(obj); + return h->type; + } + default: + fprintf(stderr, "unknown pointer tag %" PRIdPTR "\n", TYPE(obj)); + abort(); + } +} + +const char *kindname(I k) { + if (k >= KIND__MAX) + return "??"; + return kindnames[k]; +} + // cons lists O cons(O head, O tail) { I mark = rootmark(); @@ -16,24 +54,50 @@ O cons(O head, O tail) { h->type = OBJ_CONS; C *c = (C *)(h + 1); - c->head = head; - c->tail = tail; + c->car = head; + c->cdr = tail; rootreset(mark); return BOX(h); } C *uncons(O obj) { - if (obj == NIL) - return NULL; - if (IMM(obj)) { - fprintf(stderr, "unpair: expected pair, got integer\n"); + I k = kind(obj); + if (k != KIND_CONS) { + fprintf(stderr, "expected cons, got %s\n", kindnames[k]); abort(); } H *h = UNBOX(obj); - if (h->type != OBJ_CONS) { - fprintf(stderr, "unpair: expected pair, got type %" PRIdPTR "\n", h->type); - abort(); - } return (C *)(h + 1); } + +// closures +O mkclos(O args, O body, O env) { + I mark = rootmark(); + addroot(&args); + addroot(&body); + addroot(&env); + + const Z sz = sizeof(H) + sizeof(L); + H *h = alloc(sz); + h->size = sz; + h->type = OBJ_CLOS; + + L *l = (L *)(h + 1); + l->args = args; + l->body = body; + l->env = env; + + rootreset(mark); + return BOX(h); +} + +L *unclos(O obj) { + I k = kind(obj); + if (k != KIND_CONS) { + fprintf(stderr, "expected closure, got %s\n", kindnames[k]); + abort(); + } + H *h = UNBOX(obj); + return (L *)(h + 1); +} diff --git a/print.c b/print.c index fd2bb78..906825e 100644 --- a/print.c +++ b/print.c @@ -1,6 +1,7 @@ -#include "inttypes.h" #include "wscm.h" + #include +#include void print(O obj); @@ -21,10 +22,10 @@ void printcons(O obj) { if (!f) printf(" "); f = 0; - print(p->head); - c = p->tail; + print(p->car); + c = p->cdr; } - if (c != NIL && !IMM(c)) { + if (c != NIL) { printf(" . "); print(c); } @@ -38,20 +39,28 @@ void print(O obj) { printf("%" PRIdPTR, ORD(obj)); } else { void *x = (void *)UNBOX(obj); - if (((const U8 *)x) >= heap.from.start && ((const U8 *)x) < heap.from.end) { + switch (TYPE(obj)) { + case TAG_SYM: { + S *s = (S *)((U)x & ~7); + printf("%.*s", (int)s->len, s->data); + break; + } + case TAG_PRIM: { + P *p = (P *)((U)x & ~7); + printf("<#primitive %s>", p->name); + break; + } + default: { H *h = (H *)x; switch (h->type) { case OBJ_CONS: printcons(obj); break; default: - printf("", h->type, (void *)h); + printf("<#obj type=%" PRIdPTR " @ %p>", h->type, (void *)h); break; } - } else { - // If pointer is outside the heap, it's a symbol. - S *s = (S *)x; - printf("%.*s", (int)s->len, s->data); + } } } } diff --git a/wscm.h b/wscm.h index f2dc1c2..351eea7 100644 --- a/wscm.h +++ b/wscm.h @@ -2,6 +2,8 @@ #include // common types +typedef void V; + typedef uintptr_t U; typedef intptr_t I; @@ -16,7 +18,13 @@ typedef uintptr_t O; // cons pair typedef struct C C; struct C { - O head, tail; + O car, cdr; +}; + +// lambda +typedef struct L L; +struct L { + O args, body, env; }; // symbol @@ -35,9 +43,32 @@ struct St { S **data; }; -// gc header -enum { OBJ_CONS, OBJ_SYM, OBJ_FWD }; +#define TYPE_MASK 7 +enum { + TAG_GC = 0, // GC-managed object + TAG_NUM = 1, // Immediate number + TAG_SYM = 2, // Pointer to symbol + TAG_PRIM = 4, // Pointer to primitive +}; + +enum { + KIND_NIL = 0, + KIND_NUM = 1, + KIND_SYM = 2, + KIND_PRIM = 4, + KIND_CONS = 5, + KIND_CLOS = 6, + KIND__MAX, +}; + +#define TYPE(x) (((U)(x)) & TYPE_MASK) +#define UNTAG(x) (((U)(x)) & ~TYPE_MASK) +#define TAG(x, t) (void *)(((U)(x)) | t) + +enum { OBJ_CONS = 5, OBJ_CLOS = 6, OBJ_FWD = 7 }; + +// gc header typedef struct H H; struct H { I type; @@ -57,15 +88,28 @@ struct E { O **roots; }; +// primitive +typedef struct P P; +struct P { + const char *name; + O (*fn)(O, O); +}; + extern E heap; extern St syms; +#define ALIGN(n) (((n) + 7) & ~7) +#define INFROM(x) \ + (((const U8 *)x) >= heap.from.start && ((const U8 *)x) < heap.from.end) + #define IMM(x) ((x) & 1) -#define NUM(x) (((O)((I)(x) << 1)) | (V)1) +#define NUM(x) (((O)((I)(x) << 1)) | (O)1) #define ORD(x) ((I)(x) >> 1) #define BOX(x) ((O)(x)) #define UNBOX(x) ((H *)(x)) +#define SYM(s) BOX(TAG(intern(s, -1), TAG_SYM)) + #define NIL ((O)0) #define GC_HEAP_BYTES (1024 * 1024) @@ -78,11 +122,17 @@ H *alloc(Z sz); void gcinit(void); void gcfinalize(void); +I kind(O obj); +const char *kindname(I k); + S *intern(const char *str, I len); O mksym(const char *str); O cons(O head, O tail); C *uncons(O obj); +V setupenv(O *env); +O eval(O obj, O env); + void print(O obj); void println(O obj);