first evaluator draft

This commit is contained in:
Lobo 2026-01-07 09:40:21 -03:00
parent 9fc5f10fc6
commit 2532dd9f4a
8 changed files with 399 additions and 43 deletions

View file

@ -1,5 +1,5 @@
CFLAGS := -std=c99 -Og -g -Wpedantic -Wall
OBJS := symbol.o object.o gc.o print.o main.o
OBJS := symbol.o object.o gc.o print.o eval.o main.o
wscm: $(OBJS)
$(CC) $(OBJS) -o wscm

View file

@ -1,4 +1,14 @@
# wolfscheme
An experiment in how quick I can get from zero to a decent Scheme.
An experiment in how quick I can get from zero to a decent Lisp.
Don't know why I called it `wolfscheme`.
## Day 1:
- Symbol interning
- Garbage collector and cons pairs
- Object printing
## Day 2:
- Tree-walking evaluator with a few primitives

216
eval.c Normal file
View file

@ -0,0 +1,216 @@
#include <stdio.h>
#include <stdlib.h>
#include "wscm.h"
static O listrev(O list) {
O prev = NIL;
O curr = list;
O next;
while (curr != NIL) {
C *c = uncons(curr);
next = c->cdr;
c->cdr = prev;
prev = curr;
curr = next;
}
return prev;
}
O assoc(O v, O env) {
while (kind(env) == KIND_CONS) {
C *c = uncons(env);
O pair = c->car;
if (pair == NIL) {
env = c->cdr;
continue;
}
if (kind(pair) == KIND_CONS) {
C *kv = uncons(pair);
if (kv->car == v)
return pair;
}
env = c->cdr;
}
return NIL;
}
O bind(O params, O args, O env) {
I mark = rootmark();
addroot(&params);
addroot(&args);
addroot(&env);
O res = env;
addroot(&res);
while (params != NIL) {
if (kind(params) == KIND_SYM) {
O pair = cons(params, args);
addroot(&pair);
res = cons(pair, res);
break;
}
if (kind(params) != KIND_CONS) {
fprintf(stderr, "error: expected proper list or symbol for parameters\n");
abort();
}
C *p = uncons(params);
O sym = p->car;
O val = NIL;
if (args != NIL) {
if (kind(args) != KIND_CONS) {
fprintf(stderr, "error: too many parameters for arguments\n");
abort();
}
C *a = uncons(args);
val = a->car;
args = a->cdr;
}
O pair = cons(sym, val);
addroot(&pair);
res = cons(pair, res);
params = p->cdr;
}
rootreset(mark);
return res;
}
O eval(O obj, O env);
static O evallist(O list, O env) {
I mark = rootmark();
addroot(&list);
addroot(&env);
O head = NIL;
addroot(&head);
O curr = list;
addroot(&curr);
while (curr != NIL) {
C *c = uncons(curr);
O val = eval(c->car, env);
head = cons(val, head);
curr = c->cdr;
}
O res = listrev(head);
rootreset(mark);
return res;
}
/* primitives */
O prim_cons(O args, O env) {
args = evallist(args, env);
O x = uncons(args)->car;
args = uncons(args)->cdr;
O y = uncons(args)->car;
return cons(x, y);
}
O prim_car(O args, O env) {
args = evallist(args, env);
O list = uncons(args)->car;
return uncons(list)->car;
}
O prim_cdr(O args, O env) {
args = evallist(args, env);
O list = uncons(args)->car;
return uncons(list)->cdr;
}
O prim_quote(O args, O env) {
if (args == NIL)
return NIL;
return uncons(args)->car;
}
O prim_if(O args, O env) {
C *ac = uncons(args);
O cond = eval(ac->car, env);
args = ac->cdr;
if (args == NIL)
return NIL;
ac = uncons(args);
if (cond != NIL) {
return eval(ac->car, env);
} else {
if (ac->cdr == NIL)
return NIL;
return eval(uncons(ac->cdr)->car, env);
}
}
O mkprim(const char *name, O (*fn)(O, O)) {
P *p = malloc(sizeof(P));
p->name = name;
p->fn = fn;
O sym = BOX(TAG(intern(name, -1), TAG_SYM));
O prim = BOX(TAG(p, TAG_PRIM));
return cons(sym, prim);
}
V setupenv(O *env) {
*env = cons(mkprim("cons", prim_cons), *env);
*env = cons(mkprim("car", prim_car), *env);
*env = cons(mkprim("cdr", prim_cdr), *env);
*env = cons(mkprim("quote", prim_quote), *env);
*env = cons(mkprim("if", prim_if), *env);
}
O eval(O obj, O env) {
I k = kind(obj);
if (k == KIND_SYM) {
O pair = assoc(obj, env);
if (pair == NIL) {
S *s = (S *)UNTAG(obj);
fprintf(stderr, "error: undefined symbol '%.*s'\n", (int)s->len, s->data);
abort();
}
return uncons(pair)->cdr;
} else if (k != KIND_CONS) {
return obj;
}
I mark = rootmark();
addroot(&obj);
addroot(&env);
C *c = uncons(obj);
O fn = eval(c->car, env);
addroot(&fn);
I fk = kind(fn);
O res = NIL;
if (fk == KIND_PRIM) {
P *p = (P *)UNTAG(fn);
res = p->fn(c->cdr, env);
} else if (fk == KIND_CLOS) {
H *h = UNBOX(fn);
L *l = (L *)(h + 1);
O args = evallist(c->cdr, env);
O nenv = bind(l->args, args, l->env);
res = eval(l->body, nenv);
} else {
fprintf(stderr, "tried to call non-function value\n");
abort();
}
rootreset(mark);
return res;
}

20
gc.c
View file

@ -8,10 +8,6 @@
E heap;
#define ALIGN(n) (((n) + 7) & ~7)
#define INFROM(x) \
(((const U8 *)x) >= heap.from.start && ((const U8 *)x) < heap.from.end)
// roots management
void addroot(O *ptr) {
if (heap.root_count >= heap.root_capacity) {
@ -48,6 +44,8 @@ static O forward(O obj, U8 **freep) {
return NIL;
if (IMM(obj))
return obj;
if (TYPE(obj) != TAG_GC)
return obj;
H *h = UNBOX(obj);
if (!INFROM(h))
@ -62,6 +60,8 @@ static O forward(O obj, U8 **freep) {
}
void collect(void) {
return; // DEBUG
U8 *freep = heap.to.start;
U8 *scan = freep;
@ -75,12 +75,10 @@ void collect(void) {
switch (h->type) {
case OBJ_CONS: {
C *c = (C *)(h + 1);
c->head = forward(c->head, &freep);
c->tail = forward(c->tail, &freep);
c->car = forward(c->car, &freep);
c->cdr = forward(c->cdr, &freep);
break;
}
case OBJ_SYM:
break;
case OBJ_FWD:
fprintf(stderr, "gc internal error: forwarding pointer in to-space\n");
abort();
@ -122,12 +120,14 @@ H *alloc(Z sz) {
void gcinit(void) {
heap.from.start = malloc(GC_HEAP_BYTES);
if (!heap.from.start) abort();
if (!heap.from.start)
abort();
heap.from.free = heap.from.start;
heap.from.end = heap.from.start + GC_HEAP_BYTES;
heap.to.start = malloc(GC_HEAP_BYTES);
if (!heap.to.start) abort();
if (!heap.to.start)
abort();
heap.to.free = heap.to.start;
heap.to.end = heap.to.start + GC_HEAP_BYTES;
}

19
main.c
View file

@ -1,15 +1,22 @@
#include <stdio.h>
#include "wscm.h"
int main(void) {
gcinit();
const S *hello = intern("hello", -1);
const S *goodbye = intern("goodbye", -1);
O p = cons(BOX(hello), BOX(goodbye));
addroot(&p);
O env = NIL;
addroot(&env);
setupenv(&env);
collect();
println(p);
// IF
O code = cons(SYM("if"), cons(NIL, cons(NUM(100), cons(NUM(200), NIL))));
addroot(&code);
// collect();
println(code);
printf("=> ");
println(eval(code, env));
gcfinalize();
return 0;

View file

@ -1,9 +1,47 @@
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include "inttypes.h"
#include "wscm.h"
// clang-format off
static const char *kindnames[] = {
[KIND_NIL] = "nil",
[KIND_NUM] = "number",
[KIND_SYM] = "symbol",
[KIND_PRIM] = "primitive",
[KIND_CONS] = "cons",
[KIND_CLOS] = "closure",
};
// clang-format on
I kind(O obj) {
if (obj == NIL)
return KIND_NIL;
if (IMM(obj))
return KIND_NUM;
switch (TYPE(obj)) {
case TAG_SYM:
return KIND_SYM;
case TAG_PRIM:
return KIND_PRIM;
case TAG_GC: {
H *h = UNBOX(obj);
return h->type;
}
default:
fprintf(stderr, "unknown pointer tag %" PRIdPTR "\n", TYPE(obj));
abort();
}
}
const char *kindname(I k) {
if (k >= KIND__MAX)
return "??";
return kindnames[k];
}
// cons lists
O cons(O head, O tail) {
I mark = rootmark();
@ -16,24 +54,50 @@ O cons(O head, O tail) {
h->type = OBJ_CONS;
C *c = (C *)(h + 1);
c->head = head;
c->tail = tail;
c->car = head;
c->cdr = tail;
rootreset(mark);
return BOX(h);
}
C *uncons(O obj) {
if (obj == NIL)
return NULL;
if (IMM(obj)) {
fprintf(stderr, "unpair: expected pair, got integer\n");
I k = kind(obj);
if (k != KIND_CONS) {
fprintf(stderr, "expected cons, got %s\n", kindnames[k]);
abort();
}
H *h = UNBOX(obj);
if (h->type != OBJ_CONS) {
fprintf(stderr, "unpair: expected pair, got type %" PRIdPTR "\n", h->type);
abort();
}
return (C *)(h + 1);
}
// closures
O mkclos(O args, O body, O env) {
I mark = rootmark();
addroot(&args);
addroot(&body);
addroot(&env);
const Z sz = sizeof(H) + sizeof(L);
H *h = alloc(sz);
h->size = sz;
h->type = OBJ_CLOS;
L *l = (L *)(h + 1);
l->args = args;
l->body = body;
l->env = env;
rootreset(mark);
return BOX(h);
}
L *unclos(O obj) {
I k = kind(obj);
if (k != KIND_CONS) {
fprintf(stderr, "expected closure, got %s\n", kindnames[k]);
abort();
}
H *h = UNBOX(obj);
return (L *)(h + 1);
}

29
print.c
View file

@ -1,6 +1,7 @@
#include "inttypes.h"
#include "wscm.h"
#include <stdio.h>
#include <inttypes.h>
void print(O obj);
@ -21,10 +22,10 @@ void printcons(O obj) {
if (!f)
printf(" ");
f = 0;
print(p->head);
c = p->tail;
print(p->car);
c = p->cdr;
}
if (c != NIL && !IMM(c)) {
if (c != NIL) {
printf(" . ");
print(c);
}
@ -38,20 +39,28 @@ void print(O obj) {
printf("%" PRIdPTR, ORD(obj));
} else {
void *x = (void *)UNBOX(obj);
if (((const U8 *)x) >= heap.from.start && ((const U8 *)x) < heap.from.end) {
switch (TYPE(obj)) {
case TAG_SYM: {
S *s = (S *)((U)x & ~7);
printf("%.*s", (int)s->len, s->data);
break;
}
case TAG_PRIM: {
P *p = (P *)((U)x & ~7);
printf("<#primitive %s>", p->name);
break;
}
default: {
H *h = (H *)x;
switch (h->type) {
case OBJ_CONS:
printcons(obj);
break;
default:
printf("<obj type=%" PRIdPTR " @ %p>", h->type, (void *)h);
printf("<#obj type=%" PRIdPTR " @ %p>", h->type, (void *)h);
break;
}
} else {
// If pointer is outside the heap, it's a symbol.
S *s = (S *)x;
printf("%.*s", (int)s->len, s->data);
}
}
}
}

58
wscm.h
View file

@ -2,6 +2,8 @@
#include <stdint.h>
// common types
typedef void V;
typedef uintptr_t U;
typedef intptr_t I;
@ -16,7 +18,13 @@ typedef uintptr_t O;
// cons pair
typedef struct C C;
struct C {
O head, tail;
O car, cdr;
};
// lambda
typedef struct L L;
struct L {
O args, body, env;
};
// symbol
@ -35,9 +43,32 @@ struct St {
S **data;
};
// gc header
enum { OBJ_CONS, OBJ_SYM, OBJ_FWD };
#define TYPE_MASK 7
enum {
TAG_GC = 0, // GC-managed object
TAG_NUM = 1, // Immediate number
TAG_SYM = 2, // Pointer to symbol
TAG_PRIM = 4, // Pointer to primitive
};
enum {
KIND_NIL = 0,
KIND_NUM = 1,
KIND_SYM = 2,
KIND_PRIM = 4,
KIND_CONS = 5,
KIND_CLOS = 6,
KIND__MAX,
};
#define TYPE(x) (((U)(x)) & TYPE_MASK)
#define UNTAG(x) (((U)(x)) & ~TYPE_MASK)
#define TAG(x, t) (void *)(((U)(x)) | t)
enum { OBJ_CONS = 5, OBJ_CLOS = 6, OBJ_FWD = 7 };
// gc header
typedef struct H H;
struct H {
I type;
@ -57,15 +88,28 @@ struct E {
O **roots;
};
// primitive
typedef struct P P;
struct P {
const char *name;
O (*fn)(O, O);
};
extern E heap;
extern St syms;
#define ALIGN(n) (((n) + 7) & ~7)
#define INFROM(x) \
(((const U8 *)x) >= heap.from.start && ((const U8 *)x) < heap.from.end)
#define IMM(x) ((x) & 1)
#define NUM(x) (((O)((I)(x) << 1)) | (V)1)
#define NUM(x) (((O)((I)(x) << 1)) | (O)1)
#define ORD(x) ((I)(x) >> 1)
#define BOX(x) ((O)(x))
#define UNBOX(x) ((H *)(x))
#define SYM(s) BOX(TAG(intern(s, -1), TAG_SYM))
#define NIL ((O)0)
#define GC_HEAP_BYTES (1024 * 1024)
@ -78,11 +122,17 @@ H *alloc(Z sz);
void gcinit(void);
void gcfinalize(void);
I kind(O obj);
const char *kindname(I k);
S *intern(const char *str, I len);
O mksym(const char *str);
O cons(O head, O tail);
C *uncons(O obj);
V setupenv(O *env);
O eval(O obj, O env);
void print(O obj);
void println(O obj);