diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..0a31b58
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,13 @@
+root = true
+
+[*]
+end_of_line = lf
+insert_final_newline = true
+
+[*.{c,h}]
+indent_style = space
+indent_size = 2
+
+[Makefile]
+indent_style = tab
+indent_size = 4
diff --git a/Makefile b/Makefile
index 5088671..d5665d6 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,12 @@
-CFLAGS := -std=c99 -Og -g -Wpedantic -Wall
-.PHONY: clean
+CFLAGS := -std=c99 -Og -g -Wpedantic -Wall
+OBJS := main.o sym.o
 
-wscm: wscm.o
+wscm: $(OBJS)
+	$(CC) $(OBJS) -o wscm
+
+# Every object includes wscm.h, so rebuild them when it changes.
+$(OBJS): wscm.h
+
+.PHONY: clean
 clean:
-	rm -f wscm wscm.o
+	rm -f wscm $(OBJS)
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..d9c7b67
--- /dev/null
+++ b/main.c
@@ -0,0 +1,12 @@
+#include <stdio.h>
+#include "wscm.h"
+
+int main(void) {
+  const S *hello = intern("hello", -1);
+  printf("hello = %p\n", (void *)hello);
+
+  const S *hello2 = intern("hello", -1);
+  printf("hello2 = %p (should be equal to hello)\n", (void *)hello2);
+
+  return 0;
+}
diff --git a/sym.c b/sym.c
new file mode 100644
index 0000000..d6f1c93
--- /dev/null
+++ b/sym.c
@@ -0,0 +1,106 @@
+#include <stdlib.h>
+#include <string.h>
+
+#include "wscm.h"
+
+// Global symbol table: open addressing with linear probing over S pointers.
+St syms = {0, 0, NULL};
+
+// Look up the symbol whose bytes equal str[0..len). Returns NULL when the
+// table is empty or no matching symbol has been interned.
+static S *findsym(const char *str, Z len, U32 hash) {
+  if (syms.capacity == 0)
+    return NULL;
+
+  Z ix = hash % syms.capacity;
+  for (Z i = 0; i < syms.capacity; i++) {
+    S *s = syms.data[ix];
+    if (!s)
+      return NULL;
+    // Hash and length are only a cheap pre-filter; distinct strings can
+    // share both, so the bytes themselves decide equality.
+    if (s->hash == hash && s->len == len && memcmp(s->data, str, len) == 0)
+      return s;
+    ix = (ix + 1) % syms.capacity;
+  }
+  return NULL;
+}
+
+// Grow the table (16 slots initially, doubling afterwards) and reinsert
+// every symbol. Returns 0 on success; on allocation failure returns -1 and
+// leaves the existing table untouched.
+static int symtabresize(void) {
+  Z oldcap = syms.capacity;
+  Z newcap = oldcap == 0 ? 16 : oldcap * 2;
+
+  S **nb = calloc(newcap, sizeof *nb);
+  if (!nb)
+    return -1;
+
+  for (Z i = 0; i < oldcap; i++) {
+    S *s = syms.data[i];
+    if (!s)
+      continue;
+    Z ix = s->hash % newcap;
+    while (nb[ix])
+      ix = (ix + 1) % newcap;
+    nb[ix] = s;
+  }
+
+  free(syms.data); // free(NULL) is a no-op, so no guard is needed
+  syms.data = nb;
+  syms.capacity = newcap;
+  return 0;
+}
+
+// 32-bit FNV-1a hash of data[0..len).
+U32 hashstring(const char *data, Z len) {
+  U32 hash = 2166136261u;
+  for (Z i = 0; i < len; i++) {
+    hash ^= (uint8_t)data[i];
+    hash *= 16777619u;
+  }
+  return hash;
+}
+
+// Return the unique symbol for str[0..len), creating it on first use.
+// A negative len means str is NUL-terminated and its length is measured
+// with strlen. Returns NULL if memory cannot be allocated.
+S *intern(const char *str, I len) {
+  Z n = len < 0 ? strlen(str) : (Z)len;
+
+  U32 hash = hashstring(str, n);
+  S *s = findsym(str, n, hash);
+  if (s)
+    return s;
+
+  // Keep the load factor at or below 3/4 so probe sequences stay short.
+  // Grow before allocating the symbol so a failed resize leaks nothing.
+  if ((Z)syms.count + 1 > syms.capacity - syms.capacity / 4 &&
+      symtabresize() != 0)
+    return NULL;
+
+  s = malloc(sizeof *s);
+  if (!s)
+    return NULL;
+  // One spare byte keeps the allocation non-empty for "" and lets the
+  // copy be NUL-terminated.
+  s->data = malloc(n + 1);
+  if (!s->data) {
+    free(s);
+    return NULL;
+  }
+  memcpy(s->data, str, n);
+  s->data[n] = '\0';
+  s->len = n;
+  s->hash = hash;
+
+  Z ix = hash % syms.capacity;
+  while (syms.data[ix])
+    ix = (ix + 1) % syms.capacity;
+
+  syms.data[ix] = s;
+  syms.count++;
+
+  return s;
+}
diff --git a/wscm.c b/wscm.c
deleted file mode 100644
index f65fc66..0000000
--- a/wscm.c
+++ /dev/null
@@ -1,6 +0,0 @@
-#include <stdio.h>
-
-int main(void) {
-  printf("Hello, world!\n");
-  return 0;
-}
diff --git a/wscm.h b/wscm.h
new file mode 100644
index 0000000..3328734
--- /dev/null
+++ b/wscm.h
@@ -0,0 +1,69 @@
+#ifndef WSCM_H
+#define WSCM_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+// common types
+typedef uintptr_t U;
+typedef intptr_t I;
+
+typedef uint8_t U8;
+typedef uint32_t U32;
+typedef int32_t I32;
+typedef size_t Z;
+
+// objects
+typedef uintptr_t O;
+
+// cons pair
+typedef struct C C;
+struct C {
+  O head, tail;
+};
+
+// symbol
+typedef struct S S;
+struct S {
+  U8 *data; // len bytes of symbol text, followed by a NUL terminator
+  U32 hash; // hashstring() of data
+  Z len;    // length of data in bytes, excluding the terminator
+};
+
+// symbol table
+typedef struct St St;
+struct St {
+  I count;    // number of interned symbols
+  Z capacity; // number of slots in data
+  S **data;   // slot array; unused slots are NULL
+};
+
+// gc header
+typedef struct H H;
+struct H {
+  I type;
+  Z len;
+};
+
+// heap
+typedef struct E E;
+struct E {
+  struct {
+    U8 *start, *end;
+    U8 *free;
+  } from, to;
+
+  I root_count;
+  Z root_capacity;
+  O **roots;
+};
+
+extern E heap;
+extern St syms;
+
+// Return the unique symbol for str[0..len), creating it on first use.
+// A negative len means str is NUL-terminated. Returns NULL on allocation
+// failure.
+S *intern(const char *str, I len);
+
+#endif // WSCM_H