symbol interning

This commit is contained in:
Lobo 2026-01-06 12:27:53 -03:00
parent 5e6bc2679d
commit d64b0f0a6f
6 changed files with 174 additions and 10 deletions

13
.editorconfig Normal file
View file

@ -0,0 +1,13 @@
# Top-most EditorConfig file: editors stop searching parent directories here.
root = true
# Defaults for every file: LF line endings and a trailing newline at EOF.
[*]
end_of_line = lf
insert_final_newline = true
# C sources and headers are indented with two spaces.
[*.{c,h}]
indent_style = space
indent_size = 2
# The Makefile uses tab indentation, displayed four columns wide.
[Makefile]
indent_style = tab
indent_size = 4

View file

@ -1,6 +1,9 @@
# Compiler flags: C99, debug-friendly optimisation (-Og) with debug info (-g),
# and pedantic plus common warnings.
CFLAGS := -std=c99 -Og -g -Wpedantic -Wall
.PHONY: clean
# Object files that make up the wscm executable.
OBJS := main.o sym.o
# NOTE(review): this listing appears to interleave the removed and the added
# lines of a diff; the rules that mention wscm.o look like the pre-change
# versions (wscm.c is deleted by the same commit) - confirm against the real
# Makefile before relying on them.
wscm: wscm.o
# Link step: build wscm from the objects listed in OBJS.
wscm: $(OBJS)
$(CC) $(OBJS) -o wscm
.PHONY: clean
# Remove the executable and the object files.
clean:
rm -f wscm wscm.o
rm -f wscm main.o sym.o

12
main.c Normal file
View file

@ -0,0 +1,12 @@
#include <stdio.h>
#include "wscm.h"
/*
 * Smoke test for symbol interning: interns the text "hello" twice and prints
 * both resulting pointers so they can be compared by eye.  A length of -1
 * asks intern() to measure the string itself.
 */
int main(void) {
  const S *first;
  const S *second;

  first = intern("hello", -1);
  printf("hello = %p\n", (void *)first);

  /* Interning the same text again is expected to yield the same pointer. */
  second = intern("hello", -1);
  printf("hello2 = %p (should be equal to hello)\n", (void *)second);

  return 0;
}

81
sym.c Normal file
View file

@ -0,0 +1,81 @@
#include <stdlib.h>
#include <string.h>
#include "wscm.h"
St syms = {0, 0, NULL};
/*
 * Look up an already-interned symbol in the global table `syms`.
 *
 * str  - bytes to look for (need not be NUL-terminated)
 * len  - number of bytes at str
 * hash - hashstring(str, len), supplied by the caller
 *
 * Returns the matching symbol, or NULL if the table is empty or holds no
 * symbol with these bytes.  The table uses open addressing with linear
 * probing: the search starts at hash % capacity and walks forward until it
 * finds a match, hits an empty slot, or has visited every slot.
 */
static S *findsym(const char *str, Z len, U32 hash) {
  if (syms.capacity == 0)
    return NULL;

  Z ix = hash % syms.capacity;
  for (Z probes = 0; probes < syms.capacity; probes++) {
    S *s = syms.data[ix];
    if (!s)
      return NULL; /* empty slot ends the probe chain */
    /*
     * Hash and length are only a cheap pre-filter; distinct strings can share
     * both, so the bytes themselves must be compared.  The len == 0 guard
     * keeps a possibly-NULL data pointer away from memcmp.
     */
    if (s->hash == hash && s->len == len &&
        (len == 0 || memcmp(s->data, str, len) == 0))
      return s;
    ix = (ix + 1) % syms.capacity;
  }
  return NULL;
}
static void symtabresize(void) {
Z cap = syms.capacity;
if (cap == 0) {
syms.capacity = 16;
} else {
syms.capacity *= 2;
}
S **nb = calloc(syms.capacity, sizeof(S *));
for (Z i = 0; i < cap; i++) {
if (syms.data[i]) {
S *s = syms.data[i];
Z ix = s->hash % syms.capacity;
while (nb[ix])
ix = (ix + 1) % syms.capacity;
nb[ix] = s;
}
}
if (syms.data != NULL)
free(syms.data);
syms.data = nb;
}
/*
 * 32-bit FNV-1a hash of a byte sequence.
 *
 * data - first byte to hash (not read when len is 0)
 * len  - number of bytes to hash
 *
 * Starts from the FNV offset basis; for each byte, XORs it into the
 * accumulator and then multiplies by the FNV prime (arithmetic wraps in U32).
 */
U32 hashstring(const char *data, Z len) {
  const U32 offset_basis = 2166136261u;
  const U32 prime = 16777619u;

  U32 acc = offset_basis;
  const char *cursor = data;
  for (Z left = len; left > 0; left--) {
    acc ^= (U8)*cursor++;
    acc *= prime;
  }
  return acc;
}
/*
 * Return the unique symbol for a byte string, creating it on first use.
 *
 * str - bytes of the symbol name; copied, so the caller keeps ownership.
 *       NULL is accepted only together with len == 0 (the empty symbol).
 * len - number of bytes at str, or any negative value to use strlen(str).
 *
 * Returns the symbol already in the table if findsym() locates one, otherwise
 * a newly allocated symbol that has been inserted into the global table
 * `syms`.  Returns NULL if str is NULL with a non-zero len, or if memory for
 * the new symbol cannot be allocated (the table is left untouched then).
 * Symbols are owned by the table; callers must not free them.
 */
S *intern(const char *str, I len) {
  if (!str) {
    if (len != 0)
      return NULL;
    str = "";
  }

  /* Work with an unsigned length from here on. */
  Z n = len < 0 ? strlen(str) : (Z)len;

  U32 hash = hashstring(str, n);
  S *s = findsym(str, n, hash);
  if (s)
    return s;

  s = malloc(sizeof *s);
  if (!s)
    return NULL;
  /*
   * One extra byte: avoids a zero-sized allocation for the empty symbol and
   * lets the copy carry a trailing NUL (not counted in len).
   */
  s->data = malloc(n + 1);
  if (!s->data) {
    free(s);
    return NULL;
  }
  memcpy(s->data, str, n);
  s->data[n] = 0;
  s->len = n;
  s->hash = hash;

  /*
   * Grow once the table would pass 3/4 full (and on first use, when capacity
   * is 0).  Keeping free slots around guarantees that the probe loop below
   * terminates and that lookups of absent names stop at an empty slot instead
   * of scanning the whole table.
   */
  if ((Z)syms.count + 1 > syms.capacity - syms.capacity / 4)
    symtabresize();

  /* Linear probing: take the first empty slot at or after the home index. */
  Z ix = hash % syms.capacity;
  while (syms.data[ix])
    ix = (ix + 1) % syms.capacity;
  syms.data[ix] = s;
  syms.count++;
  return s;
}

6
wscm.c
View file

@ -1,6 +0,0 @@
#include <stdio.h>
/* Program entry point: writes a fixed greeting line to standard output. */
int main(void) {
  fputs("Hello, world!\n", stdout);
  return 0;
}

61
wscm.h Normal file
View file

@ -0,0 +1,61 @@
#include <stddef.h>
#include <stdint.h>
// common types: short aliases for the <stdint.h> / <stddef.h> integer types
typedef uintptr_t U; // unsigned, wide enough to hold a pointer
typedef intptr_t I; // signed, wide enough to hold a pointer
typedef uint8_t U8; // unsigned 8-bit (raw bytes)
typedef uint32_t U32; // unsigned 32-bit
typedef int32_t I32; // signed 32-bit
typedef size_t Z; // unsigned size type (lengths, capacities, indices)
// objects: a value is carried around as one pointer-sized unsigned integer.
// NOTE(review): how a value is encoded in that word is not defined in this
// header - confirm against the code that creates objects.
typedef uintptr_t O;
// cons pair: two object slots, head and tail
typedef struct C C;
struct C {
O head, tail;
};
// symbol: an interned name, created by intern()
typedef struct S S;
struct S {
U8 *data; // the symbol's own copy of the name bytes
U32 hash; // hash of those bytes, stored so it is not recomputed
Z len; // number of name bytes in data
};
// symbol table: open-addressed hash table of symbol pointers
typedef struct St St;
struct St {
I count; // number of symbols currently stored
Z capacity; // number of slots in data (0 until the first insertion)
S **data; // slot array; a NULL slot is empty
};
// gc header
// NOTE(review): nothing in main.c or sym.c uses this struct yet; the field
// meanings below are inferred from their names - confirm once the collector
// exists.
typedef struct H H;
struct H {
I type; // presumably a tag identifying the kind of object - TODO confirm
Z len; // presumably the object's length/size - TODO confirm units
};
// heap: two memory regions ("from" and "to") plus a growable list of roots
// NOTE(review): nothing in main.c or sym.c uses this struct yet; the field
// roles below are read off the names - confirm once the allocator/collector
// exists.
typedef struct E E;
struct E {
struct {
U8 *start, *end; // bounds of the region
U8 *free; // presumably the next unallocated byte in the region - TODO confirm
} from, to;
I root_count; // number of entries used in roots
Z root_capacity; // number of entries allocated for roots
O **roots; // array of pointers to object slots
};
// The heap instance.
// NOTE(review): no definition of `heap` appears in main.c or sym.c - confirm
// it is defined before anything references it.
extern E heap;
// The global symbol table, defined in sym.c.
extern St syms;
// Returns the symbol for the bytes at str, creating and storing it in `syms`
// on first use; interning the same bytes again yields the same pointer.
// len is the number of bytes at str, or a negative value to use strlen(str).
// The bytes are copied, so the caller keeps ownership of str.
S *intern(const char *str, I len);