#include #include #include #include #include #include "opcodes.h" #include "sleb128.h" typedef struct { Growl *data; size_t count; size_t capacity; } ConstantTable; typedef struct { uint8_t *data; size_t count; size_t capacity; ConstantTable constants; } Chunk; typedef struct { const char *name; uint8_t opcodes[8]; } Primitive; // clang-format off Primitive primitives[] = { {"nil", {GOP_PUSH_NIL, 0}}, {"drop", {GOP_DROP, 0}}, {"dup", {GOP_DUP, 0}}, {"swap", {GOP_SWAP, 0}}, {"2drop", {GOP_2DROP, 0}}, {"2dup", {GOP_2DUP, 0}}, {"2swap", {GOP_2SWAP, 0}}, {"nip", {GOP_NIP, 0}}, {"over", {GOP_OVER, 0}}, {"bury", {GOP_BURY, 0}}, {"dig", {GOP_DIG, 0}}, {">r", {GOP_TO_RETAIN, 0}}, {"r>", {GOP_FROM_RETAIN, 0}}, {"?", {GOP_CHOOSE, 0}}, {"if", {GOP_CHOOSE, GOP_CALL, 0}}, {"call", {GOP_CALL, 0}}, {"compose", {GOP_COMPOSE, 0}}, {"curry", {GOP_CURRY, 0}}, {".", {GOP_PPRINT, 0}}, {"+", {GOP_ADD, 0}}, {"*", {GOP_MUL, 0}}, {"-", {GOP_SUB, 0}}, {"/", {GOP_DIV, 0}}, {"%", {GOP_MOD, 0}}, {"=", {GOP_EQ, 0}}, {"!=", {GOP_NEQ, 0}}, {"<", {GOP_LT, 0}}, {"<=", {GOP_LTE, 0}}, {">", {GOP_GT, 0}}, {">=", {GOP_GTE, 0}}, {"&", {GOP_BAND, 0}}, {"|", {GOP_BOR, 0}}, {"^", {GOP_BXOR, 0}}, {"~", {GOP_BNOT, 0}}, {NULL, {0}} }; // clang-format on // See https://nullprogram.com/blog/2023/10/05/ #define push(s, a) \ ({ \ typeof(s) s_ = (s); \ typeof(a) a_ = (a); \ if (s_->count >= s_->capacity) { \ grow(s_, sizeof(*s_->data), _Alignof(*s_->data), a_); \ } \ s_->data + s_->count++; \ }) static void grow(void *slice, ptrdiff_t size, ptrdiff_t align, GrowlArena *a) { struct { uint8_t *data; ptrdiff_t len; ptrdiff_t cap; } replica; memcpy(&replica, slice, sizeof(replica)); if (!replica.data) { replica.cap = 1; replica.data = growl_arena_alloc(a, 2 * size, align, replica.cap); } else if (a->free == replica.data + size * replica.cap) { growl_arena_alloc(a, size, 1, replica.cap); } else { void *data = growl_arena_alloc(a, 2 * size, align, replica.cap); memcpy(data, replica.data, size * replica.len); replica.data = data; } replica.cap *= 2; memcpy(slice, &replica, sizeof(replica)); } static void emit_byte(GrowlVM *vm, Chunk *chunk, uint8_t byte) { *push(chunk, &vm->scratch) = byte; } static void emit_sleb128(GrowlVM *vm, Chunk *chunk, intptr_t num) { int more = 1; while (more) { uint8_t byte = num & 0x7f; num >>= 7; if ((num == 0 && !(byte & 0x40)) || (num == -1 && (byte & 0x40))) { more = 0; } else { byte |= 0x80; } emit_byte(vm, chunk, byte); } } static size_t add_constant(GrowlVM *vm, Chunk *chunk, Growl value) { for (size_t i = 0; i < chunk->constants.count; ++i) { if (chunk->constants.data[i] == value) return i; } *push(&chunk->constants, &vm->scratch) = value; return chunk->constants.count - 1; } static int is_integer(const char *str, long *out) { char *end; long val = strtol(str, &end, 0); if (*end == '\0' && end != str) { *out = val; return 1; } return 0; } __attribute__((format(printf, 2, 3))) static void compile_error(GrowlLexer *lexer, const char *fmt, ...) { fprintf(stderr, "%d:%d: compile error: ", lexer->start_row + 1, lexer->start_col + 1); va_list args; va_start(args, fmt); vfprintf(stderr, fmt, args); va_end(args); fprintf(stderr, "\n"); } static void optimize_tail_calls(Chunk *chunk) { size_t i = 0; while (i < chunk->count) { uint8_t opcode = chunk->data[i]; size_t start = i++; if (opcode == GOP_PUSH_CONSTANT || opcode == GOP_WORD || opcode == GOP_TAIL_WORD) { if (i < chunk->count) i += growl_sleb128_peek(&chunk->data[i], NULL); } if (i < chunk->count && chunk->data[i] == GOP_RETURN) { if (opcode == GOP_CALL) { chunk->data[i] = GOP_NOP; chunk->data[start] = GOP_TAIL_CALL; } else if (opcode == GOP_WORD) { chunk->data[i] = GOP_NOP; chunk->data[start] = GOP_TAIL_WORD; } } } } static int compile_token(GrowlVM *vm, GrowlLexer *lexer, Chunk *chunk); static int compile_quotation(GrowlVM *vm, GrowlLexer *lexer, Chunk *chunk) { growl_lexer_next(lexer); // skip '[' Chunk quot_chunk = {0}; while (lexer->kind != ']' && lexer->kind != GTOK_EOF && lexer->kind != GTOK_INVALID) { if (compile_token(vm, lexer, "_chunk)) { return 1; } } if (lexer->kind != ']') { compile_error(lexer, "expected ']' to close quotation"); return 1; } emit_byte(vm, "_chunk, GOP_RETURN); optimize_tail_calls("_chunk); Growl quot = growl_make_quotation(vm, quot_chunk.data, quot_chunk.count, quot_chunk.constants.data, quot_chunk.constants.count); size_t idx = add_constant(vm, chunk, quot); emit_byte(vm, chunk, GOP_PUSH_CONSTANT); emit_sleb128(vm, chunk, (intptr_t)idx); growl_lexer_next(lexer); return 0; } static int compile_string(GrowlVM *vm, GrowlLexer *lexer, Chunk *chunk) { Growl str = growl_wrap_string(vm, lexer->buffer); size_t const_idx = add_constant(vm, chunk, str); emit_byte(vm, chunk, GOP_PUSH_CONSTANT); emit_sleb128(vm, chunk, (intptr_t)const_idx); growl_lexer_next(lexer); return 0; } static int compile_def(GrowlVM *vm, GrowlLexer *lexer) { growl_lexer_next(lexer); if (lexer->kind != GTOK_WORD) { compile_error(lexer, "expected name after 'def'"); return 1; } char *name = growl_arena_strdup(&vm->scratch, lexer->buffer); growl_lexer_next(lexer); if (lexer->kind != GTOK_LBRACE) { compile_error(lexer, "expected '{' after def name '%s'", name); return 1; } growl_lexer_next(lexer); Chunk fn_chunk = {0}; while (lexer->kind != GTOK_RBRACE && lexer->kind != GTOK_EOF && lexer->kind != GTOK_INVALID) { if (compile_token(vm, lexer, &fn_chunk)) { return 1; } } if (lexer->kind != GTOK_RBRACE) { compile_error(lexer, "expected '}' to close def '%s'", name); return 1; } emit_byte(vm, &fn_chunk, GOP_RETURN); optimize_tail_calls(&fn_chunk); Growl fn = growl_make_quotation(vm, fn_chunk.data, fn_chunk.count, fn_chunk.constants.data, fn_chunk.constants.count); GrowlQuotation *quot = (GrowlQuotation *)(GROWL_UNBOX(fn) + 1); GrowlDictionary *entry = growl_dictionary_upsert(&vm->dictionary, name, &vm->arena); GrowlDefinition *def = push(&vm->defs, &vm->arena); def->name = growl_arena_strdup(&vm->arena, name); def->quotation = quot; entry->quotation = quot; entry->index = vm->defs.count - 1; growl_lexer_next(lexer); return 0; } static int compile_call(GrowlVM *vm, GrowlLexer *lexer, const char *name, Chunk *chunk) { for (size_t i = 0; primitives[i].name != NULL; i++) { if (strcmp(name, primitives[i].name) == 0) { for (size_t j = 0; primitives[i].opcodes[j] != 0; j++) emit_byte(vm, chunk, primitives[i].opcodes[j]); growl_lexer_next(lexer); return 0; } } GrowlDictionary *entry = growl_dictionary_upsert(&vm->dictionary, name, NULL); if (entry == NULL) { compile_error(lexer, "undefined word '%s'", name); return 1; } emit_byte(vm, chunk, GOP_WORD); emit_sleb128(vm, chunk, entry->index); growl_lexer_next(lexer); return 0; } static int compile_command(GrowlVM *vm, GrowlLexer *lexer, Chunk *chunk) { char *name = growl_arena_strdup(&vm->scratch, lexer->buffer); name[strlen(name) - 1] = '\0'; growl_lexer_next(lexer); while (lexer->kind != GTOK_SEMICOLON && lexer->kind != GTOK_EOF && lexer->kind != GTOK_INVALID) { if (compile_token(vm, lexer, chunk)) { return 1; } } if (lexer->kind != GTOK_SEMICOLON) { compile_error(lexer, "expected ';' to close command '%s:'", name); return 1; } return compile_call(vm, lexer, name, chunk); } static int compile_word(GrowlVM *vm, GrowlLexer *lexer, Chunk *chunk) { char *text = lexer->buffer; size_t len = strlen(text); if (strcmp(text, "load") == 0) { // TODO: loading source files compile_error(lexer, "'load' nyi"); return 1; } // Compile a definition if (strcmp(text, "def") == 0) { return compile_def(vm, lexer); } // Compile a command: word: args... ; if (len > 1 && text[len - 1] == ':') { return compile_command(vm, lexer, chunk); } // Compile an integer value long value; if (is_integer(text, &value)) { size_t idx = add_constant(vm, chunk, GROWL_NUM(value)); emit_byte(vm, chunk, GOP_PUSH_CONSTANT); emit_sleb128(vm, chunk, (intptr_t)idx); growl_lexer_next(lexer); return 0; } return compile_call(vm, lexer, text, chunk); } static int compile_token(GrowlVM *vm, GrowlLexer *lexer, Chunk *chunk) { switch (lexer->kind) { case GTOK_WORD: return compile_word(vm, lexer, chunk); case GTOK_STRING: return compile_string(vm, lexer, chunk); case GTOK_LBRACKET: return compile_quotation(vm, lexer, chunk); case GTOK_SEMICOLON: case GTOK_RPAREN: case GTOK_RBRACKET: case GTOK_RBRACE: compile_error(lexer, "unexpected token '%c'", lexer->kind); return 1; case GTOK_INVALID: compile_error(lexer, "invalid token"); return 1; default: compile_error(lexer, "unhandled token type '%c'", lexer->kind); return 1; } } Growl growl_compile(GrowlVM *vm, GrowlLexer *lexer) { Chunk chunk = {0}; growl_lexer_next(lexer); while (lexer->kind != GTOK_EOF) { if (compile_token(vm, lexer, &chunk)) return GROWL_NIL; } emit_byte(vm, &chunk, GOP_RETURN); optimize_tail_calls(&chunk); return growl_make_quotation(vm, chunk.data, chunk.count, chunk.constants.data, chunk.constants.count); }