diff --git a/meson.build b/meson.build index 0f3bcc3..8f9a9fd 100644 --- a/meson.build +++ b/meson.build @@ -1,79 +1,45 @@ project( 'growl', 'c', - 'cpp', meson_version: '>= 1.3.0', version: '0.1', default_options: [ 'buildtype=debugoptimized', 'c_std=gnu11', - 'cpp_std=c++20', 'warning_level=2', ], ) -libutf = subproject('libutf') -libutf_dep = libutf.get_variable('libutf_dep') - growl_sources = [ - 'src/arena.c', - 'src/chunk.c', - 'src/compile.c', - 'src/debug.c', - 'src/dictionary.c', - 'src/file.c', - 'src/lexer.c', - 'src/object.c', - 'src/gc.c', - 'src/parser.c', - 'src/primitive.c', - 'src/print.c', - 'src/stream.c', - 'src/string.c', - 'src/userdata.c', - 'src/vm.c', - 'src/vendor/linenoise.c', - 'src/vendor/yar.c', -] - -growl = executable( - 'growl', + 'src/core/alien.c', + 'src/core/arena.c', + 'src/core/callable.c', + 'src/core/compiler.c', + 'src/core/dictionary.c', + 'src/core/disasm.c', + 'src/core/file.c', + 'src/core/gc.c', + 'src/core/hash.c', + 'src/core/lexer.c', + 'src/core/list.c', + 'src/core/native.c', + 'src/core/print.c', + 'src/core/sleb128.c', + 'src/core/string.c', + 'src/core/table.c', + 'src/core/tuple.c', + 'src/core/value.c', + 'src/core/vm.c', 'src/main.c', - growl_sources, - dependencies: [libutf_dep], - install: true, -) - -growlnext_sources = [ - 'next/core/alien.c', - 'next/core/arena.c', - 'next/core/callable.c', - 'next/core/compiler.c', - 'next/core/dictionary.c', - 'next/core/disasm.c', - 'next/core/file.c', - 'next/core/gc.c', - 'next/core/hash.c', - 'next/core/lexer.c', - 'next/core/list.c', - 'next/core/native.c', - 'next/core/print.c', - 'next/core/sleb128.c', - 'next/core/string.c', - 'next/core/table.c', - 'next/core/tuple.c', - 'next/core/value.c', - 'next/core/vm.c', - 'next/main.c', ] cc = meson.get_compiler('c') m_dep = cc.find_library('m', required: false) -growlnext = executable( - 'growlnext', - growlnext_sources, +growl = executable( + 'growl', + growl_sources, dependencies: [m_dep], - include_directories: ['next/include'], + include_directories: ['src/include'], install: true, ) diff --git a/next/main.c b/next/main.c deleted file mode 100644 index 4976e06..0000000 --- a/next/main.c +++ /dev/null @@ -1,28 +0,0 @@ -#include -#include - -int main(void) { - GrowlVM *vm = growl_vm_init(); - growl_register_file_library(vm); - GrowlLexer lexer = {0}; - lexer.file = stdin; - - Growl obj = growl_compile(vm, &lexer); - if (obj != GROWL_NIL) { - GrowlQuotation *quot = growl_unwrap_quotation(obj); - if (!growl_vm_execute(vm, quot)) { - if (vm->sp != vm->wst) { - fprintf(stderr, "Stack:"); - for (Growl *p = vm->wst; p < vm->sp; p++) { - putc(' ', stderr); - growl_print_to(stderr, *p); - } - putchar('\n'); - } - } - } - - growl_gc_collect(vm); - growl_vm_free(vm); - return 0; -} diff --git a/src/arena.c b/src/arena.c deleted file mode 100644 index 816de2f..0000000 --- a/src/arena.c +++ /dev/null @@ -1,31 +0,0 @@ -#include "arena.h" - -#include -#include -#include - -V *_arena_alloc(Ar *ar, I count, I size, I align) { - I pad = -(U)ar->start & (align - 1); - assert(count < (ar->end - ar->start - pad) / size); - V *r = ar->start + pad; - ar->start += pad + count * size; - return memset(r, 0, count * size); -} - -V arena_init(Ar *ar, Z size) { - ar->data = malloc(size); - ar->start = ar->data; - ar->end = ar->start + size; -} - -V arena_free(Ar *ar) { - free(ar->data); - ar->data = ar->start = ar->end = NULL; -} - -char *arena_strdup(Ar *ar, const char *str) { - Z len = strlen(str) + 1; - char *copy = arena_alloc(ar, len, char); - memcpy(copy, str, len); - return copy; -} diff --git a/src/arena.h b/src/arena.h deleted file mode 100644 index 5ce565d..0000000 --- a/src/arena.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef ARENA_H -#define ARENA_H - -#include "common.h" - -typedef struct Ar { - U8 *data; - U8 *start, *end; -} Ar; - -#define arena_alloc(a, n, t) (t *)_arena_alloc(a, n, sizeof(t), _Alignof(t)) -V *_arena_alloc(Ar *, ptrdiff_t, ptrdiff_t, ptrdiff_t); - -V arena_init(Ar *, Z); -V arena_free(Ar *); -char *arena_strdup(Ar *, const char *); - -#endif diff --git a/src/chunk.c b/src/chunk.c deleted file mode 100644 index 2e5d892..0000000 --- a/src/chunk.c +++ /dev/null @@ -1,91 +0,0 @@ -#include - -#include "chunk.h" -#include "vendor/yar.h" - -#if CHUNK_DEBUG -#include -#endif - -Bc *chunk_new(const char *name) { - Bc *chunk = calloc(1, sizeof(Bc)); - chunk->name = name; - chunk->ref = 1; -#if CHUNK_DEBUG - fprintf(stderr, "DEBUG: created chunk %s at %p\n", chunk->name, (V *)chunk); -#endif - return chunk; -} - -V chunk_acquire(Bc *chunk) { -#if CHUNK_DEBUG - fprintf(stderr, "DEBUG: acquiring chunk %s at %p\n", chunk->name, (V *)chunk); -#endif - chunk->ref++; -} -V chunk_release(Bc *chunk) { -#if CHUNK_DEBUG - fprintf(stderr, "DEBUG: releasing chunk %s at %p\n", chunk->name, (V *)chunk); -#endif - - if (--chunk->ref == 0) { -#if CHUNK_DEBUG - fprintf(stderr, "DEBUG: freeing chunk %s at %p\n", chunk->name, (V *)chunk); -#endif - yar_free(&chunk->constants); - yar_free(&chunk->lines); - yar_free(&chunk->symbols); - yar_free(chunk); - free(chunk); - } -} - -V chunk_emit_byte(Bc *chunk, U8 byte) { *yar_append(chunk) = byte; } - -V chunk_emit_sleb128(Bc *chunk, I num) { - I more = 1; - while (more) { - U8 byte = num & 0x7f; - num >>= 7; - if ((num == 0 && !(byte & 0x40)) || (num == -1 && (byte & 0x40))) { - more = 0; - } else { - byte |= 0x80; - } - chunk_emit_byte(chunk, byte); - } -} - -I chunk_add_constant(Bc *chunk, O value) { - I mark = chunk->constants.count; - *yar_append(&chunk->constants) = value; - return mark; -} - -V chunk_emit_byte_with_line(Bc *chunk, U8 byte, I line, I col) { - *yar_append(chunk) = byte; - if (chunk->lines.count == 0 || - chunk->lines.items[chunk->lines.count - 1].row != line || - chunk->lines.items[chunk->lines.count - 1].col != col) { - Bl *entry = yar_append(&chunk->lines); - entry->offset = chunk->count - 1; - entry->row = line; - entry->col = col; - } -} - -I chunk_get_line(Bc *chunk, Z offset, I *out_col) { - if (chunk->lines.count == 0) - return -1; - Z left = 0, right = chunk->lines.count - 1; - while (left < right) { - Z mid = left + (right - left + 1) / 2; - if (chunk->lines.items[mid].offset <= offset) - left = mid; - else - right = mid - 1; - } - if (out_col) - *out_col = chunk->lines.items[left].col; - return chunk->lines.items[left].row; -} diff --git a/src/chunk.h b/src/chunk.h deleted file mode 100644 index 4946394..0000000 --- a/src/chunk.h +++ /dev/null @@ -1,50 +0,0 @@ -#ifndef CHUNK_H -#define CHUNK_H - -#define CHUNK_DEBUG 0 - -#include "common.h" -#include "object.h" - -typedef struct Bl { - Z offset; - I row; - I col; -} Bl; - -typedef struct Bs { - const char *name; - struct Dt *resolved; -} Bs; - -typedef struct Bc { - I ref; - const char *name; - U8 *items; - Z count, capacity; - struct { - O *items; - Z count, capacity; - } constants; - struct { - Bl *items; - Z count, capacity; - } lines; - struct { - Bs *items; - Z count, capacity; - } symbols; -} Bc; - -Bc *chunk_new(const char *); -V chunk_acquire(Bc *); -V chunk_release(Bc *); - -V chunk_emit_byte(Bc *, U8); -V chunk_emit_sleb128(Bc *, I); -I chunk_add_constant(Bc *, O); - -V chunk_emit_byte_with_line(Bc *, U8, I, I); -I chunk_get_line(Bc *, Z, I*); - -#endif diff --git a/src/common.h b/src/common.h deleted file mode 100644 index a8c64ba..0000000 --- a/src/common.h +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef COMMON_H -#define COMMON_H - -#include -#include - -typedef void V; -typedef intptr_t I; -typedef uintptr_t U; -typedef double F; -typedef size_t Z; -typedef uint8_t U8; -typedef uint32_t U32; -typedef uint64_t U64; - -#endif diff --git a/src/compile.c b/src/compile.c deleted file mode 100644 index 747f048..0000000 --- a/src/compile.c +++ /dev/null @@ -1,332 +0,0 @@ -#include -#include -#include - -#include "chunk.h" -#include "compile.h" -#include "debug.h" -#include "gc.h" -#include "object.h" -#include "parser.h" -#include "src/primitive.h" -#include "string.h" -#include "vendor/yar.h" -#include "vm.h" - -// clang-format off -struct { - const char *name; - U8 opcode[8]; -} primitives[] = { - {"nil", {OP_NIL, 0}}, - {"dup", {OP_DUP, 0}}, - {"drop", {OP_DROP, 0}}, - {"swap", {OP_SWAP, 0}}, - {"2dup", {OP_2DUP, 0}}, - {"2drop", {OP_2DROP, 0}}, - {"2swap", {OP_2SWAP, 0}}, - {"2over", {OP_2TOR, OP_2DUP, OP_2FROMR, OP_2SWAP, 0}}, - {"over", {OP_OVER, 0}}, - {"nip", {OP_NIP, 0}}, - {"bury", {OP_BURY, 0}}, - {"dig", {OP_DIG, 0}}, - {">r", {OP_TOR, 0}}, - {"r>", {OP_FROMR, 0}}, - {"2>r", {OP_2TOR, 0}}, - {"2r>", {OP_2FROMR, 0}}, - {"if", {OP_CHOOSE, OP_CALL, 0}}, - {"call", {OP_CALL, 0}}, - {"compose", {OP_COMPOSE, 0}}, - {"curry", {OP_CURRY, 0}}, - {"?", {OP_CHOOSE, 0}}, - {"+", {OP_ADD, 0}}, - {"-", {OP_SUB, 0}}, - {"*", {OP_MUL, 0}}, - {"/", {OP_DIV, 0}}, - {"%", {OP_MOD, 0}}, - {"logand", {OP_LOGAND, 0}}, - {"logor", {OP_LOGOR, 0}}, - {"logxor", {OP_LOGXOR, 0}}, - {"lognot", {OP_LOGNOT, 0}}, - {"=", {OP_EQ, 0}}, - {"<>", {OP_NEQ, 0}}, - {"<", {OP_LT, 0}}, - {">", {OP_GT, 0}}, - {"<=", {OP_LTE, 0}}, - {">=", {OP_GTE, 0}}, - {"and", {OP_AND, 0}}, - {"or", {OP_OR, 0}}, - {"^", {OP_CONCAT, 0}}, - {NULL, {0}}, -}; -// clang-format on - -V compiler_init(Cm *cm, Vm *vm, const char *name) { - cm->vm = vm; - cm->arena = &vm->arena; - cm->dictionary = &vm->dictionary; - cm->chunk = chunk_new(name); -} - -V compiler_deinit(Cm *cm) { cm->dictionary = NULL; } - -static I peek_sleb128(U8 *ptr, I *out_value) { - I result = 0; - I shift = 0; - U8 byte; - I bytes = 0; - - do { - byte = ptr[bytes]; - bytes++; - result |= (I)(byte & 0x7F) << shift; - shift += 7; - } while (byte & 0x80); - - if ((shift < 64) && (byte & 0x40)) { - result |= -(1LL << shift); - } - - if (out_value) - *out_value = result; - return bytes; -} - -static V optim_tailcall(Bc *chunk) { - Z i = 0; - while (i < chunk->count) { - U8 opcode = chunk->items[i]; - if (opcode == OP_DOWORD) { - I ofs = peek_sleb128(&chunk->items[i + 1], NULL); - Z next = i + 1 + ofs; - if (next < chunk->count && chunk->items[next] == OP_RETURN) { - chunk->items[i] = OP_TAIL_DOWORD; - } - i++; - } else if (opcode == OP_CALL) { - Z ofs = i + 1; - if (ofs < chunk->count && chunk->items[ofs] == OP_RETURN) { - chunk->items[i] = OP_TAIL_CALL; - } - i++; - } else if (opcode == OP_CONST) { - I ofs = peek_sleb128(&chunk->items[i + 1], NULL); - i += 1 + ofs; - } else { - i++; - } - } -} - -static I compile_expr(Cm *cm, Ast *node); - -static I compile_constant(Cm *cm, O value, I line, I col) { - I idx = chunk_add_constant(cm->chunk, value); - chunk_emit_byte_with_line(cm->chunk, OP_CONST, line, col); - chunk_emit_sleb128(cm->chunk, idx); - return 1; -} - -static I add_sym(Bc *chunk, const char *name, Dt *word) { - for (Z i = 0; i < chunk->symbols.count; i++) { - if (strcmp(chunk->symbols.items[i].name, name) == 0) - return i; - } - Z idx = chunk->symbols.count; - Bs *sym = yar_append(&chunk->symbols); - sym->name = name; - sym->resolved = word; - return idx; -} - -static I compile_call(Cm *cm, const char *name, I line, I col) { - for (Z i = 0; primitives[i].name != NULL; i++) { - if (strcmp(name, primitives[i].name) == 0) { - for (Z j = 0; primitives[i].opcode[j] != 0; j++) - chunk_emit_byte_with_line(cm->chunk, primitives[i].opcode[j], line, - col); - return 1; - } - } - - I prim_idx = prim_find(name); - if (prim_idx != -1) { - chunk_emit_byte_with_line(cm->chunk, OP_PRIM, line, col); - chunk_emit_sleb128(cm->chunk, prim_idx); - return 1; - } - - Dt *word = upsert(cm->dictionary, name, NULL); - if (!word) { - fprintf(stderr, "compiler error at %ld:%ld: undefined word '%s'\n", - line + 1, col + 1, name); - return 0; - } - I idx = add_sym(cm->chunk, name, word); - chunk_emit_byte_with_line(cm->chunk, OP_DOWORD, line, col); - chunk_emit_sleb128(cm->chunk, idx); - return 1; -} - -static I compile_command(Cm *cm, Ast *node) { - for (size_t i = 0; i < node->children.count; i++) { - if (!compile_expr(cm, node->children.items[i])) - return 0; - } - return compile_call(cm, node->name, node->line, node->col); -} - -static I compile_definition(Cm *cm, Ast *node) { - const char *name = arena_strdup(cm->arena, node->name); - Dt *entry = upsert(cm->dictionary, name, cm->arena); - - Cm inner = {0}; - inner.arena = cm->arena; - inner.chunk = chunk_new(name); - inner.vm = cm->vm; - inner.dictionary = cm->dictionary; - - for (size_t i = 0; i < node->children.count; i++) { - if (!compile_expr(&inner, node->children.items[i])) { - chunk_release(inner.chunk); - return 0; - } - } - - chunk_emit_byte_with_line(inner.chunk, OP_RETURN, node->line, node->col); - optim_tailcall(inner.chunk); - - entry->chunk = inner.chunk; - -#if COMPILER_DEBUG - disassemble(inner.chunk, name, cm->dictionary); -#endif - - return 1; -} - -static O compile_quotation_obj(Cm *cm, Ast *node) { - Cm inner = {0}; - inner.arena = cm->arena; - - inner.chunk = chunk_new(""); - inner.vm = cm->vm; - inner.dictionary = cm->dictionary; - - for (size_t i = 0; i < node->children.count; i++) { - if (!compile_expr(&inner, node->children.items[i])) { - chunk_release(inner.chunk); - return NIL; - } - } - chunk_emit_byte_with_line(inner.chunk, OP_RETURN, node->line, node->col); - optim_tailcall(inner.chunk); - - Hd *hd = gc_alloc(cm->vm, sizeof(Hd) + sizeof(Bc *)); - hd->type = OBJ_QUOT; - Bc **chunk_ptr = (Bc **)(hd + 1); - *chunk_ptr = inner.chunk; - - return BOX(hd); -} - -static I compile_quotation(Cm *cm, Ast *node) { - O obj = compile_quotation_obj(cm, node); - if (obj == NIL) - return 0; - return compile_constant(cm, obj, node->line, node->col); -} - -static I compile_pragma(Cm *cm, Ast *node) { - if (strcmp(node->name, "#load") == 0) { - if (node->children.count == 0) { - fprintf(stderr, "compiler error: #load requires argument\n"); - return 0; - } - Ast *arg = node->children.items[0]; - if (arg->type != AST_STR) { - fprintf(stderr, "compiler error: #load requires string\n"); - return 0; - } - - char *fname = arg->name; - FILE *f = fopen(fname, "rb"); - if (!f) { - fprintf(stderr, "compiler error: cannot open file '%s'\n", fname); - return 0; - } - - Stream s = {filestream_vtable, f}; - Lx *lx = lexer_make(&s); - Ast *root = parser_parse(lx); - - I success = 1; - for (size_t i = 0; i < root->children.count; i++) { - if (!compile_expr(cm, root->children.items[i])) { - success = 0; - break; - } - } - - ast_free(root); - lexer_free(lx); - fclose(f); - return success; - } - fprintf(stderr, "compiler warning: unknown pragma \"%s\"\n", node->name); - return 1; -} - -static I compile_expr(Cm *cm, Ast *node) { - if (!node) - return 0; - switch (node->type) { - case AST_INT: { - O num = NUM(node->int_val); - return compile_constant(cm, num, node->line, node->col); - } - case AST_STR: { - O obj = string_make(cm->vm, node->name, -1); - return compile_constant(cm, obj, node->line, node->col); - } - case AST_WORD: - return compile_call(cm, node->name, node->line, node->col); - case AST_QUOTE: - return compile_quotation(cm, node); - case AST_DEF: - return compile_definition(cm, node); - case AST_CMD: - return compile_command(cm, node); - case AST_PRAGMA: - return compile_pragma(cm, node); - case AST_PROGRAM: - for (size_t i = 0; i < node->children.count; i++) { - if (!compile_expr(cm, node->children.items[i])) - return 0; - } - return 1; - default: - fprintf(stderr, "compiler error: nyi ast type %d\n", (int)node->type); - return 0; - } -} - -Bc *compile_program(Cm *cm, Ast *ast) { - if (ast->type == AST_PROGRAM) { - for (size_t i = 0; i < ast->children.count; i++) { - if (!compile_expr(cm, ast->children.items[i])) { - chunk_release(cm->chunk); - return NULL; - } - } - } else { - if (!compile_expr(cm, ast)) { - chunk_release(cm->chunk); - return NULL; - } - } - - chunk_emit_byte(cm->chunk, OP_RETURN); - optim_tailcall(cm->chunk); - return cm->chunk; -} diff --git a/src/compile.h b/src/compile.h deleted file mode 100644 index 51cea45..0000000 --- a/src/compile.h +++ /dev/null @@ -1,21 +0,0 @@ -#include "common.h" - -#include "arena.h" -#include "chunk.h" -#include "gc.h" -#include "vm.h" -#include "parser.h" - -#define COMPILER_DEBUG 0 - -/** Compiler context */ -typedef struct Cm { - Vm *vm; // Parent context - Ar *arena; - Bc *chunk; - Dt **dictionary; -} Cm; - -V compiler_init(Cm *, Vm *, const char *); -V compiler_deinit(Cm *); -Bc *compile_program(Cm *, Ast *); diff --git a/next/core/alien.c b/src/core/alien.c similarity index 100% rename from next/core/alien.c rename to src/core/alien.c diff --git a/next/core/arena.c b/src/core/arena.c similarity index 100% rename from next/core/arena.c rename to src/core/arena.c diff --git a/next/core/callable.c b/src/core/callable.c similarity index 100% rename from next/core/callable.c rename to src/core/callable.c diff --git a/next/core/compiler.c b/src/core/compiler.c similarity index 100% rename from next/core/compiler.c rename to src/core/compiler.c diff --git a/next/core/dictionary.c b/src/core/dictionary.c similarity index 100% rename from next/core/dictionary.c rename to src/core/dictionary.c diff --git a/next/core/disasm.c b/src/core/disasm.c similarity index 100% rename from next/core/disasm.c rename to src/core/disasm.c diff --git a/next/core/dynarray.h b/src/core/dynarray.h similarity index 100% rename from next/core/dynarray.h rename to src/core/dynarray.h diff --git a/next/core/file.c b/src/core/file.c similarity index 100% rename from next/core/file.c rename to src/core/file.c diff --git a/next/core/gc.c b/src/core/gc.c similarity index 100% rename from next/core/gc.c rename to src/core/gc.c diff --git a/next/core/hash.c b/src/core/hash.c similarity index 100% rename from next/core/hash.c rename to src/core/hash.c diff --git a/next/core/lexer.c b/src/core/lexer.c similarity index 100% rename from next/core/lexer.c rename to src/core/lexer.c diff --git a/next/core/list.c b/src/core/list.c similarity index 100% rename from next/core/list.c rename to src/core/list.c diff --git a/next/core/native.c b/src/core/native.c similarity index 100% rename from next/core/native.c rename to src/core/native.c diff --git a/next/core/opcodes.h b/src/core/opcodes.h similarity index 100% rename from next/core/opcodes.h rename to src/core/opcodes.h diff --git a/next/core/print.c b/src/core/print.c similarity index 100% rename from next/core/print.c rename to src/core/print.c diff --git a/next/core/sleb128.c b/src/core/sleb128.c similarity index 100% rename from next/core/sleb128.c rename to src/core/sleb128.c diff --git a/next/core/sleb128.h b/src/core/sleb128.h similarity index 100% rename from next/core/sleb128.h rename to src/core/sleb128.h diff --git a/next/core/string.c b/src/core/string.c similarity index 100% rename from next/core/string.c rename to src/core/string.c diff --git a/next/core/table.c b/src/core/table.c similarity index 100% rename from next/core/table.c rename to src/core/table.c diff --git a/next/core/tuple.c b/src/core/tuple.c similarity index 100% rename from next/core/tuple.c rename to src/core/tuple.c diff --git a/next/core/value.c b/src/core/value.c similarity index 100% rename from next/core/value.c rename to src/core/value.c diff --git a/next/core/vm.c b/src/core/vm.c similarity index 100% rename from next/core/vm.c rename to src/core/vm.c diff --git a/src/debug.c b/src/debug.c deleted file mode 100644 index 6374c16..0000000 --- a/src/debug.c +++ /dev/null @@ -1,146 +0,0 @@ -#include - -#include "chunk.h" -#include "debug.h" -#include "dictionary.h" -#include "primitive.h" -#include "print.h" -#include "vm.h" - -static I decode_sleb128(U8 *ptr, Z *bytes_read) { - I result = 0; - I shift = 0; - U8 byte; - Z count = 0; - do { - byte = ptr[count++]; - result |= (I)(byte & 0x7F) << shift; - shift += 7; - } while (byte & 0x80); - if ((shift < 64) && (byte & 0x40)) - result |= -(1LL << shift); - *bytes_read = count; - return result; -} - -static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent); - -static V dis(Bc *chunk, Dt **dictionary, I indent) { - Z offset = 0; - while (offset < chunk->count) - offset = dis_instr(chunk, offset, dictionary, indent); -} - -V disassemble(Bc *chunk, const char *name, Dt **dictionary) { - printf("=== %s ===\n", name); - dis(chunk, dictionary, 0); -} - -static Z dis_instr(Bc *chunk, Z offset, Dt **dictionary, I indent) { - for (I i = 0; i < indent; i++) - printf(" "); - printf("%04zu ", offset); - - I col = -1; - I line = chunk_get_line(chunk, offset, &col); - if (line >= 0) { - printf("%4ld:%-3ld ", line + 1, col + 1); - } else { - printf(" "); - } - - U8 opcode = chunk->items[offset++]; - -#define CASE(name) case OP_##name: -#define SIMPLE(name) \ - case OP_##name: \ - printf(#name "\n"); \ - return offset; - - switch (opcode) { - SIMPLE(NOP); - SIMPLE(NIL); - CASE(CONST) { - Z bytes_read; - I idx = decode_sleb128(&chunk->items[offset], &bytes_read); - printf("CONST %ld", idx); - if (idx >= 0 && idx < (I)chunk->constants.count) { - O obj = chunk->constants.items[idx]; - printf(" ("); - print(obj); - printf(")"); - - if (!IMM(obj) && obj != NIL && type(obj) == OBJ_QUOT) { - putchar('\n'); - Hd *hdr = UNBOX(obj); - Bc **chunk_ptr = (Bc **)(hdr + 1); - Bc *quot_chunk = *chunk_ptr; - dis(quot_chunk, dictionary, indent + 1); - return offset + bytes_read; - } - } - printf("\n"); - return offset + bytes_read; - } - SIMPLE(DROP); - SIMPLE(DUP); - SIMPLE(SWAP); - SIMPLE(NIP); - SIMPLE(OVER); - SIMPLE(BURY); - SIMPLE(DIG); - SIMPLE(TOR); - SIMPLE(FROMR); - CASE(DOWORD) { - Z bytes_read; - I idx = decode_sleb128(&chunk->items[offset], &bytes_read); - Dt *word = chunk->symbols.items[idx].resolved; - printf("DOWORD \"%s\"\n", word->name); - return offset + bytes_read; - } - SIMPLE(CALL); - CASE(TAIL_DOWORD) { - Z bytes_read; - I idx = decode_sleb128(&chunk->items[offset], &bytes_read); - Dt *word = chunk->symbols.items[idx].resolved; - printf("TAIL_DOWORD \"%s\"\n", word->name); - return offset + bytes_read; - } - SIMPLE(TAIL_CALL); - CASE(PRIM) { - Z bytes_read; - I idx = decode_sleb128(&chunk->items[offset], &bytes_read); - Pr prim = primitives_table[idx]; - printf("PRIM \"%s\"\n", prim.name); - return offset + bytes_read; - } - SIMPLE(COMPOSE); - SIMPLE(CURRY); - SIMPLE(RETURN); - SIMPLE(CHOOSE); - SIMPLE(ADD); - SIMPLE(SUB); - SIMPLE(MUL); - SIMPLE(DIV); - SIMPLE(MOD); - SIMPLE(LOGAND); - SIMPLE(LOGOR); - SIMPLE(LOGXOR); - SIMPLE(LOGNOT); - SIMPLE(EQ); - SIMPLE(NEQ); - SIMPLE(LT); - SIMPLE(GT); - SIMPLE(LTE); - SIMPLE(GTE); - SIMPLE(AND); - SIMPLE(OR); - SIMPLE(CONCAT); - default: - printf("??? (%d)\n", opcode); - return offset; - } - -#undef SIMPLE -#undef CASE -} diff --git a/src/debug.h b/src/debug.h deleted file mode 100644 index 2a1251c..0000000 --- a/src/debug.h +++ /dev/null @@ -1,5 +0,0 @@ -#include "chunk.h" -#include "common.h" -#include "dictionary.h" - -V disassemble(Bc *, const char*, Dt **); diff --git a/src/dictionary.c b/src/dictionary.c deleted file mode 100644 index 7771780..0000000 --- a/src/dictionary.c +++ /dev/null @@ -1,39 +0,0 @@ -#include - -#include "arena.h" -#include "common.h" -#include "dictionary.h" - -U64 hash64(const char *str) { - I len = strlen(str); - U64 h = 0x100; - for (I i = 0; i < len; i++) { - h ^= str[i] & 255; - h *= 1111111111111111111; - } - return h; -} - -Dt *upsert(Dt **env, const char *key, Ar *a) { - U64 hash = hash64(key); - for (U64 h = hash; *env; h <<= 2) { - if (hash == (*env)->hash) - return *env; - env = &(*env)->child[h >> 62]; - } - if (!a) - return 0; - *env = arena_alloc(a, 1, Dt); - (*env)->name = key; - (*env)->hash = hash; - return *env; -} - -Dt *lookup_hash(Dt **env, U64 hash) { - for (U64 h = hash; *env; h <<= 2) { - if ((*env)->hash == hash) - return *env; - env = &(*env)->child[h >> 62]; - } - return NULL; -} diff --git a/src/dictionary.h b/src/dictionary.h deleted file mode 100644 index 8a6e502..0000000 --- a/src/dictionary.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef DICTIONARY_H -#define DICTIONARY_H - -#include "arena.h" -#include "chunk.h" - -typedef struct Dt Dt; -struct Dt { - Dt *child[4]; - const char *name; - U64 hash; - Bc *chunk; -}; - -U64 hash64(const char *); -Dt *upsert(Dt **, const char *, Ar *); -Dt *lookup_hash(Dt **, U64); - -#endif diff --git a/src/file.c b/src/file.c deleted file mode 100644 index 1aa010c..0000000 --- a/src/file.c +++ /dev/null @@ -1,83 +0,0 @@ -#include -#include - -#include "src/gc.h" -#include "src/object.h" -#include "string.h" -#include "userdata.h" -#include "vm.h" - -static V finalizer(V *data); - -// clang-format off -Ut userdata_file = { - .name = "file", - .finalizer = finalizer -}; -// clang-format on - -I prim_file_stdin(Vm *vm) { - vm_push(vm, vm->stdin); - return 0; -} - -I prim_file_stdout(Vm *vm) { - vm_push(vm, vm->stdout); - return 0; -} - -I prim_file_stderr(Vm *vm) { - vm_push(vm, vm->stderr); - return 0; -} - -I prim_file_fprint(Vm *vm) { - O file_obj = vm_pop(vm); - O string_obj = vm_pop(vm); - - Ud *file_ud = userdata_unwrap(file_obj, &userdata_file); - if (file_ud == NULL) { - fprintf(stderr, "expected file object\n"); - return VM_ERR_TYPE; - }; - - Str *str = string_unwrap(string_obj); - if (str == NULL) { - fprintf(stderr, "expected string\n"); - return VM_ERR_TYPE; - } - - fwrite(str->data, sizeof(char), str->len, (FILE *)file_ud->data); - return 0; -} - -I prim_file_fgetline(Vm *vm) { - O file_obj = vm_pop(vm); - I mark = gc_mark(&vm->gc); - gc_addroot(&vm->gc, &file_obj); - - Ud *file_ud = userdata_unwrap(file_obj, &userdata_file); - if (file_ud == NULL) { - fprintf(stderr, "expected file object\n"); - return VM_ERR_TYPE; - } - - char *lineptr = NULL; - size_t size; - I len = getline(&lineptr, &size, (FILE *)file_ud->data); - if (len == -1) { - vm_push(vm, NIL); - } else { - vm_push(vm, string_make(vm, lineptr, len)); - } - free(lineptr); - - gc_reset(&vm->gc, mark); - return 0; -} - -static V finalizer(V *data) { - FILE *f = (FILE *)data; - if (f && f != stdin && f != stdout && f != stderr) - fclose(f); -} diff --git a/src/file.h b/src/file.h deleted file mode 100644 index cb10b11..0000000 --- a/src/file.h +++ /dev/null @@ -1,9 +0,0 @@ -#include "userdata.h" - -extern Ut userdata_file; - -I prim_file_stdin(Vm *); -I prim_file_stdout(Vm *); -I prim_file_stderr(Vm *); -I prim_file_fprint(Vm *); -I prim_file_fgetline(Vm *vm); diff --git a/src/gc.c b/src/gc.c deleted file mode 100644 index 7802a62..0000000 --- a/src/gc.c +++ /dev/null @@ -1,215 +0,0 @@ -#include -#include -#include -#include - -#include "chunk.h" -#include "gc.h" -#include "object.h" -#include "userdata.h" -#include "vendor/yar.h" -#include "vm.h" - -#define ALIGN(n) (((n) + 7) & ~7) -static inline int infrom(Gc *gc, V *ptr) { - const U8 *x = (const U8 *)ptr; - return (x >= gc->from.start && x < gc->from.end); -} - -V gc_addroot(Gc *gc, O *ptr) { *yar_append(&gc->roots) = ptr; } -I gc_mark(Gc *gc) { return gc->roots.count; } -V gc_reset(Gc *gc, I mark) { gc->roots.count = mark; } - -static O copy(Gc *gc, Hd *hdr) { - assert(infrom(gc, hdr)); - assert(hdr->type != OBJ_FWD); - - Z sz = ALIGN(hdr->size); - Hd *new = (Hd *)gc->to.free; - gc->to.free += sz; - memcpy(new, hdr, sz); - - hdr->type = OBJ_FWD; - O *obj = (O *)(hdr + 1); - *obj = BOX(new); - return *obj; -} - -static O forward(Gc *gc, O obj) { - if (obj == 0) - return 0; - if (IMM(obj)) - return obj; - if (!infrom(gc, (V *)obj)) - return obj; - - Hd *hdr = UNBOX(obj); - if (hdr->type == OBJ_FWD) { - O *o = (O *)(hdr + 1); - return *o; - } else { - return copy(gc, hdr); - } -} - -#if GC_DEBUG -static V printstats(Gc *gc, const char *label) { - Z used = (Z)(gc->from.free - gc->from.start); - fprintf(stderr, "[%s] used=%zu/%zu bytes (%.1f%%)\n", label, used, - (Z)HEAP_BYTES, (F)used / (F)HEAP_BYTES * 100.0); -} -#endif - -V gc_collect(Vm *vm, I final) { - Gc *gc = &vm->gc; - uint8_t *scan = gc->to.free; - -#if GC_DEBUG - printstats(gc, "before GC"); -#endif - - if (!final) { - // Final GC ignores roots. - for (Z i = 0; i < gc->roots.count; i++) { - O *o = gc->roots.items[i]; - *o = forward(gc, *o); - } - - Dt *dstack[256]; - Dt **dsp = dstack; - *dsp++ = vm->dictionary; - - // Forward constants referenced by dictionary entries - while (dsp > dstack) { - Dt *node = *--dsp; - if (!node) - continue; - if (node->chunk != NULL) { - for (Z i = 0; i < node->chunk->constants.count; i++) { - node->chunk->constants.items[i] = - forward(gc, node->chunk->constants.items[i]); - } - } - for (I i = 0; i < 4; i++) { - if (node->child[i] != NULL) - *dsp++ = node->child[i]; - } - } - } - - while (scan < gc->to.free) { - if (scan >= gc->to.end) { - fprintf(stderr, "fatal GC error: out of memory\n"); - abort(); - } - Hd *hdr = (Hd *)scan; - switch (hdr->type) { - case OBJ_STR: - break; - case OBJ_QUOT: { - Bc **chunk_ptr = (Bc **)(hdr + 1); - Bc *chunk = *chunk_ptr; - for (Z i = 0; i < chunk->constants.count; i++) - chunk->constants.items[i] = forward(gc, chunk->constants.items[i]); - break; - } - case OBJ_COMPOSE: { - Qo *comp = (Qo *)(hdr + 1); - comp->first = forward(gc, comp->first); - comp->second = forward(gc, comp->second); - break; - }; - case OBJ_CURRY: { - Qc *curry = (Qc *)(hdr + 1); - curry->value = forward(gc, curry->value); - curry->callable = forward(gc, curry->callable); - break; - }; - case OBJ_USERDATA: - break; - case OBJ_FWD: - fprintf(stderr, "fatal GC error: forwarding pointer in to-space\n"); - abort(); - default: - fprintf(stderr, "GC warning: junk object type %" PRId32 "\n", hdr->type); - } - scan += ALIGN(hdr->size); - } - - scan = gc->from.start; - while (scan < gc->from.free) { - Hd *hdr = (Hd *)scan; - if (hdr->type != OBJ_FWD) { - switch (hdr->type) { - case OBJ_QUOT: { - Bc **chunk_ptr = (Bc **)(hdr + 1); - chunk_release(*chunk_ptr); - break; - } - case OBJ_USERDATA: { - Ud *ud = (Ud *)(hdr + 1); - if (ud->kind->finalizer != NULL) - ud->kind->finalizer(ud->data); - break; - } - default: - break; - } - } - scan += ALIGN(hdr->size); - } - - Gs tmp = gc->from; - gc->from = gc->to; - gc->to = tmp; - gc->to.free = gc->to.start; - -#if GC_DEBUG - printstats(gc, "after GC"); -#endif -} - -Hd *gc_alloc(Vm *vm, Z sz) { - Gc *gc = &vm->gc; - sz = ALIGN(sz); - if (gc->from.free + sz > gc->from.end) { - gc_collect(vm, 0); - if (gc->from.free + sz > gc->from.end) { - fprintf(stderr, "out of memory (requested %" PRIdPTR "bytes\n", sz); - abort(); - } - } - Hd *hdr = (Hd *)gc->from.free; - gc->from.free += sz; - hdr->size = sz; - return hdr; -} - -V gc_init(Gc *gc) { - gc->from.start = malloc(HEAP_BYTES); - if (!gc->from.start) - goto fatal; - gc->from.end = gc->from.start + HEAP_BYTES; - gc->from.free = gc->from.start; - - gc->to.start = malloc(HEAP_BYTES); - if (!gc->to.start) - goto fatal; - gc->to.end = gc->to.start + HEAP_BYTES; - gc->to.free = gc->to.start; - - gc->roots.capacity = 0; - gc->roots.count = 0; - gc->roots.items = NULL; - return; - -fatal: - fprintf(stderr, "failed to allocate heap space\n"); - abort(); -} - -V gc_deinit(Gc *gc) { - free(gc->from.start); - free(gc->to.start); - yar_free(&gc->roots); -} diff --git a/src/gc.h b/src/gc.h deleted file mode 100644 index c3bb177..0000000 --- a/src/gc.h +++ /dev/null @@ -1,38 +0,0 @@ -#ifndef GC_H -#define GC_H - -#include "common.h" -#include "object.h" - -#define GC_DEBUG 1 -#if GC_DEBUG -#define HEAP_BYTES (8 * 1024) -#else -#define HEAP_BYTES (4 * 1024 * 1024) -#endif - -typedef struct Gs { - U8 *start, *end; - U8 *free; -} Gs; - -typedef struct Gc { - Gs from, to; - struct { - O **items; - Z count, capacity; - } roots; -} Gc; - -V gc_addroot(Gc *, O *); -I gc_mark(Gc *); -V gc_reset(Gc *, I); -V gc_init(Gc *); -V gc_deinit(Gc *); - -typedef struct Vm Vm; - -V gc_collect(Vm *, I); -Hd *gc_alloc(Vm *, Z); - -#endif diff --git a/next/include/growl.h b/src/include/growl.h similarity index 100% rename from next/include/growl.h rename to src/include/growl.h diff --git a/src/lexer.c b/src/lexer.c deleted file mode 100644 index 9b45afd..0000000 --- a/src/lexer.c +++ /dev/null @@ -1,217 +0,0 @@ -#include -#include -#include -#include - -#include "lexer.h" -#include "vendor/yar.h" - -Lx *lexer_make(Stream *s) { - Lx *lx = calloc(1, sizeof(Lx)); - lx->stream = s; - return lx; -} - -V lexer_free(Lx *lx) { - yar_free(lx); - free(lx); -} - -static int lx_getc(Lx *lx) { - int c = ST_GETC(lx->stream); - if (c == '\n') { - lx->curr_line++; - lx->curr_col = 0; - } else if (c != -1) { - lx->curr_col++; - } - return c; -} - -static void lx_ungetc(Lx *lx, int c) { - ST_UNGETC(c, lx->stream); - if (c == '\n') { - lx->curr_line--; - } else if (c != -1) { - lx->curr_col--; - } -} - -static inline int is_delimiter(int i) { - return i == '(' || i == ')' || i == '[' || i == ']' || i == '{' || i == '}' || - i == ';' || i == '\\' || i == '"'; -} - -static inline void appendrune(Lx *lx, Rune rn) { - char data[5]; - I len = runetochar(data, &rn); - yar_append_many(lx, data, len); -} - -static inline void appendbyte(Lx *lx, char byte) { *yar_append(lx) = byte; } - -static int getc_ws(Lx *lx) { - if (ST_EOF(lx->stream)) - return -1; - for (;;) { - int ch = lx_getc(lx); - if (isspace(ch)) - continue; - return ch; - } -} - -static int scanword(Lx *lx) { - int next = lx_getc(lx); - - for (;;) { - if (next == -1) { - if (lx->count == 0) - lx->kind = TOK_EOF; - appendbyte(lx, 0); - return lx->kind; - } else if (is_delimiter(next) || isspace(next)) { - lx_ungetc(lx, next); - appendbyte(lx, 0); - return lx->kind; - } else { - appendbyte(lx, next); - next = lx_getc(lx); - continue; - } - } -} - -static void scanescape(Lx *lx) { - char escbuf[7], *escptr = escbuf; - int next; - Rune tmp; - - for (;;) { - next = lx_getc(lx); - - if (next == -1) { - errx(1, "unterminated hex sequence '%s'", escbuf); - } else if (next == ';') { - *escptr = 0; - break; - } else if (!isxdigit(next)) { - errx(1, "invalid hex digit '%c'", next); - } - - if (escptr - escbuf >= 6) { - errx(1, "hex sequence too long (6 chars max.)"); - } else { - *(escptr++) = next; - } - } - - tmp = strtol(escbuf, &escptr, 16); - if (*escptr == '\0') { - if (tmp < 256) { - appendbyte(lx, (U8)(tmp & 255)); - } else { - appendrune(lx, tmp); - } - - } else { - errx(1, "invalid hex sequence '%s'", escbuf); - } -} - -static int scanstring(Lx *lx) { - int next; - - for (;;) { - next = lx_getc(lx); - switch (next) { - case -1: - goto eof; - case '\\': - next = lx_getc(lx); - if (next == -1) - goto eof; - switch (next) { - case 't': - appendbyte(lx, '\t'); - break; - case 'n': - appendbyte(lx, '\n'); - break; - case 'r': - appendbyte(lx, '\r'); - break; - case 'b': - appendbyte(lx, '\b'); - break; - case 'v': - appendbyte(lx, '\v'); - break; - case 'f': - appendbyte(lx, '\f'); - break; - case '0': - appendbyte(lx, '\0'); - break; - case 'e': - appendbyte(lx, '\x1b'); - break; - case '\\': - case '"': - appendbyte(lx, next); - break; - case 'x': - scanescape(lx); - break; - default: - return (lx->kind = TOK_INVALID); - } - break; - case '"': - appendbyte(lx, 0); - return (lx->kind = TOK_STRING); - default: - appendbyte(lx, next); - } - } - -eof: - return (lx->kind = TOK_INVALID); -} - -I lexer_next(Lx *lx) { - int next; - lx->cursor = 0; - lx->count = 0; - - if (ST_EOF(lx->stream)) { - lx->kind = TOK_EOF; - return 0; - } - - next = getc_ws(lx); - - lx->start_line = lx->curr_line; - lx->start_col = (lx->curr_col > 0) ? lx->curr_col - 1 : 0; - - switch (next) { - case '\\': - for (; next != '\n'; next = lx_getc(lx)) - ; - return lexer_next(lx); - case '(': - case ')': - case '[': - case ']': - case '{': - case '}': - case ';': - return (lx->kind = next); - case '"': - return scanstring(lx); - default: - lx_ungetc(lx, next); - lx->kind = TOK_WORD; - return scanword(lx); - }; -} diff --git a/src/lexer.h b/src/lexer.h deleted file mode 100644 index f3fa2de..0000000 --- a/src/lexer.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef LEXER_H -#define LEXER_H - -#include "common.h" -#include "stream.h" - -enum { - TOK_INVALID = -1, - TOK_EOF = 0, - TOK_WORD = 'a', - TOK_STRING = '"', - TOK_SEMICOLON = ';', - TOK_LPAREN = '(', - TOK_RPAREN = ')', - TOK_LBRACKET = '[', - TOK_RBRACKET = ']', - TOK_LBRACE = '{', - TOK_RBRACE = '}', - TOK_COMMENT = '\\', -}; - -typedef struct Lx { - I kind; - I cursor; - I curr_line, curr_col; - I start_line, start_col; - Stream *stream; - char *items; - Z count, capacity; -} Lx; - -Lx *lexer_make(Stream *); -V lexer_free(Lx *lx); -I lexer_next(Lx *); - -#endif diff --git a/src/main.c b/src/main.c index 5d292cc..4976e06 100644 --- a/src/main.c +++ b/src/main.c @@ -1,91 +1,28 @@ -#include -#include -#include +#include +#include -#include "chunk.h" -#include "compile.h" -#include "debug.h" -#include "parser.h" -#include "vm.h" +int main(void) { + GrowlVM *vm = growl_vm_init(); + growl_register_file_library(vm); + GrowlLexer lexer = {0}; + lexer.file = stdin; -#include "vendor/linenoise.h" - -#define REPL_BUFFER_SIZE 4096 - -I repl(void) { - Vm vm = {0}; - vm_init(&vm); - - char *line; - while ((line = linenoise("growl> ")) != NULL) { - Buf b = { line, (int)strlen(line), 0, -1 }; - Stream s = { bufstream_vtable, &b }; - - Lx *lx = lexer_make(&s); - Ast *root = parser_parse(lx); - - Cm cm = {0}; - compiler_init(&cm, &vm, ""); - Bc *chunk = compile_program(&cm, root); - ast_free(root); - lexer_free(lx); - - if (chunk != NULL) { - vm_run(&vm, chunk, 0); - chunk_release(chunk); - linenoiseHistoryAdd(line); + Growl obj = growl_compile(vm, &lexer); + if (obj != GROWL_NIL) { + GrowlQuotation *quot = growl_unwrap_quotation(obj); + if (!growl_vm_execute(vm, quot)) { + if (vm->sp != vm->wst) { + fprintf(stderr, "Stack:"); + for (Growl *p = vm->wst; p < vm->sp; p++) { + putc(' ', stderr); + growl_print_to(stderr, *p); + } + putchar('\n'); + } } - compiler_deinit(&cm); - linenoiseFree(line); } - vm_deinit(&vm); + + growl_gc_collect(vm); + growl_vm_free(vm); return 0; } - -I loadfile(const char *fname) { - Vm vm = {0}; - vm_init(&vm); - - FILE *f = fopen(fname, "rb"); - if (!f) { - fprintf(stderr, "error: cannot open file '%s'\n", fname); - return 1; - } - - Stream s = { filestream_vtable, f }; - Lx *lx = lexer_make(&s); - Ast *root = parser_parse(lx); - - Cm cm = {0}; - compiler_init(&cm, &vm, fname); - - Bc *chunk = compile_program(&cm, root); - ast_free(root); - lexer_free(lx); - fclose(f); - - if (chunk != NULL) { -#if COMPILER_DEBUG - disassemble(chunk, fname, &vm.dictionary); -#endif - I res = vm_run(&vm, chunk, 0); - chunk_release(chunk); - vm_deinit(&vm); - return !res; - } else { - vm_deinit(&vm); - return 1; - } -} - -int main(int argc, const char *argv[]) { - switch (argc) { - case 1: - return repl(); - case 2: - return loadfile(argv[1]); - default: - fprintf(stderr, "usage: growl [file]\n"); - return 64; - } -} diff --git a/src/object.c b/src/object.c deleted file mode 100644 index c947ee0..0000000 --- a/src/object.c +++ /dev/null @@ -1,10 +0,0 @@ -#include "object.h" - -I type(O o) { - if (o == NIL) - return OBJ_NIL; - if (IMM(o)) - return OBJ_NUM; - Hd *h = UNBOX(o); - return h->type; -} diff --git a/src/object.h b/src/object.h deleted file mode 100644 index 3233892..0000000 --- a/src/object.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef OBJECT_H -#define OBJECT_H - -#include "common.h" - -#define NIL ((O)0) -#define BOX(x) ((O)(x)) -#define UNBOX(x) ((Hd *)(x)) -#define IMM(x) ((O)(x) & (O)1) -#define NUM(x) (((O)((intptr_t)(x) << 1)) | (O)1) -#define ORD(x) ((intptr_t)(x) >> 1) - -enum { - OBJ_NIL = 0, - OBJ_NUM = 1, - OBJ_FWD = 2, - OBJ_QUOT, - OBJ_COMPOSE, - OBJ_CURRY, - OBJ_STR, - OBJ_ARRAY, - OBJ_USERDATA, -}; - -typedef uintptr_t O; - -/** Object header */ -typedef struct Hd { - U32 size, type; -} Hd; - -/** Composition */ -typedef struct Qo { - O first, second; -} Qo; - -/** Curry */ -typedef struct Qc { - O value, callable; -} Qc; // - -I type(O); -static inline I callable(O o) { - I t = type(o); - return t == OBJ_QUOT || t == OBJ_COMPOSE || t == OBJ_CURRY; -} - -#endif diff --git a/src/parser.c b/src/parser.c deleted file mode 100644 index 724bea2..0000000 --- a/src/parser.c +++ /dev/null @@ -1,156 +0,0 @@ -#include "parser.h" -#include -#include -#include - -static Ast *ast_new(I type, I line, I col) { - Ast *node = calloc(1, sizeof(Ast)); - node->type = type; - node->line = line; - node->col = col; - return node; -} - -void ast_free(Ast *ast) { - if (!ast) - return; - if (ast->name) - free(ast->name); - for (size_t i = 0; i < ast->children.count; i++) { - ast_free(ast->children.items[i]); - } - yar_free(&ast->children); - free(ast); -} - -static Ast *parse_expr_at(Lx *lx); - -static void parse_block(Lx *lx, Ast *parent, int close_token) { - while (1) { - if (lx->kind == TOK_EOF) { - if (close_token != TOK_EOF) - fprintf(stderr, "syntax error: unexpected EOF, expected '%c'\n", - close_token); - break; - } - if (lx->kind == close_token) { - lexer_next(lx); - break; - } - Ast *expr = parse_expr_at(lx); - *yar_append(&parent->children) = expr; - } -} - -static Ast *parse_expr_at(Lx *lx) { - int kind = lx->kind; - I line = lx->start_line; - I col = lx->start_col; - - if (kind == TOK_WORD) { - char *text = lx->items; - - if (strcmp(text, "def") == 0) { - Ast *node = ast_new(AST_DEF, line, col); - lexer_next(lx); - - if (lx->kind != TOK_WORD) { - fprintf(stderr, "syntax error: expected word after 'def' at %ld:%ld\n", - (long)line + 1, (long)col + 1); - return node; - } - node->name = strdup(lx->items); - lexer_next(lx); - - if (lx->kind != '{') { - fprintf(stderr, - "syntax error: expected '{' after def name at %ld:%ld\n", - (long)lx->start_line + 1, (long)lx->start_col + 1); - return node; - } - lexer_next(lx); - parse_block(lx, node, '}'); - return node; - } - - size_t len = strlen(text); - if (len > 0 && text[len - 1] == ':') { - Ast *node = ast_new(AST_CMD, line, col); - node->name = strndup(text, len - 1); - lexer_next(lx); - parse_block(lx, node, ';'); - return node; - } - - if (text[0] == '#') { - Ast *node = ast_new(AST_PRAGMA, line, col); - node->name = strdup(text); - lexer_next(lx); - if (lx->kind == '(') { - lexer_next(lx); - parse_block(lx, node, ')'); - } - return node; - } - - char *end; - long val = strtol(text, &end, 0); - if (*end == '\0') { - Ast *node = ast_new(AST_INT, line, col); - node->int_val = val; - lexer_next(lx); - return node; - } - - Ast *node = ast_new(AST_WORD, line, col); - node->name = strdup(text); - lexer_next(lx); - return node; - } - - if (kind == TOK_STRING) { - Ast *node = ast_new(AST_STR, line, col); - node->name = strdup(lx->items); - lexer_next(lx); - return node; - } - - if (kind == '[') { - Ast *node = ast_new(AST_QUOTE, line, col); - lexer_next(lx); - parse_block(lx, node, ']'); - return node; - } - - if (kind == '{') { - Ast *node = ast_new(AST_TABLE, line, col); - lexer_next(lx); - parse_block(lx, node, '}'); - return node; - } - - if (kind == '(') { - Ast *node = ast_new(AST_LIST, line, col); - lexer_next(lx); - parse_block(lx, node, ')'); - return node; - } - - if (kind == TOK_INVALID) { - fprintf(stderr, "syntax error: invalid token at %ld:%ld\n", (long)line + 1, - (long)col + 1); - } else { - fprintf(stderr, "syntax error: unexpected token '%c' (%d) at %ld:%ld\n", - kind, kind, (long)line + 1, (long)col + 1); - } - lexer_next(lx); - - return NULL; -} - -Ast *parser_parse(Lx *lx) { - Ast *root = ast_new(AST_PROGRAM, 0, 0); - lexer_next(lx); - parse_block(lx, root, TOK_EOF); - return root; -} diff --git a/src/parser.h b/src/parser.h deleted file mode 100644 index ea8ddda..0000000 --- a/src/parser.h +++ /dev/null @@ -1,35 +0,0 @@ -#ifndef PARSER_H -#define PARSER_H - -#include "common.h" -#include "lexer.h" -#include "vendor/yar.h" - -enum { - AST_PROGRAM, - AST_INT, - AST_STR, - AST_WORD, - AST_LIST, - AST_TABLE, - AST_QUOTE, - AST_DEF, - AST_CMD, - AST_PRAGMA, -}; - -typedef struct Ast { - I type; - char *name; - I int_val; - struct { - struct Ast **items; - Z count, capacity; - } children; - I line, col; -} Ast; - -Ast *parser_parse(Lx *lx); -void ast_free(Ast *ast); - -#endif diff --git a/src/primitive.c b/src/primitive.c deleted file mode 100644 index 983112e..0000000 --- a/src/primitive.c +++ /dev/null @@ -1,46 +0,0 @@ -#include -#include - -#include "primitive.h" -#include "print.h" -#include "string.h" -#include "vm.h" - -#include "file.h" - -// Pretty-printing primitives -static I prim_pprint(Vm *vm) { - println(vm_pop(vm)); - return 0; -} - -static I prim_printstack(Vm *vm) { - printf("Stk:"); - for (O *p = vm->stack; p < vm->sp; p++) { - putchar(' '); - print(*p); - } - putchar('\n'); - return 0; -} - -// clang-format off -Pr primitives_table[] = { - {".", prim_pprint}, - {".s", prim_printstack}, - {"stdin", prim_file_stdin}, - {"stdout", prim_file_stdout}, - {"stderr", prim_file_stderr}, - {"fprint", prim_file_fprint}, - {"fgetline", prim_file_fgetline}, - {NULL, NULL}, -}; -// clang-format on - -I prim_find(const char *name) { - for (Z i = 0; primitives_table[i].name != NULL; i++) { - if (strcmp(primitives_table[i].name, name) == 0) - return i; - } - return -1; -} diff --git a/src/primitive.h b/src/primitive.h deleted file mode 100644 index 2e6ca97..0000000 --- a/src/primitive.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef PRIMITIVE_H -#define PRIMITIVE_H - -#include "vm.h" - -typedef struct Pr { - const char *name; - I (*fn)(Vm *); -} Pr; - -extern Pr primitives_table[]; -I prim_find(const char *name); - -#endif diff --git a/src/print.c b/src/print.c deleted file mode 100644 index ffb6037..0000000 --- a/src/print.c +++ /dev/null @@ -1,91 +0,0 @@ -#include -#include - -#include "object.h" -#include "print.h" -#include "string.h" -#include "userdata.h" - -static V print_string(Str *s) { - putchar('"'); - for (Z i = 0; i < s->len; i++) { - unsigned char c = s->data[i]; - switch (c) { - case '\t': - printf("\\t"); - break; - case '\n': - printf("\\n"); - break; - case '\r': - printf("\\r"); - break; - case '\b': - printf("\\b"); - break; - case '\v': - printf("\\v"); - break; - case '\f': - printf("\\f"); - break; - case '\0': - printf("\\0"); - break; - case '\x1b': - printf("\\e"); - break; - case '\\': - printf("\\\\"); - break; - case '\"': - printf("\\\""); - break; - default: - if (c < 32 || c > 126) { - printf("\\x%02x;", c); - } else { - putchar(c); - } - } - } - putchar('"'); -} - -V print(O o) { - if (o == NIL) { - printf("nil"); - } else if (IMM(o)) { - printf("%" PRIdPTR, ORD(o)); - } else { - Hd *hdr = UNBOX(o); - switch (hdr->type) { - case OBJ_QUOT: - printf(""); - break; - case OBJ_COMPOSE: - printf(""); - break; - case OBJ_CURRY: - printf(""); - break; - case OBJ_STR: { - Str *s = string_unwrap(o); - print_string(s); - break; - } - case OBJ_USERDATA: { - Ud *ud = (Ud *)(hdr + 1); - printf("<#userdata %s@%p>", ud->kind->name, ud->data); - break; - } - default: - printf("<#obj type=%ld ptr=%p>", type(o), (void *)o); - } - } -} - -V println(O o) { - print(o); - putchar('\n'); -} diff --git a/src/print.h b/src/print.h deleted file mode 100644 index ed86397..0000000 --- a/src/print.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef PRINT_H -#define PRINT_H - -#include "common.h" -#include "object.h" - -V print(O); -V println(O); - -#endif diff --git a/src/stream.c b/src/stream.c deleted file mode 100644 index 59eafe2..0000000 --- a/src/stream.c +++ /dev/null @@ -1,39 +0,0 @@ -#include "stream.h" -#include - -static int filestream_getc(void *f) { return fgetc((FILE *)f); } -static int filestream_ungetc(int c, void *f) { return ungetc(c, (FILE *)f); } -static int filestream_eof(void *f) { return feof((FILE *)f); } - -static int bufstream_getc(void *f) { - Buf *b = f; - if (b->unread != -1) { - int c = b->unread; - b->unread = -1; - return c; - } else if (b->pos >= b->len) { - return -1; - } - return b->data[b->pos++]; -} - -static int bufstream_ungetc(int c, void *f) { return ((Buf *)f)->unread = c; } - -static int bufstream_eof(void *f) { - Buf *b = f; - if (b->unread != -1) - return 0; - return b->pos >= b->len; -} - -// clang-format off -static const StreamVtable _filestream_vtable = { - filestream_getc, filestream_ungetc, filestream_eof -}; -const StreamVtable *filestream_vtable = &_filestream_vtable; - -static const StreamVtable _bufstream_vtable = { - bufstream_getc, bufstream_ungetc, bufstream_eof -}; -const StreamVtable *bufstream_vtable = &_bufstream_vtable; -// clang-format on diff --git a/src/stream.h b/src/stream.h deleted file mode 100644 index 59e571b..0000000 --- a/src/stream.h +++ /dev/null @@ -1,30 +0,0 @@ -#ifndef STREAM_H -#define STREAM_H - -typedef struct StreamVtable { - int (*__sgetc)(void *); - int (*__sungetc)(int, void *); - int (*__seof)(void *); -} StreamVtable; - -typedef struct Stream { - const StreamVtable *vtable; - void *data; -} Stream; - -typedef struct Buf { - const char *data; - int len, pos; - int unread; -} Buf; - -#define ST_GETC(R) ((R)->vtable->__sgetc((R)->data)) -#define ST_UNGETC(C, R) ((R)->vtable->__sungetc(C, (R)->data)) -#define ST_EOF(R) ((R)->vtable->__seof((R)->data)) - -#define BUF(s) ((Buf){s, sizeof(s)-1, 0, -1}) - -extern const StreamVtable *filestream_vtable; -extern const StreamVtable *bufstream_vtable; - -#endif diff --git a/src/string.c b/src/string.c deleted file mode 100644 index e89d8c0..0000000 --- a/src/string.c +++ /dev/null @@ -1,51 +0,0 @@ -#include - -#include "string.h" -#include "src/gc.h" - -O string_make(Vm *vm, const char *str, I len) { - if (len < 0) - len = strlen(str); - Z size = sizeof(Hd) + sizeof(Str) + len + 1; - Hd *hdr = gc_alloc(vm, size); - hdr->type = OBJ_STR; - Str *s = (Str *)(hdr + 1); - s->len = len; - memcpy(s->data, str, len); - s->data[len] = 0; - return BOX(hdr); -} - -Str *string_unwrap(O o) { - if (o == NIL || IMM(o)) - return NULL; - Hd *hdr = UNBOX(o); - if (hdr->type != OBJ_STR) - return NULL; - return (Str *)(hdr + 1); -} - -O string_concat(Vm *vm, O a_obj, O b_obj) { - I mark = gc_mark(&vm->gc); - gc_addroot(&vm->gc, &a_obj); - gc_addroot(&vm->gc, &b_obj); - - Str *as = string_unwrap(a_obj); - Str *bs = string_unwrap(b_obj); - I a_len = as->len; - I b_len = bs->len; - - O new = string_make(vm, "", a_len + b_len); - - as = string_unwrap(a_obj); - bs = string_unwrap(b_obj); - Str *news = (Str *)(UNBOX(new) + 1); - - memcpy(news->data, as->data, a_len); - memcpy(news->data + a_len, bs->data, b_len); - news->data[a_len + b_len] = 0; - - gc_reset(&vm->gc, mark); - - return new; -} diff --git a/src/string.h b/src/string.h deleted file mode 100644 index 54a71e7..0000000 --- a/src/string.h +++ /dev/null @@ -1,13 +0,0 @@ -#include "common.h" -#include "object.h" -#include "vm.h" - -/** String */ -typedef struct Str { - Z len; - char data[]; -} Str; - -O string_make(Vm *, const char *, I); -Str *string_unwrap(O); -O string_concat(Vm *, O, O); diff --git a/src/userdata.c b/src/userdata.c deleted file mode 100644 index 1c8ba0a..0000000 --- a/src/userdata.c +++ /dev/null @@ -1,24 +0,0 @@ -#include "userdata.h" -#include "gc.h" - -O userdata_make(Vm *vm, V *data, Ut *kind) { - Z size = sizeof(Hd) + sizeof(Ud); - Hd *hdr = gc_alloc(vm, size); - hdr->type = OBJ_USERDATA; - Ud *ud = (Ud *)(hdr + 1); - ud->kind = kind; - ud->data = data; - return BOX(hdr); -} - -Ud *userdata_unwrap(O o, Ut *kind) { - if (o == NIL || IMM(o)) - return NULL; - Hd *hdr = UNBOX(o); - if (hdr->type != OBJ_USERDATA) - return NULL; - Ud *ud = (Ud *)(hdr + 1); - if (ud->kind != kind) - return NULL; - return ud; -} diff --git a/src/userdata.h b/src/userdata.h deleted file mode 100644 index 3baa2fd..0000000 --- a/src/userdata.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef USERDATA_H -#define USERDATA_H - -#include "common.h" -#include "object.h" -#include "vm.h" - -typedef struct Ut { - const char *name; - V (*finalizer)(V *); -} Ut; - -typedef struct Ud { - Ut *kind; - V *data; -} Ud; - -O userdata_make(Vm *, V *, Ut *); -Ud *userdata_unwrap(O, Ut *); - -#endif diff --git a/src/vendor/linenoise.c b/src/vendor/linenoise.c deleted file mode 100644 index 15b34a7..0000000 --- a/src/vendor/linenoise.c +++ /dev/null @@ -1,1763 +0,0 @@ -/* linenoise.c -- guerrilla line editing library against the idea that a - * line editing lib needs to be 20,000 lines of C code. - * - * You can find the latest source code at: - * - * http://github.com/antirez/linenoise - * - * Does a number of crazy assumptions that happen to be true in 99.9999% of - * the 2010 UNIX computers around. - * - * ------------------------------------------------------------------------ - * - * Copyright (c) 2010-2023, Salvatore Sanfilippo - * Copyright (c) 2010-2013, Pieter Noordhuis - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * ------------------------------------------------------------------------ - * - * References: - * - http://invisible-island.net/xterm/ctlseqs/ctlseqs.html - * - http://www.3waylabs.com/nw/WWW/products/wizcon/vt220.html - * - * Todo list: - * - Filter bogus Ctrl+ combinations. - * - Win32 support - * - * Bloat: - * - History search like Ctrl+r in readline? - * - * List of escape sequences used by this program, we do everything just - * with three sequences. In order to be so cheap we may have some - * flickering effect with some slow terminal, but the lesser sequences - * the more compatible. - * - * EL (Erase Line) - * Sequence: ESC [ n K - * Effect: if n is 0 or missing, clear from cursor to end of line - * Effect: if n is 1, clear from beginning of line to cursor - * Effect: if n is 2, clear entire line - * - * CUF (CUrsor Forward) - * Sequence: ESC [ n C - * Effect: moves cursor forward n chars - * - * CUB (CUrsor Backward) - * Sequence: ESC [ n D - * Effect: moves cursor backward n chars - * - * The following is used to get the terminal width if getting - * the width with the TIOCGWINSZ ioctl fails - * - * DSR (Device Status Report) - * Sequence: ESC [ 6 n - * Effect: reports the current cusor position as ESC [ n ; m R - * where n is the row and m is the column - * - * When multi line mode is enabled, we also use an additional escape - * sequence. However multi line editing is disabled by default. - * - * CUU (Cursor Up) - * Sequence: ESC [ n A - * Effect: moves cursor up of n chars. - * - * CUD (Cursor Down) - * Sequence: ESC [ n B - * Effect: moves cursor down of n chars. - * - * When linenoiseClearScreen() is called, two additional escape sequences - * are used in order to clear the screen and position the cursor at home - * position. - * - * CUP (Cursor position) - * Sequence: ESC [ H - * Effect: moves the cursor to upper left corner - * - * ED (Erase display) - * Sequence: ESC [ 2 J - * Effect: clear the whole screen - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "linenoise.h" - -#define LINENOISE_DEFAULT_HISTORY_MAX_LEN 100 -#define LINENOISE_MAX_LINE 4096 - -static char *unsupported_term[] = {"dumb","cons25","emacs",NULL}; -static linenoiseCompletionCallback *completionCallback = NULL; -static linenoiseHintsCallback *hintsCallback = NULL; -static linenoiseFreeHintsCallback *freeHintsCallback = NULL; -static char *linenoiseNoTTY(void); -static void refreshLineWithCompletion(struct linenoiseState *ls, linenoiseCompletions *lc, int flags); -static void refreshLineWithFlags(struct linenoiseState *l, int flags); - -static struct termios orig_termios; /* In order to restore at exit.*/ -static int maskmode = 0; /* Show "***" instead of input. For passwords. */ -static int rawmode = 0; /* For atexit() function to check if restore is needed*/ -static int mlmode = 0; /* Multi line mode. Default is single line. */ -static int atexit_registered = 0; /* Register atexit just 1 time. */ -static int history_max_len = LINENOISE_DEFAULT_HISTORY_MAX_LEN; -static int history_len = 0; -static char **history = NULL; - -/* =========================== UTF-8 support ================================ */ - -/* Return the number of bytes that compose the UTF-8 character starting at - * 'c'. This function assumes a valid UTF-8 encoding and handles the four - * standard byte patterns: - * 0xxxxxxx -> 1 byte (ASCII) - * 110xxxxx -> 2 bytes - * 1110xxxx -> 3 bytes - * 11110xxx -> 4 bytes */ -static int utf8ByteLen(char c) { - unsigned char uc = (unsigned char)c; - if ((uc & 0x80) == 0) return 1; /* 0xxxxxxx: ASCII */ - if ((uc & 0xE0) == 0xC0) return 2; /* 110xxxxx: 2-byte seq */ - if ((uc & 0xF0) == 0xE0) return 3; /* 1110xxxx: 3-byte seq */ - if ((uc & 0xF8) == 0xF0) return 4; /* 11110xxx: 4-byte seq */ - return 1; /* Fallback for invalid encoding, treat as single byte. */ -} - -/* Decode a UTF-8 sequence starting at 's' into a Unicode codepoint. - * Returns the codepoint value. Assumes valid UTF-8 encoding. */ -static uint32_t utf8DecodeChar(const char *s, size_t *len) { - unsigned char *p = (unsigned char *)s; - uint32_t cp; - - if ((*p & 0x80) == 0) { - *len = 1; - return *p; - } else if ((*p & 0xE0) == 0xC0) { - *len = 2; - cp = (*p & 0x1F) << 6; - cp |= (p[1] & 0x3F); - return cp; - } else if ((*p & 0xF0) == 0xE0) { - *len = 3; - cp = (*p & 0x0F) << 12; - cp |= (p[1] & 0x3F) << 6; - cp |= (p[2] & 0x3F); - return cp; - } else if ((*p & 0xF8) == 0xF0) { - *len = 4; - cp = (*p & 0x07) << 18; - cp |= (p[1] & 0x3F) << 12; - cp |= (p[2] & 0x3F) << 6; - cp |= (p[3] & 0x3F); - return cp; - } - *len = 1; - return *p; /* Fallback for invalid sequences. */ -} - -/* Check if codepoint is a variation selector (emoji style modifiers). */ -static int isVariationSelector(uint32_t cp) { - return cp == 0xFE0E || cp == 0xFE0F; /* Text/emoji style */ -} - -/* Check if codepoint is a skin tone modifier. */ -static int isSkinToneModifier(uint32_t cp) { - return cp >= 0x1F3FB && cp <= 0x1F3FF; -} - -/* Check if codepoint is Zero Width Joiner. */ -static int isZWJ(uint32_t cp) { - return cp == 0x200D; -} - -/* Check if codepoint is a Regional Indicator (for flag emoji). */ -static int isRegionalIndicator(uint32_t cp) { - return cp >= 0x1F1E6 && cp <= 0x1F1FF; -} - -/* Check if codepoint is a combining mark or other zero-width character. */ -static int isCombiningMark(uint32_t cp) { - return (cp >= 0x0300 && cp <= 0x036F) || /* Combining Diacriticals */ - (cp >= 0x1AB0 && cp <= 0x1AFF) || /* Combining Diacriticals Extended */ - (cp >= 0x1DC0 && cp <= 0x1DFF) || /* Combining Diacriticals Supplement */ - (cp >= 0x20D0 && cp <= 0x20FF) || /* Combining Diacriticals for Symbols */ - (cp >= 0xFE20 && cp <= 0xFE2F); /* Combining Half Marks */ -} - -/* Check if codepoint extends the previous character (doesn't start a new grapheme). */ -static int isGraphemeExtend(uint32_t cp) { - return isVariationSelector(cp) || isSkinToneModifier(cp) || - isZWJ(cp) || isCombiningMark(cp); -} - -/* Decode the UTF-8 codepoint ending at position 'pos' (exclusive) and - * return its value. Also sets *cplen to the byte length of the codepoint. */ -static uint32_t utf8DecodePrev(const char *buf, size_t pos, size_t *cplen) { - if (pos == 0) { - *cplen = 0; - return 0; - } - /* Scan backwards to find the start byte. */ - size_t i = pos; - do { - i--; - } while (i > 0 && (pos - i) < 4 && ((unsigned char)buf[i] & 0xC0) == 0x80); - *cplen = pos - i; - size_t dummy; - return utf8DecodeChar(buf + i, &dummy); -} - -/* Given a buffer and a position, return the byte length of the grapheme - * cluster before that position. A grapheme cluster includes: - * - The base character - * - Any following variation selectors, skin tone modifiers - * - ZWJ sequences (emoji joined by Zero Width Joiner) - * - Regional indicator pairs (flag emoji) */ -static size_t utf8PrevCharLen(const char *buf, size_t pos) { - if (pos == 0) return 0; - - size_t total = 0; - size_t curpos = pos; - - /* First, get the last codepoint. */ - size_t cplen; - uint32_t cp = utf8DecodePrev(buf, curpos, &cplen); - if (cplen == 0) return 0; - total += cplen; - curpos -= cplen; - - /* If we're at an extending character, we need to find what it extends. - * Keep going back through the grapheme cluster. */ - while (curpos > 0) { - size_t prevlen; - uint32_t prevcp = utf8DecodePrev(buf, curpos, &prevlen); - if (prevlen == 0) break; - - if (isZWJ(prevcp)) { - /* ZWJ joins two emoji. Include the ZWJ and continue to get - * the preceding character. */ - total += prevlen; - curpos -= prevlen; - /* Now get the character before ZWJ. */ - prevcp = utf8DecodePrev(buf, curpos, &prevlen); - if (prevlen == 0) break; - total += prevlen; - curpos -= prevlen; - cp = prevcp; - continue; /* Check if there's more extending before this. */ - } else if (isGraphemeExtend(cp)) { - /* Current cp is an extending character; include previous. */ - total += prevlen; - curpos -= prevlen; - cp = prevcp; - continue; - } else if (isRegionalIndicator(cp) && isRegionalIndicator(prevcp)) { - /* Two regional indicators form a flag. But we need to be careful: - * flags are always pairs, so only join if we're at an even boundary. - * For simplicity, just join one pair. */ - total += prevlen; - curpos -= prevlen; - break; - } else { - /* No more extending; we've found the start of the cluster. */ - break; - } - } - - return total; -} - -/* Given a buffer, position and total length, return the byte length of the - * grapheme cluster at the current position. */ -static size_t utf8NextCharLen(const char *buf, size_t pos, size_t len) { - if (pos >= len) return 0; - - size_t total = 0; - size_t curpos = pos; - - /* Get the first codepoint. */ - size_t cplen; - uint32_t cp = utf8DecodeChar(buf + curpos, &cplen); - total += cplen; - curpos += cplen; - - int isRI = isRegionalIndicator(cp); - - /* Consume any extending characters that follow. */ - while (curpos < len) { - size_t nextlen; - uint32_t nextcp = utf8DecodeChar(buf + curpos, &nextlen); - - if (isZWJ(nextcp) && curpos + nextlen < len) { - /* ZWJ: include it and the following character. */ - total += nextlen; - curpos += nextlen; - /* Get the character after ZWJ. */ - nextcp = utf8DecodeChar(buf + curpos, &nextlen); - total += nextlen; - curpos += nextlen; - continue; /* Check for more extending after the joined char. */ - } else if (isGraphemeExtend(nextcp)) { - /* Variation selector, skin tone, combining mark, etc. */ - total += nextlen; - curpos += nextlen; - continue; - } else if (isRI && isRegionalIndicator(nextcp)) { - /* Second regional indicator for a flag pair. */ - total += nextlen; - curpos += nextlen; - isRI = 0; /* Only pair once. */ - continue; - } else { - break; - } - } - - return total; -} - -/* Return the display width of a Unicode codepoint. This is a heuristic - * that works for most common cases: - * - Control chars and zero-width: 0 columns - * - Grapheme-extending chars (VS, skin tone, ZWJ): 0 columns - * - ASCII printable: 1 column - * - Wide chars (CJK, emoji, fullwidth): 2 columns - * - Everything else: 1 column - * - * This is not a full wcwidth() implementation, but a minimal heuristic - * that handles emoji and CJK characters reasonably well. */ -static int utf8CharWidth(uint32_t cp) { - /* Control characters and combining marks: zero width. */ - if (cp < 32 || (cp >= 0x7F && cp < 0xA0)) return 0; - if (isCombiningMark(cp)) return 0; - - /* Grapheme-extending characters: zero width. - * These modify the preceding character rather than taking space. */ - if (isVariationSelector(cp)) return 0; - if (isSkinToneModifier(cp)) return 0; - if (isZWJ(cp)) return 0; - - /* Wide character ranges - these display as 2 columns: - * - CJK Unified Ideographs and Extensions - * - Fullwidth forms - * - Various emoji ranges */ - if (cp >= 0x1100 && - (cp <= 0x115F || /* Hangul Jamo */ - cp == 0x2329 || cp == 0x232A || /* Angle brackets */ - (cp >= 0x231A && cp <= 0x231B) || /* Watch, Hourglass */ - (cp >= 0x23E9 && cp <= 0x23F3) || /* Various symbols */ - (cp >= 0x23F8 && cp <= 0x23FA) || /* Various symbols */ - (cp >= 0x25AA && cp <= 0x25AB) || /* Small squares */ - (cp >= 0x25B6 && cp <= 0x25C0) || /* Play/reverse buttons */ - (cp >= 0x25FB && cp <= 0x25FE) || /* Squares */ - (cp >= 0x2600 && cp <= 0x26FF) || /* Misc Symbols (sun, cloud, etc) */ - (cp >= 0x2700 && cp <= 0x27BF) || /* Dingbats (❤, ✂, etc) */ - (cp >= 0x2934 && cp <= 0x2935) || /* Arrows */ - (cp >= 0x2B05 && cp <= 0x2B07) || /* Arrows */ - (cp >= 0x2B1B && cp <= 0x2B1C) || /* Squares */ - cp == 0x2B50 || cp == 0x2B55 || /* Star, circle */ - (cp >= 0x2E80 && cp <= 0xA4CF && - cp != 0x303F) || /* CJK ... Yi */ - (cp >= 0xAC00 && cp <= 0xD7A3) || /* Hangul Syllables */ - (cp >= 0xF900 && cp <= 0xFAFF) || /* CJK Compatibility Ideographs */ - (cp >= 0xFE10 && cp <= 0xFE1F) || /* Vertical forms */ - (cp >= 0xFE30 && cp <= 0xFE6F) || /* CJK Compatibility Forms */ - (cp >= 0xFF00 && cp <= 0xFF60) || /* Fullwidth Forms */ - (cp >= 0xFFE0 && cp <= 0xFFE6) || /* Fullwidth Signs */ - (cp >= 0x1F1E6 && cp <= 0x1F1FF) || /* Regional Indicators (flags) */ - (cp >= 0x1F300 && cp <= 0x1F64F) || /* Misc Symbols and Emoticons */ - (cp >= 0x1F680 && cp <= 0x1F6FF) || /* Transport and Map Symbols */ - (cp >= 0x1F900 && cp <= 0x1F9FF) || /* Supplemental Symbols */ - (cp >= 0x1FA00 && cp <= 0x1FAFF) || /* Chess, Extended-A */ - (cp >= 0x20000 && cp <= 0x2FFFF))) /* CJK Extension B and beyond */ - return 2; - - return 1; /* Default: single width */ -} - -/* Calculate the display width of a UTF-8 string of 'len' bytes. - * This is used for cursor positioning in the terminal. - * Handles grapheme clusters: characters joined by ZWJ contribute 0 width - * after the first character in the sequence. */ -static size_t utf8StrWidth(const char *s, size_t len) { - size_t width = 0; - size_t i = 0; - int after_zwj = 0; /* Track if previous char was ZWJ */ - - while (i < len) { - size_t clen; - uint32_t cp = utf8DecodeChar(s + i, &clen); - - if (after_zwj) { - /* Character after ZWJ: don't add width, it's joined. - * But do check for extending chars after it. */ - after_zwj = 0; - } else { - width += utf8CharWidth(cp); - } - - /* Check if this is a ZWJ - next char will be joined. */ - if (isZWJ(cp)) { - after_zwj = 1; - } - - i += clen; - } - return width; -} - -/* Return the display width of a single UTF-8 character at position 's'. */ -static int utf8SingleCharWidth(const char *s, size_t len) { - if (len == 0) return 0; - size_t clen; - uint32_t cp = utf8DecodeChar(s, &clen); - return utf8CharWidth(cp); -} - -enum KEY_ACTION{ - KEY_NULL = 0, /* NULL */ - CTRL_A = 1, /* Ctrl+a */ - CTRL_B = 2, /* Ctrl-b */ - CTRL_C = 3, /* Ctrl-c */ - CTRL_D = 4, /* Ctrl-d */ - CTRL_E = 5, /* Ctrl-e */ - CTRL_F = 6, /* Ctrl-f */ - CTRL_H = 8, /* Ctrl-h */ - TAB = 9, /* Tab */ - CTRL_K = 11, /* Ctrl+k */ - CTRL_L = 12, /* Ctrl+l */ - ENTER = 13, /* Enter */ - CTRL_N = 14, /* Ctrl-n */ - CTRL_P = 16, /* Ctrl-p */ - CTRL_T = 20, /* Ctrl-t */ - CTRL_U = 21, /* Ctrl+u */ - CTRL_W = 23, /* Ctrl+w */ - ESC = 27, /* Escape */ - BACKSPACE = 127 /* Backspace */ -}; - -static void linenoiseAtExit(void); -int linenoiseHistoryAdd(const char *line); -#define REFRESH_CLEAN (1<<0) // Clean the old prompt from the screen -#define REFRESH_WRITE (1<<1) // Rewrite the prompt on the screen. -#define REFRESH_ALL (REFRESH_CLEAN|REFRESH_WRITE) // Do both. -static void refreshLine(struct linenoiseState *l); - -/* Debugging macro. */ -#if 0 -FILE *lndebug_fp = NULL; -#define lndebug(...) \ - do { \ - if (lndebug_fp == NULL) { \ - lndebug_fp = fopen("/tmp/lndebug.txt","a"); \ - fprintf(lndebug_fp, \ - "[%d %d %d] p: %d, rows: %d, rpos: %d, max: %d, oldmax: %d\n", \ - (int)l->len,(int)l->pos,(int)l->oldpos,plen,rows,rpos, \ - (int)l->oldrows,old_rows); \ - } \ - fprintf(lndebug_fp, ", " __VA_ARGS__); \ - fflush(lndebug_fp); \ - } while (0) -#else -#define lndebug(fmt, ...) -#endif - -/* ======================= Low level terminal handling ====================== */ - -/* Enable "mask mode". When it is enabled, instead of the input that - * the user is typing, the terminal will just display a corresponding - * number of asterisks, like "****". This is useful for passwords and other - * secrets that should not be displayed. */ -void linenoiseMaskModeEnable(void) { - maskmode = 1; -} - -/* Disable mask mode. */ -void linenoiseMaskModeDisable(void) { - maskmode = 0; -} - -/* Set if to use or not the multi line mode. */ -void linenoiseSetMultiLine(int ml) { - mlmode = ml; -} - -/* Return true if the terminal name is in the list of terminals we know are - * not able to understand basic escape sequences. */ -static int isUnsupportedTerm(void) { - char *term = getenv("TERM"); - int j; - - if (term == NULL) return 0; - for (j = 0; unsupported_term[j]; j++) - if (!strcasecmp(term,unsupported_term[j])) return 1; - return 0; -} - -/* Raw mode: 1960 magic shit. */ -static int enableRawMode(int fd) { - struct termios raw; - - /* Test mode: when LINENOISE_ASSUME_TTY is set, skip terminal setup. - * This allows testing via pipes without a real terminal. */ - if (getenv("LINENOISE_ASSUME_TTY")) { - rawmode = 1; - return 0; - } - - if (!isatty(STDIN_FILENO)) goto fatal; - if (!atexit_registered) { - atexit(linenoiseAtExit); - atexit_registered = 1; - } - if (tcgetattr(fd,&orig_termios) == -1) goto fatal; - - raw = orig_termios; /* modify the original mode */ - /* input modes: no break, no CR to NL, no parity check, no strip char, - * no start/stop output control. */ - raw.c_iflag &= ~(BRKINT | ICRNL | INPCK | ISTRIP | IXON); - /* output modes - disable post processing */ - raw.c_oflag &= ~(OPOST); - /* control modes - set 8 bit chars */ - raw.c_cflag |= (CS8); - /* local modes - choing off, canonical off, no extended functions, - * no signal chars (^Z,^C) */ - raw.c_lflag &= ~(ECHO | ICANON | IEXTEN | ISIG); - /* control chars - set return condition: min number of bytes and timer. - * We want read to return every single byte, without timeout. */ - raw.c_cc[VMIN] = 1; raw.c_cc[VTIME] = 0; /* 1 byte, no timer */ - - /* put terminal in raw mode after flushing */ - if (tcsetattr(fd,TCSAFLUSH,&raw) < 0) goto fatal; - rawmode = 1; - return 0; - -fatal: - errno = ENOTTY; - return -1; -} - -static void disableRawMode(int fd) { - /* Test mode: nothing to restore. */ - if (getenv("LINENOISE_ASSUME_TTY")) { - rawmode = 0; - return; - } - /* Don't even check the return value as it's too late. */ - if (rawmode && tcsetattr(fd,TCSAFLUSH,&orig_termios) != -1) - rawmode = 0; -} - -/* Use the ESC [6n escape sequence to query the horizontal cursor position - * and return it. On error -1 is returned, on success the position of the - * cursor. */ -static int getCursorPosition(int ifd, int ofd) { - char buf[32]; - int cols, rows; - unsigned int i = 0; - - /* Report cursor location */ - if (write(ofd, "\x1b[6n", 4) != 4) return -1; - - /* Read the response: ESC [ rows ; cols R */ - while (i < sizeof(buf)-1) { - if (read(ifd,buf+i,1) != 1) break; - if (buf[i] == 'R') break; - i++; - } - buf[i] = '\0'; - - /* Parse it. */ - if (buf[0] != ESC || buf[1] != '[') return -1; - if (sscanf(buf+2,"%d;%d",&rows,&cols) != 2) return -1; - return cols; -} - -/* Try to get the number of columns in the current terminal, or assume 80 - * if it fails. */ -static int getColumns(int ifd, int ofd) { - struct winsize ws; - - /* Test mode: use LINENOISE_COLS env var for fixed width. */ - char *cols_env = getenv("LINENOISE_COLS"); - if (cols_env) return atoi(cols_env); - - if (ioctl(1, TIOCGWINSZ, &ws) == -1 || ws.ws_col == 0) { - /* ioctl() failed. Try to query the terminal itself. */ - int start, cols; - - /* Get the initial position so we can restore it later. */ - start = getCursorPosition(ifd,ofd); - if (start == -1) goto failed; - - /* Go to right margin and get position. */ - if (write(ofd,"\x1b[999C",6) != 6) goto failed; - cols = getCursorPosition(ifd,ofd); - if (cols == -1) goto failed; - - /* Restore position. */ - if (cols > start) { - char seq[32]; - snprintf(seq,32,"\x1b[%dD",cols-start); - if (write(ofd,seq,strlen(seq)) == -1) { - /* Can't recover... */ - } - } - return cols; - } else { - return ws.ws_col; - } - -failed: - return 80; -} - -/* Clear the screen. Used to handle ctrl+l */ -void linenoiseClearScreen(void) { - if (write(STDOUT_FILENO,"\x1b[H\x1b[2J",7) <= 0) { - /* nothing to do, just to avoid warning. */ - } -} - -/* Beep, used for completion when there is nothing to complete or when all - * the choices were already shown. */ -static void linenoiseBeep(void) { - fprintf(stderr, "\x7"); - fflush(stderr); -} - -/* ============================== Completion ================================ */ - -/* Free a list of completion option populated by linenoiseAddCompletion(). */ -static void freeCompletions(linenoiseCompletions *lc) { - size_t i; - for (i = 0; i < lc->len; i++) - free(lc->cvec[i]); - if (lc->cvec != NULL) - free(lc->cvec); -} - -/* Called by completeLine() and linenoiseShow() to render the current - * edited line with the proposed completion. If the current completion table - * is already available, it is passed as second argument, otherwise the - * function will use the callback to obtain it. - * - * Flags are the same as refreshLine*(), that is REFRESH_* macros. */ -static void refreshLineWithCompletion(struct linenoiseState *ls, linenoiseCompletions *lc, int flags) { - /* Obtain the table of completions if the caller didn't provide one. */ - linenoiseCompletions ctable = { 0, NULL }; - if (lc == NULL) { - completionCallback(ls->buf,&ctable); - lc = &ctable; - } - - /* Show the edited line with completion if possible, or just refresh. */ - if (ls->completion_idx < lc->len) { - struct linenoiseState saved = *ls; - ls->len = ls->pos = strlen(lc->cvec[ls->completion_idx]); - ls->buf = lc->cvec[ls->completion_idx]; - refreshLineWithFlags(ls,flags); - ls->len = saved.len; - ls->pos = saved.pos; - ls->buf = saved.buf; - } else { - refreshLineWithFlags(ls,flags); - } - - /* Free the completions table if needed. */ - if (lc != &ctable) freeCompletions(&ctable); -} - -/* This is an helper function for linenoiseEdit*() and is called when the - * user types the key in order to complete the string currently in the - * input. - * - * The state of the editing is encapsulated into the pointed linenoiseState - * structure as described in the structure definition. - * - * If the function returns non-zero, the caller should handle the - * returned value as a byte read from the standard input, and process - * it as usually: this basically means that the function may return a byte - * read from the termianl but not processed. Otherwise, if zero is returned, - * the input was consumed by the completeLine() function to navigate the - * possible completions, and the caller should read for the next characters - * from stdin. */ -static int completeLine(struct linenoiseState *ls, int keypressed) { - linenoiseCompletions lc = { 0, NULL }; - int nwritten; - char c = keypressed; - - completionCallback(ls->buf,&lc); - if (lc.len == 0) { - linenoiseBeep(); - ls->in_completion = 0; - } else { - switch(c) { - case 9: /* tab */ - if (ls->in_completion == 0) { - ls->in_completion = 1; - ls->completion_idx = 0; - } else { - ls->completion_idx = (ls->completion_idx+1) % (lc.len+1); - if (ls->completion_idx == lc.len) linenoiseBeep(); - } - c = 0; - break; - case 27: /* escape */ - /* Re-show original buffer */ - if (ls->completion_idx < lc.len) refreshLine(ls); - ls->in_completion = 0; - c = 0; - break; - default: - /* Update buffer and return */ - if (ls->completion_idx < lc.len) { - nwritten = snprintf(ls->buf,ls->buflen,"%s", - lc.cvec[ls->completion_idx]); - ls->len = ls->pos = nwritten; - } - ls->in_completion = 0; - break; - } - - /* Show completion or original buffer */ - if (ls->in_completion && ls->completion_idx < lc.len) { - refreshLineWithCompletion(ls,&lc,REFRESH_ALL); - } else { - refreshLine(ls); - } - } - - freeCompletions(&lc); - return c; /* Return last read character */ -} - -/* Register a callback function to be called for tab-completion. */ -void linenoiseSetCompletionCallback(linenoiseCompletionCallback *fn) { - completionCallback = fn; -} - -/* Register a hits function to be called to show hits to the user at the - * right of the prompt. */ -void linenoiseSetHintsCallback(linenoiseHintsCallback *fn) { - hintsCallback = fn; -} - -/* Register a function to free the hints returned by the hints callback - * registered with linenoiseSetHintsCallback(). */ -void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *fn) { - freeHintsCallback = fn; -} - -/* This function is used by the callback function registered by the user - * in order to add completion options given the input string when the - * user typed . See the example.c source code for a very easy to - * understand example. */ -void linenoiseAddCompletion(linenoiseCompletions *lc, const char *str) { - size_t len = strlen(str); - char *copy, **cvec; - - copy = malloc(len+1); - if (copy == NULL) return; - memcpy(copy,str,len+1); - cvec = realloc(lc->cvec,sizeof(char*)*(lc->len+1)); - if (cvec == NULL) { - free(copy); - return; - } - lc->cvec = cvec; - lc->cvec[lc->len++] = copy; -} - -/* =========================== Line editing ================================= */ - -/* We define a very simple "append buffer" structure, that is an heap - * allocated string where we can append to. This is useful in order to - * write all the escape sequences in a buffer and flush them to the standard - * output in a single call, to avoid flickering effects. */ -struct abuf { - char *b; - int len; -}; - -static void abInit(struct abuf *ab) { - ab->b = NULL; - ab->len = 0; -} - -static void abAppend(struct abuf *ab, const char *s, int len) { - char *new = realloc(ab->b,ab->len+len); - - if (new == NULL) return; - memcpy(new+ab->len,s,len); - ab->b = new; - ab->len += len; -} - -static void abFree(struct abuf *ab) { - free(ab->b); -} - -/* Helper of refreshSingleLine() and refreshMultiLine() to show hints - * to the right of the prompt. Now uses display widths for proper UTF-8. */ -void refreshShowHints(struct abuf *ab, struct linenoiseState *l, int pwidth) { - char seq[64]; - size_t bufwidth = utf8StrWidth(l->buf, l->len); - if (hintsCallback && pwidth + bufwidth < l->cols) { - int color = -1, bold = 0; - char *hint = hintsCallback(l->buf,&color,&bold); - if (hint) { - size_t hintlen = strlen(hint); - size_t hintwidth = utf8StrWidth(hint, hintlen); - size_t hintmaxwidth = l->cols - (pwidth + bufwidth); - /* Truncate hint to fit, respecting UTF-8 boundaries. */ - if (hintwidth > hintmaxwidth) { - size_t i = 0, w = 0; - while (i < hintlen) { - size_t clen = utf8NextCharLen(hint, i, hintlen); - int cwidth = utf8SingleCharWidth(hint + i, clen); - if (w + cwidth > hintmaxwidth) break; - w += cwidth; - i += clen; - } - hintlen = i; - } - if (bold == 1 && color == -1) color = 37; - if (color != -1 || bold != 0) - snprintf(seq,64,"\033[%d;%d;49m",bold,color); - else - seq[0] = '\0'; - abAppend(ab,seq,strlen(seq)); - abAppend(ab,hint,hintlen); - if (color != -1 || bold != 0) - abAppend(ab,"\033[0m",4); - /* Call the function to free the hint returned. */ - if (freeHintsCallback) freeHintsCallback(hint); - } - } -} - -/* Single line low level line refresh. - * - * Rewrite the currently edited line accordingly to the buffer content, - * cursor position, and number of columns of the terminal. - * - * Flags is REFRESH_* macros. The function can just remove the old - * prompt, just write it, or both. - * - * This function is UTF-8 aware and uses display widths (not byte counts) - * for cursor positioning and horizontal scrolling. */ -static void refreshSingleLine(struct linenoiseState *l, int flags) { - char seq[64]; - size_t pwidth = utf8StrWidth(l->prompt, l->plen); /* Prompt display width */ - int fd = l->ofd; - char *buf = l->buf; - size_t len = l->len; /* Byte length of buffer to display */ - size_t pos = l->pos; /* Byte position of cursor */ - size_t poscol; /* Display column of cursor */ - size_t lencol; /* Display width of buffer */ - struct abuf ab; - - /* Calculate the display width up to cursor and total display width. */ - poscol = utf8StrWidth(buf, pos); - lencol = utf8StrWidth(buf, len); - - /* Scroll the buffer horizontally if cursor is past the right edge. - * We need to trim full UTF-8 characters from the left until the - * cursor position fits within the terminal width. */ - while (pwidth + poscol >= l->cols) { - size_t clen = utf8NextCharLen(buf, 0, len); - int cwidth = utf8SingleCharWidth(buf, clen); - buf += clen; - len -= clen; - pos -= clen; - poscol -= cwidth; - lencol -= cwidth; - } - - /* Trim from the right if the line still doesn't fit. */ - while (pwidth + lencol > l->cols) { - size_t clen = utf8PrevCharLen(buf, len); - int cwidth = utf8SingleCharWidth(buf + len - clen, clen); - len -= clen; - lencol -= cwidth; - } - - abInit(&ab); - /* Cursor to left edge */ - snprintf(seq,sizeof(seq),"\r"); - abAppend(&ab,seq,strlen(seq)); - - if (flags & REFRESH_WRITE) { - /* Write the prompt and the current buffer content */ - abAppend(&ab,l->prompt,l->plen); - if (maskmode == 1) { - /* In mask mode, we output one '*' per UTF-8 character, not byte */ - size_t i = 0; - while (i < len) { - abAppend(&ab,"*",1); - i += utf8NextCharLen(buf, i, len); - } - } else { - abAppend(&ab,buf,len); - } - /* Show hints if any. */ - refreshShowHints(&ab,l,pwidth); - } - - /* Erase to right */ - snprintf(seq,sizeof(seq),"\x1b[0K"); - abAppend(&ab,seq,strlen(seq)); - - if (flags & REFRESH_WRITE) { - /* Move cursor to original position (using display column, not byte). */ - snprintf(seq,sizeof(seq),"\r\x1b[%dC", (int)(poscol+pwidth)); - abAppend(&ab,seq,strlen(seq)); - } - - if (write(fd,ab.b,ab.len) == -1) {} /* Can't recover from write error. */ - abFree(&ab); -} - -/* Multi line low level line refresh. - * - * Rewrite the currently edited line accordingly to the buffer content, - * cursor position, and number of columns of the terminal. - * - * Flags is REFRESH_* macros. The function can just remove the old - * prompt, just write it, or both. - * - * This function is UTF-8 aware and uses display widths for positioning. */ -static void refreshMultiLine(struct linenoiseState *l, int flags) { - char seq[64]; - size_t pwidth = utf8StrWidth(l->prompt, l->plen); /* Prompt display width */ - size_t bufwidth = utf8StrWidth(l->buf, l->len); /* Buffer display width */ - size_t poswidth = utf8StrWidth(l->buf, l->pos); /* Cursor display width */ - int rows = (pwidth+bufwidth+l->cols-1)/l->cols; /* rows used by current buf. */ - int rpos = l->oldrpos; /* cursor relative row from previous refresh. */ - int rpos2; /* rpos after refresh. */ - int col; /* column position, zero-based. */ - int old_rows = l->oldrows; - int fd = l->ofd, j; - struct abuf ab; - - l->oldrows = rows; - - /* First step: clear all the lines used before. To do so start by - * going to the last row. */ - abInit(&ab); - - if (flags & REFRESH_CLEAN) { - if (old_rows-rpos > 0) { - lndebug("go down %d", old_rows-rpos); - snprintf(seq,64,"\x1b[%dB", old_rows-rpos); - abAppend(&ab,seq,strlen(seq)); - } - - /* Now for every row clear it, go up. */ - for (j = 0; j < old_rows-1; j++) { - lndebug("clear+up"); - snprintf(seq,64,"\r\x1b[0K\x1b[1A"); - abAppend(&ab,seq,strlen(seq)); - } - } - - if (flags & REFRESH_ALL) { - /* Clean the top line. */ - lndebug("clear"); - snprintf(seq,64,"\r\x1b[0K"); - abAppend(&ab,seq,strlen(seq)); - } - - if (flags & REFRESH_WRITE) { - /* Write the prompt and the current buffer content */ - abAppend(&ab,l->prompt,l->plen); - if (maskmode == 1) { - /* In mask mode, output one '*' per UTF-8 character, not byte */ - size_t i = 0; - while (i < l->len) { - abAppend(&ab,"*",1); - i += utf8NextCharLen(l->buf, i, l->len); - } - } else { - abAppend(&ab,l->buf,l->len); - } - - /* Show hints if any. */ - refreshShowHints(&ab,l,pwidth); - - /* If we are at the very end of the screen with our prompt, we need to - * emit a newline and move the prompt to the first column. */ - if (l->pos && - l->pos == l->len && - (poswidth+pwidth) % l->cols == 0) - { - lndebug(""); - abAppend(&ab,"\n",1); - snprintf(seq,64,"\r"); - abAppend(&ab,seq,strlen(seq)); - rows++; - if (rows > (int)l->oldrows) l->oldrows = rows; - } - - /* Move cursor to right position. */ - rpos2 = (pwidth+poswidth+l->cols)/l->cols; /* Current cursor relative row */ - lndebug("rpos2 %d", rpos2); - - /* Go up till we reach the expected position. */ - if (rows-rpos2 > 0) { - lndebug("go-up %d", rows-rpos2); - snprintf(seq,64,"\x1b[%dA", rows-rpos2); - abAppend(&ab,seq,strlen(seq)); - } - - /* Set column. */ - col = (pwidth+poswidth) % l->cols; - lndebug("set col %d", 1+col); - if (col) - snprintf(seq,64,"\r\x1b[%dC", col); - else - snprintf(seq,64,"\r"); - abAppend(&ab,seq,strlen(seq)); - } - - lndebug("\n"); - l->oldpos = l->pos; - if (flags & REFRESH_WRITE) l->oldrpos = rpos2; - - if (write(fd,ab.b,ab.len) == -1) {} /* Can't recover from write error. */ - abFree(&ab); -} - -/* Calls the two low level functions refreshSingleLine() or - * refreshMultiLine() according to the selected mode. */ -static void refreshLineWithFlags(struct linenoiseState *l, int flags) { - if (mlmode) - refreshMultiLine(l,flags); - else - refreshSingleLine(l,flags); -} - -/* Utility function to avoid specifying REFRESH_ALL all the times. */ -static void refreshLine(struct linenoiseState *l) { - refreshLineWithFlags(l,REFRESH_ALL); -} - -/* Hide the current line, when using the multiplexing API. */ -void linenoiseHide(struct linenoiseState *l) { - if (mlmode) - refreshMultiLine(l,REFRESH_CLEAN); - else - refreshSingleLine(l,REFRESH_CLEAN); -} - -/* Show the current line, when using the multiplexing API. */ -void linenoiseShow(struct linenoiseState *l) { - if (l->in_completion) { - refreshLineWithCompletion(l,NULL,REFRESH_WRITE); - } else { - refreshLineWithFlags(l,REFRESH_WRITE); - } -} - -/* Insert the character(s) 'c' of length 'clen' at cursor current position. - * This handles both single-byte ASCII and multi-byte UTF-8 sequences. - * - * On error writing to the terminal -1 is returned, otherwise 0. */ -int linenoiseEditInsert(struct linenoiseState *l, const char *c, size_t clen) { - if (l->len + clen <= l->buflen) { - if (l->len == l->pos) { - /* Append at end of line. */ - memcpy(l->buf+l->pos, c, clen); - l->pos += clen; - l->len += clen; - l->buf[l->len] = '\0'; - if ((!mlmode && - utf8StrWidth(l->prompt,l->plen)+utf8StrWidth(l->buf,l->len) < l->cols && - !hintsCallback)) { - /* Avoid a full update of the line in the trivial case: - * single-width char, no hints, fits in one line. */ - if (maskmode == 1) { - if (write(l->ofd,"*",1) == -1) return -1; - } else { - if (write(l->ofd,c,clen) == -1) return -1; - } - } else { - refreshLine(l); - } - } else { - /* Insert in the middle of the line. */ - memmove(l->buf+l->pos+clen, l->buf+l->pos, l->len-l->pos); - memcpy(l->buf+l->pos, c, clen); - l->len += clen; - l->pos += clen; - l->buf[l->len] = '\0'; - refreshLine(l); - } - } - return 0; -} - -/* Move cursor on the left. Moves by one UTF-8 character, not byte. */ -void linenoiseEditMoveLeft(struct linenoiseState *l) { - if (l->pos > 0) { - l->pos -= utf8PrevCharLen(l->buf, l->pos); - refreshLine(l); - } -} - -/* Move cursor on the right. Moves by one UTF-8 character, not byte. */ -void linenoiseEditMoveRight(struct linenoiseState *l) { - if (l->pos != l->len) { - l->pos += utf8NextCharLen(l->buf, l->pos, l->len); - refreshLine(l); - } -} - -/* Move cursor to the start of the line. */ -void linenoiseEditMoveHome(struct linenoiseState *l) { - if (l->pos != 0) { - l->pos = 0; - refreshLine(l); - } -} - -/* Move cursor to the end of the line. */ -void linenoiseEditMoveEnd(struct linenoiseState *l) { - if (l->pos != l->len) { - l->pos = l->len; - refreshLine(l); - } -} - -/* Substitute the currently edited line with the next or previous history - * entry as specified by 'dir'. */ -#define LINENOISE_HISTORY_NEXT 0 -#define LINENOISE_HISTORY_PREV 1 -void linenoiseEditHistoryNext(struct linenoiseState *l, int dir) { - if (history_len > 1) { - /* Update the current history entry before to - * overwrite it with the next one. */ - free(history[history_len - 1 - l->history_index]); - history[history_len - 1 - l->history_index] = strdup(l->buf); - /* Show the new entry */ - l->history_index += (dir == LINENOISE_HISTORY_PREV) ? 1 : -1; - if (l->history_index < 0) { - l->history_index = 0; - return; - } else if (l->history_index >= history_len) { - l->history_index = history_len-1; - return; - } - strncpy(l->buf,history[history_len - 1 - l->history_index],l->buflen); - l->buf[l->buflen-1] = '\0'; - l->len = l->pos = strlen(l->buf); - refreshLine(l); - } -} - -/* Delete the character at the right of the cursor without altering the cursor - * position. Basically this is what happens with the "Delete" keyboard key. - * Now handles multi-byte UTF-8 characters. */ -void linenoiseEditDelete(struct linenoiseState *l) { - if (l->len > 0 && l->pos < l->len) { - size_t clen = utf8NextCharLen(l->buf, l->pos, l->len); - memmove(l->buf+l->pos, l->buf+l->pos+clen, l->len-l->pos-clen); - l->len -= clen; - l->buf[l->len] = '\0'; - refreshLine(l); - } -} - -/* Backspace implementation. Deletes the UTF-8 character before the cursor. */ -void linenoiseEditBackspace(struct linenoiseState *l) { - if (l->pos > 0 && l->len > 0) { - size_t clen = utf8PrevCharLen(l->buf, l->pos); - memmove(l->buf+l->pos-clen, l->buf+l->pos, l->len-l->pos); - l->pos -= clen; - l->len -= clen; - l->buf[l->len] = '\0'; - refreshLine(l); - } -} - -/* Delete the previous word, maintaining the cursor at the start of the - * current word. Handles UTF-8 by moving character-by-character. */ -void linenoiseEditDeletePrevWord(struct linenoiseState *l) { - size_t old_pos = l->pos; - size_t diff; - - /* Skip spaces before the word (move backwards by UTF-8 chars). */ - while (l->pos > 0 && l->buf[l->pos-1] == ' ') - l->pos -= utf8PrevCharLen(l->buf, l->pos); - /* Skip non-space characters (move backwards by UTF-8 chars). */ - while (l->pos > 0 && l->buf[l->pos-1] != ' ') - l->pos -= utf8PrevCharLen(l->buf, l->pos); - diff = old_pos - l->pos; - memmove(l->buf+l->pos, l->buf+old_pos, l->len-old_pos+1); - l->len -= diff; - refreshLine(l); -} - -/* This function is part of the multiplexed API of Linenoise, that is used - * in order to implement the blocking variant of the API but can also be - * called by the user directly in an event driven program. It will: - * - * 1. Initialize the linenoise state passed by the user. - * 2. Put the terminal in RAW mode. - * 3. Show the prompt. - * 4. Return control to the user, that will have to call linenoiseEditFeed() - * each time there is some data arriving in the standard input. - * - * The user can also call linenoiseEditHide() and linenoiseEditShow() if it - * is required to show some input arriving asyncronously, without mixing - * it with the currently edited line. - * - * When linenoiseEditFeed() returns non-NULL, the user finished with the - * line editing session (pressed enter CTRL-D/C): in this case the caller - * needs to call linenoiseEditStop() to put back the terminal in normal - * mode. This will not destroy the buffer, as long as the linenoiseState - * is still valid in the context of the caller. - * - * The function returns 0 on success, or -1 if writing to standard output - * fails. If stdin_fd or stdout_fd are set to -1, the default is to use - * STDIN_FILENO and STDOUT_FILENO. - */ -int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt) { - /* Populate the linenoise state that we pass to functions implementing - * specific editing functionalities. */ - l->in_completion = 0; - l->ifd = stdin_fd != -1 ? stdin_fd : STDIN_FILENO; - l->ofd = stdout_fd != -1 ? stdout_fd : STDOUT_FILENO; - l->buf = buf; - l->buflen = buflen; - l->prompt = prompt; - l->plen = strlen(prompt); - l->oldpos = l->pos = 0; - l->len = 0; - - /* Enter raw mode. */ - if (enableRawMode(l->ifd) == -1) return -1; - - l->cols = getColumns(stdin_fd, stdout_fd); - l->oldrows = 0; - l->oldrpos = 1; /* Cursor starts on row 1. */ - l->history_index = 0; - - /* Buffer starts empty. */ - l->buf[0] = '\0'; - l->buflen--; /* Make sure there is always space for the nulterm */ - - /* If stdin is not a tty, stop here with the initialization. We - * will actually just read a line from standard input in blocking - * mode later, in linenoiseEditFeed(). */ - if (!isatty(l->ifd) && !getenv("LINENOISE_ASSUME_TTY")) return 0; - - /* The latest history entry is always our current buffer, that - * initially is just an empty string. */ - linenoiseHistoryAdd(""); - - if (write(l->ofd,prompt,l->plen) == -1) return -1; - return 0; -} - -char *linenoiseEditMore = "If you see this, you are misusing the API: when linenoiseEditFeed() is called, if it returns linenoiseEditMore the user is yet editing the line. See the README file for more information."; - -/* This function is part of the multiplexed API of linenoise, see the top - * comment on linenoiseEditStart() for more information. Call this function - * each time there is some data to read from the standard input file - * descriptor. In the case of blocking operations, this function can just be - * called in a loop, and block. - * - * The function returns linenoiseEditMore to signal that line editing is still - * in progress, that is, the user didn't yet pressed enter / CTRL-D. Otherwise - * the function returns the pointer to the heap-allocated buffer with the - * edited line, that the user should free with linenoiseFree(). - * - * On special conditions, NULL is returned and errno is populated: - * - * EAGAIN if the user pressed Ctrl-C - * ENOENT if the user pressed Ctrl-D - * - * Some other errno: I/O error. - */ -char *linenoiseEditFeed(struct linenoiseState *l) { - /* Not a TTY, pass control to line reading without character - * count limits. */ - if (!isatty(l->ifd) && !getenv("LINENOISE_ASSUME_TTY")) return linenoiseNoTTY(); - - char c; - int nread; - char seq[3]; - - nread = read(l->ifd,&c,1); - if (nread < 0) { - return (errno == EAGAIN || errno == EWOULDBLOCK) ? linenoiseEditMore : NULL; - } else if (nread == 0) { - return NULL; - } - - /* Only autocomplete when the callback is set. It returns < 0 when - * there was an error reading from fd. Otherwise it will return the - * character that should be handled next. */ - if ((l->in_completion || c == 9) && completionCallback != NULL) { - c = completeLine(l,c); - /* Return on errors */ - if (c < 0) return NULL; - /* Read next character when 0 */ - if (c == 0) return linenoiseEditMore; - } - - switch(c) { - case ENTER: /* enter */ - history_len--; - free(history[history_len]); - if (mlmode) linenoiseEditMoveEnd(l); - if (hintsCallback) { - /* Force a refresh without hints to leave the previous - * line as the user typed it after a newline. */ - linenoiseHintsCallback *hc = hintsCallback; - hintsCallback = NULL; - refreshLine(l); - hintsCallback = hc; - } - return strdup(l->buf); - case CTRL_C: /* ctrl-c */ - errno = EAGAIN; - return NULL; - case BACKSPACE: /* backspace */ - case 8: /* ctrl-h */ - linenoiseEditBackspace(l); - break; - case CTRL_D: /* ctrl-d, remove char at right of cursor, or if the - line is empty, act as end-of-file. */ - if (l->len > 0) { - linenoiseEditDelete(l); - } else { - history_len--; - free(history[history_len]); - errno = ENOENT; - return NULL; - } - break; - case CTRL_T: /* ctrl-t, swaps current character with previous. */ - /* Handle UTF-8: swap the two UTF-8 characters around cursor. */ - if (l->pos > 0 && l->pos < l->len) { - char tmp[32]; - size_t prevlen = utf8PrevCharLen(l->buf, l->pos); - size_t currlen = utf8NextCharLen(l->buf, l->pos, l->len); - size_t prevstart = l->pos - prevlen; - /* Copy current char to tmp, move previous char right, paste tmp. */ - memcpy(tmp, l->buf + l->pos, currlen); - memmove(l->buf + prevstart + currlen, l->buf + prevstart, prevlen); - memcpy(l->buf + prevstart, tmp, currlen); - if (l->pos + currlen <= l->len) l->pos += currlen; - refreshLine(l); - } - break; - case CTRL_B: /* ctrl-b */ - linenoiseEditMoveLeft(l); - break; - case CTRL_F: /* ctrl-f */ - linenoiseEditMoveRight(l); - break; - case CTRL_P: /* ctrl-p */ - linenoiseEditHistoryNext(l, LINENOISE_HISTORY_PREV); - break; - case CTRL_N: /* ctrl-n */ - linenoiseEditHistoryNext(l, LINENOISE_HISTORY_NEXT); - break; - case ESC: /* escape sequence */ - /* Read the next two bytes representing the escape sequence. - * Use two calls to handle slow terminals returning the two - * chars at different times. */ - if (read(l->ifd,seq,1) == -1) break; - if (read(l->ifd,seq+1,1) == -1) break; - - /* ESC [ sequences. */ - if (seq[0] == '[') { - if (seq[1] >= '0' && seq[1] <= '9') { - /* Extended escape, read additional byte. */ - if (read(l->ifd,seq+2,1) == -1) break; - if (seq[2] == '~') { - switch(seq[1]) { - case '3': /* Delete key. */ - linenoiseEditDelete(l); - break; - } - } - } else { - switch(seq[1]) { - case 'A': /* Up */ - linenoiseEditHistoryNext(l, LINENOISE_HISTORY_PREV); - break; - case 'B': /* Down */ - linenoiseEditHistoryNext(l, LINENOISE_HISTORY_NEXT); - break; - case 'C': /* Right */ - linenoiseEditMoveRight(l); - break; - case 'D': /* Left */ - linenoiseEditMoveLeft(l); - break; - case 'H': /* Home */ - linenoiseEditMoveHome(l); - break; - case 'F': /* End*/ - linenoiseEditMoveEnd(l); - break; - } - } - } - - /* ESC O sequences. */ - else if (seq[0] == 'O') { - switch(seq[1]) { - case 'H': /* Home */ - linenoiseEditMoveHome(l); - break; - case 'F': /* End*/ - linenoiseEditMoveEnd(l); - break; - } - } - break; - default: - /* Handle UTF-8 multi-byte sequences. When we receive the first byte - * of a multi-byte UTF-8 character, read the remaining bytes to - * complete the sequence before inserting. */ - { - char utf8[4]; - int utf8len = utf8ByteLen(c); - utf8[0] = c; - if (utf8len > 1) { - /* Read remaining bytes of the UTF-8 sequence. */ - int i; - for (i = 1; i < utf8len; i++) { - if (read(l->ifd, utf8+i, 1) != 1) break; - } - } - if (linenoiseEditInsert(l, utf8, utf8len)) return NULL; - } - break; - case CTRL_U: /* Ctrl+u, delete the whole line. */ - l->buf[0] = '\0'; - l->pos = l->len = 0; - refreshLine(l); - break; - case CTRL_K: /* Ctrl+k, delete from current to end of line. */ - l->buf[l->pos] = '\0'; - l->len = l->pos; - refreshLine(l); - break; - case CTRL_A: /* Ctrl+a, go to the start of the line */ - linenoiseEditMoveHome(l); - break; - case CTRL_E: /* ctrl+e, go to the end of the line */ - linenoiseEditMoveEnd(l); - break; - case CTRL_L: /* ctrl+l, clear screen */ - linenoiseClearScreen(); - refreshLine(l); - break; - case CTRL_W: /* ctrl+w, delete previous word */ - linenoiseEditDeletePrevWord(l); - break; - } - return linenoiseEditMore; -} - -/* This is part of the multiplexed linenoise API. See linenoiseEditStart() - * for more information. This function is called when linenoiseEditFeed() - * returns something different than NULL. At this point the user input - * is in the buffer, and we can restore the terminal in normal mode. */ -void linenoiseEditStop(struct linenoiseState *l) { - if (!isatty(l->ifd) && !getenv("LINENOISE_ASSUME_TTY")) return; - disableRawMode(l->ifd); - printf("\n"); -} - -/* This just implements a blocking loop for the multiplexed API. - * In many applications that are not event-drivern, we can just call - * the blocking linenoise API, wait for the user to complete the editing - * and return the buffer. */ -static char *linenoiseBlockingEdit(int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt) -{ - struct linenoiseState l; - - /* Editing without a buffer is invalid. */ - if (buflen == 0) { - errno = EINVAL; - return NULL; - } - - linenoiseEditStart(&l,stdin_fd,stdout_fd,buf,buflen,prompt); - char *res; - while((res = linenoiseEditFeed(&l)) == linenoiseEditMore); - linenoiseEditStop(&l); - return res; -} - -/* This special mode is used by linenoise in order to print scan codes - * on screen for debugging / development purposes. It is implemented - * by the linenoise_example program using the --keycodes option. */ -void linenoisePrintKeyCodes(void) { - char quit[4]; - - printf("Linenoise key codes debugging mode.\n" - "Press keys to see scan codes. Type 'quit' at any time to exit.\n"); - if (enableRawMode(STDIN_FILENO) == -1) return; - memset(quit,' ',4); - while(1) { - char c; - int nread; - - nread = read(STDIN_FILENO,&c,1); - if (nread <= 0) continue; - memmove(quit,quit+1,sizeof(quit)-1); /* shift string to left. */ - quit[sizeof(quit)-1] = c; /* Insert current char on the right. */ - if (memcmp(quit,"quit",sizeof(quit)) == 0) break; - - printf("'%c' %02x (%d) (type quit to exit)\n", - isprint(c) ? c : '?', (int)c, (int)c); - printf("\r"); /* Go left edge manually, we are in raw mode. */ - fflush(stdout); - } - disableRawMode(STDIN_FILENO); -} - -/* This function is called when linenoise() is called with the standard - * input file descriptor not attached to a TTY. So for example when the - * program using linenoise is called in pipe or with a file redirected - * to its standard input. In this case, we want to be able to return the - * line regardless of its length (by default we are limited to 4k). */ -static char *linenoiseNoTTY(void) { - char *line = NULL; - size_t len = 0, maxlen = 0; - - while(1) { - if (len == maxlen) { - if (maxlen == 0) maxlen = 16; - maxlen *= 2; - char *oldval = line; - line = realloc(line,maxlen); - if (line == NULL) { - if (oldval) free(oldval); - return NULL; - } - } - int c = fgetc(stdin); - if (c == EOF || c == '\n') { - if (c == EOF && len == 0) { - free(line); - return NULL; - } else { - line[len] = '\0'; - return line; - } - } else { - line[len] = c; - len++; - } - } -} - -/* The high level function that is the main API of the linenoise library. - * This function checks if the terminal has basic capabilities, just checking - * for a blacklist of stupid terminals, and later either calls the line - * editing function or uses dummy fgets() so that you will be able to type - * something even in the most desperate of the conditions. */ -char *linenoise(const char *prompt) { - char buf[LINENOISE_MAX_LINE]; - - if (!isatty(STDIN_FILENO) && !getenv("LINENOISE_ASSUME_TTY")) { - /* Not a tty: read from file / pipe. In this mode we don't want any - * limit to the line size, so we call a function to handle that. */ - return linenoiseNoTTY(); - } else if (isUnsupportedTerm()) { - size_t len; - - printf("%s",prompt); - fflush(stdout); - if (fgets(buf,LINENOISE_MAX_LINE,stdin) == NULL) return NULL; - len = strlen(buf); - while(len && (buf[len-1] == '\n' || buf[len-1] == '\r')) { - len--; - buf[len] = '\0'; - } - return strdup(buf); - } else { - char *retval = linenoiseBlockingEdit(STDIN_FILENO,STDOUT_FILENO,buf,LINENOISE_MAX_LINE,prompt); - return retval; - } -} - -/* This is just a wrapper the user may want to call in order to make sure - * the linenoise returned buffer is freed with the same allocator it was - * created with. Useful when the main program is using an alternative - * allocator. */ -void linenoiseFree(void *ptr) { - if (ptr == linenoiseEditMore) return; // Protect from API misuse. - free(ptr); -} - -/* ================================ History ================================= */ - -/* Free the history, but does not reset it. Only used when we have to - * exit() to avoid memory leaks are reported by valgrind & co. */ -static void freeHistory(void) { - if (history) { - int j; - - for (j = 0; j < history_len; j++) - free(history[j]); - free(history); - } -} - -/* At exit we'll try to fix the terminal to the initial conditions. */ -static void linenoiseAtExit(void) { - disableRawMode(STDIN_FILENO); - freeHistory(); -} - -/* This is the API call to add a new entry in the linenoise history. - * It uses a fixed array of char pointers that are shifted (memmoved) - * when the history max length is reached in order to remove the older - * entry and make room for the new one, so it is not exactly suitable for huge - * histories, but will work well for a few hundred of entries. - * - * Using a circular buffer is smarter, but a bit more complex to handle. */ -int linenoiseHistoryAdd(const char *line) { - char *linecopy; - - if (history_max_len == 0) return 0; - - /* Initialization on first call. */ - if (history == NULL) { - history = malloc(sizeof(char*)*history_max_len); - if (history == NULL) return 0; - memset(history,0,(sizeof(char*)*history_max_len)); - } - - /* Don't add duplicated lines. */ - if (history_len && !strcmp(history[history_len-1], line)) return 0; - - /* Add an heap allocated copy of the line in the history. - * If we reached the max length, remove the older line. */ - linecopy = strdup(line); - if (!linecopy) return 0; - if (history_len == history_max_len) { - free(history[0]); - memmove(history,history+1,sizeof(char*)*(history_max_len-1)); - history_len--; - } - history[history_len] = linecopy; - history_len++; - return 1; -} - -/* Set the maximum length for the history. This function can be called even - * if there is already some history, the function will make sure to retain - * just the latest 'len' elements if the new history length value is smaller - * than the amount of items already inside the history. */ -int linenoiseHistorySetMaxLen(int len) { - char **new; - - if (len < 1) return 0; - if (history) { - int tocopy = history_len; - - new = malloc(sizeof(char*)*len); - if (new == NULL) return 0; - - /* If we can't copy everything, free the elements we'll not use. */ - if (len < tocopy) { - int j; - - for (j = 0; j < tocopy-len; j++) free(history[j]); - tocopy = len; - } - memset(new,0,sizeof(char*)*len); - memcpy(new,history+(history_len-tocopy), sizeof(char*)*tocopy); - free(history); - history = new; - } - history_max_len = len; - if (history_len > history_max_len) - history_len = history_max_len; - return 1; -} - -/* Save the history in the specified file. On success 0 is returned - * otherwise -1 is returned. */ -int linenoiseHistorySave(const char *filename) { - mode_t old_umask = umask(S_IXUSR|S_IRWXG|S_IRWXO); - FILE *fp; - int j; - - fp = fopen(filename,"w"); - umask(old_umask); - if (fp == NULL) return -1; - fchmod(fileno(fp),S_IRUSR|S_IWUSR); - for (j = 0; j < history_len; j++) - fprintf(fp,"%s\n",history[j]); - fclose(fp); - return 0; -} - -/* Load the history from the specified file. If the file does not exist - * zero is returned and no operation is performed. - * - * If the file exists and the operation succeeded 0 is returned, otherwise - * on error -1 is returned. */ -int linenoiseHistoryLoad(const char *filename) { - FILE *fp = fopen(filename,"r"); - char buf[LINENOISE_MAX_LINE]; - - if (fp == NULL) return -1; - - while (fgets(buf,LINENOISE_MAX_LINE,fp) != NULL) { - char *p; - - p = strchr(buf,'\r'); - if (!p) p = strchr(buf,'\n'); - if (p) *p = '\0'; - linenoiseHistoryAdd(buf); - } - fclose(fp); - return 0; -} diff --git a/src/vendor/linenoise.h b/src/vendor/linenoise.h deleted file mode 100644 index e56b627..0000000 --- a/src/vendor/linenoise.h +++ /dev/null @@ -1,114 +0,0 @@ -/* linenoise.h -- VERSION 1.0 - * - * Guerrilla line editing library against the idea that a line editing lib - * needs to be 20,000 lines of C code. - * - * See linenoise.c for more information. - * - * ------------------------------------------------------------------------ - * - * Copyright (c) 2010-2023, Salvatore Sanfilippo - * Copyright (c) 2010-2013, Pieter Noordhuis - * - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are - * met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef __LINENOISE_H -#define __LINENOISE_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include /* For size_t. */ - -extern char *linenoiseEditMore; - -/* The linenoiseState structure represents the state during line editing. - * We pass this state to functions implementing specific editing - * functionalities. */ -struct linenoiseState { - int in_completion; /* The user pressed TAB and we are now in completion - * mode, so input is handled by completeLine(). */ - size_t completion_idx; /* Index of next completion to propose. */ - int ifd; /* Terminal stdin file descriptor. */ - int ofd; /* Terminal stdout file descriptor. */ - char *buf; /* Edited line buffer. */ - size_t buflen; /* Edited line buffer size. */ - const char *prompt; /* Prompt to display. */ - size_t plen; /* Prompt length. */ - size_t pos; /* Current cursor position. */ - size_t oldpos; /* Previous refresh cursor position. */ - size_t len; /* Current edited line length. */ - size_t cols; /* Number of columns in terminal. */ - size_t oldrows; /* Rows used by last refrehsed line (multiline mode) */ - int oldrpos; /* Cursor row from last refresh (for multiline clearing). */ - int history_index; /* The history index we are currently editing. */ -}; - -typedef struct linenoiseCompletions { - size_t len; - char **cvec; -} linenoiseCompletions; - -/* Non blocking API. */ -int linenoiseEditStart(struct linenoiseState *l, int stdin_fd, int stdout_fd, char *buf, size_t buflen, const char *prompt); -char *linenoiseEditFeed(struct linenoiseState *l); -void linenoiseEditStop(struct linenoiseState *l); -void linenoiseHide(struct linenoiseState *l); -void linenoiseShow(struct linenoiseState *l); - -/* Blocking API. */ -char *linenoise(const char *prompt); -void linenoiseFree(void *ptr); - -/* Completion API. */ -typedef void(linenoiseCompletionCallback)(const char *, linenoiseCompletions *); -typedef char*(linenoiseHintsCallback)(const char *, int *color, int *bold); -typedef void(linenoiseFreeHintsCallback)(void *); -void linenoiseSetCompletionCallback(linenoiseCompletionCallback *); -void linenoiseSetHintsCallback(linenoiseHintsCallback *); -void linenoiseSetFreeHintsCallback(linenoiseFreeHintsCallback *); -void linenoiseAddCompletion(linenoiseCompletions *, const char *); - -/* History API. */ -int linenoiseHistoryAdd(const char *line); -int linenoiseHistorySetMaxLen(int len); -int linenoiseHistorySave(const char *filename); -int linenoiseHistoryLoad(const char *filename); - -/* Other utilities. */ -void linenoiseClearScreen(void); -void linenoiseSetMultiLine(int ml); -void linenoisePrintKeyCodes(void); -void linenoiseMaskModeEnable(void); -void linenoiseMaskModeDisable(void); - -#ifdef __cplusplus -} -#endif - -#endif /* __LINENOISE_H */ diff --git a/src/vendor/mpc.c b/src/vendor/mpc.c deleted file mode 100644 index 73a658b..0000000 --- a/src/vendor/mpc.c +++ /dev/null @@ -1,4128 +0,0 @@ -#include "mpc.h" - -/* -** State Type -*/ - -static mpc_state_t mpc_state_invalid(void) { - mpc_state_t s; - s.pos = -1; - s.row = -1; - s.col = -1; - s.term = 0; - return s; -} - -static mpc_state_t mpc_state_new(void) { - mpc_state_t s; - s.pos = 0; - s.row = 0; - s.col = 0; - s.term = 0; - return s; -} - -/* -** Input Type -*/ - -/* -** In mpc the input type has three modes of -** operation: String, File and Pipe. -** -** String is easy. The whole contents are -** loaded into a buffer and scanned through. -** The cursor can jump around at will making -** backtracking easy. -** -** The second is a File which is also somewhat -** easy. The contents are never loaded into -** memory but backtracking can still be achieved -** by seeking in the file at different positions. -** -** The final mode is Pipe. This is the difficult -** one. As we assume pipes cannot be seeked - and -** only support a single character lookahead at -** any point, when the input is marked for a -** potential backtracking we start buffering any -** input. -** -** This means that if we are requested to seek -** back we can simply start reading from the -** buffer instead of the input. -** -** Of course using `mpc_predictive` will disable -** backtracking and make LL(1) grammars easy -** to parse for all input methods. -** -*/ - -enum { - MPC_INPUT_STRING = 0, - MPC_INPUT_FILE = 1, - MPC_INPUT_PIPE = 2 -}; - -enum { - MPC_INPUT_MARKS_MIN = 32 -}; - -enum { - MPC_INPUT_MEM_NUM = 512 -}; - -typedef struct { - char mem[64]; -} mpc_mem_t; - -typedef struct { - - int type; - char *filename; - mpc_state_t state; - - char *string; - char *buffer; - FILE *file; - - int suppress; - int backtrack; - int marks_slots; - int marks_num; - mpc_state_t *marks; - - char *lasts; - char last; - - size_t mem_index; - char mem_full[MPC_INPUT_MEM_NUM]; - mpc_mem_t mem[MPC_INPUT_MEM_NUM]; - -} mpc_input_t; - -static mpc_input_t *mpc_input_new_string(const char *filename, const char *string) { - - mpc_input_t *i = malloc(sizeof(mpc_input_t)); - - i->filename = malloc(strlen(filename) + 1); - strcpy(i->filename, filename); - i->type = MPC_INPUT_STRING; - - i->state = mpc_state_new(); - - i->string = malloc(strlen(string) + 1); - strcpy(i->string, string); - i->buffer = NULL; - i->file = NULL; - - i->suppress = 0; - i->backtrack = 1; - i->marks_num = 0; - i->marks_slots = MPC_INPUT_MARKS_MIN; - i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); - i->lasts = malloc(sizeof(char) * i->marks_slots); - i->last = '\0'; - - i->mem_index = 0; - memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); - - return i; -} - -static mpc_input_t *mpc_input_new_nstring(const char *filename, const char *string, size_t length) { - - mpc_input_t *i = malloc(sizeof(mpc_input_t)); - - i->filename = malloc(strlen(filename) + 1); - strcpy(i->filename, filename); - i->type = MPC_INPUT_STRING; - - i->state = mpc_state_new(); - - i->string = malloc(length + 1); - strncpy(i->string, string, length); - i->string[length] = '\0'; - i->buffer = NULL; - i->file = NULL; - - i->suppress = 0; - i->backtrack = 1; - i->marks_num = 0; - i->marks_slots = MPC_INPUT_MARKS_MIN; - i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); - i->lasts = malloc(sizeof(char) * i->marks_slots); - i->last = '\0'; - - i->mem_index = 0; - memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); - - return i; - -} - -static mpc_input_t *mpc_input_new_pipe(const char *filename, FILE *pipe) { - - mpc_input_t *i = malloc(sizeof(mpc_input_t)); - - i->filename = malloc(strlen(filename) + 1); - strcpy(i->filename, filename); - - i->type = MPC_INPUT_PIPE; - i->state = mpc_state_new(); - - i->string = NULL; - i->buffer = NULL; - i->file = pipe; - - i->suppress = 0; - i->backtrack = 1; - i->marks_num = 0; - i->marks_slots = MPC_INPUT_MARKS_MIN; - i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); - i->lasts = malloc(sizeof(char) * i->marks_slots); - i->last = '\0'; - - i->mem_index = 0; - memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); - - return i; - -} - -static mpc_input_t *mpc_input_new_file(const char *filename, FILE *file) { - - mpc_input_t *i = malloc(sizeof(mpc_input_t)); - - i->filename = malloc(strlen(filename) + 1); - strcpy(i->filename, filename); - i->type = MPC_INPUT_FILE; - i->state = mpc_state_new(); - - i->string = NULL; - i->buffer = NULL; - i->file = file; - - i->suppress = 0; - i->backtrack = 1; - i->marks_num = 0; - i->marks_slots = MPC_INPUT_MARKS_MIN; - i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); - i->lasts = malloc(sizeof(char) * i->marks_slots); - i->last = '\0'; - - i->mem_index = 0; - memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); - - return i; -} - -static void mpc_input_delete(mpc_input_t *i) { - - free(i->filename); - - if (i->type == MPC_INPUT_STRING) { free(i->string); } - if (i->type == MPC_INPUT_PIPE) { free(i->buffer); } - - free(i->marks); - free(i->lasts); - free(i); -} - -static int mpc_mem_ptr(mpc_input_t *i, void *p) { - return - (char*)p >= (char*)(i->mem) && - (char*)p < (char*)(i->mem) + (MPC_INPUT_MEM_NUM * sizeof(mpc_mem_t)); -} - -static void *mpc_malloc(mpc_input_t *i, size_t n) { - size_t j; - char *p; - - if (n > sizeof(mpc_mem_t)) { return malloc(n); } - - j = i->mem_index; - do { - if (!i->mem_full[i->mem_index]) { - p = (void*)(i->mem + i->mem_index); - i->mem_full[i->mem_index] = 1; - i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; - return p; - } - i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; - } while (j != i->mem_index); - - return malloc(n); -} - -static void *mpc_calloc(mpc_input_t *i, size_t n, size_t m) { - char *x = mpc_malloc(i, n * m); - memset(x, 0, n * m); - return x; -} - -static void mpc_free(mpc_input_t *i, void *p) { - size_t j; - if (!mpc_mem_ptr(i, p)) { free(p); return; } - j = ((size_t)(((char*)p) - ((char*)i->mem))) / sizeof(mpc_mem_t); - i->mem_full[j] = 0; -} - -static void *mpc_realloc(mpc_input_t *i, void *p, size_t n) { - - char *q = NULL; - - if (!mpc_mem_ptr(i, p)) { return realloc(p, n); } - - if (n > sizeof(mpc_mem_t)) { - q = malloc(n); - memcpy(q, p, sizeof(mpc_mem_t)); - mpc_free(i, p); - return q; - } - - return p; -} - -static void *mpc_export(mpc_input_t *i, void *p) { - char *q = NULL; - if (!mpc_mem_ptr(i, p)) { return p; } - q = malloc(sizeof(mpc_mem_t)); - memcpy(q, p, sizeof(mpc_mem_t)); - mpc_free(i, p); - return q; -} - -static void mpc_input_backtrack_disable(mpc_input_t *i) { i->backtrack--; } -static void mpc_input_backtrack_enable(mpc_input_t *i) { i->backtrack++; } - -static void mpc_input_suppress_disable(mpc_input_t *i) { i->suppress--; } -static void mpc_input_suppress_enable(mpc_input_t *i) { i->suppress++; } - -static void mpc_input_mark(mpc_input_t *i) { - - if (i->backtrack < 1) { return; } - - i->marks_num++; - - if (i->marks_num > i->marks_slots) { - i->marks_slots = i->marks_num + i->marks_num / 2; - i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); - i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); - } - - i->marks[i->marks_num-1] = i->state; - i->lasts[i->marks_num-1] = i->last; - - if (i->type == MPC_INPUT_PIPE && i->marks_num == 1) { - i->buffer = calloc(1, 1); - } - -} - -static void mpc_input_unmark(mpc_input_t *i) { - int j; - - if (i->backtrack < 1) { return; } - - i->marks_num--; - - if (i->marks_slots > i->marks_num + i->marks_num / 2 - && i->marks_slots > MPC_INPUT_MARKS_MIN) { - i->marks_slots = - i->marks_num > MPC_INPUT_MARKS_MIN ? - i->marks_num : MPC_INPUT_MARKS_MIN; - i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); - i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); - } - - if (i->type == MPC_INPUT_PIPE && i->marks_num == 0) { - for (j = strlen(i->buffer) - 1; j >= 0; j--) - ungetc(i->buffer[j], i->file); - - free(i->buffer); - i->buffer = NULL; - } - -} - -static void mpc_input_rewind(mpc_input_t *i) { - - if (i->backtrack < 1) { return; } - - i->state = i->marks[i->marks_num-1]; - i->last = i->lasts[i->marks_num-1]; - - if (i->type == MPC_INPUT_FILE) { - fseek(i->file, i->state.pos, SEEK_SET); - } - - mpc_input_unmark(i); -} - -static int mpc_input_buffer_in_range(mpc_input_t *i) { - return i->state.pos < (long)(strlen(i->buffer) + i->marks[0].pos); -} - -static char mpc_input_buffer_get(mpc_input_t *i) { - return i->buffer[i->state.pos - i->marks[0].pos]; -} - -static char mpc_input_getc(mpc_input_t *i) { - - char c = '\0'; - - switch (i->type) { - - case MPC_INPUT_STRING: return i->string[i->state.pos]; - case MPC_INPUT_FILE: c = fgetc(i->file); return c; - case MPC_INPUT_PIPE: - - if (!i->buffer) { c = getc(i->file); return c; } - - if (i->buffer && mpc_input_buffer_in_range(i)) { - c = mpc_input_buffer_get(i); - return c; - } else { - c = getc(i->file); - return c; - } - - default: return c; - } -} - -static char mpc_input_peekc(mpc_input_t *i) { - - char c = '\0'; - - switch (i->type) { - case MPC_INPUT_STRING: return i->string[i->state.pos]; - case MPC_INPUT_FILE: - - c = fgetc(i->file); - if (feof(i->file)) { return '\0'; } - - fseek(i->file, -1, SEEK_CUR); - return c; - - case MPC_INPUT_PIPE: - - if (!i->buffer) { - c = getc(i->file); - if (feof(i->file)) { return '\0'; } - ungetc(c, i->file); - return c; - } - - if (i->buffer && mpc_input_buffer_in_range(i)) { - return mpc_input_buffer_get(i); - } else { - c = getc(i->file); - if (feof(i->file)) { return '\0'; } - ungetc(c, i->file); - return c; - } - - default: return c; - } - -} - -static int mpc_input_terminated(mpc_input_t *i) { - return mpc_input_peekc(i) == '\0'; -} - -static int mpc_input_failure(mpc_input_t *i, char c) { - - switch (i->type) { - case MPC_INPUT_STRING: { break; } - case MPC_INPUT_FILE: fseek(i->file, -1, SEEK_CUR); { break; } - case MPC_INPUT_PIPE: { - - if (!i->buffer) { ungetc(c, i->file); break; } - - if (i->buffer && mpc_input_buffer_in_range(i)) { - break; - } else { - ungetc(c, i->file); - } - } - default: { break; } - } - return 0; -} - -static int mpc_input_success(mpc_input_t *i, char c, char **o) { - - if (i->type == MPC_INPUT_PIPE - && i->buffer && !mpc_input_buffer_in_range(i)) { - i->buffer = realloc(i->buffer, strlen(i->buffer) + 2); - i->buffer[strlen(i->buffer) + 1] = '\0'; - i->buffer[strlen(i->buffer) + 0] = c; - } - - i->last = c; - i->state.pos++; - i->state.col++; - - if (c == '\n') { - i->state.col = 0; - i->state.row++; - } - - if (o) { - (*o) = mpc_malloc(i, 2); - (*o)[0] = c; - (*o)[1] = '\0'; - } - - return 1; -} - -static int mpc_input_any(mpc_input_t *i, char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return mpc_input_success(i, x, o); -} - -static int mpc_input_char(mpc_input_t *i, char c, char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return x == c ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); -} - -static int mpc_input_range(mpc_input_t *i, char c, char d, char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return x >= c && x <= d ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); -} - -static int mpc_input_oneof(mpc_input_t *i, const char *c, char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return strchr(c, x) != 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); -} - -static int mpc_input_noneof(mpc_input_t *i, const char *c, char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return strchr(c, x) == 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); -} - -static int mpc_input_satisfy(mpc_input_t *i, int(*cond)(char), char **o) { - char x; - if (mpc_input_terminated(i)) { return 0; } - x = mpc_input_getc(i); - return cond(x) ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); -} - -static int mpc_input_string(mpc_input_t *i, const char *c, char **o) { - - const char *x = c; - - mpc_input_mark(i); - while (*x) { - if (!mpc_input_char(i, *x, NULL)) { - mpc_input_rewind(i); - return 0; - } - x++; - } - mpc_input_unmark(i); - - *o = mpc_malloc(i, strlen(c) + 1); - strcpy(*o, c); - return 1; -} - -static int mpc_input_anchor(mpc_input_t* i, int(*f)(char,char), char **o) { - *o = NULL; - return f(i->last, mpc_input_peekc(i)); -} - -static int mpc_input_soi(mpc_input_t* i, char **o) { - *o = NULL; - return i->last == '\0'; -} - -static int mpc_input_eoi(mpc_input_t* i, char **o) { - *o = NULL; - if (i->state.term) { - return 0; - } else if (mpc_input_terminated(i)) { - i->state.term = 1; - return 1; - } else { - return 0; - } -} - -static mpc_state_t *mpc_input_state_copy(mpc_input_t *i) { - mpc_state_t *r = mpc_malloc(i, sizeof(mpc_state_t)); - memcpy(r, &i->state, sizeof(mpc_state_t)); - return r; -} - -/* -** Error Type -*/ - -void mpc_err_delete(mpc_err_t *x) { - int i; - for (i = 0; i < x->expected_num; i++) { free(x->expected[i]); } - free(x->expected); - free(x->filename); - free(x->failure); - free(x); -} - -void mpc_err_print(mpc_err_t *x) { - mpc_err_print_to(x, stdout); -} - -void mpc_err_print_to(mpc_err_t *x, FILE *f) { - char *str = mpc_err_string(x); - fprintf(f, "%s", str); - free(str); -} - -static void mpc_err_string_cat(char *buffer, int *pos, int *max, char const *fmt, ...) { - /* TODO: Error Checking on Length */ - int left = ((*max) - (*pos)); - va_list va; - va_start(va, fmt); - if (left < 0) { left = 0;} - (*pos) += vsprintf(buffer + (*pos), fmt, va); - va_end(va); -} - -static const char *mpc_err_char_unescape(char c, char char_unescape_buffer[4]) { - - char_unescape_buffer[0] = '\''; - char_unescape_buffer[1] = ' '; - char_unescape_buffer[2] = '\''; - char_unescape_buffer[3] = '\0'; - - switch (c) { - case '\a': return "bell"; - case '\b': return "backspace"; - case '\f': return "formfeed"; - case '\r': return "carriage return"; - case '\v': return "vertical tab"; - case '\0': return "end of input"; - case '\n': return "newline"; - case '\t': return "tab"; - case ' ' : return "space"; - default: - char_unescape_buffer[1] = c; - return char_unescape_buffer; - } - -} - -char *mpc_err_string(mpc_err_t *x) { - - int i; - int pos = 0; - int max = 1023; - char *buffer = calloc(1, 1024); - char char_unescape_buffer[4]; - - if (x->failure) { - mpc_err_string_cat(buffer, &pos, &max, - "%s: error: %s\n", x->filename, x->failure); - return buffer; - } - - mpc_err_string_cat(buffer, &pos, &max, - "%s:%li:%li: error: expected ", x->filename, x->state.row+1, x->state.col+1); - - if (x->expected_num == 0) { mpc_err_string_cat(buffer, &pos, &max, "ERROR: NOTHING EXPECTED"); } - if (x->expected_num == 1) { mpc_err_string_cat(buffer, &pos, &max, "%s", x->expected[0]); } - if (x->expected_num >= 2) { - - for (i = 0; i < x->expected_num-2; i++) { - mpc_err_string_cat(buffer, &pos, &max, "%s, ", x->expected[i]); - } - - mpc_err_string_cat(buffer, &pos, &max, "%s or %s", - x->expected[x->expected_num-2], - x->expected[x->expected_num-1]); - } - - mpc_err_string_cat(buffer, &pos, &max, " at "); - mpc_err_string_cat(buffer, &pos, &max, mpc_err_char_unescape(x->received, char_unescape_buffer)); - mpc_err_string_cat(buffer, &pos, &max, "\n"); - - return realloc(buffer, strlen(buffer) + 1); -} - -static mpc_err_t *mpc_err_new(mpc_input_t *i, const char *expected) { - mpc_err_t *x; - if (i->suppress) { return NULL; } - x = mpc_malloc(i, sizeof(mpc_err_t)); - x->filename = mpc_malloc(i, strlen(i->filename) + 1); - strcpy(x->filename, i->filename); - x->state = i->state; - x->expected_num = 1; - x->expected = mpc_malloc(i, sizeof(char*)); - x->expected[0] = mpc_malloc(i, strlen(expected) + 1); - strcpy(x->expected[0], expected); - x->failure = NULL; - x->received = mpc_input_peekc(i); - return x; -} - -static mpc_err_t *mpc_err_fail(mpc_input_t *i, const char *failure) { - mpc_err_t *x; - if (i->suppress) { return NULL; } - x = mpc_malloc(i, sizeof(mpc_err_t)); - x->filename = mpc_malloc(i, strlen(i->filename) + 1); - strcpy(x->filename, i->filename); - x->state = i->state; - x->expected_num = 0; - x->expected = NULL; - x->failure = mpc_malloc(i, strlen(failure) + 1); - strcpy(x->failure, failure); - x->received = ' '; - return x; -} - -static mpc_err_t *mpc_err_file(const char *filename, const char *failure) { - mpc_err_t *x; - x = malloc(sizeof(mpc_err_t)); - x->filename = malloc(strlen(filename) + 1); - strcpy(x->filename, filename); - x->state = mpc_state_new(); - x->expected_num = 0; - x->expected = NULL; - x->failure = malloc(strlen(failure) + 1); - strcpy(x->failure, failure); - x->received = ' '; - return x; -} - -static void mpc_err_delete_internal(mpc_input_t *i, mpc_err_t *x) { - int j; - if (x == NULL) { return; } - for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } - mpc_free(i, x->expected); - mpc_free(i, x->filename); - mpc_free(i, x->failure); - mpc_free(i, x); -} - -static mpc_err_t *mpc_err_export(mpc_input_t *i, mpc_err_t *x) { - int j; - for (j = 0; j < x->expected_num; j++) { - x->expected[j] = mpc_export(i, x->expected[j]); - } - x->expected = mpc_export(i, x->expected); - x->filename = mpc_export(i, x->filename); - x->failure = mpc_export(i, x->failure); - return mpc_export(i, x); -} - -static int mpc_err_contains_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { - int j; - (void)i; - for (j = 0; j < x->expected_num; j++) { - if (strcmp(x->expected[j], expected) == 0) { return 1; } - } - return 0; -} - -static void mpc_err_add_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { - (void)i; - x->expected_num++; - x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); - x->expected[x->expected_num-1] = mpc_malloc(i, strlen(expected) + 1); - strcpy(x->expected[x->expected_num-1], expected); -} - -static mpc_err_t *mpc_err_or(mpc_input_t *i, mpc_err_t** x, int n) { - - int j, k, fst; - mpc_err_t *e; - - fst = -1; - for (j = 0; j < n; j++) { - if (x[j] != NULL) { fst = j; } - } - - if (fst == -1) { return NULL; } - - e = mpc_malloc(i, sizeof(mpc_err_t)); - e->state = mpc_state_invalid(); - e->expected_num = 0; - e->expected = NULL; - e->failure = NULL; - e->filename = mpc_malloc(i, strlen(x[fst]->filename)+1); - strcpy(e->filename, x[fst]->filename); - - for (j = 0; j < n; j++) { - if (x[j] == NULL) { continue; } - if (x[j]->state.pos > e->state.pos) { e->state = x[j]->state; } - } - - for (j = 0; j < n; j++) { - if (x[j] == NULL) { continue; } - if (x[j]->state.pos < e->state.pos) { continue; } - - if (x[j]->failure) { - e->failure = mpc_malloc(i, strlen(x[j]->failure)+1); - strcpy(e->failure, x[j]->failure); - break; - } - - e->received = x[j]->received; - - for (k = 0; k < x[j]->expected_num; k++) { - if (!mpc_err_contains_expected(i, e, x[j]->expected[k])) { - mpc_err_add_expected(i, e, x[j]->expected[k]); - } - } - } - - for (j = 0; j < n; j++) { - if (x[j] == NULL) { continue; } - mpc_err_delete_internal(i, x[j]); - } - - return e; -} - -static mpc_err_t *mpc_err_repeat(mpc_input_t *i, mpc_err_t *x, const char *prefix) { - - int j = 0; - size_t l = 0; - char *expect = NULL; - - if (x == NULL) { return NULL; } - - if (x->expected_num == 0) { - expect = mpc_calloc(i, 1, 1); - x->expected_num = 1; - x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); - x->expected[0] = expect; - return x; - } - - else if (x->expected_num == 1) { - expect = mpc_malloc(i, strlen(prefix) + strlen(x->expected[0]) + 1); - strcpy(expect, prefix); - strcat(expect, x->expected[0]); - mpc_free(i, x->expected[0]); - x->expected[0] = expect; - return x; - } - - else if (x->expected_num > 1) { - - l += strlen(prefix); - for (j = 0; j < x->expected_num-2; j++) { - l += strlen(x->expected[j]) + strlen(", "); - } - l += strlen(x->expected[x->expected_num-2]); - l += strlen(" or "); - l += strlen(x->expected[x->expected_num-1]); - - expect = mpc_malloc(i, l + 1); - - strcpy(expect, prefix); - for (j = 0; j < x->expected_num-2; j++) { - strcat(expect, x->expected[j]); strcat(expect, ", "); - } - strcat(expect, x->expected[x->expected_num-2]); - strcat(expect, " or "); - strcat(expect, x->expected[x->expected_num-1]); - - for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } - - x->expected_num = 1; - x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); - x->expected[0] = expect; - return x; - } - - return NULL; -} - -static mpc_err_t *mpc_err_many1(mpc_input_t *i, mpc_err_t *x) { - return mpc_err_repeat(i, x, "one or more of "); -} - -static mpc_err_t *mpc_err_count(mpc_input_t *i, mpc_err_t *x, int n) { - mpc_err_t *y; - int digits = n/10 + 1; - char *prefix; - prefix = mpc_malloc(i, digits + strlen(" of ") + 1); - if (!prefix) { - return NULL; - } - sprintf(prefix, "%i of ", n); - y = mpc_err_repeat(i, x, prefix); - mpc_free(i, prefix); - return y; -} - -static mpc_err_t *mpc_err_merge(mpc_input_t *i, mpc_err_t *x, mpc_err_t *y) { - mpc_err_t *errs[2]; - errs[0] = x; - errs[1] = y; - return mpc_err_or(i, errs, 2); -} - -/* -** Parser Type -*/ - -enum { - MPC_TYPE_UNDEFINED = 0, - MPC_TYPE_PASS = 1, - MPC_TYPE_FAIL = 2, - MPC_TYPE_LIFT = 3, - MPC_TYPE_LIFT_VAL = 4, - MPC_TYPE_EXPECT = 5, - MPC_TYPE_ANCHOR = 6, - MPC_TYPE_STATE = 7, - - MPC_TYPE_ANY = 8, - MPC_TYPE_SINGLE = 9, - MPC_TYPE_ONEOF = 10, - MPC_TYPE_NONEOF = 11, - MPC_TYPE_RANGE = 12, - MPC_TYPE_SATISFY = 13, - MPC_TYPE_STRING = 14, - - MPC_TYPE_APPLY = 15, - MPC_TYPE_APPLY_TO = 16, - MPC_TYPE_PREDICT = 17, - MPC_TYPE_NOT = 18, - MPC_TYPE_MAYBE = 19, - MPC_TYPE_MANY = 20, - MPC_TYPE_MANY1 = 21, - MPC_TYPE_COUNT = 22, - - MPC_TYPE_OR = 23, - MPC_TYPE_AND = 24, - - MPC_TYPE_CHECK = 25, - MPC_TYPE_CHECK_WITH = 26, - - MPC_TYPE_SOI = 27, - MPC_TYPE_EOI = 28, - - MPC_TYPE_SEPBY1 = 29 -}; - -typedef struct { char *m; } mpc_pdata_fail_t; -typedef struct { mpc_ctor_t lf; void *x; } mpc_pdata_lift_t; -typedef struct { mpc_parser_t *x; char *m; } mpc_pdata_expect_t; -typedef struct { int(*f)(char,char); } mpc_pdata_anchor_t; -typedef struct { char x; } mpc_pdata_single_t; -typedef struct { char x; char y; } mpc_pdata_range_t; -typedef struct { int(*f)(char); } mpc_pdata_satisfy_t; -typedef struct { char *x; } mpc_pdata_string_t; -typedef struct { mpc_parser_t *x; mpc_apply_t f; } mpc_pdata_apply_t; -typedef struct { mpc_parser_t *x; mpc_apply_to_t f; void *d; } mpc_pdata_apply_to_t; -typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_t f; char *e; } mpc_pdata_check_t; -typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_with_t f; void *d; char *e; } mpc_pdata_check_with_t; -typedef struct { mpc_parser_t *x; } mpc_pdata_predict_t; -typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_ctor_t lf; } mpc_pdata_not_t; -typedef struct { int n; mpc_fold_t f; mpc_parser_t *x; mpc_dtor_t dx; } mpc_pdata_repeat_t; -typedef struct { int n; mpc_parser_t **xs; } mpc_pdata_or_t; -typedef struct { int n; mpc_fold_t f; mpc_parser_t **xs; mpc_dtor_t *dxs; } mpc_pdata_and_t; -typedef struct { int n; mpc_fold_t f; mpc_parser_t *x; mpc_parser_t *sep; } mpc_pdata_sepby1; - -typedef union { - mpc_pdata_fail_t fail; - mpc_pdata_lift_t lift; - mpc_pdata_expect_t expect; - mpc_pdata_anchor_t anchor; - mpc_pdata_single_t single; - mpc_pdata_range_t range; - mpc_pdata_satisfy_t satisfy; - mpc_pdata_string_t string; - mpc_pdata_apply_t apply; - mpc_pdata_apply_to_t apply_to; - mpc_pdata_check_t check; - mpc_pdata_check_with_t check_with; - mpc_pdata_predict_t predict; - mpc_pdata_not_t not; - mpc_pdata_repeat_t repeat; - mpc_pdata_and_t and; - mpc_pdata_or_t or; - mpc_pdata_sepby1 sepby1; -} mpc_pdata_t; - -struct mpc_parser_t { - char *name; - mpc_pdata_t data; - char type; - char retained; -}; - -static mpc_val_t *mpcf_input_nth_free(mpc_input_t *i, int n, mpc_val_t **xs, int x) { - int j; - for (j = 0; j < n; j++) { if (j != x) { mpc_free(i, xs[j]); } } - return xs[x]; -} - -static mpc_val_t *mpcf_input_fst_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 0); } -static mpc_val_t *mpcf_input_snd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 1); } -static mpc_val_t *mpcf_input_trd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 2); } - -static mpc_val_t *mpcf_input_strfold(mpc_input_t *i, int n, mpc_val_t **xs) { - int j; - size_t l = 0; - if (n == 0) { return mpc_calloc(i, 1, 1); } - for (j = 0; j < n; j++) { l += strlen(xs[j]); } - xs[0] = mpc_realloc(i, xs[0], l + 1); - for (j = 1; j < n; j++) { strcat(xs[0], xs[j]); mpc_free(i, xs[j]); } - return xs[0]; -} - -static mpc_val_t *mpcf_input_state_ast(mpc_input_t *i, int n, mpc_val_t **xs) { - mpc_state_t *s = ((mpc_state_t**)xs)[0]; - mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; - a = mpc_ast_state(a, *s); - mpc_free(i, s); - (void) n; - return a; -} - -static mpc_val_t *mpc_parse_fold(mpc_input_t *i, mpc_fold_t f, int n, mpc_val_t **xs) { - int j; - if (f == mpcf_null) { return mpcf_null(n, xs); } - if (f == mpcf_fst) { return mpcf_fst(n, xs); } - if (f == mpcf_snd) { return mpcf_snd(n, xs); } - if (f == mpcf_trd) { return mpcf_trd(n, xs); } - if (f == mpcf_fst_free) { return mpcf_input_fst_free(i, n, xs); } - if (f == mpcf_snd_free) { return mpcf_input_snd_free(i, n, xs); } - if (f == mpcf_trd_free) { return mpcf_input_trd_free(i, n, xs); } - if (f == mpcf_strfold) { return mpcf_input_strfold(i, n, xs); } - if (f == mpcf_state_ast) { return mpcf_input_state_ast(i, n, xs); } - for (j = 0; j < n; j++) { xs[j] = mpc_export(i, xs[j]); } - return f(j, xs); -} - -static mpc_val_t *mpcf_input_free(mpc_input_t *i, mpc_val_t *x) { - mpc_free(i, x); - return NULL; -} - -static mpc_val_t *mpcf_input_str_ast(mpc_input_t *i, mpc_val_t *c) { - mpc_ast_t *a = mpc_ast_new("", c); - mpc_free(i, c); - return a; -} - -static mpc_val_t *mpc_parse_apply(mpc_input_t *i, mpc_apply_t f, mpc_val_t *x) { - if (f == mpcf_free) { return mpcf_input_free(i, x); } - if (f == mpcf_str_ast) { return mpcf_input_str_ast(i, x); } - return f(mpc_export(i, x)); -} - -static mpc_val_t *mpc_parse_apply_to(mpc_input_t *i, mpc_apply_to_t f, mpc_val_t *x, mpc_val_t *d) { - return f(mpc_export(i, x), d); -} - -static void mpc_parse_dtor(mpc_input_t *i, mpc_dtor_t d, mpc_val_t *x) { - if (d == free) { mpc_free(i, x); return; } - d(mpc_export(i, x)); -} - -enum { - MPC_PARSE_STACK_MIN = 4 -}; - -#define MPC_SUCCESS(x) r->output = x; return 1 -#define MPC_FAILURE(x) r->error = x; return 0 -#define MPC_PRIMITIVE(x) \ - if (x) { MPC_SUCCESS(r->output); } \ - else { MPC_FAILURE(NULL); } - -#define MPC_MAX_RECURSION_DEPTH 1000 - -static mpc_result_t *mpc_grow_results(mpc_input_t *i, int j, mpc_result_t *results_stk, mpc_result_t *results){ - mpc_result_t *tmp_results = results; - - if (j == MPC_PARSE_STACK_MIN) { - int results_slots = j + j / 2; - tmp_results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); - memcpy(tmp_results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); - } else if (j >= MPC_PARSE_STACK_MIN) { - int results_slots = j + j / 2; - tmp_results = mpc_realloc(i, tmp_results, sizeof(mpc_result_t) * results_slots); - } - - return tmp_results; -} - -static int mpc_parse_run(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r, mpc_err_t **e, int depth) { - - int j = 0, k = 0; - mpc_result_t results_stk[MPC_PARSE_STACK_MIN]; - mpc_result_t *results; - - if (depth == MPC_MAX_RECURSION_DEPTH) - { - MPC_FAILURE(mpc_err_fail(i, "Maximum recursion depth exceeded!")); - } - - switch (p->type) { - - /* Basic Parsers */ - - case MPC_TYPE_ANY: MPC_PRIMITIVE(mpc_input_any(i, (char**)&r->output)); - case MPC_TYPE_SINGLE: MPC_PRIMITIVE(mpc_input_char(i, p->data.single.x, (char**)&r->output)); - case MPC_TYPE_RANGE: MPC_PRIMITIVE(mpc_input_range(i, p->data.range.x, p->data.range.y, (char**)&r->output)); - case MPC_TYPE_ONEOF: MPC_PRIMITIVE(mpc_input_oneof(i, p->data.string.x, (char**)&r->output)); - case MPC_TYPE_NONEOF: MPC_PRIMITIVE(mpc_input_noneof(i, p->data.string.x, (char**)&r->output)); - case MPC_TYPE_SATISFY: MPC_PRIMITIVE(mpc_input_satisfy(i, p->data.satisfy.f, (char**)&r->output)); - case MPC_TYPE_STRING: MPC_PRIMITIVE(mpc_input_string(i, p->data.string.x, (char**)&r->output)); - case MPC_TYPE_ANCHOR: MPC_PRIMITIVE(mpc_input_anchor(i, p->data.anchor.f, (char**)&r->output)); - case MPC_TYPE_SOI: MPC_PRIMITIVE(mpc_input_soi(i, (char**)&r->output)); - case MPC_TYPE_EOI: MPC_PRIMITIVE(mpc_input_eoi(i, (char**)&r->output)); - - /* Other parsers */ - - case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_fail(i, "Parser Undefined!")); - case MPC_TYPE_PASS: MPC_SUCCESS(NULL); - case MPC_TYPE_FAIL: MPC_FAILURE(mpc_err_fail(i, p->data.fail.m)); - case MPC_TYPE_LIFT: MPC_SUCCESS(p->data.lift.lf()); - case MPC_TYPE_LIFT_VAL: MPC_SUCCESS(p->data.lift.x); - case MPC_TYPE_STATE: MPC_SUCCESS(mpc_input_state_copy(i)); - - /* Application Parsers */ - - case MPC_TYPE_APPLY: - if (mpc_parse_run(i, p->data.apply.x, r, e, depth+1)) { - MPC_SUCCESS(mpc_parse_apply(i, p->data.apply.f, r->output)); - } else { - MPC_FAILURE(r->output); - } - - case MPC_TYPE_APPLY_TO: - if (mpc_parse_run(i, p->data.apply_to.x, r, e, depth+1)) { - MPC_SUCCESS(mpc_parse_apply_to(i, p->data.apply_to.f, r->output, p->data.apply_to.d)); - } else { - MPC_FAILURE(r->error); - } - - case MPC_TYPE_CHECK: - if (mpc_parse_run(i, p->data.check.x, r, e, depth+1)) { - if (p->data.check.f(&r->output)) { - MPC_SUCCESS(r->output); - } else { - mpc_parse_dtor(i, p->data.check.dx, r->output); - MPC_FAILURE(mpc_err_fail(i, p->data.check.e)); - } - } else { - MPC_FAILURE(r->error); - } - - case MPC_TYPE_CHECK_WITH: - if (mpc_parse_run(i, p->data.check_with.x, r, e, depth+1)) { - if (p->data.check_with.f(&r->output, p->data.check_with.d)) { - MPC_SUCCESS(r->output); - } else { - mpc_parse_dtor(i, p->data.check.dx, r->output); - MPC_FAILURE(mpc_err_fail(i, p->data.check_with.e)); - } - } else { - MPC_FAILURE(r->error); - } - - case MPC_TYPE_EXPECT: - mpc_input_suppress_enable(i); - if (mpc_parse_run(i, p->data.expect.x, r, e, depth+1)) { - mpc_input_suppress_disable(i); - MPC_SUCCESS(r->output); - } else { - mpc_input_suppress_disable(i); - MPC_FAILURE(mpc_err_new(i, p->data.expect.m)); - } - - case MPC_TYPE_PREDICT: - mpc_input_backtrack_disable(i); - if (mpc_parse_run(i, p->data.predict.x, r, e, depth+1)) { - mpc_input_backtrack_enable(i); - MPC_SUCCESS(r->output); - } else { - mpc_input_backtrack_enable(i); - MPC_FAILURE(r->error); - } - - /* Optional Parsers */ - - /* TODO: Update Not Error Message */ - - case MPC_TYPE_NOT: - mpc_input_mark(i); - mpc_input_suppress_enable(i); - if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { - mpc_input_rewind(i); - mpc_input_suppress_disable(i); - mpc_parse_dtor(i, p->data.not.dx, r->output); - MPC_FAILURE(mpc_err_new(i, "opposite")); - } else { - mpc_input_unmark(i); - mpc_input_suppress_disable(i); - MPC_SUCCESS(p->data.not.lf()); - } - - case MPC_TYPE_MAYBE: - if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { - MPC_SUCCESS(r->output); - } else { - *e = mpc_err_merge(i, *e, r->error); - MPC_SUCCESS(p->data.not.lf()); - } - - /* Repeat Parsers */ - - case MPC_TYPE_MANY: - - results = results_stk; - - while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { - j++; - results = mpc_grow_results(i, j, results_stk, results); - } - - *e = mpc_err_merge(i, *e, results[j].error); - - MPC_SUCCESS( - mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); - if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - - case MPC_TYPE_MANY1: - - results = results_stk; - - while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { - j++; - results = mpc_grow_results(i, j, results_stk, results); - } - - if (j == 0) { - MPC_FAILURE( - mpc_err_many1(i, results[j].error); - if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } else { - - *e = mpc_err_merge(i, *e, results[j].error); - - MPC_SUCCESS( - mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); - if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } - - case MPC_TYPE_SEPBY1: - - results = results_stk; - - if(mpc_parse_run(i, p->data.sepby1.x, &results[j], e, depth+1)){ - j++; - results = mpc_grow_results(i, j, results_stk, results); - - while ( - mpc_parse_run(i, p->data.sepby1.sep, &results[j], e, depth+1) && - mpc_parse_run(i, p->data.sepby1.x, &results[j], e, depth+1) - ) { - j++; - results = mpc_grow_results(i, j, results_stk, results); - } - } - - if (j == 0) { - MPC_FAILURE( - mpc_err_many1(i, results[j].error); - if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } else { - *e = mpc_err_merge(i, *e, results[j].error); - - MPC_SUCCESS( - mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); - if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } - - case MPC_TYPE_COUNT: - - results = p->data.repeat.n > MPC_PARSE_STACK_MIN - ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.repeat.n) - : results_stk; - - while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { - j++; - if (j == p->data.repeat.n) { break; } - } - - if (j == p->data.repeat.n) { - MPC_SUCCESS( - mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); - if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } else { - for (k = 0; k < j; k++) { - mpc_parse_dtor(i, p->data.repeat.dx, results[k].output); - } - MPC_FAILURE( - mpc_err_count(i, results[j].error, p->data.repeat.n); - if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } - - /* Combinatory Parsers */ - - case MPC_TYPE_OR: - - if (p->data.or.n == 0) { MPC_SUCCESS(NULL); } - - results = p->data.or.n > MPC_PARSE_STACK_MIN - ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) - : results_stk; - - for (j = 0; j < p->data.or.n; j++) { - if (mpc_parse_run(i, p->data.or.xs[j], &results[j], e, depth+1)) { - MPC_SUCCESS(results[j].output; - if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } else { - *e = mpc_err_merge(i, *e, results[j].error); - } - } - - MPC_FAILURE(NULL; - if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - - case MPC_TYPE_AND: - if (p->data.and.n == 0) { MPC_SUCCESS(NULL); } - - results = p->data.or.n > MPC_PARSE_STACK_MIN - ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) - : results_stk; - - mpc_input_mark(i); - for (j = 0; j < p->data.and.n; j++) { - if (!mpc_parse_run(i, p->data.and.xs[j], &results[j], e, depth+1)) { - mpc_input_rewind(i); - for (k = 0; k < j; k++) { - mpc_parse_dtor(i, p->data.and.dxs[k], results[k].output); - } - MPC_FAILURE(results[j].error; - if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - } - } - mpc_input_unmark(i); - MPC_SUCCESS( - mpc_parse_fold(i, p->data.and.f, j, (mpc_val_t**)results); - if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); - - /* End */ - - default: - - MPC_FAILURE(mpc_err_fail(i, "Unknown Parser Type Id!")); - } - - return 0; - -} - -#undef MPC_SUCCESS -#undef MPC_FAILURE -#undef MPC_PRIMITIVE - -int mpc_parse_input(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r) { - int x; - mpc_err_t *e = mpc_err_fail(i, "Unknown Error"); - e->state = mpc_state_invalid(); - x = mpc_parse_run(i, p, r, &e, 0); - if (x) { - mpc_err_delete_internal(i, e); - r->output = mpc_export(i, r->output); - } else { - r->error = mpc_err_export(i, mpc_err_merge(i, e, r->error)); - } - return x; -} - -int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r) { - int x; - mpc_input_t *i = mpc_input_new_string(filename, string); - x = mpc_parse_input(i, p, r); - mpc_input_delete(i); - return x; -} - -int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r) { - int x; - mpc_input_t *i = mpc_input_new_nstring(filename, string, length); - x = mpc_parse_input(i, p, r); - mpc_input_delete(i); - return x; -} - -int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r) { - int x; - mpc_input_t *i = mpc_input_new_file(filename, file); - x = mpc_parse_input(i, p, r); - mpc_input_delete(i); - return x; -} - -int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r) { - int x; - mpc_input_t *i = mpc_input_new_pipe(filename, pipe); - x = mpc_parse_input(i, p, r); - mpc_input_delete(i); - return x; -} - -int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r) { - - FILE *f = fopen(filename, "rb"); - int res; - - if (f == NULL) { - r->output = NULL; - r->error = mpc_err_file(filename, "Unable to open file!"); - return 0; - } - - res = mpc_parse_file(filename, f, p, r); - fclose(f); - return res; -} - -/* -** Building a Parser -*/ - -static void mpc_undefine_unretained(mpc_parser_t *p, int force); - -static void mpc_undefine_or(mpc_parser_t *p) { - - int i; - for (i = 0; i < p->data.or.n; i++) { - mpc_undefine_unretained(p->data.or.xs[i], 0); - } - free(p->data.or.xs); - -} - -static void mpc_undefine_and(mpc_parser_t *p) { - - int i; - for (i = 0; i < p->data.and.n; i++) { - mpc_undefine_unretained(p->data.and.xs[i], 0); - } - free(p->data.and.xs); - free(p->data.and.dxs); - -} - -static void mpc_undefine_unretained(mpc_parser_t *p, int force) { - - if (p->retained && !force) { return; } - - switch (p->type) { - - case MPC_TYPE_FAIL: free(p->data.fail.m); break; - - case MPC_TYPE_ONEOF: - case MPC_TYPE_NONEOF: - case MPC_TYPE_STRING: - free(p->data.string.x); - break; - - case MPC_TYPE_APPLY: mpc_undefine_unretained(p->data.apply.x, 0); break; - case MPC_TYPE_APPLY_TO: mpc_undefine_unretained(p->data.apply_to.x, 0); break; - case MPC_TYPE_PREDICT: mpc_undefine_unretained(p->data.predict.x, 0); break; - - case MPC_TYPE_MAYBE: - case MPC_TYPE_NOT: - mpc_undefine_unretained(p->data.not.x, 0); - break; - - case MPC_TYPE_EXPECT: - mpc_undefine_unretained(p->data.expect.x, 0); - free(p->data.expect.m); - break; - - case MPC_TYPE_MANY: - case MPC_TYPE_MANY1: - case MPC_TYPE_COUNT: - mpc_undefine_unretained(p->data.repeat.x, 0); - break; - - case MPC_TYPE_SEPBY1: - mpc_undefine_unretained(p->data.sepby1.x, 0); - mpc_undefine_unretained(p->data.sepby1.sep, 0); - break; - - case MPC_TYPE_OR: mpc_undefine_or(p); break; - case MPC_TYPE_AND: mpc_undefine_and(p); break; - - case MPC_TYPE_CHECK: - mpc_undefine_unretained(p->data.check.x, 0); - free(p->data.check.e); - break; - - case MPC_TYPE_CHECK_WITH: - mpc_undefine_unretained(p->data.check_with.x, 0); - free(p->data.check_with.e); - break; - - default: break; - } - - if (!force) { - free(p->name); - free(p); - } - -} - -void mpc_delete(mpc_parser_t *p) { - if (p->retained) { - - if (p->type != MPC_TYPE_UNDEFINED) { - mpc_undefine_unretained(p, 0); - } - - free(p->name); - free(p); - - } else { - mpc_undefine_unretained(p, 0); - } -} - -static void mpc_soft_delete(mpc_val_t *x) { - mpc_undefine_unretained(x, 0); -} - -static mpc_parser_t *mpc_undefined(void) { - mpc_parser_t *p = calloc(1, sizeof(mpc_parser_t)); - p->retained = 0; - p->type = MPC_TYPE_UNDEFINED; - p->name = NULL; - return p; -} - -mpc_parser_t *mpc_new(const char *name) { - mpc_parser_t *p = mpc_undefined(); - p->retained = 1; - p->name = realloc(p->name, strlen(name) + 1); - strcpy(p->name, name); - return p; -} - -mpc_parser_t *mpc_copy(mpc_parser_t *a) { - int i = 0; - mpc_parser_t *p; - - if (a->retained) { return a; } - - p = mpc_undefined(); - p->retained = a->retained; - p->type = a->type; - p->data = a->data; - - if (a->name) { - p->name = malloc(strlen(a->name)+1); - strcpy(p->name, a->name); - } - - switch (a->type) { - - case MPC_TYPE_FAIL: - p->data.fail.m = malloc(strlen(a->data.fail.m)+1); - strcpy(p->data.fail.m, a->data.fail.m); - break; - - case MPC_TYPE_ONEOF: - case MPC_TYPE_NONEOF: - case MPC_TYPE_STRING: - p->data.string.x = malloc(strlen(a->data.string.x)+1); - strcpy(p->data.string.x, a->data.string.x); - break; - - case MPC_TYPE_APPLY: p->data.apply.x = mpc_copy(a->data.apply.x); break; - case MPC_TYPE_APPLY_TO: p->data.apply_to.x = mpc_copy(a->data.apply_to.x); break; - case MPC_TYPE_PREDICT: p->data.predict.x = mpc_copy(a->data.predict.x); break; - - case MPC_TYPE_MAYBE: - case MPC_TYPE_NOT: - p->data.not.x = mpc_copy(a->data.not.x); - break; - - case MPC_TYPE_EXPECT: - p->data.expect.x = mpc_copy(a->data.expect.x); - p->data.expect.m = malloc(strlen(a->data.expect.m)+1); - strcpy(p->data.expect.m, a->data.expect.m); - break; - - case MPC_TYPE_MANY: - case MPC_TYPE_MANY1: - case MPC_TYPE_COUNT: - p->data.repeat.x = mpc_copy(a->data.repeat.x); - break; - - case MPC_TYPE_SEPBY1: - p->data.sepby1.x = mpc_copy(a->data.sepby1.x); - p->data.sepby1.sep = mpc_copy(a->data.sepby1.sep); - break; - - case MPC_TYPE_OR: - p->data.or.xs = malloc(a->data.or.n * sizeof(mpc_parser_t*)); - for (i = 0; i < a->data.or.n; i++) { - p->data.or.xs[i] = mpc_copy(a->data.or.xs[i]); - } - break; - case MPC_TYPE_AND: - p->data.and.xs = malloc(a->data.and.n * sizeof(mpc_parser_t*)); - for (i = 0; i < a->data.and.n; i++) { - p->data.and.xs[i] = mpc_copy(a->data.and.xs[i]); - } - if (a->data.and.n > 0) { - p->data.and.dxs = malloc((a->data.and.n-1) * sizeof(mpc_dtor_t)); - for (i = 0; i < a->data.and.n-1; i++) { - p->data.and.dxs[i] = a->data.and.dxs[i]; - } - } - break; - - case MPC_TYPE_CHECK: - p->data.check.x = mpc_copy(a->data.check.x); - p->data.check.e = malloc(strlen(a->data.check.e)+1); - strcpy(p->data.check.e, a->data.check.e); - break; - case MPC_TYPE_CHECK_WITH: - p->data.check_with.x = mpc_copy(a->data.check_with.x); - p->data.check_with.e = malloc(strlen(a->data.check_with.e)+1); - strcpy(p->data.check_with.e, a->data.check_with.e); - break; - - default: break; - } - - - return p; -} - -mpc_parser_t *mpc_undefine(mpc_parser_t *p) { - mpc_undefine_unretained(p, 1); - p->type = MPC_TYPE_UNDEFINED; - return p; -} - -mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a) { - - if (p->retained) { - p->type = a->type; - p->data = a->data; - } else { - mpc_parser_t *a2 = mpc_failf("Attempt to assign to Unretained Parser!"); - p->type = a2->type; - p->data = a2->data; - free(a2); - } - - free(a); - return p; -} - -void mpc_cleanup(int n, ...) { - int i; - mpc_parser_t **list = malloc(sizeof(mpc_parser_t*) * n); - - va_list va; - va_start(va, n); - for (i = 0; i < n; i++) { list[i] = va_arg(va, mpc_parser_t*); } - for (i = 0; i < n; i++) { mpc_undefine(list[i]); } - for (i = 0; i < n; i++) { mpc_delete(list[i]); } - va_end(va); - - free(list); -} - -mpc_parser_t *mpc_pass(void) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_PASS; - return p; -} - -mpc_parser_t *mpc_fail(const char *m) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_FAIL; - p->data.fail.m = malloc(strlen(m) + 1); - strcpy(p->data.fail.m, m); - return p; -} - -/* -** As `snprintf` is not ANSI standard this -** function `mpc_failf` should be considered -** unsafe. -** -** You have a few options if this is going to be -** trouble. -** -** - Ensure the format string does not exceed -** the buffer length using precision specifiers -** such as `%.512s`. -** -** - Patch this function in your code base to -** use `snprintf` or whatever variant your -** system supports. -** -** - Avoid it altogether. -** -*/ - -mpc_parser_t *mpc_failf(const char *fmt, ...) { - - va_list va; - char *buffer; - - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_FAIL; - - va_start(va, fmt); - buffer = malloc(2048); - if (!buffer) { - return NULL; - } - vsprintf(buffer, fmt, va); - va_end(va); - - buffer = realloc(buffer, strlen(buffer) + 1); - p->data.fail.m = buffer; - return p; - -} - -mpc_parser_t *mpc_lift_val(mpc_val_t *x) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_LIFT_VAL; - p->data.lift.x = x; - return p; -} - -mpc_parser_t *mpc_lift(mpc_ctor_t lf) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_LIFT; - p->data.lift.lf = lf; - return p; -} - -mpc_parser_t *mpc_anchor(int(*f)(char,char)) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_ANCHOR; - p->data.anchor.f = f; - return mpc_expect(p, "anchor"); -} - -mpc_parser_t *mpc_state(void) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_STATE; - return p; -} - -mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *expected) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_EXPECT; - p->data.expect.x = a; - p->data.expect.m = malloc(strlen(expected) + 1); - strcpy(p->data.expect.m, expected); - return p; -} - -/* -** As `snprintf` is not ANSI standard this -** function `mpc_expectf` should be considered -** unsafe. -** -** You have a few options if this is going to be -** trouble. -** -** - Ensure the format string does not exceed -** the buffer length using precision specifiers -** such as `%.512s`. -** -** - Patch this function in your code base to -** use `snprintf` or whatever variant your -** system supports. -** -** - Avoid it altogether. -** -*/ - -mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...) { - va_list va; - char *buffer; - - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_EXPECT; - - va_start(va, fmt); - buffer = malloc(2048); - if (!buffer) { - return NULL; - } - vsprintf(buffer, fmt, va); - va_end(va); - - buffer = realloc(buffer, strlen(buffer) + 1); - p->data.expect.x = a; - p->data.expect.m = buffer; - return p; -} - -/* -** Basic Parsers -*/ - -mpc_parser_t *mpc_any(void) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_ANY; - return mpc_expect(p, "any character"); -} - -mpc_parser_t *mpc_char(char c) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_SINGLE; - p->data.single.x = c; - return mpc_expectf(p, "'%c'", c); -} - -mpc_parser_t *mpc_range(char s, char e) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_RANGE; - p->data.range.x = s; - p->data.range.y = e; - return mpc_expectf(p, "character between '%c' and '%c'", s, e); -} - -mpc_parser_t *mpc_oneof(const char *s) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_ONEOF; - p->data.string.x = malloc(strlen(s) + 1); - strcpy(p->data.string.x, s); - return mpc_expectf(p, "one of '%s'", s); -} - -mpc_parser_t *mpc_noneof(const char *s) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_NONEOF; - p->data.string.x = malloc(strlen(s) + 1); - strcpy(p->data.string.x, s); - return mpc_expectf(p, "none of '%s'", s); - -} - -mpc_parser_t *mpc_satisfy(int(*f)(char)) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_SATISFY; - p->data.satisfy.f = f; - return mpc_expectf(p, "character satisfying function %p", f); -} - -mpc_parser_t *mpc_string(const char *s) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_STRING; - p->data.string.x = malloc(strlen(s) + 1); - strcpy(p->data.string.x, s); - return mpc_expectf(p, "\"%s\"", s); -} - -/* -** Core Parsers -*/ - -mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_APPLY; - p->data.apply.x = a; - p->data.apply.f = f; - return p; -} - -mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_APPLY_TO; - p->data.apply_to.x = a; - p->data.apply_to.f = f; - p->data.apply_to.d = x; - return p; -} - -mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_CHECK; - p->data.check.x = a; - p->data.check.dx = da; - p->data.check.f = f; - p->data.check.e = malloc(strlen(e) + 1); - strcpy(p->data.check.e, e); - return p; -} - -mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_CHECK_WITH; - p->data.check_with.x = a; - p->data.check_with.dx = da; - p->data.check_with.f = f; - p->data.check_with.d = x; - p->data.check_with.e = malloc(strlen(e) + 1); - strcpy(p->data.check_with.e, e); - return p; -} - -mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...) { - va_list va; - char *buffer; - mpc_parser_t *p; - - va_start(va, fmt); - buffer = malloc(2048); - vsprintf(buffer, fmt, va); - va_end(va); - - p = mpc_check(a, da, f, buffer); - free(buffer); - - return p; -} - -mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...) { - va_list va; - char *buffer; - mpc_parser_t *p; - - va_start(va, fmt); - buffer = malloc(2048); - vsprintf(buffer, fmt, va); - va_end(va); - - p = mpc_check_with(a, da, f, x, buffer); - free(buffer); - - return p; -} - -mpc_parser_t *mpc_predictive(mpc_parser_t *a) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_PREDICT; - p->data.predict.x = a; - return p; -} - -mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_NOT; - p->data.not.x = a; - p->data.not.dx = da; - p->data.not.lf = lf; - return p; -} - -mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da) { - return mpc_not_lift(a, da, mpcf_ctor_null); -} - -mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_MAYBE; - p->data.not.x = a; - p->data.not.lf = lf; - return p; -} - -mpc_parser_t *mpc_maybe(mpc_parser_t *a) { - return mpc_maybe_lift(a, mpcf_ctor_null); -} - -mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_MANY; - p->data.repeat.x = a; - p->data.repeat.f = f; - return p; -} - -mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_MANY1; - p->data.repeat.x = a; - p->data.repeat.f = f; - return p; -} - -mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_COUNT; - p->data.repeat.n = n; - p->data.repeat.f = f; - p->data.repeat.x = a; - p->data.repeat.dx = da; - return p; -} - -mpc_parser_t *mpc_sepby1(mpc_fold_t f, mpc_parser_t *sep, mpc_parser_t *a) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_SEPBY1; - p->data.sepby1.x = a; - p->data.sepby1.f = f; - p->data.sepby1.sep = sep; - return p; -} - -mpc_parser_t *mpc_or(int n, ...) { - - int i; - va_list va; - - mpc_parser_t *p = mpc_undefined(); - - p->type = MPC_TYPE_OR; - p->data.or.n = n; - p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); - - va_start(va, n); - for (i = 0; i < n; i++) { - p->data.or.xs[i] = va_arg(va, mpc_parser_t*); - } - va_end(va); - - return p; -} - -mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...) { - - int i; - va_list va; - - mpc_parser_t *p = mpc_undefined(); - - p->type = MPC_TYPE_AND; - p->data.and.n = n; - p->data.and.f = f; - p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); - p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); - - va_start(va, f); - for (i = 0; i < n; i++) { - p->data.and.xs[i] = va_arg(va, mpc_parser_t*); - } - for (i = 0; i < (n-1); i++) { - p->data.and.dxs[i] = va_arg(va, mpc_dtor_t); - } - va_end(va); - - return p; -} - -/* -** Common Parsers -*/ - -mpc_parser_t *mpc_soi(void) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_SOI; - return mpc_expect(p, "start of input"); -} - -mpc_parser_t *mpc_eoi(void) { - mpc_parser_t *p = mpc_undefined(); - p->type = MPC_TYPE_EOI; - return mpc_expect(p, "end of input"); -} - -static int mpc_boundary_anchor(char prev, char next) { - const char* word = "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "0123456789_"; - if ( strchr(word, next) && prev == '\0') { return 1; } - if ( strchr(word, prev) && next == '\0') { return 1; } - if ( strchr(word, next) && !strchr(word, prev)) { return 1; } - if (!strchr(word, next) && strchr(word, prev)) { return 1; } - return 0; -} - -static int mpc_boundary_newline_anchor(char prev, char next) { - (void)next; - return prev == '\n'; -} - -mpc_parser_t *mpc_boundary(void) { return mpc_expect(mpc_anchor(mpc_boundary_anchor), "word boundary"); } -mpc_parser_t *mpc_boundary_newline(void) { return mpc_expect(mpc_anchor(mpc_boundary_newline_anchor), "start of newline"); } - -mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); } -mpc_parser_t *mpc_whitespaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_whitespace()), "spaces"); } -mpc_parser_t *mpc_blank(void) { return mpc_expect(mpc_apply(mpc_whitespaces(), mpcf_free), "whitespace"); } - -mpc_parser_t *mpc_newline(void) { return mpc_expect(mpc_char('\n'), "newline"); } -mpc_parser_t *mpc_tab(void) { return mpc_expect(mpc_char('\t'), "tab"); } -mpc_parser_t *mpc_escape(void) { return mpc_and(2, mpcf_strfold, mpc_char('\\'), mpc_any(), free); } - -mpc_parser_t *mpc_digit(void) { return mpc_expect(mpc_oneof("0123456789"), "digit"); } -mpc_parser_t *mpc_hexdigit(void) { return mpc_expect(mpc_oneof("0123456789ABCDEFabcdef"), "hex digit"); } -mpc_parser_t *mpc_octdigit(void) { return mpc_expect(mpc_oneof("01234567"), "oct digit"); } -mpc_parser_t *mpc_digits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_digit()), "digits"); } -mpc_parser_t *mpc_hexdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_hexdigit()), "hex digits"); } -mpc_parser_t *mpc_octdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_octdigit()), "oct digits"); } - -mpc_parser_t *mpc_lower(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyz"), "lowercase letter"); } -mpc_parser_t *mpc_upper(void) { return mpc_expect(mpc_oneof("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "uppercase letter"); } -mpc_parser_t *mpc_alpha(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), "letter"); } -mpc_parser_t *mpc_underscore(void) { return mpc_expect(mpc_char('_'), "underscore"); } -mpc_parser_t *mpc_alphanum(void) { return mpc_expect(mpc_or(3, mpc_alpha(), mpc_digit(), mpc_underscore()), "alphanumeric"); } - -mpc_parser_t *mpc_int(void) { return mpc_expect(mpc_apply(mpc_digits(), mpcf_int), "integer"); } -mpc_parser_t *mpc_hex(void) { return mpc_expect(mpc_apply(mpc_hexdigits(), mpcf_hex), "hexadecimal"); } -mpc_parser_t *mpc_oct(void) { return mpc_expect(mpc_apply(mpc_octdigits(), mpcf_oct), "octadecimal"); } -mpc_parser_t *mpc_number(void) { return mpc_expect(mpc_or(3, mpc_int(), mpc_hex(), mpc_oct()), "number"); } - -mpc_parser_t *mpc_real(void) { - - /* [+-]?\d+(\.\d+)?([eE][+-]?[0-9]+)? */ - - mpc_parser_t *p0, *p1, *p2, *p30, *p31, *p32, *p3; - - p0 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); - p1 = mpc_digits(); - p2 = mpc_maybe_lift(mpc_and(2, mpcf_strfold, mpc_char('.'), mpc_digits(), free), mpcf_ctor_str); - p30 = mpc_oneof("eE"); - p31 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); - p32 = mpc_digits(); - p3 = mpc_maybe_lift(mpc_and(3, mpcf_strfold, p30, p31, p32, free, free), mpcf_ctor_str); - - return mpc_expect(mpc_and(4, mpcf_strfold, p0, p1, p2, p3, free, free, free), "real"); - -} - -mpc_parser_t *mpc_float(void) { - return mpc_expect(mpc_apply(mpc_real(), mpcf_float), "float"); -} - -mpc_parser_t *mpc_char_lit(void) { - return mpc_expect(mpc_between(mpc_or(2, mpc_escape(), mpc_any()), free, "'", "'"), "char"); -} - -mpc_parser_t *mpc_string_lit(void) { - mpc_parser_t *strchar = mpc_or(2, mpc_escape(), mpc_noneof("\"")); - return mpc_expect(mpc_between(mpc_many(mpcf_strfold, strchar), free, "\"", "\""), "string"); -} - -mpc_parser_t *mpc_regex_lit(void) { - mpc_parser_t *regexchar = mpc_or(2, mpc_escape(), mpc_noneof("/")); - return mpc_expect(mpc_between(mpc_many(mpcf_strfold, regexchar), free, "/", "/"), "regex"); -} - -mpc_parser_t *mpc_ident(void) { - mpc_parser_t *p0, *p1; - p0 = mpc_or(2, mpc_alpha(), mpc_underscore()); - p1 = mpc_many(mpcf_strfold, mpc_alphanum()); - return mpc_and(2, mpcf_strfold, p0, p1, free); -} - -/* -** Useful Parsers -*/ - -mpc_parser_t *mpc_startwith(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_soi(), a, mpcf_dtor_null); } -mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(2, mpcf_fst, a, mpc_eoi(), da); } -mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(3, mpcf_snd, mpc_soi(), a, mpc_eoi(), mpcf_dtor_null, da); } - -mpc_parser_t *mpc_stripl(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_blank(), a, mpcf_dtor_null); } -mpc_parser_t *mpc_stripr(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } -mpc_parser_t *mpc_strip(mpc_parser_t *a) { return mpc_and(3, mpcf_snd, mpc_blank(), a, mpc_blank(), mpcf_dtor_null, mpcf_dtor_null); } -mpc_parser_t *mpc_tok(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } -mpc_parser_t *mpc_sym(const char *s) { return mpc_tok(mpc_string(s)); } - -mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da) { return mpc_whole(mpc_strip(a), da); } - -mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { - return mpc_and(3, mpcf_snd_free, - mpc_string(o), a, mpc_string(c), - free, ad); -} - -mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "(", ")"); } -mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "<", ">"); } -mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "{", "}"); } -mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "[", "]"); } - -mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { - return mpc_and(3, mpcf_snd_free, - mpc_sym(o), mpc_tok(a), mpc_sym(c), - free, ad); -} - -mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "(", ")"); } -mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "<", ">"); } -mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "{", "}"); } -mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "[", "]"); } - -/* -** Regular Expression Parsers -*/ - -/* -** So here is a cute bootstrapping. -** -** I'm using the previously defined -** mpc constructs and functions to -** parse the user regex string and -** construct a parser from it. -** -** As it turns out lots of the standard -** mpc functions look a lot like `fold` -** functions and so can be used indirectly -** by many of the parsing functions to build -** a parser directly - as we are parsing. -** -** This is certainly something that -** would be less elegant/interesting -** in a two-phase parser which first -** builds an AST and then traverses it -** to generate the object. -** -** This whole thing acts as a great -** case study for how trivial it can be -** to write a great parser in a few -** lines of code using mpc. -*/ - -/* -** -** ### Regular Expression Grammar -** -** : | ( "|" ) -** -** : * -** -** : -** | "*" -** | "+" -** | "?" -** | "{" "}" -** -** : -** | "\" -** | "(" ")" -** | "[" "]" -*/ - -static mpc_val_t *mpcf_re_or(int n, mpc_val_t **xs) { - (void) n; - if (xs[1] == NULL) { return xs[0]; } - else { return mpc_or(2, xs[0], xs[1]); } -} - -static mpc_val_t *mpcf_re_and(int n, mpc_val_t **xs) { - int i; - mpc_parser_t *p = mpc_lift(mpcf_ctor_str); - for (i = 0; i < n; i++) { - p = mpc_and(2, mpcf_strfold, p, xs[i], free); - } - return p; -} - -static mpc_val_t *mpcf_re_repeat(int n, mpc_val_t **xs) { - int num; - (void) n; - if (xs[1] == NULL) { return xs[0]; } - switch(((char*)xs[1])[0]) - { - case '*': { free(xs[1]); return mpc_many(mpcf_strfold, xs[0]); }; break; - case '+': { free(xs[1]); return mpc_many1(mpcf_strfold, xs[0]); }; break; - case '?': { free(xs[1]); return mpc_maybe_lift(xs[0], mpcf_ctor_str); }; break; - default: - num = *(int*)xs[1]; - free(xs[1]); - } - - return mpc_count(num, mpcf_strfold, xs[0], free); -} - -static mpc_parser_t *mpc_re_escape_char(char c) { - switch (c) { - case 'a': return mpc_char('\a'); - case 'f': return mpc_char('\f'); - case 'n': return mpc_char('\n'); - case 'r': return mpc_char('\r'); - case 't': return mpc_char('\t'); - case 'v': return mpc_char('\v'); - case 'b': return mpc_and(2, mpcf_snd, mpc_boundary(), mpc_lift(mpcf_ctor_str), free); - case 'B': return mpc_not_lift(mpc_boundary(), free, mpcf_ctor_str); - case 'A': return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); - case 'Z': return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free); - case 'd': return mpc_digit(); - case 'D': return mpc_not_lift(mpc_digit(), free, mpcf_ctor_str); - case 's': return mpc_whitespace(); - case 'S': return mpc_not_lift(mpc_whitespace(), free, mpcf_ctor_str); - case 'w': return mpc_alphanum(); - case 'W': return mpc_not_lift(mpc_alphanum(), free, mpcf_ctor_str); - default: return NULL; - } -} - -static mpc_val_t *mpcf_re_escape(mpc_val_t *x, void* data) { - - int mode = *((int*)data); - char *s = x; - mpc_parser_t *p; - - /* Any Character */ - if (s[0] == '.') { - free(s); - if (mode & MPC_RE_DOTALL) { - return mpc_any(); - } else { - return mpc_expect(mpc_noneof("\n"), "any character except a newline"); - } - } - - /* Start of Input */ - if (s[0] == '^') { - free(s); - if (mode & MPC_RE_MULTILINE) { - return mpc_and(2, mpcf_snd, mpc_or(2, mpc_soi(), mpc_boundary_newline()), mpc_lift(mpcf_ctor_str), free); - } else { - return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); - } - } - - /* End of Input */ - if (s[0] == '$') { - free(s); - if (mode & MPC_RE_MULTILINE) { - return mpc_or(2, - mpc_newline(), - mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); - } else { - return mpc_or(2, - mpc_and(2, mpcf_fst, mpc_newline(), mpc_eoi(), free), - mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); - } - } - - /* Regex Escape */ - if (s[0] == '\\') { - p = mpc_re_escape_char(s[1]); - p = (p == NULL) ? mpc_char(s[1]) : p; - free(s); - return p; - } - - /* Regex Standard */ - p = mpc_char(s[0]); - free(s); - return p; -} - -static const char *mpc_re_range_escape_char(char c) { - switch (c) { - case '-': return "-"; - case 'a': return "\a"; - case 'f': return "\f"; - case 'n': return "\n"; - case 'r': return "\r"; - case 't': return "\t"; - case 'v': return "\v"; - case 'b': return "\b"; - case 'd': return "0123456789"; - case 's': return " \f\n\r\t\v"; - case 'w': return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; - default: return NULL; - } -} - -static mpc_val_t *mpcf_re_range(mpc_val_t *x) { - - mpc_parser_t *out; - size_t i, j; - size_t start, end; - const char *tmp = NULL; - const char *s = x; - int comp = s[0] == '^' ? 1 : 0; - char *range = calloc(1,1); - - if (s[0] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } - if (s[0] == '^' && - s[1] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } - - for (i = comp; i < strlen(s); i++){ - - /* Regex Range Escape */ - if (s[i] == '\\') { - tmp = mpc_re_range_escape_char(s[i+1]); - if (tmp != NULL) { - range = realloc(range, strlen(range) + strlen(tmp) + 1); - strcat(range, tmp); - } else { - range = realloc(range, strlen(range) + 1 + 1); - range[strlen(range) + 1] = '\0'; - range[strlen(range) + 0] = s[i+1]; - } - i++; - } - - /* Regex Range...Range */ - else if (s[i] == '-') { - if (s[i+1] == '\0' || i == 0) { - range = realloc(range, strlen(range) + strlen("-") + 1); - strcat(range, "-"); - } else { - start = s[i-1]+1; - end = s[i+1]-1; - for (j = start; j <= end; j++) { - range = realloc(range, strlen(range) + 1 + 1 + 1); - range[strlen(range) + 1] = '\0'; - range[strlen(range) + 0] = (char)j; - } - } - } - - /* Regex Range Normal */ - else { - range = realloc(range, strlen(range) + 1 + 1); - range[strlen(range) + 1] = '\0'; - range[strlen(range) + 0] = s[i]; - } - - } - - out = comp == 1 ? mpc_noneof(range) : mpc_oneof(range); - - free(x); - free(range); - - return out; -} - -mpc_parser_t *mpc_re(const char *re) { - return mpc_re_mode(re, MPC_RE_DEFAULT); -} - -mpc_parser_t *mpc_re_mode(const char *re, int mode) { - - char *err_msg; - mpc_parser_t *err_out; - mpc_result_t r; - mpc_parser_t *Regex, *Term, *Factor, *Base, *Range, *RegexEnclose; - - Regex = mpc_new("regex"); - Term = mpc_new("term"); - Factor = mpc_new("factor"); - Base = mpc_new("base"); - Range = mpc_new("range"); - - mpc_define(Regex, mpc_and(2, mpcf_re_or, - Term, - mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_char('|'), Regex, free)), - (mpc_dtor_t)mpc_delete - )); - - mpc_define(Term, mpc_many(mpcf_re_and, Factor)); - - mpc_define(Factor, mpc_and(2, mpcf_re_repeat, - Base, - mpc_or(5, - mpc_char('*'), mpc_char('+'), mpc_char('?'), - mpc_brackets(mpc_int(), free), - mpc_pass()), - (mpc_dtor_t)mpc_delete - )); - - mpc_define(Base, mpc_or(4, - mpc_parens(Regex, (mpc_dtor_t)mpc_delete), - mpc_squares(Range, (mpc_dtor_t)mpc_delete), - mpc_apply_to(mpc_escape(), mpcf_re_escape, &mode), - mpc_apply_to(mpc_noneof(")|"), mpcf_re_escape, &mode) - )); - - mpc_define(Range, mpc_apply( - mpc_many(mpcf_strfold, mpc_or(2, mpc_escape(), mpc_noneof("]"))), - mpcf_re_range - )); - - RegexEnclose = mpc_whole(mpc_predictive(Regex), (mpc_dtor_t)mpc_delete); - - mpc_optimise(RegexEnclose); - mpc_optimise(Regex); - mpc_optimise(Term); - mpc_optimise(Factor); - mpc_optimise(Base); - mpc_optimise(Range); - - if(!mpc_parse("", re, RegexEnclose, &r)) { - err_msg = mpc_err_string(r.error); - err_out = mpc_failf("Invalid Regex: %s", err_msg); - mpc_err_delete(r.error); - free(err_msg); - r.output = err_out; - } - - mpc_cleanup(6, RegexEnclose, Regex, Term, Factor, Base, Range); - - mpc_optimise(r.output); - - return r.output; - -} - -/* -** Common Fold Functions -*/ - -void mpcf_dtor_null(mpc_val_t *x) { (void) x; return; } - -mpc_val_t *mpcf_ctor_null(void) { return NULL; } -mpc_val_t *mpcf_ctor_str(void) { return calloc(1, 1); } -mpc_val_t *mpcf_free(mpc_val_t *x) { free(x); return NULL; } - -mpc_val_t *mpcf_int(mpc_val_t *x) { - int *y = malloc(sizeof(int)); - *y = strtol(x, NULL, 10); - free(x); - return y; -} - -mpc_val_t *mpcf_hex(mpc_val_t *x) { - int *y = malloc(sizeof(int)); - *y = strtol(x, NULL, 16); - free(x); - return y; -} - -mpc_val_t *mpcf_oct(mpc_val_t *x) { - int *y = malloc(sizeof(int)); - *y = strtol(x, NULL, 8); - free(x); - return y; -} - -mpc_val_t *mpcf_float(mpc_val_t *x) { - float *y = malloc(sizeof(float)); - *y = strtod(x, NULL); - free(x); - return y; -} - -mpc_val_t *mpcf_strtriml(mpc_val_t *x) { - char *s = x; - while (isspace((unsigned char)*s)) { - memmove(s, s+1, strlen(s)); - } - return s; -} - -mpc_val_t *mpcf_strtrimr(mpc_val_t *x) { - char *s = x; - size_t l = strlen(s); - while (l > 0 && isspace((unsigned char)s[l-1])) { - s[l-1] = '\0'; l--; - } - return s; -} - -mpc_val_t *mpcf_strtrim(mpc_val_t *x) { - return mpcf_strtriml(mpcf_strtrimr(x)); -} - -static const char mpc_escape_input_c[] = { - '\a', '\b', '\f', '\n', '\r', - '\t', '\v', '\\', '\'', '\"', '\0'}; - -static const char *mpc_escape_output_c[] = { - "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", - "\\v", "\\\\", "\\'", "\\\"", "\\0", NULL}; - -static const char mpc_escape_input_raw_re[] = { '/' }; -static const char *mpc_escape_output_raw_re[] = { "\\/", NULL }; - -static const char mpc_escape_input_raw_cstr[] = { '"' }; -static const char *mpc_escape_output_raw_cstr[] = { "\\\"", NULL }; - -static const char mpc_escape_input_raw_cchar[] = { '\'' }; -static const char *mpc_escape_output_raw_cchar[] = { "\\'", NULL }; - -static mpc_val_t *mpcf_escape_new(mpc_val_t *x, const char *input, const char **output) { - - int i; - int found; - char buff[2]; - char *s = x; - char *y = calloc(1, 1); - - while (*s) { - - i = 0; - found = 0; - - while (output[i]) { - if (*s == input[i]) { - y = realloc(y, strlen(y) + strlen(output[i]) + 1); - strcat(y, output[i]); - found = 1; - break; - } - i++; - } - - if (!found) { - y = realloc(y, strlen(y) + 2); - buff[0] = *s; buff[1] = '\0'; - strcat(y, buff); - } - - s++; - } - - - return y; -} - -static mpc_val_t *mpcf_unescape_new(mpc_val_t *x, const char *input, const char **output) { - - int i; - int found = 0; - char buff[2]; - char *s = x; - char *y = calloc(1, 1); - - while (*s) { - - i = 0; - found = 0; - - while (output[i]) { - if ((*(s+0)) == output[i][0] && - (*(s+1)) == output[i][1]) { - y = realloc(y, strlen(y) + 1 + 1); - buff[0] = input[i]; buff[1] = '\0'; - strcat(y, buff); - found = 1; - s++; - break; - } - i++; - } - - if (!found) { - y = realloc(y, strlen(y) + 1 + 1); - buff[0] = *s; buff[1] = '\0'; - strcat(y, buff); - } - - if (*s == '\0') { break; } - else { s++; } - } - - return y; - -} - -mpc_val_t *mpcf_escape(mpc_val_t *x) { - mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_c, mpc_escape_output_c); - free(x); - return y; -} - -mpc_val_t *mpcf_unescape(mpc_val_t *x) { - mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_c, mpc_escape_output_c); - free(x); - return y; -} - -mpc_val_t *mpcf_escape_regex(mpc_val_t *x) { - mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); - free(x); - return y; -} - -mpc_val_t *mpcf_unescape_regex(mpc_val_t *x) { - mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); - free(x); - return y; -} - -mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x) { - mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); - free(x); - return y; -} - -mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x) { - mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); - free(x); - return y; -} - -mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x) { - mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); - free(x); - return y; -} - -mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x) { - mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); - free(x); - return y; -} - -mpc_val_t *mpcf_null(int n, mpc_val_t** xs) { (void) n; (void) xs; return NULL; } -mpc_val_t *mpcf_fst(int n, mpc_val_t **xs) { (void) n; return xs[0]; } -mpc_val_t *mpcf_snd(int n, mpc_val_t **xs) { (void) n; return xs[1]; } -mpc_val_t *mpcf_trd(int n, mpc_val_t **xs) { (void) n; return xs[2]; } - -static mpc_val_t *mpcf_nth_free(int n, mpc_val_t **xs, int x) { - int i; - for (i = 0; i < n; i++) { - if (i != x) { free(xs[i]); } - } - return xs[x]; -} - -mpc_val_t *mpcf_fst_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 0); } -mpc_val_t *mpcf_snd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 1); } -mpc_val_t *mpcf_trd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 2); } -mpc_val_t *mpcf_all_free(int n, mpc_val_t** xs) { - int i; - for (i = 0; i < n; i++) { - free(xs[i]); - } - return NULL; -} - -mpc_val_t *mpcf_strfold(int n, mpc_val_t **xs) { - int i; - size_t l = 0; - - if (n == 0) { return calloc(1, 1); } - - for (i = 0; i < n; i++) { l += strlen(xs[i]); } - - xs[0] = realloc(xs[0], l + 1); - - for (i = 1; i < n; i++) { - strcat(xs[0], xs[i]); free(xs[i]); - } - - return xs[0]; -} - -/* -** Printing -*/ - -static void mpc_print_unretained(mpc_parser_t *p, int force) { - - /* TODO: Print Everything Escaped */ - - int i; - char *s, *e; - char buff[2]; - - if (p->retained && !force) {; - if (p->name) { printf("<%s>", p->name); } - else { printf(""); } - return; - } - - if (p->type == MPC_TYPE_UNDEFINED) { printf(""); } - if (p->type == MPC_TYPE_PASS) { printf("<:>"); } - if (p->type == MPC_TYPE_FAIL) { printf(""); } - if (p->type == MPC_TYPE_LIFT) { printf("<#>"); } - if (p->type == MPC_TYPE_STATE) { printf(""); } - if (p->type == MPC_TYPE_ANCHOR) { printf("<@>"); } - if (p->type == MPC_TYPE_EXPECT) { - printf("%s", p->data.expect.m); - /*mpc_print_unretained(p->data.expect.x, 0);*/ - } - - if (p->type == MPC_TYPE_ANY) { printf("<.>"); } - if (p->type == MPC_TYPE_SATISFY) { printf(""); } - - if (p->type == MPC_TYPE_SINGLE) { - buff[0] = p->data.single.x; buff[1] = '\0'; - s = mpcf_escape_new( - buff, - mpc_escape_input_c, - mpc_escape_output_c); - printf("'%s'", s); - free(s); - } - - if (p->type == MPC_TYPE_RANGE) { - buff[0] = p->data.range.x; buff[1] = '\0'; - s = mpcf_escape_new( - buff, - mpc_escape_input_c, - mpc_escape_output_c); - buff[0] = p->data.range.y; buff[1] = '\0'; - e = mpcf_escape_new( - buff, - mpc_escape_input_c, - mpc_escape_output_c); - printf("[%s-%s]", s, e); - free(s); - free(e); - } - - if (p->type == MPC_TYPE_ONEOF) { - s = mpcf_escape_new( - p->data.string.x, - mpc_escape_input_c, - mpc_escape_output_c); - printf("[%s]", s); - free(s); - } - - if (p->type == MPC_TYPE_NONEOF) { - s = mpcf_escape_new( - p->data.string.x, - mpc_escape_input_c, - mpc_escape_output_c); - printf("[^%s]", s); - free(s); - } - - if (p->type == MPC_TYPE_STRING) { - s = mpcf_escape_new( - p->data.string.x, - mpc_escape_input_c, - mpc_escape_output_c); - printf("\"%s\"", s); - free(s); - } - - if (p->type == MPC_TYPE_APPLY) { mpc_print_unretained(p->data.apply.x, 0); } - if (p->type == MPC_TYPE_APPLY_TO) { mpc_print_unretained(p->data.apply_to.x, 0); } - if (p->type == MPC_TYPE_PREDICT) { mpc_print_unretained(p->data.predict.x, 0); } - - if (p->type == MPC_TYPE_NOT) { mpc_print_unretained(p->data.not.x, 0); printf("!"); } - if (p->type == MPC_TYPE_MAYBE) { mpc_print_unretained(p->data.not.x, 0); printf("?"); } - - if (p->type == MPC_TYPE_MANY) { mpc_print_unretained(p->data.repeat.x, 0); printf("*"); } - if (p->type == MPC_TYPE_MANY1) { mpc_print_unretained(p->data.repeat.x, 0); printf("+"); } - if (p->type == MPC_TYPE_COUNT) { mpc_print_unretained(p->data.repeat.x, 0); printf("{%i}", p->data.repeat.n); } - if (p->type == MPC_TYPE_SEPBY1) { - mpc_print_unretained(p->data.sepby1.x, 0); - printf(" ("); - mpc_print_unretained(p->data.sepby1.sep, 0); - printf(" "); - mpc_print_unretained(p->data.sepby1.x, 0); - printf(")"); - printf("*"); - } - - if (p->type == MPC_TYPE_OR) { - printf("("); - for(i = 0; i < p->data.or.n-1; i++) { - mpc_print_unretained(p->data.or.xs[i], 0); - printf(" | "); - } - mpc_print_unretained(p->data.or.xs[p->data.or.n-1], 0); - printf(")"); - } - - if (p->type == MPC_TYPE_AND) { - printf("("); - for(i = 0; i < p->data.and.n-1; i++) { - mpc_print_unretained(p->data.and.xs[i], 0); - printf(" "); - } - mpc_print_unretained(p->data.and.xs[p->data.and.n-1], 0); - printf(")"); - } - - if (p->type == MPC_TYPE_CHECK) { - mpc_print_unretained(p->data.check.x, 0); - printf("->?"); - } - if (p->type == MPC_TYPE_CHECK_WITH) { - mpc_print_unretained(p->data.check_with.x, 0); - printf("->?"); - } - -} - -void mpc_print(mpc_parser_t *p) { - mpc_print_unretained(p, 1); - printf("\n"); -} - -/* -** Testing -*/ - -/* -** These functions are slightly unwieldy and -** also the whole of the testing suite for mpc -** mpc is pretty shaky. -** -** It could do with a lot more tests and more -** precision. Currently I am only really testing -** changes off of the examples. -** -*/ - -int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, - int(*tester)(const void*, const void*), - mpc_dtor_t destructor, - void(*printer)(const void*)) { - mpc_result_t r; - (void) printer; - if (mpc_parse("", s, p, &r)) { - - if (tester(r.output, d)) { - destructor(r.output); - return 0; - } else { - destructor(r.output); - return 1; - } - - } else { - mpc_err_delete(r.error); - return 1; - } - -} - -int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, - int(*tester)(const void*, const void*), - mpc_dtor_t destructor, - void(*printer)(const void*)) { - - mpc_result_t r; - if (mpc_parse("", s, p, &r)) { - - if (tester(r.output, d)) { - destructor(r.output); - return 1; - } else { - printf("Got "); printer(r.output); printf("\n"); - printf("Expected "); printer(d); printf("\n"); - destructor(r.output); - return 0; - } - - } else { - mpc_err_print(r.error); - mpc_err_delete(r.error); - return 0; - - } - -} - - -/* -** AST -*/ - -void mpc_ast_delete(mpc_ast_t *a) { - - int i; - - if (a == NULL) { return; } - - for (i = 0; i < a->children_num; i++) { - mpc_ast_delete(a->children[i]); - } - - free(a->children); - free(a->tag); - free(a->contents); - free(a); - -} - -static void mpc_ast_delete_no_children(mpc_ast_t *a) { - free(a->children); - free(a->tag); - free(a->contents); - free(a); -} - -mpc_ast_t *mpc_ast_new(const char *tag, const char *contents) { - - mpc_ast_t *a = malloc(sizeof(mpc_ast_t)); - - a->tag = malloc(strlen(tag) + 1); - strcpy(a->tag, tag); - - a->contents = malloc(strlen(contents) + 1); - strcpy(a->contents, contents); - - a->state = mpc_state_new(); - - a->children_num = 0; - a->children = NULL; - return a; - -} - -mpc_ast_t *mpc_ast_build(int n, const char *tag, ...) { - - mpc_ast_t *a = mpc_ast_new(tag, ""); - - int i; - va_list va; - va_start(va, tag); - - for (i = 0; i < n; i++) { - mpc_ast_add_child(a, va_arg(va, mpc_ast_t*)); - } - - va_end(va); - - return a; - -} - -mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a) { - - mpc_ast_t *r; - - if (a == NULL) { return a; } - if (a->children_num == 0) { return a; } - if (a->children_num == 1) { return a; } - - r = mpc_ast_new(">", ""); - mpc_ast_add_child(r, a); - return r; -} - -int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b) { - - int i; - - if (strcmp(a->tag, b->tag) != 0) { return 0; } - if (strcmp(a->contents, b->contents) != 0) { return 0; } - if (a->children_num != b->children_num) { return 0; } - - for (i = 0; i < a->children_num; i++) { - if (!mpc_ast_eq(a->children[i], b->children[i])) { return 0; } - } - - return 1; -} - -mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a) { - r->children_num++; - r->children = realloc(r->children, sizeof(mpc_ast_t*) * r->children_num); - r->children[r->children_num-1] = a; - return r; -} - -mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t) { - if (a == NULL) { return a; } - a->tag = realloc(a->tag, strlen(t) + 1 + strlen(a->tag) + 1); - memmove(a->tag + strlen(t) + 1, a->tag, strlen(a->tag)+1); - memmove(a->tag, t, strlen(t)); - memmove(a->tag + strlen(t), "|", 1); - return a; -} - -mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t) { - if (a == NULL) { return a; } - a->tag = realloc(a->tag, (strlen(t)-1) + strlen(a->tag) + 1); - memmove(a->tag + (strlen(t)-1), a->tag, strlen(a->tag)+1); - memmove(a->tag, t, (strlen(t)-1)); - return a; -} - -mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t) { - a->tag = realloc(a->tag, strlen(t) + 1); - strcpy(a->tag, t); - return a; -} - -mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s) { - if (a == NULL) { return a; } - a->state = s; - return a; -} - -static void mpc_ast_print_depth(mpc_ast_t *a, int d, FILE *fp) { - - int i; - - if (a == NULL) { - fprintf(fp, "NULL\n"); - return; - } - - for (i = 0; i < d; i++) { fprintf(fp, " "); } - - if (strlen(a->contents)) { - fprintf(fp, "%s:%lu:%lu '%s'\n", a->tag, - (long unsigned int)(a->state.row+1), - (long unsigned int)(a->state.col+1), - a->contents); - } else { - fprintf(fp, "%s \n", a->tag); - } - - for (i = 0; i < a->children_num; i++) { - mpc_ast_print_depth(a->children[i], d+1, fp); - } - -} - -void mpc_ast_print(mpc_ast_t *a) { - mpc_ast_print_depth(a, 0, stdout); -} - -void mpc_ast_print_to(mpc_ast_t *a, FILE *fp) { - mpc_ast_print_depth(a, 0, fp); -} - -int mpc_ast_get_index(mpc_ast_t *ast, const char *tag) { - return mpc_ast_get_index_lb(ast, tag, 0); -} - -int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb) { - int i; - - for(i=lb; ichildren_num; i++) { - if(strcmp(ast->children[i]->tag, tag) == 0) { - return i; - } - } - - return -1; -} - -mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag) { - return mpc_ast_get_child_lb(ast, tag, 0); -} - -mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb) { - int i; - - for(i=lb; ichildren_num; i++) { - if(strcmp(ast->children[i]->tag, tag) == 0) { - return ast->children[i]; - } - } - - return NULL; -} - -mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, - mpc_ast_trav_order_t order) -{ - mpc_ast_trav_t *trav, *n_trav; - mpc_ast_t *cnode = ast; - - /* Create the traversal structure */ - trav = malloc(sizeof(mpc_ast_trav_t)); - trav->curr_node = cnode; - trav->parent = NULL; - trav->curr_child = 0; - trav->order = order; - - /* Get start node */ - switch(order) { - case mpc_ast_trav_order_pre: - /* Nothing else is needed for pre order start */ - break; - - case mpc_ast_trav_order_post: - while(cnode->children_num > 0) { - cnode = cnode->children[0]; - - n_trav = malloc(sizeof(mpc_ast_trav_t)); - n_trav->curr_node = cnode; - n_trav->parent = trav; - n_trav->curr_child = 0; - n_trav->order = order; - - trav = n_trav; - } - - break; - - default: - /* Unreachable, but compiler complaints */ - break; - } - - return trav; -} - -mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav) { - mpc_ast_trav_t *n_trav, *to_free; - mpc_ast_t *ret = NULL; - int cchild; - - /* The end of traversal was reached */ - if(*trav == NULL) return NULL; - - switch((*trav)->order) { - case mpc_ast_trav_order_pre: - ret = (*trav)->curr_node; - - /* If there aren't any more children, go up */ - while(*trav != NULL && - (*trav)->curr_child >= (*trav)->curr_node->children_num) - { - to_free = *trav; - *trav = (*trav)->parent; - free(to_free); - } - - /* If trav is NULL, the end was reached */ - if(*trav == NULL) { - break; - } - - /* Go to next child */ - n_trav = malloc(sizeof(mpc_ast_trav_t)); - - cchild = (*trav)->curr_child; - n_trav->curr_node = (*trav)->curr_node->children[cchild]; - n_trav->parent = *trav; - n_trav->curr_child = 0; - n_trav->order = (*trav)->order; - - (*trav)->curr_child++; - *trav = n_trav; - - break; - - case mpc_ast_trav_order_post: - ret = (*trav)->curr_node; - - /* Move up tree to the parent If the parent doesn't have any more nodes, - * then this is the current node. If it does, move down to its left most - * child. Also, free the previous traversal node */ - to_free = *trav; - *trav = (*trav)->parent; - free(to_free); - - if(*trav == NULL) - break; - - /* Next child */ - (*trav)->curr_child++; - - /* If there aren't any more children, this is the next node */ - if((*trav)->curr_child >= (*trav)->curr_node->children_num) { - break; - } - - /* If there are still more children, find the leftmost child from this - * node */ - while((*trav)->curr_node->children_num > 0) { - n_trav = malloc(sizeof(mpc_ast_trav_t)); - - cchild = (*trav)->curr_child; - n_trav->curr_node = (*trav)->curr_node->children[cchild]; - n_trav->parent = *trav; - n_trav->curr_child = 0; - n_trav->order = (*trav)->order; - - *trav = n_trav; - } - - default: - /* Unreachable, but compiler complaints */ - break; - } - - return ret; -} - -void mpc_ast_traverse_free(mpc_ast_trav_t **trav) { - mpc_ast_trav_t *n_trav; - - /* Go through parents until all are free */ - while(*trav != NULL) { - n_trav = (*trav)->parent; - free(*trav); - *trav = n_trav; - } -} - -mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **xs) { - - int i, j; - mpc_ast_t** as = (mpc_ast_t**)xs; - mpc_ast_t *r; - - if (n == 0) { return NULL; } - if (n == 1) { return xs[0]; } - if (n == 2 && xs[1] == NULL) { return xs[0]; } - if (n == 2 && xs[0] == NULL) { return xs[1]; } - - r = mpc_ast_new(">", ""); - - for (i = 0; i < n; i++) { - - if (as[i] == NULL) { continue; } - - if (as[i] && as[i]->children_num == 0) { - mpc_ast_add_child(r, as[i]); - } else if (as[i] && as[i]->children_num == 1) { - mpc_ast_add_child(r, mpc_ast_add_root_tag(as[i]->children[0], as[i]->tag)); - mpc_ast_delete_no_children(as[i]); - } else if (as[i] && as[i]->children_num >= 2) { - for (j = 0; j < as[i]->children_num; j++) { - mpc_ast_add_child(r, as[i]->children[j]); - } - mpc_ast_delete_no_children(as[i]); - } - - } - - if (r->children_num) { - r->state = r->children[0]->state; - } - - return r; -} - -mpc_val_t *mpcf_str_ast(mpc_val_t *c) { - mpc_ast_t *a = mpc_ast_new("", c); - free(c); - return a; -} - -mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs) { - mpc_state_t *s = ((mpc_state_t**)xs)[0]; - mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; - (void)n; - a = mpc_ast_state(a, *s); - free(s); - return a; -} - -mpc_parser_t *mpca_state(mpc_parser_t *a) { - return mpc_and(2, mpcf_state_ast, mpc_state(), a, free); -} - -mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t) { - return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_tag, (void*)t); -} - -mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t) { - return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_add_tag, (void*)t); -} - -mpc_parser_t *mpca_root(mpc_parser_t *a) { - return mpc_apply(a, (mpc_apply_t)mpc_ast_add_root); -} - -mpc_parser_t *mpca_not(mpc_parser_t *a) { return mpc_not(a, (mpc_dtor_t)mpc_ast_delete); } -mpc_parser_t *mpca_maybe(mpc_parser_t *a) { return mpc_maybe(a); } -mpc_parser_t *mpca_many(mpc_parser_t *a) { return mpc_many(mpcf_fold_ast, a); } -mpc_parser_t *mpca_many1(mpc_parser_t *a) { return mpc_many1(mpcf_fold_ast, a); } -mpc_parser_t *mpca_count(int n, mpc_parser_t *a) { return mpc_count(n, mpcf_fold_ast, a, (mpc_dtor_t)mpc_ast_delete); } - -mpc_parser_t *mpca_or(int n, ...) { - - int i; - va_list va; - - mpc_parser_t *p = mpc_undefined(); - - p->type = MPC_TYPE_OR; - p->data.or.n = n; - p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); - - va_start(va, n); - for (i = 0; i < n; i++) { - p->data.or.xs[i] = va_arg(va, mpc_parser_t*); - } - va_end(va); - - return p; - -} - -mpc_parser_t *mpca_and(int n, ...) { - - int i; - va_list va; - - mpc_parser_t *p = mpc_undefined(); - - p->type = MPC_TYPE_AND; - p->data.and.n = n; - p->data.and.f = mpcf_fold_ast; - p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); - p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); - - va_start(va, n); - for (i = 0; i < n; i++) { - p->data.and.xs[i] = va_arg(va, mpc_parser_t*); - } - for (i = 0; i < (n-1); i++) { - p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; - } - va_end(va); - - return p; -} - -mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_ast_delete); } - -/* -** Grammar Parser -*/ - -/* -** This is another interesting bootstrapping. -** -** Having a general purpose AST type allows -** users to specify the grammar alone and -** let all fold rules be automatically taken -** care of by existing functions. -** -** You don't get to control the type spat -** out but this means you can make a nice -** parser to take in some grammar in nice -** syntax and spit out a parser that works. -** -** The grammar for this looks surprisingly -** like regex but the main difference is that -** it is now whitespace insensitive and the -** base type takes literals of some form. -*/ - -/* -** -** ### Grammar Grammar -** -** : ( "|" ) | -** -** : * -** -** : -** | "*" -** | "+" -** | "?" -** | "{" "}" -** -** : "<" ( | ) ">" -** | -** | -** | -** | "(" ")" -*/ - -typedef struct { - va_list *va; - int parsers_num; - mpc_parser_t **parsers; - int flags; -} mpca_grammar_st_t; - -static mpc_val_t *mpcaf_grammar_or(int n, mpc_val_t **xs) { - (void) n; - if (xs[1] == NULL) { return xs[0]; } - else { return mpca_or(2, xs[0], xs[1]); } -} - -static mpc_val_t *mpcaf_grammar_and(int n, mpc_val_t **xs) { - int i; - mpc_parser_t *p = mpc_pass(); - for (i = 0; i < n; i++) { - if (xs[i] != NULL) { p = mpca_and(2, p, xs[i]); } - } - return p; -} - -static mpc_val_t *mpcaf_grammar_repeat(int n, mpc_val_t **xs) { - int num; - (void) n; - if (xs[1] == NULL) { return xs[0]; } - switch(((char*)xs[1])[0]) - { - case '*': { free(xs[1]); return mpca_many(xs[0]); }; break; - case '+': { free(xs[1]); return mpca_many1(xs[0]); }; break; - case '?': { free(xs[1]); return mpca_maybe(xs[0]); }; break; - case '!': { free(xs[1]); return mpca_not(xs[0]); }; break; - default: - num = *((int*)xs[1]); - free(xs[1]); - } - return mpca_count(num, xs[0]); -} - -static mpc_val_t *mpcaf_grammar_string(mpc_val_t *x, void *s) { - mpca_grammar_st_t *st = s; - char *y = mpcf_unescape(x); - mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_string(y) : mpc_tok(mpc_string(y)); - free(y); - return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "string")); -} - -static mpc_val_t *mpcaf_grammar_char(mpc_val_t *x, void *s) { - mpca_grammar_st_t *st = s; - char *y = mpcf_unescape(x); - mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_char(y[0]) : mpc_tok(mpc_char(y[0])); - free(y); - return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "char")); -} - -static mpc_val_t *mpcaf_fold_regex(int n, mpc_val_t **xs) { - char *y = xs[0]; - char *m = xs[1]; - mpca_grammar_st_t *st = xs[2]; - mpc_parser_t *p; - int mode = MPC_RE_DEFAULT; - - (void)n; - if (strchr(m, 'm')) { mode |= MPC_RE_MULTILINE; } - if (strchr(m, 's')) { mode |= MPC_RE_DOTALL; } - y = mpcf_unescape_regex(y); - p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_re_mode(y, mode) : mpc_tok(mpc_re_mode(y, mode)); - free(y); - free(m); - - return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "regex")); -} - -/* Should this just use `isdigit` instead? */ -static int is_number(const char* s) { - size_t i; - for (i = 0; i < strlen(s); i++) { if (!strchr("0123456789", s[i])) { return 0; } } - return 1; -} - -static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) { - - int i; - mpc_parser_t *p; - - /* Case of Number */ - if (is_number(x)) { - - i = strtol(x, NULL, 10); - - while (st->parsers_num <= i) { - st->parsers_num++; - st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); - st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*); - if (st->parsers[st->parsers_num-1] == NULL) { - return mpc_failf("No Parser in position %i! Only supplied %i Parsers!", i, st->parsers_num); - } - } - - return st->parsers[st->parsers_num-1]; - - /* Case of Identifier */ - } else { - - /* Search Existing Parsers */ - for (i = 0; i < st->parsers_num; i++) { - mpc_parser_t *q = st->parsers[i]; - if (q == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } - if (q->name && strcmp(q->name, x) == 0) { return q; } - } - - /* Search New Parsers */ - while (1) { - - p = va_arg(*st->va, mpc_parser_t*); - - st->parsers_num++; - st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); - st->parsers[st->parsers_num-1] = p; - - if (p == NULL || p->name == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } - if (p->name && strcmp(p->name, x) == 0) { return p; } - - } - - } - -} - -static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *s) { - - mpca_grammar_st_t *st = s; - mpc_parser_t *p = mpca_grammar_find_parser(x, st); - free(x); - - if (p->name) { - return mpca_state(mpca_root(mpca_add_tag(p, p->name))); - } else { - return mpca_state(mpca_root(p)); - } -} - -mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { - - char *err_msg; - mpc_parser_t *err_out; - mpc_result_t r; - mpc_parser_t *GrammarTotal, *Grammar, *Term, *Factor, *Base; - - GrammarTotal = mpc_new("grammar_total"); - Grammar = mpc_new("grammar"); - Term = mpc_new("term"); - Factor = mpc_new("factor"); - Base = mpc_new("base"); - - mpc_define(GrammarTotal, - mpc_predictive(mpc_total(Grammar, mpc_soft_delete)) - ); - - mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, - Term, - mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), - mpc_soft_delete - )); - - mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); - - mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, - Base, - mpc_or(6, - mpc_sym("*"), - mpc_sym("+"), - mpc_sym("?"), - mpc_sym("!"), - mpc_tok_brackets(mpc_int(), free), - mpc_pass()), - mpc_soft_delete - )); - - mpc_define(Base, mpc_or(5, - mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), - mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), - mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), - mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), - mpc_tok_parens(Grammar, mpc_soft_delete) - )); - - mpc_optimise(GrammarTotal); - mpc_optimise(Grammar); - mpc_optimise(Factor); - mpc_optimise(Term); - mpc_optimise(Base); - - if(!mpc_parse("", grammar, GrammarTotal, &r)) { - err_msg = mpc_err_string(r.error); - err_out = mpc_failf("Invalid Grammar: %s", err_msg); - mpc_err_delete(r.error); - free(err_msg); - r.output = err_out; - } - - mpc_cleanup(5, GrammarTotal, Grammar, Term, Factor, Base); - - mpc_optimise(r.output); - - return (st->flags & MPCA_LANG_PREDICTIVE) ? mpc_predictive(r.output) : r.output; - -} - -mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...) { - mpca_grammar_st_t st; - mpc_parser_t *res; - va_list va; - va_start(va, grammar); - - st.va = &va; - st.parsers_num = 0; - st.parsers = NULL; - st.flags = flags; - - res = mpca_grammar_st(grammar, &st); - free(st.parsers); - va_end(va); - return res; -} - -typedef struct { - char *ident; - char *name; - mpc_parser_t *grammar; -} mpca_stmt_t; - -static mpc_val_t *mpca_stmt_afold(int n, mpc_val_t **xs) { - mpca_stmt_t *stmt = malloc(sizeof(mpca_stmt_t)); - stmt->ident = ((char**)xs)[0]; - stmt->name = ((char**)xs)[1]; - stmt->grammar = ((mpc_parser_t**)xs)[3]; - (void) n; - free(((char**)xs)[2]); - free(((char**)xs)[4]); - - return stmt; -} - -static mpc_val_t *mpca_stmt_fold(int n, mpc_val_t **xs) { - - int i; - mpca_stmt_t **stmts = malloc(sizeof(mpca_stmt_t*) * (n+1)); - - for (i = 0; i < n; i++) { - stmts[i] = xs[i]; - } - stmts[n] = NULL; - - return stmts; -} - -static void mpca_stmt_list_delete(mpc_val_t *x) { - - mpca_stmt_t **stmts = x; - - while(*stmts) { - mpca_stmt_t *stmt = *stmts; - free(stmt->ident); - free(stmt->name); - mpc_soft_delete(stmt->grammar); - free(stmt); - stmts++; - } - free(x); - -} - -static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *s) { - - mpca_grammar_st_t *st = s; - mpca_stmt_t *stmt; - mpca_stmt_t **stmts = x; - mpc_parser_t *left; - - while(*stmts) { - stmt = *stmts; - left = mpca_grammar_find_parser(stmt->ident, st); - if (st->flags & MPCA_LANG_PREDICTIVE) { stmt->grammar = mpc_predictive(stmt->grammar); } - if (stmt->name) { stmt->grammar = mpc_expect(stmt->grammar, stmt->name); } - mpc_optimise(stmt->grammar); - mpc_define(left, stmt->grammar); - free(stmt->ident); - free(stmt->name); - free(stmt); - stmts++; - } - - free(x); - - return NULL; -} - -static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) { - - mpc_result_t r; - mpc_err_t *e; - mpc_parser_t *Lang, *Stmt, *Grammar, *Term, *Factor, *Base; - - Lang = mpc_new("lang"); - Stmt = mpc_new("stmt"); - Grammar = mpc_new("grammar"); - Term = mpc_new("term"); - Factor = mpc_new("factor"); - Base = mpc_new("base"); - - mpc_define(Lang, mpc_apply_to( - mpc_total(mpc_predictive(mpc_many(mpca_stmt_fold, Stmt)), mpca_stmt_list_delete), - mpca_stmt_list_apply_to, st - )); - - mpc_define(Stmt, mpc_and(5, mpca_stmt_afold, - mpc_tok(mpc_ident()), mpc_maybe(mpc_tok(mpc_string_lit())), mpc_sym(":"), Grammar, mpc_sym(";"), - free, free, free, mpc_soft_delete - )); - - mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, - Term, - mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), - mpc_soft_delete - )); - - mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); - - mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, - Base, - mpc_or(6, - mpc_sym("*"), - mpc_sym("+"), - mpc_sym("?"), - mpc_sym("!"), - mpc_tok_brackets(mpc_int(), free), - mpc_pass()), - mpc_soft_delete - )); - - mpc_define(Base, mpc_or(5, - mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), - mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), - mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), - mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), - mpc_tok_parens(Grammar, mpc_soft_delete) - )); - - mpc_optimise(Lang); - mpc_optimise(Stmt); - mpc_optimise(Grammar); - mpc_optimise(Term); - mpc_optimise(Factor); - mpc_optimise(Base); - - if (!mpc_parse_input(i, Lang, &r)) { - e = r.error; - } else { - e = NULL; - } - - mpc_cleanup(6, Lang, Stmt, Grammar, Term, Factor, Base); - - return e; -} - -mpc_err_t *mpca_lang_file(int flags, FILE *f, ...) { - mpca_grammar_st_t st; - mpc_input_t *i; - mpc_err_t *err; - - va_list va; - va_start(va, f); - - st.va = &va; - st.parsers_num = 0; - st.parsers = NULL; - st.flags = flags; - - i = mpc_input_new_file("", f); - err = mpca_lang_st(i, &st); - mpc_input_delete(i); - - free(st.parsers); - va_end(va); - return err; -} - -mpc_err_t *mpca_lang_pipe(int flags, FILE *p, ...) { - mpca_grammar_st_t st; - mpc_input_t *i; - mpc_err_t *err; - - va_list va; - va_start(va, p); - - st.va = &va; - st.parsers_num = 0; - st.parsers = NULL; - st.flags = flags; - - i = mpc_input_new_pipe("", p); - err = mpca_lang_st(i, &st); - mpc_input_delete(i); - - free(st.parsers); - va_end(va); - return err; -} - -mpc_err_t *mpca_lang(int flags, const char *language, ...) { - - mpca_grammar_st_t st; - mpc_input_t *i; - mpc_err_t *err; - - va_list va; - va_start(va, language); - - st.va = &va; - st.parsers_num = 0; - st.parsers = NULL; - st.flags = flags; - - i = mpc_input_new_string("", language); - err = mpca_lang_st(i, &st); - mpc_input_delete(i); - - free(st.parsers); - va_end(va); - return err; -} - -mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...) { - - mpca_grammar_st_t st; - mpc_input_t *i; - mpc_err_t *err; - - va_list va; - - FILE *f = fopen(filename, "rb"); - - if (f == NULL) { - err = mpc_err_file(filename, "Unable to open file!"); - return err; - } - - va_start(va, filename); - - st.va = &va; - st.parsers_num = 0; - st.parsers = NULL; - st.flags = flags; - - i = mpc_input_new_file(filename, f); - err = mpca_lang_st(i, &st); - mpc_input_delete(i); - - free(st.parsers); - va_end(va); - - fclose(f); - - return err; -} - -static int mpc_nodecount_unretained(mpc_parser_t* p, int force) { - - int i, total; - - if (p->retained && !force) { return 0; } - - if (p->type == MPC_TYPE_EXPECT) { return 1 + mpc_nodecount_unretained(p->data.expect.x, 0); } - - if (p->type == MPC_TYPE_APPLY) { return 1 + mpc_nodecount_unretained(p->data.apply.x, 0); } - if (p->type == MPC_TYPE_APPLY_TO) { return 1 + mpc_nodecount_unretained(p->data.apply_to.x, 0); } - if (p->type == MPC_TYPE_PREDICT) { return 1 + mpc_nodecount_unretained(p->data.predict.x, 0); } - - if (p->type == MPC_TYPE_CHECK) { return 1 + mpc_nodecount_unretained(p->data.check.x, 0); } - if (p->type == MPC_TYPE_CHECK_WITH) { return 1 + mpc_nodecount_unretained(p->data.check_with.x, 0); } - - if (p->type == MPC_TYPE_NOT) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } - if (p->type == MPC_TYPE_MAYBE) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } - - if (p->type == MPC_TYPE_MANY) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } - if (p->type == MPC_TYPE_MANY1) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } - if (p->type == MPC_TYPE_COUNT) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } - if (p->type == MPC_TYPE_SEPBY1) { - total = 1; - total += mpc_nodecount_unretained(p->data.sepby1.x, 0); - total += mpc_nodecount_unretained(p->data.sepby1.sep, 0); - total += mpc_nodecount_unretained(p->data.sepby1.x, 0); - return total; - } - - if (p->type == MPC_TYPE_OR) { - total = 1; - for(i = 0; i < p->data.or.n; i++) { - total += mpc_nodecount_unretained(p->data.or.xs[i], 0); - } - return total; - } - - if (p->type == MPC_TYPE_AND) { - total = 1; - for(i = 0; i < p->data.and.n; i++) { - total += mpc_nodecount_unretained(p->data.and.xs[i], 0); - } - return total; - } - - return 1; - -} - -void mpc_stats(mpc_parser_t* p) { - printf("Stats\n"); - printf("=====\n"); - printf("Node Count: %i\n", mpc_nodecount_unretained(p, 1)); -} - -static void mpc_optimise_unretained(mpc_parser_t *p, int force) { - - int i, n, m; - mpc_parser_t *t; - - if (p->retained && !force) { return; } - - /* Optimise Subexpressions */ - - if (p->type == MPC_TYPE_EXPECT) { mpc_optimise_unretained(p->data.expect.x, 0); } - if (p->type == MPC_TYPE_APPLY) { mpc_optimise_unretained(p->data.apply.x, 0); } - if (p->type == MPC_TYPE_APPLY_TO) { mpc_optimise_unretained(p->data.apply_to.x, 0); } - if (p->type == MPC_TYPE_CHECK) { mpc_optimise_unretained(p->data.check.x, 0); } - if (p->type == MPC_TYPE_CHECK_WITH) { mpc_optimise_unretained(p->data.check_with.x, 0); } - if (p->type == MPC_TYPE_PREDICT) { mpc_optimise_unretained(p->data.predict.x, 0); } - if (p->type == MPC_TYPE_NOT) { mpc_optimise_unretained(p->data.not.x, 0); } - if (p->type == MPC_TYPE_MAYBE) { mpc_optimise_unretained(p->data.not.x, 0); } - if (p->type == MPC_TYPE_MANY) { mpc_optimise_unretained(p->data.repeat.x, 0); } - if (p->type == MPC_TYPE_MANY1) { mpc_optimise_unretained(p->data.repeat.x, 0); } - if (p->type == MPC_TYPE_COUNT) { mpc_optimise_unretained(p->data.repeat.x, 0); } - if (p->type == MPC_TYPE_SEPBY1) { - mpc_optimise_unretained(p->data.sepby1.x, 0); - mpc_optimise_unretained(p->data.sepby1.sep, 0); - } - - if (p->type == MPC_TYPE_OR) { - for(i = 0; i < p->data.or.n; i++) { - mpc_optimise_unretained(p->data.or.xs[i], 0); - } - } - - if (p->type == MPC_TYPE_AND) { - for(i = 0; i < p->data.and.n; i++) { - mpc_optimise_unretained(p->data.and.xs[i], 0); - } - } - - /* Perform optimisations */ - - while (1) { - - /* Merge rhs `or` */ - if (p->type == MPC_TYPE_OR - && p->data.or.xs[p->data.or.n-1]->type == MPC_TYPE_OR - && !p->data.or.xs[p->data.or.n-1]->retained) { - t = p->data.or.xs[p->data.or.n-1]; - n = p->data.or.n; m = t->data.or.n; - p->data.or.n = n + m - 1; - p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); - memmove(p->data.or.xs + n - 1, t->data.or.xs, m * sizeof(mpc_parser_t*)); - free(t->data.or.xs); free(t->name); free(t); - continue; - } - - /* Merge lhs `or` */ - if (p->type == MPC_TYPE_OR - && p->data.or.xs[0]->type == MPC_TYPE_OR - && !p->data.or.xs[0]->retained) { - t = p->data.or.xs[0]; - n = p->data.or.n; m = t->data.or.n; - p->data.or.n = n + m - 1; - p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); - memmove(p->data.or.xs + m, p->data.or.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); - memmove(p->data.or.xs, t->data.or.xs, m * sizeof(mpc_parser_t*)); - free(t->data.or.xs); free(t->name); free(t); - continue; - } - - /* Remove ast `pass` */ - if (p->type == MPC_TYPE_AND - && p->data.and.n == 2 - && p->data.and.xs[0]->type == MPC_TYPE_PASS - && !p->data.and.xs[0]->retained - && p->data.and.f == mpcf_fold_ast) { - t = p->data.and.xs[1]; - mpc_delete(p->data.and.xs[0]); - free(p->data.and.xs); free(p->data.and.dxs); free(p->name); - memcpy(p, t, sizeof(mpc_parser_t)); - free(t); - continue; - } - - /* Merge ast lhs `and` */ - if (p->type == MPC_TYPE_AND - && p->data.and.f == mpcf_fold_ast - && p->data.and.xs[0]->type == MPC_TYPE_AND - && !p->data.and.xs[0]->retained - && p->data.and.xs[0]->data.and.f == mpcf_fold_ast) { - t = p->data.and.xs[0]; - n = p->data.and.n; m = t->data.and.n; - p->data.and.n = n + m - 1; - p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); - p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); - memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); - memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); - for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } - free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); - continue; - } - - /* Merge ast rhs `and` */ - if (p->type == MPC_TYPE_AND - && p->data.and.f == mpcf_fold_ast - && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND - && !p->data.and.xs[p->data.and.n-1]->retained - && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_fold_ast) { - t = p->data.and.xs[p->data.and.n-1]; - n = p->data.and.n; m = t->data.and.n; - p->data.and.n = n + m - 1; - p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); - p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); - memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); - for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } - free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); - continue; - } - - /* Remove re `lift` */ - if (p->type == MPC_TYPE_AND - && p->data.and.n == 2 - && p->data.and.xs[0]->type == MPC_TYPE_LIFT - && p->data.and.xs[0]->data.lift.lf == mpcf_ctor_str - && !p->data.and.xs[0]->retained - && p->data.and.f == mpcf_strfold) { - t = p->data.and.xs[1]; - mpc_delete(p->data.and.xs[0]); - free(p->data.and.xs); free(p->data.and.dxs); free(p->name); - memcpy(p, t, sizeof(mpc_parser_t)); - free(t); - continue; - } - - /* Merge re lhs `and` */ - if (p->type == MPC_TYPE_AND - && p->data.and.f == mpcf_strfold - && p->data.and.xs[0]->type == MPC_TYPE_AND - && !p->data.and.xs[0]->retained - && p->data.and.xs[0]->data.and.f == mpcf_strfold) { - t = p->data.and.xs[0]; - n = p->data.and.n; m = t->data.and.n; - p->data.and.n = n + m - 1; - p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); - p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); - memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); - memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); - for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } - free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); - continue; - } - - /* Merge re rhs `and` */ - if (p->type == MPC_TYPE_AND - && p->data.and.f == mpcf_strfold - && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND - && !p->data.and.xs[p->data.and.n-1]->retained - && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_strfold) { - t = p->data.and.xs[p->data.and.n-1]; - n = p->data.and.n; m = t->data.and.n; - p->data.and.n = n + m - 1; - p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); - p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); - memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); - for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } - free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); - continue; - } - - return; - - } - -} - -void mpc_optimise(mpc_parser_t *p) { - mpc_optimise_unretained(p, 1); -} diff --git a/src/vendor/mpc.h b/src/vendor/mpc.h deleted file mode 100644 index 49a08ee..0000000 --- a/src/vendor/mpc.h +++ /dev/null @@ -1,391 +0,0 @@ -/* -** mpc - Micro Parser Combinator library for C -** -** https://github.com/orangeduck/mpc -** -** Daniel Holden - contact@daniel-holden.com -** Licensed under BSD3 -*/ - -#ifndef mpc_h -#define mpc_h - -#ifdef __cplusplus -extern "C" { -#endif - -#include -#include -#include -#include -#include -#include -#include - -/* -** State Type -*/ - -typedef struct { - long pos; - long row; - long col; - int term; -} mpc_state_t; - -/* -** Error Type -*/ - -typedef struct { - mpc_state_t state; - int expected_num; - char *filename; - char *failure; - char **expected; - char received; -} mpc_err_t; - -void mpc_err_delete(mpc_err_t *e); -char *mpc_err_string(mpc_err_t *e); -void mpc_err_print(mpc_err_t *e); -void mpc_err_print_to(mpc_err_t *e, FILE *f); - -/* -** Parsing -*/ - -typedef void mpc_val_t; - -typedef union { - mpc_err_t *error; - mpc_val_t *output; -} mpc_result_t; - -struct mpc_parser_t; -typedef struct mpc_parser_t mpc_parser_t; - -int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r); -int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r); -int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r); -int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r); -int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r); - -/* -** Function Types -*/ - -typedef void(*mpc_dtor_t)(mpc_val_t*); -typedef mpc_val_t*(*mpc_ctor_t)(void); - -typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); -typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*); -typedef mpc_val_t*(*mpc_fold_t)(int,mpc_val_t**); - -typedef int(*mpc_check_t)(mpc_val_t**); -typedef int(*mpc_check_with_t)(mpc_val_t**,void*); - -/* -** Building a Parser -*/ - -mpc_parser_t *mpc_new(const char *name); -mpc_parser_t *mpc_copy(mpc_parser_t *a); -mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a); -mpc_parser_t *mpc_undefine(mpc_parser_t *p); - -void mpc_delete(mpc_parser_t *p); -void mpc_cleanup(int n, ...); - -/* -** Basic Parsers -*/ - -mpc_parser_t *mpc_any(void); -mpc_parser_t *mpc_char(char c); -mpc_parser_t *mpc_range(char s, char e); -mpc_parser_t *mpc_oneof(const char *s); -mpc_parser_t *mpc_noneof(const char *s); -mpc_parser_t *mpc_satisfy(int(*f)(char)); -mpc_parser_t *mpc_string(const char *s); - -/* -** Other Parsers -*/ - -mpc_parser_t *mpc_pass(void); -mpc_parser_t *mpc_fail(const char *m); -mpc_parser_t *mpc_failf(const char *fmt, ...); -mpc_parser_t *mpc_lift(mpc_ctor_t f); -mpc_parser_t *mpc_lift_val(mpc_val_t *x); -mpc_parser_t *mpc_anchor(int(*f)(char,char)); -mpc_parser_t *mpc_state(void); - -/* -** Combinator Parsers -*/ - -mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *e); -mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...); -mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f); -mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x); -mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e); -mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e); -mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...); -mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...); - -mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da); -mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf); -mpc_parser_t *mpc_maybe(mpc_parser_t *a); -mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf); - -mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a); -mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a); -mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da); - -mpc_parser_t *mpc_or(int n, ...); -mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...); - -mpc_parser_t *mpc_predictive(mpc_parser_t *a); - -/* -** Common Parsers -*/ - -mpc_parser_t *mpc_eoi(void); -mpc_parser_t *mpc_soi(void); - -mpc_parser_t *mpc_boundary(void); -mpc_parser_t *mpc_boundary_newline(void); - -mpc_parser_t *mpc_whitespace(void); -mpc_parser_t *mpc_whitespaces(void); -mpc_parser_t *mpc_blank(void); - -mpc_parser_t *mpc_newline(void); -mpc_parser_t *mpc_tab(void); -mpc_parser_t *mpc_escape(void); - -mpc_parser_t *mpc_digit(void); -mpc_parser_t *mpc_hexdigit(void); -mpc_parser_t *mpc_octdigit(void); -mpc_parser_t *mpc_digits(void); -mpc_parser_t *mpc_hexdigits(void); -mpc_parser_t *mpc_octdigits(void); - -mpc_parser_t *mpc_lower(void); -mpc_parser_t *mpc_upper(void); -mpc_parser_t *mpc_alpha(void); -mpc_parser_t *mpc_underscore(void); -mpc_parser_t *mpc_alphanum(void); - -mpc_parser_t *mpc_int(void); -mpc_parser_t *mpc_hex(void); -mpc_parser_t *mpc_oct(void); -mpc_parser_t *mpc_number(void); - -mpc_parser_t *mpc_real(void); -mpc_parser_t *mpc_float(void); - -mpc_parser_t *mpc_char_lit(void); -mpc_parser_t *mpc_string_lit(void); -mpc_parser_t *mpc_regex_lit(void); - -mpc_parser_t *mpc_ident(void); - -/* -** Useful Parsers -*/ - -mpc_parser_t *mpc_startwith(mpc_parser_t *a); -mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da); -mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da); - -mpc_parser_t *mpc_stripl(mpc_parser_t *a); -mpc_parser_t *mpc_stripr(mpc_parser_t *a); -mpc_parser_t *mpc_strip(mpc_parser_t *a); -mpc_parser_t *mpc_tok(mpc_parser_t *a); -mpc_parser_t *mpc_sym(const char *s); -mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da); - -mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); -mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad); - -mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); -mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad); -mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad); - -mpc_parser_t *mpc_sepby1(mpc_fold_t f, mpc_parser_t *sep, mpc_parser_t *a); - -/* -** Common Function Parameters -*/ - -void mpcf_dtor_null(mpc_val_t *x); - -mpc_val_t *mpcf_ctor_null(void); -mpc_val_t *mpcf_ctor_str(void); - -mpc_val_t *mpcf_free(mpc_val_t *x); -mpc_val_t *mpcf_int(mpc_val_t *x); -mpc_val_t *mpcf_hex(mpc_val_t *x); -mpc_val_t *mpcf_oct(mpc_val_t *x); -mpc_val_t *mpcf_float(mpc_val_t *x); -mpc_val_t *mpcf_strtriml(mpc_val_t *x); -mpc_val_t *mpcf_strtrimr(mpc_val_t *x); -mpc_val_t *mpcf_strtrim(mpc_val_t *x); - -mpc_val_t *mpcf_escape(mpc_val_t *x); -mpc_val_t *mpcf_escape_regex(mpc_val_t *x); -mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x); -mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x); - -mpc_val_t *mpcf_unescape(mpc_val_t *x); -mpc_val_t *mpcf_unescape_regex(mpc_val_t *x); -mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x); -mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x); - -mpc_val_t *mpcf_null(int n, mpc_val_t** xs); -mpc_val_t *mpcf_fst(int n, mpc_val_t** xs); -mpc_val_t *mpcf_snd(int n, mpc_val_t** xs); -mpc_val_t *mpcf_trd(int n, mpc_val_t** xs); - -mpc_val_t *mpcf_fst_free(int n, mpc_val_t** xs); -mpc_val_t *mpcf_snd_free(int n, mpc_val_t** xs); -mpc_val_t *mpcf_trd_free(int n, mpc_val_t** xs); -mpc_val_t *mpcf_all_free(int n, mpc_val_t** xs); - -mpc_val_t *mpcf_freefold(int n, mpc_val_t** xs); -mpc_val_t *mpcf_strfold(int n, mpc_val_t** xs); - -/* -** Regular Expression Parsers -*/ - -enum { - MPC_RE_DEFAULT = 0, - MPC_RE_M = 1, - MPC_RE_S = 2, - MPC_RE_MULTILINE = 1, - MPC_RE_DOTALL = 2 -}; - -mpc_parser_t *mpc_re(const char *re); -mpc_parser_t *mpc_re_mode(const char *re, int mode); - -/* -** AST -*/ - -typedef struct mpc_ast_t { - char *tag; - char *contents; - mpc_state_t state; - int children_num; - struct mpc_ast_t** children; -} mpc_ast_t; - -mpc_ast_t *mpc_ast_new(const char *tag, const char *contents); -mpc_ast_t *mpc_ast_build(int n, const char *tag, ...); -mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a); -mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a); -mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t); -mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t); -mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t); -mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s); - -void mpc_ast_delete(mpc_ast_t *a); -void mpc_ast_print(mpc_ast_t *a); -void mpc_ast_print_to(mpc_ast_t *a, FILE *fp); - -int mpc_ast_get_index(mpc_ast_t *ast, const char *tag); -int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb); -mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag); -mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb); - -typedef enum { - mpc_ast_trav_order_pre, - mpc_ast_trav_order_post -} mpc_ast_trav_order_t; - -typedef struct mpc_ast_trav_t { - mpc_ast_t *curr_node; - struct mpc_ast_trav_t *parent; - int curr_child; - mpc_ast_trav_order_t order; -} mpc_ast_trav_t; - -mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, - mpc_ast_trav_order_t order); - -mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav); - -void mpc_ast_traverse_free(mpc_ast_trav_t **trav); - -/* -** Warning: This function currently doesn't test for equality of the `state` member! -*/ -int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b); - -mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **as); -mpc_val_t *mpcf_str_ast(mpc_val_t *c); -mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs); - -mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t); -mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t); -mpc_parser_t *mpca_root(mpc_parser_t *a); -mpc_parser_t *mpca_state(mpc_parser_t *a); -mpc_parser_t *mpca_total(mpc_parser_t *a); - -mpc_parser_t *mpca_not(mpc_parser_t *a); -mpc_parser_t *mpca_maybe(mpc_parser_t *a); - -mpc_parser_t *mpca_many(mpc_parser_t *a); -mpc_parser_t *mpca_many1(mpc_parser_t *a); -mpc_parser_t *mpca_count(int n, mpc_parser_t *a); - -mpc_parser_t *mpca_or(int n, ...); -mpc_parser_t *mpca_and(int n, ...); - -enum { - MPCA_LANG_DEFAULT = 0, - MPCA_LANG_PREDICTIVE = 1, - MPCA_LANG_WHITESPACE_SENSITIVE = 2 -}; - -mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...); - -mpc_err_t *mpca_lang(int flags, const char *language, ...); -mpc_err_t *mpca_lang_file(int flags, FILE *f, ...); -mpc_err_t *mpca_lang_pipe(int flags, FILE *f, ...); -mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...); - -/* -** Misc -*/ - - -void mpc_print(mpc_parser_t *p); -void mpc_optimise(mpc_parser_t *p); -void mpc_stats(mpc_parser_t *p); - -int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, - int(*tester)(const void*, const void*), - mpc_dtor_t destructor, - void(*printer)(const void*)); - -int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, - int(*tester)(const void*, const void*), - mpc_dtor_t destructor, - void(*printer)(const void*)); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/vendor/yar.c b/src/vendor/yar.c deleted file mode 100644 index 54b4179..0000000 --- a/src/vendor/yar.c +++ /dev/null @@ -1,2 +0,0 @@ -#define YAR_IMPLEMENTATION -#include "yar.h" diff --git a/src/vendor/yar.h b/src/vendor/yar.h deleted file mode 100644 index 5f4310f..0000000 --- a/src/vendor/yar.h +++ /dev/null @@ -1,229 +0,0 @@ -/* yar - dynamic arrays in C - public domain Nicholas Rixson 2025 - * - * https://github.com/segcore/yar - * - * Licence: see end of file - - Sample usage: - #define YAR_IMPLEMENTATION - #include "yar.h" - - int main() { - // struct { double *items; size_t count; size_t capacity; } numbers = {0}; - yar(double) numbers = {0}; - *yar_append(&numbers) = 3.14159; - *yar_append(&numbers) = 2.71828; - *yar_append(&numbers) = 1.61803; - - for(size_t i = 0; i < numbers.count; i++) { - printf("%f\n", numbers.items[i]); - } - - yar_free(&numbers); - } - */ -#ifndef YAR_H -#define YAR_H - -#include // size_t -#include // strlen - -/* - * yar(type) - Declare a new basic dynamic array - * - * yar_append(array) - Add a new item at the end of the array, and return a pointer to it - * - * yar_reserve(array, extra) - Reserve space for `extra` count of items - * - * yar_append_many(array, data, num) - Append a copy of existing data - * - * yar_append_cstr(array, data) - Append a C string (nul-terminated char array) - * - * yar_insert(array, index, num) - Insert items somewhere within the array. Moves items to higher indexes as required. Returns &array[index] - * - * yar_remove(array, index, num) - Remove items from somewhere within the array. Moves items to lower indexes as required. - * - * yar_reset(array) - Reset the count of elements to 0, to re-use the memory. Does not free the memory. - * - * yar_init(array) - Set items, count, and capacity to 0. Can usually be avoided with = {0}; - * - * yar_free(array) - Free items memory, and set the items, count, and capacity to 0. - */ - -#define yar(type) struct { type *items; size_t count; size_t capacity; } -#define yar_append(array) ((_yar_append((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0])) ? \ - &(array)->items[(array)->count - 1] : NULL)) -#define yar_reserve(array, extra) ((_yar_reserve((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0]), (extra)) ? \ - &(array)->items[(array)->count] : NULL)) -#define yar_append_many(array, data, num) ((_yar_append_many((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0]), 1 ? (data) : ((array)->items), (num)) )) -#define yar_append_cstr(array, data) yar_append_many(array, data, strlen(data)) -#define yar_insert(array, index, num) ((_yar_insert((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0]), index, num) )) -#define yar_remove(array, index, num) ((_yar_remove((void**)&(array)->items, &(array)->count, sizeof((array)->items[0]), index, num) )) -#define yar_reset(array) (((array)->count = 0)) -#define yar_init(array) ((array)->items = NULL, (array)->count = 0, (array)->capacity = 0) -#define yar_free(array) ((_yar_free((array)->items)), (array)->items = NULL, (array)->count = 0, (array)->capacity = 0) - -#ifndef YARAPI - #define YARAPI // nothing; overridable if needed. -#endif - -#ifdef __cplusplus - extern "C" { -#endif - -// Implementation functions -YARAPI void* _yar_append(void** items_pointer, size_t* count, size_t* capacity, size_t item_size); -YARAPI void* _yar_append_many(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, void* data, size_t extra); -YARAPI void* _yar_reserve(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t extra); -YARAPI void* _yar_insert(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t index, size_t extra); -YARAPI void* _yar_remove(void** items_pointer, size_t* count, size_t item_size, size_t index, size_t remove); -YARAPI void* _yar_realloc(void* p, size_t new_size); -YARAPI void _yar_free(void* p); - -#ifdef __cplusplus - } -#endif - -#endif // YAR_H - -#if defined(YAR_IMPLEMENTATION) - -#ifndef YAR_MIN_CAP - #define YAR_MIN_CAP 16 -#endif - -#ifndef YAR_REALLOC - #define YAR_REALLOC realloc -#endif - -#ifndef YAR_FREE - #define YAR_FREE free -#endif - -#include // mem* functions -YARAPI void* _yar_append(void** items_pointer, size_t* count, size_t* capacity, size_t item_size) -{ - void* result = _yar_reserve(items_pointer, count, capacity, item_size, 1); - if (result != NULL) *count += 1; - return result; -} - -YARAPI void* _yar_append_many(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, void* data, size_t extra) -{ - void* result = _yar_reserve(items_pointer, count, capacity, item_size, extra); - if (result != NULL) { - memcpy(result, data, item_size * extra); - *count += extra; - } - return result; -} - -YARAPI void* _yar_reserve(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t extra) -{ - char* items = *items_pointer; - size_t newcount = *count + extra; - if (newcount > *capacity) { - size_t newcap = (*capacity < YAR_MIN_CAP) ? YAR_MIN_CAP : *capacity * 8 / 5; - if (newcap < newcount) newcap = newcount; - void* next = _yar_realloc(items, newcap * item_size); - if (next == NULL) return NULL; - items = next; - *items_pointer = next; - *capacity = newcap; - } - void* result = items + (*count * item_size); - if (extra && result) memset(result, 0, item_size * extra); - return result; -} - -YARAPI void* _yar_insert(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t index, size_t extra) -{ - void* next = _yar_reserve(items_pointer, count, capacity, item_size, extra); - if(next == NULL) return NULL; - - char* items = *items_pointer; - if (index < *count) - { - memmove(&items[item_size * (index + extra)], &items[item_size * index], (*count - index) * item_size); - memset(&items[item_size * index], 0, extra * item_size); - } - *count += extra; - return items + index * item_size; -} - -YARAPI void* _yar_remove(void** items_pointer, size_t* count, size_t item_size, size_t index, size_t remove) -{ - if(remove >= *count) { - *count = 0; - return *items_pointer; - } - if (index >= *count) { - return *items_pointer; - } - char* items = *items_pointer; - memmove(&items[item_size * index], &items[item_size * (index + remove)], item_size * (*count - (index + remove))); - *count -= remove; - return items + item_size * index; -} - -YARAPI void* _yar_realloc(void* p, size_t new_size) -{ - // Declaration, so we can call it if the definition is overridden - extern void* YAR_REALLOC(void *ptr, size_t size); - return YAR_REALLOC(p, new_size); -} - -YARAPI void _yar_free(void* p) -{ - extern void YAR_FREE(void *ptr); - YAR_FREE(p); -} - -#endif // YAR_IMPLEMENTATION -/* ------------------------------------------------------------------------------- -This software is available under 2 licenses -- choose whichever you prefer. ------------------------------------------------------------------------------- -ALTERNATIVE A - MIT License - -Copyright (c) 2025 Nicholas Rixson - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. ------------------------------------------------------------------------------- -ALTERNATIVE B - Public Domain (www.unlicense.org) -This is free and unencumbered software released into the public domain. -Anyone is free to copy, modify, publish, use, compile, sell, or distribute this -software, either in source code form or as a compiled binary, for any purpose, -commercial or non-commercial, and by any means. - -In jurisdictions that recognize copyright laws, the author or authors of this -software dedicate any and all copyright interest in the software to the public -domain. We make this dedication for the benefit of the public at large and to -the detriment of our heirs and successors. We intend this dedication to be an -overt act of relinquishment in perpetuity of all present and future rights to -this software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. ------------------------------------------------------------------------------- -*/ diff --git a/src/vm.c b/src/vm.c deleted file mode 100644 index f90c68e..0000000 --- a/src/vm.c +++ /dev/null @@ -1,509 +0,0 @@ -#include -#include - -#include "arena.h" -#include "chunk.h" -#include "compile.h" -#include "dictionary.h" -#include "file.h" -#include "gc.h" -#include "object.h" -#include "primitive.h" -#include "string.h" -#include "userdata.h" -#include "vm.h" - -static I decode_sleb128(U8 **ptr) { - I result = 0; - I shift = 0; - U8 byte; - - do { - byte = **ptr; - (*ptr)++; - result |= (I)(byte & 0x7F) << shift; - shift += 7; - } while (byte & 0x80); - - if ((shift < 64) && (byte & 0x40)) { - result |= -(1LL << shift); - } - - return result; -} - -V vm_init(Vm *vm) { - vm->sp = vm->stack; - vm->rsp = vm->rstack; - vm->tsp = vm->tstack; - vm->chunk = NULL; - vm->dictionary = NULL; - - gc_init(&vm->gc); - arena_init(&vm->arena, 1024 * 1024); - - for (Z i = 0; i < STACK_SIZE; i++) { - vm->stack[i] = NIL; - vm->tstack[i] = NIL; - vm->rstack[i].obj = NIL; - gc_addroot(&vm->gc, &vm->stack[i]); - gc_addroot(&vm->gc, &vm->tstack[i]); - gc_addroot(&vm->gc, &vm->rstack[i].obj); - } - - vm->next_call = NIL; - gc_addroot(&vm->gc, &vm->next_call); - - vm->trampoline = chunk_new(""); - chunk_emit_byte(vm->trampoline, OP_CALL_NEXT); - - vm->stdin = userdata_make(vm, (void *)stdin, &userdata_file); - vm->stdout = userdata_make(vm, (void *)stdout, &userdata_file); - vm->stderr = userdata_make(vm, (void *)stderr, &userdata_file); - - gc_addroot(&vm->gc, &vm->stdin); - gc_addroot(&vm->gc, &vm->stdout); - gc_addroot(&vm->gc, &vm->stderr); -} - -V vm_deinit(Vm *vm) { - chunk_release(vm->trampoline); - - // Free all definitions - Dt *dstack[256]; - Dt **dsp = dstack; - *dsp++ = vm->dictionary; - - while (dsp > dstack) { - Dt *node = *--dsp; - if (!node) - continue; - if (node->chunk != NULL) - chunk_release(node->chunk); - for (I i = 0; i < 4; i++) { - if (node->child[i] != NULL) - *dsp++ = node->child[i]; - } - } - - arena_free(&vm->arena); - vm->dictionary = NULL; - - // Run final GC pass - gc_collect(vm, 1); - gc_deinit(&vm->gc); -} - -static V vm_error(Vm *vm, I error, const char *message) { - I col = -1; - I line = chunk_get_line(vm->chunk, vm->ip - vm->chunk->items, &col); - fprintf(stderr, "error at %ld:%ld: %s\n", line + 1, col + 1, message); - longjmp(vm->error, error); -} - -V vm_push(Vm *vm, O o) { - if (vm->sp >= vm->stack + STACK_SIZE) - vm_error(vm, VM_ERR_STACK_OVERFLOW, "data stack overflow"); - *vm->sp++ = o; -} -O vm_pop(Vm *vm) { - if (vm->sp <= vm->stack) - vm_error(vm, VM_ERR_STACK_UNDERFLOW, "data stack underflow"); - O o = *--vm->sp; - *vm->sp = NIL; - return o; -} - -V vm_tpush(Vm *vm, O o) { - if (vm->tsp >= vm->tstack + STACK_SIZE) - vm_error(vm, VM_ERR_STACK_OVERFLOW, "retain stack overflow"); - *vm->tsp++ = o; -} -O vm_tpop(Vm *vm) { - if (vm->tsp <= vm->tstack) - vm_error(vm, VM_ERR_STACK_UNDERFLOW, "retain stack underflow"); - O o = *--vm->tsp; - *vm->tsp = NIL; - return o; -} - -V vm_rpush(Vm *vm, Bc *chunk, U8 *ip) { - if (vm->rsp >= vm->rstack + STACK_SIZE) - vm_error(vm, VM_ERR_STACK_OVERFLOW, "return stack overflow"); - vm->rsp->chunk = chunk; - vm->rsp->ip = ip; - vm->rsp->obj = NIL; - vm->rsp++; -} -Fr vm_rpop(Vm *vm) { - if (vm->rsp <= vm->rstack) - vm_error(vm, VM_ERR_STACK_UNDERFLOW, "return stack underflow"); - return *--vm->rsp; -} - -I vm_run(Vm *vm, Bc *chunk, I offset) { - I mark = gc_mark(&vm->gc); - if (setjmp(vm->error) != 0) { - gc_reset(&vm->gc, mark); - return 0; - } - - for (Z i = 0; i < chunk->constants.count; i++) - gc_addroot(&vm->gc, &chunk->constants.items[i]); - -#define BINOP(op) \ - { \ - O b = vm_pop(vm); \ - O a = vm_pop(vm); \ - if (!IMM(a) || !IMM(b)) \ - vm_error(vm, VM_ERR_TYPE, "numop on non-numeric objects"); \ - vm_push(vm, NUM(ORD(a) op ORD(b))); \ - break; \ - } - -#define CMPOP(op) \ - { \ - O b = vm_pop(vm); \ - O a = vm_pop(vm); \ - if (!IMM(a) || !IMM(b)) \ - vm_error(vm, VM_ERR_TYPE, "comparison on non-numeric objects"); \ - vm_push(vm, (ORD(a) op ORD(b)) ? NUM(1) : NIL); \ - break; \ - } - - vm->ip = chunk->items + offset; - vm->chunk = chunk; - - for (;;) { - U8 opcode; - switch (opcode = *vm->ip++) { - case OP_NOP: - continue; - case OP_NIL: - vm_push(vm, NIL); - break; - case OP_CONST: { - I idx = decode_sleb128(&vm->ip); - vm_push(vm, vm->chunk->constants.items[idx]); - break; - } - case OP_DROP: { - (void)vm_pop(vm); - break; - } - case OP_2DROP: { - (void)vm_pop(vm); - (void)vm_pop(vm); - break; - } - case OP_DUP: { - O obj = vm_pop(vm); - vm_push(vm, obj); - vm_push(vm, obj); - break; - } - case OP_2DUP: { - O obj2 = vm_pop(vm); - O obj1 = vm_pop(vm); - vm_push(vm, obj1); - vm_push(vm, obj2); - vm_push(vm, obj1); - vm_push(vm, obj2); - break; - } - case OP_SWAP: { - O b = vm_pop(vm); - O a = vm_pop(vm); - vm_push(vm, b); - vm_push(vm, a); - break; - } - case OP_2SWAP: { - O d = vm_pop(vm); - O c = vm_pop(vm); - O b = vm_pop(vm); - O a = vm_pop(vm); - vm_push(vm, c); - vm_push(vm, d); - vm_push(vm, a); - vm_push(vm, b); - break; - } - case OP_NIP: { - /* a b -> b */ - O b = vm_pop(vm); - (void)vm_pop(vm); - vm_push(vm, b); - break; - } - case OP_OVER: { - /* a b -> a b a */ - O b = vm_pop(vm); - O a = vm_pop(vm); - vm_push(vm, a); - vm_push(vm, b); - vm_push(vm, a); - break; - } - case OP_BURY: { - /* a b c - c a b */ - O c = vm_pop(vm); - O b = vm_pop(vm); - O a = vm_pop(vm); - vm_push(vm, c); - vm_push(vm, a); - vm_push(vm, b); - break; - } - case OP_DIG: { - /* a b c - b c a */ - O c = vm_pop(vm); - O b = vm_pop(vm); - O a = vm_pop(vm); - vm_push(vm, b); - vm_push(vm, c); - vm_push(vm, a); - break; - } - case OP_TOR: { - vm_tpush(vm, vm_pop(vm)); - break; - } - case OP_2TOR: { - O obj2 = vm_pop(vm); - O obj1 = vm_pop(vm); - vm_tpush(vm, obj1); - vm_tpush(vm, obj2); - break; - } - case OP_FROMR: { - vm_push(vm, vm_tpop(vm)); - break; - } - case OP_2FROMR: { - O obj2 = vm_tpop(vm); - O obj1 = vm_tpop(vm); - vm_push(vm, obj1); - vm_push(vm, obj2); - break; - } - case OP_DOWORD: { - I idx = decode_sleb128(&vm->ip); - Dt *word = vm->chunk->symbols.items[idx].resolved; - if (!word) - vm_error(vm, VM_ERR_RUNTIME, "word not found"); - vm_rpush(vm, vm->chunk, vm->ip); - vm->chunk = word->chunk; - vm->ip = word->chunk->items; - break; - } - case OP_CALL: { - O quot = vm_pop(vm); - vm_rpush(vm, vm->chunk, vm->ip); - do_call: - switch (type(quot)) { - case OBJ_QUOT: { - Bc **ptr = (Bc **)(UNBOX(quot) + 1); - Bc *chunk = *ptr; - vm->chunk = chunk; - vm->ip = chunk->items; - break; - } - case OBJ_COMPOSE: { - Qo *comp = (Qo *)(UNBOX(quot) + 1); - vm_rpush(vm, vm->trampoline, vm->trampoline->items); - vm->rsp[-1].obj = comp->second; - quot = comp->first; - goto do_call; - } - case OBJ_CURRY: { - Qc *curry = (Qc *)(UNBOX(quot) + 1); - vm_push(vm, curry->value); - quot = curry->callable; - goto do_call; - break; - } - default: - vm_error(vm, VM_ERR_TYPE, "attempt to call non-quotation object"); - } - break; - } - case OP_TAIL_DOWORD: { - I idx = decode_sleb128(&vm->ip); - Dt *word = vm->chunk->symbols.items[idx].resolved; - if (!word) - vm_error(vm, VM_ERR_RUNTIME, "word not found"); - vm->chunk = word->chunk; - vm->ip = word->chunk->items; - break; - } - case OP_CALL_NEXT: - vm_push(vm, vm->next_call); - vm->next_call = NIL; - // fallthrough - case OP_TAIL_CALL: { - O quot = vm_pop(vm); - do_tail_call: - switch (type(quot)) { - case OBJ_QUOT: { - Bc **ptr = (Bc **)(UNBOX(quot) + 1); - Bc *chunk = *ptr; - vm->chunk = chunk; - vm->ip = chunk->items; - break; - } - case OBJ_COMPOSE: { - Qo *comp = (Qo *)(UNBOX(quot) + 1); - vm_rpush(vm, vm->trampoline, vm->trampoline->items); - vm->rsp[-1].obj = comp->second; - quot = comp->first; - goto do_tail_call; - } - case OBJ_CURRY: { - Qc *curry = (Qc *)(UNBOX(quot) + 1); - vm_push(vm, curry->value); - quot = curry->callable; - goto do_tail_call; - break; - } - default: - vm_error(vm, VM_ERR_TYPE, "attempt to call non-quotation object"); - } - break; - } - case OP_PRIM: { - I idx = decode_sleb128(&vm->ip); - Pr prim = primitives_table[idx]; - I err = prim.fn(vm); - if (err != 0) - vm_error(vm, err, "primitive call failed"); - break; - } - case OP_COMPOSE: { - I mark = gc_mark(&vm->gc); - O c1 = vm_pop(vm); - O c2 = vm_pop(vm); - gc_addroot(&vm->gc, &c2); - gc_addroot(&vm->gc, &c1); - if (!callable(c2) || !callable(c1)) - vm_error(vm, VM_ERR_TYPE, "non-callable arguments to compose"); - Hd *hd = gc_alloc(vm, sizeof(Hd) + sizeof(Qo)); - hd->type = OBJ_COMPOSE; - Qo *comp = (Qo *)(hd + 1); - comp->first = c2; - comp->second = c1; - vm_push(vm, BOX(hd)); - gc_reset(&vm->gc, mark); - break; - } - case OP_CURRY: { - I mark = gc_mark(&vm->gc); - O cble = vm_pop(vm); - O value = vm_pop(vm); - gc_addroot(&vm->gc, &cble); - gc_addroot(&vm->gc, &value); - if (!callable(cble)) - vm_error(vm, VM_ERR_TYPE, "non-callable argument to curry"); - Hd *hd = gc_alloc(vm, sizeof(Hd) + sizeof(Qc)); - hd->type = OBJ_CURRY; - Qc *curry = (Qc *)(hd + 1); - curry->value = value; - curry->callable = cble; - vm_push(vm, BOX(hd)); - gc_reset(&vm->gc, mark); - break; - } - case OP_RETURN: - if (vm->rsp != vm->rstack) { - Fr frame = vm_rpop(vm); - vm->next_call = frame.obj; - vm->chunk = frame.chunk; - vm->ip = frame.ip; - } else { - goto done; - } - break; - case OP_CHOOSE: { - O fals = vm_pop(vm); - O tru = vm_pop(vm); - O cond = vm_pop(vm); - if (cond == NIL) { - vm_push(vm, fals); - } else { - vm_push(vm, tru); - } - break; - } - case OP_ADD: - BINOP(+); - case OP_SUB: - BINOP(-); - case OP_MUL: - BINOP(*); - case OP_DIV: - BINOP(/); - case OP_MOD: - BINOP(%); - case OP_LOGAND: - BINOP(&); - case OP_LOGOR: - BINOP(|); - case OP_LOGXOR: - BINOP(^); - case OP_LOGNOT: { - O o = vm_pop(vm); - if (!IMM(o)) - vm_error(vm, VM_ERR_TYPE, "numop on non-number"); - vm_push(vm, NUM(~ORD(o))); - break; - } - case OP_EQ: - CMPOP(==); - case OP_NEQ: - CMPOP(!=); - case OP_LT: - CMPOP(<); - case OP_GT: - CMPOP(>); - case OP_LTE: - CMPOP(<=); - case OP_GTE: - CMPOP(>=); - case OP_AND: { - O b = vm_pop(vm); - O a = vm_pop(vm); - if (a == NIL) { - vm_push(vm, NIL); - } else { - vm_push(vm, b); - } - break; - } - case OP_OR: { - O b = vm_pop(vm); - O a = vm_pop(vm); - if (a == NIL) { - vm_push(vm, b); - } else { - vm_push(vm, a); - } - break; - } - case OP_CONCAT: { - O b = vm_pop(vm); - if (type(b) != OBJ_STR) - vm_error(vm, VM_ERR_TYPE, "expected string"); - O a = vm_pop(vm); - if (type(a) != OBJ_STR) - vm_error(vm, VM_ERR_TYPE, "expected string"); - vm_push(vm, string_concat(vm, a, b)); - break; - } - default: - vm_error(vm, VM_ERR_RUNTIME, "unknown opcode"); - } - } -done: - gc_reset(&vm->gc, mark); - return 1; -} diff --git a/src/vm.h b/src/vm.h deleted file mode 100644 index b4e3c6a..0000000 --- a/src/vm.h +++ /dev/null @@ -1,103 +0,0 @@ -#ifndef VM_H -#define VM_H - -#include - -#include "common.h" - -#include "arena.h" -#include "chunk.h" -#include "dictionary.h" -#include "gc.h" -#include "object.h" - -enum { - OP_NOP = 0, - OP_CONST, - OP_NIL, - OP_DROP, - OP_2DROP, - OP_DUP, - OP_2DUP, - OP_SWAP, - OP_2SWAP, - OP_NIP, - OP_OVER, - OP_BURY, - OP_DIG, - OP_TOR, - OP_2TOR, - OP_FROMR, - OP_2FROMR, - OP_DOWORD, - OP_CALL, - OP_TAIL_DOWORD, - OP_TAIL_CALL, - OP_PRIM, - OP_COMPOSE, - OP_CURRY, - OP_RETURN, - OP_CHOOSE, - OP_ADD, - OP_SUB, - OP_MUL, - OP_DIV, - OP_MOD, - OP_EQ, - OP_NEQ, - OP_LOGAND, - OP_LOGOR, - OP_LOGXOR, - OP_LOGNOT, - OP_LT, - OP_GT, - OP_LTE, - OP_GTE, - OP_AND, - OP_OR, - OP_CONCAT, - OP_CALL_NEXT, -}; - -#define STACK_SIZE 256 - -typedef struct Fr { - Bc *chunk; - U8 *ip; - O obj; -} Fr; - -typedef struct Vm { - Gc gc; - O stack[STACK_SIZE], *sp; - O tstack[STACK_SIZE], *tsp; - Fr rstack[STACK_SIZE], *rsp; - U8 *ip; - Bc *chunk; - Dt *dictionary; - Ar arena; - jmp_buf error; - Bc *trampoline; - O next_call; - - // These objects need to stay as roots! - O stdin, stdout, stderr; -} Vm; - -enum { - VM_ERR_STACK_OVERFLOW = 1, - VM_ERR_STACK_UNDERFLOW, - VM_ERR_TYPE, - VM_ERR_RUNTIME -}; - -V vm_init(Vm *); -V vm_deinit(Vm *); -I vm_run(Vm *, Bc *, I); - -V vm_push(Vm *, O); -O vm_pop(Vm *); -V vm_tpush(Vm *, O); -O vm_tpop(Vm *); - -#endif diff --git a/subprojects/.wraplock b/subprojects/.wraplock deleted file mode 100644 index e69de29..0000000 diff --git a/subprojects/libutf/NOTICE b/subprojects/libutf/NOTICE deleted file mode 100644 index b39ee1e..0000000 --- a/subprojects/libutf/NOTICE +++ /dev/null @@ -1,22 +0,0 @@ -This is a Unix port of the Plan 9 UTF-8 library, by Rob Pike and Ken Thompson. -Please send comments about the packaging to Russ Cox . - -Copyright © 2021 Plan 9 Foundation - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/subprojects/libutf/meson.build b/subprojects/libutf/meson.build deleted file mode 100644 index 0690bdd..0000000 --- a/subprojects/libutf/meson.build +++ /dev/null @@ -1,41 +0,0 @@ -project('libutf', 'c') -add_project_arguments( - '-Wno-missing-braces', - '-Wno-parentheses', - '-Wno-sign-compare', - language: 'c' -) - -inc = include_directories('.') - -libutf = static_library( - 'utf', - [ - 'rune.c', - 'runestrcat.c', - 'runestrchr.c', - 'runestrcmp.c', - 'runestrcpy.c', - 'runestrdup.c', - 'runestrecpy.c', - 'runestrlen.c', - 'runestrncat.c', - 'runestrncmp.c', - 'runestrncpy.c', - 'runestrrchr.c', - 'runestrstr.c', - 'runetype.c', - 'utfecpy.c', - 'utflen.c', - 'utfnlen.c', - 'utfrrune.c', - 'utfrune.c', - 'utfutf.c', - ], -) - -libutf_dep = declare_dependency( - include_directories: inc, - link_with: libutf -) - diff --git a/subprojects/libutf/plan9.h b/subprojects/libutf/plan9.h deleted file mode 100644 index 1ca8ace..0000000 --- a/subprojects/libutf/plan9.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * compiler directive on Plan 9 - */ -#ifndef USED -#define USED(x) if(x);else -#endif - -/* - * easiest way to make sure these are defined - */ -#define uchar _utfuchar -#define ushort _utfushort -#define uint _utfuint -#define ulong _utfulong -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; - -/* - * nil cannot be ((void*)0) on ANSI C, - * because it is used for function pointers - */ -#undef nil -#define nil 0 - -#undef nelem -#define nelem(x) (sizeof (x)/sizeof (x)[0]) diff --git a/subprojects/libutf/rune.c b/subprojects/libutf/rune.c deleted file mode 100644 index bb2d82c..0000000 --- a/subprojects/libutf/rune.c +++ /dev/null @@ -1,217 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -enum -{ - Bit1 = 7, - Bitx = 6, - Bit2 = 5, - Bit3 = 4, - Bit4 = 3, - Bit5 = 2, - - T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */ - Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */ - T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */ - T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */ - T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */ - T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */ - - Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ - Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */ - Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */ - Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */ - - Maskx = (1< T1 - */ - c = *(uchar*)str; - if(c < Tx) { - *rune = c; - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - c1 = *(uchar*)(str+1) ^ Tx; - if(c1 & Testx) - goto bad; - if(c < T3) { - if(c < T2) - goto bad; - l = ((c << Bitx) | c1) & Rune2; - if(l <= Rune1) - goto bad; - *rune = l; - return 2; - } - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - c2 = *(uchar*)(str+2) ^ Tx; - if(c2 & Testx) - goto bad; - if(c < T4) { - l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; - if(l <= Rune2) - goto bad; - *rune = l; - return 3; - } - - /* - * four character sequence - * 10000-10FFFF => T4 Tx Tx Tx - */ - if(UTFmax >= 4) { - c3 = *(uchar*)(str+3) ^ Tx; - if(c3 & Testx) - goto bad; - if(c < T5) { - l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; - if(l <= Rune3) - goto bad; - if(l > Runemax) - goto bad; - *rune = l; - return 4; - } - } - - /* - * bad decoding - */ -bad: - *rune = Bad; - return 1; -} - -int -runetochar(char *str, Rune *rune) -{ - long c; - - /* - * one character sequence - * 00000-0007F => 00-7F - */ - c = *rune; - if(c <= Rune1) { - str[0] = c; - return 1; - } - - /* - * two character sequence - * 00080-007FF => T2 Tx - */ - if(c <= Rune2) { - str[0] = T2 | (c >> 1*Bitx); - str[1] = Tx | (c & Maskx); - return 2; - } - - /* - * three character sequence - * 00800-0FFFF => T3 Tx Tx - */ - if(c > Runemax) - c = Runeerror; - if(c <= Rune3) { - str[0] = T3 | (c >> 2*Bitx); - str[1] = Tx | ((c >> 1*Bitx) & Maskx); - str[2] = Tx | (c & Maskx); - return 3; - } - - /* - * four character sequence - * 010000-1FFFFF => T4 Tx Tx Tx - */ - str[0] = T4 | (c >> 3*Bitx); - str[1] = Tx | ((c >> 2*Bitx) & Maskx); - str[2] = Tx | ((c >> 1*Bitx) & Maskx); - str[3] = Tx | (c & Maskx); - return 4; -} - -int -runelen(long c) -{ - Rune rune; - char str[10]; - - rune = c; - return runetochar(str, &rune); -} - -int -runenlen(Rune *r, int nrune) -{ - int nb, c; - - nb = 0; - while(nrune--) { - c = *r++; - if(c <= Rune1) - nb++; - else - if(c <= Rune2) - nb += 2; - else - if(c <= Rune3 || c > Runemax) - nb += 3; - else - nb += 4; - } - return nb; -} - -int -fullrune(char *str, int n) -{ - int c; - - if(n <= 0) - return 0; - c = *(uchar*)str; - if(c < Tx) - return 1; - if(c < T3) - return n >= 2; - if(UTFmax == 3 || c < T4) - return n >= 3; - return n >= 4; -} diff --git a/subprojects/libutf/runestrcat.c b/subprojects/libutf/runestrcat.c deleted file mode 100644 index 65d4c0f..0000000 --- a/subprojects/libutf/runestrcat.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -Rune* -runestrcat(Rune *s1, Rune *s2) -{ - - runestrcpy(runestrchr(s1, 0), s2); - return s1; -} diff --git a/subprojects/libutf/runestrchr.c b/subprojects/libutf/runestrchr.c deleted file mode 100644 index 21fbeeb..0000000 --- a/subprojects/libutf/runestrchr.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -Rune* -runestrchr(Rune *s, Rune c) -{ - Rune c0 = c; - Rune c1; - - if(c == 0) { - while(*s++) - ; - return s-1; - } - - while(c1 = *s++) - if(c1 == c0) - return s-1; - return 0; -} diff --git a/subprojects/libutf/runestrcmp.c b/subprojects/libutf/runestrcmp.c deleted file mode 100644 index a368613..0000000 --- a/subprojects/libutf/runestrcmp.c +++ /dev/null @@ -1,35 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -int -runestrcmp(Rune *s1, Rune *s2) -{ - Rune c1, c2; - - for(;;) { - c1 = *s1++; - c2 = *s2++; - if(c1 != c2) { - if(c1 > c2) - return 1; - return -1; - } - if(c1 == 0) - return 0; - } -} diff --git a/subprojects/libutf/runestrcpy.c b/subprojects/libutf/runestrcpy.c deleted file mode 100644 index 0659fc3..0000000 --- a/subprojects/libutf/runestrcpy.c +++ /dev/null @@ -1,28 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -Rune* -runestrcpy(Rune *s1, Rune *s2) -{ - Rune *os1; - - os1 = s1; - while(*s1++ = *s2++) - ; - return os1; -} diff --git a/subprojects/libutf/runestrdup.c b/subprojects/libutf/runestrdup.c deleted file mode 100644 index 4f9d6f4..0000000 --- a/subprojects/libutf/runestrdup.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include -#include "plan9.h" -#include "utf.h" - -Rune* -runestrdup(Rune *s) -{ - Rune *ns; - - ns = malloc(sizeof(Rune)*(runestrlen(s) + 1)); - if(ns == 0) - return 0; - - return runestrcpy(ns, s); -} diff --git a/subprojects/libutf/runestrecpy.c b/subprojects/libutf/runestrecpy.c deleted file mode 100644 index c543e22..0000000 --- a/subprojects/libutf/runestrecpy.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -Rune* -runestrecpy(Rune *s1, Rune *es1, Rune *s2) -{ - if(s1 >= es1) - return s1; - - while(*s1++ = *s2++){ - if(s1 == es1){ - *--s1 = '\0'; - break; - } - } - return s1; -} diff --git a/subprojects/libutf/runestrlen.c b/subprojects/libutf/runestrlen.c deleted file mode 100644 index 0a13ecd..0000000 --- a/subprojects/libutf/runestrlen.c +++ /dev/null @@ -1,24 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -long -runestrlen(Rune *s) -{ - - return runestrchr(s, 0) - s; -} diff --git a/subprojects/libutf/runestrncat.c b/subprojects/libutf/runestrncat.c deleted file mode 100644 index 9653637..0000000 --- a/subprojects/libutf/runestrncat.c +++ /dev/null @@ -1,32 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -Rune* -runestrncat(Rune *s1, Rune *s2, long n) -{ - Rune *os1; - - os1 = s1; - s1 = runestrchr(s1, 0); - while(*s1++ = *s2++) - if(--n < 0) { - s1[-1] = 0; - break; - } - return os1; -} diff --git a/subprojects/libutf/runestrncmp.c b/subprojects/libutf/runestrncmp.c deleted file mode 100644 index 5e9a3b6..0000000 --- a/subprojects/libutf/runestrncmp.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -int -runestrncmp(Rune *s1, Rune *s2, long n) -{ - Rune c1, c2; - - while(n > 0) { - c1 = *s1++; - c2 = *s2++; - n--; - if(c1 != c2) { - if(c1 > c2) - return 1; - return -1; - } - if(c1 == 0) - break; - } - return 0; -} diff --git a/subprojects/libutf/runestrncpy.c b/subprojects/libutf/runestrncpy.c deleted file mode 100644 index ffcb3e1..0000000 --- a/subprojects/libutf/runestrncpy.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -Rune* -runestrncpy(Rune *s1, Rune *s2, long n) -{ - int i; - Rune *os1; - - os1 = s1; - for(i = 0; i < n; i++) - if((*s1++ = *s2++) == 0) { - while(++i < n) - *s1++ = 0; - return os1; - } - return os1; -} diff --git a/subprojects/libutf/runestrrchr.c b/subprojects/libutf/runestrrchr.c deleted file mode 100644 index 1b0edbb..0000000 --- a/subprojects/libutf/runestrrchr.c +++ /dev/null @@ -1,30 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -Rune* -runestrrchr(Rune *s, Rune c) -{ - Rune *r; - - if(c == 0) - return runestrchr(s, 0); - r = 0; - while(s = runestrchr(s, c)) - r = s++; - return r; -} diff --git a/subprojects/libutf/runestrstr.c b/subprojects/libutf/runestrstr.c deleted file mode 100644 index f5fa997..0000000 --- a/subprojects/libutf/runestrstr.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -/* - * Return pointer to first occurrence of s2 in s1, - * 0 if none - */ -Rune* -runestrstr(Rune *s1, Rune *s2) -{ - Rune *p, *pa, *pb; - int c0, c; - - c0 = *s2; - if(c0 == 0) - return s1; - s2++; - for(p=runestrchr(s1, c0); p; p=runestrchr(p+1, c0)) { - pa = p; - for(pb=s2;; pb++) { - c = *pb; - if(c == 0) - return p; - if(c != *++pa) - break; - } - } - return 0; -} diff --git a/subprojects/libutf/runetype.c b/subprojects/libutf/runetype.c deleted file mode 100644 index ac6d7b5..0000000 --- a/subprojects/libutf/runetype.c +++ /dev/null @@ -1,1151 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -/* - * alpha ranges - - * only covers ranges not in lower||upper - */ -static -Rune __alpha2[] = -{ - 0x00d8, 0x00f6, /* Ø - ö */ - 0x00f8, 0x01f5, /* ø - ǵ */ - 0x0250, 0x02a8, /* ɐ - ʨ */ - 0x038e, 0x03a1, /* Ύ - Ρ */ - 0x03a3, 0x03ce, /* Σ - ώ */ - 0x03d0, 0x03d6, /* ϐ - ϖ */ - 0x03e2, 0x03f3, /* Ϣ - ϳ */ - 0x0490, 0x04c4, /* Ґ - ӄ */ - 0x0561, 0x0587, /* ա - և */ - 0x05d0, 0x05ea, /* א - ת */ - 0x05f0, 0x05f2, /* װ - ײ */ - 0x0621, 0x063a, /* ء - غ */ - 0x0640, 0x064a, /* ـ - ي */ - 0x0671, 0x06b7, /* ٱ - ڷ */ - 0x06ba, 0x06be, /* ں - ھ */ - 0x06c0, 0x06ce, /* ۀ - ێ */ - 0x06d0, 0x06d3, /* ې - ۓ */ - 0x0905, 0x0939, /* अ - ह */ - 0x0958, 0x0961, /* क़ - ॡ */ - 0x0985, 0x098c, /* অ - ঌ */ - 0x098f, 0x0990, /* এ - ঐ */ - 0x0993, 0x09a8, /* ও - ন */ - 0x09aa, 0x09b0, /* প - র */ - 0x09b6, 0x09b9, /* শ - হ */ - 0x09dc, 0x09dd, /* ড় - ঢ় */ - 0x09df, 0x09e1, /* য় - ৡ */ - 0x09f0, 0x09f1, /* ৰ - ৱ */ - 0x0a05, 0x0a0a, /* ਅ - ਊ */ - 0x0a0f, 0x0a10, /* ਏ - ਐ */ - 0x0a13, 0x0a28, /* ਓ - ਨ */ - 0x0a2a, 0x0a30, /* ਪ - ਰ */ - 0x0a32, 0x0a33, /* ਲ - ਲ਼ */ - 0x0a35, 0x0a36, /* ਵ - ਸ਼ */ - 0x0a38, 0x0a39, /* ਸ - ਹ */ - 0x0a59, 0x0a5c, /* ਖ਼ - ੜ */ - 0x0a85, 0x0a8b, /* અ - ઋ */ - 0x0a8f, 0x0a91, /* એ - ઑ */ - 0x0a93, 0x0aa8, /* ઓ - ન */ - 0x0aaa, 0x0ab0, /* પ - ર */ - 0x0ab2, 0x0ab3, /* લ - ળ */ - 0x0ab5, 0x0ab9, /* વ - હ */ - 0x0b05, 0x0b0c, /* ଅ - ଌ */ - 0x0b0f, 0x0b10, /* ଏ - ଐ */ - 0x0b13, 0x0b28, /* ଓ - ନ */ - 0x0b2a, 0x0b30, /* ପ - ର */ - 0x0b32, 0x0b33, /* ଲ - ଳ */ - 0x0b36, 0x0b39, /* ଶ - ହ */ - 0x0b5c, 0x0b5d, /* ଡ଼ - ଢ଼ */ - 0x0b5f, 0x0b61, /* ୟ - ୡ */ - 0x0b85, 0x0b8a, /* அ - ஊ */ - 0x0b8e, 0x0b90, /* எ - ஐ */ - 0x0b92, 0x0b95, /* ஒ - க */ - 0x0b99, 0x0b9a, /* ங - ச */ - 0x0b9e, 0x0b9f, /* ஞ - ட */ - 0x0ba3, 0x0ba4, /* ண - த */ - 0x0ba8, 0x0baa, /* ந - ப */ - 0x0bae, 0x0bb5, /* ம - வ */ - 0x0bb7, 0x0bb9, /* ஷ - ஹ */ - 0x0c05, 0x0c0c, /* అ - ఌ */ - 0x0c0e, 0x0c10, /* ఎ - ఐ */ - 0x0c12, 0x0c28, /* ఒ - న */ - 0x0c2a, 0x0c33, /* ప - ళ */ - 0x0c35, 0x0c39, /* వ - హ */ - 0x0c60, 0x0c61, /* ౠ - ౡ */ - 0x0c85, 0x0c8c, /* ಅ - ಌ */ - 0x0c8e, 0x0c90, /* ಎ - ಐ */ - 0x0c92, 0x0ca8, /* ಒ - ನ */ - 0x0caa, 0x0cb3, /* ಪ - ಳ */ - 0x0cb5, 0x0cb9, /* ವ - ಹ */ - 0x0ce0, 0x0ce1, /* ೠ - ೡ */ - 0x0d05, 0x0d0c, /* അ - ഌ */ - 0x0d0e, 0x0d10, /* എ - ഐ */ - 0x0d12, 0x0d28, /* ഒ - ന */ - 0x0d2a, 0x0d39, /* പ - ഹ */ - 0x0d60, 0x0d61, /* ൠ - ൡ */ - 0x0e01, 0x0e30, /* ก - ะ */ - 0x0e32, 0x0e33, /* า - ำ */ - 0x0e40, 0x0e46, /* เ - ๆ */ - 0x0e5a, 0x0e5b, /* ๚ - ๛ */ - 0x0e81, 0x0e82, /* ກ - ຂ */ - 0x0e87, 0x0e88, /* ງ - ຈ */ - 0x0e94, 0x0e97, /* ດ - ທ */ - 0x0e99, 0x0e9f, /* ນ - ຟ */ - 0x0ea1, 0x0ea3, /* ມ - ຣ */ - 0x0eaa, 0x0eab, /* ສ - ຫ */ - 0x0ead, 0x0eae, /* ອ - ຮ */ - 0x0eb2, 0x0eb3, /* າ - ຳ */ - 0x0ec0, 0x0ec4, /* ເ - ໄ */ - 0x0edc, 0x0edd, /* ໜ - ໝ */ - 0x0f18, 0x0f19, /* ༘ - ༙ */ - 0x0f40, 0x0f47, /* ཀ - ཇ */ - 0x0f49, 0x0f69, /* ཉ - ཀྵ */ - 0x10d0, 0x10f6, /* ა - ჶ */ - 0x1100, 0x1159, /* ᄀ - ᅙ */ - 0x115f, 0x11a2, /* ᅟ - ᆢ */ - 0x11a8, 0x11f9, /* ᆨ - ᇹ */ - 0x1e00, 0x1e9b, /* Ḁ - ẛ */ - 0x1f50, 0x1f57, /* ὐ - ὗ */ - 0x1f80, 0x1fb4, /* ᾀ - ᾴ */ - 0x1fb6, 0x1fbc, /* ᾶ - ᾼ */ - 0x1fc2, 0x1fc4, /* ῂ - ῄ */ - 0x1fc6, 0x1fcc, /* ῆ - ῌ */ - 0x1fd0, 0x1fd3, /* ῐ - ΐ */ - 0x1fd6, 0x1fdb, /* ῖ - Ί */ - 0x1fe0, 0x1fec, /* ῠ - Ῥ */ - 0x1ff2, 0x1ff4, /* ῲ - ῴ */ - 0x1ff6, 0x1ffc, /* ῶ - ῼ */ - 0x210a, 0x2113, /* ℊ - ℓ */ - 0x2115, 0x211d, /* ℕ - ℝ */ - 0x2120, 0x2122, /* ℠ - ™ */ - 0x212a, 0x2131, /* K - ℱ */ - 0x2133, 0x2138, /* ℳ - ℸ */ - 0x3041, 0x3094, /* ぁ - ゔ */ - 0x30a1, 0x30fa, /* ァ - ヺ */ - 0x3105, 0x312c, /* ㄅ - ㄬ */ - 0x3131, 0x318e, /* ㄱ - ㆎ */ - 0x3192, 0x319f, /* ㆒ - ㆟ */ - 0x3260, 0x327b, /* ㉠ - ㉻ */ - 0x328a, 0x32b0, /* ㊊ - ㊰ */ - 0x32d0, 0x32fe, /* ㋐ - ㋾ */ - 0x3300, 0x3357, /* ㌀ - ㍗ */ - 0x3371, 0x3376, /* ㍱ - ㍶ */ - 0x337b, 0x3394, /* ㍻ - ㎔ */ - 0x3399, 0x339e, /* ㎙ - ㎞ */ - 0x33a9, 0x33ad, /* ㎩ - ㎭ */ - 0x33b0, 0x33c1, /* ㎰ - ㏁ */ - 0x33c3, 0x33c5, /* ㏃ - ㏅ */ - 0x33c7, 0x33d7, /* ㏇ - ㏗ */ - 0x33d9, 0x33dd, /* ㏙ - ㏝ */ - 0x4e00, 0x9fff, /* 一 - 鿿 */ - 0xac00, 0xd7a3, /* 가 - 힣 */ - 0xf900, 0xfb06, /* 豈 - st */ - 0xfb13, 0xfb17, /* ﬓ - ﬗ */ - 0xfb1f, 0xfb28, /* ײַ - ﬨ */ - 0xfb2a, 0xfb36, /* שׁ - זּ */ - 0xfb38, 0xfb3c, /* טּ - לּ */ - 0xfb40, 0xfb41, /* נּ - סּ */ - 0xfb43, 0xfb44, /* ףּ - פּ */ - 0xfb46, 0xfbb1, /* צּ - ﮱ */ - 0xfbd3, 0xfd3d, /* ﯓ - ﴽ */ - 0xfd50, 0xfd8f, /* ﵐ - ﶏ */ - 0xfd92, 0xfdc7, /* ﶒ - ﷇ */ - 0xfdf0, 0xfdf9, /* ﷰ - ﷹ */ - 0xfe70, 0xfe72, /* ﹰ - ﹲ */ - 0xfe76, 0xfefc, /* ﹶ - ﻼ */ - 0xff66, 0xff6f, /* ヲ - ッ */ - 0xff71, 0xff9d, /* ア - ン */ - 0xffa0, 0xffbe, /* ᅠ - ᄒ */ - 0xffc2, 0xffc7, /* ᅡ - ᅦ */ - 0xffca, 0xffcf, /* ᅧ - ᅬ */ - 0xffd2, 0xffd7, /* ᅭ - ᅲ */ - 0xffda, 0xffdc, /* ᅳ - ᅵ */ -}; - -/* - * alpha singlets - - * only covers ranges not in lower||upper - */ -static -Rune __alpha1[] = -{ - 0x00aa, /* ª */ - 0x00b5, /* µ */ - 0x00ba, /* º */ - 0x03da, /* Ϛ */ - 0x03dc, /* Ϝ */ - 0x03de, /* Ϟ */ - 0x03e0, /* Ϡ */ - 0x06d5, /* ە */ - 0x09b2, /* ল */ - 0x0a5e, /* ਫ਼ */ - 0x0a8d, /* ઍ */ - 0x0ae0, /* ૠ */ - 0x0b9c, /* ஜ */ - 0x0cde, /* ೞ */ - 0x0e4f, /* ๏ */ - 0x0e84, /* ຄ */ - 0x0e8a, /* ຊ */ - 0x0e8d, /* ຍ */ - 0x0ea5, /* ລ */ - 0x0ea7, /* ວ */ - 0x0eb0, /* ະ */ - 0x0ebd, /* ຽ */ - 0x1fbe, /* ι */ - 0x207f, /* ⁿ */ - 0x20a8, /* ₨ */ - 0x2102, /* ℂ */ - 0x2107, /* ℇ */ - 0x2124, /* ℤ */ - 0x2126, /* Ω */ - 0x2128, /* ℨ */ - 0xfb3e, /* מּ */ - 0xfe74, /* ﹴ */ -}; - -/* - * space ranges - */ -static -Rune __space2[] = -{ - 0x0009, 0x000a, /* tab and newline */ - 0x0020, 0x0020, /* space */ - 0x00a0, 0x00a0, /*   */ - 0x2000, 0x200b, /*   - ​ */ - 0x2028, 0x2029, /* 
 - 
 */ - 0x3000, 0x3000, /*   */ - 0xfeff, 0xfeff, /*  */ -}; - -/* - * lower case ranges - * 3rd col is conversion excess 500 - */ -static -Rune __toupper2[] = -{ - 0x0061, 0x007a, 468, /* a-z A-Z */ - 0x00e0, 0x00f6, 468, /* à-ö À-Ö */ - 0x00f8, 0x00fe, 468, /* ø-þ Ø-Þ */ - 0x0256, 0x0257, 295, /* ɖ-ɗ Ɖ-Ɗ */ - 0x0258, 0x0259, 298, /* ɘ-ə Ǝ-Ə */ - 0x028a, 0x028b, 283, /* ʊ-ʋ Ʊ-Ʋ */ - 0x03ad, 0x03af, 463, /* έ-ί Έ-Ί */ - 0x03b1, 0x03c1, 468, /* α-ρ Α-Ρ */ - 0x03c3, 0x03cb, 468, /* σ-ϋ Σ-Ϋ */ - 0x03cd, 0x03ce, 437, /* ύ-ώ Ύ-Ώ */ - 0x0430, 0x044f, 468, /* а-я А-Я */ - 0x0451, 0x045c, 420, /* ё-ќ Ё-Ќ */ - 0x045e, 0x045f, 420, /* ў-џ Ў-Џ */ - 0x0561, 0x0586, 452, /* ա-ֆ Ա-Ֆ */ - 0x1f00, 0x1f07, 508, /* ἀ-ἇ Ἀ-Ἇ */ - 0x1f10, 0x1f15, 508, /* ἐ-ἕ Ἐ-Ἕ */ - 0x1f20, 0x1f27, 508, /* ἠ-ἧ Ἠ-Ἧ */ - 0x1f30, 0x1f37, 508, /* ἰ-ἷ Ἰ-Ἷ */ - 0x1f40, 0x1f45, 508, /* ὀ-ὅ Ὀ-Ὅ */ - 0x1f60, 0x1f67, 508, /* ὠ-ὧ Ὠ-Ὧ */ - 0x1f70, 0x1f71, 574, /* ὰ-ά Ὰ-Ά */ - 0x1f72, 0x1f75, 586, /* ὲ-ή Ὲ-Ή */ - 0x1f76, 0x1f77, 600, /* ὶ-ί Ὶ-Ί */ - 0x1f78, 0x1f79, 628, /* ὸ-ό Ὸ-Ό */ - 0x1f7a, 0x1f7b, 612, /* ὺ-ύ Ὺ-Ύ */ - 0x1f7c, 0x1f7d, 626, /* ὼ-ώ Ὼ-Ώ */ - 0x1f80, 0x1f87, 508, /* ᾀ-ᾇ ᾈ-ᾏ */ - 0x1f90, 0x1f97, 508, /* ᾐ-ᾗ ᾘ-ᾟ */ - 0x1fa0, 0x1fa7, 508, /* ᾠ-ᾧ ᾨ-ᾯ */ - 0x1fb0, 0x1fb1, 508, /* ᾰ-ᾱ Ᾰ-Ᾱ */ - 0x1fd0, 0x1fd1, 508, /* ῐ-ῑ Ῐ-Ῑ */ - 0x1fe0, 0x1fe1, 508, /* ῠ-ῡ Ῠ-Ῡ */ - 0x2170, 0x217f, 484, /* ⅰ-ⅿ Ⅰ-Ⅿ */ - 0x24d0, 0x24e9, 474, /* ⓐ-ⓩ Ⓐ-Ⓩ */ - 0xff41, 0xff5a, 468, /* a-z A-Z */ -}; - -/* - * lower case singlets - * 2nd col is conversion excess 500 - */ -static -Rune __toupper1[] = -{ - 0x00ff, 621, /* ÿ Ÿ */ - 0x0101, 499, /* ā Ā */ - 0x0103, 499, /* ă Ă */ - 0x0105, 499, /* ą Ą */ - 0x0107, 499, /* ć Ć */ - 0x0109, 499, /* ĉ Ĉ */ - 0x010b, 499, /* ċ Ċ */ - 0x010d, 499, /* č Č */ - 0x010f, 499, /* ď Ď */ - 0x0111, 499, /* đ Đ */ - 0x0113, 499, /* ē Ē */ - 0x0115, 499, /* ĕ Ĕ */ - 0x0117, 499, /* ė Ė */ - 0x0119, 499, /* ę Ę */ - 0x011b, 499, /* ě Ě */ - 0x011d, 499, /* ĝ Ĝ */ - 0x011f, 499, /* ğ Ğ */ - 0x0121, 499, /* ġ Ġ */ - 0x0123, 499, /* ģ Ģ */ - 0x0125, 499, /* ĥ Ĥ */ - 0x0127, 499, /* ħ Ħ */ - 0x0129, 499, /* ĩ Ĩ */ - 0x012b, 499, /* ī Ī */ - 0x012d, 499, /* ĭ Ĭ */ - 0x012f, 499, /* į Į */ - 0x0131, 268, /* ı I */ - 0x0133, 499, /* ij IJ */ - 0x0135, 499, /* ĵ Ĵ */ - 0x0137, 499, /* ķ Ķ */ - 0x013a, 499, /* ĺ Ĺ */ - 0x013c, 499, /* ļ Ļ */ - 0x013e, 499, /* ľ Ľ */ - 0x0140, 499, /* ŀ Ŀ */ - 0x0142, 499, /* ł Ł */ - 0x0144, 499, /* ń Ń */ - 0x0146, 499, /* ņ Ņ */ - 0x0148, 499, /* ň Ň */ - 0x014b, 499, /* ŋ Ŋ */ - 0x014d, 499, /* ō Ō */ - 0x014f, 499, /* ŏ Ŏ */ - 0x0151, 499, /* ő Ő */ - 0x0153, 499, /* œ Œ */ - 0x0155, 499, /* ŕ Ŕ */ - 0x0157, 499, /* ŗ Ŗ */ - 0x0159, 499, /* ř Ř */ - 0x015b, 499, /* ś Ś */ - 0x015d, 499, /* ŝ Ŝ */ - 0x015f, 499, /* ş Ş */ - 0x0161, 499, /* š Š */ - 0x0163, 499, /* ţ Ţ */ - 0x0165, 499, /* ť Ť */ - 0x0167, 499, /* ŧ Ŧ */ - 0x0169, 499, /* ũ Ũ */ - 0x016b, 499, /* ū Ū */ - 0x016d, 499, /* ŭ Ŭ */ - 0x016f, 499, /* ů Ů */ - 0x0171, 499, /* ű Ű */ - 0x0173, 499, /* ų Ų */ - 0x0175, 499, /* ŵ Ŵ */ - 0x0177, 499, /* ŷ Ŷ */ - 0x017a, 499, /* ź Ź */ - 0x017c, 499, /* ż Ż */ - 0x017e, 499, /* ž Ž */ - 0x017f, 200, /* ſ S */ - 0x0183, 499, /* ƃ Ƃ */ - 0x0185, 499, /* ƅ Ƅ */ - 0x0188, 499, /* ƈ Ƈ */ - 0x018c, 499, /* ƌ Ƌ */ - 0x0192, 499, /* ƒ Ƒ */ - 0x0199, 499, /* ƙ Ƙ */ - 0x01a1, 499, /* ơ Ơ */ - 0x01a3, 499, /* ƣ Ƣ */ - 0x01a5, 499, /* ƥ Ƥ */ - 0x01a8, 499, /* ƨ Ƨ */ - 0x01ad, 499, /* ƭ Ƭ */ - 0x01b0, 499, /* ư Ư */ - 0x01b4, 499, /* ƴ Ƴ */ - 0x01b6, 499, /* ƶ Ƶ */ - 0x01b9, 499, /* ƹ Ƹ */ - 0x01bd, 499, /* ƽ Ƽ */ - 0x01c5, 499, /* Dž DŽ */ - 0x01c6, 498, /* dž DŽ */ - 0x01c8, 499, /* Lj LJ */ - 0x01c9, 498, /* lj LJ */ - 0x01cb, 499, /* Nj NJ */ - 0x01cc, 498, /* nj NJ */ - 0x01ce, 499, /* ǎ Ǎ */ - 0x01d0, 499, /* ǐ Ǐ */ - 0x01d2, 499, /* ǒ Ǒ */ - 0x01d4, 499, /* ǔ Ǔ */ - 0x01d6, 499, /* ǖ Ǖ */ - 0x01d8, 499, /* ǘ Ǘ */ - 0x01da, 499, /* ǚ Ǚ */ - 0x01dc, 499, /* ǜ Ǜ */ - 0x01df, 499, /* ǟ Ǟ */ - 0x01e1, 499, /* ǡ Ǡ */ - 0x01e3, 499, /* ǣ Ǣ */ - 0x01e5, 499, /* ǥ Ǥ */ - 0x01e7, 499, /* ǧ Ǧ */ - 0x01e9, 499, /* ǩ Ǩ */ - 0x01eb, 499, /* ǫ Ǫ */ - 0x01ed, 499, /* ǭ Ǭ */ - 0x01ef, 499, /* ǯ Ǯ */ - 0x01f2, 499, /* Dz DZ */ - 0x01f3, 498, /* dz DZ */ - 0x01f5, 499, /* ǵ Ǵ */ - 0x01fb, 499, /* ǻ Ǻ */ - 0x01fd, 499, /* ǽ Ǽ */ - 0x01ff, 499, /* ǿ Ǿ */ - 0x0201, 499, /* ȁ Ȁ */ - 0x0203, 499, /* ȃ Ȃ */ - 0x0205, 499, /* ȅ Ȅ */ - 0x0207, 499, /* ȇ Ȇ */ - 0x0209, 499, /* ȉ Ȉ */ - 0x020b, 499, /* ȋ Ȋ */ - 0x020d, 499, /* ȍ Ȍ */ - 0x020f, 499, /* ȏ Ȏ */ - 0x0211, 499, /* ȑ Ȑ */ - 0x0213, 499, /* ȓ Ȓ */ - 0x0215, 499, /* ȕ Ȕ */ - 0x0217, 499, /* ȗ Ȗ */ - 0x0253, 290, /* ɓ Ɓ */ - 0x0254, 294, /* ɔ Ɔ */ - 0x025b, 297, /* ɛ Ɛ */ - 0x0260, 295, /* ɠ Ɠ */ - 0x0263, 293, /* ɣ Ɣ */ - 0x0268, 291, /* ɨ Ɨ */ - 0x0269, 289, /* ɩ Ɩ */ - 0x026f, 289, /* ɯ Ɯ */ - 0x0272, 287, /* ɲ Ɲ */ - 0x0283, 282, /* ʃ Ʃ */ - 0x0288, 282, /* ʈ Ʈ */ - 0x0292, 281, /* ʒ Ʒ */ - 0x03ac, 462, /* ά Ά */ - 0x03cc, 436, /* ό Ό */ - 0x03d0, 438, /* ϐ Β */ - 0x03d1, 443, /* ϑ Θ */ - 0x03d5, 453, /* ϕ Φ */ - 0x03d6, 446, /* ϖ Π */ - 0x03e3, 499, /* ϣ Ϣ */ - 0x03e5, 499, /* ϥ Ϥ */ - 0x03e7, 499, /* ϧ Ϧ */ - 0x03e9, 499, /* ϩ Ϩ */ - 0x03eb, 499, /* ϫ Ϫ */ - 0x03ed, 499, /* ϭ Ϭ */ - 0x03ef, 499, /* ϯ Ϯ */ - 0x03f0, 414, /* ϰ Κ */ - 0x03f1, 420, /* ϱ Ρ */ - 0x0461, 499, /* ѡ Ѡ */ - 0x0463, 499, /* ѣ Ѣ */ - 0x0465, 499, /* ѥ Ѥ */ - 0x0467, 499, /* ѧ Ѧ */ - 0x0469, 499, /* ѩ Ѩ */ - 0x046b, 499, /* ѫ Ѫ */ - 0x046d, 499, /* ѭ Ѭ */ - 0x046f, 499, /* ѯ Ѯ */ - 0x0471, 499, /* ѱ Ѱ */ - 0x0473, 499, /* ѳ Ѳ */ - 0x0475, 499, /* ѵ Ѵ */ - 0x0477, 499, /* ѷ Ѷ */ - 0x0479, 499, /* ѹ Ѹ */ - 0x047b, 499, /* ѻ Ѻ */ - 0x047d, 499, /* ѽ Ѽ */ - 0x047f, 499, /* ѿ Ѿ */ - 0x0481, 499, /* ҁ Ҁ */ - 0x0491, 499, /* ґ Ґ */ - 0x0493, 499, /* ғ Ғ */ - 0x0495, 499, /* ҕ Ҕ */ - 0x0497, 499, /* җ Җ */ - 0x0499, 499, /* ҙ Ҙ */ - 0x049b, 499, /* қ Қ */ - 0x049d, 499, /* ҝ Ҝ */ - 0x049f, 499, /* ҟ Ҟ */ - 0x04a1, 499, /* ҡ Ҡ */ - 0x04a3, 499, /* ң Ң */ - 0x04a5, 499, /* ҥ Ҥ */ - 0x04a7, 499, /* ҧ Ҧ */ - 0x04a9, 499, /* ҩ Ҩ */ - 0x04ab, 499, /* ҫ Ҫ */ - 0x04ad, 499, /* ҭ Ҭ */ - 0x04af, 499, /* ү Ү */ - 0x04b1, 499, /* ұ Ұ */ - 0x04b3, 499, /* ҳ Ҳ */ - 0x04b5, 499, /* ҵ Ҵ */ - 0x04b7, 499, /* ҷ Ҷ */ - 0x04b9, 499, /* ҹ Ҹ */ - 0x04bb, 499, /* һ Һ */ - 0x04bd, 499, /* ҽ Ҽ */ - 0x04bf, 499, /* ҿ Ҿ */ - 0x04c2, 499, /* ӂ Ӂ */ - 0x04c4, 499, /* ӄ Ӄ */ - 0x04c8, 499, /* ӈ Ӈ */ - 0x04cc, 499, /* ӌ Ӌ */ - 0x04d1, 499, /* ӑ Ӑ */ - 0x04d3, 499, /* ӓ Ӓ */ - 0x04d5, 499, /* ӕ Ӕ */ - 0x04d7, 499, /* ӗ Ӗ */ - 0x04d9, 499, /* ә Ә */ - 0x04db, 499, /* ӛ Ӛ */ - 0x04dd, 499, /* ӝ Ӝ */ - 0x04df, 499, /* ӟ Ӟ */ - 0x04e1, 499, /* ӡ Ӡ */ - 0x04e3, 499, /* ӣ Ӣ */ - 0x04e5, 499, /* ӥ Ӥ */ - 0x04e7, 499, /* ӧ Ӧ */ - 0x04e9, 499, /* ө Ө */ - 0x04eb, 499, /* ӫ Ӫ */ - 0x04ef, 499, /* ӯ Ӯ */ - 0x04f1, 499, /* ӱ Ӱ */ - 0x04f3, 499, /* ӳ Ӳ */ - 0x04f5, 499, /* ӵ Ӵ */ - 0x04f9, 499, /* ӹ Ӹ */ - 0x1e01, 499, /* ḁ Ḁ */ - 0x1e03, 499, /* ḃ Ḃ */ - 0x1e05, 499, /* ḅ Ḅ */ - 0x1e07, 499, /* ḇ Ḇ */ - 0x1e09, 499, /* ḉ Ḉ */ - 0x1e0b, 499, /* ḋ Ḋ */ - 0x1e0d, 499, /* ḍ Ḍ */ - 0x1e0f, 499, /* ḏ Ḏ */ - 0x1e11, 499, /* ḑ Ḑ */ - 0x1e13, 499, /* ḓ Ḓ */ - 0x1e15, 499, /* ḕ Ḕ */ - 0x1e17, 499, /* ḗ Ḗ */ - 0x1e19, 499, /* ḙ Ḙ */ - 0x1e1b, 499, /* ḛ Ḛ */ - 0x1e1d, 499, /* ḝ Ḝ */ - 0x1e1f, 499, /* ḟ Ḟ */ - 0x1e21, 499, /* ḡ Ḡ */ - 0x1e23, 499, /* ḣ Ḣ */ - 0x1e25, 499, /* ḥ Ḥ */ - 0x1e27, 499, /* ḧ Ḧ */ - 0x1e29, 499, /* ḩ Ḩ */ - 0x1e2b, 499, /* ḫ Ḫ */ - 0x1e2d, 499, /* ḭ Ḭ */ - 0x1e2f, 499, /* ḯ Ḯ */ - 0x1e31, 499, /* ḱ Ḱ */ - 0x1e33, 499, /* ḳ Ḳ */ - 0x1e35, 499, /* ḵ Ḵ */ - 0x1e37, 499, /* ḷ Ḷ */ - 0x1e39, 499, /* ḹ Ḹ */ - 0x1e3b, 499, /* ḻ Ḻ */ - 0x1e3d, 499, /* ḽ Ḽ */ - 0x1e3f, 499, /* ḿ Ḿ */ - 0x1e41, 499, /* ṁ Ṁ */ - 0x1e43, 499, /* ṃ Ṃ */ - 0x1e45, 499, /* ṅ Ṅ */ - 0x1e47, 499, /* ṇ Ṇ */ - 0x1e49, 499, /* ṉ Ṉ */ - 0x1e4b, 499, /* ṋ Ṋ */ - 0x1e4d, 499, /* ṍ Ṍ */ - 0x1e4f, 499, /* ṏ Ṏ */ - 0x1e51, 499, /* ṑ Ṑ */ - 0x1e53, 499, /* ṓ Ṓ */ - 0x1e55, 499, /* ṕ Ṕ */ - 0x1e57, 499, /* ṗ Ṗ */ - 0x1e59, 499, /* ṙ Ṙ */ - 0x1e5b, 499, /* ṛ Ṛ */ - 0x1e5d, 499, /* ṝ Ṝ */ - 0x1e5f, 499, /* ṟ Ṟ */ - 0x1e61, 499, /* ṡ Ṡ */ - 0x1e63, 499, /* ṣ Ṣ */ - 0x1e65, 499, /* ṥ Ṥ */ - 0x1e67, 499, /* ṧ Ṧ */ - 0x1e69, 499, /* ṩ Ṩ */ - 0x1e6b, 499, /* ṫ Ṫ */ - 0x1e6d, 499, /* ṭ Ṭ */ - 0x1e6f, 499, /* ṯ Ṯ */ - 0x1e71, 499, /* ṱ Ṱ */ - 0x1e73, 499, /* ṳ Ṳ */ - 0x1e75, 499, /* ṵ Ṵ */ - 0x1e77, 499, /* ṷ Ṷ */ - 0x1e79, 499, /* ṹ Ṹ */ - 0x1e7b, 499, /* ṻ Ṻ */ - 0x1e7d, 499, /* ṽ Ṽ */ - 0x1e7f, 499, /* ṿ Ṿ */ - 0x1e81, 499, /* ẁ Ẁ */ - 0x1e83, 499, /* ẃ Ẃ */ - 0x1e85, 499, /* ẅ Ẅ */ - 0x1e87, 499, /* ẇ Ẇ */ - 0x1e89, 499, /* ẉ Ẉ */ - 0x1e8b, 499, /* ẋ Ẋ */ - 0x1e8d, 499, /* ẍ Ẍ */ - 0x1e8f, 499, /* ẏ Ẏ */ - 0x1e91, 499, /* ẑ Ẑ */ - 0x1e93, 499, /* ẓ Ẓ */ - 0x1e95, 499, /* ẕ Ẕ */ - 0x1ea1, 499, /* ạ Ạ */ - 0x1ea3, 499, /* ả Ả */ - 0x1ea5, 499, /* ấ Ấ */ - 0x1ea7, 499, /* ầ Ầ */ - 0x1ea9, 499, /* ẩ Ẩ */ - 0x1eab, 499, /* ẫ Ẫ */ - 0x1ead, 499, /* ậ Ậ */ - 0x1eaf, 499, /* ắ Ắ */ - 0x1eb1, 499, /* ằ Ằ */ - 0x1eb3, 499, /* ẳ Ẳ */ - 0x1eb5, 499, /* ẵ Ẵ */ - 0x1eb7, 499, /* ặ Ặ */ - 0x1eb9, 499, /* ẹ Ẹ */ - 0x1ebb, 499, /* ẻ Ẻ */ - 0x1ebd, 499, /* ẽ Ẽ */ - 0x1ebf, 499, /* ế Ế */ - 0x1ec1, 499, /* ề Ề */ - 0x1ec3, 499, /* ể Ể */ - 0x1ec5, 499, /* ễ Ễ */ - 0x1ec7, 499, /* ệ Ệ */ - 0x1ec9, 499, /* ỉ Ỉ */ - 0x1ecb, 499, /* ị Ị */ - 0x1ecd, 499, /* ọ Ọ */ - 0x1ecf, 499, /* ỏ Ỏ */ - 0x1ed1, 499, /* ố Ố */ - 0x1ed3, 499, /* ồ Ồ */ - 0x1ed5, 499, /* ổ Ổ */ - 0x1ed7, 499, /* ỗ Ỗ */ - 0x1ed9, 499, /* ộ Ộ */ - 0x1edb, 499, /* ớ Ớ */ - 0x1edd, 499, /* ờ Ờ */ - 0x1edf, 499, /* ở Ở */ - 0x1ee1, 499, /* ỡ Ỡ */ - 0x1ee3, 499, /* ợ Ợ */ - 0x1ee5, 499, /* ụ Ụ */ - 0x1ee7, 499, /* ủ Ủ */ - 0x1ee9, 499, /* ứ Ứ */ - 0x1eeb, 499, /* ừ Ừ */ - 0x1eed, 499, /* ử Ử */ - 0x1eef, 499, /* ữ Ữ */ - 0x1ef1, 499, /* ự Ự */ - 0x1ef3, 499, /* ỳ Ỳ */ - 0x1ef5, 499, /* ỵ Ỵ */ - 0x1ef7, 499, /* ỷ Ỷ */ - 0x1ef9, 499, /* ỹ Ỹ */ - 0x1f51, 508, /* ὑ Ὑ */ - 0x1f53, 508, /* ὓ Ὓ */ - 0x1f55, 508, /* ὕ Ὕ */ - 0x1f57, 508, /* ὗ Ὗ */ - 0x1fb3, 509, /* ᾳ ᾼ */ - 0x1fc3, 509, /* ῃ ῌ */ - 0x1fe5, 507, /* ῥ Ῥ */ - 0x1ff3, 509, /* ῳ ῼ */ -}; - -/* - * upper case ranges - * 3rd col is conversion excess 500 - */ -static -Rune __tolower2[] = -{ - 0x0041, 0x005a, 532, /* A-Z a-z */ - 0x00c0, 0x00d6, 532, /* À-Ö à-ö */ - 0x00d8, 0x00de, 532, /* Ø-Þ ø-þ */ - 0x0189, 0x018a, 705, /* Ɖ-Ɗ ɖ-ɗ */ - 0x018e, 0x018f, 702, /* Ǝ-Ə ɘ-ə */ - 0x01b1, 0x01b2, 717, /* Ʊ-Ʋ ʊ-ʋ */ - 0x0388, 0x038a, 537, /* Έ-Ί έ-ί */ - 0x038e, 0x038f, 563, /* Ύ-Ώ ύ-ώ */ - 0x0391, 0x03a1, 532, /* Α-Ρ α-ρ */ - 0x03a3, 0x03ab, 532, /* Σ-Ϋ σ-ϋ */ - 0x0401, 0x040c, 580, /* Ё-Ќ ё-ќ */ - 0x040e, 0x040f, 580, /* Ў-Џ ў-џ */ - 0x0410, 0x042f, 532, /* А-Я а-я */ - 0x0531, 0x0556, 548, /* Ա-Ֆ ա-ֆ */ - 0x10a0, 0x10c5, 548, /* Ⴀ-Ⴥ ა-ჵ */ - 0x1f08, 0x1f0f, 492, /* Ἀ-Ἇ ἀ-ἇ */ - 0x1f18, 0x1f1d, 492, /* Ἐ-Ἕ ἐ-ἕ */ - 0x1f28, 0x1f2f, 492, /* Ἠ-Ἧ ἠ-ἧ */ - 0x1f38, 0x1f3f, 492, /* Ἰ-Ἷ ἰ-ἷ */ - 0x1f48, 0x1f4d, 492, /* Ὀ-Ὅ ὀ-ὅ */ - 0x1f68, 0x1f6f, 492, /* Ὠ-Ὧ ὠ-ὧ */ - 0x1f88, 0x1f8f, 492, /* ᾈ-ᾏ ᾀ-ᾇ */ - 0x1f98, 0x1f9f, 492, /* ᾘ-ᾟ ᾐ-ᾗ */ - 0x1fa8, 0x1faf, 492, /* ᾨ-ᾯ ᾠ-ᾧ */ - 0x1fb8, 0x1fb9, 492, /* Ᾰ-Ᾱ ᾰ-ᾱ */ - 0x1fba, 0x1fbb, 426, /* Ὰ-Ά ὰ-ά */ - 0x1fc8, 0x1fcb, 414, /* Ὲ-Ή ὲ-ή */ - 0x1fd8, 0x1fd9, 492, /* Ῐ-Ῑ ῐ-ῑ */ - 0x1fda, 0x1fdb, 400, /* Ὶ-Ί ὶ-ί */ - 0x1fe8, 0x1fe9, 492, /* Ῠ-Ῡ ῠ-ῡ */ - 0x1fea, 0x1feb, 388, /* Ὺ-Ύ ὺ-ύ */ - 0x1ff8, 0x1ff9, 372, /* Ὸ-Ό ὸ-ό */ - 0x1ffa, 0x1ffb, 374, /* Ὼ-Ώ ὼ-ώ */ - 0x2160, 0x216f, 516, /* Ⅰ-Ⅿ ⅰ-ⅿ */ - 0x24b6, 0x24cf, 526, /* Ⓐ-Ⓩ ⓐ-ⓩ */ - 0xff21, 0xff3a, 532, /* A-Z a-z */ -}; - -/* - * upper case singlets - * 2nd col is conversion excess 500 - */ -static -Rune __tolower1[] = -{ - 0x0100, 501, /* Ā ā */ - 0x0102, 501, /* Ă ă */ - 0x0104, 501, /* Ą ą */ - 0x0106, 501, /* Ć ć */ - 0x0108, 501, /* Ĉ ĉ */ - 0x010a, 501, /* Ċ ċ */ - 0x010c, 501, /* Č č */ - 0x010e, 501, /* Ď ď */ - 0x0110, 501, /* Đ đ */ - 0x0112, 501, /* Ē ē */ - 0x0114, 501, /* Ĕ ĕ */ - 0x0116, 501, /* Ė ė */ - 0x0118, 501, /* Ę ę */ - 0x011a, 501, /* Ě ě */ - 0x011c, 501, /* Ĝ ĝ */ - 0x011e, 501, /* Ğ ğ */ - 0x0120, 501, /* Ġ ġ */ - 0x0122, 501, /* Ģ ģ */ - 0x0124, 501, /* Ĥ ĥ */ - 0x0126, 501, /* Ħ ħ */ - 0x0128, 501, /* Ĩ ĩ */ - 0x012a, 501, /* Ī ī */ - 0x012c, 501, /* Ĭ ĭ */ - 0x012e, 501, /* Į į */ - 0x0130, 301, /* İ i */ - 0x0132, 501, /* IJ ij */ - 0x0134, 501, /* Ĵ ĵ */ - 0x0136, 501, /* Ķ ķ */ - 0x0139, 501, /* Ĺ ĺ */ - 0x013b, 501, /* Ļ ļ */ - 0x013d, 501, /* Ľ ľ */ - 0x013f, 501, /* Ŀ ŀ */ - 0x0141, 501, /* Ł ł */ - 0x0143, 501, /* Ń ń */ - 0x0145, 501, /* Ņ ņ */ - 0x0147, 501, /* Ň ň */ - 0x014a, 501, /* Ŋ ŋ */ - 0x014c, 501, /* Ō ō */ - 0x014e, 501, /* Ŏ ŏ */ - 0x0150, 501, /* Ő ő */ - 0x0152, 501, /* Œ œ */ - 0x0154, 501, /* Ŕ ŕ */ - 0x0156, 501, /* Ŗ ŗ */ - 0x0158, 501, /* Ř ř */ - 0x015a, 501, /* Ś ś */ - 0x015c, 501, /* Ŝ ŝ */ - 0x015e, 501, /* Ş ş */ - 0x0160, 501, /* Š š */ - 0x0162, 501, /* Ţ ţ */ - 0x0164, 501, /* Ť ť */ - 0x0166, 501, /* Ŧ ŧ */ - 0x0168, 501, /* Ũ ũ */ - 0x016a, 501, /* Ū ū */ - 0x016c, 501, /* Ŭ ŭ */ - 0x016e, 501, /* Ů ů */ - 0x0170, 501, /* Ű ű */ - 0x0172, 501, /* Ų ų */ - 0x0174, 501, /* Ŵ ŵ */ - 0x0176, 501, /* Ŷ ŷ */ - 0x0178, 379, /* Ÿ ÿ */ - 0x0179, 501, /* Ź ź */ - 0x017b, 501, /* Ż ż */ - 0x017d, 501, /* Ž ž */ - 0x0181, 710, /* Ɓ ɓ */ - 0x0182, 501, /* Ƃ ƃ */ - 0x0184, 501, /* Ƅ ƅ */ - 0x0186, 706, /* Ɔ ɔ */ - 0x0187, 501, /* Ƈ ƈ */ - 0x018b, 501, /* Ƌ ƌ */ - 0x0190, 703, /* Ɛ ɛ */ - 0x0191, 501, /* Ƒ ƒ */ - 0x0193, 705, /* Ɠ ɠ */ - 0x0194, 707, /* Ɣ ɣ */ - 0x0196, 711, /* Ɩ ɩ */ - 0x0197, 709, /* Ɨ ɨ */ - 0x0198, 501, /* Ƙ ƙ */ - 0x019c, 711, /* Ɯ ɯ */ - 0x019d, 713, /* Ɲ ɲ */ - 0x01a0, 501, /* Ơ ơ */ - 0x01a2, 501, /* Ƣ ƣ */ - 0x01a4, 501, /* Ƥ ƥ */ - 0x01a7, 501, /* Ƨ ƨ */ - 0x01a9, 718, /* Ʃ ʃ */ - 0x01ac, 501, /* Ƭ ƭ */ - 0x01ae, 718, /* Ʈ ʈ */ - 0x01af, 501, /* Ư ư */ - 0x01b3, 501, /* Ƴ ƴ */ - 0x01b5, 501, /* Ƶ ƶ */ - 0x01b7, 719, /* Ʒ ʒ */ - 0x01b8, 501, /* Ƹ ƹ */ - 0x01bc, 501, /* Ƽ ƽ */ - 0x01c4, 502, /* DŽ dž */ - 0x01c5, 501, /* Dž dž */ - 0x01c7, 502, /* LJ lj */ - 0x01c8, 501, /* Lj lj */ - 0x01ca, 502, /* NJ nj */ - 0x01cb, 501, /* Nj nj */ - 0x01cd, 501, /* Ǎ ǎ */ - 0x01cf, 501, /* Ǐ ǐ */ - 0x01d1, 501, /* Ǒ ǒ */ - 0x01d3, 501, /* Ǔ ǔ */ - 0x01d5, 501, /* Ǖ ǖ */ - 0x01d7, 501, /* Ǘ ǘ */ - 0x01d9, 501, /* Ǚ ǚ */ - 0x01db, 501, /* Ǜ ǜ */ - 0x01de, 501, /* Ǟ ǟ */ - 0x01e0, 501, /* Ǡ ǡ */ - 0x01e2, 501, /* Ǣ ǣ */ - 0x01e4, 501, /* Ǥ ǥ */ - 0x01e6, 501, /* Ǧ ǧ */ - 0x01e8, 501, /* Ǩ ǩ */ - 0x01ea, 501, /* Ǫ ǫ */ - 0x01ec, 501, /* Ǭ ǭ */ - 0x01ee, 501, /* Ǯ ǯ */ - 0x01f1, 502, /* DZ dz */ - 0x01f2, 501, /* Dz dz */ - 0x01f4, 501, /* Ǵ ǵ */ - 0x01fa, 501, /* Ǻ ǻ */ - 0x01fc, 501, /* Ǽ ǽ */ - 0x01fe, 501, /* Ǿ ǿ */ - 0x0200, 501, /* Ȁ ȁ */ - 0x0202, 501, /* Ȃ ȃ */ - 0x0204, 501, /* Ȅ ȅ */ - 0x0206, 501, /* Ȇ ȇ */ - 0x0208, 501, /* Ȉ ȉ */ - 0x020a, 501, /* Ȋ ȋ */ - 0x020c, 501, /* Ȍ ȍ */ - 0x020e, 501, /* Ȏ ȏ */ - 0x0210, 501, /* Ȑ ȑ */ - 0x0212, 501, /* Ȓ ȓ */ - 0x0214, 501, /* Ȕ ȕ */ - 0x0216, 501, /* Ȗ ȗ */ - 0x0386, 538, /* Ά ά */ - 0x038c, 564, /* Ό ό */ - 0x03e2, 501, /* Ϣ ϣ */ - 0x03e4, 501, /* Ϥ ϥ */ - 0x03e6, 501, /* Ϧ ϧ */ - 0x03e8, 501, /* Ϩ ϩ */ - 0x03ea, 501, /* Ϫ ϫ */ - 0x03ec, 501, /* Ϭ ϭ */ - 0x03ee, 501, /* Ϯ ϯ */ - 0x0460, 501, /* Ѡ ѡ */ - 0x0462, 501, /* Ѣ ѣ */ - 0x0464, 501, /* Ѥ ѥ */ - 0x0466, 501, /* Ѧ ѧ */ - 0x0468, 501, /* Ѩ ѩ */ - 0x046a, 501, /* Ѫ ѫ */ - 0x046c, 501, /* Ѭ ѭ */ - 0x046e, 501, /* Ѯ ѯ */ - 0x0470, 501, /* Ѱ ѱ */ - 0x0472, 501, /* Ѳ ѳ */ - 0x0474, 501, /* Ѵ ѵ */ - 0x0476, 501, /* Ѷ ѷ */ - 0x0478, 501, /* Ѹ ѹ */ - 0x047a, 501, /* Ѻ ѻ */ - 0x047c, 501, /* Ѽ ѽ */ - 0x047e, 501, /* Ѿ ѿ */ - 0x0480, 501, /* Ҁ ҁ */ - 0x0490, 501, /* Ґ ґ */ - 0x0492, 501, /* Ғ ғ */ - 0x0494, 501, /* Ҕ ҕ */ - 0x0496, 501, /* Җ җ */ - 0x0498, 501, /* Ҙ ҙ */ - 0x049a, 501, /* Қ қ */ - 0x049c, 501, /* Ҝ ҝ */ - 0x049e, 501, /* Ҟ ҟ */ - 0x04a0, 501, /* Ҡ ҡ */ - 0x04a2, 501, /* Ң ң */ - 0x04a4, 501, /* Ҥ ҥ */ - 0x04a6, 501, /* Ҧ ҧ */ - 0x04a8, 501, /* Ҩ ҩ */ - 0x04aa, 501, /* Ҫ ҫ */ - 0x04ac, 501, /* Ҭ ҭ */ - 0x04ae, 501, /* Ү ү */ - 0x04b0, 501, /* Ұ ұ */ - 0x04b2, 501, /* Ҳ ҳ */ - 0x04b4, 501, /* Ҵ ҵ */ - 0x04b6, 501, /* Ҷ ҷ */ - 0x04b8, 501, /* Ҹ ҹ */ - 0x04ba, 501, /* Һ һ */ - 0x04bc, 501, /* Ҽ ҽ */ - 0x04be, 501, /* Ҿ ҿ */ - 0x04c1, 501, /* Ӂ ӂ */ - 0x04c3, 501, /* Ӄ ӄ */ - 0x04c7, 501, /* Ӈ ӈ */ - 0x04cb, 501, /* Ӌ ӌ */ - 0x04d0, 501, /* Ӑ ӑ */ - 0x04d2, 501, /* Ӓ ӓ */ - 0x04d4, 501, /* Ӕ ӕ */ - 0x04d6, 501, /* Ӗ ӗ */ - 0x04d8, 501, /* Ә ә */ - 0x04da, 501, /* Ӛ ӛ */ - 0x04dc, 501, /* Ӝ ӝ */ - 0x04de, 501, /* Ӟ ӟ */ - 0x04e0, 501, /* Ӡ ӡ */ - 0x04e2, 501, /* Ӣ ӣ */ - 0x04e4, 501, /* Ӥ ӥ */ - 0x04e6, 501, /* Ӧ ӧ */ - 0x04e8, 501, /* Ө ө */ - 0x04ea, 501, /* Ӫ ӫ */ - 0x04ee, 501, /* Ӯ ӯ */ - 0x04f0, 501, /* Ӱ ӱ */ - 0x04f2, 501, /* Ӳ ӳ */ - 0x04f4, 501, /* Ӵ ӵ */ - 0x04f8, 501, /* Ӹ ӹ */ - 0x1e00, 501, /* Ḁ ḁ */ - 0x1e02, 501, /* Ḃ ḃ */ - 0x1e04, 501, /* Ḅ ḅ */ - 0x1e06, 501, /* Ḇ ḇ */ - 0x1e08, 501, /* Ḉ ḉ */ - 0x1e0a, 501, /* Ḋ ḋ */ - 0x1e0c, 501, /* Ḍ ḍ */ - 0x1e0e, 501, /* Ḏ ḏ */ - 0x1e10, 501, /* Ḑ ḑ */ - 0x1e12, 501, /* Ḓ ḓ */ - 0x1e14, 501, /* Ḕ ḕ */ - 0x1e16, 501, /* Ḗ ḗ */ - 0x1e18, 501, /* Ḙ ḙ */ - 0x1e1a, 501, /* Ḛ ḛ */ - 0x1e1c, 501, /* Ḝ ḝ */ - 0x1e1e, 501, /* Ḟ ḟ */ - 0x1e20, 501, /* Ḡ ḡ */ - 0x1e22, 501, /* Ḣ ḣ */ - 0x1e24, 501, /* Ḥ ḥ */ - 0x1e26, 501, /* Ḧ ḧ */ - 0x1e28, 501, /* Ḩ ḩ */ - 0x1e2a, 501, /* Ḫ ḫ */ - 0x1e2c, 501, /* Ḭ ḭ */ - 0x1e2e, 501, /* Ḯ ḯ */ - 0x1e30, 501, /* Ḱ ḱ */ - 0x1e32, 501, /* Ḳ ḳ */ - 0x1e34, 501, /* Ḵ ḵ */ - 0x1e36, 501, /* Ḷ ḷ */ - 0x1e38, 501, /* Ḹ ḹ */ - 0x1e3a, 501, /* Ḻ ḻ */ - 0x1e3c, 501, /* Ḽ ḽ */ - 0x1e3e, 501, /* Ḿ ḿ */ - 0x1e40, 501, /* Ṁ ṁ */ - 0x1e42, 501, /* Ṃ ṃ */ - 0x1e44, 501, /* Ṅ ṅ */ - 0x1e46, 501, /* Ṇ ṇ */ - 0x1e48, 501, /* Ṉ ṉ */ - 0x1e4a, 501, /* Ṋ ṋ */ - 0x1e4c, 501, /* Ṍ ṍ */ - 0x1e4e, 501, /* Ṏ ṏ */ - 0x1e50, 501, /* Ṑ ṑ */ - 0x1e52, 501, /* Ṓ ṓ */ - 0x1e54, 501, /* Ṕ ṕ */ - 0x1e56, 501, /* Ṗ ṗ */ - 0x1e58, 501, /* Ṙ ṙ */ - 0x1e5a, 501, /* Ṛ ṛ */ - 0x1e5c, 501, /* Ṝ ṝ */ - 0x1e5e, 501, /* Ṟ ṟ */ - 0x1e60, 501, /* Ṡ ṡ */ - 0x1e62, 501, /* Ṣ ṣ */ - 0x1e64, 501, /* Ṥ ṥ */ - 0x1e66, 501, /* Ṧ ṧ */ - 0x1e68, 501, /* Ṩ ṩ */ - 0x1e6a, 501, /* Ṫ ṫ */ - 0x1e6c, 501, /* Ṭ ṭ */ - 0x1e6e, 501, /* Ṯ ṯ */ - 0x1e70, 501, /* Ṱ ṱ */ - 0x1e72, 501, /* Ṳ ṳ */ - 0x1e74, 501, /* Ṵ ṵ */ - 0x1e76, 501, /* Ṷ ṷ */ - 0x1e78, 501, /* Ṹ ṹ */ - 0x1e7a, 501, /* Ṻ ṻ */ - 0x1e7c, 501, /* Ṽ ṽ */ - 0x1e7e, 501, /* Ṿ ṿ */ - 0x1e80, 501, /* Ẁ ẁ */ - 0x1e82, 501, /* Ẃ ẃ */ - 0x1e84, 501, /* Ẅ ẅ */ - 0x1e86, 501, /* Ẇ ẇ */ - 0x1e88, 501, /* Ẉ ẉ */ - 0x1e8a, 501, /* Ẋ ẋ */ - 0x1e8c, 501, /* Ẍ ẍ */ - 0x1e8e, 501, /* Ẏ ẏ */ - 0x1e90, 501, /* Ẑ ẑ */ - 0x1e92, 501, /* Ẓ ẓ */ - 0x1e94, 501, /* Ẕ ẕ */ - 0x1ea0, 501, /* Ạ ạ */ - 0x1ea2, 501, /* Ả ả */ - 0x1ea4, 501, /* Ấ ấ */ - 0x1ea6, 501, /* Ầ ầ */ - 0x1ea8, 501, /* Ẩ ẩ */ - 0x1eaa, 501, /* Ẫ ẫ */ - 0x1eac, 501, /* Ậ ậ */ - 0x1eae, 501, /* Ắ ắ */ - 0x1eb0, 501, /* Ằ ằ */ - 0x1eb2, 501, /* Ẳ ẳ */ - 0x1eb4, 501, /* Ẵ ẵ */ - 0x1eb6, 501, /* Ặ ặ */ - 0x1eb8, 501, /* Ẹ ẹ */ - 0x1eba, 501, /* Ẻ ẻ */ - 0x1ebc, 501, /* Ẽ ẽ */ - 0x1ebe, 501, /* Ế ế */ - 0x1ec0, 501, /* Ề ề */ - 0x1ec2, 501, /* Ể ể */ - 0x1ec4, 501, /* Ễ ễ */ - 0x1ec6, 501, /* Ệ ệ */ - 0x1ec8, 501, /* Ỉ ỉ */ - 0x1eca, 501, /* Ị ị */ - 0x1ecc, 501, /* Ọ ọ */ - 0x1ece, 501, /* Ỏ ỏ */ - 0x1ed0, 501, /* Ố ố */ - 0x1ed2, 501, /* Ồ ồ */ - 0x1ed4, 501, /* Ổ ổ */ - 0x1ed6, 501, /* Ỗ ỗ */ - 0x1ed8, 501, /* Ộ ộ */ - 0x1eda, 501, /* Ớ ớ */ - 0x1edc, 501, /* Ờ ờ */ - 0x1ede, 501, /* Ở ở */ - 0x1ee0, 501, /* Ỡ ỡ */ - 0x1ee2, 501, /* Ợ ợ */ - 0x1ee4, 501, /* Ụ ụ */ - 0x1ee6, 501, /* Ủ ủ */ - 0x1ee8, 501, /* Ứ ứ */ - 0x1eea, 501, /* Ừ ừ */ - 0x1eec, 501, /* Ử ử */ - 0x1eee, 501, /* Ữ ữ */ - 0x1ef0, 501, /* Ự ự */ - 0x1ef2, 501, /* Ỳ ỳ */ - 0x1ef4, 501, /* Ỵ ỵ */ - 0x1ef6, 501, /* Ỷ ỷ */ - 0x1ef8, 501, /* Ỹ ỹ */ - 0x1f59, 492, /* Ὑ ὑ */ - 0x1f5b, 492, /* Ὓ ὓ */ - 0x1f5d, 492, /* Ὕ ὕ */ - 0x1f5f, 492, /* Ὗ ὗ */ - 0x1fbc, 491, /* ᾼ ᾳ */ - 0x1fcc, 491, /* ῌ ῃ */ - 0x1fec, 493, /* Ῥ ῥ */ - 0x1ffc, 491, /* ῼ ῳ */ -}; - -/* - * title characters are those between - * upper and lower case. ie DZ Dz dz - */ -static -Rune __totitle1[] = -{ - 0x01c4, 501, /* DŽ Dž */ - 0x01c6, 499, /* dž Dž */ - 0x01c7, 501, /* LJ Lj */ - 0x01c9, 499, /* lj Lj */ - 0x01ca, 501, /* NJ Nj */ - 0x01cc, 499, /* nj Nj */ - 0x01f1, 501, /* DZ Dz */ - 0x01f3, 499, /* dz Dz */ -}; - -static Rune* -bsearch(Rune c, Rune *t, int n, int ne) -{ - Rune *p; - int m; - - while(n > 1) { - m = n/2; - p = t + m*ne; - if(c >= p[0]) { - t = p; - n = n-m; - } else - n = m; - } - if(n && c >= t[0]) - return t; - return 0; -} - -Rune -tolowerrune(Rune c) -{ - Rune *p; - - p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); - if(p && c >= p[0] && c <= p[1]) - return c + p[2] - 500; - p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); - if(p && c == p[0]) - return c + p[1] - 500; - return c; -} - -Rune -toupperrune(Rune c) -{ - Rune *p; - - p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); - if(p && c >= p[0] && c <= p[1]) - return c + p[2] - 500; - p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); - if(p && c == p[0]) - return c + p[1] - 500; - return c; -} - -Rune -totitlerune(Rune c) -{ - Rune *p; - - p = bsearch(c, __totitle1, nelem(__totitle1)/2, 2); - if(p && c == p[0]) - return c + p[1] - 500; - return c; -} - -int -islowerrune(Rune c) -{ - Rune *p; - - p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3); - if(p && c >= p[0] && c <= p[1]) - return 1; - p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2); - if(p && c == p[0]) - return 1; - return 0; -} - -int -isupperrune(Rune c) -{ - Rune *p; - - p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3); - if(p && c >= p[0] && c <= p[1]) - return 1; - p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2); - if(p && c == p[0]) - return 1; - return 0; -} - -int -isalpharune(Rune c) -{ - Rune *p; - - if(isupperrune(c) || islowerrune(c)) - return 1; - p = bsearch(c, __alpha2, nelem(__alpha2)/2, 2); - if(p && c >= p[0] && c <= p[1]) - return 1; - p = bsearch(c, __alpha1, nelem(__alpha1), 1); - if(p && c == p[0]) - return 1; - return 0; -} - -int -istitlerune(Rune c) -{ - return isupperrune(c) && islowerrune(c); -} - -int -isspacerune(Rune c) -{ - Rune *p; - - p = bsearch(c, __space2, nelem(__space2)/2, 2); - if(p && c >= p[0] && c <= p[1]) - return 1; - return 0; -} diff --git a/subprojects/libutf/utf.h b/subprojects/libutf/utf.h deleted file mode 100644 index 44052f4..0000000 --- a/subprojects/libutf/utf.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef _UTF_H_ -#define _UTF_H_ 1 -#if defined(__cplusplus) -extern "C" { -#endif - -typedef unsigned int Rune; /* 32 bits */ - -enum -{ - UTFmax = 4, /* maximum bytes per rune */ - Runesync = 0x80, /* cannot represent part of a UTF sequence (<) */ - Runeself = 0x80, /* rune and UTF sequences are the same (<) */ - Runeerror = 0xFFFD, /* decoding error in UTF */ - Runemax = 0x10FFFF /* maximum rune value */ -}; - -/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */ -int chartorune(Rune *rune, char *str); -int fullrune(char *str, int n); -int isalpharune(Rune c); -int islowerrune(Rune c); -int isspacerune(Rune c); -int istitlerune(Rune c); -int isupperrune(Rune c); -int runelen(long c); -int runenlen(Rune *r, int nrune); -Rune* runestrcat(Rune *s1, Rune *s2); -Rune* runestrchr(Rune *s, Rune c); -int runestrcmp(Rune *s1, Rune *s2); -Rune* runestrcpy(Rune *s1, Rune *s2); -Rune* runestrdup(Rune *s) ; -Rune* runestrecpy(Rune *s1, Rune *es1, Rune *s2); -long runestrlen(Rune *s); -Rune* runestrncat(Rune *s1, Rune *s2, long n); -int runestrncmp(Rune *s1, Rune *s2, long n); -Rune* runestrncpy(Rune *s1, Rune *s2, long n); -Rune* runestrrchr(Rune *s, Rune c); -Rune* runestrstr(Rune *s1, Rune *s2); -int runetochar(char *str, Rune *rune); -Rune tolowerrune(Rune c); -Rune totitlerune(Rune c); -Rune toupperrune(Rune c); -char* utfecpy(char *to, char *e, char *from); -int utflen(char *s); -int utfnlen(char *s, long m); -char* utfrrune(char *s, long c); -char* utfrune(char *s, long c); -char* utfutf(char *s1, char *s2); - -#if defined(__cplusplus) -} -#endif -#endif diff --git a/subprojects/libutf/utfdef.h b/subprojects/libutf/utfdef.h deleted file mode 100644 index 1db7076..0000000 --- a/subprojects/libutf/utfdef.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * compiler directive on Plan 9 - */ -#ifndef USED -#define USED(x) if(x);else -#endif - -/* - * easiest way to make sure these are defined - */ -#define uchar _fmtuchar -#define ushort _fmtushort -#define uint _fmtuint -#define ulong _fmtulong -#define vlong _fmtvlong -#define uvlong _fmtuvlong -typedef unsigned char uchar; -typedef unsigned short ushort; -typedef unsigned int uint; -typedef unsigned long ulong; -typedef unsigned long long uvlong; -typedef long long vlong; - -/* - * nil cannot be ((void*)0) on ANSI C, - * because it is used for function pointers - */ -#undef nil -#define nil 0 - -#undef nelem -#define nelem ((void*)0) diff --git a/subprojects/libutf/utfecpy.c b/subprojects/libutf/utfecpy.c deleted file mode 100644 index bab8136..0000000 --- a/subprojects/libutf/utfecpy.c +++ /dev/null @@ -1,38 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#define _BSD_SOURCE 1 /* memccpy */ -#define _DEFAULT_SOURCE 1 -#include -#include -#include "plan9.h" -#include "utf.h" - -char* -utfecpy(char *to, char *e, char *from) -{ - char *end; - - if(to >= e) - return to; - end = memccpy(to, from, '\0', e - to); - if(end == nil){ - end = e-1; - while(end>to && (*--end&0xC0)==0x80) - ; - *end = '\0'; - }else{ - end--; - } - return end; -} diff --git a/subprojects/libutf/utflen.c b/subprojects/libutf/utflen.c deleted file mode 100644 index 769805a..0000000 --- a/subprojects/libutf/utflen.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -int -utflen(char *s) -{ - int c; - long n; - Rune rune; - - n = 0; - for(;;) { - c = *(uchar*)s; - if(c < Runeself) { - if(c == 0) - return n; - s++; - } else - s += chartorune(&rune, s); - n++; - } -} diff --git a/subprojects/libutf/utfnlen.c b/subprojects/libutf/utfnlen.c deleted file mode 100644 index 6680329..0000000 --- a/subprojects/libutf/utfnlen.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -int -utfnlen(char *s, long m) -{ - int c; - long n; - Rune rune; - char *es; - - es = s + m; - for(n = 0; s < es; n++) { - c = *(uchar*)s; - if(c < Runeself){ - if(c == '\0') - break; - s++; - continue; - } - if(!fullrune(s, es-s)) - break; - s += chartorune(&rune, s); - } - return n; -} diff --git a/subprojects/libutf/utfrrune.c b/subprojects/libutf/utfrrune.c deleted file mode 100644 index cff12b5..0000000 --- a/subprojects/libutf/utfrrune.c +++ /dev/null @@ -1,45 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -char* -utfrrune(char *s, long c) -{ - long c1; - Rune r; - char *s1; - - if(c < Runesync) /* not part of utf sequence */ - return strrchr(s, c); - - s1 = 0; - for(;;) { - c1 = *(uchar*)s; - if(c1 < Runeself) { /* one byte rune */ - if(c1 == 0) - return s1; - if(c1 == c) - s1 = s; - s++; - continue; - } - c1 = chartorune(&r, s); - if(r == c) - s1 = s; - s += c1; - } -} diff --git a/subprojects/libutf/utfrune.c b/subprojects/libutf/utfrune.c deleted file mode 100644 index 52b8359..0000000 --- a/subprojects/libutf/utfrune.c +++ /dev/null @@ -1,44 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - -char* -utfrune(char *s, long c) -{ - long c1; - Rune r; - int n; - - if(c < Runesync) /* not part of utf sequence */ - return strchr(s, c); - - for(;;) { - c1 = *(uchar*)s; - if(c1 < Runeself) { /* one byte rune */ - if(c1 == 0) - return 0; - if(c1 == c) - return s; - s++; - continue; - } - n = chartorune(&r, s); - if(r == c) - return s; - s += n; - } -} diff --git a/subprojects/libutf/utfutf.c b/subprojects/libutf/utfutf.c deleted file mode 100644 index 13c8502..0000000 --- a/subprojects/libutf/utfutf.c +++ /dev/null @@ -1,41 +0,0 @@ -/* - * The authors of this software are Rob Pike and Ken Thompson. - * Copyright (c) 2002 by Lucent Technologies. - * Permission to use, copy, modify, and distribute this software for any - * purpose without fee is hereby granted, provided that this entire notice - * is included in all copies of any software which is or includes a copy - * or modification of this software and in all copies of the supporting - * documentation for such software. - * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED - * WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE - * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY - * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. - */ -#include -#include -#include "plan9.h" -#include "utf.h" - - -/* - * Return pointer to first occurrence of s2 in s1, - * 0 if none - */ -char* -utfutf(char *s1, char *s2) -{ - char *p; - long f, n1, n2; - Rune r; - - n1 = chartorune(&r, s2); - f = r; - if(f <= Runesync) /* represents self */ - return strstr(s1, s2); - - n2 = strlen(s2); - for(p=s1; p=utfrune(p, f); p+=n1) - if(strncmp(p, s2, n2) == 0) - return p; - return 0; -}