From fdd1ee61b5b2be15411de323b593ad440301a842 Mon Sep 17 00:00:00 2001 From: "Javier B. Torres" Date: Mon, 19 Jan 2026 09:08:23 -0300 Subject: [PATCH] initial commit --- .editorconfig | 7 + .gitignore | 5 + README | 11 + chunk.c | 29 + chunk.h | 22 + common.h | 15 + gc.c | 126 ++ gc.h | 31 + main.c | 89 ++ makefile | 11 + object.c | 0 object.h | 24 + parser.c | 51 + parser.h | 13 + print.c | 0 print.h | 0 shell.nix | 7 + test.grr | 3 + vendor/mpc.c | 4128 +++++++++++++++++++++++++++++++++++++++++++++++++ vendor/mpc.h | 391 +++++ vendor/yar.c | 2 + vendor/yar.h | 229 +++ vm.c | 60 + vm.h | 30 + 24 files changed, 5284 insertions(+) create mode 100644 .editorconfig create mode 100644 .gitignore create mode 100644 README create mode 100644 chunk.c create mode 100644 chunk.h create mode 100644 common.h create mode 100644 gc.c create mode 100644 gc.h create mode 100644 main.c create mode 100644 makefile create mode 100644 object.c create mode 100644 object.h create mode 100644 parser.c create mode 100644 parser.h create mode 100644 print.c create mode 100644 print.h create mode 100644 shell.nix create mode 100644 test.grr create mode 100644 vendor/mpc.c create mode 100644 vendor/mpc.h create mode 100644 vendor/yar.c create mode 100644 vendor/yar.h create mode 100644 vm.c create mode 100644 vm.h diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 0000000..f1094a0 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,7 @@ +root = true +[*] +end_of_line = lf +insert_final_newline = true +[*.{c,h,grr}] +indent_style = space +indent_size = 2 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..98db229 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +*.o +.cache +/growl +/.envrc +/compile_commands.json diff --git a/README b/README new file mode 100644 index 0000000..8bcbd5e --- /dev/null +++ b/README @@ -0,0 +1,11 @@ + . + / V\ + / ` / + << | + / | Growl + / | A concatenative programming language. 
+ / |
+ / \ \ /
+ ( ) | |
+ ________| _/_ | |
+<__________\______)\__)
diff --git a/chunk.c b/chunk.c
new file mode 100644
index 0000000..d16c2db
--- /dev/null
+++ b/chunk.c
@@ -0,0 +1,40 @@
+#include "chunk.h"
+#include "vendor/yar.h"
+
+/** Append one raw byte to the chunk's bytecode stream. */
+V chunk_emit_byte(Bc *chunk, U8 byte) { *yar_append(chunk) = byte; }
+
+/**
+ * Append `num` to the bytecode stream as a signed LEB128 value: 7 payload
+ * bits per byte, bit 0x80 set on every byte except the last.
+ *
+ * Emission stops once the remaining value is pure sign extension (0 with the
+ * payload sign bit 0x40 clear, or -1 with it set). Relies on `>>` being an
+ * arithmetic shift for negative `I` (implementation-defined in C).
+ */
+V chunk_emit_sleb128(Bc *chunk, I num) {
+  I more = 1;
+  while (more) {
+    U8 byte = num & 0x7f;
+    num >>= 7;
+    if ((num == 0 && !(byte & 0x40)) || (num == -1 && (byte & 0x40))) {
+      more = 0;
+    } else {
+      byte |= 0x80;
+    }
+    chunk_emit_byte(chunk, byte);
+  }
+}
+
+/** Append `value` to the constant pool and return its index in the pool. */
+I chunk_add_constant(Bc *chunk, O value) {
+  I mark = chunk->constants.count;
+  *yar_append(&chunk->constants) = value;
+  return mark;
+}
+
+/** Hand the constant pool and the bytecode array to yar_free. */
+V chunk_free(Bc *chunk) {
+  yar_free(&chunk->constants);
+  yar_free(chunk);
+}
diff --git a/chunk.h b/chunk.h
new file mode 100644
index 0000000..e30bce1
--- /dev/null
+++ b/chunk.h
@@ -0,0 +1,29 @@
+#ifndef CHUNK_H
+#define CHUNK_H
+
+#include "common.h"
+#include "object.h"
+
+/**
+ * Bytecode chunk: a growable byte array of instructions (items/count/capacity)
+ * plus a growable pool of constant objects.
+ */
+typedef struct Bc {
+  U8 *items;
+  Z count, capacity;
+  struct {
+    O *items;
+    Z count, capacity;
+  } constants;
+} Bc;
+
+/** Append one raw byte to the bytecode stream. */
+V chunk_emit_byte(Bc *, U8);
+/** Append a signed LEB128-encoded integer to the bytecode stream. */
+V chunk_emit_sleb128(Bc *, I);
+/** Add a constant to the pool; returns its index. */
+I chunk_add_constant(Bc *, O);
+/** Release the chunk's bytecode array and constant pool. */
+V chunk_free(Bc *);
+
+#endif
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..b71a897
--- /dev/null
+++ b/common.h
@@ -0,0 +1,16 @@
+#ifndef COMMON_H
+#define COMMON_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+/* Short aliases used throughout the code base. */
+typedef void V;
+typedef intptr_t I;
+typedef uintptr_t U;
+typedef double F;
+typedef size_t Z;
+typedef uint8_t U8;
+typedef uint32_t U32;
+
+#endif
diff --git a/gc.c b/gc.c
new file mode 100644
index 0000000..9ddf211
--- /dev/null
+++ b/gc.c
@@ -0,0 +1,162 @@
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gc.h"
+#include "vendor/yar.h"
+
+/** Round `n` up to the next multiple of 8, computed in size_t. */
+#define ALIGN(n) (((Z)(n) + 7) & ~(Z)7)
+
+/** Report an unrecoverable collector error on stderr and abort. */
+static V die(const char *msg) {
+  fprintf(stderr, "fatal GC error: %s\n", msg);
+  abort();
+}
+
+/** Nonzero when `ptr` points into the current from-space. */
+static inline int infrom(Gc *gc, V *ptr) {
+  const U8 *x = (const U8 *)ptr;
+  return (x >= gc->from.start && x < gc->from.end);
+}
+
+/** Register the slot `ptr` as a root; gc_collect rewrites it in place. */
+V gc_addroot(Gc *gc, O *ptr) { *yar_append(&gc->roots) = ptr; }
+/** Current number of roots, to be handed back to gc_reset later. */
+I gc_mark(Gc *gc) { return gc->roots.count; }
+/** Truncate the root list to `mark` entries (a value from gc_mark). */
+V gc_reset(Gc *gc, I mark) { gc->roots.count = mark; }
+
+/**
+ * Move the object headed by `hdr` into to-space and turn the old copy into
+ * a forwarding record: its type becomes TYPE_FWD and the word after the
+ * header holds the new address. Returns the new boxed address.
+ */
+static O copy(Gc *gc, Hd *hdr) {
+  assert(infrom(gc, hdr));
+  assert(hdr->type != TYPE_FWD);
+
+  Z sz = ALIGN(hdr->size);
+  /* Check before copying so an oversized object cannot overrun to-space. */
+  if (sz > (Z)(gc->to.end - gc->to.free))
+    die("out of memory");
+
+  Hd *new = (Hd *)gc->to.free;
+  gc->to.free += sz;
+  memcpy(new, hdr, sz);
+
+  hdr->type = TYPE_FWD;
+  O *obj = (O *)(hdr + 1);
+  *obj = BOX(new);
+  return *obj;
+}
+
+/**
+ * Return the post-collection value for `obj`. NIL, immediates and pointers
+ * outside from-space pass through unchanged; from-space objects are copied
+ * on first visit and resolved through their forwarding pointer afterwards.
+ */
+static O forward(Gc *gc, O obj) {
+  if (obj == 0)
+    return 0;
+  if (IMM(obj))
+    return obj;
+  if (!infrom(gc, (V *)obj))
+    return obj;
+
+  Hd *hdr = UNBOX(obj);
+  if (hdr->type == TYPE_FWD) {
+    O *o = (O *)(hdr + 1);
+    return *o;
+  } else {
+    return copy(gc, hdr);
+  }
+}
+
+#if GC_DEBUG
+/** Print from-space usage to stderr, tagged with `label`. */
+static V printstats(Gc *gc, const char *label) {
+  Z used = (Z)(gc->from.free - gc->from.start);
+  fprintf(stderr, "[%s] used=%zu/%zu bytes (%.1f%%)\n", label, used,
+          (Z)HEAP_BYTES, (F)used / (F)HEAP_BYTES * 100.0);
+}
+#endif
+
+/**
+ * Run one copying collection: forward every root into to-space, walk the
+ * copied objects, then swap the two spaces and reset the new to-space.
+ */
+V gc_collect(Gc *gc) {
+  U8 *scan = gc->to.free;
+
+#if GC_DEBUG
+  printstats(gc, "before GC");
+#endif
+
+  for (Z i = 0; i < gc->roots.count; i++) {
+    O *o = gc->roots.items[i];
+    *o = forward(gc, *o);
+  }
+
+  while (scan < gc->to.free) {
+    Hd *hdr = (Hd *)scan;
+    switch (hdr->type) {
+    /* TODO: forward the fields of each real object type once any exist. */
+    case TYPE_FWD:
+      die("forwarding pointer in to-space");
+      break;
+    default:
+      fprintf(stderr, "GC warning: junk object type %" PRIu32 "\n", hdr->type);
+      break;
+    }
+
+    /* A zero step would never advance `scan`; treat it as corruption. */
+    Z step = ALIGN(hdr->size);
+    if (step == 0)
+      die("zero-sized object in to-space");
+    scan += step;
+  }
+
+  Gs tmp = gc->from;
+  gc->from = gc->to;
+  gc->to = tmp;
+  gc->to.free = gc->to.start;
+
+#if GC_DEBUG
+  printstats(gc, "after GC");
+#endif
+}
+
+/** Allocate both semispaces and empty the root list; aborts on OOM. */
+V gc_init(Gc *gc) {
+  gc->from.start = malloc(HEAP_BYTES);
+  if (!gc->from.start)
+    goto fatal;
+  gc->from.end = gc->from.start + HEAP_BYTES;
+  gc->from.free = gc->from.start;
+
+  gc->to.start = malloc(HEAP_BYTES);
+  if (!gc->to.start)
+    goto fatal;
+  gc->to.end = gc->to.start + HEAP_BYTES;
+  gc->to.free = gc->to.start;
+
+  gc->roots.capacity = 0;
+  gc->roots.count = 0;
+  gc->roots.items = NULL;
+  return;
+
+fatal:
+  fprintf(stderr, "failed to allocate heap space\n");
+  abort();
+}
+
+/** Run a final collection, then free both semispaces and the root list. */
+V gc_deinit(Gc *gc) {
+  gc_collect(gc);
+  free(gc->from.start);
+  free(gc->to.start);
+  yar_free(&gc->roots);
+}
diff --git a/gc.h b/gc.h
new file mode 100644
index 0000000..386f739
--- /dev/null
+++ b/gc.h
@@ -0,0 +1,36 @@
+#ifndef GC_H
+#define GC_H
+
+#include "common.h"
+#include "object.h"
+
+/** Nonzero makes gc_collect print heap usage before and after each run. */
+#define GC_DEBUG 1
+/** Size in bytes of each of the two semispaces. */
+#define HEAP_BYTES (4 * 1024 * 1024)
+
+/** One semispace: [start, end) with `free` as the bump pointer. */
+typedef struct Gs {
+  U8 *start, *end;
+  U8 *free;
+} Gs;
+
+/** Collector state: the two semispaces and the registered root slots. */
+typedef struct Gc {
+  Gs from, to;
+  struct {
+    O **items;
+    Z count, capacity;
+  } roots;
+} Gc;
+
+V gc_addroot(Gc *, O *);
+I gc_mark(Gc *);
+V gc_reset(Gc *, I);
+V gc_collect(Gc *);
+/* NOTE(review): gc.c does not define gc_alloc; confirm where it lives. */
+Hd *gc_alloc(Gc *, Z);
+V gc_init(Gc *);
+V gc_deinit(Gc *);
+
+#endif
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..dce95f9
--- /dev/null
+++ b/main.c
@@ -0,0 +1,105 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+
+#include "chunk.h"
+#include "gc.h"
+#include "parser.h"
+#include "vendor/mpc.h"
+#include "vm.h"
+
+/**
+ * Hex-dump `size` bytes at `data` to stdout, 16 bytes per row, with a
+ * printable-ASCII column ('.' for anything outside ' '..'~').
+ */
+void dump(const V *data, Z size) {
+  const unsigned char *bytes = data;
+  char ascii[17];
+  Z i, j;
+  ascii[16] = '\0';
+  for (i = 0; i < size; ++i) {
+    printf("%02X ", bytes[i]);
+    if (bytes[i] >= ' ' && bytes[i] <= '~') {
+      ascii[i % 16] = bytes[i];
+    } else {
+      ascii[i % 16] = '.';
+    }
+    if ((i + 1) % 8 == 0 || i + 1 == size) {
+      printf(" ");
+      if ((i + 1) % 16 == 0) {
+        printf("| %s \n", ascii);
+      } else if (i + 1 == size) {
+        ascii[(i + 1) % 16] = '\0';
+        if ((i + 1) % 16 <= 8) {
+          printf(" ");
+        }
+        /* Pad with one 3-column cell per missing byte so '|' lines up. */
+        for (j = (i + 1) % 16; j < 16; ++j) {
+          printf("   ");
+        }
+        printf("| %s \n", ascii);
+      }
+    }
+  }
+}
+
+/**
+ * Stand-in for an interactive loop: hand-assembles a chunk holding OP_CONST
+ * with the pool index of NUM(10), followed by OP_RETURN, and runs it.
+ * NOTE(review): `chunk` is never passed to chunk_free and no VM teardown is
+ * visible here; confirm ownership before adding cleanup.
+ */
+I repl(void) {
+  Bc chunk = {0};
+  Vm vm = {0};
+
+  vm_init(&vm);
+
+  I idx = chunk_add_constant(&chunk, NUM(10));
+  chunk_emit_byte(&chunk, OP_CONST);
+  chunk_emit_sleb128(&chunk, idx);
+  chunk_emit_byte(&chunk, OP_RETURN);
+
+  vm_run(&vm, &chunk, 0);
+
+  return 0;
+}
+
+/**
+ * Parse `fname` with the Program grammar and print the resulting AST.
+ * Returns 0 on success; on a parse error prints it to stderr and returns 1.
+ */
+I loadfile(const char *fname) {
+  Gc gc = {0};
+  gc_init(&gc);
+
+  mpc_result_t res;
+  if (!mpc_parse_contents(fname, Program, &res)) {
+    mpc_err_print_to(res.error, stderr);
+    mpc_err_delete(res.error);
+    gc_deinit(&gc);
+    return 1;
+  }
+
+  mpc_ast_print(res.output);
+  mpc_ast_delete(res.output);
+  gc_deinit(&gc);
+  return 0;
+}
+
+/** Entry point: no argument runs repl(), one argument loads that file. */
+int main(int argc, const char *argv[]) {
+  parser_init();
+  atexit(parser_deinit);
+
+  switch (argc) {
+  case 1:
+    return repl();
+  case 2:
+    return loadfile(argv[1]);
+  default:
+    fprintf(stderr, "usage: growl [file]\n");
+    return 64;
+  }
+}
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..7e917b3
--- /dev/null
+++ b/makefile
@@ -0,0 +1,11 @@
+CC := cc
+CFLAGS := -Og -g -std=c99 -Wpedantic -Wall
+OBJS = chunk.o gc.o main.o object.o parser.o print.o vm.o vendor/mpc.o \
+  vendor/yar.o
+
+growl: $(OBJS)
+	$(CC) -o growl $(OBJS)
+
+.PHONY: clean
+clean:
+	rm -f growl $(OBJS)
diff --git a/object.c b/object.c
new file mode 100644
index 0000000..e69de29
diff --git a/object.h b/object.h
new file mode 100644
index 0000000..fe16d90
--- /dev/null
+++ b/object.h
@@ -0,0 +1,30 @@
+#ifndef OBJECT_H
+#define OBJECT_H
+
+#include "common.h"
+
+/**
+ * Tagged machine word. Low bit set: immediate integer (see NUM/ORD).
+ * Low bit clear: NIL or a pointer to an object header (see BOX/UNBOX).
+ */
+typedef uintptr_t O;
+
+#define NIL ((O)0)
+#define BOX(x) ((O)(x))
+#define UNBOX(x) ((Hd *)(x))
+#define IMM(x) ((O)(x) & (O)1)
+/* Shift in the unsigned domain: left-shifting a negative intptr_t is UB. */
+#define NUM(x) (((O)(intptr_t)(x) << 1) | (O)1)
+/* NOTE(review): logical shift, so a negative NUM does not round-trip. */
+#define ORD(x) ((O)(x) >> 1)
+
+enum {
+  TYPE_FWD, /* from-space object already moved; see copy() in gc.c */
+};
+
+/** Object header. `size` is the object's total bytes, header included. */
+typedef struct Hd {
+  U32 size, type;
+} Hd;
+
+#endif
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..f53b8ba
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,65 @@
+#include "parser.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "vendor/mpc.h"
+
+mpc_parser_t *Pragma, *Comment, *Expr, *Number, *String, *Word, *Definition,
+    *Command, *List, *Table, *Quotation, *Program;
+
+/**
+ * Create the twelve rule parsers and bind them to the Growl grammar.
+ * Aborts after printing the mpc error if the grammar text is rejected.
+ */
+V parser_init(V) {
+  Pragma = mpc_new("pragma");
+  Comment = mpc_new("comment");
+  Expr = mpc_new("expr");
+  Number = mpc_new("number");
+  String = mpc_new("string");
+  Word = mpc_new("word");
+  Definition = mpc_new("def");
+  Command = mpc_new("command");
+  List = mpc_new("list");
+  Table = mpc_new("table");
+  Quotation = mpc_new("quotation");
+  Program = mpc_new("program");
+
+  /*
+   * NOTE(review): as it stands, no rule body below references another rule
+   * (mpc writes those as <name>), so e.g. `expr` has only empty
+   * alternatives. Restore the references from the authoritative grammar.
+   */
+  mpc_err_t *err = mpca_lang(
+      MPCA_LANG_DEFAULT,
+      " pragma : '#' ('(' * ')')? ; "
+      " comment : /\\\\[^\\n]*/ ; "
+      " expr : ( | | | "
+      " | | | | "
+      " | | ) ; "
+      " number : ( /0x[0-9A-Fa-f]+/ | /-?[0-9]+/ ) ; "
+      " string : /\"(\\\\.|[^\"])*\"/ ; "
+      " word : /[a-zA-Z0-9_!.,@#$%^&*_+\\-=><|\\/]+/ ; "
+      " def : ':' * ';' ; "
+      " command : ':' + ';' ; "
+      " list : '(' * ')' ; "
+      " table : '{' * '}' ; "
+      " quotation : '[' * ']' ; "
+      " program : /^/ * /$/ ; ",
+      Pragma, Comment, Expr, Number, String, Word, Definition, Command, List,
+      Table, Quotation, Program, NULL);
+
+  /* A rejected grammar is a programming error: report on stderr and abort. */
+  if (err != NULL) {
+    mpc_err_print_to(err, stderr);
+    mpc_err_delete(err);
+    abort();
+  }
+}
+
+/** Release all twelve parsers created by parser_init via mpc_cleanup. */
+V parser_deinit(V) {
+  mpc_cleanup(12, Pragma, Comment, Expr, Number, String, Word, Definition,
+              Command, List, Table, Quotation, Program);
+}
diff --git a/parser.h b/parser.h
new file mode 100644
index 0000000..c0b3f2a
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,15 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "common.h"
+#include "vendor/mpc.h"
+
+/** Build the grammar parsers; must run before Program is used. */
+V parser_init(V);
+/** Release the parsers built by parser_init. */
+V parser_deinit(V);
+
+/** Top-level parser for a whole source file; set by parser_init. */
+extern mpc_parser_t *Program;
+
+#endif
diff --git a/print.c b/print.c
new file mode 100644
index 0000000..e69de29
diff --git a/print.h b/print.h
new file mode 100644
index 0000000..e69de29
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..7ecb062
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,7 @@
+{ pkgs ? import <nixpkgs> {} }:
+
+pkgs.mkShell {
+  buildInputs = with pkgs; [
+    clang-tools bear gdb tinycc
+  ];
+}
diff --git a/test.grr b/test.grr
new file mode 100644
index 0000000..ff43c22
--- /dev/null
+++ b/test.grr
@@ -0,0 +1,3 @@
+\ This is a comment.
+ +: when [] if ; diff --git a/vendor/mpc.c b/vendor/mpc.c new file mode 100644 index 0000000..73a658b --- /dev/null +++ b/vendor/mpc.c @@ -0,0 +1,4128 @@ +#include "mpc.h" + +/* +** State Type +*/ + +static mpc_state_t mpc_state_invalid(void) { + mpc_state_t s; + s.pos = -1; + s.row = -1; + s.col = -1; + s.term = 0; + return s; +} + +static mpc_state_t mpc_state_new(void) { + mpc_state_t s; + s.pos = 0; + s.row = 0; + s.col = 0; + s.term = 0; + return s; +} + +/* +** Input Type +*/ + +/* +** In mpc the input type has three modes of +** operation: String, File and Pipe. +** +** String is easy. The whole contents are +** loaded into a buffer and scanned through. +** The cursor can jump around at will making +** backtracking easy. +** +** The second is a File which is also somewhat +** easy. The contents are never loaded into +** memory but backtracking can still be achieved +** by seeking in the file at different positions. +** +** The final mode is Pipe. This is the difficult +** one. As we assume pipes cannot be seeked - and +** only support a single character lookahead at +** any point, when the input is marked for a +** potential backtracking we start buffering any +** input. +** +** This means that if we are requested to seek +** back we can simply start reading from the +** buffer instead of the input. +** +** Of course using `mpc_predictive` will disable +** backtracking and make LL(1) grammars easy +** to parse for all input methods. 
+** +*/ + +enum { + MPC_INPUT_STRING = 0, + MPC_INPUT_FILE = 1, + MPC_INPUT_PIPE = 2 +}; + +enum { + MPC_INPUT_MARKS_MIN = 32 +}; + +enum { + MPC_INPUT_MEM_NUM = 512 +}; + +typedef struct { + char mem[64]; +} mpc_mem_t; + +typedef struct { + + int type; + char *filename; + mpc_state_t state; + + char *string; + char *buffer; + FILE *file; + + int suppress; + int backtrack; + int marks_slots; + int marks_num; + mpc_state_t *marks; + + char *lasts; + char last; + + size_t mem_index; + char mem_full[MPC_INPUT_MEM_NUM]; + mpc_mem_t mem[MPC_INPUT_MEM_NUM]; + +} mpc_input_t; + +static mpc_input_t *mpc_input_new_string(const char *filename, const char *string) { + + mpc_input_t *i = malloc(sizeof(mpc_input_t)); + + i->filename = malloc(strlen(filename) + 1); + strcpy(i->filename, filename); + i->type = MPC_INPUT_STRING; + + i->state = mpc_state_new(); + + i->string = malloc(strlen(string) + 1); + strcpy(i->string, string); + i->buffer = NULL; + i->file = NULL; + + i->suppress = 0; + i->backtrack = 1; + i->marks_num = 0; + i->marks_slots = MPC_INPUT_MARKS_MIN; + i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); + i->lasts = malloc(sizeof(char) * i->marks_slots); + i->last = '\0'; + + i->mem_index = 0; + memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); + + return i; +} + +static mpc_input_t *mpc_input_new_nstring(const char *filename, const char *string, size_t length) { + + mpc_input_t *i = malloc(sizeof(mpc_input_t)); + + i->filename = malloc(strlen(filename) + 1); + strcpy(i->filename, filename); + i->type = MPC_INPUT_STRING; + + i->state = mpc_state_new(); + + i->string = malloc(length + 1); + strncpy(i->string, string, length); + i->string[length] = '\0'; + i->buffer = NULL; + i->file = NULL; + + i->suppress = 0; + i->backtrack = 1; + i->marks_num = 0; + i->marks_slots = MPC_INPUT_MARKS_MIN; + i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); + i->lasts = malloc(sizeof(char) * i->marks_slots); + i->last = '\0'; + + i->mem_index = 0; + 
memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); + + return i; + +} + +static mpc_input_t *mpc_input_new_pipe(const char *filename, FILE *pipe) { + + mpc_input_t *i = malloc(sizeof(mpc_input_t)); + + i->filename = malloc(strlen(filename) + 1); + strcpy(i->filename, filename); + + i->type = MPC_INPUT_PIPE; + i->state = mpc_state_new(); + + i->string = NULL; + i->buffer = NULL; + i->file = pipe; + + i->suppress = 0; + i->backtrack = 1; + i->marks_num = 0; + i->marks_slots = MPC_INPUT_MARKS_MIN; + i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); + i->lasts = malloc(sizeof(char) * i->marks_slots); + i->last = '\0'; + + i->mem_index = 0; + memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); + + return i; + +} + +static mpc_input_t *mpc_input_new_file(const char *filename, FILE *file) { + + mpc_input_t *i = malloc(sizeof(mpc_input_t)); + + i->filename = malloc(strlen(filename) + 1); + strcpy(i->filename, filename); + i->type = MPC_INPUT_FILE; + i->state = mpc_state_new(); + + i->string = NULL; + i->buffer = NULL; + i->file = file; + + i->suppress = 0; + i->backtrack = 1; + i->marks_num = 0; + i->marks_slots = MPC_INPUT_MARKS_MIN; + i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots); + i->lasts = malloc(sizeof(char) * i->marks_slots); + i->last = '\0'; + + i->mem_index = 0; + memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM); + + return i; +} + +static void mpc_input_delete(mpc_input_t *i) { + + free(i->filename); + + if (i->type == MPC_INPUT_STRING) { free(i->string); } + if (i->type == MPC_INPUT_PIPE) { free(i->buffer); } + + free(i->marks); + free(i->lasts); + free(i); +} + +static int mpc_mem_ptr(mpc_input_t *i, void *p) { + return + (char*)p >= (char*)(i->mem) && + (char*)p < (char*)(i->mem) + (MPC_INPUT_MEM_NUM * sizeof(mpc_mem_t)); +} + +static void *mpc_malloc(mpc_input_t *i, size_t n) { + size_t j; + char *p; + + if (n > sizeof(mpc_mem_t)) { return malloc(n); } + + j = i->mem_index; + do { + if (!i->mem_full[i->mem_index]) { + p 
= (void*)(i->mem + i->mem_index); + i->mem_full[i->mem_index] = 1; + i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; + return p; + } + i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM; + } while (j != i->mem_index); + + return malloc(n); +} + +static void *mpc_calloc(mpc_input_t *i, size_t n, size_t m) { + char *x = mpc_malloc(i, n * m); + memset(x, 0, n * m); + return x; +} + +static void mpc_free(mpc_input_t *i, void *p) { + size_t j; + if (!mpc_mem_ptr(i, p)) { free(p); return; } + j = ((size_t)(((char*)p) - ((char*)i->mem))) / sizeof(mpc_mem_t); + i->mem_full[j] = 0; +} + +static void *mpc_realloc(mpc_input_t *i, void *p, size_t n) { + + char *q = NULL; + + if (!mpc_mem_ptr(i, p)) { return realloc(p, n); } + + if (n > sizeof(mpc_mem_t)) { + q = malloc(n); + memcpy(q, p, sizeof(mpc_mem_t)); + mpc_free(i, p); + return q; + } + + return p; +} + +static void *mpc_export(mpc_input_t *i, void *p) { + char *q = NULL; + if (!mpc_mem_ptr(i, p)) { return p; } + q = malloc(sizeof(mpc_mem_t)); + memcpy(q, p, sizeof(mpc_mem_t)); + mpc_free(i, p); + return q; +} + +static void mpc_input_backtrack_disable(mpc_input_t *i) { i->backtrack--; } +static void mpc_input_backtrack_enable(mpc_input_t *i) { i->backtrack++; } + +static void mpc_input_suppress_disable(mpc_input_t *i) { i->suppress--; } +static void mpc_input_suppress_enable(mpc_input_t *i) { i->suppress++; } + +static void mpc_input_mark(mpc_input_t *i) { + + if (i->backtrack < 1) { return; } + + i->marks_num++; + + if (i->marks_num > i->marks_slots) { + i->marks_slots = i->marks_num + i->marks_num / 2; + i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); + i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); + } + + i->marks[i->marks_num-1] = i->state; + i->lasts[i->marks_num-1] = i->last; + + if (i->type == MPC_INPUT_PIPE && i->marks_num == 1) { + i->buffer = calloc(1, 1); + } + +} + +static void mpc_input_unmark(mpc_input_t *i) { + int j; + + if (i->backtrack < 1) { return; } + + 
i->marks_num--; + + if (i->marks_slots > i->marks_num + i->marks_num / 2 + && i->marks_slots > MPC_INPUT_MARKS_MIN) { + i->marks_slots = + i->marks_num > MPC_INPUT_MARKS_MIN ? + i->marks_num : MPC_INPUT_MARKS_MIN; + i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots); + i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots); + } + + if (i->type == MPC_INPUT_PIPE && i->marks_num == 0) { + for (j = strlen(i->buffer) - 1; j >= 0; j--) + ungetc(i->buffer[j], i->file); + + free(i->buffer); + i->buffer = NULL; + } + +} + +static void mpc_input_rewind(mpc_input_t *i) { + + if (i->backtrack < 1) { return; } + + i->state = i->marks[i->marks_num-1]; + i->last = i->lasts[i->marks_num-1]; + + if (i->type == MPC_INPUT_FILE) { + fseek(i->file, i->state.pos, SEEK_SET); + } + + mpc_input_unmark(i); +} + +static int mpc_input_buffer_in_range(mpc_input_t *i) { + return i->state.pos < (long)(strlen(i->buffer) + i->marks[0].pos); +} + +static char mpc_input_buffer_get(mpc_input_t *i) { + return i->buffer[i->state.pos - i->marks[0].pos]; +} + +static char mpc_input_getc(mpc_input_t *i) { + + char c = '\0'; + + switch (i->type) { + + case MPC_INPUT_STRING: return i->string[i->state.pos]; + case MPC_INPUT_FILE: c = fgetc(i->file); return c; + case MPC_INPUT_PIPE: + + if (!i->buffer) { c = getc(i->file); return c; } + + if (i->buffer && mpc_input_buffer_in_range(i)) { + c = mpc_input_buffer_get(i); + return c; + } else { + c = getc(i->file); + return c; + } + + default: return c; + } +} + +static char mpc_input_peekc(mpc_input_t *i) { + + char c = '\0'; + + switch (i->type) { + case MPC_INPUT_STRING: return i->string[i->state.pos]; + case MPC_INPUT_FILE: + + c = fgetc(i->file); + if (feof(i->file)) { return '\0'; } + + fseek(i->file, -1, SEEK_CUR); + return c; + + case MPC_INPUT_PIPE: + + if (!i->buffer) { + c = getc(i->file); + if (feof(i->file)) { return '\0'; } + ungetc(c, i->file); + return c; + } + + if (i->buffer && mpc_input_buffer_in_range(i)) { + return 
mpc_input_buffer_get(i); + } else { + c = getc(i->file); + if (feof(i->file)) { return '\0'; } + ungetc(c, i->file); + return c; + } + + default: return c; + } + +} + +static int mpc_input_terminated(mpc_input_t *i) { + return mpc_input_peekc(i) == '\0'; +} + +static int mpc_input_failure(mpc_input_t *i, char c) { + + switch (i->type) { + case MPC_INPUT_STRING: { break; } + case MPC_INPUT_FILE: fseek(i->file, -1, SEEK_CUR); { break; } + case MPC_INPUT_PIPE: { + + if (!i->buffer) { ungetc(c, i->file); break; } + + if (i->buffer && mpc_input_buffer_in_range(i)) { + break; + } else { + ungetc(c, i->file); + } + } + default: { break; } + } + return 0; +} + +static int mpc_input_success(mpc_input_t *i, char c, char **o) { + + if (i->type == MPC_INPUT_PIPE + && i->buffer && !mpc_input_buffer_in_range(i)) { + i->buffer = realloc(i->buffer, strlen(i->buffer) + 2); + i->buffer[strlen(i->buffer) + 1] = '\0'; + i->buffer[strlen(i->buffer) + 0] = c; + } + + i->last = c; + i->state.pos++; + i->state.col++; + + if (c == '\n') { + i->state.col = 0; + i->state.row++; + } + + if (o) { + (*o) = mpc_malloc(i, 2); + (*o)[0] = c; + (*o)[1] = '\0'; + } + + return 1; +} + +static int mpc_input_any(mpc_input_t *i, char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return mpc_input_success(i, x, o); +} + +static int mpc_input_char(mpc_input_t *i, char c, char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return x == c ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); +} + +static int mpc_input_range(mpc_input_t *i, char c, char d, char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return x >= c && x <= d ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); +} + +static int mpc_input_oneof(mpc_input_t *i, const char *c, char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return strchr(c, x) != 0 ? 
mpc_input_success(i, x, o) : mpc_input_failure(i, x); +} + +static int mpc_input_noneof(mpc_input_t *i, const char *c, char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return strchr(c, x) == 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); +} + +static int mpc_input_satisfy(mpc_input_t *i, int(*cond)(char), char **o) { + char x; + if (mpc_input_terminated(i)) { return 0; } + x = mpc_input_getc(i); + return cond(x) ? mpc_input_success(i, x, o) : mpc_input_failure(i, x); +} + +static int mpc_input_string(mpc_input_t *i, const char *c, char **o) { + + const char *x = c; + + mpc_input_mark(i); + while (*x) { + if (!mpc_input_char(i, *x, NULL)) { + mpc_input_rewind(i); + return 0; + } + x++; + } + mpc_input_unmark(i); + + *o = mpc_malloc(i, strlen(c) + 1); + strcpy(*o, c); + return 1; +} + +static int mpc_input_anchor(mpc_input_t* i, int(*f)(char,char), char **o) { + *o = NULL; + return f(i->last, mpc_input_peekc(i)); +} + +static int mpc_input_soi(mpc_input_t* i, char **o) { + *o = NULL; + return i->last == '\0'; +} + +static int mpc_input_eoi(mpc_input_t* i, char **o) { + *o = NULL; + if (i->state.term) { + return 0; + } else if (mpc_input_terminated(i)) { + i->state.term = 1; + return 1; + } else { + return 0; + } +} + +static mpc_state_t *mpc_input_state_copy(mpc_input_t *i) { + mpc_state_t *r = mpc_malloc(i, sizeof(mpc_state_t)); + memcpy(r, &i->state, sizeof(mpc_state_t)); + return r; +} + +/* +** Error Type +*/ + +void mpc_err_delete(mpc_err_t *x) { + int i; + for (i = 0; i < x->expected_num; i++) { free(x->expected[i]); } + free(x->expected); + free(x->filename); + free(x->failure); + free(x); +} + +void mpc_err_print(mpc_err_t *x) { + mpc_err_print_to(x, stdout); +} + +void mpc_err_print_to(mpc_err_t *x, FILE *f) { + char *str = mpc_err_string(x); + fprintf(f, "%s", str); + free(str); +} + +static void mpc_err_string_cat(char *buffer, int *pos, int *max, char const *fmt, ...) 
{ + /* TODO: Error Checking on Length */ + int left = ((*max) - (*pos)); + va_list va; + va_start(va, fmt); + if (left < 0) { left = 0;} + (*pos) += vsprintf(buffer + (*pos), fmt, va); + va_end(va); +} + +static const char *mpc_err_char_unescape(char c, char char_unescape_buffer[4]) { + + char_unescape_buffer[0] = '\''; + char_unescape_buffer[1] = ' '; + char_unescape_buffer[2] = '\''; + char_unescape_buffer[3] = '\0'; + + switch (c) { + case '\a': return "bell"; + case '\b': return "backspace"; + case '\f': return "formfeed"; + case '\r': return "carriage return"; + case '\v': return "vertical tab"; + case '\0': return "end of input"; + case '\n': return "newline"; + case '\t': return "tab"; + case ' ' : return "space"; + default: + char_unescape_buffer[1] = c; + return char_unescape_buffer; + } + +} + +char *mpc_err_string(mpc_err_t *x) { + + int i; + int pos = 0; + int max = 1023; + char *buffer = calloc(1, 1024); + char char_unescape_buffer[4]; + + if (x->failure) { + mpc_err_string_cat(buffer, &pos, &max, + "%s: error: %s\n", x->filename, x->failure); + return buffer; + } + + mpc_err_string_cat(buffer, &pos, &max, + "%s:%li:%li: error: expected ", x->filename, x->state.row+1, x->state.col+1); + + if (x->expected_num == 0) { mpc_err_string_cat(buffer, &pos, &max, "ERROR: NOTHING EXPECTED"); } + if (x->expected_num == 1) { mpc_err_string_cat(buffer, &pos, &max, "%s", x->expected[0]); } + if (x->expected_num >= 2) { + + for (i = 0; i < x->expected_num-2; i++) { + mpc_err_string_cat(buffer, &pos, &max, "%s, ", x->expected[i]); + } + + mpc_err_string_cat(buffer, &pos, &max, "%s or %s", + x->expected[x->expected_num-2], + x->expected[x->expected_num-1]); + } + + mpc_err_string_cat(buffer, &pos, &max, " at "); + mpc_err_string_cat(buffer, &pos, &max, mpc_err_char_unescape(x->received, char_unescape_buffer)); + mpc_err_string_cat(buffer, &pos, &max, "\n"); + + return realloc(buffer, strlen(buffer) + 1); +} + +static mpc_err_t *mpc_err_new(mpc_input_t *i, const char 
*expected) { + mpc_err_t *x; + if (i->suppress) { return NULL; } + x = mpc_malloc(i, sizeof(mpc_err_t)); + x->filename = mpc_malloc(i, strlen(i->filename) + 1); + strcpy(x->filename, i->filename); + x->state = i->state; + x->expected_num = 1; + x->expected = mpc_malloc(i, sizeof(char*)); + x->expected[0] = mpc_malloc(i, strlen(expected) + 1); + strcpy(x->expected[0], expected); + x->failure = NULL; + x->received = mpc_input_peekc(i); + return x; +} + +static mpc_err_t *mpc_err_fail(mpc_input_t *i, const char *failure) { + mpc_err_t *x; + if (i->suppress) { return NULL; } + x = mpc_malloc(i, sizeof(mpc_err_t)); + x->filename = mpc_malloc(i, strlen(i->filename) + 1); + strcpy(x->filename, i->filename); + x->state = i->state; + x->expected_num = 0; + x->expected = NULL; + x->failure = mpc_malloc(i, strlen(failure) + 1); + strcpy(x->failure, failure); + x->received = ' '; + return x; +} + +static mpc_err_t *mpc_err_file(const char *filename, const char *failure) { + mpc_err_t *x; + x = malloc(sizeof(mpc_err_t)); + x->filename = malloc(strlen(filename) + 1); + strcpy(x->filename, filename); + x->state = mpc_state_new(); + x->expected_num = 0; + x->expected = NULL; + x->failure = malloc(strlen(failure) + 1); + strcpy(x->failure, failure); + x->received = ' '; + return x; +} + +static void mpc_err_delete_internal(mpc_input_t *i, mpc_err_t *x) { + int j; + if (x == NULL) { return; } + for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } + mpc_free(i, x->expected); + mpc_free(i, x->filename); + mpc_free(i, x->failure); + mpc_free(i, x); +} + +static mpc_err_t *mpc_err_export(mpc_input_t *i, mpc_err_t *x) { + int j; + for (j = 0; j < x->expected_num; j++) { + x->expected[j] = mpc_export(i, x->expected[j]); + } + x->expected = mpc_export(i, x->expected); + x->filename = mpc_export(i, x->filename); + x->failure = mpc_export(i, x->failure); + return mpc_export(i, x); +} + +static int mpc_err_contains_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { + 
int j; + (void)i; + for (j = 0; j < x->expected_num; j++) { + if (strcmp(x->expected[j], expected) == 0) { return 1; } + } + return 0; +} + +static void mpc_err_add_expected(mpc_input_t *i, mpc_err_t *x, char *expected) { + (void)i; + x->expected_num++; + x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); + x->expected[x->expected_num-1] = mpc_malloc(i, strlen(expected) + 1); + strcpy(x->expected[x->expected_num-1], expected); +} + +static mpc_err_t *mpc_err_or(mpc_input_t *i, mpc_err_t** x, int n) { + + int j, k, fst; + mpc_err_t *e; + + fst = -1; + for (j = 0; j < n; j++) { + if (x[j] != NULL) { fst = j; } + } + + if (fst == -1) { return NULL; } + + e = mpc_malloc(i, sizeof(mpc_err_t)); + e->state = mpc_state_invalid(); + e->expected_num = 0; + e->expected = NULL; + e->failure = NULL; + e->filename = mpc_malloc(i, strlen(x[fst]->filename)+1); + strcpy(e->filename, x[fst]->filename); + + for (j = 0; j < n; j++) { + if (x[j] == NULL) { continue; } + if (x[j]->state.pos > e->state.pos) { e->state = x[j]->state; } + } + + for (j = 0; j < n; j++) { + if (x[j] == NULL) { continue; } + if (x[j]->state.pos < e->state.pos) { continue; } + + if (x[j]->failure) { + e->failure = mpc_malloc(i, strlen(x[j]->failure)+1); + strcpy(e->failure, x[j]->failure); + break; + } + + e->received = x[j]->received; + + for (k = 0; k < x[j]->expected_num; k++) { + if (!mpc_err_contains_expected(i, e, x[j]->expected[k])) { + mpc_err_add_expected(i, e, x[j]->expected[k]); + } + } + } + + for (j = 0; j < n; j++) { + if (x[j] == NULL) { continue; } + mpc_err_delete_internal(i, x[j]); + } + + return e; +} + +static mpc_err_t *mpc_err_repeat(mpc_input_t *i, mpc_err_t *x, const char *prefix) { + + int j = 0; + size_t l = 0; + char *expect = NULL; + + if (x == NULL) { return NULL; } + + if (x->expected_num == 0) { + expect = mpc_calloc(i, 1, 1); + x->expected_num = 1; + x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); + x->expected[0] = expect; + 
return x; + } + + else if (x->expected_num == 1) { + expect = mpc_malloc(i, strlen(prefix) + strlen(x->expected[0]) + 1); + strcpy(expect, prefix); + strcat(expect, x->expected[0]); + mpc_free(i, x->expected[0]); + x->expected[0] = expect; + return x; + } + + else if (x->expected_num > 1) { + + l += strlen(prefix); + for (j = 0; j < x->expected_num-2; j++) { + l += strlen(x->expected[j]) + strlen(", "); + } + l += strlen(x->expected[x->expected_num-2]); + l += strlen(" or "); + l += strlen(x->expected[x->expected_num-1]); + + expect = mpc_malloc(i, l + 1); + + strcpy(expect, prefix); + for (j = 0; j < x->expected_num-2; j++) { + strcat(expect, x->expected[j]); strcat(expect, ", "); + } + strcat(expect, x->expected[x->expected_num-2]); + strcat(expect, " or "); + strcat(expect, x->expected[x->expected_num-1]); + + for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } + + x->expected_num = 1; + x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); + x->expected[0] = expect; + return x; + } + + return NULL; +} + +static mpc_err_t *mpc_err_many1(mpc_input_t *i, mpc_err_t *x) { + return mpc_err_repeat(i, x, "one or more of "); +} + +static mpc_err_t *mpc_err_count(mpc_input_t *i, mpc_err_t *x, int n) { + mpc_err_t *y; + int digits = n/10 + 1; + char *prefix; + prefix = mpc_malloc(i, digits + strlen(" of ") + 1); + if (!prefix) { + return NULL; + } + sprintf(prefix, "%i of ", n); + y = mpc_err_repeat(i, x, prefix); + mpc_free(i, prefix); + return y; +} + +static mpc_err_t *mpc_err_merge(mpc_input_t *i, mpc_err_t *x, mpc_err_t *y) { + mpc_err_t *errs[2]; + errs[0] = x; + errs[1] = y; + return mpc_err_or(i, errs, 2); +} + +/* +** Parser Type +*/ + +enum { + MPC_TYPE_UNDEFINED = 0, + MPC_TYPE_PASS = 1, + MPC_TYPE_FAIL = 2, + MPC_TYPE_LIFT = 3, + MPC_TYPE_LIFT_VAL = 4, + MPC_TYPE_EXPECT = 5, + MPC_TYPE_ANCHOR = 6, + MPC_TYPE_STATE = 7, + + MPC_TYPE_ANY = 8, + MPC_TYPE_SINGLE = 9, + MPC_TYPE_ONEOF = 10, + MPC_TYPE_NONEOF = 11, + 
MPC_TYPE_RANGE = 12, + MPC_TYPE_SATISFY = 13, + MPC_TYPE_STRING = 14, + + MPC_TYPE_APPLY = 15, + MPC_TYPE_APPLY_TO = 16, + MPC_TYPE_PREDICT = 17, + MPC_TYPE_NOT = 18, + MPC_TYPE_MAYBE = 19, + MPC_TYPE_MANY = 20, + MPC_TYPE_MANY1 = 21, + MPC_TYPE_COUNT = 22, + + MPC_TYPE_OR = 23, + MPC_TYPE_AND = 24, + + MPC_TYPE_CHECK = 25, + MPC_TYPE_CHECK_WITH = 26, + + MPC_TYPE_SOI = 27, + MPC_TYPE_EOI = 28, + + MPC_TYPE_SEPBY1 = 29 +}; + +typedef struct { char *m; } mpc_pdata_fail_t; +typedef struct { mpc_ctor_t lf; void *x; } mpc_pdata_lift_t; +typedef struct { mpc_parser_t *x; char *m; } mpc_pdata_expect_t; +typedef struct { int(*f)(char,char); } mpc_pdata_anchor_t; +typedef struct { char x; } mpc_pdata_single_t; +typedef struct { char x; char y; } mpc_pdata_range_t; +typedef struct { int(*f)(char); } mpc_pdata_satisfy_t; +typedef struct { char *x; } mpc_pdata_string_t; +typedef struct { mpc_parser_t *x; mpc_apply_t f; } mpc_pdata_apply_t; +typedef struct { mpc_parser_t *x; mpc_apply_to_t f; void *d; } mpc_pdata_apply_to_t; +typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_t f; char *e; } mpc_pdata_check_t; +typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_with_t f; void *d; char *e; } mpc_pdata_check_with_t; +typedef struct { mpc_parser_t *x; } mpc_pdata_predict_t; +typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_ctor_t lf; } mpc_pdata_not_t; +typedef struct { int n; mpc_fold_t f; mpc_parser_t *x; mpc_dtor_t dx; } mpc_pdata_repeat_t; +typedef struct { int n; mpc_parser_t **xs; } mpc_pdata_or_t; +typedef struct { int n; mpc_fold_t f; mpc_parser_t **xs; mpc_dtor_t *dxs; } mpc_pdata_and_t; +typedef struct { int n; mpc_fold_t f; mpc_parser_t *x; mpc_parser_t *sep; } mpc_pdata_sepby1; + +typedef union { + mpc_pdata_fail_t fail; + mpc_pdata_lift_t lift; + mpc_pdata_expect_t expect; + mpc_pdata_anchor_t anchor; + mpc_pdata_single_t single; + mpc_pdata_range_t range; + mpc_pdata_satisfy_t satisfy; + mpc_pdata_string_t string; + mpc_pdata_apply_t apply; + 
mpc_pdata_apply_to_t apply_to; + mpc_pdata_check_t check; + mpc_pdata_check_with_t check_with; + mpc_pdata_predict_t predict; + mpc_pdata_not_t not; + mpc_pdata_repeat_t repeat; + mpc_pdata_and_t and; + mpc_pdata_or_t or; + mpc_pdata_sepby1 sepby1; +} mpc_pdata_t; + +struct mpc_parser_t { + char *name; + mpc_pdata_t data; + char type; + char retained; +}; + +static mpc_val_t *mpcf_input_nth_free(mpc_input_t *i, int n, mpc_val_t **xs, int x) { + int j; + for (j = 0; j < n; j++) { if (j != x) { mpc_free(i, xs[j]); } } + return xs[x]; +} + +static mpc_val_t *mpcf_input_fst_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 0); } +static mpc_val_t *mpcf_input_snd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 1); } +static mpc_val_t *mpcf_input_trd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 2); } + +static mpc_val_t *mpcf_input_strfold(mpc_input_t *i, int n, mpc_val_t **xs) { + int j; + size_t l = 0; + if (n == 0) { return mpc_calloc(i, 1, 1); } + for (j = 0; j < n; j++) { l += strlen(xs[j]); } + xs[0] = mpc_realloc(i, xs[0], l + 1); + for (j = 1; j < n; j++) { strcat(xs[0], xs[j]); mpc_free(i, xs[j]); } + return xs[0]; +} + +static mpc_val_t *mpcf_input_state_ast(mpc_input_t *i, int n, mpc_val_t **xs) { + mpc_state_t *s = ((mpc_state_t**)xs)[0]; + mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; + a = mpc_ast_state(a, *s); + mpc_free(i, s); + (void) n; + return a; +} + +static mpc_val_t *mpc_parse_fold(mpc_input_t *i, mpc_fold_t f, int n, mpc_val_t **xs) { + int j; + if (f == mpcf_null) { return mpcf_null(n, xs); } + if (f == mpcf_fst) { return mpcf_fst(n, xs); } + if (f == mpcf_snd) { return mpcf_snd(n, xs); } + if (f == mpcf_trd) { return mpcf_trd(n, xs); } + if (f == mpcf_fst_free) { return mpcf_input_fst_free(i, n, xs); } + if (f == mpcf_snd_free) { return mpcf_input_snd_free(i, n, xs); } + if (f == mpcf_trd_free) { return mpcf_input_trd_free(i, n, xs); } + if (f == 
mpcf_strfold) { return mpcf_input_strfold(i, n, xs); } + if (f == mpcf_state_ast) { return mpcf_input_state_ast(i, n, xs); } + for (j = 0; j < n; j++) { xs[j] = mpc_export(i, xs[j]); } + return f(j, xs); +} + +static mpc_val_t *mpcf_input_free(mpc_input_t *i, mpc_val_t *x) { + mpc_free(i, x); + return NULL; +} + +static mpc_val_t *mpcf_input_str_ast(mpc_input_t *i, mpc_val_t *c) { + mpc_ast_t *a = mpc_ast_new("", c); + mpc_free(i, c); + return a; +} + +static mpc_val_t *mpc_parse_apply(mpc_input_t *i, mpc_apply_t f, mpc_val_t *x) { + if (f == mpcf_free) { return mpcf_input_free(i, x); } + if (f == mpcf_str_ast) { return mpcf_input_str_ast(i, x); } + return f(mpc_export(i, x)); +} + +static mpc_val_t *mpc_parse_apply_to(mpc_input_t *i, mpc_apply_to_t f, mpc_val_t *x, mpc_val_t *d) { + return f(mpc_export(i, x), d); +} + +static void mpc_parse_dtor(mpc_input_t *i, mpc_dtor_t d, mpc_val_t *x) { + if (d == free) { mpc_free(i, x); return; } + d(mpc_export(i, x)); +} + +enum { + MPC_PARSE_STACK_MIN = 4 +}; + +#define MPC_SUCCESS(x) r->output = x; return 1 +#define MPC_FAILURE(x) r->error = x; return 0 +#define MPC_PRIMITIVE(x) \ + if (x) { MPC_SUCCESS(r->output); } \ + else { MPC_FAILURE(NULL); } + +#define MPC_MAX_RECURSION_DEPTH 1000 + +static mpc_result_t *mpc_grow_results(mpc_input_t *i, int j, mpc_result_t *results_stk, mpc_result_t *results){ + mpc_result_t *tmp_results = results; + + if (j == MPC_PARSE_STACK_MIN) { + int results_slots = j + j / 2; + tmp_results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); + memcpy(tmp_results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); + } else if (j >= MPC_PARSE_STACK_MIN) { + int results_slots = j + j / 2; + tmp_results = mpc_realloc(i, tmp_results, sizeof(mpc_result_t) * results_slots); + } + + return tmp_results; +} + +static int mpc_parse_run(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r, mpc_err_t **e, int depth) { + + int j = 0, k = 0; + mpc_result_t results_stk[MPC_PARSE_STACK_MIN]; + 
mpc_result_t *results; + + if (depth == MPC_MAX_RECURSION_DEPTH) + { + MPC_FAILURE(mpc_err_fail(i, "Maximum recursion depth exceeded!")); + } + + switch (p->type) { + + /* Basic Parsers */ + + case MPC_TYPE_ANY: MPC_PRIMITIVE(mpc_input_any(i, (char**)&r->output)); + case MPC_TYPE_SINGLE: MPC_PRIMITIVE(mpc_input_char(i, p->data.single.x, (char**)&r->output)); + case MPC_TYPE_RANGE: MPC_PRIMITIVE(mpc_input_range(i, p->data.range.x, p->data.range.y, (char**)&r->output)); + case MPC_TYPE_ONEOF: MPC_PRIMITIVE(mpc_input_oneof(i, p->data.string.x, (char**)&r->output)); + case MPC_TYPE_NONEOF: MPC_PRIMITIVE(mpc_input_noneof(i, p->data.string.x, (char**)&r->output)); + case MPC_TYPE_SATISFY: MPC_PRIMITIVE(mpc_input_satisfy(i, p->data.satisfy.f, (char**)&r->output)); + case MPC_TYPE_STRING: MPC_PRIMITIVE(mpc_input_string(i, p->data.string.x, (char**)&r->output)); + case MPC_TYPE_ANCHOR: MPC_PRIMITIVE(mpc_input_anchor(i, p->data.anchor.f, (char**)&r->output)); + case MPC_TYPE_SOI: MPC_PRIMITIVE(mpc_input_soi(i, (char**)&r->output)); + case MPC_TYPE_EOI: MPC_PRIMITIVE(mpc_input_eoi(i, (char**)&r->output)); + + /* Other parsers */ + + case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_fail(i, "Parser Undefined!")); + case MPC_TYPE_PASS: MPC_SUCCESS(NULL); + case MPC_TYPE_FAIL: MPC_FAILURE(mpc_err_fail(i, p->data.fail.m)); + case MPC_TYPE_LIFT: MPC_SUCCESS(p->data.lift.lf()); + case MPC_TYPE_LIFT_VAL: MPC_SUCCESS(p->data.lift.x); + case MPC_TYPE_STATE: MPC_SUCCESS(mpc_input_state_copy(i)); + + /* Application Parsers */ + + case MPC_TYPE_APPLY: + if (mpc_parse_run(i, p->data.apply.x, r, e, depth+1)) { + MPC_SUCCESS(mpc_parse_apply(i, p->data.apply.f, r->output)); + } else { + MPC_FAILURE(r->output); + } + + case MPC_TYPE_APPLY_TO: + if (mpc_parse_run(i, p->data.apply_to.x, r, e, depth+1)) { + MPC_SUCCESS(mpc_parse_apply_to(i, p->data.apply_to.f, r->output, p->data.apply_to.d)); + } else { + MPC_FAILURE(r->error); + } + + case MPC_TYPE_CHECK: + if (mpc_parse_run(i, p->data.check.x, r, 
e, depth+1)) { + if (p->data.check.f(&r->output)) { + MPC_SUCCESS(r->output); + } else { + mpc_parse_dtor(i, p->data.check.dx, r->output); + MPC_FAILURE(mpc_err_fail(i, p->data.check.e)); + } + } else { + MPC_FAILURE(r->error); + } + + case MPC_TYPE_CHECK_WITH: + if (mpc_parse_run(i, p->data.check_with.x, r, e, depth+1)) { + if (p->data.check_with.f(&r->output, p->data.check_with.d)) { + MPC_SUCCESS(r->output); + } else { + mpc_parse_dtor(i, p->data.check.dx, r->output); + MPC_FAILURE(mpc_err_fail(i, p->data.check_with.e)); + } + } else { + MPC_FAILURE(r->error); + } + + case MPC_TYPE_EXPECT: + mpc_input_suppress_enable(i); + if (mpc_parse_run(i, p->data.expect.x, r, e, depth+1)) { + mpc_input_suppress_disable(i); + MPC_SUCCESS(r->output); + } else { + mpc_input_suppress_disable(i); + MPC_FAILURE(mpc_err_new(i, p->data.expect.m)); + } + + case MPC_TYPE_PREDICT: + mpc_input_backtrack_disable(i); + if (mpc_parse_run(i, p->data.predict.x, r, e, depth+1)) { + mpc_input_backtrack_enable(i); + MPC_SUCCESS(r->output); + } else { + mpc_input_backtrack_enable(i); + MPC_FAILURE(r->error); + } + + /* Optional Parsers */ + + /* TODO: Update Not Error Message */ + + case MPC_TYPE_NOT: + mpc_input_mark(i); + mpc_input_suppress_enable(i); + if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { + mpc_input_rewind(i); + mpc_input_suppress_disable(i); + mpc_parse_dtor(i, p->data.not.dx, r->output); + MPC_FAILURE(mpc_err_new(i, "opposite")); + } else { + mpc_input_unmark(i); + mpc_input_suppress_disable(i); + MPC_SUCCESS(p->data.not.lf()); + } + + case MPC_TYPE_MAYBE: + if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { + MPC_SUCCESS(r->output); + } else { + *e = mpc_err_merge(i, *e, r->error); + MPC_SUCCESS(p->data.not.lf()); + } + + /* Repeat Parsers */ + + case MPC_TYPE_MANY: + + results = results_stk; + + while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { + j++; + results = mpc_grow_results(i, j, results_stk, results); + } + + *e = mpc_err_merge(i, *e, 
results[j].error); + + MPC_SUCCESS( + mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + + case MPC_TYPE_MANY1: + + results = results_stk; + + while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { + j++; + results = mpc_grow_results(i, j, results_stk, results); + } + + if (j == 0) { + MPC_FAILURE( + mpc_err_many1(i, results[j].error); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } else { + + *e = mpc_err_merge(i, *e, results[j].error); + + MPC_SUCCESS( + mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } + + case MPC_TYPE_SEPBY1: + + results = results_stk; + + if(mpc_parse_run(i, p->data.sepby1.x, &results[j], e, depth+1)){ + j++; + results = mpc_grow_results(i, j, results_stk, results); + + while ( + mpc_parse_run(i, p->data.sepby1.sep, &results[j], e, depth+1) && + mpc_parse_run(i, p->data.sepby1.x, &results[j], e, depth+1) + ) { + j++; + results = mpc_grow_results(i, j, results_stk, results); + } + } + + if (j == 0) { + MPC_FAILURE( + mpc_err_many1(i, results[j].error); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } else { + *e = mpc_err_merge(i, *e, results[j].error); + + MPC_SUCCESS( + mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } + + case MPC_TYPE_COUNT: + + results = p->data.repeat.n > MPC_PARSE_STACK_MIN + ? 
mpc_malloc(i, sizeof(mpc_result_t) * p->data.repeat.n) + : results_stk; + + while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { + j++; + if (j == p->data.repeat.n) { break; } + } + + if (j == p->data.repeat.n) { + MPC_SUCCESS( + mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); + if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } else { + for (k = 0; k < j; k++) { + mpc_parse_dtor(i, p->data.repeat.dx, results[k].output); + } + MPC_FAILURE( + mpc_err_count(i, results[j].error, p->data.repeat.n); + if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } + + /* Combinatory Parsers */ + + case MPC_TYPE_OR: + + if (p->data.or.n == 0) { MPC_SUCCESS(NULL); } + + results = p->data.or.n > MPC_PARSE_STACK_MIN + ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) + : results_stk; + + for (j = 0; j < p->data.or.n; j++) { + if (mpc_parse_run(i, p->data.or.xs[j], &results[j], e, depth+1)) { + MPC_SUCCESS(results[j].output; + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } else { + *e = mpc_err_merge(i, *e, results[j].error); + } + } + + MPC_FAILURE(NULL; + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + + case MPC_TYPE_AND: + if (p->data.and.n == 0) { MPC_SUCCESS(NULL); } + + results = p->data.or.n > MPC_PARSE_STACK_MIN + ? 
mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) + : results_stk; + + mpc_input_mark(i); + for (j = 0; j < p->data.and.n; j++) { + if (!mpc_parse_run(i, p->data.and.xs[j], &results[j], e, depth+1)) { + mpc_input_rewind(i); + for (k = 0; k < j; k++) { + mpc_parse_dtor(i, p->data.and.dxs[k], results[k].output); + } + MPC_FAILURE(results[j].error; + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } + } + mpc_input_unmark(i); + MPC_SUCCESS( + mpc_parse_fold(i, p->data.and.f, j, (mpc_val_t**)results); + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + + /* End */ + + default: + + MPC_FAILURE(mpc_err_fail(i, "Unknown Parser Type Id!")); + } + + return 0; + +} + +#undef MPC_SUCCESS +#undef MPC_FAILURE +#undef MPC_PRIMITIVE + +int mpc_parse_input(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_err_t *e = mpc_err_fail(i, "Unknown Error"); + e->state = mpc_state_invalid(); + x = mpc_parse_run(i, p, r, &e, 0); + if (x) { + mpc_err_delete_internal(i, e); + r->output = mpc_export(i, r->output); + } else { + r->error = mpc_err_export(i, mpc_err_merge(i, e, r->error)); + } + return x; +} + +int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = mpc_input_new_string(filename, string); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = mpc_input_new_nstring(filename, string, length); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = mpc_input_new_file(filename, file); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = 
mpc_input_new_pipe(filename, pipe); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r) { + + FILE *f = fopen(filename, "rb"); + int res; + + if (f == NULL) { + r->output = NULL; + r->error = mpc_err_file(filename, "Unable to open file!"); + return 0; + } + + res = mpc_parse_file(filename, f, p, r); + fclose(f); + return res; +} + +/* +** Building a Parser +*/ + +static void mpc_undefine_unretained(mpc_parser_t *p, int force); + +static void mpc_undefine_or(mpc_parser_t *p) { + + int i; + for (i = 0; i < p->data.or.n; i++) { + mpc_undefine_unretained(p->data.or.xs[i], 0); + } + free(p->data.or.xs); + +} + +static void mpc_undefine_and(mpc_parser_t *p) { + + int i; + for (i = 0; i < p->data.and.n; i++) { + mpc_undefine_unretained(p->data.and.xs[i], 0); + } + free(p->data.and.xs); + free(p->data.and.dxs); + +} + +static void mpc_undefine_unretained(mpc_parser_t *p, int force) { + + if (p->retained && !force) { return; } + + switch (p->type) { + + case MPC_TYPE_FAIL: free(p->data.fail.m); break; + + case MPC_TYPE_ONEOF: + case MPC_TYPE_NONEOF: + case MPC_TYPE_STRING: + free(p->data.string.x); + break; + + case MPC_TYPE_APPLY: mpc_undefine_unretained(p->data.apply.x, 0); break; + case MPC_TYPE_APPLY_TO: mpc_undefine_unretained(p->data.apply_to.x, 0); break; + case MPC_TYPE_PREDICT: mpc_undefine_unretained(p->data.predict.x, 0); break; + + case MPC_TYPE_MAYBE: + case MPC_TYPE_NOT: + mpc_undefine_unretained(p->data.not.x, 0); + break; + + case MPC_TYPE_EXPECT: + mpc_undefine_unretained(p->data.expect.x, 0); + free(p->data.expect.m); + break; + + case MPC_TYPE_MANY: + case MPC_TYPE_MANY1: + case MPC_TYPE_COUNT: + mpc_undefine_unretained(p->data.repeat.x, 0); + break; + + case MPC_TYPE_SEPBY1: + mpc_undefine_unretained(p->data.sepby1.x, 0); + mpc_undefine_unretained(p->data.sepby1.sep, 0); + break; + + case MPC_TYPE_OR: mpc_undefine_or(p); break; + case MPC_TYPE_AND: 
mpc_undefine_and(p); break; + + case MPC_TYPE_CHECK: + mpc_undefine_unretained(p->data.check.x, 0); + free(p->data.check.e); + break; + + case MPC_TYPE_CHECK_WITH: + mpc_undefine_unretained(p->data.check_with.x, 0); + free(p->data.check_with.e); + break; + + default: break; + } + + if (!force) { + free(p->name); + free(p); + } + +} + +void mpc_delete(mpc_parser_t *p) { + if (p->retained) { + + if (p->type != MPC_TYPE_UNDEFINED) { + mpc_undefine_unretained(p, 0); + } + + free(p->name); + free(p); + + } else { + mpc_undefine_unretained(p, 0); + } +} + +static void mpc_soft_delete(mpc_val_t *x) { + mpc_undefine_unretained(x, 0); +} + +static mpc_parser_t *mpc_undefined(void) { + mpc_parser_t *p = calloc(1, sizeof(mpc_parser_t)); + p->retained = 0; + p->type = MPC_TYPE_UNDEFINED; + p->name = NULL; + return p; +} + +mpc_parser_t *mpc_new(const char *name) { + mpc_parser_t *p = mpc_undefined(); + p->retained = 1; + p->name = realloc(p->name, strlen(name) + 1); + strcpy(p->name, name); + return p; +} + +mpc_parser_t *mpc_copy(mpc_parser_t *a) { + int i = 0; + mpc_parser_t *p; + + if (a->retained) { return a; } + + p = mpc_undefined(); + p->retained = a->retained; + p->type = a->type; + p->data = a->data; + + if (a->name) { + p->name = malloc(strlen(a->name)+1); + strcpy(p->name, a->name); + } + + switch (a->type) { + + case MPC_TYPE_FAIL: + p->data.fail.m = malloc(strlen(a->data.fail.m)+1); + strcpy(p->data.fail.m, a->data.fail.m); + break; + + case MPC_TYPE_ONEOF: + case MPC_TYPE_NONEOF: + case MPC_TYPE_STRING: + p->data.string.x = malloc(strlen(a->data.string.x)+1); + strcpy(p->data.string.x, a->data.string.x); + break; + + case MPC_TYPE_APPLY: p->data.apply.x = mpc_copy(a->data.apply.x); break; + case MPC_TYPE_APPLY_TO: p->data.apply_to.x = mpc_copy(a->data.apply_to.x); break; + case MPC_TYPE_PREDICT: p->data.predict.x = mpc_copy(a->data.predict.x); break; + + case MPC_TYPE_MAYBE: + case MPC_TYPE_NOT: + p->data.not.x = mpc_copy(a->data.not.x); + break; + + case 
MPC_TYPE_EXPECT: + p->data.expect.x = mpc_copy(a->data.expect.x); + p->data.expect.m = malloc(strlen(a->data.expect.m)+1); + strcpy(p->data.expect.m, a->data.expect.m); + break; + + case MPC_TYPE_MANY: + case MPC_TYPE_MANY1: + case MPC_TYPE_COUNT: + p->data.repeat.x = mpc_copy(a->data.repeat.x); + break; + + case MPC_TYPE_SEPBY1: + p->data.sepby1.x = mpc_copy(a->data.sepby1.x); + p->data.sepby1.sep = mpc_copy(a->data.sepby1.sep); + break; + + case MPC_TYPE_OR: + p->data.or.xs = malloc(a->data.or.n * sizeof(mpc_parser_t*)); + for (i = 0; i < a->data.or.n; i++) { + p->data.or.xs[i] = mpc_copy(a->data.or.xs[i]); + } + break; + case MPC_TYPE_AND: + p->data.and.xs = malloc(a->data.and.n * sizeof(mpc_parser_t*)); + for (i = 0; i < a->data.and.n; i++) { + p->data.and.xs[i] = mpc_copy(a->data.and.xs[i]); + } + if (a->data.and.n > 0) { + p->data.and.dxs = malloc((a->data.and.n-1) * sizeof(mpc_dtor_t)); + for (i = 0; i < a->data.and.n-1; i++) { + p->data.and.dxs[i] = a->data.and.dxs[i]; + } + } + break; + + case MPC_TYPE_CHECK: + p->data.check.x = mpc_copy(a->data.check.x); + p->data.check.e = malloc(strlen(a->data.check.e)+1); + strcpy(p->data.check.e, a->data.check.e); + break; + case MPC_TYPE_CHECK_WITH: + p->data.check_with.x = mpc_copy(a->data.check_with.x); + p->data.check_with.e = malloc(strlen(a->data.check_with.e)+1); + strcpy(p->data.check_with.e, a->data.check_with.e); + break; + + default: break; + } + + + return p; +} + +mpc_parser_t *mpc_undefine(mpc_parser_t *p) { + mpc_undefine_unretained(p, 1); + p->type = MPC_TYPE_UNDEFINED; + return p; +} + +mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a) { + + if (p->retained) { + p->type = a->type; + p->data = a->data; + } else { + mpc_parser_t *a2 = mpc_failf("Attempt to assign to Unretained Parser!"); + p->type = a2->type; + p->data = a2->data; + free(a2); + } + + free(a); + return p; +} + +void mpc_cleanup(int n, ...) 
{ + int i; + mpc_parser_t **list = malloc(sizeof(mpc_parser_t*) * n); + + va_list va; + va_start(va, n); + for (i = 0; i < n; i++) { list[i] = va_arg(va, mpc_parser_t*); } + for (i = 0; i < n; i++) { mpc_undefine(list[i]); } + for (i = 0; i < n; i++) { mpc_delete(list[i]); } + va_end(va); + + free(list); +} + +mpc_parser_t *mpc_pass(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_PASS; + return p; +} + +mpc_parser_t *mpc_fail(const char *m) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_FAIL; + p->data.fail.m = malloc(strlen(m) + 1); + strcpy(p->data.fail.m, m); + return p; +} + +/* +** As `snprintf` is not ANSI standard this +** function `mpc_failf` should be considered +** unsafe. +** +** You have a few options if this is going to be +** trouble. +** +** - Ensure the format string does not exceed +** the buffer length using precision specifiers +** such as `%.512s`. +** +** - Patch this function in your code base to +** use `snprintf` or whatever variant your +** system supports. +** +** - Avoid it altogether. +** +*/ + +mpc_parser_t *mpc_failf(const char *fmt, ...) 
{ + + va_list va; + char *buffer; + + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_FAIL; + + va_start(va, fmt); + buffer = malloc(2048); + if (!buffer) { + return NULL; + } + vsprintf(buffer, fmt, va); + va_end(va); + + buffer = realloc(buffer, strlen(buffer) + 1); + p->data.fail.m = buffer; + return p; + +} + +mpc_parser_t *mpc_lift_val(mpc_val_t *x) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_LIFT_VAL; + p->data.lift.x = x; + return p; +} + +mpc_parser_t *mpc_lift(mpc_ctor_t lf) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_LIFT; + p->data.lift.lf = lf; + return p; +} + +mpc_parser_t *mpc_anchor(int(*f)(char,char)) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_ANCHOR; + p->data.anchor.f = f; + return mpc_expect(p, "anchor"); +} + +mpc_parser_t *mpc_state(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_STATE; + return p; +} + +mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *expected) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_EXPECT; + p->data.expect.x = a; + p->data.expect.m = malloc(strlen(expected) + 1); + strcpy(p->data.expect.m, expected); + return p; +} + +/* +** As `snprintf` is not ANSI standard this +** function `mpc_expectf` should be considered +** unsafe. +** +** You have a few options if this is going to be +** trouble. +** +** - Ensure the format string does not exceed +** the buffer length using precision specifiers +** such as `%.512s`. +** +** - Patch this function in your code base to +** use `snprintf` or whatever variant your +** system supports. +** +** - Avoid it altogether. +** +*/ + +mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...) 
{ + va_list va; + char *buffer; + + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_EXPECT; + + va_start(va, fmt); + buffer = malloc(2048); + if (!buffer) { + return NULL; + } + vsprintf(buffer, fmt, va); + va_end(va); + + buffer = realloc(buffer, strlen(buffer) + 1); + p->data.expect.x = a; + p->data.expect.m = buffer; + return p; +} + +/* +** Basic Parsers +*/ + +mpc_parser_t *mpc_any(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_ANY; + return mpc_expect(p, "any character"); +} + +mpc_parser_t *mpc_char(char c) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_SINGLE; + p->data.single.x = c; + return mpc_expectf(p, "'%c'", c); +} + +mpc_parser_t *mpc_range(char s, char e) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_RANGE; + p->data.range.x = s; + p->data.range.y = e; + return mpc_expectf(p, "character between '%c' and '%c'", s, e); +} + +mpc_parser_t *mpc_oneof(const char *s) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_ONEOF; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + return mpc_expectf(p, "one of '%s'", s); +} + +mpc_parser_t *mpc_noneof(const char *s) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_NONEOF; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + return mpc_expectf(p, "none of '%s'", s); + +} + +mpc_parser_t *mpc_satisfy(int(*f)(char)) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_SATISFY; + p->data.satisfy.f = f; + return mpc_expectf(p, "character satisfying function %p", f); +} + +mpc_parser_t *mpc_string(const char *s) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_STRING; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + return mpc_expectf(p, "\"%s\"", s); +} + +/* +** Core Parsers +*/ + +mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_APPLY; + p->data.apply.x = a; + p->data.apply.f = f; 
+ return p; +} + +mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_APPLY_TO; + p->data.apply_to.x = a; + p->data.apply_to.f = f; + p->data.apply_to.d = x; + return p; +} + +mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_CHECK; + p->data.check.x = a; + p->data.check.dx = da; + p->data.check.f = f; + p->data.check.e = malloc(strlen(e) + 1); + strcpy(p->data.check.e, e); + return p; +} + +mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_CHECK_WITH; + p->data.check_with.x = a; + p->data.check_with.dx = da; + p->data.check_with.f = f; + p->data.check_with.d = x; + p->data.check_with.e = malloc(strlen(e) + 1); + strcpy(p->data.check_with.e, e); + return p; +} + +mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...) { + va_list va; + char *buffer; + mpc_parser_t *p; + + va_start(va, fmt); + buffer = malloc(2048); + vsprintf(buffer, fmt, va); + va_end(va); + + p = mpc_check(a, da, f, buffer); + free(buffer); + + return p; +} + +mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...) 
{ + va_list va; + char *buffer; + mpc_parser_t *p; + + va_start(va, fmt); + buffer = malloc(2048); + vsprintf(buffer, fmt, va); + va_end(va); + + p = mpc_check_with(a, da, f, x, buffer); + free(buffer); + + return p; +} + +mpc_parser_t *mpc_predictive(mpc_parser_t *a) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_PREDICT; + p->data.predict.x = a; + return p; +} + +mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_NOT; + p->data.not.x = a; + p->data.not.dx = da; + p->data.not.lf = lf; + return p; +} + +mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da) { + return mpc_not_lift(a, da, mpcf_ctor_null); +} + +mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_MAYBE; + p->data.not.x = a; + p->data.not.lf = lf; + return p; +} + +mpc_parser_t *mpc_maybe(mpc_parser_t *a) { + return mpc_maybe_lift(a, mpcf_ctor_null); +} + +mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_MANY; + p->data.repeat.x = a; + p->data.repeat.f = f; + return p; +} + +mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_MANY1; + p->data.repeat.x = a; + p->data.repeat.f = f; + return p; +} + +mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_COUNT; + p->data.repeat.n = n; + p->data.repeat.f = f; + p->data.repeat.x = a; + p->data.repeat.dx = da; + return p; +} + +mpc_parser_t *mpc_sepby1(mpc_fold_t f, mpc_parser_t *sep, mpc_parser_t *a) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_SEPBY1; + p->data.sepby1.x = a; + p->data.sepby1.f = f; + p->data.sepby1.sep = sep; + return p; +} + +mpc_parser_t *mpc_or(int n, ...) 
{ + + int i; + va_list va; + + mpc_parser_t *p = mpc_undefined(); + + p->type = MPC_TYPE_OR; + p->data.or.n = n; + p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); + + va_start(va, n); + for (i = 0; i < n; i++) { + p->data.or.xs[i] = va_arg(va, mpc_parser_t*); + } + va_end(va); + + return p; +} + +mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...) { + + int i; + va_list va; + + mpc_parser_t *p = mpc_undefined(); + + p->type = MPC_TYPE_AND; + p->data.and.n = n; + p->data.and.f = f; + p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); + p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); + + va_start(va, f); + for (i = 0; i < n; i++) { + p->data.and.xs[i] = va_arg(va, mpc_parser_t*); + } + for (i = 0; i < (n-1); i++) { + p->data.and.dxs[i] = va_arg(va, mpc_dtor_t); + } + va_end(va); + + return p; +} + +/* +** Common Parsers +*/ + +mpc_parser_t *mpc_soi(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_SOI; + return mpc_expect(p, "start of input"); +} + +mpc_parser_t *mpc_eoi(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_EOI; + return mpc_expect(p, "end of input"); +} + +static int mpc_boundary_anchor(char prev, char next) { + const char* word = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789_"; + if ( strchr(word, next) && prev == '\0') { return 1; } + if ( strchr(word, prev) && next == '\0') { return 1; } + if ( strchr(word, next) && !strchr(word, prev)) { return 1; } + if (!strchr(word, next) && strchr(word, prev)) { return 1; } + return 0; +} + +static int mpc_boundary_newline_anchor(char prev, char next) { + (void)next; + return prev == '\n'; +} + +mpc_parser_t *mpc_boundary(void) { return mpc_expect(mpc_anchor(mpc_boundary_anchor), "word boundary"); } +mpc_parser_t *mpc_boundary_newline(void) { return mpc_expect(mpc_anchor(mpc_boundary_newline_anchor), "start of newline"); } + +mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); } +mpc_parser_t 
*mpc_whitespaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_whitespace()), "spaces"); } +mpc_parser_t *mpc_blank(void) { return mpc_expect(mpc_apply(mpc_whitespaces(), mpcf_free), "whitespace"); } + +mpc_parser_t *mpc_newline(void) { return mpc_expect(mpc_char('\n'), "newline"); } +mpc_parser_t *mpc_tab(void) { return mpc_expect(mpc_char('\t'), "tab"); } +mpc_parser_t *mpc_escape(void) { return mpc_and(2, mpcf_strfold, mpc_char('\\'), mpc_any(), free); } + +mpc_parser_t *mpc_digit(void) { return mpc_expect(mpc_oneof("0123456789"), "digit"); } +mpc_parser_t *mpc_hexdigit(void) { return mpc_expect(mpc_oneof("0123456789ABCDEFabcdef"), "hex digit"); } +mpc_parser_t *mpc_octdigit(void) { return mpc_expect(mpc_oneof("01234567"), "oct digit"); } +mpc_parser_t *mpc_digits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_digit()), "digits"); } +mpc_parser_t *mpc_hexdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_hexdigit()), "hex digits"); } +mpc_parser_t *mpc_octdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_octdigit()), "oct digits"); } + +mpc_parser_t *mpc_lower(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyz"), "lowercase letter"); } +mpc_parser_t *mpc_upper(void) { return mpc_expect(mpc_oneof("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "uppercase letter"); } +mpc_parser_t *mpc_alpha(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), "letter"); } +mpc_parser_t *mpc_underscore(void) { return mpc_expect(mpc_char('_'), "underscore"); } +mpc_parser_t *mpc_alphanum(void) { return mpc_expect(mpc_or(3, mpc_alpha(), mpc_digit(), mpc_underscore()), "alphanumeric"); } + +mpc_parser_t *mpc_int(void) { return mpc_expect(mpc_apply(mpc_digits(), mpcf_int), "integer"); } +mpc_parser_t *mpc_hex(void) { return mpc_expect(mpc_apply(mpc_hexdigits(), mpcf_hex), "hexadecimal"); } +mpc_parser_t *mpc_oct(void) { return mpc_expect(mpc_apply(mpc_octdigits(), mpcf_oct), "octadecimal"); } +mpc_parser_t 
*mpc_number(void) { return mpc_expect(mpc_or(3, mpc_int(), mpc_hex(), mpc_oct()), "number"); } + +mpc_parser_t *mpc_real(void) { + + /* [+-]?\d+(\.\d+)?([eE][+-]?[0-9]+)? */ + + mpc_parser_t *p0, *p1, *p2, *p30, *p31, *p32, *p3; + + p0 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); + p1 = mpc_digits(); + p2 = mpc_maybe_lift(mpc_and(2, mpcf_strfold, mpc_char('.'), mpc_digits(), free), mpcf_ctor_str); + p30 = mpc_oneof("eE"); + p31 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); + p32 = mpc_digits(); + p3 = mpc_maybe_lift(mpc_and(3, mpcf_strfold, p30, p31, p32, free, free), mpcf_ctor_str); + + return mpc_expect(mpc_and(4, mpcf_strfold, p0, p1, p2, p3, free, free, free), "real"); + +} + +mpc_parser_t *mpc_float(void) { + return mpc_expect(mpc_apply(mpc_real(), mpcf_float), "float"); +} + +mpc_parser_t *mpc_char_lit(void) { + return mpc_expect(mpc_between(mpc_or(2, mpc_escape(), mpc_any()), free, "'", "'"), "char"); +} + +mpc_parser_t *mpc_string_lit(void) { + mpc_parser_t *strchar = mpc_or(2, mpc_escape(), mpc_noneof("\"")); + return mpc_expect(mpc_between(mpc_many(mpcf_strfold, strchar), free, "\"", "\""), "string"); +} + +mpc_parser_t *mpc_regex_lit(void) { + mpc_parser_t *regexchar = mpc_or(2, mpc_escape(), mpc_noneof("/")); + return mpc_expect(mpc_between(mpc_many(mpcf_strfold, regexchar), free, "/", "/"), "regex"); +} + +mpc_parser_t *mpc_ident(void) { + mpc_parser_t *p0, *p1; + p0 = mpc_or(2, mpc_alpha(), mpc_underscore()); + p1 = mpc_many(mpcf_strfold, mpc_alphanum()); + return mpc_and(2, mpcf_strfold, p0, p1, free); +} + +/* +** Useful Parsers +*/ + +mpc_parser_t *mpc_startwith(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_soi(), a, mpcf_dtor_null); } +mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(2, mpcf_fst, a, mpc_eoi(), da); } +mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(3, mpcf_snd, mpc_soi(), a, mpc_eoi(), mpcf_dtor_null, da); } + +mpc_parser_t *mpc_stripl(mpc_parser_t *a) { return 
mpc_and(2, mpcf_snd, mpc_blank(), a, mpcf_dtor_null); } +mpc_parser_t *mpc_stripr(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } +mpc_parser_t *mpc_strip(mpc_parser_t *a) { return mpc_and(3, mpcf_snd, mpc_blank(), a, mpc_blank(), mpcf_dtor_null, mpcf_dtor_null); } +mpc_parser_t *mpc_tok(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } +mpc_parser_t *mpc_sym(const char *s) { return mpc_tok(mpc_string(s)); } + +mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da) { return mpc_whole(mpc_strip(a), da); } + +mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { + return mpc_and(3, mpcf_snd_free, + mpc_string(o), a, mpc_string(c), + free, ad); +} + +mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "(", ")"); } +mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "<", ">"); } +mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "{", "}"); } +mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "[", "]"); } + +mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { + return mpc_and(3, mpcf_snd_free, + mpc_sym(o), mpc_tok(a), mpc_sym(c), + free, ad); +} + +mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "(", ")"); } +mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "<", ">"); } +mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "{", "}"); } +mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "[", "]"); } + +/* +** Regular Expression Parsers +*/ + +/* +** So here is a cute bootstrapping. 
+** +** I'm using the previously defined +** mpc constructs and functions to +** parse the user regex string and +** construct a parser from it. +** +** As it turns out lots of the standard +** mpc functions look a lot like `fold` +** functions and so can be used indirectly +** by many of the parsing functions to build +** a parser directly - as we are parsing. +** +** This is certainly something that +** would be less elegant/interesting +** in a two-phase parser which first +** builds an AST and then traverses it +** to generate the object. +** +** This whole thing acts as a great +** case study for how trivial it can be +** to write a great parser in a few +** lines of code using mpc. +*/ + +/* +** +** ### Regular Expression Grammar +** +** : | ( "|" ) +** +** : * +** +** : +** | "*" +** | "+" +** | "?" +** | "{" "}" +** +** : +** | "\" +** | "(" ")" +** | "[" "]" +*/ + +static mpc_val_t *mpcf_re_or(int n, mpc_val_t **xs) { + (void) n; + if (xs[1] == NULL) { return xs[0]; } + else { return mpc_or(2, xs[0], xs[1]); } +} + +static mpc_val_t *mpcf_re_and(int n, mpc_val_t **xs) { + int i; + mpc_parser_t *p = mpc_lift(mpcf_ctor_str); + for (i = 0; i < n; i++) { + p = mpc_and(2, mpcf_strfold, p, xs[i], free); + } + return p; +} + +static mpc_val_t *mpcf_re_repeat(int n, mpc_val_t **xs) { + int num; + (void) n; + if (xs[1] == NULL) { return xs[0]; } + switch(((char*)xs[1])[0]) + { + case '*': { free(xs[1]); return mpc_many(mpcf_strfold, xs[0]); }; break; + case '+': { free(xs[1]); return mpc_many1(mpcf_strfold, xs[0]); }; break; + case '?': { free(xs[1]); return mpc_maybe_lift(xs[0], mpcf_ctor_str); }; break; + default: + num = *(int*)xs[1]; + free(xs[1]); + } + + return mpc_count(num, mpcf_strfold, xs[0], free); +} + +static mpc_parser_t *mpc_re_escape_char(char c) { + switch (c) { + case 'a': return mpc_char('\a'); + case 'f': return mpc_char('\f'); + case 'n': return mpc_char('\n'); + case 'r': return mpc_char('\r'); + case 't': return mpc_char('\t'); + case 'v': 
return mpc_char('\v'); + case 'b': return mpc_and(2, mpcf_snd, mpc_boundary(), mpc_lift(mpcf_ctor_str), free); + case 'B': return mpc_not_lift(mpc_boundary(), free, mpcf_ctor_str); + case 'A': return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); + case 'Z': return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free); + case 'd': return mpc_digit(); + case 'D': return mpc_not_lift(mpc_digit(), free, mpcf_ctor_str); + case 's': return mpc_whitespace(); + case 'S': return mpc_not_lift(mpc_whitespace(), free, mpcf_ctor_str); + case 'w': return mpc_alphanum(); + case 'W': return mpc_not_lift(mpc_alphanum(), free, mpcf_ctor_str); + default: return NULL; + } +} + +static mpc_val_t *mpcf_re_escape(mpc_val_t *x, void* data) { + + int mode = *((int*)data); + char *s = x; + mpc_parser_t *p; + + /* Any Character */ + if (s[0] == '.') { + free(s); + if (mode & MPC_RE_DOTALL) { + return mpc_any(); + } else { + return mpc_expect(mpc_noneof("\n"), "any character except a newline"); + } + } + + /* Start of Input */ + if (s[0] == '^') { + free(s); + if (mode & MPC_RE_MULTILINE) { + return mpc_and(2, mpcf_snd, mpc_or(2, mpc_soi(), mpc_boundary_newline()), mpc_lift(mpcf_ctor_str), free); + } else { + return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); + } + } + + /* End of Input */ + if (s[0] == '$') { + free(s); + if (mode & MPC_RE_MULTILINE) { + return mpc_or(2, + mpc_newline(), + mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); + } else { + return mpc_or(2, + mpc_and(2, mpcf_fst, mpc_newline(), mpc_eoi(), free), + mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); + } + } + + /* Regex Escape */ + if (s[0] == '\\') { + p = mpc_re_escape_char(s[1]); + p = (p == NULL) ? 
mpc_char(s[1]) : p; + free(s); + return p; + } + + /* Regex Standard */ + p = mpc_char(s[0]); + free(s); + return p; +} + +static const char *mpc_re_range_escape_char(char c) { + switch (c) { + case '-': return "-"; + case 'a': return "\a"; + case 'f': return "\f"; + case 'n': return "\n"; + case 'r': return "\r"; + case 't': return "\t"; + case 'v': return "\v"; + case 'b': return "\b"; + case 'd': return "0123456789"; + case 's': return " \f\n\r\t\v"; + case 'w': return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; + default: return NULL; + } +} + +static mpc_val_t *mpcf_re_range(mpc_val_t *x) { + + mpc_parser_t *out; + size_t i, j; + size_t start, end; + const char *tmp = NULL; + const char *s = x; + int comp = s[0] == '^' ? 1 : 0; + char *range = calloc(1,1); + + if (s[0] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } + if (s[0] == '^' && + s[1] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } + + for (i = comp; i < strlen(s); i++){ + + /* Regex Range Escape */ + if (s[i] == '\\') { + tmp = mpc_re_range_escape_char(s[i+1]); + if (tmp != NULL) { + range = realloc(range, strlen(range) + strlen(tmp) + 1); + strcat(range, tmp); + } else { + range = realloc(range, strlen(range) + 1 + 1); + range[strlen(range) + 1] = '\0'; + range[strlen(range) + 0] = s[i+1]; + } + i++; + } + + /* Regex Range...Range */ + else if (s[i] == '-') { + if (s[i+1] == '\0' || i == 0) { + range = realloc(range, strlen(range) + strlen("-") + 1); + strcat(range, "-"); + } else { + start = s[i-1]+1; + end = s[i+1]-1; + for (j = start; j <= end; j++) { + range = realloc(range, strlen(range) + 1 + 1 + 1); + range[strlen(range) + 1] = '\0'; + range[strlen(range) + 0] = (char)j; + } + } + } + + /* Regex Range Normal */ + else { + range = realloc(range, strlen(range) + 1 + 1); + range[strlen(range) + 1] = '\0'; + range[strlen(range) + 0] = s[i]; + } + + } + + out = comp == 1 ? 
mpc_noneof(range) : mpc_oneof(range); + + free(x); + free(range); + + return out; +} + +mpc_parser_t *mpc_re(const char *re) { + return mpc_re_mode(re, MPC_RE_DEFAULT); +} + +mpc_parser_t *mpc_re_mode(const char *re, int mode) { + + char *err_msg; + mpc_parser_t *err_out; + mpc_result_t r; + mpc_parser_t *Regex, *Term, *Factor, *Base, *Range, *RegexEnclose; + + Regex = mpc_new("regex"); + Term = mpc_new("term"); + Factor = mpc_new("factor"); + Base = mpc_new("base"); + Range = mpc_new("range"); + + mpc_define(Regex, mpc_and(2, mpcf_re_or, + Term, + mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_char('|'), Regex, free)), + (mpc_dtor_t)mpc_delete + )); + + mpc_define(Term, mpc_many(mpcf_re_and, Factor)); + + mpc_define(Factor, mpc_and(2, mpcf_re_repeat, + Base, + mpc_or(5, + mpc_char('*'), mpc_char('+'), mpc_char('?'), + mpc_brackets(mpc_int(), free), + mpc_pass()), + (mpc_dtor_t)mpc_delete + )); + + mpc_define(Base, mpc_or(4, + mpc_parens(Regex, (mpc_dtor_t)mpc_delete), + mpc_squares(Range, (mpc_dtor_t)mpc_delete), + mpc_apply_to(mpc_escape(), mpcf_re_escape, &mode), + mpc_apply_to(mpc_noneof(")|"), mpcf_re_escape, &mode) + )); + + mpc_define(Range, mpc_apply( + mpc_many(mpcf_strfold, mpc_or(2, mpc_escape(), mpc_noneof("]"))), + mpcf_re_range + )); + + RegexEnclose = mpc_whole(mpc_predictive(Regex), (mpc_dtor_t)mpc_delete); + + mpc_optimise(RegexEnclose); + mpc_optimise(Regex); + mpc_optimise(Term); + mpc_optimise(Factor); + mpc_optimise(Base); + mpc_optimise(Range); + + if(!mpc_parse("", re, RegexEnclose, &r)) { + err_msg = mpc_err_string(r.error); + err_out = mpc_failf("Invalid Regex: %s", err_msg); + mpc_err_delete(r.error); + free(err_msg); + r.output = err_out; + } + + mpc_cleanup(6, RegexEnclose, Regex, Term, Factor, Base, Range); + + mpc_optimise(r.output); + + return r.output; + +} + +/* +** Common Fold Functions +*/ + +void mpcf_dtor_null(mpc_val_t *x) { (void) x; return; } + +mpc_val_t *mpcf_ctor_null(void) { return NULL; } +mpc_val_t *mpcf_ctor_str(void) { 
return calloc(1, 1); } +mpc_val_t *mpcf_free(mpc_val_t *x) { free(x); return NULL; } + +mpc_val_t *mpcf_int(mpc_val_t *x) { + int *y = malloc(sizeof(int)); + *y = strtol(x, NULL, 10); + free(x); + return y; +} + +mpc_val_t *mpcf_hex(mpc_val_t *x) { + int *y = malloc(sizeof(int)); + *y = strtol(x, NULL, 16); + free(x); + return y; +} + +mpc_val_t *mpcf_oct(mpc_val_t *x) { + int *y = malloc(sizeof(int)); + *y = strtol(x, NULL, 8); + free(x); + return y; +} + +mpc_val_t *mpcf_float(mpc_val_t *x) { + float *y = malloc(sizeof(float)); + *y = strtod(x, NULL); + free(x); + return y; +} + +mpc_val_t *mpcf_strtriml(mpc_val_t *x) { + char *s = x; + while (isspace((unsigned char)*s)) { + memmove(s, s+1, strlen(s)); + } + return s; +} + +mpc_val_t *mpcf_strtrimr(mpc_val_t *x) { + char *s = x; + size_t l = strlen(s); + while (l > 0 && isspace((unsigned char)s[l-1])) { + s[l-1] = '\0'; l--; + } + return s; +} + +mpc_val_t *mpcf_strtrim(mpc_val_t *x) { + return mpcf_strtriml(mpcf_strtrimr(x)); +} + +static const char mpc_escape_input_c[] = { + '\a', '\b', '\f', '\n', '\r', + '\t', '\v', '\\', '\'', '\"', '\0'}; + +static const char *mpc_escape_output_c[] = { + "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", + "\\v", "\\\\", "\\'", "\\\"", "\\0", NULL}; + +static const char mpc_escape_input_raw_re[] = { '/' }; +static const char *mpc_escape_output_raw_re[] = { "\\/", NULL }; + +static const char mpc_escape_input_raw_cstr[] = { '"' }; +static const char *mpc_escape_output_raw_cstr[] = { "\\\"", NULL }; + +static const char mpc_escape_input_raw_cchar[] = { '\'' }; +static const char *mpc_escape_output_raw_cchar[] = { "\\'", NULL }; + +static mpc_val_t *mpcf_escape_new(mpc_val_t *x, const char *input, const char **output) { + + int i; + int found; + char buff[2]; + char *s = x; + char *y = calloc(1, 1); + + while (*s) { + + i = 0; + found = 0; + + while (output[i]) { + if (*s == input[i]) { + y = realloc(y, strlen(y) + strlen(output[i]) + 1); + strcat(y, output[i]); + found = 1; + break; + 
} + i++; + } + + if (!found) { + y = realloc(y, strlen(y) + 2); + buff[0] = *s; buff[1] = '\0'; + strcat(y, buff); + } + + s++; + } + + + return y; +} + +static mpc_val_t *mpcf_unescape_new(mpc_val_t *x, const char *input, const char **output) { + + int i; + int found = 0; + char buff[2]; + char *s = x; + char *y = calloc(1, 1); + + while (*s) { + + i = 0; + found = 0; + + while (output[i]) { + if ((*(s+0)) == output[i][0] && + (*(s+1)) == output[i][1]) { + y = realloc(y, strlen(y) + 1 + 1); + buff[0] = input[i]; buff[1] = '\0'; + strcat(y, buff); + found = 1; + s++; + break; + } + i++; + } + + if (!found) { + y = realloc(y, strlen(y) + 1 + 1); + buff[0] = *s; buff[1] = '\0'; + strcat(y, buff); + } + + if (*s == '\0') { break; } + else { s++; } + } + + return y; + +} + +mpc_val_t *mpcf_escape(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_c, mpc_escape_output_c); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape(mpc_val_t *x) { + mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_c, mpc_escape_output_c); + free(x); + return y; +} + +mpc_val_t *mpcf_escape_regex(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape_regex(mpc_val_t *x) { + mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); + free(x); + return y; +} + +mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x) { + mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); + free(x); + return y; +} + +mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x) { + mpc_val_t *y = 
mpcf_unescape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); + free(x); + return y; +} + +mpc_val_t *mpcf_null(int n, mpc_val_t** xs) { (void) n; (void) xs; return NULL; } +mpc_val_t *mpcf_fst(int n, mpc_val_t **xs) { (void) n; return xs[0]; } +mpc_val_t *mpcf_snd(int n, mpc_val_t **xs) { (void) n; return xs[1]; } +mpc_val_t *mpcf_trd(int n, mpc_val_t **xs) { (void) n; return xs[2]; } + +static mpc_val_t *mpcf_nth_free(int n, mpc_val_t **xs, int x) { + int i; + for (i = 0; i < n; i++) { + if (i != x) { free(xs[i]); } + } + return xs[x]; +} + +mpc_val_t *mpcf_fst_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 0); } +mpc_val_t *mpcf_snd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 1); } +mpc_val_t *mpcf_trd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 2); } +mpc_val_t *mpcf_all_free(int n, mpc_val_t** xs) { + int i; + for (i = 0; i < n; i++) { + free(xs[i]); + } + return NULL; +} + +mpc_val_t *mpcf_strfold(int n, mpc_val_t **xs) { + int i; + size_t l = 0; + + if (n == 0) { return calloc(1, 1); } + + for (i = 0; i < n; i++) { l += strlen(xs[i]); } + + xs[0] = realloc(xs[0], l + 1); + + for (i = 1; i < n; i++) { + strcat(xs[0], xs[i]); free(xs[i]); + } + + return xs[0]; +} + +/* +** Printing +*/ + +static void mpc_print_unretained(mpc_parser_t *p, int force) { + + /* TODO: Print Everything Escaped */ + + int i; + char *s, *e; + char buff[2]; + + if (p->retained && !force) {; + if (p->name) { printf("<%s>", p->name); } + else { printf(""); } + return; + } + + if (p->type == MPC_TYPE_UNDEFINED) { printf(""); } + if (p->type == MPC_TYPE_PASS) { printf("<:>"); } + if (p->type == MPC_TYPE_FAIL) { printf(""); } + if (p->type == MPC_TYPE_LIFT) { printf("<#>"); } + if (p->type == MPC_TYPE_STATE) { printf(""); } + if (p->type == MPC_TYPE_ANCHOR) { printf("<@>"); } + if (p->type == MPC_TYPE_EXPECT) { + printf("%s", p->data.expect.m); + /*mpc_print_unretained(p->data.expect.x, 0);*/ + } + + if (p->type == MPC_TYPE_ANY) { 
printf("<.>"); } + if (p->type == MPC_TYPE_SATISFY) { printf(""); } + + if (p->type == MPC_TYPE_SINGLE) { + buff[0] = p->data.single.x; buff[1] = '\0'; + s = mpcf_escape_new( + buff, + mpc_escape_input_c, + mpc_escape_output_c); + printf("'%s'", s); + free(s); + } + + if (p->type == MPC_TYPE_RANGE) { + buff[0] = p->data.range.x; buff[1] = '\0'; + s = mpcf_escape_new( + buff, + mpc_escape_input_c, + mpc_escape_output_c); + buff[0] = p->data.range.y; buff[1] = '\0'; + e = mpcf_escape_new( + buff, + mpc_escape_input_c, + mpc_escape_output_c); + printf("[%s-%s]", s, e); + free(s); + free(e); + } + + if (p->type == MPC_TYPE_ONEOF) { + s = mpcf_escape_new( + p->data.string.x, + mpc_escape_input_c, + mpc_escape_output_c); + printf("[%s]", s); + free(s); + } + + if (p->type == MPC_TYPE_NONEOF) { + s = mpcf_escape_new( + p->data.string.x, + mpc_escape_input_c, + mpc_escape_output_c); + printf("[^%s]", s); + free(s); + } + + if (p->type == MPC_TYPE_STRING) { + s = mpcf_escape_new( + p->data.string.x, + mpc_escape_input_c, + mpc_escape_output_c); + printf("\"%s\"", s); + free(s); + } + + if (p->type == MPC_TYPE_APPLY) { mpc_print_unretained(p->data.apply.x, 0); } + if (p->type == MPC_TYPE_APPLY_TO) { mpc_print_unretained(p->data.apply_to.x, 0); } + if (p->type == MPC_TYPE_PREDICT) { mpc_print_unretained(p->data.predict.x, 0); } + + if (p->type == MPC_TYPE_NOT) { mpc_print_unretained(p->data.not.x, 0); printf("!"); } + if (p->type == MPC_TYPE_MAYBE) { mpc_print_unretained(p->data.not.x, 0); printf("?"); } + + if (p->type == MPC_TYPE_MANY) { mpc_print_unretained(p->data.repeat.x, 0); printf("*"); } + if (p->type == MPC_TYPE_MANY1) { mpc_print_unretained(p->data.repeat.x, 0); printf("+"); } + if (p->type == MPC_TYPE_COUNT) { mpc_print_unretained(p->data.repeat.x, 0); printf("{%i}", p->data.repeat.n); } + if (p->type == MPC_TYPE_SEPBY1) { + mpc_print_unretained(p->data.sepby1.x, 0); + printf(" ("); + mpc_print_unretained(p->data.sepby1.sep, 0); + printf(" "); + 
mpc_print_unretained(p->data.sepby1.x, 0); + printf(")"); + printf("*"); + } + + if (p->type == MPC_TYPE_OR) { + printf("("); + for(i = 0; i < p->data.or.n-1; i++) { + mpc_print_unretained(p->data.or.xs[i], 0); + printf(" | "); + } + mpc_print_unretained(p->data.or.xs[p->data.or.n-1], 0); + printf(")"); + } + + if (p->type == MPC_TYPE_AND) { + printf("("); + for(i = 0; i < p->data.and.n-1; i++) { + mpc_print_unretained(p->data.and.xs[i], 0); + printf(" "); + } + mpc_print_unretained(p->data.and.xs[p->data.and.n-1], 0); + printf(")"); + } + + if (p->type == MPC_TYPE_CHECK) { + mpc_print_unretained(p->data.check.x, 0); + printf("->?"); + } + if (p->type == MPC_TYPE_CHECK_WITH) { + mpc_print_unretained(p->data.check_with.x, 0); + printf("->?"); + } + +} + +void mpc_print(mpc_parser_t *p) { + mpc_print_unretained(p, 1); + printf("\n"); +} + +/* +** Testing +*/ + +/* +** These functions are slightly unwieldy and +** also the whole of the testing suite for mpc +** mpc is pretty shaky. +** +** It could do with a lot more tests and more +** precision. Currently I am only really testing +** changes off of the examples. 
+** +*/ + +int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, + int(*tester)(const void*, const void*), + mpc_dtor_t destructor, + void(*printer)(const void*)) { + mpc_result_t r; + (void) printer; + if (mpc_parse("", s, p, &r)) { + + if (tester(r.output, d)) { + destructor(r.output); + return 0; + } else { + destructor(r.output); + return 1; + } + + } else { + mpc_err_delete(r.error); + return 1; + } + +} + +int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, + int(*tester)(const void*, const void*), + mpc_dtor_t destructor, + void(*printer)(const void*)) { + + mpc_result_t r; + if (mpc_parse("", s, p, &r)) { + + if (tester(r.output, d)) { + destructor(r.output); + return 1; + } else { + printf("Got "); printer(r.output); printf("\n"); + printf("Expected "); printer(d); printf("\n"); + destructor(r.output); + return 0; + } + + } else { + mpc_err_print(r.error); + mpc_err_delete(r.error); + return 0; + + } + +} + + +/* +** AST +*/ + +void mpc_ast_delete(mpc_ast_t *a) { + + int i; + + if (a == NULL) { return; } + + for (i = 0; i < a->children_num; i++) { + mpc_ast_delete(a->children[i]); + } + + free(a->children); + free(a->tag); + free(a->contents); + free(a); + +} + +static void mpc_ast_delete_no_children(mpc_ast_t *a) { + free(a->children); + free(a->tag); + free(a->contents); + free(a); +} + +mpc_ast_t *mpc_ast_new(const char *tag, const char *contents) { + + mpc_ast_t *a = malloc(sizeof(mpc_ast_t)); + + a->tag = malloc(strlen(tag) + 1); + strcpy(a->tag, tag); + + a->contents = malloc(strlen(contents) + 1); + strcpy(a->contents, contents); + + a->state = mpc_state_new(); + + a->children_num = 0; + a->children = NULL; + return a; + +} + +mpc_ast_t *mpc_ast_build(int n, const char *tag, ...) 
{ + + mpc_ast_t *a = mpc_ast_new(tag, ""); + + int i; + va_list va; + va_start(va, tag); + + for (i = 0; i < n; i++) { + mpc_ast_add_child(a, va_arg(va, mpc_ast_t*)); + } + + va_end(va); + + return a; + +} + +mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a) { + + mpc_ast_t *r; + + if (a == NULL) { return a; } + if (a->children_num == 0) { return a; } + if (a->children_num == 1) { return a; } + + r = mpc_ast_new(">", ""); + mpc_ast_add_child(r, a); + return r; +} + +int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b) { + + int i; + + if (strcmp(a->tag, b->tag) != 0) { return 0; } + if (strcmp(a->contents, b->contents) != 0) { return 0; } + if (a->children_num != b->children_num) { return 0; } + + for (i = 0; i < a->children_num; i++) { + if (!mpc_ast_eq(a->children[i], b->children[i])) { return 0; } + } + + return 1; +} + +mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a) { + r->children_num++; + r->children = realloc(r->children, sizeof(mpc_ast_t*) * r->children_num); + r->children[r->children_num-1] = a; + return r; +} + +mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t) { + if (a == NULL) { return a; } + a->tag = realloc(a->tag, strlen(t) + 1 + strlen(a->tag) + 1); + memmove(a->tag + strlen(t) + 1, a->tag, strlen(a->tag)+1); + memmove(a->tag, t, strlen(t)); + memmove(a->tag + strlen(t), "|", 1); + return a; +} + +mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t) { + if (a == NULL) { return a; } + a->tag = realloc(a->tag, (strlen(t)-1) + strlen(a->tag) + 1); + memmove(a->tag + (strlen(t)-1), a->tag, strlen(a->tag)+1); + memmove(a->tag, t, (strlen(t)-1)); + return a; +} + +mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t) { + a->tag = realloc(a->tag, strlen(t) + 1); + strcpy(a->tag, t); + return a; +} + +mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s) { + if (a == NULL) { return a; } + a->state = s; + return a; +} + +static void mpc_ast_print_depth(mpc_ast_t *a, int d, FILE *fp) { + + int i; + + if (a == NULL) { + fprintf(fp, "NULL\n"); + return; + 
} + + for (i = 0; i < d; i++) { fprintf(fp, " "); } + + if (strlen(a->contents)) { + fprintf(fp, "%s:%lu:%lu '%s'\n", a->tag, + (long unsigned int)(a->state.row+1), + (long unsigned int)(a->state.col+1), + a->contents); + } else { + fprintf(fp, "%s \n", a->tag); + } + + for (i = 0; i < a->children_num; i++) { + mpc_ast_print_depth(a->children[i], d+1, fp); + } + +} + +void mpc_ast_print(mpc_ast_t *a) { + mpc_ast_print_depth(a, 0, stdout); +} + +void mpc_ast_print_to(mpc_ast_t *a, FILE *fp) { + mpc_ast_print_depth(a, 0, fp); +} + +int mpc_ast_get_index(mpc_ast_t *ast, const char *tag) { + return mpc_ast_get_index_lb(ast, tag, 0); +} + +int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb) { + int i; + + for(i=lb; ichildren_num; i++) { + if(strcmp(ast->children[i]->tag, tag) == 0) { + return i; + } + } + + return -1; +} + +mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag) { + return mpc_ast_get_child_lb(ast, tag, 0); +} + +mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb) { + int i; + + for(i=lb; ichildren_num; i++) { + if(strcmp(ast->children[i]->tag, tag) == 0) { + return ast->children[i]; + } + } + + return NULL; +} + +mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, + mpc_ast_trav_order_t order) +{ + mpc_ast_trav_t *trav, *n_trav; + mpc_ast_t *cnode = ast; + + /* Create the traversal structure */ + trav = malloc(sizeof(mpc_ast_trav_t)); + trav->curr_node = cnode; + trav->parent = NULL; + trav->curr_child = 0; + trav->order = order; + + /* Get start node */ + switch(order) { + case mpc_ast_trav_order_pre: + /* Nothing else is needed for pre order start */ + break; + + case mpc_ast_trav_order_post: + while(cnode->children_num > 0) { + cnode = cnode->children[0]; + + n_trav = malloc(sizeof(mpc_ast_trav_t)); + n_trav->curr_node = cnode; + n_trav->parent = trav; + n_trav->curr_child = 0; + n_trav->order = order; + + trav = n_trav; + } + + break; + + default: + /* Unreachable, but compiler complaints */ + break; + } 
+
+  return trav;
+}
+/* Returns the node at the traversal cursor and advances *trav to the next node in the cursor's order; returns NULL once *trav is NULL. Cursor frames are malloc'd and freed as the walk moves. */
+mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav) {
+  mpc_ast_trav_t *n_trav, *to_free;
+  mpc_ast_t *ret = NULL;
+  int cchild;
+
+  /* The end of traversal was reached */
+  if(*trav == NULL) return NULL;
+
+  switch((*trav)->order) {
+    case mpc_ast_trav_order_pre:
+      ret = (*trav)->curr_node;
+
+      /* If there aren't any more children, go up (freeing each exhausted frame) */
+      while(*trav != NULL &&
+        (*trav)->curr_child >= (*trav)->curr_node->children_num)
+      {
+        to_free = *trav;
+        *trav = (*trav)->parent;
+        free(to_free);
+      }
+
+      /* If trav is NULL, the end was reached */
+      if(*trav == NULL) {
+        break;
+      }
+
+      /* Go to next child: push a new frame for it and bump the parent's child index */
+      n_trav = malloc(sizeof(mpc_ast_trav_t));
+
+      cchild = (*trav)->curr_child;
+      n_trav->curr_node = (*trav)->curr_node->children[cchild];
+      n_trav->parent = *trav;
+      n_trav->curr_child = 0;
+      n_trav->order = (*trav)->order;
+
+      (*trav)->curr_child++;
+      *trav = n_trav;
+
+      break;
+
+    case mpc_ast_trav_order_post:
+      ret = (*trav)->curr_node;
+
+      /* Move up the tree to the parent. If the parent doesn't have any more nodes,
+       * then this is the current node. If it does, move down to its left most
+       * child. Also, free the previous traversal node */
+      to_free = *trav;
+      *trav = (*trav)->parent;
+      free(to_free);
+
+      if(*trav == NULL)
+        break;
+
+      /* Next child */
+      (*trav)->curr_child++;
+
+      /* If there aren't any more children, this is the next node */
+      if((*trav)->curr_child >= (*trav)->curr_node->children_num) {
+        break;
+      }
+
+      /* If there are still more children, find the leftmost child from this
+       * node (new frames start at curr_child 0, so each step descends to child 0) */
+      while((*trav)->curr_node->children_num > 0) {
+        n_trav = malloc(sizeof(mpc_ast_trav_t));
+
+        cchild = (*trav)->curr_child;
+        n_trav->curr_node = (*trav)->curr_node->children[cchild];
+        n_trav->parent = *trav;
+        n_trav->curr_child = 0;
+        n_trav->order = (*trav)->order;
+
+        *trav = n_trav;
+      }
+      /* no break here: control falls through to default, which only breaks */
+    default:
+      /* Unreachable, but the compiler complains otherwise */
+      break;
+  }
+
+  return ret;
+}
+/* Frees the cursor frame at *trav and every parent frame above it, leaving *trav NULL. */
+void mpc_ast_traverse_free(mpc_ast_trav_t **trav) {
+  mpc_ast_trav_t *n_trav;
+
+  /* Go through parents until all are free */
+  while(*trav != NULL) {
+    n_trav = (*trav)->parent;
+    free(*trav);
+    *trav = n_trav;
+  }
+}
+/* Fold over n AST results: NULL for n == 0, the sole (or sole non-NULL of two) input unchanged, otherwise a new ">"-tagged root built from the non-NULL inputs. */
+mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **xs) {
+
+  int i, j;
+  mpc_ast_t** as = (mpc_ast_t**)xs;
+  mpc_ast_t *r;
+
+  if (n == 0) { return NULL; }
+  if (n == 1) { return xs[0]; }
+  if (n == 2 && xs[1] == NULL) { return xs[0]; }
+  if (n == 2 && xs[0] == NULL) { return xs[1]; }
+
+  r = mpc_ast_new(">", "");
+
+  for (i = 0; i < n; i++) {
+
+    if (as[i] == NULL) { continue; }
+
+    if (as[i] && as[i]->children_num == 0) { /* childless input: adopted as-is */
+      mpc_ast_add_child(r, as[i]);
+    } else if (as[i] && as[i]->children_num == 1) { /* one child: adopt it, passed through mpc_ast_add_root_tag with the input's tag */
+      mpc_ast_add_child(r, mpc_ast_add_root_tag(as[i]->children[0], as[i]->tag));
+      mpc_ast_delete_no_children(as[i]);
+    } else if (as[i] && as[i]->children_num >= 2) { /* several children: splice them all into r */
+      for (j = 0; j < as[i]->children_num; j++) {
+        mpc_ast_add_child(r, as[i]->children[j]);
+      }
+      mpc_ast_delete_no_children(as[i]);
+    }
+
+  }
+
+  if (r->children_num) { /* the root takes the state of its first child */
+    r->state = r->children[0]->state;
+  }
+
+  return r;
+}
+/* Apply function: creates an AST node with an empty tag from string c via mpc_ast_new, then frees c. */
+mpc_val_t *mpcf_str_ast(mpc_val_t *c) {
+  mpc_ast_t *a = mpc_ast_new("", c);
+  free(c);
+  return a;
+}
+/* Fold for (state, ast) pairs: xs[0] is read as an mpc_state_t*, xs[1] as an mpc_ast_t*; the state is applied with mpc_ast_state and then freed. */
+mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs) {
+  mpc_state_t *s = ((mpc_state_t**)xs)[0];
+  mpc_ast_t *a = ((mpc_ast_t**)xs)[1];
+  (void)n;
+  a = mpc_ast_state(a, *s);
+  free(s);
+  return a;
+}
+/* Sequences mpc_state() before a and folds the pair with mpcf_state_ast. */
+mpc_parser_t *mpca_state(mpc_parser_t *a) {
+  return mpc_and(2, mpcf_state_ast, mpc_state(), a, free);
+}
+/* Applies mpc_ast_tag with t to the result of a. */
+mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t) {
+  return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_tag, (void*)t);
+}
+/* Applies mpc_ast_add_tag with t to the result of a. */
+mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t) {
+  return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_add_tag, (void*)t);
+}
+/* Applies mpc_ast_add_root to the result of a. */
+mpc_parser_t *mpca_root(mpc_parser_t *a) {
+  return mpc_apply(a, (mpc_apply_t)mpc_ast_add_root);
+}
+/* AST flavours of the core combinators: mpcf_fold_ast is the fold and mpc_ast_delete the destructor wherever one is required. */
+mpc_parser_t *mpca_not(mpc_parser_t *a) { return mpc_not(a, (mpc_dtor_t)mpc_ast_delete); }
+mpc_parser_t *mpca_maybe(mpc_parser_t *a) { return mpc_maybe(a); }
+mpc_parser_t *mpca_many(mpc_parser_t *a) { return mpc_many(mpcf_fold_ast, a); }
+mpc_parser_t *mpca_many1(mpc_parser_t *a) { return mpc_many1(mpcf_fold_ast, a); }
+mpc_parser_t *mpca_count(int n, mpc_parser_t *a) { return mpc_count(n, mpcf_fold_ast, a, (mpc_dtor_t)mpc_ast_delete); }
+/* Builds an MPC_TYPE_OR parser node directly from the n variadic mpc_parser_t* arguments. */
+mpc_parser_t *mpca_or(int n, ...) {
+
+  int i;
+  va_list va;
+
+  mpc_parser_t *p = mpc_undefined();
+
+  p->type = MPC_TYPE_OR;
+  p->data.or.n = n;
+  p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n);
+
+  va_start(va, n);
+  for (i = 0; i < n; i++) {
+    p->data.or.xs[i] = va_arg(va, mpc_parser_t*);
+  }
+  va_end(va);
+
+  return p;
+
+}
+/* Builds an MPC_TYPE_AND parser node from the n variadic mpc_parser_t* arguments, folded with mpcf_fold_ast; all n-1 destructors are mpc_ast_delete. */
+mpc_parser_t *mpca_and(int n, ...)
{
+
+  int i;
+  va_list va;
+
+  mpc_parser_t *p = mpc_undefined();
+
+  p->type = MPC_TYPE_AND;
+  p->data.and.n = n;
+  p->data.and.f = mpcf_fold_ast;
+  p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n);
+  p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1));
+
+  va_start(va, n);
+  for (i = 0; i < n; i++) {
+    p->data.and.xs[i] = va_arg(va, mpc_parser_t*);
+  }
+  for (i = 0; i < (n-1); i++) {
+    p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete;
+  }
+  va_end(va);
+
+  return p;
+}
+/* Wraps a with mpc_total, passing mpc_ast_delete as the destructor. */
+mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_ast_delete); }
+
+/*
+** Grammar Parser
+*/
+
+/*
+** This is another interesting bootstrapping.
+**
+** Having a general purpose AST type allows
+** users to specify the grammar alone and
+** let all fold rules be automatically taken
+** care of by existing functions.
+**
+** You don't get to control the type spat
+** out but this means you can make a nice
+** parser to take in some grammar in nice
+** syntax and spit out a parser that works.
+**
+** The grammar for this looks surprisingly
+** like regex but the main difference is that
+** it is now whitespace insensitive and the
+** base type takes literals of some form.
+*/
+
+/*
+**
+**  ### Grammar Grammar
+**
+**      <grammar> : (<term> "|" <grammar>) | <term>
+**
+**      <term> : <factor>*
+**
+**      <factor> : <base>
+**               | <base> "*"
+**               | <base> "+"
+**               | <base> "?"
+**               | <base> "{" <int> "}"
+**
+**      <base> : "<" (<digits> | <ident>) ">"
+**             | <string_lit>
+**             | <char_lit>
+**             | <regex_lit>
+**             | "(" <grammar> ")"
+*/
+/* State shared by the grammar-compiler callbacks: the caller's va_list and the parsers pulled from it so far. */
+typedef struct {
+  va_list *va; /* caller's variadic arguments; parsers are pulled from it on demand */
+  int parsers_num; /* number of entries stored in parsers */
+  mpc_parser_t **parsers; /* realloc-grown array of the parsers pulled so far */
+  int flags; /* tested against MPCA_LANG_WHITESPACE_SENSITIVE and MPCA_LANG_PREDICTIVE */
+} mpca_grammar_st_t;
+/* Fold for a term with an optional "|" alternative: returns xs[0] alone when xs[1] is NULL, otherwise mpca_or of both. */
+static mpc_val_t *mpcaf_grammar_or(int n, mpc_val_t **xs) {
+  (void) n;
+  if (xs[1] == NULL) { return xs[0]; }
+  else { return mpca_or(2, xs[0], xs[1]); }
+}
+/* Fold for a run of factors: starting from mpc_pass(), chains every non-NULL xs[i] on with mpca_and(2, ...). */
+static mpc_val_t *mpcaf_grammar_and(int n, mpc_val_t **xs) {
+  int i;
+  mpc_parser_t *p = mpc_pass();
+  for (i = 0; i < n; i++) {
+    if (xs[i] != NULL) { p = mpca_and(2, p, xs[i]); }
+  }
+  return p;
+}
+/* Fold for a base plus optional suffix: xs[1] NULL means no suffix; '*', '+', '?', '!' select many/many1/maybe/not; anything else is read as an int count for mpca_count. */
+static mpc_val_t *mpcaf_grammar_repeat(int n, mpc_val_t **xs) {
+  int num;
+  (void) n;
+  if (xs[1] == NULL) { return xs[0]; }
+  switch(((char*)xs[1])[0])
+  {
+    case '*': { free(xs[1]); return mpca_many(xs[0]); }; break;
+    case '+': { free(xs[1]); return mpca_many1(xs[0]); }; break;
+    case '?': { free(xs[1]); return mpca_maybe(xs[0]); }; break;
+    case '!': { free(xs[1]); return mpca_not(xs[0]); }; break;
+    default: /* NOTE(review): only the first byte of xs[1] is inspected above, so an int count whose first byte equals one of those characters would presumably be misread - confirm */
+      num = *((int*)xs[1]);
+      free(xs[1]);
+  }
+  return mpca_count(num, xs[0]);
+}
+/* Apply-to callback for string literals: matches the unescaped text (token-wrapped unless MPCA_LANG_WHITESPACE_SENSITIVE is set) and yields a "string"-tagged AST node with state. */
+static mpc_val_t *mpcaf_grammar_string(mpc_val_t *x, void *s) {
+  mpca_grammar_st_t *st = s;
+  char *y = mpcf_unescape(x);
+  mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_string(y) : mpc_tok(mpc_string(y));
+  free(y);
+  return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "string"));
+}
+/* Apply-to callback for char literals: as for strings, but matches only the first unescaped character and tags the node "char". */
+static mpc_val_t *mpcaf_grammar_char(mpc_val_t *x, void *s) {
+  mpca_grammar_st_t *st = s;
+  char *y = mpcf_unescape(x);
+  mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ?
mpc_char(y[0]) : mpc_tok(mpc_char(y[0])); + free(y); + return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "char")); +} + +static mpc_val_t *mpcaf_fold_regex(int n, mpc_val_t **xs) { + char *y = xs[0]; + char *m = xs[1]; + mpca_grammar_st_t *st = xs[2]; + mpc_parser_t *p; + int mode = MPC_RE_DEFAULT; + + (void)n; + if (strchr(m, 'm')) { mode |= MPC_RE_MULTILINE; } + if (strchr(m, 's')) { mode |= MPC_RE_DOTALL; } + y = mpcf_unescape_regex(y); + p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_re_mode(y, mode) : mpc_tok(mpc_re_mode(y, mode)); + free(y); + free(m); + + return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "regex")); +} + +/* Should this just use `isdigit` instead? */ +static int is_number(const char* s) { + size_t i; + for (i = 0; i < strlen(s); i++) { if (!strchr("0123456789", s[i])) { return 0; } } + return 1; +} + +static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) { + + int i; + mpc_parser_t *p; + + /* Case of Number */ + if (is_number(x)) { + + i = strtol(x, NULL, 10); + + while (st->parsers_num <= i) { + st->parsers_num++; + st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); + st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*); + if (st->parsers[st->parsers_num-1] == NULL) { + return mpc_failf("No Parser in position %i! 
Only supplied %i Parsers!", i, st->parsers_num); + } + } + + return st->parsers[st->parsers_num-1]; + + /* Case of Identifier */ + } else { + + /* Search Existing Parsers */ + for (i = 0; i < st->parsers_num; i++) { + mpc_parser_t *q = st->parsers[i]; + if (q == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } + if (q->name && strcmp(q->name, x) == 0) { return q; } + } + + /* Search New Parsers */ + while (1) { + + p = va_arg(*st->va, mpc_parser_t*); + + st->parsers_num++; + st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); + st->parsers[st->parsers_num-1] = p; + + if (p == NULL || p->name == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } + if (p->name && strcmp(p->name, x) == 0) { return p; } + + } + + } + +} + +static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *s) { + + mpca_grammar_st_t *st = s; + mpc_parser_t *p = mpca_grammar_find_parser(x, st); + free(x); + + if (p->name) { + return mpca_state(mpca_root(mpca_add_tag(p, p->name))); + } else { + return mpca_state(mpca_root(p)); + } +} + +mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { + + char *err_msg; + mpc_parser_t *err_out; + mpc_result_t r; + mpc_parser_t *GrammarTotal, *Grammar, *Term, *Factor, *Base; + + GrammarTotal = mpc_new("grammar_total"); + Grammar = mpc_new("grammar"); + Term = mpc_new("term"); + Factor = mpc_new("factor"); + Base = mpc_new("base"); + + mpc_define(GrammarTotal, + mpc_predictive(mpc_total(Grammar, mpc_soft_delete)) + ); + + mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, + Term, + mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), + mpc_soft_delete + )); + + mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); + + mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, + Base, + mpc_or(6, + mpc_sym("*"), + mpc_sym("+"), + mpc_sym("?"), + mpc_sym("!"), + mpc_tok_brackets(mpc_int(), free), + mpc_pass()), + mpc_soft_delete + )); + + mpc_define(Base, mpc_or(5, + 
mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st),
+    mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st),
+    mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)),
+    mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st),
+    mpc_tok_parens(Grammar, mpc_soft_delete)
+  ));
+
+  mpc_optimise(GrammarTotal);
+  mpc_optimise(Grammar);
+  mpc_optimise(Factor);
+  mpc_optimise(Term);
+  mpc_optimise(Base);
+
+  if(!mpc_parse("", grammar, GrammarTotal, &r)) { /* on failure, swap the error for a parser that fails with its message */
+    err_msg = mpc_err_string(r.error);
+    err_out = mpc_failf("Invalid Grammar: %s", err_msg);
+    mpc_err_delete(r.error);
+    free(err_msg);
+    r.output = err_out;
+  }
+
+  mpc_cleanup(5, GrammarTotal, Grammar, Term, Factor, Base);
+
+  mpc_optimise(r.output);
+
+  return (st->flags & MPCA_LANG_PREDICTIVE) ? mpc_predictive(r.output) : r.output;
+
+}
+/* Public entry point: compiles `grammar` into a parser; the variadic arguments are the parsers the grammar may reference, consumed through mpca_grammar_find_parser. */
+mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...) {
+  mpca_grammar_st_t st;
+  mpc_parser_t *res;
+  va_list va;
+  va_start(va, grammar);
+
+  st.va = &va;
+  st.parsers_num = 0;
+  st.parsers = NULL;
+  st.flags = flags;
+
+  res = mpca_grammar_st(grammar, &st);
+  free(st.parsers);
+  va_end(va);
+  return res;
+}
+/* One parsed language statement: target identifier, optional string-literal name, and the compiled right-hand-side parser. */
+typedef struct {
+  char *ident;
+  char *name;
+  mpc_parser_t *grammar;
+} mpca_stmt_t;
+/* Fold for one statement; xs holds {ident, optional name, ":" token, grammar parser, ";" token}. The two tokens are freed, the rest moves into a new mpca_stmt_t. */
+static mpc_val_t *mpca_stmt_afold(int n, mpc_val_t **xs) {
+  mpca_stmt_t *stmt = malloc(sizeof(mpca_stmt_t));
+  stmt->ident = ((char**)xs)[0];
+  stmt->name = ((char**)xs)[1];
+  stmt->grammar = ((mpc_parser_t**)xs)[3];
+  (void) n;
+  free(((char**)xs)[2]);
+  free(((char**)xs)[4]);
+
+  return stmt;
+}
+/* Fold for the statement list: copies the n statement pointers into a new NULL-terminated array. */
+static mpc_val_t *mpca_stmt_fold(int n, mpc_val_t **xs) {
+
+  int i;
+  mpca_stmt_t **stmts = malloc(sizeof(mpca_stmt_t*) * (n+1));
+
+  for (i = 0; i < n; i++) {
+    stmts[i] = xs[i];
+  }
+  stmts[n] = NULL;
+
+  return stmts;
+}
+/* Destructor for a NULL-terminated statement array: frees each statement's strings, soft-deletes its grammar, then frees the array. */
+static void mpca_stmt_list_delete(mpc_val_t *x) {
+
+  mpca_stmt_t **stmts = x;
+
+  while(*stmts) {
+    mpca_stmt_t *stmt = *stmts;
+    free(stmt->ident);
+    free(stmt->name);
+    mpc_soft_delete(stmt->grammar);
+    free(stmt);
+    stmts++;
+  }
+  free(x);
+
+}
+/* Apply-to callback: for each statement, looks up the parser named by ident and defines it as the statement's grammar (made predictive and/or given an expect name as configured); frees the list and returns NULL. */
+static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *s) {
+
+  mpca_grammar_st_t *st = s;
+  mpca_stmt_t *stmt;
+  mpca_stmt_t **stmts = x;
+  mpc_parser_t *left;
+
+  while(*stmts) {
+    stmt = *stmts;
+    left = mpca_grammar_find_parser(stmt->ident, st); /* NOTE(review): an unknown ident yields a fresh mpc_failf parser here, which is then passed to mpc_define - confirm that is intended */
+    if (st->flags & MPCA_LANG_PREDICTIVE) { stmt->grammar = mpc_predictive(stmt->grammar); }
+    if (stmt->name) { stmt->grammar = mpc_expect(stmt->grammar, stmt->name); }
+    mpc_optimise(stmt->grammar);
+    mpc_define(left, stmt->grammar);
+    free(stmt->ident);
+    free(stmt->name);
+    free(stmt);
+    stmts++;
+  }
+
+  free(x);
+
+  return NULL;
+}
+/* Parses a whole language description from input i, defining the caller's parsers as a side effect; returns the parse error, or NULL on success. */
+static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) {
+
+  mpc_result_t r;
+  mpc_err_t *e;
+  mpc_parser_t *Lang, *Stmt, *Grammar, *Term, *Factor, *Base;
+
+  Lang = mpc_new("lang");
+  Stmt = mpc_new("stmt");
+  Grammar = mpc_new("grammar");
+  Term = mpc_new("term");
+  Factor = mpc_new("factor");
+  Base = mpc_new("base");
+
+  mpc_define(Lang, mpc_apply_to(
+    mpc_total(mpc_predictive(mpc_many(mpca_stmt_fold, Stmt)), mpca_stmt_list_delete),
+    mpca_stmt_list_apply_to, st
+  ));
+
+  mpc_define(Stmt, mpc_and(5, mpca_stmt_afold,
+    mpc_tok(mpc_ident()), mpc_maybe(mpc_tok(mpc_string_lit())), mpc_sym(":"), Grammar, mpc_sym(";"),
+    free, free, free, mpc_soft_delete
+  ));
+
+  mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or,
+    Term,
+    mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)),
+    mpc_soft_delete
+  ));
+
+  mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor));
+
+  mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat,
+    Base,
+    mpc_or(6,
+      mpc_sym("*"),
+      mpc_sym("+"),
+      mpc_sym("?"),
+      mpc_sym("!"),
+      mpc_tok_brackets(mpc_int(), free),
+      mpc_pass()),
+    mpc_soft_delete
+  ));
+
+  mpc_define(Base, mpc_or(5,
+    mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st),
+    mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st),
+    mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)),
+    mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st),
+    mpc_tok_parens(Grammar, mpc_soft_delete)
+  ));
+
+  mpc_optimise(Lang);
+  mpc_optimise(Stmt);
+  mpc_optimise(Grammar);
+  mpc_optimise(Term);
+  mpc_optimise(Factor);
+  mpc_optimise(Base);
+
+  if (!mpc_parse_input(i, Lang, &r)) {
+    e = r.error;
+  } else {
+    e = NULL;
+  }
+
+  mpc_cleanup(6, Lang, Stmt, Grammar, Term, Factor, Base);
+
+  return e;
+}
+/* Reads a language description from the open FILE f; the variadic arguments are the parsers it defines. Returns NULL on success or the parse error. */
+mpc_err_t *mpca_lang_file(int flags, FILE *f, ...) {
+  mpca_grammar_st_t st;
+  mpc_input_t *i;
+  mpc_err_t *err;
+
+  va_list va;
+  va_start(va, f);
+
+  st.va = &va;
+  st.parsers_num = 0;
+  st.parsers = NULL;
+  st.flags = flags;
+
+  i = mpc_input_new_file("", f);
+  err = mpca_lang_st(i, &st);
+  mpc_input_delete(i);
+
+  free(st.parsers);
+  va_end(va);
+  return err;
+}
+/* Same as mpca_lang_file, but the input is created with mpc_input_new_pipe. */
+mpc_err_t *mpca_lang_pipe(int flags, FILE *p, ...) {
+  mpca_grammar_st_t st;
+  mpc_input_t *i;
+  mpc_err_t *err;
+
+  va_list va;
+  va_start(va, p);
+
+  st.va = &va;
+  st.parsers_num = 0;
+  st.parsers = NULL;
+  st.flags = flags;
+
+  i = mpc_input_new_pipe("", p);
+  err = mpca_lang_st(i, &st);
+  mpc_input_delete(i);
+
+  free(st.parsers);
+  va_end(va);
+  return err;
+}
+/* Same as mpca_lang_file, but the description is the in-memory string `language`. */
+mpc_err_t *mpca_lang(int flags, const char *language, ...) {
+
+  mpca_grammar_st_t st;
+  mpc_input_t *i;
+  mpc_err_t *err;
+
+  va_list va;
+  va_start(va, language);
+
+  st.va = &va;
+  st.parsers_num = 0;
+  st.parsers = NULL;
+  st.flags = flags;
+
+  i = mpc_input_new_string("", language);
+  err = mpca_lang_st(i, &st);
+  mpc_input_delete(i);
+
+  free(st.parsers);
+  va_end(va);
+  return err;
+}
+/* Opens `filename` in binary mode and reads the language description from it; returns an mpc_err_file error when the file cannot be opened. */
+mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...)
{
+
+  mpca_grammar_st_t st;
+  mpc_input_t *i;
+  mpc_err_t *err;
+
+  va_list va;
+
+  FILE *f = fopen(filename, "rb");
+
+  if (f == NULL) { /* early return happens before va_start, so no va_end is needed here */
+    err = mpc_err_file(filename, "Unable to open file!");
+    return err;
+  }
+
+  va_start(va, filename);
+
+  st.va = &va;
+  st.parsers_num = 0;
+  st.parsers = NULL;
+  st.flags = flags;
+
+  i = mpc_input_new_file(filename, f);
+  err = mpca_lang_st(i, &st);
+  mpc_input_delete(i);
+
+  free(st.parsers);
+  va_end(va);
+
+  fclose(f);
+
+  return err;
+}
+/* Counts the parser nodes reachable from p. A retained parser counts as 0 unless force is set; children are always visited with force 0. */
+static int mpc_nodecount_unretained(mpc_parser_t* p, int force) {
+
+  int i, total;
+
+  if (p->retained && !force) { return 0; }
+
+  if (p->type == MPC_TYPE_EXPECT) { return 1 + mpc_nodecount_unretained(p->data.expect.x, 0); }
+
+  if (p->type == MPC_TYPE_APPLY) { return 1 + mpc_nodecount_unretained(p->data.apply.x, 0); }
+  if (p->type == MPC_TYPE_APPLY_TO) { return 1 + mpc_nodecount_unretained(p->data.apply_to.x, 0); }
+  if (p->type == MPC_TYPE_PREDICT) { return 1 + mpc_nodecount_unretained(p->data.predict.x, 0); }
+
+  if (p->type == MPC_TYPE_CHECK) { return 1 + mpc_nodecount_unretained(p->data.check.x, 0); }
+  if (p->type == MPC_TYPE_CHECK_WITH) { return 1 + mpc_nodecount_unretained(p->data.check_with.x, 0); }
+
+  if (p->type == MPC_TYPE_NOT) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); }
+  if (p->type == MPC_TYPE_MAYBE) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); }
+
+  if (p->type == MPC_TYPE_MANY) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); }
+  if (p->type == MPC_TYPE_MANY1) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); }
+  if (p->type == MPC_TYPE_COUNT) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); }
+  if (p->type == MPC_TYPE_SEPBY1) { /* NOTE(review): sepby1.x is counted twice below - confirm whether that is intentional */
+    total = 1;
+    total += mpc_nodecount_unretained(p->data.sepby1.x, 0);
+    total += mpc_nodecount_unretained(p->data.sepby1.sep, 0);
+    total += mpc_nodecount_unretained(p->data.sepby1.x, 0);
+    return total;
+  }
+
+  if (p->type == MPC_TYPE_OR) {
+    total = 1;
+    for(i = 0; i <
p->data.or.n; i++) {
+      total += mpc_nodecount_unretained(p->data.or.xs[i], 0);
+    }
+    return total;
+  }
+
+  if (p->type == MPC_TYPE_AND) {
+    total = 1;
+    for(i = 0; i < p->data.and.n; i++) {
+      total += mpc_nodecount_unretained(p->data.and.xs[i], 0);
+    }
+    return total;
+  }
+
+  return 1;
+
+}
+/* Prints the node count of p (p itself is counted even when retained) to stdout. */
+void mpc_stats(mpc_parser_t* p) {
+  printf("Stats\n");
+  printf("=====\n");
+  printf("Node Count: %i\n", mpc_nodecount_unretained(p, 1));
+}
+/* Optimises p in place: first recurses into its sub-parsers (retained ones are skipped unless force is set), then repeatedly flattens nested `or`/`and` nodes and drops a leading pass/lift until no rule applies. */
+static void mpc_optimise_unretained(mpc_parser_t *p, int force) {
+
+  int i, n, m;
+  mpc_parser_t *t;
+
+  if (p->retained && !force) { return; }
+
+  /* Optimise Subexpressions */
+
+  if (p->type == MPC_TYPE_EXPECT) { mpc_optimise_unretained(p->data.expect.x, 0); }
+  if (p->type == MPC_TYPE_APPLY) { mpc_optimise_unretained(p->data.apply.x, 0); }
+  if (p->type == MPC_TYPE_APPLY_TO) { mpc_optimise_unretained(p->data.apply_to.x, 0); }
+  if (p->type == MPC_TYPE_CHECK) { mpc_optimise_unretained(p->data.check.x, 0); }
+  if (p->type == MPC_TYPE_CHECK_WITH) { mpc_optimise_unretained(p->data.check_with.x, 0); }
+  if (p->type == MPC_TYPE_PREDICT) { mpc_optimise_unretained(p->data.predict.x, 0); }
+  if (p->type == MPC_TYPE_NOT) { mpc_optimise_unretained(p->data.not.x, 0); }
+  if (p->type == MPC_TYPE_MAYBE) { mpc_optimise_unretained(p->data.not.x, 0); }
+  if (p->type == MPC_TYPE_MANY) { mpc_optimise_unretained(p->data.repeat.x, 0); }
+  if (p->type == MPC_TYPE_MANY1) { mpc_optimise_unretained(p->data.repeat.x, 0); }
+  if (p->type == MPC_TYPE_COUNT) { mpc_optimise_unretained(p->data.repeat.x, 0); }
+  if (p->type == MPC_TYPE_SEPBY1) {
+    mpc_optimise_unretained(p->data.sepby1.x, 0);
+    mpc_optimise_unretained(p->data.sepby1.sep, 0);
+  }
+
+  if (p->type == MPC_TYPE_OR) {
+    for(i = 0; i < p->data.or.n; i++) {
+      mpc_optimise_unretained(p->data.or.xs[i], 0);
+    }
+  }
+
+  if (p->type == MPC_TYPE_AND) {
+    for(i = 0; i < p->data.and.n; i++) {
+      mpc_optimise_unretained(p->data.and.xs[i], 0);
+    }
+  }
+
+  /* Perform optimisations */
+
+  while (1) {
+
+    /* Merge rhs `or` */
+    if (p->type == MPC_TYPE_OR
+    && p->data.or.xs[p->data.or.n-1]->type == MPC_TYPE_OR
+    && !p->data.or.xs[p->data.or.n-1]->retained) {
+      t = p->data.or.xs[p->data.or.n-1];
+      n = p->data.or.n; m = t->data.or.n;
+      p->data.or.n = n + m - 1;
+      p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1));
+      memmove(p->data.or.xs + n - 1, t->data.or.xs, m * sizeof(mpc_parser_t*)); /* t's m alternatives overwrite the slot t occupied and extend the array */
+      free(t->data.or.xs); free(t->name); free(t);
+      continue;
+    }
+
+    /* Merge lhs `or` */
+    if (p->type == MPC_TYPE_OR
+    && p->data.or.xs[0]->type == MPC_TYPE_OR
+    && !p->data.or.xs[0]->retained) {
+      t = p->data.or.xs[0];
+      n = p->data.or.n; m = t->data.or.n;
+      p->data.or.n = n + m - 1;
+      p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1));
+      memmove(p->data.or.xs + m, p->data.or.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); /* shift p's remaining alternatives right to make room */
+      memmove(p->data.or.xs, t->data.or.xs, m * sizeof(mpc_parser_t*));
+      free(t->data.or.xs); free(t->name); free(t);
+      continue;
+    }
+
+    /* Remove ast `pass` */
+    if (p->type == MPC_TYPE_AND
+    && p->data.and.n == 2
+    && p->data.and.xs[0]->type == MPC_TYPE_PASS
+    && !p->data.and.xs[0]->retained
+    && p->data.and.f == mpcf_fold_ast) {
+      t = p->data.and.xs[1];
+      mpc_delete(p->data.and.xs[0]);
+      free(p->data.and.xs); free(p->data.and.dxs); free(p->name);
+      memcpy(p, t, sizeof(mpc_parser_t)); /* p takes over t's contents; only t's struct is freed afterwards */
+      free(t);
+      continue;
+    }
+
+    /* Merge ast lhs `and` */
+    if (p->type == MPC_TYPE_AND
+    && p->data.and.f == mpcf_fold_ast
+    && p->data.and.xs[0]->type == MPC_TYPE_AND
+    && !p->data.and.xs[0]->retained
+    && p->data.and.xs[0]->data.and.f == mpcf_fold_ast) {
+      t = p->data.and.xs[0];
+      n = p->data.and.n; m = t->data.and.n;
+      p->data.and.n = n + m - 1;
+      p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1));
+      p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1));
+      memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*));
+      memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*));
+      for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; }
+      free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t);
+      continue;
+    }
+
+    /* Merge ast rhs `and` */
+    if (p->type == MPC_TYPE_AND
+    && p->data.and.f == mpcf_fold_ast
+    && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND
+    && !p->data.and.xs[p->data.and.n-1]->retained
+    && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_fold_ast) {
+      t = p->data.and.xs[p->data.and.n-1];
+      n = p->data.and.n; m = t->data.and.n;
+      p->data.and.n = n + m - 1;
+      p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1));
+      p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1));
+      memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*));
+      for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; }
+      free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t);
+      continue;
+    }
+
+    /* Remove re `lift` */
+    if (p->type == MPC_TYPE_AND
+    && p->data.and.n == 2
+    && p->data.and.xs[0]->type == MPC_TYPE_LIFT
+    && p->data.and.xs[0]->data.lift.lf == mpcf_ctor_str
+    && !p->data.and.xs[0]->retained
+    && p->data.and.f == mpcf_strfold) {
+      t = p->data.and.xs[1];
+      mpc_delete(p->data.and.xs[0]);
+      free(p->data.and.xs); free(p->data.and.dxs); free(p->name);
+      memcpy(p, t, sizeof(mpc_parser_t)); /* p takes over t's contents; only t's struct is freed afterwards */
+      free(t);
+      continue;
+    }
+
+    /* Merge re lhs `and` */
+    if (p->type == MPC_TYPE_AND
+    && p->data.and.f == mpcf_strfold
+    && p->data.and.xs[0]->type == MPC_TYPE_AND
+    && !p->data.and.xs[0]->retained
+    && p->data.and.xs[0]->data.and.f == mpcf_strfold) {
+      t = p->data.and.xs[0];
+      n = p->data.and.n; m = t->data.and.n;
+      p->data.and.n = n + m - 1;
+      p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1));
+      p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1));
+      memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*));
+      memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*));
+      for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; }
+      free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t);
+      continue;
+    }
+
+    /* Merge re rhs `and` */
+    if (p->type == MPC_TYPE_AND
+    && p->data.and.f == mpcf_strfold
+    && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND
+    && !p->data.and.xs[p->data.and.n-1]->retained
+    && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_strfold) {
+      t = p->data.and.xs[p->data.and.n-1];
+      n = p->data.and.n; m = t->data.and.n;
+      p->data.and.n = n + m - 1;
+      p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1));
+      p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1));
+      memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*));
+      for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; }
+      free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t);
+      continue;
+    }
+
+    return;
+
+  }
+
+}
+/* Public entry point: optimises p itself even when it is retained (force = 1). */
+void mpc_optimise(mpc_parser_t *p) {
+  mpc_optimise_unretained(p, 1);
+}
diff --git a/vendor/mpc.h b/vendor/mpc.h
new file mode 100644
index 0000000..49a08ee
--- /dev/null
+++ b/vendor/mpc.h
@@ -0,0 +1,391 @@
+/*
+** mpc - Micro Parser Combinator library for C
+**
+** https://github.com/orangeduck/mpc
+**
+** Daniel Holden - contact@daniel-holden.com
+** Licensed under BSD3
+*/
+
+#ifndef mpc_h
+#define mpc_h
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* NOTE(review): the header names are missing from the #include lines below in this copy - presumably lost in extraction; confirm against the vendored file */
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+/*
+** State Type
+*/
+
+typedef struct {
+  long pos;
+  long row;
+  long col;
+  int term;
+} mpc_state_t;
+
+/*
+** Error Type
+*/
+
+typedef struct {
+  mpc_state_t state;
+  int expected_num;
+  char *filename;
+  char *failure;
+  char **expected;
+  char received;
+} mpc_err_t;
+
+void mpc_err_delete(mpc_err_t *e);
+char *mpc_err_string(mpc_err_t *e);
+void mpc_err_print(mpc_err_t *e);
+void
mpc_err_print_to(mpc_err_t *e, FILE *f); + +/* +** Parsing +*/ + +typedef void mpc_val_t; + +typedef union { + mpc_err_t *error; + mpc_val_t *output; +} mpc_result_t; + +struct mpc_parser_t; +typedef struct mpc_parser_t mpc_parser_t; + +int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r); +int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r); +int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r); +int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r); +int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r); + +/* +** Function Types +*/ + +typedef void(*mpc_dtor_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_ctor_t)(void); + +typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*); +typedef mpc_val_t*(*mpc_fold_t)(int,mpc_val_t**); + +typedef int(*mpc_check_t)(mpc_val_t**); +typedef int(*mpc_check_with_t)(mpc_val_t**,void*); + +/* +** Building a Parser +*/ + +mpc_parser_t *mpc_new(const char *name); +mpc_parser_t *mpc_copy(mpc_parser_t *a); +mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a); +mpc_parser_t *mpc_undefine(mpc_parser_t *p); + +void mpc_delete(mpc_parser_t *p); +void mpc_cleanup(int n, ...); + +/* +** Basic Parsers +*/ + +mpc_parser_t *mpc_any(void); +mpc_parser_t *mpc_char(char c); +mpc_parser_t *mpc_range(char s, char e); +mpc_parser_t *mpc_oneof(const char *s); +mpc_parser_t *mpc_noneof(const char *s); +mpc_parser_t *mpc_satisfy(int(*f)(char)); +mpc_parser_t *mpc_string(const char *s); + +/* +** Other Parsers +*/ + +mpc_parser_t *mpc_pass(void); +mpc_parser_t *mpc_fail(const char *m); +mpc_parser_t *mpc_failf(const char *fmt, ...); +mpc_parser_t *mpc_lift(mpc_ctor_t f); +mpc_parser_t *mpc_lift_val(mpc_val_t *x); +mpc_parser_t *mpc_anchor(int(*f)(char,char)); +mpc_parser_t *mpc_state(void); + +/* +** Combinator Parsers +*/ + 
+mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *e); +mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...); +mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f); +mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x); +mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e); +mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e); +mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...); +mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...); + +mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da); +mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf); +mpc_parser_t *mpc_maybe(mpc_parser_t *a); +mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf); + +mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a); +mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a); +mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da); + +mpc_parser_t *mpc_or(int n, ...); +mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...); + +mpc_parser_t *mpc_predictive(mpc_parser_t *a); + +/* +** Common Parsers +*/ + +mpc_parser_t *mpc_eoi(void); +mpc_parser_t *mpc_soi(void); + +mpc_parser_t *mpc_boundary(void); +mpc_parser_t *mpc_boundary_newline(void); + +mpc_parser_t *mpc_whitespace(void); +mpc_parser_t *mpc_whitespaces(void); +mpc_parser_t *mpc_blank(void); + +mpc_parser_t *mpc_newline(void); +mpc_parser_t *mpc_tab(void); +mpc_parser_t *mpc_escape(void); + +mpc_parser_t *mpc_digit(void); +mpc_parser_t *mpc_hexdigit(void); +mpc_parser_t *mpc_octdigit(void); +mpc_parser_t *mpc_digits(void); +mpc_parser_t *mpc_hexdigits(void); +mpc_parser_t *mpc_octdigits(void); + +mpc_parser_t *mpc_lower(void); +mpc_parser_t *mpc_upper(void); +mpc_parser_t *mpc_alpha(void); +mpc_parser_t *mpc_underscore(void); +mpc_parser_t *mpc_alphanum(void); + 
+mpc_parser_t *mpc_int(void); +mpc_parser_t *mpc_hex(void); +mpc_parser_t *mpc_oct(void); +mpc_parser_t *mpc_number(void); + +mpc_parser_t *mpc_real(void); +mpc_parser_t *mpc_float(void); + +mpc_parser_t *mpc_char_lit(void); +mpc_parser_t *mpc_string_lit(void); +mpc_parser_t *mpc_regex_lit(void); + +mpc_parser_t *mpc_ident(void); + +/* +** Useful Parsers +*/ + +mpc_parser_t *mpc_startwith(mpc_parser_t *a); +mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da); +mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da); + +mpc_parser_t *mpc_stripl(mpc_parser_t *a); +mpc_parser_t *mpc_stripr(mpc_parser_t *a); +mpc_parser_t *mpc_strip(mpc_parser_t *a); +mpc_parser_t *mpc_tok(mpc_parser_t *a); +mpc_parser_t *mpc_sym(const char *s); +mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da); + +mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); +mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad); + +mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); +mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad); + +mpc_parser_t *mpc_sepby1(mpc_fold_t f, mpc_parser_t *sep, mpc_parser_t *a); + +/* +** Common Function Parameters +*/ + +void mpcf_dtor_null(mpc_val_t *x); + +mpc_val_t *mpcf_ctor_null(void); +mpc_val_t *mpcf_ctor_str(void); + +mpc_val_t *mpcf_free(mpc_val_t *x); +mpc_val_t *mpcf_int(mpc_val_t *x); +mpc_val_t *mpcf_hex(mpc_val_t *x); +mpc_val_t *mpcf_oct(mpc_val_t *x); +mpc_val_t *mpcf_float(mpc_val_t *x); +mpc_val_t *mpcf_strtriml(mpc_val_t *x); +mpc_val_t *mpcf_strtrimr(mpc_val_t *x); +mpc_val_t 
*mpcf_strtrim(mpc_val_t *x); + +mpc_val_t *mpcf_escape(mpc_val_t *x); +mpc_val_t *mpcf_escape_regex(mpc_val_t *x); +mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x); +mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x); + +mpc_val_t *mpcf_unescape(mpc_val_t *x); +mpc_val_t *mpcf_unescape_regex(mpc_val_t *x); +mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x); +mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x); + +mpc_val_t *mpcf_null(int n, mpc_val_t** xs); +mpc_val_t *mpcf_fst(int n, mpc_val_t** xs); +mpc_val_t *mpcf_snd(int n, mpc_val_t** xs); +mpc_val_t *mpcf_trd(int n, mpc_val_t** xs); + +mpc_val_t *mpcf_fst_free(int n, mpc_val_t** xs); +mpc_val_t *mpcf_snd_free(int n, mpc_val_t** xs); +mpc_val_t *mpcf_trd_free(int n, mpc_val_t** xs); +mpc_val_t *mpcf_all_free(int n, mpc_val_t** xs); + +mpc_val_t *mpcf_freefold(int n, mpc_val_t** xs); +mpc_val_t *mpcf_strfold(int n, mpc_val_t** xs); + +/* +** Regular Expression Parsers +*/ + +enum { + MPC_RE_DEFAULT = 0, + MPC_RE_M = 1, + MPC_RE_S = 2, + MPC_RE_MULTILINE = 1, + MPC_RE_DOTALL = 2 +}; + +mpc_parser_t *mpc_re(const char *re); +mpc_parser_t *mpc_re_mode(const char *re, int mode); + +/* +** AST +*/ + +typedef struct mpc_ast_t { + char *tag; + char *contents; + mpc_state_t state; + int children_num; + struct mpc_ast_t** children; +} mpc_ast_t; + +mpc_ast_t *mpc_ast_new(const char *tag, const char *contents); +mpc_ast_t *mpc_ast_build(int n, const char *tag, ...); +mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a); +mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a); +mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t); +mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t); +mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t); +mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s); + +void mpc_ast_delete(mpc_ast_t *a); +void mpc_ast_print(mpc_ast_t *a); +void mpc_ast_print_to(mpc_ast_t *a, FILE *fp); + +int mpc_ast_get_index(mpc_ast_t *ast, const char *tag); +int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, 
int lb); +mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag); +mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb); + +typedef enum { + mpc_ast_trav_order_pre, + mpc_ast_trav_order_post +} mpc_ast_trav_order_t; + +typedef struct mpc_ast_trav_t { + mpc_ast_t *curr_node; + struct mpc_ast_trav_t *parent; + int curr_child; + mpc_ast_trav_order_t order; +} mpc_ast_trav_t; + +mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, + mpc_ast_trav_order_t order); + +mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav); + +void mpc_ast_traverse_free(mpc_ast_trav_t **trav); + +/* +** Warning: This function currently doesn't test for equality of the `state` member! +*/ +int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b); + +mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **as); +mpc_val_t *mpcf_str_ast(mpc_val_t *c); +mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs); + +mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t); +mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t); +mpc_parser_t *mpca_root(mpc_parser_t *a); +mpc_parser_t *mpca_state(mpc_parser_t *a); +mpc_parser_t *mpca_total(mpc_parser_t *a); + +mpc_parser_t *mpca_not(mpc_parser_t *a); +mpc_parser_t *mpca_maybe(mpc_parser_t *a); + +mpc_parser_t *mpca_many(mpc_parser_t *a); +mpc_parser_t *mpca_many1(mpc_parser_t *a); +mpc_parser_t *mpca_count(int n, mpc_parser_t *a); + +mpc_parser_t *mpca_or(int n, ...); +mpc_parser_t *mpca_and(int n, ...); + +enum { + MPCA_LANG_DEFAULT = 0, + MPCA_LANG_PREDICTIVE = 1, + MPCA_LANG_WHITESPACE_SENSITIVE = 2 +}; + +mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...); + +mpc_err_t *mpca_lang(int flags, const char *language, ...); +mpc_err_t *mpca_lang_file(int flags, FILE *f, ...); +mpc_err_t *mpca_lang_pipe(int flags, FILE *f, ...); +mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...); + +/* +** Misc +*/ + + +void mpc_print(mpc_parser_t *p); +void mpc_optimise(mpc_parser_t *p); +void mpc_stats(mpc_parser_t *p); + +int 
mpc_test_pass(mpc_parser_t *p, const char *s, const void *d,
+  int(*tester)(const void*, const void*),
+  mpc_dtor_t destructor,
+  void(*printer)(const void*));
+
+int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d,
+  int(*tester)(const void*, const void*),
+  mpc_dtor_t destructor,
+  void(*printer)(const void*));
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/vendor/yar.c b/vendor/yar.c
new file mode 100644
index 0000000..54b4179
--- /dev/null
+++ b/vendor/yar.c
@@ -0,0 +1,2 @@
+#define YAR_IMPLEMENTATION
+#include "yar.h"
diff --git a/vendor/yar.h b/vendor/yar.h
new file mode 100644
index 0000000..5f4310f
--- /dev/null
+++ b/vendor/yar.h
@@ -0,0 +1,240 @@
+/* yar - dynamic arrays in C - public domain Nicholas Rixson 2025
+ *
+ * https://github.com/segcore/yar
+ *
+ * Licence: see end of file
+
+  Sample usage:
+    #define YAR_IMPLEMENTATION
+    #include "yar.h"
+
+    int main() {
+      // struct { double *items; size_t count; size_t capacity; } numbers = {0};
+      yar(double) numbers = {0};
+      *yar_append(&numbers) = 3.14159;
+      *yar_append(&numbers) = 2.71828;
+      *yar_append(&numbers) = 1.61803;
+
+      for(size_t i = 0; i < numbers.count; i++) {
+        printf("%f\n", numbers.items[i]);
+      }
+
+      yar_free(&numbers);
+    }
+ */
+#ifndef YAR_H
+#define YAR_H
+
+#include <stddef.h> // size_t
+#include <string.h> // strlen
+
+/*
+ * yar(type) - Declare a new basic dynamic array
+ *
+ * yar_append(array) - Add a new item at the end of the array, and return a pointer to it
+ *
+ * yar_reserve(array, extra) - Reserve space for `extra` count of items
+ *
+ * yar_append_many(array, data, num) - Append a copy of existing data
+ *
+ * yar_append_cstr(array, data) - Append a C string (nul-terminated char array)
+ *
+ * yar_insert(array, index, num) - Insert items somewhere within the array. Moves items to higher indexes as required. Returns &array[index]
+ *
+ * yar_remove(array, index, num) - Remove items from somewhere within the array. Moves items to lower indexes as required.
+ *
+ * yar_reset(array) - Reset the count of elements to 0, to re-use the memory. Does not free the memory.
+ *
+ * yar_init(array) - Set items, count, and capacity to 0. Can usually be avoided with = {0};
+ *
+ * yar_free(array) - Free items memory, and set the items, count, and capacity to 0.
+ */
+
+#define yar(type) struct { type *items; size_t count; size_t capacity; }
+#define yar_append(array) ((_yar_append((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0])) ? \
+    &(array)->items[(array)->count - 1] : NULL))
+#define yar_reserve(array, extra) ((_yar_reserve((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0]), (extra)) ? \
+    &(array)->items[(array)->count] : NULL))
+#define yar_append_many(array, data, num) ((_yar_append_many((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0]), 1 ? (data) : ((array)->items), (num)) ))
+#define yar_append_cstr(array, data) yar_append_many(array, data, strlen(data))
+#define yar_insert(array, index, num) ((_yar_insert((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0]), index, num) ))
+#define yar_remove(array, index, num) ((_yar_remove((void**)&(array)->items, &(array)->count, sizeof((array)->items[0]), index, num) ))
+#define yar_reset(array) (((array)->count = 0))
+#define yar_init(array) ((array)->items = NULL, (array)->count = 0, (array)->capacity = 0)
+#define yar_free(array) ((_yar_free((array)->items)), (array)->items = NULL, (array)->count = 0, (array)->capacity = 0)
+
+#ifndef YARAPI
+  #define YARAPI // nothing; overridable if needed.
+#endif
+
+#ifdef __cplusplus
+  extern "C" {
+#endif
+
+// Implementation functions
+YARAPI void* _yar_append(void** items_pointer, size_t* count, size_t* capacity, size_t item_size);
+YARAPI void* _yar_append_many(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, void* data, size_t extra);
+YARAPI void* _yar_reserve(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t extra);
+YARAPI void* _yar_insert(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t index, size_t extra);
+YARAPI void* _yar_remove(void** items_pointer, size_t* count, size_t item_size, size_t index, size_t remove);
+YARAPI void* _yar_realloc(void* p, size_t new_size);
+YARAPI void _yar_free(void* p);
+
+#ifdef __cplusplus
+  }
+#endif
+
+#endif // YAR_H
+
+#if defined(YAR_IMPLEMENTATION)
+
+#ifndef YAR_MIN_CAP
+  #define YAR_MIN_CAP 16
+#endif
+
+#ifndef YAR_REALLOC
+  #define YAR_REALLOC realloc
+#endif
+
+#ifndef YAR_FREE
+  #define YAR_FREE free
+#endif
+
+#include <string.h> // mem* functions
+
+// Append one zeroed item. Returns a pointer to it, or NULL if allocation fails.
+YARAPI void* _yar_append(void** items_pointer, size_t* count, size_t* capacity, size_t item_size)
+{
+    void* result = _yar_reserve(items_pointer, count, capacity, item_size, 1);
+    if (result != NULL) *count += 1;
+    return result;
+}
+
+// Append `extra` items copied from `data`. Returns the first copied item, or NULL if allocation fails.
+YARAPI void* _yar_append_many(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, void* data, size_t extra)
+{
+    void* result = _yar_reserve(items_pointer, count, capacity, item_size, extra);
+    if (result != NULL) {
+        memcpy(result, data, item_size * extra);
+        *count += extra;
+    }
+    return result;
+}
+
+// Make room for `extra` more items and zero that space; *count is left unchanged.
+// Returns a pointer to the first reserved slot, or NULL if allocation fails.
+YARAPI void* _yar_reserve(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t extra)
+{
+    char* items = *items_pointer;
+    size_t newcount = *count + extra;
+    if (newcount > *capacity) {
+        size_t newcap = (*capacity < YAR_MIN_CAP) ? YAR_MIN_CAP : *capacity * 8 / 5;
+        if (newcap < newcount) newcap = newcount;
+        void* next = _yar_realloc(items, newcap * item_size);
+        if (next == NULL) return NULL;
+        items = next;
+        *items_pointer = next;
+        *capacity = newcap;
+    }
+    void* result = items + (*count * item_size);
+    if (extra && result) memset(result, 0, item_size * extra);
+    return result;
+}
+
+// Open a zeroed gap of `extra` items at `index`, moving later items up.
+// Returns &items[index], or NULL if allocation fails.
+YARAPI void* _yar_insert(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t index, size_t extra)
+{
+    void* next = _yar_reserve(items_pointer, count, capacity, item_size, extra);
+    if(next == NULL) return NULL;
+
+    char* items = *items_pointer;
+    if (index < *count)
+    {
+        memmove(&items[item_size * (index + extra)], &items[item_size * index], (*count - index) * item_size);
+        memset(&items[item_size * index], 0, extra * item_size);
+    }
+    *count += extra;
+    return items + index * item_size;
+}
+
+// Remove up to `remove` items starting at `index`, moving later items down.
+// Returns &items[index], or the array base when `index` is at or past the end (nothing removed).
+YARAPI void* _yar_remove(void** items_pointer, size_t* count, size_t item_size, size_t index, size_t remove)
+{
+    if (index >= *count) {
+        return *items_pointer;
+    }
+    // Local fix to the vendored copy: clamp to the items that exist from `index` on, so the
+    // memmove length below cannot wrap and items before `index` are never dropped.
+    if (remove > *count - index) {
+        remove = *count - index;
+    }
+    char* items = *items_pointer;
+    memmove(&items[item_size * index], &items[item_size * (index + remove)], item_size * (*count - (index + remove)));
+    *count -= remove;
+    return items + item_size * index;
+}
+
+// Allocation hooks: forward to YAR_REALLOC / YAR_FREE.
+YARAPI void* _yar_realloc(void* p, size_t new_size)
+{
+    // Declaration, so we can call it if the definition is overridden
+    extern void* YAR_REALLOC(void *ptr, size_t size);
+    return YAR_REALLOC(p, new_size);
+}
+
+YARAPI void _yar_free(void* p)
+{
+    extern void YAR_FREE(void *ptr);
+    YAR_FREE(p);
+}
+
+#endif // YAR_IMPLEMENTATION
+/*
+------------------------------------------------------------------------------
+This software is available under 2 licenses -- choose whichever you prefer.
+------------------------------------------------------------------------------ +ALTERNATIVE A - MIT License + +Copyright (c) 2025 Nicholas Rixson + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. +------------------------------------------------------------------------------ +ALTERNATIVE B - Public Domain (www.unlicense.org) +This is free and unencumbered software released into the public domain. +Anyone is free to copy, modify, publish, use, compile, sell, or distribute this +software, either in source code form or as a compiled binary, for any purpose, +commercial or non-commercial, and by any means. + +In jurisdictions that recognize copyright laws, the author or authors of this +software dedicate any and all copyright interest in the software to the public +domain. We make this dedication for the benefit of the public at large and to +the detriment of our heirs and successors. 
We intend this dedication to be an
+overt act of relinquishment in perpetuity of all present and future rights to
+this software under copyright law.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+------------------------------------------------------------------------------
+*/
diff --git a/vm.c b/vm.c
new file mode 100644
index 0000000..20df937
--- /dev/null
+++ b/vm.c
@@ -0,0 +1,81 @@
+#include "vm.h"
+#include "gc.h"
+
+/**
+ * Decode one signed LEB128 integer at *ptr and advance *ptr past it.
+ *
+ * Bits are accumulated in an unsigned word so no signed left shift occurs,
+ * and payload bits beyond the width of I are dropped rather than shifted by
+ * an out-of-range amount (undefined behaviour in C).
+ */
+static I decode_sleb128(U8 **ptr) {
+  const U width = sizeof(U) * 8;
+  U result = 0;
+  U shift = 0;
+  U8 byte;
+
+  do {
+    byte = *(*ptr)++;
+    if (shift < width)
+      result |= (U)(byte & 0x7F) << shift;
+    shift += 7;
+  } while (byte & 0x80);
+
+  /* Sign-extend when bit 6 of the final byte is set. */
+  if (shift < width && (byte & 0x40))
+    result |= ~(U)0 << shift;
+
+  return (I)result;
+}
+
+/** Reset both stack pointers, initialise the GC and root every value-stack slot. */
+V vm_init(Vm *vm) {
+  vm->sp = vm->stack;
+  vm->rsp = vm->rstack;
+  gc_init(&vm->gc);
+
+  /* Each slot is set to NIL and registered as a GC root. */
+  for (Z i = 0; i < STACK_SIZE; i++) {
+    vm->stack[i] = NIL;
+    gc_addroot(&vm->gc, &vm->stack[i]);
+  }
+}
+
+/* Value-stack primitives. None of them checks bounds: callers must not push
+ * past the end of vm->stack or pop/peek an empty stack. */
+V vm_push(Vm *vm, O o) { *vm->sp++ = o; }
+O vm_pop(Vm *vm) { return *--vm->sp; }
+O vm_peek(Vm *vm) { return *(vm->sp - 1); }
+
+/**
+ * Execute bytecode from `chunk`, starting at byte `offset`, until OP_RETURN.
+ *
+ * The chunk's constants are added as GC roots first. Every exit goes through
+ * `done`, so the gc_mark() taken on entry is always paired with gc_reset().
+ */
+V vm_run(Vm *vm, Bc *chunk, I offset) {
+  I mark = gc_mark(&vm->gc);
+  for (Z i = 0; i < chunk->constants.count; i++)
+    gc_addroot(&vm->gc, &chunk->constants.items[i]);
+
+  vm->ip = chunk->items + offset;
+  for (;;) {
+    switch (*vm->ip++) {
+    case OP_NOP:
+      break;
+    case OP_RETURN:
+      goto done;
+    case OP_CONST: {
+      I idx = decode_sleb128(&vm->ip);
+      vm_push(vm, chunk->constants.items[idx]);
+      break;
+    }
+    default:
+      /* Unknown opcode: stop instead of decoding arbitrary bytes. */
+      goto done;
+    }
+  }
+
+done:
+  gc_reset(&vm->gc, mark);
+}
diff --git a/vm.h b/vm.h
new file mode 100644
index 0000000..7da042d
--- /dev/null
+++ b/vm.h
@@ -0,0 +1,39 @@
+#ifndef VM_H
+#define VM_H
+
+#include "common.h"
+
+#include "chunk.h"
+#include "gc.h"
+#include "object.h"
+
+/** Bytecode opcodes; each is a single byte in a chunk. */
+enum {
+  OP_NOP = 0, /* does nothing */
+  OP_RETURN,  /* ends vm_run */
+  OP_CONST,   /* operand: SLEB128 index into chunk->constants; pushes that constant */
+};
+
+/** Number of slots in the value stack and in rstack. */
+#define STACK_SIZE 256
+
+/** Interpreter state. */
+typedef struct Vm {
+  Gc gc;
+  O stack[STACK_SIZE], *sp;   /* value stack; sp points one past the top entry */
+  U rstack[STACK_SIZE], *rsp; /* presumably the return stack -- only initialised so far */
+  U8 *ip;                     /* next bytecode byte to execute */
+} Vm;
+
+/** Initialise the stack pointers and the GC, and root every value-stack slot. */
+V vm_init(Vm *vm);
+/** Push `o` onto the value stack. No bounds check. */
+V vm_push(Vm *vm, O o);
+/** Pop and return the top of the value stack. No bounds check. */
+O vm_pop(Vm *vm);
+/** Return the top of the value stack without popping it. */
+O vm_peek(Vm *vm);
+/** Run `chunk` starting at byte `offset` until OP_RETURN. */
+V vm_run(Vm *vm, Bc *chunk, I offset);
+
+#endif