commit 146e7a85d6cc836340fa4a62469d9ff4074cadbc
Author: Javier B. Torres
Date: Mon Jan 19 09:08:23 2026 -0300

    initial commit

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..f1094a0
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,7 @@
+root = true
+[*]
+end_of_line = lf
+insert_final_newline = true
+[*.{c,h,grr}]
+indent_style = space
+indent_size = 2
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..420e2a0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,4 @@
+*.o
+.cache
+/.envrc
+/compile_commands.json
diff --git a/README b/README
new file mode 100644
index 0000000..8bcbd5e
--- /dev/null
+++ b/README
@@ -0,0 +1,11 @@
+ .
+ / V\
+ / ` /
+ << |
+ / | Growl
+ / | A concatenative programming language.
+ / |
+ / \ \ /
+ ( ) | |
+ ________| _/_ | |
+<__________\______)\__)
diff --git a/chunk.c b/chunk.c
new file mode 100644
index 0000000..d16c2db
--- /dev/null
+++ b/chunk.c
@@ -0,0 +1,29 @@
+#include "chunk.h"
+#include "vendor/yar.h"
+
+/** Append a single raw byte to the chunk's bytecode array. */
+V chunk_emit_byte(Bc *chunk, U8 byte) {
+  *yar_append(chunk) = byte;
+}
+
+/**
+ * Append `num` to the chunk as a signed LEB128 sequence: 7 payload bits per
+ * byte, low group first, bit 0x80 set on every byte except the last.
+ */
+V chunk_emit_sleb128(Bc *chunk, I num) {
+  for (;;) {
+    U8 group = num & 0x7f;
+    num >>= 7;
+    /* The last byte is reached once the remaining bits are pure sign
+     * extension of bit 6 of the group just extracted. */
+    int negative = (group & 0x40) != 0;
+    int done = negative ? (num == -1) : (num == 0);
+    if (!done)
+      group |= 0x80;
+    chunk_emit_byte(chunk, group);
+    if (done)
+      break;
+  }
+}
+
+/** Append `value` to the chunk's constant table and return its index. */
+I chunk_add_constant(Bc *chunk, O value) {
+  I index = chunk->constants.count;
+  *yar_append(&chunk->constants) = value;
+  return index;
+}
+
+/** Release the constant table, then the bytecode array itself. */
+V chunk_free(Bc *chunk) {
+  yar_free(&chunk->constants);
+  yar_free(chunk);
+}
diff --git a/chunk.h b/chunk.h
new file mode 100644
index 0000000..e30bce1
--- /dev/null
+++ b/chunk.h
@@ -0,0 +1,22 @@
+#ifndef CHUNK_H
+#define CHUNK_H
+
+#include "common.h"
+#include "object.h"
+
+/** Bytecode chunk */
+typedef struct Bc {
+  U8 *items;
+  Z count, capacity;
+  struct {
+    O *items;
+    Z count, capacity;
+  } constants;
+} Bc;
+
+V chunk_emit_byte(Bc *, U8);
+V chunk_emit_sleb128(Bc *, I);
+I chunk_add_constant(Bc *, O);
+V 
chunk_free(Bc *);
+
+#endif
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..b71a897
--- /dev/null
+++ b/common.h
@@ -0,0 +1,15 @@
+#ifndef COMMON_H
+#define COMMON_H
+
+#include <stddef.h> /* size_t */
+#include <stdint.h> /* intptr_t, uintptr_t, uint8_t, uint32_t */
+
+typedef void V;
+typedef intptr_t I;
+typedef uintptr_t U;
+typedef double F;
+typedef size_t Z;
+typedef uint8_t U8;
+typedef uint32_t U32;
+
+#endif
diff --git a/gc.c b/gc.c
new file mode 100644
index 0000000..9ddf211
--- /dev/null
+++ b/gc.c
@@ -0,0 +1,126 @@
+#include <assert.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "gc.h"
+#include "vendor/yar.h"
+
+/** Round `n` up to the next multiple of 8. */
+#define ALIGN(n) (((n) + 7) & ~7)
+
+/** Non-zero when `ptr` points inside the current from-space. */
+static inline int infrom(Gc *gc, V *ptr) {
+  const U8 *x = (const U8 *)ptr;
+  return (x >= gc->from.start && x < gc->from.end);
+}
+
+/** Register `ptr` as a root slot; gc_collect() rewrites the slot in place. */
+V gc_addroot(Gc *gc, O *ptr) { *yar_append(&gc->roots) = ptr; }
+/** Return the current number of roots, for a later gc_reset(). */
+I gc_mark(Gc *gc) { return gc->roots.count; }
+/** Drop every root registered after `mark` was taken. */
+V gc_reset(Gc *gc, I mark) { gc->roots.count = mark; }
+
+/**
+ * Copy the object headed by `hdr` from from-space into to-space and leave a
+ * forwarding pointer behind. Returns the boxed address of the copy.
+ *
+ * The forwarding pointer is written into the first word after the header, so
+ * this relies on every object having at least sizeof(O) bytes of payload.
+ * NOTE(review): no allocator is visible here to confirm that -- verify.
+ */
+static O copy(Gc *gc, Hd *hdr) {
+  assert(infrom(gc, hdr));
+  assert(hdr->type != TYPE_FWD);
+
+  Z sz = ALIGN(hdr->size);
+  /* Check for room *before* writing: the scan loop in gc_collect() only
+   * notices an overflow after memcpy has already run past to.end. */
+  if (sz > (Z)(gc->to.end - gc->to.free)) {
+    fprintf(stderr, "fatal GC error: out of memory\n");
+    abort();
+  }
+  Hd *new = (Hd *)gc->to.free;
+  gc->to.free += sz;
+  memcpy(new, hdr, sz);
+
+  hdr->type = TYPE_FWD;
+  O *obj = (O *)(hdr + 1);
+  *obj = BOX(new);
+  return *obj;
+}
+
+/**
+ * Return the post-collection value for `obj`: nil, immediates and pointers
+ * outside from-space are returned unchanged; an already-forwarded object
+ * yields its recorded new address; anything else is copied now.
+ */
+static O forward(Gc *gc, O obj) {
+  if (obj == 0)
+    return 0;
+  if (IMM(obj))
+    return obj;
+  if (!infrom(gc, (V *)obj))
+    return obj;
+
+  Hd *hdr = UNBOX(obj);
+  if (hdr->type == TYPE_FWD) {
+    O *o = (O *)(hdr + 1);
+    return *o;
+  } else {
+    return copy(gc, hdr);
+  }
+}
+
+#if GC_DEBUG
+/** Print from-space usage to stderr, tagged with `label`. */
+static V printstats(Gc *gc, const char *label) {
+  Z used = (Z)(gc->from.free - gc->from.start);
+  fprintf(stderr, "[%s] used=%zu/%zu bytes (%.1f%%)\n", label, used,
+          (Z)HEAP_BYTES, (F)used / (F)HEAP_BYTES * 100.0);
+}
+#endif
+
+/**
+ * Run one copying collection: forward every root, scan the objects copied
+ * into to-space, then swap the two spaces and reset the new to-space.
+ */
+V gc_collect(Gc *gc) {
+  uint8_t *scan = gc->to.free;
+
+#if GC_DEBUG
+  printstats(gc, "before GC");
+#endif
+
+  for (Z i = 0; i < gc->roots.count; i++) {
+    O *o = gc->roots.items[i];
+    *o = forward(gc, *o);
+  }
+
+  while (scan < gc->to.free) {
+    if (scan >= gc->to.end) {
+      fprintf(stderr, "fatal GC error: out of memory\n");
+      abort();
+    }
+    Hd *hdr = (Hd *)scan;
+    switch (hdr->type) {
+    // TODO: the rest of the owl
+    case TYPE_FWD:
+      fprintf(stderr, "fatal GC error: forwarding pointer in to-space\n");
+      abort();
+    default:
+      /* hdr->type is a U32, so it needs the unsigned format macro. */
+      fprintf(stderr, "GC warning: junk object type %" PRIu32 "\n", hdr->type);
+    }
+    scan += ALIGN(hdr->size);
+  }
+
+  Gs tmp = gc->from;
+  gc->from = gc->to;
+  gc->to = tmp;
+  gc->to.free = gc->to.start;
+
+#if GC_DEBUG
+  printstats(gc, "after GC");
+#endif
+}
+
+/** Allocate both semispaces and start with an empty root set; aborts on OOM. */
+void gc_init(Gc *gc) {
+  gc->from.start = malloc(HEAP_BYTES);
+  if (!gc->from.start)
+    goto fatal;
+  gc->from.end = gc->from.start + HEAP_BYTES;
+  gc->from.free = gc->from.start;
+
+  gc->to.start = malloc(HEAP_BYTES);
+  if (!gc->to.start)
+    goto fatal;
+  gc->to.end = gc->to.start + HEAP_BYTES;
+  gc->to.free = gc->to.start;
+
+  gc->roots.capacity = 0;
+  gc->roots.count = 0;
+  gc->roots.items = NULL;
+  return;
+
+fatal:
+  fprintf(stderr, "failed to allocate heap space\n");
+  abort();
+}
+
+/** Run a final collection, then free both semispaces and the root list. */
+void gc_deinit(Gc *gc) {
+  gc_collect(gc);
+  free(gc->from.start);
+  free(gc->to.start);
+  yar_free(&gc->roots);
+}
diff --git a/gc.h b/gc.h
new file mode 100644
index 0000000..386f739
--- /dev/null
+++ b/gc.h
@@ -0,0 +1,31 @@
+#ifndef GC_H
+#define GC_H
+
+#include "common.h"
+#include "object.h"
+
+#define GC_DEBUG 1
+#define HEAP_BYTES (4 * 1024 * 1024)
+
+typedef struct Gs {
+  U8 *start, *end;
+  U8 *free;
+} Gs;
+
+typedef struct Gc {
+  Gs from, to;
+  struct {
+    O **items;
+    Z count, capacity;
+  } roots;
+} Gc;
+
+V gc_addroot(Gc *, O *);
+I gc_mark(Gc *);
+V gc_reset(Gc *, I);
+V gc_collect(Gc *);
+Hd *gc_alloc(Gc *, Z);
+V gc_init(Gc *);
+V gc_deinit(Gc *);
+
+#endif
diff --git a/growl b/growl
new file mode 100755
index 0000000..7eac05e
Binary files /dev/null and b/growl differ
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..dce95f9
--- /dev/null
+++ b/main.c
@@ -0,0 +1,89 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common.h"
+
+#include "chunk.h"
+#include "gc.h"
+#include "parser.h"
+#include "vendor/mpc.h"
+#include "vm.h"
+
+/**
+ * Hex-dump `size` bytes starting at `data` to stdout, 16 bytes per row,
+ * followed by an ASCII column in which non-printable bytes appear as '.'.
+ */
+void dump(const V *data, Z size) {
+  const unsigned char *bytes = data;
+  char ascii[17];
+  Z i, j;
+  ascii[16] = '\0';
+  for (i = 0; i < size; ++i) {
+    printf("%02X ", bytes[i]);
+    if (bytes[i] >= ' ' && bytes[i] <= '~') {
+      ascii[i % 16] = bytes[i];
+    } else {
+      ascii[i % 16] = '.';
+    }
+    if ((i + 1) % 8 == 0 || i + 1 == size) {
+      printf(" ");
+      if ((i + 1) % 16 == 0) {
+        printf("| %s \n", ascii);
+      } else if (i + 1 == size) {
+        /* Last, partial row: terminate the ASCII column and pad the hex. */
+        ascii[(i + 1) % 16] = '\0';
+        if ((i + 1) % 16 <= 8) {
+          printf(" ");
+        }
+        for (j = (i + 1) % 16; j < 16; ++j) {
+          printf(" ");
+        }
+        printf("| %s \n", ascii);
+      }
+    }
+  }
+}
+
+/**
+ * Placeholder REPL: builds a chunk that loads the constant 10 and returns,
+ * runs it on a fresh VM, and returns 0.
+ */
+I repl(void) {
+  Bc chunk = {0};
+  Vm vm = {0};
+
+  vm_init(&vm);
+
+  I idx = chunk_add_constant(&chunk, NUM(10));
+  chunk_emit_byte(&chunk, OP_CONST);
+  chunk_emit_sleb128(&chunk, idx);
+  chunk_emit_byte(&chunk, OP_RETURN);
+
+  vm_run(&vm, &chunk, 0);
+
+  /* The chunk's arrays are heap-allocated; release them before returning. */
+  chunk_free(&chunk);
+
+  return 0;
+}
+
+/**
+ * Parse `fname` with the Program grammar and print the resulting AST.
+ * Returns 0 on success, 1 after printing a parse error to stderr.
+ */
+I loadfile(const char *fname) {
+  Gc gc = {0};
+  gc_init(&gc);
+
+  mpc_result_t res;
+  if (!mpc_parse_contents(fname, Program, &res)) {
+    mpc_err_print_to(res.error, stderr);
+    mpc_err_delete(res.error);
+    gc_deinit(&gc);
+    return 1;
+  }
+
+  mpc_ast_print(res.output);
+  mpc_ast_delete(res.output);
+  gc_deinit(&gc);
+  return 0;
+}
+
+/** No arguments: run the REPL. One argument: load that file. Else: usage. */
+int main(int argc, const char *argv[]) {
+  parser_init();
+  atexit(parser_deinit);
+
+  switch (argc) {
+  case 1:
+    return repl();
+  case 2:
+    return loadfile(argv[1]);
+  default:
+    fprintf(stderr, "usage: growl [file]\n");
+    return 64;
+  }
+}
diff --git a/makefile b/makefile
new file mode 100644
index 0000000..7e917b3
--- /dev/null
+++ b/makefile
@@ -0,0 +1,11 @@
+CC := cc
+CFLAGS := -Og -g -std=c99 -Wpedantic -Wall
+OBJS = chunk.o gc.o main.o object.o parser.o print.o vm.o vendor/mpc.o \
+	vendor/yar.o
+
+growl: $(OBJS)
+	$(CC) -o growl $(OBJS)
+
+.PHONY: clean
+clean:
+	rm -f growl $(OBJS)
diff --git a/object.c b/object.c
new file mode 100644
index 
0000000..e69de29
diff --git a/object.h b/object.h
new file mode 100644
index 0000000..fe16d90
--- /dev/null
+++ b/object.h
@@ -0,0 +1,24 @@
+#ifndef OBJECT_H
+#define OBJECT_H
+
+#include "common.h"
+
+/* Object words: bit 0 set marks an immediate, bit 0 clear a pointer to an
+ * Hd (or NIL when the whole word is zero). */
+#define NIL ((O)0)
+#define BOX(x) ((O)(x))
+#define UNBOX(x) ((Hd *)(x))
+#define IMM(x) ((O)(x) & (O)1)
+/* Shift in the unsigned domain: left-shifting a negative intptr_t is
+ * undefined behaviour, while the unsigned shift yields the same bits. */
+#define NUM(x) ((((O)(intptr_t)(x)) << 1) | (O)1)
+/* NOTE(review): this is a logical shift on an unsigned word, so a negative
+ * value passed through NUM() does not come back negative -- confirm whether
+ * ORD is meant to be unsigned-only. */
+#define ORD(x) ((O)(x) >> 1)
+
+enum {
+  TYPE_FWD,
+};
+
+typedef uintptr_t O;
+
+/** Object header */
+typedef struct Hd {
+  U32 size, type;
+} Hd;
+
+#endif
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..f53b8ba
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,51 @@
+#include "parser.h"
+#include "vendor/mpc.h"
+
+mpc_parser_t *Pragma, *Comment, *Expr, *Number, *String, *Word, *Definition,
+    *Command, *List, *Table, *Quotation, *Program;
+
+/**
+ * Create the twelve named parsers and define them from the grammar below.
+ * Aborts the process if the grammar itself fails to compile.
+ */
+V parser_init(V) {
+  Pragma = mpc_new("pragma");
+  Comment = mpc_new("comment");
+  Expr = mpc_new("expr");
+  Number = mpc_new("number");
+  String = mpc_new("string");
+  Word = mpc_new("word");
+  Definition = mpc_new("def");
+  Command = mpc_new("command");
+  List = mpc_new("list");
+  Table = mpc_new("table");
+  Quotation = mpc_new("quotation");
+  Program = mpc_new("program");
+
+  /* NOTE(review): as written, no rule body below references another rule
+   * (e.g. "expr : ( | | |"), which looks like the <rule> references were
+   * lost somewhere; verify these strings against the real parser.c. */
+  mpc_err_t *err = mpca_lang(
+      MPCA_LANG_DEFAULT,
+      " pragma : '#' ('(' * ')')? ; "
+      " comment : /\\\\[^\\n]*/ ; "
+      " expr : ( | | | "
+      " | | | | "
+      " | | ) ; "
+      " number : ( /0x[0-9A-Fa-f]+/ | /-?[0-9]+/ ) ; "
+      " string : /\"(\\\\.|[^\"])*\"/ ; "
+      " word : /[a-zA-Z0-9_!.,@#$%^&*_+\\-=><|\\/]+/ ; "
+      " def : ':' * ';' ; "
+      " command : ':' + ';' ; "
+      " list : '(' * ')' ; "
+      " table : '{' * '}' ; "
+      " quotation : '[' * ']' ; "
+      " program : /^/ * /$/ ; ",
+      Pragma, Comment, Expr, Number, String, Word, Definition, Command, List,
+      Table, Quotation, Program, NULL);
+
+  // crash if i do a woopsie
+  if (err != NULL) {
+    /* abort() does not flush stdio buffers, so report on unbuffered stderr
+     * (as loadfile() in main.c does) or the message can be lost. */
+    mpc_err_print_to(err, stderr);
+    mpc_err_delete(err);
+    abort();
+  }
+}
+
+/** Undefine and delete all twelve parsers created by parser_init(). */
+V parser_deinit(V) {
+  mpc_cleanup(12, Pragma, Comment, Expr, Number, String, Word, Definition,
+              Command, List, Table, Quotation, Program);
+}
diff --git a/parser.h b/parser.h
new file mode 100644
index 0000000..c0b3f2a
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,13 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "common.h"
+#include "vendor/mpc.h"
+
+
+V parser_init(V);
+V parser_deinit(V);
+
+extern mpc_parser_t *Program;
+
+#endif
diff --git a/print.c b/print.c
new file mode 100644
index 0000000..e69de29
diff --git a/print.h b/print.h
new file mode 100644
index 0000000..e69de29
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000..7ecb062
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,7 @@
+{ pkgs ? import <nixpkgs> {} }:
+
+pkgs.mkShell {
+  buildInputs = with pkgs; [
+    clang-tools bear gdb tinycc
+  ];
+}
diff --git a/test.grr b/test.grr
new file mode 100644
index 0000000..ff43c22
--- /dev/null
+++ b/test.grr
@@ -0,0 +1,3 @@
+\ This is a comment.
+
+: when [] if ;
diff --git a/vendor/mpc.c b/vendor/mpc.c
new file mode 100644
index 0000000..73a658b
--- /dev/null
+++ b/vendor/mpc.c
@@ -0,0 +1,4128 @@
+#include "mpc.h"
+
+/*
+** State Type
+*/
+
+/* Build a state whose pos, row and col are all -1 (term is 0). */
+static mpc_state_t mpc_state_invalid(void) {
+  mpc_state_t s;
+  s.pos = -1;
+  s.row = -1;
+  s.col = -1;
+  s.term = 0;
+  return s;
+}
+
+/* Build a state with every field set to 0. */
+static mpc_state_t mpc_state_new(void) {
+  mpc_state_t s;
+  s.pos = 0;
+  s.row = 0;
+  s.col = 0;
+  s.term = 0;
+  return s;
+}
+
+/*
+** Input Type
+*/
+
+/*
+** In mpc the input type has three modes of
+** operation: String, File and Pipe.
+**
+** String is easy. The whole contents are
+** loaded into a buffer and scanned through.
+** The cursor can jump around at will making
+** backtracking easy.
+**
+** The second is a File which is also somewhat
+** easy. The contents are never loaded into
+** memory but backtracking can still be achieved
+** by seeking in the file at different positions.
+**
+** The final mode is Pipe. This is the difficult
+** one. As we assume pipes cannot be seeked - and
+** only support a single character lookahead at
+** any point, when the input is marked for a
+** potential backtracking we start buffering any
+** input.
+**
+** This means that if we are requested to seek
+** back we can simply start reading from the
+** buffer instead of the input.
+**
+** Of course using `mpc_predictive` will disable
+** backtracking and make LL(1) grammars easy
+** to parse for all input methods.
+**
+*/
+
+enum {
+  MPC_INPUT_STRING = 0,
+  MPC_INPUT_FILE = 1,
+  MPC_INPUT_PIPE = 2
+};
+
+enum {
+  MPC_INPUT_MARKS_MIN = 32
+};
+
+enum {
+  MPC_INPUT_MEM_NUM = 512
+};
+
+/* One fixed-size slot of the per-input allocation pool. */
+typedef struct {
+  char mem[64];
+} mpc_mem_t;
+
+/* Parser input: source (string/file/pipe), current state, the stack of
+** backtracking marks, and a pool of MPC_INPUT_MEM_NUM small slots. */
+typedef struct {
+
+  int type;
+  char *filename;
+  mpc_state_t state;
+
+  char *string;
+  char *buffer;
+  FILE *file;
+
+  int suppress;
+  int backtrack;
+  int marks_slots;
+  int marks_num;
+  mpc_state_t *marks;
+
+  char *lasts;
+  char last;
+
+  size_t mem_index;
+  char mem_full[MPC_INPUT_MEM_NUM];
+  mpc_mem_t mem[MPC_INPUT_MEM_NUM];
+
+} mpc_input_t;
+
+/* Create a string input holding private copies of `filename` and `string`.
+** None of the malloc results are checked. */
+static mpc_input_t *mpc_input_new_string(const char *filename, const char *string) {
+
+  mpc_input_t *i = malloc(sizeof(mpc_input_t));
+
+  i->filename = malloc(strlen(filename) + 1);
+  strcpy(i->filename, filename);
+  i->type = MPC_INPUT_STRING;
+
+  i->state = mpc_state_new();
+
+  i->string = malloc(strlen(string) + 1);
+  strcpy(i->string, string);
+  i->buffer = NULL;
+  i->file = NULL;
+
+  i->suppress = 0;
+  i->backtrack = 1;
+  i->marks_num = 0;
+  i->marks_slots = MPC_INPUT_MARKS_MIN;
+  i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots);
+  i->lasts = malloc(sizeof(char) * i->marks_slots);
+  i->last = '\0';
+
+  i->mem_index = 0;
+  memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM);
+
+  return i;
+}
+
+/* As mpc_input_new_string, but copies at most `length` bytes of `string`
+** and always writes a terminator at index `length`. */
+static mpc_input_t *mpc_input_new_nstring(const char *filename, const char *string, size_t length) {
+
+  mpc_input_t *i = malloc(sizeof(mpc_input_t));
+
+  i->filename = malloc(strlen(filename) + 1);
+  strcpy(i->filename, filename);
+  i->type = MPC_INPUT_STRING;
+
+  i->state = mpc_state_new();
+
+  i->string = malloc(length + 1);
+  strncpy(i->string, string, length);
+  i->string[length] = '\0';
+  i->buffer = NULL;
+  i->file = NULL;
+
+  i->suppress = 0;
+  i->backtrack = 1;
+  i->marks_num = 0;
+  i->marks_slots = MPC_INPUT_MARKS_MIN;
+  i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots);
+  i->lasts = malloc(sizeof(char) * i->marks_slots);
+  i->last = '\0';
+
+  i->mem_index = 0;
+  memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM);
+
+  return i;
+
+}
+
+/* Create a pipe input reading from `pipe`; the stream is stored, not copied. */
+static mpc_input_t *mpc_input_new_pipe(const char *filename, FILE *pipe) {
+
+  mpc_input_t *i = malloc(sizeof(mpc_input_t));
+
+  i->filename = malloc(strlen(filename) + 1);
+  strcpy(i->filename, filename);
+
+  i->type = MPC_INPUT_PIPE;
+  i->state = mpc_state_new();
+
+  i->string = NULL;
+  i->buffer = NULL;
+  i->file = pipe;
+
+  i->suppress = 0;
+  i->backtrack = 1;
+  i->marks_num = 0;
+  i->marks_slots = MPC_INPUT_MARKS_MIN;
+  i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots);
+  i->lasts = malloc(sizeof(char) * i->marks_slots);
+  i->last = '\0';
+
+  i->mem_index = 0;
+  memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM);
+
+  return i;
+
+}
+
+/* Create a file input reading from `file`; the stream is stored, not copied. */
+static mpc_input_t *mpc_input_new_file(const char *filename, FILE *file) {
+
+  mpc_input_t *i = malloc(sizeof(mpc_input_t));
+
+  i->filename = malloc(strlen(filename) + 1);
+  strcpy(i->filename, filename);
+  i->type = MPC_INPUT_FILE;
+  i->state = mpc_state_new();
+
+  i->string = NULL;
+  i->buffer = NULL;
+  i->file = file;
+
+  i->suppress = 0;
+  i->backtrack = 1;
+  i->marks_num = 0;
+  i->marks_slots = MPC_INPUT_MARKS_MIN;
+  i->marks = malloc(sizeof(mpc_state_t) * i->marks_slots);
+  i->lasts = malloc(sizeof(char) * i->marks_slots);
+  i->last = '\0';
+
+  i->mem_index = 0;
+  memset(i->mem_full, 0, sizeof(char) * MPC_INPUT_MEM_NUM);
+
+  return i;
+}
+
+/* Free the input and what it owns. The FILE stream is not closed here. */
+static void mpc_input_delete(mpc_input_t *i) {
+
+  free(i->filename);
+
+  if (i->type == MPC_INPUT_STRING) { free(i->string); }
+  if (i->type == MPC_INPUT_PIPE) { free(i->buffer); }
+
+  free(i->marks);
+  free(i->lasts);
+  free(i);
+}
+
+/* Non-zero when `p` points inside this input's slot pool. */
+static int mpc_mem_ptr(mpc_input_t *i, void *p) {
+  return
+    (char*)p >= (char*)(i->mem) &&
+    (char*)p < (char*)(i->mem) + (MPC_INPUT_MEM_NUM * sizeof(mpc_mem_t));
+}
+
+/* Allocate `n` bytes: requests larger than one slot go straight to malloc;
+** otherwise take the first free slot at or after mem_index (wrapping), and
+** fall back to malloc when every slot is in use. */
+static void *mpc_malloc(mpc_input_t *i, size_t n) {
+  size_t j;
+  char *p;
+
+  if (n > sizeof(mpc_mem_t)) { return malloc(n); }
+
+  j = i->mem_index;
+  do {
+    if (!i->mem_full[i->mem_index]) {
+      p = (void*)(i->mem + i->mem_index);
+      i->mem_full[i->mem_index] = 1;
+      i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM;
+      return p;
+    }
+    i->mem_index = (i->mem_index+1) % MPC_INPUT_MEM_NUM;
+  } while (j != i->mem_index);
+
+  return malloc(n);
+}
+
+/* mpc_malloc of n * m bytes, zero-filled. The product is not overflow-checked. */
+static void *mpc_calloc(mpc_input_t *i, size_t n, size_t m) {
+  char *x = mpc_malloc(i, n * m);
+  memset(x, 0, n * m);
+  return x;
+}
+
+/* Release `p`: heap pointers go to free(), pool slots are marked unused. */
+static void mpc_free(mpc_input_t *i, void *p) {
+  size_t j;
+  if (!mpc_mem_ptr(i, p)) { free(p); return; }
+  j = ((size_t)(((char*)p) - ((char*)i->mem))) / sizeof(mpc_mem_t);
+  i->mem_full[j] = 0;
+}
+
+/* Resize `p` to `n` bytes. A pool slot that must outgrow the slot size is
+** moved to the heap (a full slot's worth of bytes is copied); a pool slot
+** that still fits is returned unchanged. */
+static void *mpc_realloc(mpc_input_t *i, void *p, size_t n) {
+
+  char *q = NULL;
+
+  if (!mpc_mem_ptr(i, p)) { return realloc(p, n); }
+
+  if (n > sizeof(mpc_mem_t)) {
+    q = malloc(n);
+    memcpy(q, p, sizeof(mpc_mem_t));
+    mpc_free(i, p);
+    return q;
+  }
+
+  return p;
+}
+
+/* Return a heap pointer for `p`: pool slots are copied to a fresh
+** slot-sized malloc block and released; other pointers pass through. */
+static void *mpc_export(mpc_input_t *i, void *p) {
+  char *q = NULL;
+  if (!mpc_mem_ptr(i, p)) { return p; }
+  q = malloc(sizeof(mpc_mem_t));
+  memcpy(q, p, sizeof(mpc_mem_t));
+  mpc_free(i, p);
+  return q;
+}
+
+/* Counters: mark/unmark/rewind are no-ops while backtrack < 1. */
+static void mpc_input_backtrack_disable(mpc_input_t *i) { i->backtrack--; }
+static void mpc_input_backtrack_enable(mpc_input_t *i) { i->backtrack++; }
+
+/* Counters: while suppress is non-zero the error constructors return NULL. */
+static void mpc_input_suppress_disable(mpc_input_t *i) { i->suppress--; }
+static void mpc_input_suppress_enable(mpc_input_t *i) { i->suppress++; }
+
+/* Push the current state and last character onto the mark stack, growing it
+** when full. For pipes, the first mark starts an empty replay buffer. */
+static void mpc_input_mark(mpc_input_t *i) {
+
+  if (i->backtrack < 1) { return; }
+
+  i->marks_num++;
+
+  if (i->marks_num > i->marks_slots) {
+    i->marks_slots = i->marks_num + i->marks_num / 2;
+    i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots);
+    i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots);
+  }
+
+  i->marks[i->marks_num-1] = i->state;
+  i->lasts[i->marks_num-1] = i->last;
+
+  if (i->type == MPC_INPUT_PIPE && i->marks_num == 1) {
+    i->buffer = calloc(1, 1);
+  }
+
+}
+
+/* Pop the top mark, shrinking the stack when it is mostly empty. For pipes,
+** dropping the last mark pushes the buffered bytes back with ungetc (last
+** byte first) and frees the buffer. */
+static void mpc_input_unmark(mpc_input_t *i) {
+  int j;
+
+  if (i->backtrack < 1) { return; }
+
+  i->marks_num--;
+
+  if (i->marks_slots > i->marks_num + i->marks_num / 2
+  && i->marks_slots > MPC_INPUT_MARKS_MIN) {
+    i->marks_slots =
+      i->marks_num > MPC_INPUT_MARKS_MIN ?
+      i->marks_num : MPC_INPUT_MARKS_MIN;
+    i->marks = realloc(i->marks, sizeof(mpc_state_t) * i->marks_slots);
+    i->lasts = realloc(i->lasts, sizeof(char) * i->marks_slots);
+  }
+
+  if (i->type == MPC_INPUT_PIPE && i->marks_num == 0) {
+    for (j = strlen(i->buffer) - 1; j >= 0; j--)
+      ungetc(i->buffer[j], i->file);
+
+    free(i->buffer);
+    i->buffer = NULL;
+  }
+
+}
+
+/* Restore the state saved by the top mark (seeking the file for file
+** inputs), then pop that mark. */
+static void mpc_input_rewind(mpc_input_t *i) {
+
+  if (i->backtrack < 1) { return; }
+
+  i->state = i->marks[i->marks_num-1];
+  i->last = i->lasts[i->marks_num-1];
+
+  if (i->type == MPC_INPUT_FILE) {
+    fseek(i->file, i->state.pos, SEEK_SET);
+  }
+
+  mpc_input_unmark(i);
+}
+
+/* Non-zero when the current position is still covered by the pipe buffer,
+** which begins at the position of the oldest mark. */
+static int mpc_input_buffer_in_range(mpc_input_t *i) {
+  return i->state.pos < (long)(strlen(i->buffer) + i->marks[0].pos);
+}
+
+/* Read the buffered character for the current position. */
+static char mpc_input_buffer_get(mpc_input_t *i) {
+  return i->buffer[i->state.pos - i->marks[0].pos];
+}
+
+/* Fetch the character at the current position; state.pos is not advanced
+** here (mpc_input_success does that). */
+static char mpc_input_getc(mpc_input_t *i) {
+
+  char c = '\0';
+
+  switch (i->type) {
+
+    case MPC_INPUT_STRING: return i->string[i->state.pos];
+    case MPC_INPUT_FILE: c = fgetc(i->file); return c;
+    case MPC_INPUT_PIPE:
+
+      if (!i->buffer) { c = getc(i->file); return c; }
+
+      if (i->buffer && mpc_input_buffer_in_range(i)) {
+        c = mpc_input_buffer_get(i);
+        return c;
+      } else {
+        c = getc(i->file);
+        return c;
+      }
+
+    default: return c;
+  }
+}
+
+/* Look at the next character without consuming it; returns '\0' at end of
+** file/pipe. */
+static char mpc_input_peekc(mpc_input_t *i) {
+
+  char c = '\0';
+
+  switch (i->type) {
+    case MPC_INPUT_STRING: return i->string[i->state.pos];
+    case MPC_INPUT_FILE:
+
+      c = fgetc(i->file);
+      if (feof(i->file)) { return '\0'; }
+
+      fseek(i->file, -1, SEEK_CUR);
+      return c;
+
+    case MPC_INPUT_PIPE:
+
+      if (!i->buffer) {
+        c = getc(i->file);
+        if (feof(i->file)) { return '\0'; }
+        ungetc(c, i->file);
+        return c;
+      }
+
+      if (i->buffer && mpc_input_buffer_in_range(i)) {
+        return mpc_input_buffer_get(i);
+      } else {
+        c = getc(i->file);
+        if (feof(i->file)) { return '\0'; }
+        ungetc(c, i->file);
+        return c;
+      }
+
+    default: return c;
+  }
+
+}
+
+/* Non-zero when the next character is '\0'. */
+static int mpc_input_terminated(mpc_input_t *i) {
+  return mpc_input_peekc(i) == '\0';
+}
+
+/* Undo a mpc_input_getc of `c` that did not match (seek back / ungetc as
+** the input type requires). Always returns 0. */
+static int mpc_input_failure(mpc_input_t *i, char c) {
+
+  switch (i->type) {
+    case MPC_INPUT_STRING: { break; }
+    case MPC_INPUT_FILE: fseek(i->file, -1, SEEK_CUR); { break; }
+    case MPC_INPUT_PIPE: {
+
+      if (!i->buffer) { ungetc(c, i->file); break; }
+
+      if (i->buffer && mpc_input_buffer_in_range(i)) {
+        break;
+      } else {
+        ungetc(c, i->file);
+      }
+    }
+    default: { break; }
+  }
+  return 0;
+}
+
+/* Commit `c` as consumed: append it to the pipe buffer when one is active
+** and the position is past its end, advance pos/col (and row on '\n'), and,
+** if `o` is non-NULL, store a new one-character string in *o. Returns 1. */
+static int mpc_input_success(mpc_input_t *i, char c, char **o) {
+
+  if (i->type == MPC_INPUT_PIPE
+  && i->buffer && !mpc_input_buffer_in_range(i)) {
+    i->buffer = realloc(i->buffer, strlen(i->buffer) + 2);
+    i->buffer[strlen(i->buffer) + 1] = '\0';
+    i->buffer[strlen(i->buffer) + 0] = c;
+  }
+
+  i->last = c;
+  i->state.pos++;
+  i->state.col++;
+
+  if (c == '\n') {
+    i->state.col = 0;
+    i->state.row++;
+  }
+
+  if (o) {
+    (*o) = mpc_malloc(i, 2);
+    (*o)[0] = c;
+    (*o)[1] = '\0';
+  }
+
+  return 1;
+}
+
+/* Consume any one character; fails only at end of input. */
+static int mpc_input_any(mpc_input_t *i, char **o) {
+  char x;
+  if (mpc_input_terminated(i)) { return 0; }
+  x = mpc_input_getc(i);
+  return mpc_input_success(i, x, o);
+}
+
+/* Consume the next character if it equals `c`. */
+static int mpc_input_char(mpc_input_t *i, char c, char **o) {
+  char x;
+  if (mpc_input_terminated(i)) { return 0; }
+  x = mpc_input_getc(i);
+  return x == c ? mpc_input_success(i, x, o) : mpc_input_failure(i, x);
+}
+
+/* Consume the next character if it lies in the inclusive range [c, d]. */
+static int mpc_input_range(mpc_input_t *i, char c, char d, char **o) {
+  char x;
+  if (mpc_input_terminated(i)) { return 0; }
+  x = mpc_input_getc(i);
+  return x >= c && x <= d ? mpc_input_success(i, x, o) : mpc_input_failure(i, x);
+}
+
+/* Consume the next character if it occurs in the string `c`. */
+static int mpc_input_oneof(mpc_input_t *i, const char *c, char **o) {
+  char x;
+  if (mpc_input_terminated(i)) { return 0; }
+  x = mpc_input_getc(i);
+  return strchr(c, x) != 0 ?
mpc_input_success(i, x, o) : mpc_input_failure(i, x);
+}
+
+/* Consume the next character if it does not occur in the string `c`. */
+static int mpc_input_noneof(mpc_input_t *i, const char *c, char **o) {
+  char x;
+  if (mpc_input_terminated(i)) { return 0; }
+  x = mpc_input_getc(i);
+  return strchr(c, x) == 0 ? mpc_input_success(i, x, o) : mpc_input_failure(i, x);
+}
+
+/* Consume the next character if the predicate `cond` accepts it. */
+static int mpc_input_satisfy(mpc_input_t *i, int(*cond)(char), char **o) {
+  char x;
+  if (mpc_input_terminated(i)) { return 0; }
+  x = mpc_input_getc(i);
+  return cond(x) ? mpc_input_success(i, x, o) : mpc_input_failure(i, x);
+}
+
+/* Consume the exact string `c`, rewinding to the starting position on the
+** first mismatch. On success *o receives a copy of `c`. */
+static int mpc_input_string(mpc_input_t *i, const char *c, char **o) {
+
+  const char *x = c;
+
+  mpc_input_mark(i);
+  while (*x) {
+    if (!mpc_input_char(i, *x, NULL)) {
+      mpc_input_rewind(i);
+      return 0;
+    }
+    x++;
+  }
+  mpc_input_unmark(i);
+
+  *o = mpc_malloc(i, strlen(c) + 1);
+  strcpy(*o, c);
+  return 1;
+}
+
+/* Zero-width test: calls `f` with the last consumed and the next character. */
+static int mpc_input_anchor(mpc_input_t* i, int(*f)(char,char), char **o) {
+  *o = NULL;
+  return f(i->last, mpc_input_peekc(i));
+}
+
+/* Zero-width test for start of input (nothing consumed yet). */
+static int mpc_input_soi(mpc_input_t* i, char **o) {
+  *o = NULL;
+  return i->last == '\0';
+}
+
+/* Zero-width test for end of input; succeeds at most once per state, since
+** it sets state.term on success and fails while that flag is set. */
+static int mpc_input_eoi(mpc_input_t* i, char **o) {
+  *o = NULL;
+  if (i->state.term) {
+    return 0;
+  } else if (mpc_input_terminated(i)) {
+    i->state.term = 1;
+    return 1;
+  } else {
+    return 0;
+  }
+}
+
+/* Return a newly allocated copy of the current input state. */
+static mpc_state_t *mpc_input_state_copy(mpc_input_t *i) {
+  mpc_state_t *r = mpc_malloc(i, sizeof(mpc_state_t));
+  memcpy(r, &i->state, sizeof(mpc_state_t));
+  return r;
+}
+
+/*
+** Error Type
+*/
+
+/* Free an exported (heap-only) error and everything it owns. */
+void mpc_err_delete(mpc_err_t *x) {
+  int i;
+  for (i = 0; i < x->expected_num; i++) { free(x->expected[i]); }
+  free(x->expected);
+  free(x->filename);
+  free(x->failure);
+  free(x);
+}
+
+/* Print the formatted error to stdout. */
+void mpc_err_print(mpc_err_t *x) {
+  mpc_err_print_to(x, stdout);
+}
+
+/* Print the formatted error to stream `f`. */
+void mpc_err_print_to(mpc_err_t *x, FILE *f) {
+  char *str = mpc_err_string(x);
+  fprintf(f, "%s", str);
+  free(str);
+}
+
+/* Append formatted text to `buffer` at offset *pos and advance *pos.
+** *max is the highest index that may hold a character other than the
+** terminator, so at most (*max - *pos) characters are written and output
+** that does not fit is truncated instead of overrunning the buffer. */
+static void mpc_err_string_cat(char *buffer, int *pos, int *max, char const *fmt, ...) {
+  int left = ((*max) - (*pos));
+  int n;
+  va_list va;
+  if (left <= 0) { return; }
+  va_start(va, fmt);
+  n = vsnprintf(buffer + (*pos), (size_t)left + 1, fmt, va);
+  va_end(va);
+  if (n < 0) { buffer[*pos] = '\0'; return; }
+  /* vsnprintf reports the untruncated length; only count what was stored. */
+  (*pos) += n > left ? left : n;
+}
+
+/* Describe character `c` for an error message: a name for the common
+** control characters and space, otherwise `c` quoted inside the caller's
+** 4-byte buffer. */
+static const char *mpc_err_char_unescape(char c, char char_unescape_buffer[4]) {
+
+  char_unescape_buffer[0] = '\'';
+  char_unescape_buffer[1] = ' ';
+  char_unescape_buffer[2] = '\'';
+  char_unescape_buffer[3] = '\0';
+
+  switch (c) {
+    case '\a': return "bell";
+    case '\b': return "backspace";
+    case '\f': return "formfeed";
+    case '\r': return "carriage return";
+    case '\v': return "vertical tab";
+    case '\0': return "end of input";
+    case '\n': return "newline";
+    case '\t': return "tab";
+    case ' ' : return "space";
+    default:
+      char_unescape_buffer[1] = c;
+      return char_unescape_buffer;
+  }
+
+}
+
+/* Format error `x` into a newly allocated string that the caller frees.
+** The message is limited to 1023 characters. */
+char *mpc_err_string(mpc_err_t *x) {
+
+  int i;
+  int pos = 0;
+  int max = 1023;
+  char *buffer = calloc(1, 1024);
+  char *shrunk;
+  char char_unescape_buffer[4];
+
+  if (x->failure) {
+    mpc_err_string_cat(buffer, &pos, &max,
+    "%s: error: %s\n", x->filename, x->failure);
+    return buffer;
+  }
+
+  mpc_err_string_cat(buffer, &pos, &max,
+    "%s:%li:%li: error: expected ", x->filename, x->state.row+1, x->state.col+1);
+
+  if (x->expected_num == 0) { mpc_err_string_cat(buffer, &pos, &max, "ERROR: NOTHING EXPECTED"); }
+  if (x->expected_num == 1) { mpc_err_string_cat(buffer, &pos, &max, "%s", x->expected[0]); }
+  if (x->expected_num >= 2) {
+
+    for (i = 0; i < x->expected_num-2; i++) {
+      mpc_err_string_cat(buffer, &pos, &max, "%s, ", x->expected[i]);
+    }
+
+    mpc_err_string_cat(buffer, &pos, &max, "%s or %s",
+      x->expected[x->expected_num-2],
+      x->expected[x->expected_num-1]);
+  }
+
+  mpc_err_string_cat(buffer, &pos, &max, " at ");
+  /* The description may contain the received character verbatim (e.g. '%'),
+  ** so it must be passed as data, never as the format string. */
+  mpc_err_string_cat(buffer, &pos, &max, "%s", mpc_err_char_unescape(x->received, char_unescape_buffer));
+  mpc_err_string_cat(buffer, &pos, &max, "\n");
+
+  /* Shrink to fit; if realloc fails the original block is still valid. */
+  shrunk = realloc(buffer, strlen(buffer) + 1);
+  return shrunk ? shrunk : buffer;
+}
+
+static mpc_err_t *mpc_err_new(mpc_input_t *i, const char 
*expected) {
+  mpc_err_t *x;
+  if (i->suppress) { return NULL; }
+  x = mpc_malloc(i, sizeof(mpc_err_t));
+  x->filename = mpc_malloc(i, strlen(i->filename) + 1);
+  strcpy(x->filename, i->filename);
+  x->state = i->state;
+  x->expected_num = 1;
+  x->expected = mpc_malloc(i, sizeof(char*));
+  x->expected[0] = mpc_malloc(i, strlen(expected) + 1);
+  strcpy(x->expected[0], expected);
+  x->failure = NULL;
+  x->received = mpc_input_peekc(i);
+  return x;
+}
+
+/* Build an error carrying a free-form `failure` message at the current
+** input state, or NULL while errors are suppressed. */
+static mpc_err_t *mpc_err_fail(mpc_input_t *i, const char *failure) {
+  mpc_err_t *x;
+  if (i->suppress) { return NULL; }
+  x = mpc_malloc(i, sizeof(mpc_err_t));
+  x->filename = mpc_malloc(i, strlen(i->filename) + 1);
+  strcpy(x->filename, i->filename);
+  x->state = i->state;
+  x->expected_num = 0;
+  x->expected = NULL;
+  x->failure = mpc_malloc(i, strlen(failure) + 1);
+  strcpy(x->failure, failure);
+  x->received = ' ';
+  return x;
+}
+
+/* Build a failure error without an input object; everything is allocated
+** with plain malloc. */
+static mpc_err_t *mpc_err_file(const char *filename, const char *failure) {
+  mpc_err_t *x;
+  x = malloc(sizeof(mpc_err_t));
+  x->filename = malloc(strlen(filename) + 1);
+  strcpy(x->filename, filename);
+  x->state = mpc_state_new();
+  x->expected_num = 0;
+  x->expected = NULL;
+  x->failure = malloc(strlen(failure) + 1);
+  strcpy(x->failure, failure);
+  x->received = ' ';
+  return x;
+}
+
+/* Free an error whose parts may live in the input's slot pool; NULL is
+** accepted and ignored. */
+static void mpc_err_delete_internal(mpc_input_t *i, mpc_err_t *x) {
+  int j;
+  if (x == NULL) { return; }
+  for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); }
+  mpc_free(i, x->expected);
+  mpc_free(i, x->filename);
+  mpc_free(i, x->failure);
+  mpc_free(i, x);
+}
+
+/* Move every pool-allocated part of `x` onto the heap and return the
+** exported error. */
+static mpc_err_t *mpc_err_export(mpc_input_t *i, mpc_err_t *x) {
+  int j;
+  for (j = 0; j < x->expected_num; j++) {
+    x->expected[j] = mpc_export(i, x->expected[j]);
+  }
+  x->expected = mpc_export(i, x->expected);
+  x->filename = mpc_export(i, x->filename);
+  x->failure = mpc_export(i, x->failure);
+  return mpc_export(i, x);
+}
+
+/* Non-zero when `expected` is already listed in x->expected. */
+static int mpc_err_contains_expected(mpc_input_t *i, mpc_err_t *x, char *expected) {
+  int j;
+  (void)i;
+  for (j = 0; j < x->expected_num; j++) {
+    if (strcmp(x->expected[j], expected) == 0) { return 1; }
+  }
+  return 0;
+}
+
+/* Append a copy of `expected` to x->expected. */
+static void mpc_err_add_expected(mpc_input_t *i, mpc_err_t *x, char *expected) {
+  (void)i;
+  x->expected_num++;
+  x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num);
+  x->expected[x->expected_num-1] = mpc_malloc(i, strlen(expected) + 1);
+  strcpy(x->expected[x->expected_num-1], expected);
+}
+
+/* Merge the `n` errors in `x` (entries may be NULL) into one new error and
+** delete the inputs. Only errors at the furthest position contribute: the
+** first one with a failure message wins outright, otherwise their expected
+** lists are unioned. Returns NULL when every entry is NULL. */
+static mpc_err_t *mpc_err_or(mpc_input_t *i, mpc_err_t** x, int n) {
+
+  int j, k, fst;
+  mpc_err_t *e;
+
+  fst = -1;
+  for (j = 0; j < n; j++) {
+    if (x[j] != NULL) { fst = j; }
+  }
+
+  if (fst == -1) { return NULL; }
+
+  e = mpc_malloc(i, sizeof(mpc_err_t));
+  e->state = mpc_state_invalid();
+  e->expected_num = 0;
+  e->expected = NULL;
+  e->failure = NULL;
+  e->filename = mpc_malloc(i, strlen(x[fst]->filename)+1);
+  strcpy(e->filename, x[fst]->filename);
+
+  for (j = 0; j < n; j++) {
+    if (x[j] == NULL) { continue; }
+    if (x[j]->state.pos > e->state.pos) { e->state = x[j]->state; }
+  }
+
+  for (j = 0; j < n; j++) {
+    if (x[j] == NULL) { continue; }
+    if (x[j]->state.pos < e->state.pos) { continue; }
+
+    if (x[j]->failure) {
+      e->failure = mpc_malloc(i, strlen(x[j]->failure)+1);
+      strcpy(e->failure, x[j]->failure);
+      break;
+    }
+
+    e->received = x[j]->received;
+
+    for (k = 0; k < x[j]->expected_num; k++) {
+      if (!mpc_err_contains_expected(i, e, x[j]->expected[k])) {
+        mpc_err_add_expected(i, e, x[j]->expected[k]);
+      }
+    }
+  }
+
+  for (j = 0; j < n; j++) {
+    if (x[j] == NULL) { continue; }
+    mpc_err_delete_internal(i, x[j]);
+  }
+
+  return e;
+}
+
+static mpc_err_t *mpc_err_repeat(mpc_input_t *i, mpc_err_t *x, const char *prefix) {
+
+  int j = 0;
+  size_t l = 0;
+  char *expect = NULL;
+
+  if (x == NULL) { return NULL; }
+
+  if (x->expected_num == 0) {
+    expect = mpc_calloc(i, 1, 1);
+    x->expected_num = 1;
+    x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num);
+    x->expected[0] = expect;
+
return x; + } + + else if (x->expected_num == 1) { + expect = mpc_malloc(i, strlen(prefix) + strlen(x->expected[0]) + 1); + strcpy(expect, prefix); + strcat(expect, x->expected[0]); + mpc_free(i, x->expected[0]); + x->expected[0] = expect; + return x; + } + + else if (x->expected_num > 1) { + + /* first measure "<prefix>a, b, ... y or z", then build it */ + l += strlen(prefix); + for (j = 0; j < x->expected_num-2; j++) { + l += strlen(x->expected[j]) + strlen(", "); + } + l += strlen(x->expected[x->expected_num-2]); + l += strlen(" or "); + l += strlen(x->expected[x->expected_num-1]); + + expect = mpc_malloc(i, l + 1); + + strcpy(expect, prefix); + for (j = 0; j < x->expected_num-2; j++) { + strcat(expect, x->expected[j]); strcat(expect, ", "); + } + strcat(expect, x->expected[x->expected_num-2]); + strcat(expect, " or "); + strcat(expect, x->expected[x->expected_num-1]); + + for (j = 0; j < x->expected_num; j++) { mpc_free(i, x->expected[j]); } + + x->expected_num = 1; + x->expected = mpc_realloc(i, x->expected, sizeof(char*) * x->expected_num); + x->expected[0] = expect; + return x; + } + + return NULL; +} + +/* Rewrite x's expectation as "one or more of ...". */ +static mpc_err_t *mpc_err_many1(mpc_input_t *i, mpc_err_t *x) { + return mpc_err_repeat(i, x, "one or more of "); +} + +/* Rewrite x's expectation as "<n> of ...". The prefix is built in a stack buffer: a sign, the decimal digits of an int of up to 64 bits, " of " and the terminator fit in 32 bytes. The earlier heap buffer was sized from n/10 + 1, which is too small for negative n. */ +static mpc_err_t *mpc_err_count(mpc_input_t *i, mpc_err_t *x, int n) { + char prefix[32]; + sprintf(prefix, "%i of ", n); + return mpc_err_repeat(i, x, prefix); +} + +/* Merge two errors (either may be NULL) with mpc_err_or; both inputs are consumed. */ +static mpc_err_t *mpc_err_merge(mpc_input_t *i, mpc_err_t *x, mpc_err_t *y) { + mpc_err_t *errs[2]; + errs[0] = x; + errs[1] = y; + return mpc_err_or(i, errs, 2); +} + +/* +** Parser Type +*/ + +enum { + MPC_TYPE_UNDEFINED = 0, + MPC_TYPE_PASS = 1, + MPC_TYPE_FAIL = 2, + MPC_TYPE_LIFT = 3, + MPC_TYPE_LIFT_VAL = 4, + MPC_TYPE_EXPECT = 5, + MPC_TYPE_ANCHOR = 6, + MPC_TYPE_STATE = 7, + + MPC_TYPE_ANY = 8, + MPC_TYPE_SINGLE = 9, + MPC_TYPE_ONEOF = 10, + MPC_TYPE_NONEOF = 11, +
MPC_TYPE_RANGE = 12, + MPC_TYPE_SATISFY = 13, + MPC_TYPE_STRING = 14, + + MPC_TYPE_APPLY = 15, + MPC_TYPE_APPLY_TO = 16, + MPC_TYPE_PREDICT = 17, + MPC_TYPE_NOT = 18, + MPC_TYPE_MAYBE = 19, + MPC_TYPE_MANY = 20, + MPC_TYPE_MANY1 = 21, + MPC_TYPE_COUNT = 22, + + MPC_TYPE_OR = 23, + MPC_TYPE_AND = 24, + + MPC_TYPE_CHECK = 25, + MPC_TYPE_CHECK_WITH = 26, + + MPC_TYPE_SOI = 27, + MPC_TYPE_EOI = 28, + + MPC_TYPE_SEPBY1 = 29 +}; + +/* Payload structs, one per family of parser types; they are overlaid in the mpc_pdata_t union below and read according to the parser's type tag. Several types share a payload: ONEOF/NONEOF/STRING use `string`, NOT/MAYBE use `not`, MANY/MANY1/COUNT use `repeat`. */ typedef struct { char *m; } mpc_pdata_fail_t; +typedef struct { mpc_ctor_t lf; void *x; } mpc_pdata_lift_t; +typedef struct { mpc_parser_t *x; char *m; } mpc_pdata_expect_t; +typedef struct { int(*f)(char,char); } mpc_pdata_anchor_t; +typedef struct { char x; } mpc_pdata_single_t; +typedef struct { char x; char y; } mpc_pdata_range_t; +typedef struct { int(*f)(char); } mpc_pdata_satisfy_t; +typedef struct { char *x; } mpc_pdata_string_t; +typedef struct { mpc_parser_t *x; mpc_apply_t f; } mpc_pdata_apply_t; +typedef struct { mpc_parser_t *x; mpc_apply_to_t f; void *d; } mpc_pdata_apply_to_t; +typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_t f; char *e; } mpc_pdata_check_t; +typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_check_with_t f; void *d; char *e; } mpc_pdata_check_with_t; +typedef struct { mpc_parser_t *x; } mpc_pdata_predict_t; +typedef struct { mpc_parser_t *x; mpc_dtor_t dx; mpc_ctor_t lf; } mpc_pdata_not_t; +typedef struct { int n; mpc_fold_t f; mpc_parser_t *x; mpc_dtor_t dx; } mpc_pdata_repeat_t; +typedef struct { int n; mpc_parser_t **xs; } mpc_pdata_or_t; +/* dxs holds n-1 destructors: mpc_and reads n parsers followed by n-1 destructors */ typedef struct { int n; mpc_fold_t f; mpc_parser_t **xs; mpc_dtor_t *dxs; } mpc_pdata_and_t; +typedef struct { int n; mpc_fold_t f; mpc_parser_t *x; mpc_parser_t *sep; } mpc_pdata_sepby1; + +/* Storage for exactly one of the payloads above. */ typedef union { + mpc_pdata_fail_t fail; + mpc_pdata_lift_t lift; + mpc_pdata_expect_t expect; + mpc_pdata_anchor_t anchor; + mpc_pdata_single_t single; + mpc_pdata_range_t range; + mpc_pdata_satisfy_t satisfy; + mpc_pdata_string_t string; + mpc_pdata_apply_t apply; +
mpc_pdata_apply_to_t apply_to; + mpc_pdata_check_t check; + mpc_pdata_check_with_t check_with; + mpc_pdata_predict_t predict; + mpc_pdata_not_t not; + mpc_pdata_repeat_t repeat; + mpc_pdata_and_t and; + mpc_pdata_or_t or; + mpc_pdata_sepby1 sepby1; +} mpc_pdata_t; + +/* A parser node: optional name, payload, an MPC_TYPE_* tag in `type`, and `retained` (set to 1 by mpc_new, 0 by mpc_undefined). */ struct mpc_parser_t { + char *name; + mpc_pdata_t data; + char type; + char retained; +}; + +/* Free every element of xs except index x through mpc_free and return xs[x]. */ static mpc_val_t *mpcf_input_nth_free(mpc_input_t *i, int n, mpc_val_t **xs, int x) { + int j; + for (j = 0; j < n; j++) { if (j != x) { mpc_free(i, xs[j]); } } + return xs[x]; +} + +/* Keep the first, second or third value respectively and free the rest. */ static mpc_val_t *mpcf_input_fst_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 0); } +static mpc_val_t *mpcf_input_snd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 1); } +static mpc_val_t *mpcf_input_trd_free(mpc_input_t *i, int n, mpc_val_t **xs) { return mpcf_input_nth_free(i, n, xs, 2); } + +/* Concatenate the n strings in xs into xs[0], which is grown with mpc_realloc to the total length, freeing xs[1..n-1]; with n == 0 a one-byte zeroed allocation is returned. */ static mpc_val_t *mpcf_input_strfold(mpc_input_t *i, int n, mpc_val_t **xs) { + int j; + size_t l = 0; + if (n == 0) { return mpc_calloc(i, 1, 1); } + for (j = 0; j < n; j++) { l += strlen(xs[j]); } + xs[0] = mpc_realloc(i, xs[0], l + 1); + for (j = 1; j < n; j++) { strcat(xs[0], xs[j]); mpc_free(i, xs[j]); } + return xs[0]; +} + +/* Treats xs[0] as an mpc_state_t* and xs[1] as an mpc_ast_t*: applies mpc_ast_state to the ast with that state, frees the state, returns the ast. n is unused. */ static mpc_val_t *mpcf_input_state_ast(mpc_input_t *i, int n, mpc_val_t **xs) { + mpc_state_t *s = ((mpc_state_t**)xs)[0]; + mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; + a = mpc_ast_state(a, *s); + mpc_free(i, s); + (void) n; + return a; +} + +/* Run fold f over xs. The built-in folds compared against below are called directly; the *_free, strfold and state_ast folds are replaced by their mpcf_input_* counterparts, which release memory with mpc_free. For any other f every value is first passed through mpc_export. */ static mpc_val_t *mpc_parse_fold(mpc_input_t *i, mpc_fold_t f, int n, mpc_val_t **xs) { + int j; + if (f == mpcf_null) { return mpcf_null(n, xs); } + if (f == mpcf_fst) { return mpcf_fst(n, xs); } + if (f == mpcf_snd) { return mpcf_snd(n, xs); } + if (f == mpcf_trd) { return mpcf_trd(n, xs); } + if (f == mpcf_fst_free) { return mpcf_input_fst_free(i, n, xs); } + if (f == mpcf_snd_free) { return mpcf_input_snd_free(i, n, xs); } + if (f == mpcf_trd_free) { return mpcf_input_trd_free(i, n, xs); } + if (f ==
mpcf_strfold) { return mpcf_input_strfold(i, n, xs); } + if (f == mpcf_state_ast) { return mpcf_input_state_ast(i, n, xs); } + for (j = 0; j < n; j++) { xs[j] = mpc_export(i, xs[j]); } + /* j == n here, after the loop above */ return f(j, xs); +} + +/* Free x through the input's allocator and yield NULL. */ static mpc_val_t *mpcf_input_free(mpc_input_t *i, mpc_val_t *x) { + mpc_free(i, x); + return NULL; +} + +/* Build an ast node with an empty tag from string c via mpc_ast_new, then free c. */ static mpc_val_t *mpcf_input_str_ast(mpc_input_t *i, mpc_val_t *c) { + mpc_ast_t *a = mpc_ast_new("", c); + mpc_free(i, c); + return a; +} + +/* Apply f to x: mpcf_free and mpcf_str_ast are replaced by their mpcf_input_* counterparts; any other f receives mpc_export(i, x). */ static mpc_val_t *mpc_parse_apply(mpc_input_t *i, mpc_apply_t f, mpc_val_t *x) { + if (f == mpcf_free) { return mpcf_input_free(i, x); } + if (f == mpcf_str_ast) { return mpcf_input_str_ast(i, x); } + return f(mpc_export(i, x)); +} + +/* Apply f to the exported value x together with the extra argument d. */ static mpc_val_t *mpc_parse_apply_to(mpc_input_t *i, mpc_apply_to_t f, mpc_val_t *x, mpc_val_t *d) { + return f(mpc_export(i, x), d); +} + +/* Destroy x with d: when d is free the value is released with mpc_free; any other destructor receives mpc_export(i, x). */ static void mpc_parse_dtor(mpc_input_t *i, mpc_dtor_t d, mpc_val_t *x) { + if (d == free) { mpc_free(i, x); return; } + d(mpc_export(i, x)); +} + +/* Number of result slots mpc_parse_run keeps in its local array before results move to the heap. */ enum { + MPC_PARSE_STACK_MIN = 4 +}; + +/* Helpers for mpc_parse_run: each expands to an assignment into r followed by a return, so the argument may carry extra statements after a ';' that run before the return. They are not wrapped in do/while and must be used inside braces. */ #define MPC_SUCCESS(x) r->output = x; return 1 +#define MPC_FAILURE(x) r->error = x; return 0 +#define MPC_PRIMITIVE(x) \ + if (x) { MPC_SUCCESS(r->output); } \ + else { MPC_FAILURE(NULL); } + +#define MPC_MAX_RECURSION_DEPTH 1000 + +/* Make room for result slot j. When j equals MPC_PARSE_STACK_MIN the contents of results_stk are copied into a new heap array of j + j/2 slots; for larger j the heap array is reallocated to j + j/2 slots on every call; for smaller j `results` is returned unchanged. */ static mpc_result_t *mpc_grow_results(mpc_input_t *i, int j, mpc_result_t *results_stk, mpc_result_t *results){ + mpc_result_t *tmp_results = results; + + if (j == MPC_PARSE_STACK_MIN) { + int results_slots = j + j / 2; + tmp_results = mpc_malloc(i, sizeof(mpc_result_t) * results_slots); + memcpy(tmp_results, results_stk, sizeof(mpc_result_t) * MPC_PARSE_STACK_MIN); + } else if (j >= MPC_PARSE_STACK_MIN) { + int results_slots = j + j / 2; + tmp_results = mpc_realloc(i, tmp_results, sizeof(mpc_result_t) * results_slots); + } + + return tmp_results; +} + +/* Run parser p against input i. Returns 1 with r->output set on success, 0 with r->error set on failure. *e accumulates errors merged from branches that failed without failing the whole parse; depth counts nested calls and is capped at MPC_MAX_RECURSION_DEPTH. */ static int mpc_parse_run(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r, mpc_err_t **e, int depth) { + + int j = 0, k = 0; + mpc_result_t results_stk[MPC_PARSE_STACK_MIN]; +
mpc_result_t *results; + + if (depth == MPC_MAX_RECURSION_DEPTH) + { + MPC_FAILURE(mpc_err_fail(i, "Maximum recursion depth exceeded!")); + } + + switch (p->type) { + + /* Basic Parsers */ + + case MPC_TYPE_ANY: MPC_PRIMITIVE(mpc_input_any(i, (char**)&r->output)); + case MPC_TYPE_SINGLE: MPC_PRIMITIVE(mpc_input_char(i, p->data.single.x, (char**)&r->output)); + case MPC_TYPE_RANGE: MPC_PRIMITIVE(mpc_input_range(i, p->data.range.x, p->data.range.y, (char**)&r->output)); + case MPC_TYPE_ONEOF: MPC_PRIMITIVE(mpc_input_oneof(i, p->data.string.x, (char**)&r->output)); + case MPC_TYPE_NONEOF: MPC_PRIMITIVE(mpc_input_noneof(i, p->data.string.x, (char**)&r->output)); + case MPC_TYPE_SATISFY: MPC_PRIMITIVE(mpc_input_satisfy(i, p->data.satisfy.f, (char**)&r->output)); + case MPC_TYPE_STRING: MPC_PRIMITIVE(mpc_input_string(i, p->data.string.x, (char**)&r->output)); + case MPC_TYPE_ANCHOR: MPC_PRIMITIVE(mpc_input_anchor(i, p->data.anchor.f, (char**)&r->output)); + case MPC_TYPE_SOI: MPC_PRIMITIVE(mpc_input_soi(i, (char**)&r->output)); + case MPC_TYPE_EOI: MPC_PRIMITIVE(mpc_input_eoi(i, (char**)&r->output)); + + /* Other parsers */ + + case MPC_TYPE_UNDEFINED: MPC_FAILURE(mpc_err_fail(i, "Parser Undefined!")); + case MPC_TYPE_PASS: MPC_SUCCESS(NULL); + case MPC_TYPE_FAIL: MPC_FAILURE(mpc_err_fail(i, p->data.fail.m)); + case MPC_TYPE_LIFT: MPC_SUCCESS(p->data.lift.lf()); + case MPC_TYPE_LIFT_VAL: MPC_SUCCESS(p->data.lift.x); + case MPC_TYPE_STATE: MPC_SUCCESS(mpc_input_state_copy(i)); + + /* Application Parsers */ + + case MPC_TYPE_APPLY: + if (mpc_parse_run(i, p->data.apply.x, r, e, depth+1)) { + MPC_SUCCESS(mpc_parse_apply(i, p->data.apply.f, r->output)); + } else { + MPC_FAILURE(r->output); + } + + case MPC_TYPE_APPLY_TO: + if (mpc_parse_run(i, p->data.apply_to.x, r, e, depth+1)) { + MPC_SUCCESS(mpc_parse_apply_to(i, p->data.apply_to.f, r->output, p->data.apply_to.d)); + } else { + MPC_FAILURE(r->error); + } + + case MPC_TYPE_CHECK: + if (mpc_parse_run(i, p->data.check.x, r, 
e, depth+1)) { + if (p->data.check.f(&r->output)) { + MPC_SUCCESS(r->output); + } else { + mpc_parse_dtor(i, p->data.check.dx, r->output); + MPC_FAILURE(mpc_err_fail(i, p->data.check.e)); + } + } else { + MPC_FAILURE(r->error); + } + + case MPC_TYPE_CHECK_WITH: + if (mpc_parse_run(i, p->data.check_with.x, r, e, depth+1)) { + if (p->data.check_with.f(&r->output, p->data.check_with.d)) { + MPC_SUCCESS(r->output); + } else { + mpc_parse_dtor(i, p->data.check.dx, r->output); + MPC_FAILURE(mpc_err_fail(i, p->data.check_with.e)); + } + } else { + MPC_FAILURE(r->error); + } + + case MPC_TYPE_EXPECT: + mpc_input_suppress_enable(i); + if (mpc_parse_run(i, p->data.expect.x, r, e, depth+1)) { + mpc_input_suppress_disable(i); + MPC_SUCCESS(r->output); + } else { + mpc_input_suppress_disable(i); + MPC_FAILURE(mpc_err_new(i, p->data.expect.m)); + } + + case MPC_TYPE_PREDICT: + mpc_input_backtrack_disable(i); + if (mpc_parse_run(i, p->data.predict.x, r, e, depth+1)) { + mpc_input_backtrack_enable(i); + MPC_SUCCESS(r->output); + } else { + mpc_input_backtrack_enable(i); + MPC_FAILURE(r->error); + } + + /* Optional Parsers */ + + /* TODO: Update Not Error Message */ + + case MPC_TYPE_NOT: + mpc_input_mark(i); + mpc_input_suppress_enable(i); + if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { + mpc_input_rewind(i); + mpc_input_suppress_disable(i); + mpc_parse_dtor(i, p->data.not.dx, r->output); + MPC_FAILURE(mpc_err_new(i, "opposite")); + } else { + mpc_input_unmark(i); + mpc_input_suppress_disable(i); + MPC_SUCCESS(p->data.not.lf()); + } + + case MPC_TYPE_MAYBE: + if (mpc_parse_run(i, p->data.not.x, r, e, depth+1)) { + MPC_SUCCESS(r->output); + } else { + *e = mpc_err_merge(i, *e, r->error); + MPC_SUCCESS(p->data.not.lf()); + } + + /* Repeat Parsers */ + + case MPC_TYPE_MANY: + + results = results_stk; + + while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { + j++; + results = mpc_grow_results(i, j, results_stk, results); + } + + *e = mpc_err_merge(i, *e, 
results[j].error); + + MPC_SUCCESS( + mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + + case MPC_TYPE_MANY1: + + results = results_stk; + + while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { + j++; + results = mpc_grow_results(i, j, results_stk, results); + } + + if (j == 0) { + MPC_FAILURE( + mpc_err_many1(i, results[j].error); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } else { + + *e = mpc_err_merge(i, *e, results[j].error); + + MPC_SUCCESS( + mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } + + case MPC_TYPE_SEPBY1: + + results = results_stk; + + if(mpc_parse_run(i, p->data.sepby1.x, &results[j], e, depth+1)){ + j++; + results = mpc_grow_results(i, j, results_stk, results); + + while ( + mpc_parse_run(i, p->data.sepby1.sep, &results[j], e, depth+1) && + mpc_parse_run(i, p->data.sepby1.x, &results[j], e, depth+1) + ) { + j++; + results = mpc_grow_results(i, j, results_stk, results); + } + } + + if (j == 0) { + MPC_FAILURE( + mpc_err_many1(i, results[j].error); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } else { + *e = mpc_err_merge(i, *e, results[j].error); + + MPC_SUCCESS( + mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); + if (j >= MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } + + case MPC_TYPE_COUNT: + + results = p->data.repeat.n > MPC_PARSE_STACK_MIN + ? 
mpc_malloc(i, sizeof(mpc_result_t) * p->data.repeat.n) + : results_stk; + + while (mpc_parse_run(i, p->data.repeat.x, &results[j], e, depth+1)) { + j++; + if (j == p->data.repeat.n) { break; } + } + + if (j == p->data.repeat.n) { + MPC_SUCCESS( + mpc_parse_fold(i, p->data.repeat.f, j, (mpc_val_t**)results); + if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } else { + for (k = 0; k < j; k++) { + mpc_parse_dtor(i, p->data.repeat.dx, results[k].output); + } + MPC_FAILURE( + mpc_err_count(i, results[j].error, p->data.repeat.n); + if (p->data.repeat.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } + + /* Combinatory Parsers */ + + case MPC_TYPE_OR: + + if (p->data.or.n == 0) { MPC_SUCCESS(NULL); } + + results = p->data.or.n > MPC_PARSE_STACK_MIN + ? mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) + : results_stk; + + for (j = 0; j < p->data.or.n; j++) { + if (mpc_parse_run(i, p->data.or.xs[j], &results[j], e, depth+1)) { + MPC_SUCCESS(results[j].output; + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } else { + *e = mpc_err_merge(i, *e, results[j].error); + } + } + + MPC_FAILURE(NULL; + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + + case MPC_TYPE_AND: + if (p->data.and.n == 0) { MPC_SUCCESS(NULL); } + + results = p->data.or.n > MPC_PARSE_STACK_MIN + ? 
mpc_malloc(i, sizeof(mpc_result_t) * p->data.or.n) + : results_stk; + + mpc_input_mark(i); + for (j = 0; j < p->data.and.n; j++) { + if (!mpc_parse_run(i, p->data.and.xs[j], &results[j], e, depth+1)) { + mpc_input_rewind(i); + for (k = 0; k < j; k++) { + mpc_parse_dtor(i, p->data.and.dxs[k], results[k].output); + } + MPC_FAILURE(results[j].error; + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + } + } + mpc_input_unmark(i); + MPC_SUCCESS( + mpc_parse_fold(i, p->data.and.f, j, (mpc_val_t**)results); + if (p->data.or.n > MPC_PARSE_STACK_MIN) { mpc_free(i, results); }); + + /* End */ + + default: + + MPC_FAILURE(mpc_err_fail(i, "Unknown Parser Type Id!")); + } + + return 0; + +} + +#undef MPC_SUCCESS +#undef MPC_FAILURE +#undef MPC_PRIMITIVE + +int mpc_parse_input(mpc_input_t *i, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_err_t *e = mpc_err_fail(i, "Unknown Error"); + e->state = mpc_state_invalid(); + x = mpc_parse_run(i, p, r, &e, 0); + if (x) { + mpc_err_delete_internal(i, e); + r->output = mpc_export(i, r->output); + } else { + r->error = mpc_err_export(i, mpc_err_merge(i, e, r->error)); + } + return x; +} + +int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = mpc_input_new_string(filename, string); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = mpc_input_new_nstring(filename, string, length); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = mpc_input_new_file(filename, file); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r) { + int x; + mpc_input_t *i = 
mpc_input_new_pipe(filename, pipe); + x = mpc_parse_input(i, p, r); + mpc_input_delete(i); + return x; +} + +/* Open `filename` in binary mode and parse its contents. If the file cannot be opened, r->output is set to NULL, r->error to a malloc-allocated "Unable to open file!" error, and 0 is returned; otherwise the result of mpc_parse_file is returned after the file is closed. */ int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r) { + + FILE *f = fopen(filename, "rb"); + int res; + + if (f == NULL) { + r->output = NULL; + r->error = mpc_err_file(filename, "Unable to open file!"); + return 0; + } + + res = mpc_parse_file(filename, f, p, r); + fclose(f); + return res; +} + +/* +** Building a Parser +*/ + +static void mpc_undefine_unretained(mpc_parser_t *p, int force); + +/* Release each alternative of an OR parser (retained ones are skipped by the callee), then the array itself. */ static void mpc_undefine_or(mpc_parser_t *p) { + + int i; + for (i = 0; i < p->data.or.n; i++) { + mpc_undefine_unretained(p->data.or.xs[i], 0); + } + free(p->data.or.xs); + +} + +/* Release each part of an AND parser, then its parser and destructor arrays. */ static void mpc_undefine_and(mpc_parser_t *p) { + + int i; + for (i = 0; i < p->data.and.n; i++) { + mpc_undefine_unretained(p->data.and.xs[i], 0); + } + free(p->data.and.xs); + free(p->data.and.dxs); + +} + +/* Release the payload owned by p and, recursively, its unretained children. A retained p is left untouched unless force is non-zero. */ static void mpc_undefine_unretained(mpc_parser_t *p, int force) { + + if (p->retained && !force) { return; } + + switch (p->type) { + + case MPC_TYPE_FAIL: free(p->data.fail.m); break; + + case MPC_TYPE_ONEOF: + case MPC_TYPE_NONEOF: + case MPC_TYPE_STRING: + free(p->data.string.x); + break; + + case MPC_TYPE_APPLY: mpc_undefine_unretained(p->data.apply.x, 0); break; + case MPC_TYPE_APPLY_TO: mpc_undefine_unretained(p->data.apply_to.x, 0); break; + case MPC_TYPE_PREDICT: mpc_undefine_unretained(p->data.predict.x, 0); break; + + case MPC_TYPE_MAYBE: + case MPC_TYPE_NOT: + mpc_undefine_unretained(p->data.not.x, 0); + break; + + case MPC_TYPE_EXPECT: + mpc_undefine_unretained(p->data.expect.x, 0); + free(p->data.expect.m); + break; + + case MPC_TYPE_MANY: + case MPC_TYPE_MANY1: + case MPC_TYPE_COUNT: + mpc_undefine_unretained(p->data.repeat.x, 0); + break; + + case MPC_TYPE_SEPBY1: + mpc_undefine_unretained(p->data.sepby1.x, 0); + mpc_undefine_unretained(p->data.sepby1.sep, 0); + break; + + case MPC_TYPE_OR: mpc_undefine_or(p); break; + case MPC_TYPE_AND:
mpc_undefine_and(p); break; + + case MPC_TYPE_CHECK: + mpc_undefine_unretained(p->data.check.x, 0); + free(p->data.check.e); + break; + + case MPC_TYPE_CHECK_WITH: + mpc_undefine_unretained(p->data.check_with.x, 0); + free(p->data.check_with.e); + break; + + default: break; + } + + /* with force set only the payload is released; the parser struct and its name are kept for the caller */ if (!force) { + free(p->name); + free(p); + } + +} + +/* Delete a parser. NOTE(review): for a retained parser the call to mpc_undefine_unretained(p, 0) returns at its first guard (retained && !force), so this branch frees only the name and the struct; mpc_cleanup calls mpc_undefine on every parser before deleting it. */ void mpc_delete(mpc_parser_t *p) { + if (p->retained) { + + if (p->type != MPC_TYPE_UNDEFINED) { + mpc_undefine_unretained(p, 0); + } + + free(p->name); + free(p); + + } else { + mpc_undefine_unretained(p, 0); + } +} + +/* Destructor-shaped wrapper (takes an mpc_val_t*) around mpc_undefine_unretained(x, 0). */ static void mpc_soft_delete(mpc_val_t *x) { + mpc_undefine_unretained(x, 0); +} + +/* Allocate a zeroed, unnamed, unretained parser of type MPC_TYPE_UNDEFINED. NOTE(review): the calloc result is not checked. */ static mpc_parser_t *mpc_undefined(void) { + mpc_parser_t *p = calloc(1, sizeof(mpc_parser_t)); + p->retained = 0; + p->type = MPC_TYPE_UNDEFINED; + p->name = NULL; + return p; +} + +/* Create a retained, still undefined parser holding its own copy of `name`. */ mpc_parser_t *mpc_new(const char *name) { + mpc_parser_t *p = mpc_undefined(); + p->retained = 1; + /* p->name is NULL here, so this realloc acts as malloc */ p->name = realloc(p->name, strlen(name) + 1); + strcpy(p->name, name); + return p; +} + +/* Deep-copy an unretained parser tree; a retained parser is returned as-is (shared, not copied). */ mpc_parser_t *mpc_copy(mpc_parser_t *a) { + int i = 0; + mpc_parser_t *p; + + if (a->retained) { return a; } + + p = mpc_undefined(); + p->retained = a->retained; + p->type = a->type; + /* shallow copy first; owned strings and child parsers are duplicated per type below */ p->data = a->data; + + if (a->name) { + p->name = malloc(strlen(a->name)+1); + strcpy(p->name, a->name); + } + + switch (a->type) { + + case MPC_TYPE_FAIL: + p->data.fail.m = malloc(strlen(a->data.fail.m)+1); + strcpy(p->data.fail.m, a->data.fail.m); + break; + + case MPC_TYPE_ONEOF: + case MPC_TYPE_NONEOF: + case MPC_TYPE_STRING: + p->data.string.x = malloc(strlen(a->data.string.x)+1); + strcpy(p->data.string.x, a->data.string.x); + break; + + case MPC_TYPE_APPLY: p->data.apply.x = mpc_copy(a->data.apply.x); break; + case MPC_TYPE_APPLY_TO: p->data.apply_to.x = mpc_copy(a->data.apply_to.x); break; + case MPC_TYPE_PREDICT: p->data.predict.x = mpc_copy(a->data.predict.x); break; + + case MPC_TYPE_MAYBE: + case MPC_TYPE_NOT: + p->data.not.x = mpc_copy(a->data.not.x); + break; + + case
MPC_TYPE_EXPECT: + p->data.expect.x = mpc_copy(a->data.expect.x); + p->data.expect.m = malloc(strlen(a->data.expect.m)+1); + strcpy(p->data.expect.m, a->data.expect.m); + break; + + case MPC_TYPE_MANY: + case MPC_TYPE_MANY1: + case MPC_TYPE_COUNT: + p->data.repeat.x = mpc_copy(a->data.repeat.x); + break; + + case MPC_TYPE_SEPBY1: + p->data.sepby1.x = mpc_copy(a->data.sepby1.x); + p->data.sepby1.sep = mpc_copy(a->data.sepby1.sep); + break; + + case MPC_TYPE_OR: + p->data.or.xs = malloc(a->data.or.n * sizeof(mpc_parser_t*)); + for (i = 0; i < a->data.or.n; i++) { + p->data.or.xs[i] = mpc_copy(a->data.or.xs[i]); + } + break; + case MPC_TYPE_AND: + p->data.and.xs = malloc(a->data.and.n * sizeof(mpc_parser_t*)); + for (i = 0; i < a->data.and.n; i++) { + p->data.and.xs[i] = mpc_copy(a->data.and.xs[i]); + } + /* n parsers come with n-1 destructors; with n == 0 the dxs pointer copied from a is left in place */ if (a->data.and.n > 0) { + p->data.and.dxs = malloc((a->data.and.n-1) * sizeof(mpc_dtor_t)); + for (i = 0; i < a->data.and.n-1; i++) { + p->data.and.dxs[i] = a->data.and.dxs[i]; + } + } + break; + + case MPC_TYPE_CHECK: + p->data.check.x = mpc_copy(a->data.check.x); + p->data.check.e = malloc(strlen(a->data.check.e)+1); + strcpy(p->data.check.e, a->data.check.e); + break; + case MPC_TYPE_CHECK_WITH: + p->data.check_with.x = mpc_copy(a->data.check_with.x); + p->data.check_with.e = malloc(strlen(a->data.check_with.e)+1); + strcpy(p->data.check_with.e, a->data.check_with.e); + break; + + default: break; + } + + + return p; +} + +/* Release p's payload (forced, so it also applies to retained parsers) and reset its type to MPC_TYPE_UNDEFINED; the struct and its name stay allocated. Returns p. */ mpc_parser_t *mpc_undefine(mpc_parser_t *p) { + mpc_undefine_unretained(p, 1); + p->type = MPC_TYPE_UNDEFINED; + return p; +} + +/* Give retained parser p the type and payload of a. The payload is moved by value, and only the struct a is freed afterwards. If p is not retained it is instead turned into a failing parser with a fixed message. Returns p. */ mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a) { + + if (p->retained) { + p->type = a->type; + p->data = a->data; + } else { + mpc_parser_t *a2 = mpc_failf("Attempt to assign to Unretained Parser!"); + p->type = a2->type; + p->data = a2->data; + free(a2); + } + + free(a); + return p; +} + +/* Undefine and then delete the n parsers passed as variadic mpc_parser_t* arguments. */ void mpc_cleanup(int n, ...)
{ + int i; + mpc_parser_t **list = malloc(sizeof(mpc_parser_t*) * n); + + va_list va; + va_start(va, n); + for (i = 0; i < n; i++) { list[i] = va_arg(va, mpc_parser_t*); } + /* two passes: every payload is released before any parser struct is freed */ for (i = 0; i < n; i++) { mpc_undefine(list[i]); } + for (i = 0; i < n; i++) { mpc_delete(list[i]); } + va_end(va); + + free(list); +} + +/* Parser of type MPC_TYPE_PASS; mpc_parse_run succeeds on it with a NULL output. */ mpc_parser_t *mpc_pass(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_PASS; + return p; +} + +/* Parser that always fails with a copy of message m. */ mpc_parser_t *mpc_fail(const char *m) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_FAIL; + p->data.fail.m = malloc(strlen(m) + 1); + strcpy(p->data.fail.m, m); + return p; +} + +/* +** As `snprintf` is not ANSI standard this +** function `mpc_failf` should be considered +** unsafe. +** +** You have a few options if this is going to be +** trouble. +** +** - Ensure the format string does not exceed +** the buffer length using precision specifiers +** such as `%.512s`. +** +** - Patch this function in your code base to +** use `snprintf` or whatever variant your +** system supports. +** +** - Avoid it altogether. +** +*/ + +mpc_parser_t *mpc_failf(const char *fmt, ...)
{ + + /* Always-failing parser whose message is formatted printf-style into a 2048-byte buffer (unbounded vsprintf, see the warning above). Returns NULL if that buffer cannot be allocated. */ + va_list va; + char *buffer; + char *shrunk; + + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_FAIL; + + va_start(va, fmt); + buffer = malloc(2048); + if (!buffer) { + /* do not leak the half-built parser or leave the va_list open */ + va_end(va); + free(p); + return NULL; + } + vsprintf(buffer, fmt, va); + va_end(va); + + /* shrink to fit; keep the original buffer if realloc fails */ + shrunk = realloc(buffer, strlen(buffer) + 1); + if (shrunk) { buffer = shrunk; } + p->data.fail.m = buffer; + return p; + +} + +/* Parser that consumes nothing and always succeeds with the fixed value x. */ +mpc_parser_t *mpc_lift_val(mpc_val_t *x) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_LIFT_VAL; + p->data.lift.x = x; + return p; +} + +/* Parser that consumes nothing and succeeds with the value returned by lf(). */ +mpc_parser_t *mpc_lift(mpc_ctor_t lf) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_LIFT; + p->data.lift.lf = lf; + return p; +} + +/* Anchor parser driven by predicate f, which takes two characters; the boundary anchors in this file name them prev and next. Wrapped so failures read "anchor". */ +mpc_parser_t *mpc_anchor(int(*f)(char,char)) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_ANCHOR; + p->data.anchor.f = f; + return mpc_expect(p, "anchor"); +} + +/* Parser that succeeds with a copy of the current input state. */ +mpc_parser_t *mpc_state(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_STATE; + return p; +} + +/* Wrap a so that its failure is reported as a single expectation `expected` (copied). */ +mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *expected) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_EXPECT; + p->data.expect.x = a; + p->data.expect.m = malloc(strlen(expected) + 1); + strcpy(p->data.expect.m, expected); + return p; +} + +/* +** As `snprintf` is not ANSI standard this +** function `mpc_expectf` should be considered +** unsafe. +** +** You have a few options if this is going to be +** trouble. +** +** - Ensure the format string does not exceed +** the buffer length using precision specifiers +** such as `%.512s`. +** +** - Patch this function in your code base to +** use `snprintf` or whatever variant your +** system supports. +** +** - Avoid it altogether. +** +*/ + +mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...)
{ + /* Like mpc_expect, with the expectation text formatted printf-style into a 2048-byte buffer (unbounded vsprintf, see the warning above). Returns NULL if that buffer cannot be allocated; `a` is left untouched in that case. */ + va_list va; + char *buffer; + char *shrunk; + + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_EXPECT; + + va_start(va, fmt); + buffer = malloc(2048); + if (!buffer) { + /* do not leak the half-built wrapper or leave the va_list open */ + va_end(va); + free(p); + return NULL; + } + vsprintf(buffer, fmt, va); + va_end(va); + + /* shrink to fit; keep the original buffer if realloc fails */ + shrunk = realloc(buffer, strlen(buffer) + 1); + if (shrunk) { buffer = shrunk; } + p->data.expect.x = a; + p->data.expect.m = buffer; + return p; +} + +/* +** Basic Parsers +*/ + +/* Parser of type MPC_TYPE_ANY, reported as "any character". */ +mpc_parser_t *mpc_any(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_ANY; + return mpc_expect(p, "any character"); +} + +/* Parser of type MPC_TYPE_SINGLE for the character c. */ +mpc_parser_t *mpc_char(char c) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_SINGLE; + p->data.single.x = c; + return mpc_expectf(p, "'%c'", c); +} + +/* Parser of type MPC_TYPE_RANGE with bounds s and e. */ +mpc_parser_t *mpc_range(char s, char e) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_RANGE; + p->data.range.x = s; + p->data.range.y = e; + return mpc_expectf(p, "character between '%c' and '%c'", s, e); +} + +/* Parser of type MPC_TYPE_ONEOF over a copy of the character set s. */ +mpc_parser_t *mpc_oneof(const char *s) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_ONEOF; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + return mpc_expectf(p, "one of '%s'", s); +} + +/* Parser of type MPC_TYPE_NONEOF over a copy of the character set s. */ +mpc_parser_t *mpc_noneof(const char *s) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_NONEOF; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + return mpc_expectf(p, "none of '%s'", s); + +} + +/* Parser of type MPC_TYPE_SATISFY using predicate f. */ +mpc_parser_t *mpc_satisfy(int(*f)(char)) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_SATISFY; + p->data.satisfy.f = f; + return mpc_expectf(p, "character satisfying function %p", f); +} + +/* Parser of type MPC_TYPE_STRING for a copy of the literal s. */ +mpc_parser_t *mpc_string(const char *s) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_STRING; + p->data.string.x = malloc(strlen(s) + 1); + strcpy(p->data.string.x, s); + return mpc_expectf(p, "\"%s\"", s); +} + +/* +** Core Parsers +*/ + +mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_APPLY; + p->data.apply.x = a; + p->data.apply.f = f;
+ return p; +} + +mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_APPLY_TO; + p->data.apply_to.x = a; + p->data.apply_to.f = f; + p->data.apply_to.d = x; + return p; +} + +mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_CHECK; + p->data.check.x = a; + p->data.check.dx = da; + p->data.check.f = f; + p->data.check.e = malloc(strlen(e) + 1); + strcpy(p->data.check.e, e); + return p; +} + +mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_CHECK_WITH; + p->data.check_with.x = a; + p->data.check_with.dx = da; + p->data.check_with.f = f; + p->data.check_with.d = x; + p->data.check_with.e = malloc(strlen(e) + 1); + strcpy(p->data.check_with.e, e); + return p; +} + +mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...) { + va_list va; + char *buffer; + mpc_parser_t *p; + + va_start(va, fmt); + buffer = malloc(2048); + vsprintf(buffer, fmt, va); + va_end(va); + + p = mpc_check(a, da, f, buffer); + free(buffer); + + return p; +} + +mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...) 
{ + va_list va; + char *buffer; + mpc_parser_t *p; + + va_start(va, fmt); + buffer = malloc(2048); + vsprintf(buffer, fmt, va); + va_end(va); + + p = mpc_check_with(a, da, f, x, buffer); + free(buffer); + + return p; +} + +mpc_parser_t *mpc_predictive(mpc_parser_t *a) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_PREDICT; + p->data.predict.x = a; + return p; +} + +mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_NOT; + p->data.not.x = a; + p->data.not.dx = da; + p->data.not.lf = lf; + return p; +} + +mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da) { + return mpc_not_lift(a, da, mpcf_ctor_null); +} + +mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_MAYBE; + p->data.not.x = a; + p->data.not.lf = lf; + return p; +} + +mpc_parser_t *mpc_maybe(mpc_parser_t *a) { + return mpc_maybe_lift(a, mpcf_ctor_null); +} + +mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_MANY; + p->data.repeat.x = a; + p->data.repeat.f = f; + return p; +} + +mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_MANY1; + p->data.repeat.x = a; + p->data.repeat.f = f; + return p; +} + +mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_COUNT; + p->data.repeat.n = n; + p->data.repeat.f = f; + p->data.repeat.x = a; + p->data.repeat.dx = da; + return p; +} + +mpc_parser_t *mpc_sepby1(mpc_fold_t f, mpc_parser_t *sep, mpc_parser_t *a) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_SEPBY1; + p->data.sepby1.x = a; + p->data.sepby1.f = f; + p->data.sepby1.sep = sep; + return p; +} + +mpc_parser_t *mpc_or(int n, ...) 
{ + + int i; + va_list va; + + mpc_parser_t *p = mpc_undefined(); + + p->type = MPC_TYPE_OR; + p->data.or.n = n; + p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); + + va_start(va, n); + for (i = 0; i < n; i++) { + p->data.or.xs[i] = va_arg(va, mpc_parser_t*); + } + va_end(va); + + return p; +} + +mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...) { + + int i; + va_list va; + + mpc_parser_t *p = mpc_undefined(); + + p->type = MPC_TYPE_AND; + p->data.and.n = n; + p->data.and.f = f; + p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); + p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); + + va_start(va, f); + for (i = 0; i < n; i++) { + p->data.and.xs[i] = va_arg(va, mpc_parser_t*); + } + for (i = 0; i < (n-1); i++) { + p->data.and.dxs[i] = va_arg(va, mpc_dtor_t); + } + va_end(va); + + return p; +} + +/* +** Common Parsers +*/ + +mpc_parser_t *mpc_soi(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_SOI; + return mpc_expect(p, "start of input"); +} + +mpc_parser_t *mpc_eoi(void) { + mpc_parser_t *p = mpc_undefined(); + p->type = MPC_TYPE_EOI; + return mpc_expect(p, "end of input"); +} + +static int mpc_boundary_anchor(char prev, char next) { + const char* word = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "0123456789_"; + if ( strchr(word, next) && prev == '\0') { return 1; } + if ( strchr(word, prev) && next == '\0') { return 1; } + if ( strchr(word, next) && !strchr(word, prev)) { return 1; } + if (!strchr(word, next) && strchr(word, prev)) { return 1; } + return 0; +} + +static int mpc_boundary_newline_anchor(char prev, char next) { + (void)next; + return prev == '\n'; +} + +mpc_parser_t *mpc_boundary(void) { return mpc_expect(mpc_anchor(mpc_boundary_anchor), "word boundary"); } +mpc_parser_t *mpc_boundary_newline(void) { return mpc_expect(mpc_anchor(mpc_boundary_newline_anchor), "start of newline"); } + +mpc_parser_t *mpc_whitespace(void) { return mpc_expect(mpc_oneof(" \f\n\r\t\v"), "whitespace"); } +mpc_parser_t 
*mpc_whitespaces(void) { return mpc_expect(mpc_many(mpcf_strfold, mpc_whitespace()), "spaces"); } +mpc_parser_t *mpc_blank(void) { return mpc_expect(mpc_apply(mpc_whitespaces(), mpcf_free), "whitespace"); } + +mpc_parser_t *mpc_newline(void) { return mpc_expect(mpc_char('\n'), "newline"); } +mpc_parser_t *mpc_tab(void) { return mpc_expect(mpc_char('\t'), "tab"); } +mpc_parser_t *mpc_escape(void) { return mpc_and(2, mpcf_strfold, mpc_char('\\'), mpc_any(), free); } + +mpc_parser_t *mpc_digit(void) { return mpc_expect(mpc_oneof("0123456789"), "digit"); } +mpc_parser_t *mpc_hexdigit(void) { return mpc_expect(mpc_oneof("0123456789ABCDEFabcdef"), "hex digit"); } +mpc_parser_t *mpc_octdigit(void) { return mpc_expect(mpc_oneof("01234567"), "oct digit"); } +mpc_parser_t *mpc_digits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_digit()), "digits"); } +mpc_parser_t *mpc_hexdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_hexdigit()), "hex digits"); } +mpc_parser_t *mpc_octdigits(void) { return mpc_expect(mpc_many1(mpcf_strfold, mpc_octdigit()), "oct digits"); } + +mpc_parser_t *mpc_lower(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyz"), "lowercase letter"); } +mpc_parser_t *mpc_upper(void) { return mpc_expect(mpc_oneof("ABCDEFGHIJKLMNOPQRSTUVWXYZ"), "uppercase letter"); } +mpc_parser_t *mpc_alpha(void) { return mpc_expect(mpc_oneof("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"), "letter"); } +mpc_parser_t *mpc_underscore(void) { return mpc_expect(mpc_char('_'), "underscore"); } +mpc_parser_t *mpc_alphanum(void) { return mpc_expect(mpc_or(3, mpc_alpha(), mpc_digit(), mpc_underscore()), "alphanumeric"); } + +mpc_parser_t *mpc_int(void) { return mpc_expect(mpc_apply(mpc_digits(), mpcf_int), "integer"); } +mpc_parser_t *mpc_hex(void) { return mpc_expect(mpc_apply(mpc_hexdigits(), mpcf_hex), "hexadecimal"); } +mpc_parser_t *mpc_oct(void) { return mpc_expect(mpc_apply(mpc_octdigits(), mpcf_oct), "octadecimal"); } +mpc_parser_t 
*mpc_number(void) { return mpc_expect(mpc_or(3, mpc_int(), mpc_hex(), mpc_oct()), "number"); } + +mpc_parser_t *mpc_real(void) { + + /* [+-]?\d+(\.\d+)?([eE][+-]?[0-9]+)? */ + + mpc_parser_t *p0, *p1, *p2, *p30, *p31, *p32, *p3; + + p0 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); + p1 = mpc_digits(); + p2 = mpc_maybe_lift(mpc_and(2, mpcf_strfold, mpc_char('.'), mpc_digits(), free), mpcf_ctor_str); + p30 = mpc_oneof("eE"); + p31 = mpc_maybe_lift(mpc_oneof("+-"), mpcf_ctor_str); + p32 = mpc_digits(); + p3 = mpc_maybe_lift(mpc_and(3, mpcf_strfold, p30, p31, p32, free, free), mpcf_ctor_str); + + return mpc_expect(mpc_and(4, mpcf_strfold, p0, p1, p2, p3, free, free, free), "real"); + +} + +mpc_parser_t *mpc_float(void) { + return mpc_expect(mpc_apply(mpc_real(), mpcf_float), "float"); +} + +mpc_parser_t *mpc_char_lit(void) { + return mpc_expect(mpc_between(mpc_or(2, mpc_escape(), mpc_any()), free, "'", "'"), "char"); +} + +mpc_parser_t *mpc_string_lit(void) { + mpc_parser_t *strchar = mpc_or(2, mpc_escape(), mpc_noneof("\"")); + return mpc_expect(mpc_between(mpc_many(mpcf_strfold, strchar), free, "\"", "\""), "string"); +} + +mpc_parser_t *mpc_regex_lit(void) { + mpc_parser_t *regexchar = mpc_or(2, mpc_escape(), mpc_noneof("/")); + return mpc_expect(mpc_between(mpc_many(mpcf_strfold, regexchar), free, "/", "/"), "regex"); +} + +mpc_parser_t *mpc_ident(void) { + mpc_parser_t *p0, *p1; + p0 = mpc_or(2, mpc_alpha(), mpc_underscore()); + p1 = mpc_many(mpcf_strfold, mpc_alphanum()); + return mpc_and(2, mpcf_strfold, p0, p1, free); +} + +/* +** Useful Parsers +*/ + +mpc_parser_t *mpc_startwith(mpc_parser_t *a) { return mpc_and(2, mpcf_snd, mpc_soi(), a, mpcf_dtor_null); } +mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(2, mpcf_fst, a, mpc_eoi(), da); } +mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da) { return mpc_and(3, mpcf_snd, mpc_soi(), a, mpc_eoi(), mpcf_dtor_null, da); } + +mpc_parser_t *mpc_stripl(mpc_parser_t *a) { return 
mpc_and(2, mpcf_snd, mpc_blank(), a, mpcf_dtor_null); } +mpc_parser_t *mpc_stripr(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } +mpc_parser_t *mpc_strip(mpc_parser_t *a) { return mpc_and(3, mpcf_snd, mpc_blank(), a, mpc_blank(), mpcf_dtor_null, mpcf_dtor_null); } +mpc_parser_t *mpc_tok(mpc_parser_t *a) { return mpc_and(2, mpcf_fst, a, mpc_blank(), mpcf_dtor_null); } +mpc_parser_t *mpc_sym(const char *s) { return mpc_tok(mpc_string(s)); } + +mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da) { return mpc_whole(mpc_strip(a), da); } + +mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { + return mpc_and(3, mpcf_snd_free, + mpc_string(o), a, mpc_string(c), + free, ad); +} + +mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "(", ")"); } +mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "<", ">"); } +mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "{", "}"); } +mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_between(a, ad, "[", "]"); } + +mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c) { + return mpc_and(3, mpcf_snd_free, + mpc_sym(o), mpc_tok(a), mpc_sym(c), + free, ad); +} + +mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "(", ")"); } +mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "<", ">"); } +mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "{", "}"); } +mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad) { return mpc_tok_between(a, ad, "[", "]"); } + +/* +** Regular Expression Parsers +*/ + +/* +** So here is a cute bootstrapping. 
+** +** I'm using the previously defined +** mpc constructs and functions to +** parse the user regex string and +** construct a parser from it. +** +** As it turns out lots of the standard +** mpc functions look a lot like `fold` +** functions and so can be used indirectly +** by many of the parsing functions to build +** a parser directly - as we are parsing. +** +** This is certainly something that +** would be less elegant/interesting +** in a two-phase parser which first +** builds an AST and then traverses it +** to generate the object. +** +** This whole thing acts as a great +** case study for how trivial it can be +** to write a great parser in a few +** lines of code using mpc. +*/ + +/* +** +** ### Regular Expression Grammar +** +** : | ( "|" ) +** +** : * +** +** : +** | "*" +** | "+" +** | "?" +** | "{" "}" +** +** : +** | "\" +** | "(" ")" +** | "[" "]" +*/ + +static mpc_val_t *mpcf_re_or(int n, mpc_val_t **xs) { + (void) n; + if (xs[1] == NULL) { return xs[0]; } + else { return mpc_or(2, xs[0], xs[1]); } +} + +static mpc_val_t *mpcf_re_and(int n, mpc_val_t **xs) { + int i; + mpc_parser_t *p = mpc_lift(mpcf_ctor_str); + for (i = 0; i < n; i++) { + p = mpc_and(2, mpcf_strfold, p, xs[i], free); + } + return p; +} + +static mpc_val_t *mpcf_re_repeat(int n, mpc_val_t **xs) { + int num; + (void) n; + if (xs[1] == NULL) { return xs[0]; } + switch(((char*)xs[1])[0]) + { + case '*': { free(xs[1]); return mpc_many(mpcf_strfold, xs[0]); }; break; + case '+': { free(xs[1]); return mpc_many1(mpcf_strfold, xs[0]); }; break; + case '?': { free(xs[1]); return mpc_maybe_lift(xs[0], mpcf_ctor_str); }; break; + default: + num = *(int*)xs[1]; + free(xs[1]); + } + + return mpc_count(num, mpcf_strfold, xs[0], free); +} + +static mpc_parser_t *mpc_re_escape_char(char c) { + switch (c) { + case 'a': return mpc_char('\a'); + case 'f': return mpc_char('\f'); + case 'n': return mpc_char('\n'); + case 'r': return mpc_char('\r'); + case 't': return mpc_char('\t'); + case 'v': 
return mpc_char('\v'); + case 'b': return mpc_and(2, mpcf_snd, mpc_boundary(), mpc_lift(mpcf_ctor_str), free); + case 'B': return mpc_not_lift(mpc_boundary(), free, mpcf_ctor_str); + case 'A': return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); + case 'Z': return mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free); + case 'd': return mpc_digit(); + case 'D': return mpc_not_lift(mpc_digit(), free, mpcf_ctor_str); + case 's': return mpc_whitespace(); + case 'S': return mpc_not_lift(mpc_whitespace(), free, mpcf_ctor_str); + case 'w': return mpc_alphanum(); + case 'W': return mpc_not_lift(mpc_alphanum(), free, mpcf_ctor_str); + default: return NULL; + } +} + +static mpc_val_t *mpcf_re_escape(mpc_val_t *x, void* data) { + + int mode = *((int*)data); + char *s = x; + mpc_parser_t *p; + + /* Any Character */ + if (s[0] == '.') { + free(s); + if (mode & MPC_RE_DOTALL) { + return mpc_any(); + } else { + return mpc_expect(mpc_noneof("\n"), "any character except a newline"); + } + } + + /* Start of Input */ + if (s[0] == '^') { + free(s); + if (mode & MPC_RE_MULTILINE) { + return mpc_and(2, mpcf_snd, mpc_or(2, mpc_soi(), mpc_boundary_newline()), mpc_lift(mpcf_ctor_str), free); + } else { + return mpc_and(2, mpcf_snd, mpc_soi(), mpc_lift(mpcf_ctor_str), free); + } + } + + /* End of Input */ + if (s[0] == '$') { + free(s); + if (mode & MPC_RE_MULTILINE) { + return mpc_or(2, + mpc_newline(), + mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); + } else { + return mpc_or(2, + mpc_and(2, mpcf_fst, mpc_newline(), mpc_eoi(), free), + mpc_and(2, mpcf_snd, mpc_eoi(), mpc_lift(mpcf_ctor_str), free)); + } + } + + /* Regex Escape */ + if (s[0] == '\\') { + p = mpc_re_escape_char(s[1]); + p = (p == NULL) ? 
mpc_char(s[1]) : p; + free(s); + return p; + } + + /* Regex Standard */ + p = mpc_char(s[0]); + free(s); + return p; +} + +static const char *mpc_re_range_escape_char(char c) { + switch (c) { + case '-': return "-"; + case 'a': return "\a"; + case 'f': return "\f"; + case 'n': return "\n"; + case 'r': return "\r"; + case 't': return "\t"; + case 'v': return "\v"; + case 'b': return "\b"; + case 'd': return "0123456789"; + case 's': return " \f\n\r\t\v"; + case 'w': return "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"; + default: return NULL; + } +} + +static mpc_val_t *mpcf_re_range(mpc_val_t *x) { + + mpc_parser_t *out; + size_t i, j; + size_t start, end; + const char *tmp = NULL; + const char *s = x; + int comp = s[0] == '^' ? 1 : 0; + char *range = calloc(1,1); + + if (s[0] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } + if (s[0] == '^' && + s[1] == '\0') { free(range); free(x); return mpc_fail("Invalid Regex Range Expression"); } + + for (i = comp; i < strlen(s); i++){ + + /* Regex Range Escape */ + if (s[i] == '\\') { + tmp = mpc_re_range_escape_char(s[i+1]); + if (tmp != NULL) { + range = realloc(range, strlen(range) + strlen(tmp) + 1); + strcat(range, tmp); + } else { + range = realloc(range, strlen(range) + 1 + 1); + range[strlen(range) + 1] = '\0'; + range[strlen(range) + 0] = s[i+1]; + } + i++; + } + + /* Regex Range...Range */ + else if (s[i] == '-') { + if (s[i+1] == '\0' || i == 0) { + range = realloc(range, strlen(range) + strlen("-") + 1); + strcat(range, "-"); + } else { + start = s[i-1]+1; + end = s[i+1]-1; + for (j = start; j <= end; j++) { + range = realloc(range, strlen(range) + 1 + 1 + 1); + range[strlen(range) + 1] = '\0'; + range[strlen(range) + 0] = (char)j; + } + } + } + + /* Regex Range Normal */ + else { + range = realloc(range, strlen(range) + 1 + 1); + range[strlen(range) + 1] = '\0'; + range[strlen(range) + 0] = s[i]; + } + + } + + out = comp == 1 ? 
mpc_noneof(range) : mpc_oneof(range); + + free(x); + free(range); + + return out; +} + +mpc_parser_t *mpc_re(const char *re) { + return mpc_re_mode(re, MPC_RE_DEFAULT); +} + +mpc_parser_t *mpc_re_mode(const char *re, int mode) { + + char *err_msg; + mpc_parser_t *err_out; + mpc_result_t r; + mpc_parser_t *Regex, *Term, *Factor, *Base, *Range, *RegexEnclose; + + Regex = mpc_new("regex"); + Term = mpc_new("term"); + Factor = mpc_new("factor"); + Base = mpc_new("base"); + Range = mpc_new("range"); + + mpc_define(Regex, mpc_and(2, mpcf_re_or, + Term, + mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_char('|'), Regex, free)), + (mpc_dtor_t)mpc_delete + )); + + mpc_define(Term, mpc_many(mpcf_re_and, Factor)); + + mpc_define(Factor, mpc_and(2, mpcf_re_repeat, + Base, + mpc_or(5, + mpc_char('*'), mpc_char('+'), mpc_char('?'), + mpc_brackets(mpc_int(), free), + mpc_pass()), + (mpc_dtor_t)mpc_delete + )); + + mpc_define(Base, mpc_or(4, + mpc_parens(Regex, (mpc_dtor_t)mpc_delete), + mpc_squares(Range, (mpc_dtor_t)mpc_delete), + mpc_apply_to(mpc_escape(), mpcf_re_escape, &mode), + mpc_apply_to(mpc_noneof(")|"), mpcf_re_escape, &mode) + )); + + mpc_define(Range, mpc_apply( + mpc_many(mpcf_strfold, mpc_or(2, mpc_escape(), mpc_noneof("]"))), + mpcf_re_range + )); + + RegexEnclose = mpc_whole(mpc_predictive(Regex), (mpc_dtor_t)mpc_delete); + + mpc_optimise(RegexEnclose); + mpc_optimise(Regex); + mpc_optimise(Term); + mpc_optimise(Factor); + mpc_optimise(Base); + mpc_optimise(Range); + + if(!mpc_parse("", re, RegexEnclose, &r)) { + err_msg = mpc_err_string(r.error); + err_out = mpc_failf("Invalid Regex: %s", err_msg); + mpc_err_delete(r.error); + free(err_msg); + r.output = err_out; + } + + mpc_cleanup(6, RegexEnclose, Regex, Term, Factor, Base, Range); + + mpc_optimise(r.output); + + return r.output; + +} + +/* +** Common Fold Functions +*/ + +void mpcf_dtor_null(mpc_val_t *x) { (void) x; return; } + +mpc_val_t *mpcf_ctor_null(void) { return NULL; } +mpc_val_t *mpcf_ctor_str(void) { 
return calloc(1, 1); } +mpc_val_t *mpcf_free(mpc_val_t *x) { free(x); return NULL; } + +mpc_val_t *mpcf_int(mpc_val_t *x) { + int *y = malloc(sizeof(int)); + *y = strtol(x, NULL, 10); + free(x); + return y; +} + +mpc_val_t *mpcf_hex(mpc_val_t *x) { + int *y = malloc(sizeof(int)); + *y = strtol(x, NULL, 16); + free(x); + return y; +} + +mpc_val_t *mpcf_oct(mpc_val_t *x) { + int *y = malloc(sizeof(int)); + *y = strtol(x, NULL, 8); + free(x); + return y; +} + +mpc_val_t *mpcf_float(mpc_val_t *x) { + float *y = malloc(sizeof(float)); + *y = strtod(x, NULL); + free(x); + return y; +} + +mpc_val_t *mpcf_strtriml(mpc_val_t *x) { + char *s = x; + while (isspace((unsigned char)*s)) { + memmove(s, s+1, strlen(s)); + } + return s; +} + +mpc_val_t *mpcf_strtrimr(mpc_val_t *x) { + char *s = x; + size_t l = strlen(s); + while (l > 0 && isspace((unsigned char)s[l-1])) { + s[l-1] = '\0'; l--; + } + return s; +} + +mpc_val_t *mpcf_strtrim(mpc_val_t *x) { + return mpcf_strtriml(mpcf_strtrimr(x)); +} + +static const char mpc_escape_input_c[] = { + '\a', '\b', '\f', '\n', '\r', + '\t', '\v', '\\', '\'', '\"', '\0'}; + +static const char *mpc_escape_output_c[] = { + "\\a", "\\b", "\\f", "\\n", "\\r", "\\t", + "\\v", "\\\\", "\\'", "\\\"", "\\0", NULL}; + +static const char mpc_escape_input_raw_re[] = { '/' }; +static const char *mpc_escape_output_raw_re[] = { "\\/", NULL }; + +static const char mpc_escape_input_raw_cstr[] = { '"' }; +static const char *mpc_escape_output_raw_cstr[] = { "\\\"", NULL }; + +static const char mpc_escape_input_raw_cchar[] = { '\'' }; +static const char *mpc_escape_output_raw_cchar[] = { "\\'", NULL }; + +static mpc_val_t *mpcf_escape_new(mpc_val_t *x, const char *input, const char **output) { + + int i; + int found; + char buff[2]; + char *s = x; + char *y = calloc(1, 1); + + while (*s) { + + i = 0; + found = 0; + + while (output[i]) { + if (*s == input[i]) { + y = realloc(y, strlen(y) + strlen(output[i]) + 1); + strcat(y, output[i]); + found = 1; + break; + 
} + i++; + } + + if (!found) { + y = realloc(y, strlen(y) + 2); + buff[0] = *s; buff[1] = '\0'; + strcat(y, buff); + } + + s++; + } + + + return y; +} + +static mpc_val_t *mpcf_unescape_new(mpc_val_t *x, const char *input, const char **output) { + + int i; + int found = 0; + char buff[2]; + char *s = x; + char *y = calloc(1, 1); + + while (*s) { + + i = 0; + found = 0; + + while (output[i]) { + if ((*(s+0)) == output[i][0] && + (*(s+1)) == output[i][1]) { + y = realloc(y, strlen(y) + 1 + 1); + buff[0] = input[i]; buff[1] = '\0'; + strcat(y, buff); + found = 1; + s++; + break; + } + i++; + } + + if (!found) { + y = realloc(y, strlen(y) + 1 + 1); + buff[0] = *s; buff[1] = '\0'; + strcat(y, buff); + } + + if (*s == '\0') { break; } + else { s++; } + } + + return y; + +} + +mpc_val_t *mpcf_escape(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_c, mpc_escape_output_c); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape(mpc_val_t *x) { + mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_c, mpc_escape_output_c); + free(x); + return y; +} + +mpc_val_t *mpcf_escape_regex(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape_regex(mpc_val_t *x) { + mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_re, mpc_escape_output_raw_re); + free(x); + return y; +} + +mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x) { + mpc_val_t *y = mpcf_unescape_new(x, mpc_escape_input_raw_cstr, mpc_escape_output_raw_cstr); + free(x); + return y; +} + +mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x) { + mpc_val_t *y = mpcf_escape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); + free(x); + return y; +} + +mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x) { + mpc_val_t *y = 
mpcf_unescape_new(x, mpc_escape_input_raw_cchar, mpc_escape_output_raw_cchar); + free(x); + return y; +} + +mpc_val_t *mpcf_null(int n, mpc_val_t** xs) { (void) n; (void) xs; return NULL; } +mpc_val_t *mpcf_fst(int n, mpc_val_t **xs) { (void) n; return xs[0]; } +mpc_val_t *mpcf_snd(int n, mpc_val_t **xs) { (void) n; return xs[1]; } +mpc_val_t *mpcf_trd(int n, mpc_val_t **xs) { (void) n; return xs[2]; } + +static mpc_val_t *mpcf_nth_free(int n, mpc_val_t **xs, int x) { + int i; + for (i = 0; i < n; i++) { + if (i != x) { free(xs[i]); } + } + return xs[x]; +} + +mpc_val_t *mpcf_fst_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 0); } +mpc_val_t *mpcf_snd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 1); } +mpc_val_t *mpcf_trd_free(int n, mpc_val_t **xs) { return mpcf_nth_free(n, xs, 2); } +mpc_val_t *mpcf_all_free(int n, mpc_val_t** xs) { + int i; + for (i = 0; i < n; i++) { + free(xs[i]); + } + return NULL; +} + +mpc_val_t *mpcf_strfold(int n, mpc_val_t **xs) { + int i; + size_t l = 0; + + if (n == 0) { return calloc(1, 1); } + + for (i = 0; i < n; i++) { l += strlen(xs[i]); } + + xs[0] = realloc(xs[0], l + 1); + + for (i = 1; i < n; i++) { + strcat(xs[0], xs[i]); free(xs[i]); + } + + return xs[0]; +} + +/* +** Printing +*/ + +static void mpc_print_unretained(mpc_parser_t *p, int force) { + + /* TODO: Print Everything Escaped */ + + int i; + char *s, *e; + char buff[2]; + + if (p->retained && !force) {; + if (p->name) { printf("<%s>", p->name); } + else { printf(""); } + return; + } + + if (p->type == MPC_TYPE_UNDEFINED) { printf(""); } + if (p->type == MPC_TYPE_PASS) { printf("<:>"); } + if (p->type == MPC_TYPE_FAIL) { printf(""); } + if (p->type == MPC_TYPE_LIFT) { printf("<#>"); } + if (p->type == MPC_TYPE_STATE) { printf(""); } + if (p->type == MPC_TYPE_ANCHOR) { printf("<@>"); } + if (p->type == MPC_TYPE_EXPECT) { + printf("%s", p->data.expect.m); + /*mpc_print_unretained(p->data.expect.x, 0);*/ + } + + if (p->type == MPC_TYPE_ANY) { 
printf("<.>"); } + if (p->type == MPC_TYPE_SATISFY) { printf(""); } + + if (p->type == MPC_TYPE_SINGLE) { + buff[0] = p->data.single.x; buff[1] = '\0'; + s = mpcf_escape_new( + buff, + mpc_escape_input_c, + mpc_escape_output_c); + printf("'%s'", s); + free(s); + } + + if (p->type == MPC_TYPE_RANGE) { + buff[0] = p->data.range.x; buff[1] = '\0'; + s = mpcf_escape_new( + buff, + mpc_escape_input_c, + mpc_escape_output_c); + buff[0] = p->data.range.y; buff[1] = '\0'; + e = mpcf_escape_new( + buff, + mpc_escape_input_c, + mpc_escape_output_c); + printf("[%s-%s]", s, e); + free(s); + free(e); + } + + if (p->type == MPC_TYPE_ONEOF) { + s = mpcf_escape_new( + p->data.string.x, + mpc_escape_input_c, + mpc_escape_output_c); + printf("[%s]", s); + free(s); + } + + if (p->type == MPC_TYPE_NONEOF) { + s = mpcf_escape_new( + p->data.string.x, + mpc_escape_input_c, + mpc_escape_output_c); + printf("[^%s]", s); + free(s); + } + + if (p->type == MPC_TYPE_STRING) { + s = mpcf_escape_new( + p->data.string.x, + mpc_escape_input_c, + mpc_escape_output_c); + printf("\"%s\"", s); + free(s); + } + + if (p->type == MPC_TYPE_APPLY) { mpc_print_unretained(p->data.apply.x, 0); } + if (p->type == MPC_TYPE_APPLY_TO) { mpc_print_unretained(p->data.apply_to.x, 0); } + if (p->type == MPC_TYPE_PREDICT) { mpc_print_unretained(p->data.predict.x, 0); } + + if (p->type == MPC_TYPE_NOT) { mpc_print_unretained(p->data.not.x, 0); printf("!"); } + if (p->type == MPC_TYPE_MAYBE) { mpc_print_unretained(p->data.not.x, 0); printf("?"); } + + if (p->type == MPC_TYPE_MANY) { mpc_print_unretained(p->data.repeat.x, 0); printf("*"); } + if (p->type == MPC_TYPE_MANY1) { mpc_print_unretained(p->data.repeat.x, 0); printf("+"); } + if (p->type == MPC_TYPE_COUNT) { mpc_print_unretained(p->data.repeat.x, 0); printf("{%i}", p->data.repeat.n); } + if (p->type == MPC_TYPE_SEPBY1) { + mpc_print_unretained(p->data.sepby1.x, 0); + printf(" ("); + mpc_print_unretained(p->data.sepby1.sep, 0); + printf(" "); + 
mpc_print_unretained(p->data.sepby1.x, 0); + printf(")"); + printf("*"); + } + + if (p->type == MPC_TYPE_OR) { + printf("("); + for(i = 0; i < p->data.or.n-1; i++) { + mpc_print_unretained(p->data.or.xs[i], 0); + printf(" | "); + } + mpc_print_unretained(p->data.or.xs[p->data.or.n-1], 0); + printf(")"); + } + + if (p->type == MPC_TYPE_AND) { + printf("("); + for(i = 0; i < p->data.and.n-1; i++) { + mpc_print_unretained(p->data.and.xs[i], 0); + printf(" "); + } + mpc_print_unretained(p->data.and.xs[p->data.and.n-1], 0); + printf(")"); + } + + if (p->type == MPC_TYPE_CHECK) { + mpc_print_unretained(p->data.check.x, 0); + printf("->?"); + } + if (p->type == MPC_TYPE_CHECK_WITH) { + mpc_print_unretained(p->data.check_with.x, 0); + printf("->?"); + } + +} + +void mpc_print(mpc_parser_t *p) { + mpc_print_unretained(p, 1); + printf("\n"); +} + +/* +** Testing +*/ + +/* +** These functions are slightly unwieldy and +** also the whole of the testing suite for mpc +** mpc is pretty shaky. +** +** It could do with a lot more tests and more +** precision. Currently I am only really testing +** changes off of the examples. 
+** +*/ + +int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, + int(*tester)(const void*, const void*), + mpc_dtor_t destructor, + void(*printer)(const void*)) { + mpc_result_t r; + (void) printer; + if (mpc_parse("", s, p, &r)) { + + if (tester(r.output, d)) { + destructor(r.output); + return 0; + } else { + destructor(r.output); + return 1; + } + + } else { + mpc_err_delete(r.error); + return 1; + } + +} + +int mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, + int(*tester)(const void*, const void*), + mpc_dtor_t destructor, + void(*printer)(const void*)) { + + mpc_result_t r; + if (mpc_parse("", s, p, &r)) { + + if (tester(r.output, d)) { + destructor(r.output); + return 1; + } else { + printf("Got "); printer(r.output); printf("\n"); + printf("Expected "); printer(d); printf("\n"); + destructor(r.output); + return 0; + } + + } else { + mpc_err_print(r.error); + mpc_err_delete(r.error); + return 0; + + } + +} + + +/* +** AST +*/ + +void mpc_ast_delete(mpc_ast_t *a) { + + int i; + + if (a == NULL) { return; } + + for (i = 0; i < a->children_num; i++) { + mpc_ast_delete(a->children[i]); + } + + free(a->children); + free(a->tag); + free(a->contents); + free(a); + +} + +static void mpc_ast_delete_no_children(mpc_ast_t *a) { + free(a->children); + free(a->tag); + free(a->contents); + free(a); +} + +mpc_ast_t *mpc_ast_new(const char *tag, const char *contents) { + + mpc_ast_t *a = malloc(sizeof(mpc_ast_t)); + + a->tag = malloc(strlen(tag) + 1); + strcpy(a->tag, tag); + + a->contents = malloc(strlen(contents) + 1); + strcpy(a->contents, contents); + + a->state = mpc_state_new(); + + a->children_num = 0; + a->children = NULL; + return a; + +} + +mpc_ast_t *mpc_ast_build(int n, const char *tag, ...) 
{ + + mpc_ast_t *a = mpc_ast_new(tag, ""); + + int i; + va_list va; + va_start(va, tag); + + for (i = 0; i < n; i++) { + mpc_ast_add_child(a, va_arg(va, mpc_ast_t*)); + } + + va_end(va); + + return a; + +} + +mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a) { + + mpc_ast_t *r; + + if (a == NULL) { return a; } + if (a->children_num == 0) { return a; } + if (a->children_num == 1) { return a; } + + r = mpc_ast_new(">", ""); + mpc_ast_add_child(r, a); + return r; +} + +int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b) { + + int i; + + if (strcmp(a->tag, b->tag) != 0) { return 0; } + if (strcmp(a->contents, b->contents) != 0) { return 0; } + if (a->children_num != b->children_num) { return 0; } + + for (i = 0; i < a->children_num; i++) { + if (!mpc_ast_eq(a->children[i], b->children[i])) { return 0; } + } + + return 1; +} + +mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a) { + r->children_num++; + r->children = realloc(r->children, sizeof(mpc_ast_t*) * r->children_num); + r->children[r->children_num-1] = a; + return r; +} + +mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t) { + if (a == NULL) { return a; } + a->tag = realloc(a->tag, strlen(t) + 1 + strlen(a->tag) + 1); + memmove(a->tag + strlen(t) + 1, a->tag, strlen(a->tag)+1); + memmove(a->tag, t, strlen(t)); + memmove(a->tag + strlen(t), "|", 1); + return a; +} + +mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t) { + if (a == NULL) { return a; } + a->tag = realloc(a->tag, (strlen(t)-1) + strlen(a->tag) + 1); + memmove(a->tag + (strlen(t)-1), a->tag, strlen(a->tag)+1); + memmove(a->tag, t, (strlen(t)-1)); + return a; +} + +mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t) { + a->tag = realloc(a->tag, strlen(t) + 1); + strcpy(a->tag, t); + return a; +} + +mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s) { + if (a == NULL) { return a; } + a->state = s; + return a; +} + +static void mpc_ast_print_depth(mpc_ast_t *a, int d, FILE *fp) { + + int i; + + if (a == NULL) { + fprintf(fp, "NULL\n"); + return; + 
} + + for (i = 0; i < d; i++) { fprintf(fp, " "); } + + if (strlen(a->contents)) { + fprintf(fp, "%s:%lu:%lu '%s'\n", a->tag, + (long unsigned int)(a->state.row+1), + (long unsigned int)(a->state.col+1), + a->contents); + } else { + fprintf(fp, "%s \n", a->tag); + } + + for (i = 0; i < a->children_num; i++) { + mpc_ast_print_depth(a->children[i], d+1, fp); + } + +} + +void mpc_ast_print(mpc_ast_t *a) { + mpc_ast_print_depth(a, 0, stdout); +} + +void mpc_ast_print_to(mpc_ast_t *a, FILE *fp) { + mpc_ast_print_depth(a, 0, fp); +} + +int mpc_ast_get_index(mpc_ast_t *ast, const char *tag) { + return mpc_ast_get_index_lb(ast, tag, 0); +} + +int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, int lb) { + int i; + + for(i=lb; ichildren_num; i++) { + if(strcmp(ast->children[i]->tag, tag) == 0) { + return i; + } + } + + return -1; +} + +mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag) { + return mpc_ast_get_child_lb(ast, tag, 0); +} + +mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb) { + int i; + + for(i=lb; ichildren_num; i++) { + if(strcmp(ast->children[i]->tag, tag) == 0) { + return ast->children[i]; + } + } + + return NULL; +} + +mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, + mpc_ast_trav_order_t order) +{ + mpc_ast_trav_t *trav, *n_trav; + mpc_ast_t *cnode = ast; + + /* Create the traversal structure */ + trav = malloc(sizeof(mpc_ast_trav_t)); + trav->curr_node = cnode; + trav->parent = NULL; + trav->curr_child = 0; + trav->order = order; + + /* Get start node */ + switch(order) { + case mpc_ast_trav_order_pre: + /* Nothing else is needed for pre order start */ + break; + + case mpc_ast_trav_order_post: + while(cnode->children_num > 0) { + cnode = cnode->children[0]; + + n_trav = malloc(sizeof(mpc_ast_trav_t)); + n_trav->curr_node = cnode; + n_trav->parent = trav; + n_trav->curr_child = 0; + n_trav->order = order; + + trav = n_trav; + } + + break; + + default: + /* Unreachable, but compiler complaints */ + break; + } 
+ + return trav; +} + +mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav) { + mpc_ast_trav_t *n_trav, *to_free; + mpc_ast_t *ret = NULL; + int cchild; + + /* The end of traversal was reached */ + if(*trav == NULL) return NULL; + + switch((*trav)->order) { + case mpc_ast_trav_order_pre: + ret = (*trav)->curr_node; + + /* If there aren't any more children, go up */ + while(*trav != NULL && + (*trav)->curr_child >= (*trav)->curr_node->children_num) + { + to_free = *trav; + *trav = (*trav)->parent; + free(to_free); + } + + /* If trav is NULL, the end was reached */ + if(*trav == NULL) { + break; + } + + /* Go to next child */ + n_trav = malloc(sizeof(mpc_ast_trav_t)); + + cchild = (*trav)->curr_child; + n_trav->curr_node = (*trav)->curr_node->children[cchild]; + n_trav->parent = *trav; + n_trav->curr_child = 0; + n_trav->order = (*trav)->order; + + (*trav)->curr_child++; + *trav = n_trav; + + break; + + case mpc_ast_trav_order_post: + ret = (*trav)->curr_node; + + /* Move up tree to the parent If the parent doesn't have any more nodes, + * then this is the current node. If it does, move down to its left most + * child. 
Also, free the previous traversal node */ + to_free = *trav; + *trav = (*trav)->parent; + free(to_free); + + if(*trav == NULL) + break; + + /* Next child */ + (*trav)->curr_child++; + + /* If there aren't any more children, this is the next node */ + if((*trav)->curr_child >= (*trav)->curr_node->children_num) { + break; + } + + /* If there are still more children, find the leftmost child from this + * node */ + while((*trav)->curr_node->children_num > 0) { + n_trav = malloc(sizeof(mpc_ast_trav_t)); + + cchild = (*trav)->curr_child; + n_trav->curr_node = (*trav)->curr_node->children[cchild]; + n_trav->parent = *trav; + n_trav->curr_child = 0; + n_trav->order = (*trav)->order; + + *trav = n_trav; + } + + default: + /* Unreachable, but compiler complaints */ + break; + } + + return ret; +} + +void mpc_ast_traverse_free(mpc_ast_trav_t **trav) { + mpc_ast_trav_t *n_trav; + + /* Go through parents until all are free */ + while(*trav != NULL) { + n_trav = (*trav)->parent; + free(*trav); + *trav = n_trav; + } +} + +mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **xs) { + + int i, j; + mpc_ast_t** as = (mpc_ast_t**)xs; + mpc_ast_t *r; + + if (n == 0) { return NULL; } + if (n == 1) { return xs[0]; } + if (n == 2 && xs[1] == NULL) { return xs[0]; } + if (n == 2 && xs[0] == NULL) { return xs[1]; } + + r = mpc_ast_new(">", ""); + + for (i = 0; i < n; i++) { + + if (as[i] == NULL) { continue; } + + if (as[i] && as[i]->children_num == 0) { + mpc_ast_add_child(r, as[i]); + } else if (as[i] && as[i]->children_num == 1) { + mpc_ast_add_child(r, mpc_ast_add_root_tag(as[i]->children[0], as[i]->tag)); + mpc_ast_delete_no_children(as[i]); + } else if (as[i] && as[i]->children_num >= 2) { + for (j = 0; j < as[i]->children_num; j++) { + mpc_ast_add_child(r, as[i]->children[j]); + } + mpc_ast_delete_no_children(as[i]); + } + + } + + if (r->children_num) { + r->state = r->children[0]->state; + } + + return r; +} + +mpc_val_t *mpcf_str_ast(mpc_val_t *c) { + mpc_ast_t *a = mpc_ast_new("", c); + 
free(c); + return a; +} + +mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs) { + mpc_state_t *s = ((mpc_state_t**)xs)[0]; + mpc_ast_t *a = ((mpc_ast_t**)xs)[1]; + (void)n; + a = mpc_ast_state(a, *s); + free(s); + return a; +} + +mpc_parser_t *mpca_state(mpc_parser_t *a) { + return mpc_and(2, mpcf_state_ast, mpc_state(), a, free); +} + +mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t) { + return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_tag, (void*)t); +} + +mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t) { + return mpc_apply_to(a, (mpc_apply_to_t)mpc_ast_add_tag, (void*)t); +} + +mpc_parser_t *mpca_root(mpc_parser_t *a) { + return mpc_apply(a, (mpc_apply_t)mpc_ast_add_root); +} + +mpc_parser_t *mpca_not(mpc_parser_t *a) { return mpc_not(a, (mpc_dtor_t)mpc_ast_delete); } +mpc_parser_t *mpca_maybe(mpc_parser_t *a) { return mpc_maybe(a); } +mpc_parser_t *mpca_many(mpc_parser_t *a) { return mpc_many(mpcf_fold_ast, a); } +mpc_parser_t *mpca_many1(mpc_parser_t *a) { return mpc_many1(mpcf_fold_ast, a); } +mpc_parser_t *mpca_count(int n, mpc_parser_t *a) { return mpc_count(n, mpcf_fold_ast, a, (mpc_dtor_t)mpc_ast_delete); } + +mpc_parser_t *mpca_or(int n, ...) { + + int i; + va_list va; + + mpc_parser_t *p = mpc_undefined(); + + p->type = MPC_TYPE_OR; + p->data.or.n = n; + p->data.or.xs = malloc(sizeof(mpc_parser_t*) * n); + + va_start(va, n); + for (i = 0; i < n; i++) { + p->data.or.xs[i] = va_arg(va, mpc_parser_t*); + } + va_end(va); + + return p; + +} + +mpc_parser_t *mpca_and(int n, ...) 
{ + + int i; + va_list va; + + mpc_parser_t *p = mpc_undefined(); + + p->type = MPC_TYPE_AND; + p->data.and.n = n; + p->data.and.f = mpcf_fold_ast; + p->data.and.xs = malloc(sizeof(mpc_parser_t*) * n); + p->data.and.dxs = malloc(sizeof(mpc_dtor_t) * (n-1)); + + va_start(va, n); + for (i = 0; i < n; i++) { + p->data.and.xs[i] = va_arg(va, mpc_parser_t*); + } + for (i = 0; i < (n-1); i++) { + p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; + } + va_end(va); + + return p; +} + +mpc_parser_t *mpca_total(mpc_parser_t *a) { return mpc_total(a, (mpc_dtor_t)mpc_ast_delete); } + +/* +** Grammar Parser +*/ + +/* +** This is another interesting bootstrapping. +** +** Having a general purpose AST type allows +** users to specify the grammar alone and +** let all fold rules be automatically taken +** care of by existing functions. +** +** You don't get to control the type spat +** out but this means you can make a nice +** parser to take in some grammar in nice +** syntax and spit out a parser that works. +** +** The grammar for this looks surprisingly +** like regex but the main difference is that +** it is now whitespace insensitive and the +** base type takes literals of some form. +*/ + +/* +** +** ### Grammar Grammar +** +** : ( "|" ) | +** +** : * +** +** : +** | "*" +** | "+" +** | "?" 
+** | "{" "}" +** +** : "<" ( | ) ">" +** | +** | +** | +** | "(" ")" +*/ + +typedef struct { + va_list *va; + int parsers_num; + mpc_parser_t **parsers; + int flags; +} mpca_grammar_st_t; + +static mpc_val_t *mpcaf_grammar_or(int n, mpc_val_t **xs) { + (void) n; + if (xs[1] == NULL) { return xs[0]; } + else { return mpca_or(2, xs[0], xs[1]); } +} + +static mpc_val_t *mpcaf_grammar_and(int n, mpc_val_t **xs) { + int i; + mpc_parser_t *p = mpc_pass(); + for (i = 0; i < n; i++) { + if (xs[i] != NULL) { p = mpca_and(2, p, xs[i]); } + } + return p; +} + +static mpc_val_t *mpcaf_grammar_repeat(int n, mpc_val_t **xs) { + int num; + (void) n; + if (xs[1] == NULL) { return xs[0]; } + switch(((char*)xs[1])[0]) + { + case '*': { free(xs[1]); return mpca_many(xs[0]); }; break; + case '+': { free(xs[1]); return mpca_many1(xs[0]); }; break; + case '?': { free(xs[1]); return mpca_maybe(xs[0]); }; break; + case '!': { free(xs[1]); return mpca_not(xs[0]); }; break; + default: + num = *((int*)xs[1]); + free(xs[1]); + } + return mpca_count(num, xs[0]); +} + +static mpc_val_t *mpcaf_grammar_string(mpc_val_t *x, void *s) { + mpca_grammar_st_t *st = s; + char *y = mpcf_unescape(x); + mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_string(y) : mpc_tok(mpc_string(y)); + free(y); + return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "string")); +} + +static mpc_val_t *mpcaf_grammar_char(mpc_val_t *x, void *s) { + mpca_grammar_st_t *st = s; + char *y = mpcf_unescape(x); + mpc_parser_t *p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? 
mpc_char(y[0]) : mpc_tok(mpc_char(y[0])); + free(y); + return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "char")); +} + +static mpc_val_t *mpcaf_fold_regex(int n, mpc_val_t **xs) { + char *y = xs[0]; + char *m = xs[1]; + mpca_grammar_st_t *st = xs[2]; + mpc_parser_t *p; + int mode = MPC_RE_DEFAULT; + + (void)n; + if (strchr(m, 'm')) { mode |= MPC_RE_MULTILINE; } + if (strchr(m, 's')) { mode |= MPC_RE_DOTALL; } + y = mpcf_unescape_regex(y); + p = (st->flags & MPCA_LANG_WHITESPACE_SENSITIVE) ? mpc_re_mode(y, mode) : mpc_tok(mpc_re_mode(y, mode)); + free(y); + free(m); + + return mpca_state(mpca_tag(mpc_apply(p, mpcf_str_ast), "regex")); +} + +/* Should this just use `isdigit` instead? */ +static int is_number(const char* s) { + size_t i; + for (i = 0; i < strlen(s); i++) { if (!strchr("0123456789", s[i])) { return 0; } } + return 1; +} + +static mpc_parser_t *mpca_grammar_find_parser(char *x, mpca_grammar_st_t *st) { + + int i; + mpc_parser_t *p; + + /* Case of Number */ + if (is_number(x)) { + + i = strtol(x, NULL, 10); + + while (st->parsers_num <= i) { + st->parsers_num++; + st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); + st->parsers[st->parsers_num-1] = va_arg(*st->va, mpc_parser_t*); + if (st->parsers[st->parsers_num-1] == NULL) { + return mpc_failf("No Parser in position %i! 
Only supplied %i Parsers!", i, st->parsers_num); + } + } + + return st->parsers[st->parsers_num-1]; + + /* Case of Identifier */ + } else { + + /* Search Existing Parsers */ + for (i = 0; i < st->parsers_num; i++) { + mpc_parser_t *q = st->parsers[i]; + if (q == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } + if (q->name && strcmp(q->name, x) == 0) { return q; } + } + + /* Search New Parsers */ + while (1) { + + p = va_arg(*st->va, mpc_parser_t*); + + st->parsers_num++; + st->parsers = realloc(st->parsers, sizeof(mpc_parser_t*) * st->parsers_num); + st->parsers[st->parsers_num-1] = p; + + if (p == NULL || p->name == NULL) { return mpc_failf("Unknown Parser '%s'!", x); } + if (p->name && strcmp(p->name, x) == 0) { return p; } + + } + + } + +} + +static mpc_val_t *mpcaf_grammar_id(mpc_val_t *x, void *s) { + + mpca_grammar_st_t *st = s; + mpc_parser_t *p = mpca_grammar_find_parser(x, st); + free(x); + + if (p->name) { + return mpca_state(mpca_root(mpca_add_tag(p, p->name))); + } else { + return mpca_state(mpca_root(p)); + } +} + +mpc_parser_t *mpca_grammar_st(const char *grammar, mpca_grammar_st_t *st) { + + char *err_msg; + mpc_parser_t *err_out; + mpc_result_t r; + mpc_parser_t *GrammarTotal, *Grammar, *Term, *Factor, *Base; + + GrammarTotal = mpc_new("grammar_total"); + Grammar = mpc_new("grammar"); + Term = mpc_new("term"); + Factor = mpc_new("factor"); + Base = mpc_new("base"); + + mpc_define(GrammarTotal, + mpc_predictive(mpc_total(Grammar, mpc_soft_delete)) + ); + + mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, + Term, + mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), + mpc_soft_delete + )); + + mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); + + mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, + Base, + mpc_or(6, + mpc_sym("*"), + mpc_sym("+"), + mpc_sym("?"), + mpc_sym("!"), + mpc_tok_brackets(mpc_int(), free), + mpc_pass()), + mpc_soft_delete + )); + + mpc_define(Base, mpc_or(5, + 
mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), + mpc_apply_to(mpc_tok(mpc_char_lit()), mpcaf_grammar_char, st), + mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), + mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), + mpc_tok_parens(Grammar, mpc_soft_delete) + )); + + mpc_optimise(GrammarTotal); + mpc_optimise(Grammar); + mpc_optimise(Factor); + mpc_optimise(Term); + mpc_optimise(Base); + + if(!mpc_parse("", grammar, GrammarTotal, &r)) { + err_msg = mpc_err_string(r.error); + err_out = mpc_failf("Invalid Grammar: %s", err_msg); + mpc_err_delete(r.error); + free(err_msg); + r.output = err_out; + } + + mpc_cleanup(5, GrammarTotal, Grammar, Term, Factor, Base); + + mpc_optimise(r.output); + + return (st->flags & MPCA_LANG_PREDICTIVE) ? mpc_predictive(r.output) : r.output; + +} + +mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...) { + mpca_grammar_st_t st; + mpc_parser_t *res; + va_list va; + va_start(va, grammar); + + st.va = &va; + st.parsers_num = 0; + st.parsers = NULL; + st.flags = flags; + + res = mpca_grammar_st(grammar, &st); + free(st.parsers); + va_end(va); + return res; +} + +typedef struct { + char *ident; + char *name; + mpc_parser_t *grammar; +} mpca_stmt_t; + +static mpc_val_t *mpca_stmt_afold(int n, mpc_val_t **xs) { + mpca_stmt_t *stmt = malloc(sizeof(mpca_stmt_t)); + stmt->ident = ((char**)xs)[0]; + stmt->name = ((char**)xs)[1]; + stmt->grammar = ((mpc_parser_t**)xs)[3]; + (void) n; + free(((char**)xs)[2]); + free(((char**)xs)[4]); + + return stmt; +} + +static mpc_val_t *mpca_stmt_fold(int n, mpc_val_t **xs) { + + int i; + mpca_stmt_t **stmts = malloc(sizeof(mpca_stmt_t*) * (n+1)); + + for (i = 0; i < n; i++) { + stmts[i] = xs[i]; + } + stmts[n] = NULL; + + return stmts; +} + +static void mpca_stmt_list_delete(mpc_val_t *x) { + + mpca_stmt_t **stmts = x; + + while(*stmts) { + mpca_stmt_t *stmt = 
*stmts; + free(stmt->ident); + free(stmt->name); + mpc_soft_delete(stmt->grammar); + free(stmt); + stmts++; + } + free(x); + +} + +static mpc_val_t *mpca_stmt_list_apply_to(mpc_val_t *x, void *s) { + + mpca_grammar_st_t *st = s; + mpca_stmt_t *stmt; + mpca_stmt_t **stmts = x; + mpc_parser_t *left; + + while(*stmts) { + stmt = *stmts; + left = mpca_grammar_find_parser(stmt->ident, st); + if (st->flags & MPCA_LANG_PREDICTIVE) { stmt->grammar = mpc_predictive(stmt->grammar); } + if (stmt->name) { stmt->grammar = mpc_expect(stmt->grammar, stmt->name); } + mpc_optimise(stmt->grammar); + mpc_define(left, stmt->grammar); + free(stmt->ident); + free(stmt->name); + free(stmt); + stmts++; + } + + free(x); + + return NULL; +} + +static mpc_err_t *mpca_lang_st(mpc_input_t *i, mpca_grammar_st_t *st) { + + mpc_result_t r; + mpc_err_t *e; + mpc_parser_t *Lang, *Stmt, *Grammar, *Term, *Factor, *Base; + + Lang = mpc_new("lang"); + Stmt = mpc_new("stmt"); + Grammar = mpc_new("grammar"); + Term = mpc_new("term"); + Factor = mpc_new("factor"); + Base = mpc_new("base"); + + mpc_define(Lang, mpc_apply_to( + mpc_total(mpc_predictive(mpc_many(mpca_stmt_fold, Stmt)), mpca_stmt_list_delete), + mpca_stmt_list_apply_to, st + )); + + mpc_define(Stmt, mpc_and(5, mpca_stmt_afold, + mpc_tok(mpc_ident()), mpc_maybe(mpc_tok(mpc_string_lit())), mpc_sym(":"), Grammar, mpc_sym(";"), + free, free, free, mpc_soft_delete + )); + + mpc_define(Grammar, mpc_and(2, mpcaf_grammar_or, + Term, + mpc_maybe(mpc_and(2, mpcf_snd_free, mpc_sym("|"), Grammar, free)), + mpc_soft_delete + )); + + mpc_define(Term, mpc_many1(mpcaf_grammar_and, Factor)); + + mpc_define(Factor, mpc_and(2, mpcaf_grammar_repeat, + Base, + mpc_or(6, + mpc_sym("*"), + mpc_sym("+"), + mpc_sym("?"), + mpc_sym("!"), + mpc_tok_brackets(mpc_int(), free), + mpc_pass()), + mpc_soft_delete + )); + + mpc_define(Base, mpc_or(5, + mpc_apply_to(mpc_tok(mpc_string_lit()), mpcaf_grammar_string, st), + mpc_apply_to(mpc_tok(mpc_char_lit()), 
mpcaf_grammar_char, st), + mpc_tok(mpc_and(3, mpcaf_fold_regex, mpc_regex_lit(), mpc_many(mpcf_strfold, mpc_oneof("ms")), mpc_lift_val(st), free, free)), + mpc_apply_to(mpc_tok_braces(mpc_or(2, mpc_digits(), mpc_ident()), free), mpcaf_grammar_id, st), + mpc_tok_parens(Grammar, mpc_soft_delete) + )); + + mpc_optimise(Lang); + mpc_optimise(Stmt); + mpc_optimise(Grammar); + mpc_optimise(Term); + mpc_optimise(Factor); + mpc_optimise(Base); + + if (!mpc_parse_input(i, Lang, &r)) { + e = r.error; + } else { + e = NULL; + } + + mpc_cleanup(6, Lang, Stmt, Grammar, Term, Factor, Base); + + return e; +} + +mpc_err_t *mpca_lang_file(int flags, FILE *f, ...) { + mpca_grammar_st_t st; + mpc_input_t *i; + mpc_err_t *err; + + va_list va; + va_start(va, f); + + st.va = &va; + st.parsers_num = 0; + st.parsers = NULL; + st.flags = flags; + + i = mpc_input_new_file("", f); + err = mpca_lang_st(i, &st); + mpc_input_delete(i); + + free(st.parsers); + va_end(va); + return err; +} + +mpc_err_t *mpca_lang_pipe(int flags, FILE *p, ...) { + mpca_grammar_st_t st; + mpc_input_t *i; + mpc_err_t *err; + + va_list va; + va_start(va, p); + + st.va = &va; + st.parsers_num = 0; + st.parsers = NULL; + st.flags = flags; + + i = mpc_input_new_pipe("", p); + err = mpca_lang_st(i, &st); + mpc_input_delete(i); + + free(st.parsers); + va_end(va); + return err; +} + +mpc_err_t *mpca_lang(int flags, const char *language, ...) { + + mpca_grammar_st_t st; + mpc_input_t *i; + mpc_err_t *err; + + va_list va; + va_start(va, language); + + st.va = &va; + st.parsers_num = 0; + st.parsers = NULL; + st.flags = flags; + + i = mpc_input_new_string("", language); + err = mpca_lang_st(i, &st); + mpc_input_delete(i); + + free(st.parsers); + va_end(va); + return err; +} + +mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...) 
{ + + mpca_grammar_st_t st; + mpc_input_t *i; + mpc_err_t *err; + + va_list va; + + FILE *f = fopen(filename, "rb"); + + if (f == NULL) { + err = mpc_err_file(filename, "Unable to open file!"); + return err; + } + + va_start(va, filename); + + st.va = &va; + st.parsers_num = 0; + st.parsers = NULL; + st.flags = flags; + + i = mpc_input_new_file(filename, f); + err = mpca_lang_st(i, &st); + mpc_input_delete(i); + + free(st.parsers); + va_end(va); + + fclose(f); + + return err; +} + +static int mpc_nodecount_unretained(mpc_parser_t* p, int force) { + + int i, total; + + if (p->retained && !force) { return 0; } + + if (p->type == MPC_TYPE_EXPECT) { return 1 + mpc_nodecount_unretained(p->data.expect.x, 0); } + + if (p->type == MPC_TYPE_APPLY) { return 1 + mpc_nodecount_unretained(p->data.apply.x, 0); } + if (p->type == MPC_TYPE_APPLY_TO) { return 1 + mpc_nodecount_unretained(p->data.apply_to.x, 0); } + if (p->type == MPC_TYPE_PREDICT) { return 1 + mpc_nodecount_unretained(p->data.predict.x, 0); } + + if (p->type == MPC_TYPE_CHECK) { return 1 + mpc_nodecount_unretained(p->data.check.x, 0); } + if (p->type == MPC_TYPE_CHECK_WITH) { return 1 + mpc_nodecount_unretained(p->data.check_with.x, 0); } + + if (p->type == MPC_TYPE_NOT) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } + if (p->type == MPC_TYPE_MAYBE) { return 1 + mpc_nodecount_unretained(p->data.not.x, 0); } + + if (p->type == MPC_TYPE_MANY) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } + if (p->type == MPC_TYPE_MANY1) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } + if (p->type == MPC_TYPE_COUNT) { return 1 + mpc_nodecount_unretained(p->data.repeat.x, 0); } + if (p->type == MPC_TYPE_SEPBY1) { + total = 1; + total += mpc_nodecount_unretained(p->data.sepby1.x, 0); + total += mpc_nodecount_unretained(p->data.sepby1.sep, 0); + total += mpc_nodecount_unretained(p->data.sepby1.x, 0); + return total; + } + + if (p->type == MPC_TYPE_OR) { + total = 1; + for(i = 0; i < 
p->data.or.n; i++) { + total += mpc_nodecount_unretained(p->data.or.xs[i], 0); + } + return total; + } + + if (p->type == MPC_TYPE_AND) { + total = 1; + for(i = 0; i < p->data.and.n; i++) { + total += mpc_nodecount_unretained(p->data.and.xs[i], 0); + } + return total; + } + + return 1; + +} + +void mpc_stats(mpc_parser_t* p) { + printf("Stats\n"); + printf("=====\n"); + printf("Node Count: %i\n", mpc_nodecount_unretained(p, 1)); +} + +static void mpc_optimise_unretained(mpc_parser_t *p, int force) { + + int i, n, m; + mpc_parser_t *t; + + if (p->retained && !force) { return; } + + /* Optimise Subexpressions */ + + if (p->type == MPC_TYPE_EXPECT) { mpc_optimise_unretained(p->data.expect.x, 0); } + if (p->type == MPC_TYPE_APPLY) { mpc_optimise_unretained(p->data.apply.x, 0); } + if (p->type == MPC_TYPE_APPLY_TO) { mpc_optimise_unretained(p->data.apply_to.x, 0); } + if (p->type == MPC_TYPE_CHECK) { mpc_optimise_unretained(p->data.check.x, 0); } + if (p->type == MPC_TYPE_CHECK_WITH) { mpc_optimise_unretained(p->data.check_with.x, 0); } + if (p->type == MPC_TYPE_PREDICT) { mpc_optimise_unretained(p->data.predict.x, 0); } + if (p->type == MPC_TYPE_NOT) { mpc_optimise_unretained(p->data.not.x, 0); } + if (p->type == MPC_TYPE_MAYBE) { mpc_optimise_unretained(p->data.not.x, 0); } + if (p->type == MPC_TYPE_MANY) { mpc_optimise_unretained(p->data.repeat.x, 0); } + if (p->type == MPC_TYPE_MANY1) { mpc_optimise_unretained(p->data.repeat.x, 0); } + if (p->type == MPC_TYPE_COUNT) { mpc_optimise_unretained(p->data.repeat.x, 0); } + if (p->type == MPC_TYPE_SEPBY1) { + mpc_optimise_unretained(p->data.sepby1.x, 0); + mpc_optimise_unretained(p->data.sepby1.sep, 0); + } + + if (p->type == MPC_TYPE_OR) { + for(i = 0; i < p->data.or.n; i++) { + mpc_optimise_unretained(p->data.or.xs[i], 0); + } + } + + if (p->type == MPC_TYPE_AND) { + for(i = 0; i < p->data.and.n; i++) { + mpc_optimise_unretained(p->data.and.xs[i], 0); + } + } + + /* Perform optimisations */ + + while (1) { + + /* Merge 
rhs `or` */ + if (p->type == MPC_TYPE_OR + && p->data.or.xs[p->data.or.n-1]->type == MPC_TYPE_OR + && !p->data.or.xs[p->data.or.n-1]->retained) { + t = p->data.or.xs[p->data.or.n-1]; + n = p->data.or.n; m = t->data.or.n; + p->data.or.n = n + m - 1; + p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); + memmove(p->data.or.xs + n - 1, t->data.or.xs, m * sizeof(mpc_parser_t*)); + free(t->data.or.xs); free(t->name); free(t); + continue; + } + + /* Merge lhs `or` */ + if (p->type == MPC_TYPE_OR + && p->data.or.xs[0]->type == MPC_TYPE_OR + && !p->data.or.xs[0]->retained) { + t = p->data.or.xs[0]; + n = p->data.or.n; m = t->data.or.n; + p->data.or.n = n + m - 1; + p->data.or.xs = realloc(p->data.or.xs, sizeof(mpc_parser_t*) * (n + m -1)); + memmove(p->data.or.xs + m, p->data.or.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); + memmove(p->data.or.xs, t->data.or.xs, m * sizeof(mpc_parser_t*)); + free(t->data.or.xs); free(t->name); free(t); + continue; + } + + /* Remove ast `pass` */ + if (p->type == MPC_TYPE_AND + && p->data.and.n == 2 + && p->data.and.xs[0]->type == MPC_TYPE_PASS + && !p->data.and.xs[0]->retained + && p->data.and.f == mpcf_fold_ast) { + t = p->data.and.xs[1]; + mpc_delete(p->data.and.xs[0]); + free(p->data.and.xs); free(p->data.and.dxs); free(p->name); + memcpy(p, t, sizeof(mpc_parser_t)); + free(t); + continue; + } + + /* Merge ast lhs `and` */ + if (p->type == MPC_TYPE_AND + && p->data.and.f == mpcf_fold_ast + && p->data.and.xs[0]->type == MPC_TYPE_AND + && !p->data.and.xs[0]->retained + && p->data.and.xs[0]->data.and.f == mpcf_fold_ast) { + t = p->data.and.xs[0]; + n = p->data.and.n; m = t->data.and.n; + p->data.and.n = n + m - 1; + p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); + p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); + memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * sizeof(mpc_parser_t*)); + memmove(p->data.and.xs, t->data.and.xs, m * 
sizeof(mpc_parser_t*)); + for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } + free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); + continue; + } + + /* Merge ast rhs `and` */ + if (p->type == MPC_TYPE_AND + && p->data.and.f == mpcf_fold_ast + && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND + && !p->data.and.xs[p->data.and.n-1]->retained + && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_fold_ast) { + t = p->data.and.xs[p->data.and.n-1]; + n = p->data.and.n; m = t->data.and.n; + p->data.and.n = n + m - 1; + p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); + p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); + memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); + for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = (mpc_dtor_t)mpc_ast_delete; } + free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); + continue; + } + + /* Remove re `lift` */ + if (p->type == MPC_TYPE_AND + && p->data.and.n == 2 + && p->data.and.xs[0]->type == MPC_TYPE_LIFT + && p->data.and.xs[0]->data.lift.lf == mpcf_ctor_str + && !p->data.and.xs[0]->retained + && p->data.and.f == mpcf_strfold) { + t = p->data.and.xs[1]; + mpc_delete(p->data.and.xs[0]); + free(p->data.and.xs); free(p->data.and.dxs); free(p->name); + memcpy(p, t, sizeof(mpc_parser_t)); + free(t); + continue; + } + + /* Merge re lhs `and` */ + if (p->type == MPC_TYPE_AND + && p->data.and.f == mpcf_strfold + && p->data.and.xs[0]->type == MPC_TYPE_AND + && !p->data.and.xs[0]->retained + && p->data.and.xs[0]->data.and.f == mpcf_strfold) { + t = p->data.and.xs[0]; + n = p->data.and.n; m = t->data.and.n; + p->data.and.n = n + m - 1; + p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m - 1)); + p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); + memmove(p->data.and.xs + m, p->data.and.xs + 1, (n - 1) * 
sizeof(mpc_parser_t*)); + memmove(p->data.and.xs, t->data.and.xs, m * sizeof(mpc_parser_t*)); + for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } + free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); + continue; + } + + /* Merge re rhs `and` */ + if (p->type == MPC_TYPE_AND + && p->data.and.f == mpcf_strfold + && p->data.and.xs[p->data.and.n-1]->type == MPC_TYPE_AND + && !p->data.and.xs[p->data.and.n-1]->retained + && p->data.and.xs[p->data.and.n-1]->data.and.f == mpcf_strfold) { + t = p->data.and.xs[p->data.and.n-1]; + n = p->data.and.n; m = t->data.and.n; + p->data.and.n = n + m - 1; + p->data.and.xs = realloc(p->data.and.xs, sizeof(mpc_parser_t*) * (n + m -1)); + p->data.and.dxs = realloc(p->data.and.dxs, sizeof(mpc_dtor_t) * (n + m - 1 - 1)); + memmove(p->data.and.xs + n - 1, t->data.and.xs, m * sizeof(mpc_parser_t*)); + for (i = 0; i < p->data.and.n-1; i++) { p->data.and.dxs[i] = free; } + free(t->data.and.xs); free(t->data.and.dxs); free(t->name); free(t); + continue; + } + + return; + + } + +} + +void mpc_optimise(mpc_parser_t *p) { + mpc_optimise_unretained(p, 1); +} diff --git a/vendor/mpc.h b/vendor/mpc.h new file mode 100644 index 0000000..49a08ee --- /dev/null +++ b/vendor/mpc.h @@ -0,0 +1,391 @@ +/* +** mpc - Micro Parser Combinator library for C +** +** https://github.com/orangeduck/mpc +** +** Daniel Holden - contact@daniel-holden.com +** Licensed under BSD3 +*/ + +#ifndef mpc_h +#define mpc_h + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include +#include + +/* +** State Type +*/ + +typedef struct { + long pos; + long row; + long col; + int term; +} mpc_state_t; + +/* +** Error Type +*/ + +typedef struct { + mpc_state_t state; + int expected_num; + char *filename; + char *failure; + char **expected; + char received; +} mpc_err_t; + +void mpc_err_delete(mpc_err_t *e); +char *mpc_err_string(mpc_err_t *e); +void mpc_err_print(mpc_err_t *e); +void 
mpc_err_print_to(mpc_err_t *e, FILE *f); + +/* +** Parsing +*/ + +typedef void mpc_val_t; + +typedef union { + mpc_err_t *error; + mpc_val_t *output; +} mpc_result_t; + +struct mpc_parser_t; +typedef struct mpc_parser_t mpc_parser_t; + +int mpc_parse(const char *filename, const char *string, mpc_parser_t *p, mpc_result_t *r); +int mpc_nparse(const char *filename, const char *string, size_t length, mpc_parser_t *p, mpc_result_t *r); +int mpc_parse_file(const char *filename, FILE *file, mpc_parser_t *p, mpc_result_t *r); +int mpc_parse_pipe(const char *filename, FILE *pipe, mpc_parser_t *p, mpc_result_t *r); +int mpc_parse_contents(const char *filename, mpc_parser_t *p, mpc_result_t *r); + +/* +** Function Types +*/ + +typedef void(*mpc_dtor_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_ctor_t)(void); + +typedef mpc_val_t*(*mpc_apply_t)(mpc_val_t*); +typedef mpc_val_t*(*mpc_apply_to_t)(mpc_val_t*,void*); +typedef mpc_val_t*(*mpc_fold_t)(int,mpc_val_t**); + +typedef int(*mpc_check_t)(mpc_val_t**); +typedef int(*mpc_check_with_t)(mpc_val_t**,void*); + +/* +** Building a Parser +*/ + +mpc_parser_t *mpc_new(const char *name); +mpc_parser_t *mpc_copy(mpc_parser_t *a); +mpc_parser_t *mpc_define(mpc_parser_t *p, mpc_parser_t *a); +mpc_parser_t *mpc_undefine(mpc_parser_t *p); + +void mpc_delete(mpc_parser_t *p); +void mpc_cleanup(int n, ...); + +/* +** Basic Parsers +*/ + +mpc_parser_t *mpc_any(void); +mpc_parser_t *mpc_char(char c); +mpc_parser_t *mpc_range(char s, char e); +mpc_parser_t *mpc_oneof(const char *s); +mpc_parser_t *mpc_noneof(const char *s); +mpc_parser_t *mpc_satisfy(int(*f)(char)); +mpc_parser_t *mpc_string(const char *s); + +/* +** Other Parsers +*/ + +mpc_parser_t *mpc_pass(void); +mpc_parser_t *mpc_fail(const char *m); +mpc_parser_t *mpc_failf(const char *fmt, ...); +mpc_parser_t *mpc_lift(mpc_ctor_t f); +mpc_parser_t *mpc_lift_val(mpc_val_t *x); +mpc_parser_t *mpc_anchor(int(*f)(char,char)); +mpc_parser_t *mpc_state(void); + +/* +** Combinator Parsers +*/ + 
+mpc_parser_t *mpc_expect(mpc_parser_t *a, const char *e); +mpc_parser_t *mpc_expectf(mpc_parser_t *a, const char *fmt, ...); +mpc_parser_t *mpc_apply(mpc_parser_t *a, mpc_apply_t f); +mpc_parser_t *mpc_apply_to(mpc_parser_t *a, mpc_apply_to_t f, void *x); +mpc_parser_t *mpc_check(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *e); +mpc_parser_t *mpc_check_with(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *e); +mpc_parser_t *mpc_checkf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_t f, const char *fmt, ...); +mpc_parser_t *mpc_check_withf(mpc_parser_t *a, mpc_dtor_t da, mpc_check_with_t f, void *x, const char *fmt, ...); + +mpc_parser_t *mpc_not(mpc_parser_t *a, mpc_dtor_t da); +mpc_parser_t *mpc_not_lift(mpc_parser_t *a, mpc_dtor_t da, mpc_ctor_t lf); +mpc_parser_t *mpc_maybe(mpc_parser_t *a); +mpc_parser_t *mpc_maybe_lift(mpc_parser_t *a, mpc_ctor_t lf); + +mpc_parser_t *mpc_many(mpc_fold_t f, mpc_parser_t *a); +mpc_parser_t *mpc_many1(mpc_fold_t f, mpc_parser_t *a); +mpc_parser_t *mpc_count(int n, mpc_fold_t f, mpc_parser_t *a, mpc_dtor_t da); + +mpc_parser_t *mpc_or(int n, ...); +mpc_parser_t *mpc_and(int n, mpc_fold_t f, ...); + +mpc_parser_t *mpc_predictive(mpc_parser_t *a); + +/* +** Common Parsers +*/ + +mpc_parser_t *mpc_eoi(void); +mpc_parser_t *mpc_soi(void); + +mpc_parser_t *mpc_boundary(void); +mpc_parser_t *mpc_boundary_newline(void); + +mpc_parser_t *mpc_whitespace(void); +mpc_parser_t *mpc_whitespaces(void); +mpc_parser_t *mpc_blank(void); + +mpc_parser_t *mpc_newline(void); +mpc_parser_t *mpc_tab(void); +mpc_parser_t *mpc_escape(void); + +mpc_parser_t *mpc_digit(void); +mpc_parser_t *mpc_hexdigit(void); +mpc_parser_t *mpc_octdigit(void); +mpc_parser_t *mpc_digits(void); +mpc_parser_t *mpc_hexdigits(void); +mpc_parser_t *mpc_octdigits(void); + +mpc_parser_t *mpc_lower(void); +mpc_parser_t *mpc_upper(void); +mpc_parser_t *mpc_alpha(void); +mpc_parser_t *mpc_underscore(void); +mpc_parser_t *mpc_alphanum(void); + 
+mpc_parser_t *mpc_int(void); +mpc_parser_t *mpc_hex(void); +mpc_parser_t *mpc_oct(void); +mpc_parser_t *mpc_number(void); + +mpc_parser_t *mpc_real(void); +mpc_parser_t *mpc_float(void); + +mpc_parser_t *mpc_char_lit(void); +mpc_parser_t *mpc_string_lit(void); +mpc_parser_t *mpc_regex_lit(void); + +mpc_parser_t *mpc_ident(void); + +/* +** Useful Parsers +*/ + +mpc_parser_t *mpc_startwith(mpc_parser_t *a); +mpc_parser_t *mpc_endwith(mpc_parser_t *a, mpc_dtor_t da); +mpc_parser_t *mpc_whole(mpc_parser_t *a, mpc_dtor_t da); + +mpc_parser_t *mpc_stripl(mpc_parser_t *a); +mpc_parser_t *mpc_stripr(mpc_parser_t *a); +mpc_parser_t *mpc_strip(mpc_parser_t *a); +mpc_parser_t *mpc_tok(mpc_parser_t *a); +mpc_parser_t *mpc_sym(const char *s); +mpc_parser_t *mpc_total(mpc_parser_t *a, mpc_dtor_t da); + +mpc_parser_t *mpc_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); +mpc_parser_t *mpc_parens(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_braces(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_brackets(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_squares(mpc_parser_t *a, mpc_dtor_t ad); + +mpc_parser_t *mpc_tok_between(mpc_parser_t *a, mpc_dtor_t ad, const char *o, const char *c); +mpc_parser_t *mpc_tok_parens(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_tok_braces(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_tok_brackets(mpc_parser_t *a, mpc_dtor_t ad); +mpc_parser_t *mpc_tok_squares(mpc_parser_t *a, mpc_dtor_t ad); + +mpc_parser_t *mpc_sepby1(mpc_fold_t f, mpc_parser_t *sep, mpc_parser_t *a); + +/* +** Common Function Parameters +*/ + +void mpcf_dtor_null(mpc_val_t *x); + +mpc_val_t *mpcf_ctor_null(void); +mpc_val_t *mpcf_ctor_str(void); + +mpc_val_t *mpcf_free(mpc_val_t *x); +mpc_val_t *mpcf_int(mpc_val_t *x); +mpc_val_t *mpcf_hex(mpc_val_t *x); +mpc_val_t *mpcf_oct(mpc_val_t *x); +mpc_val_t *mpcf_float(mpc_val_t *x); +mpc_val_t *mpcf_strtriml(mpc_val_t *x); +mpc_val_t *mpcf_strtrimr(mpc_val_t *x); +mpc_val_t 
*mpcf_strtrim(mpc_val_t *x); + +mpc_val_t *mpcf_escape(mpc_val_t *x); +mpc_val_t *mpcf_escape_regex(mpc_val_t *x); +mpc_val_t *mpcf_escape_string_raw(mpc_val_t *x); +mpc_val_t *mpcf_escape_char_raw(mpc_val_t *x); + +mpc_val_t *mpcf_unescape(mpc_val_t *x); +mpc_val_t *mpcf_unescape_regex(mpc_val_t *x); +mpc_val_t *mpcf_unescape_string_raw(mpc_val_t *x); +mpc_val_t *mpcf_unescape_char_raw(mpc_val_t *x); + +mpc_val_t *mpcf_null(int n, mpc_val_t** xs); +mpc_val_t *mpcf_fst(int n, mpc_val_t** xs); +mpc_val_t *mpcf_snd(int n, mpc_val_t** xs); +mpc_val_t *mpcf_trd(int n, mpc_val_t** xs); + +mpc_val_t *mpcf_fst_free(int n, mpc_val_t** xs); +mpc_val_t *mpcf_snd_free(int n, mpc_val_t** xs); +mpc_val_t *mpcf_trd_free(int n, mpc_val_t** xs); +mpc_val_t *mpcf_all_free(int n, mpc_val_t** xs); + +mpc_val_t *mpcf_freefold(int n, mpc_val_t** xs); +mpc_val_t *mpcf_strfold(int n, mpc_val_t** xs); + +/* +** Regular Expression Parsers +*/ + +enum { + MPC_RE_DEFAULT = 0, + MPC_RE_M = 1, + MPC_RE_S = 2, + MPC_RE_MULTILINE = 1, + MPC_RE_DOTALL = 2 +}; + +mpc_parser_t *mpc_re(const char *re); +mpc_parser_t *mpc_re_mode(const char *re, int mode); + +/* +** AST +*/ + +typedef struct mpc_ast_t { + char *tag; + char *contents; + mpc_state_t state; + int children_num; + struct mpc_ast_t** children; +} mpc_ast_t; + +mpc_ast_t *mpc_ast_new(const char *tag, const char *contents); +mpc_ast_t *mpc_ast_build(int n, const char *tag, ...); +mpc_ast_t *mpc_ast_add_root(mpc_ast_t *a); +mpc_ast_t *mpc_ast_add_child(mpc_ast_t *r, mpc_ast_t *a); +mpc_ast_t *mpc_ast_add_tag(mpc_ast_t *a, const char *t); +mpc_ast_t *mpc_ast_add_root_tag(mpc_ast_t *a, const char *t); +mpc_ast_t *mpc_ast_tag(mpc_ast_t *a, const char *t); +mpc_ast_t *mpc_ast_state(mpc_ast_t *a, mpc_state_t s); + +void mpc_ast_delete(mpc_ast_t *a); +void mpc_ast_print(mpc_ast_t *a); +void mpc_ast_print_to(mpc_ast_t *a, FILE *fp); + +int mpc_ast_get_index(mpc_ast_t *ast, const char *tag); +int mpc_ast_get_index_lb(mpc_ast_t *ast, const char *tag, 
int lb); +mpc_ast_t *mpc_ast_get_child(mpc_ast_t *ast, const char *tag); +mpc_ast_t *mpc_ast_get_child_lb(mpc_ast_t *ast, const char *tag, int lb); + +typedef enum { + mpc_ast_trav_order_pre, + mpc_ast_trav_order_post +} mpc_ast_trav_order_t; + +typedef struct mpc_ast_trav_t { + mpc_ast_t *curr_node; + struct mpc_ast_trav_t *parent; + int curr_child; + mpc_ast_trav_order_t order; +} mpc_ast_trav_t; + +mpc_ast_trav_t *mpc_ast_traverse_start(mpc_ast_t *ast, + mpc_ast_trav_order_t order); + +mpc_ast_t *mpc_ast_traverse_next(mpc_ast_trav_t **trav); + +void mpc_ast_traverse_free(mpc_ast_trav_t **trav); + +/* +** Warning: This function currently doesn't test for equality of the `state` member! +*/ +int mpc_ast_eq(mpc_ast_t *a, mpc_ast_t *b); + +mpc_val_t *mpcf_fold_ast(int n, mpc_val_t **as); +mpc_val_t *mpcf_str_ast(mpc_val_t *c); +mpc_val_t *mpcf_state_ast(int n, mpc_val_t **xs); + +mpc_parser_t *mpca_tag(mpc_parser_t *a, const char *t); +mpc_parser_t *mpca_add_tag(mpc_parser_t *a, const char *t); +mpc_parser_t *mpca_root(mpc_parser_t *a); +mpc_parser_t *mpca_state(mpc_parser_t *a); +mpc_parser_t *mpca_total(mpc_parser_t *a); + +mpc_parser_t *mpca_not(mpc_parser_t *a); +mpc_parser_t *mpca_maybe(mpc_parser_t *a); + +mpc_parser_t *mpca_many(mpc_parser_t *a); +mpc_parser_t *mpca_many1(mpc_parser_t *a); +mpc_parser_t *mpca_count(int n, mpc_parser_t *a); + +mpc_parser_t *mpca_or(int n, ...); +mpc_parser_t *mpca_and(int n, ...); + +enum { + MPCA_LANG_DEFAULT = 0, + MPCA_LANG_PREDICTIVE = 1, + MPCA_LANG_WHITESPACE_SENSITIVE = 2 +}; + +mpc_parser_t *mpca_grammar(int flags, const char *grammar, ...); + +mpc_err_t *mpca_lang(int flags, const char *language, ...); +mpc_err_t *mpca_lang_file(int flags, FILE *f, ...); +mpc_err_t *mpca_lang_pipe(int flags, FILE *f, ...); +mpc_err_t *mpca_lang_contents(int flags, const char *filename, ...); + +/* +** Misc +*/ + + +void mpc_print(mpc_parser_t *p); +void mpc_optimise(mpc_parser_t *p); +void mpc_stats(mpc_parser_t *p); + +int 
mpc_test_pass(mpc_parser_t *p, const char *s, const void *d, + int(*tester)(const void*, const void*), + mpc_dtor_t destructor, + void(*printer)(const void*)); + +int mpc_test_fail(mpc_parser_t *p, const char *s, const void *d, + int(*tester)(const void*, const void*), + mpc_dtor_t destructor, + void(*printer)(const void*)); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/vendor/yar.c b/vendor/yar.c new file mode 100644 index 0000000..54b4179 --- /dev/null +++ b/vendor/yar.c @@ -0,0 +1,2 @@ +#define YAR_IMPLEMENTATION +#include "yar.h" diff --git a/vendor/yar.h b/vendor/yar.h new file mode 100644 index 0000000..5f4310f --- /dev/null +++ b/vendor/yar.h @@ -0,0 +1,229 @@ +/* yar - dynamic arrays in C - public domain Nicholas Rixson 2025 + * + * https://github.com/segcore/yar + * + * Licence: see end of file + + Sample usage: + #define YAR_IMPLEMENTATION + #include "yar.h" + + int main() { + // struct { double *items; size_t count; size_t capacity; } numbers = {0}; + yar(double) numbers = {0}; + *yar_append(&numbers) = 3.14159; + *yar_append(&numbers) = 2.71828; + *yar_append(&numbers) = 1.61803; + + for(size_t i = 0; i < numbers.count; i++) { + printf("%f\n", numbers.items[i]); + } + + yar_free(&numbers); + } + */ +#ifndef YAR_H +#define YAR_H + +#include // size_t +#include // strlen + +/* + * yar(type) - Declare a new basic dynamic array + * + * yar_append(array) - Add a new item at the end of the array, and return a pointer to it + * + * yar_reserve(array, extra) - Reserve space for `extra` count of items + * + * yar_append_many(array, data, num) - Append a copy of existing data + * + * yar_append_cstr(array, data) - Append a C string (nul-terminated char array) + * + * yar_insert(array, index, num) - Insert items somewhere within the array. Moves items to higher indexes as required. Returns &array[index] + * + * yar_remove(array, index, num) - Remove items from somewhere within the array. Moves items to lower indexes as required. 
/*
 *
 * yar_reset(array) - Reset the count of elements to 0, to re-use the memory. Does not free the memory.
 *
 * yar_init(array) - Set items, count, and capacity to 0. Can usually be avoided with = {0};
 *
 * yar_free(array) - Free items memory, and set the items, count, and capacity to 0.
 */

#define yar(type) struct { type *items; size_t count; size_t capacity; }
#define yar_append(array) ((_yar_append((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0])) ? \
        &(array)->items[(array)->count - 1] : NULL))
#define yar_reserve(array, extra) ((_yar_reserve((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0]), (extra)) ? \
        &(array)->items[(array)->count] : NULL))
#define yar_append_many(array, data, num) ((_yar_append_many((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0]), 1 ? (data) : ((array)->items), (num)) ))
#define yar_append_cstr(array, data) yar_append_many(array, data, strlen(data))
#define yar_insert(array, index, num) ((_yar_insert((void**)&(array)->items, &(array)->count, &(array)->capacity, sizeof((array)->items[0]), (index), (num)) ))
#define yar_remove(array, index, num) ((_yar_remove((void**)&(array)->items, &(array)->count, sizeof((array)->items[0]), (index), (num)) ))
#define yar_reset(array) (((array)->count = 0))
#define yar_init(array) ((array)->items = NULL, (array)->count = 0, (array)->capacity = 0)
#define yar_free(array) ((_yar_free((array)->items)), (array)->items = NULL, (array)->count = 0, (array)->capacity = 0)

#ifndef YARAPI
  #define YARAPI // nothing; overridable if needed.
#endif

#ifdef __cplusplus
  extern "C" {
#endif

// Implementation functions
YARAPI void* _yar_append(void** items_pointer, size_t* count, size_t* capacity, size_t item_size);
YARAPI void* _yar_append_many(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, const void* data, size_t extra);
YARAPI void* _yar_reserve(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t extra);
YARAPI void* _yar_insert(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t index, size_t extra);
YARAPI void* _yar_remove(void** items_pointer, size_t* count, size_t item_size, size_t index, size_t remove);
YARAPI void* _yar_realloc(void* p, size_t new_size);
YARAPI void _yar_free(void* p);

#ifdef __cplusplus
  }
#endif

#if defined(YAR_IMPLEMENTATION)

#ifndef YAR_MIN_CAP
  #define YAR_MIN_CAP 16
#endif

#ifndef YAR_REALLOC
  #define YAR_REALLOC realloc
#endif

#ifndef YAR_FREE
  #define YAR_FREE free
#endif

#include <stdint.h> // SIZE_MAX
#include <string.h> // mem* functions

// Append one zero-initialised item.
// Returns a non-NULL pointer on success (the macro turns it into a typed
// pointer to the new last item), or NULL if the array could not grow; on
// failure the array is left unchanged.
YARAPI void* _yar_append(void** items_pointer, size_t* count, size_t* capacity, size_t item_size)
{
    void* result = _yar_reserve(items_pointer, count, capacity, item_size, 1);
    if (result != NULL) *count += 1;
    return result;
}

// Append a copy of `extra` items read from `data`.
// Returns a pointer to the first copied item, or NULL if the array could not
// grow (array unchanged). `data` is only read, and is not touched at all when
// `extra` is 0, so a NULL `data` is fine for an empty append.
YARAPI void* _yar_append_many(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, const void* data, size_t extra)
{
    void* result = _yar_reserve(items_pointer, count, capacity, item_size, extra);
    if (result != NULL) {
        if (extra) memcpy(result, data, item_size * extra);
        *count += extra;
    }
    return result;
}

// Make sure there is room for `extra` more items after the current ones and
// zero that room. `*count` is not changed.
// Returns a pointer to the first reserved slot, or NULL when the allocation
// fails, when the requested size cannot be represented in a size_t, or when
// nothing is allocated and nothing was requested.
YARAPI void* _yar_reserve(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t extra)
{
    char* items = *items_pointer;

    // *count + extra must not wrap around.
    if (extra > SIZE_MAX - *count) return NULL;
    size_t newcount = *count + extra;

    if (newcount > *capacity) {
        size_t newcap = YAR_MIN_CAP;
        if (*capacity >= YAR_MIN_CAP) {
            // Grow by 8/5; fall back to the exact request if *capacity * 8 would wrap.
            newcap = (*capacity <= SIZE_MAX / 8) ? *capacity * 8 / 5 : newcount;
        }
        if (newcap < newcount) newcap = newcount;

        // The byte size handed to realloc must not wrap either.
        if (item_size != 0 && newcap > SIZE_MAX / item_size) return NULL;

        void* next = _yar_realloc(items, newcap * item_size);
        if (next == NULL) return NULL; // the old buffer is still valid and untouched
        items = next;
        *items_pointer = next;
        *capacity = newcap;
    }

    // Only reachable with an empty, never-allocated array and extra == 0.
    if (items == NULL) return NULL;

    void* result = items + (*count * item_size);
    if (extra) memset(result, 0, item_size * extra);
    return result;
}

// Open a gap of `extra` zeroed items at `index`, moving later items up.
// An `index` past the end is treated as "insert at the end" so that the
// returned pointer always refers to the new slots.
// Returns a pointer to the first new slot, or NULL if the array could not grow.
YARAPI void* _yar_insert(void** items_pointer, size_t* count, size_t* capacity, size_t item_size, size_t index, size_t extra)
{
    if (_yar_reserve(items_pointer, count, capacity, item_size, extra) == NULL) return NULL;
    if (index > *count) index = *count;

    char* items = *items_pointer;
    char* slot = items + item_size * index;
    if (index < *count)
    {
        // Shift the tail up, then clear the stale copies left in the gap.
        memmove(slot + item_size * extra, slot, (*count - index) * item_size);
        memset(slot, 0, extra * item_size);
    }
    *count += extra;
    return slot;
}

// Remove up to `remove` items starting at `index`, moving later items down.
// The request is clamped to the items that actually exist from `index` on, so
// removing "too many" just drops the tail; an `index` at or past the end is a
// no-op. Capacity is never reduced.
// Returns a pointer to the position `index` (or the items pointer for a no-op).
YARAPI void* _yar_remove(void** items_pointer, size_t* count, size_t item_size, size_t index, size_t remove)
{
    if (index >= *count) {
        return *items_pointer;
    }
    size_t tail = *count - index;
    if (remove > tail) remove = tail;

    char* items = *items_pointer;
    char* slot = items + item_size * index;
    memmove(slot, slot + item_size * remove, item_size * (tail - remove));
    *count -= remove;
    return slot;
}

// Thin wrapper around the (overridable) YAR_REALLOC allocator.
YARAPI void* _yar_realloc(void* p, size_t new_size)
{
    // Declaration, so we can call it if the definition is overridden
    extern void* YAR_REALLOC(void *ptr, size_t size);
    return YAR_REALLOC(p, new_size);
}

// Thin wrapper around the (overridable) YAR_FREE deallocator.
YARAPI void _yar_free(void* p)
{
    extern void YAR_FREE(void *ptr);
    YAR_FREE(p);
}

#endif // YAR_IMPLEMENTATION
/*
------------------------------------------------------------------------------
This software is available under 2 licenses -- choose whichever you prefer.
------------------------------------------------------------------------------
ALTERNATIVE A - MIT License

Copyright (c) 2025 Nicholas Rixson

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
------------------------------------------------------------------------------
ALTERNATIVE B - Public Domain (www.unlicense.org)
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
software, either in source code form or as a compiled binary, for any purpose,
commercial or non-commercial, and by any means.

In jurisdictions that recognize copyright laws, the author or authors of this
software dedicate any and all copyright interest in the software to the public
domain. We make this dedication for the benefit of the public at large and to
the detriment of our heirs and successors.
*/
We intend this dedication to be an +overt act of relinquishment in perpetuity of all present and future rights to +this software under copyright law. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------------------------ +*/ diff --git a/vm.c b/vm.c new file mode 100644 index 0000000..20df937 --- /dev/null +++ b/vm.c @@ -0,0 +1,60 @@ +#include "vm.h" +#include "gc.h" + +static I decode_sleb128(U8 **ptr) { + I result = 0; + I shift = 0; + U8 byte; + + do { + byte = **ptr; + (*ptr)++; + result |= (I)(byte & 0x7F) << shift; + shift += 7; + } while (byte & 0x80); + + if ((shift < 64) && (byte & 0x40)) { + result |= -(1LL << shift); + } + + return result; +} + +V vm_init(Vm *vm) { + vm->sp = vm->stack; + vm->rsp = vm->rstack; + gc_init(&vm->gc); + + for (Z i = 0; i < STACK_SIZE; i++) { + vm->stack[i] = NIL; + gc_addroot(&vm->gc, &vm->stack[i]); + } +} + +V vm_push(Vm *vm, O o) { *vm->sp++ = o; } +O vm_pop(Vm *vm) { return *--vm->sp; } +O vm_peek(Vm *vm) { return *(vm->sp - 1); } + +V vm_run(Vm *vm, Bc *chunk, I offset) { + I mark = gc_mark(&vm->gc); + for (Z i = 0; i < chunk->constants.count; i++) + gc_addroot(&vm->gc, &chunk->constants.items[i]); + + vm->ip = chunk->items + offset; + for (;;) { + U8 opcode; + switch (opcode = *vm->ip++) { + case OP_NOP: + break; + case OP_RETURN: + return; + case OP_CONST: { + I idx = decode_sleb128(&vm->ip); + vm_push(vm, chunk->constants.items[idx]); + break; + } + } + } + + gc_reset(&vm->gc, mark); +} diff --git a/vm.h b/vm.h new file mode 100644 index 0000000..7da042d --- /dev/null 
+++ b/vm.h @@ -0,0 +1,30 @@ +#ifndef VM_H +#define VM_H + +#include "common.h" + +#include "chunk.h" +#include "gc.h" +#include "object.h" + +enum { + OP_NOP = 0, + OP_RETURN, + OP_CONST, +}; + +#define STACK_SIZE 256 + +typedef struct Vm { + Gc gc; + O stack[256], *sp; + U rstack[256], *rsp; + U8 *ip; +} Vm; + +V vm_init(Vm *); +V vm_push(Vm *, O); +O vm_pop(Vm *); +O vm_peek(Vm *); +V vm_run(Vm *, Bc *, I); +#endif