begin work on new lexer
This commit is contained in:
parent
30ff72b3ae
commit
35bad08a0d
33 changed files with 2464 additions and 3 deletions
|
|
@ -6,6 +6,9 @@ project(
|
||||||
default_options : ['buildtype=debugoptimized', 'c_std=gnu11', 'warning_level=3'],
|
default_options : ['buildtype=debugoptimized', 'c_std=gnu11', 'warning_level=3'],
|
||||||
)
|
)
|
||||||
|
|
||||||
|
libutf = subproject('libutf')
|
||||||
|
libutf_dep = libutf.get_variable('libutf_dep')
|
||||||
|
|
||||||
sources = [
|
sources = [
|
||||||
'src/arena.c',
|
'src/arena.c',
|
||||||
'src/chunk.c',
|
'src/chunk.c',
|
||||||
|
|
@ -13,11 +16,13 @@ sources = [
|
||||||
'src/debug.c',
|
'src/debug.c',
|
||||||
'src/dictionary.c',
|
'src/dictionary.c',
|
||||||
'src/file.c',
|
'src/file.c',
|
||||||
|
'src/lexer.c',
|
||||||
'src/object.c',
|
'src/object.c',
|
||||||
'src/gc.c',
|
'src/gc.c',
|
||||||
'src/parser.c',
|
'src/parser.c',
|
||||||
'src/primitive.c',
|
'src/primitive.c',
|
||||||
'src/print.c',
|
'src/print.c',
|
||||||
|
'src/stream.c',
|
||||||
'src/string.c',
|
'src/string.c',
|
||||||
'src/userdata.c',
|
'src/userdata.c',
|
||||||
'src/vm.c',
|
'src/vm.c',
|
||||||
|
|
@ -29,5 +34,6 @@ sources = [
|
||||||
exe = executable(
|
exe = executable(
|
||||||
'growl',
|
'growl',
|
||||||
'src/main.c', sources,
|
'src/main.c', sources,
|
||||||
|
dependencies : [libutf_dep],
|
||||||
install : true,
|
install : true,
|
||||||
)
|
)
|
||||||
|
|
|
||||||
|
|
@ -400,8 +400,6 @@ static I compile_ast(Cm *cm, mpc_ast_t *curr, mpc_ast_trav_t **next) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Bc *compile_program(Cm *cm, mpc_ast_t *ast) {
|
Bc *compile_program(Cm *cm, mpc_ast_t *ast) {
|
||||||
mpc_ast_print_to(ast, stderr);
|
|
||||||
|
|
||||||
mpc_ast_trav_t *next = mpc_ast_traverse_start(ast, mpc_ast_trav_order_pre);
|
mpc_ast_trav_t *next = mpc_ast_traverse_start(ast, mpc_ast_trav_order_pre);
|
||||||
mpc_ast_t *curr = mpc_ast_traverse_next(&next); // Begin traversal
|
mpc_ast_t *curr = mpc_ast_traverse_next(&next); // Begin traversal
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@
|
||||||
|
|
||||||
#include "vendor/mpc.h"
|
#include "vendor/mpc.h"
|
||||||
|
|
||||||
#define COMPILER_DEBUG 1
|
#define COMPILER_DEBUG DEBUG
|
||||||
|
|
||||||
/** Compiler context */
|
/** Compiler context */
|
||||||
typedef struct Cm {
|
typedef struct Cm {
|
||||||
|
|
|
||||||
179
src/lexer.c
Normal file
179
src/lexer.c
Normal file
|
|
@ -0,0 +1,179 @@
|
||||||
|
#include <ctype.h>
|
||||||
|
#include <err.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <utf.h>
|
||||||
|
|
||||||
|
#include "lexer.h"
|
||||||
|
#include "vendor/yar.h"
|
||||||
|
|
||||||
|
/* Returns 1 if `i` is one of the characters that end a word
 * ( ) [ ] { } ; \ or ", and 0 for anything else. */
static inline int is_delimiter(int i) {
  switch (i) {
  case '(':
  case ')':
  case '[':
  case ']':
  case '{':
  case '}':
  case ';':
  case '\\':
  case '"':
    return 1;
  default:
    return 0;
  }
}
|
||||||
|
|
||||||
|
/* Encodes `rn` with runetochar() and appends the resulting bytes to the
 * lexer's buffer. */
static inline void appendrune(Lx *lx, Rune rn) {
  char encoded[5];
  I nbytes = runetochar(encoded, &rn);

  yar_append_many(lx, encoded, nbytes);
}
|
||||||
|
|
||||||
|
/* Stores `byte` in the slot returned by yar_append(lx). */
static inline void appendbyte(Lx *lx, char byte) { *yar_append(lx) = byte; }
|
||||||
|
|
||||||
|
/* Reads characters from the lexer's stream until one is not whitespace and
 * returns it. Returns -1 immediately if the stream already reports EOF;
 * otherwise returns whatever ST_GETC yields first that isspace() rejects
 * (which includes the stream's own -1 end marker). */
static int getc_ws(Lx *lx) {
  int c;

  if (ST_EOF(lx->stream))
    return -1;

  do {
    c = ST_GETC(lx->stream);
  } while (isspace(c));

  return c;
}
|
||||||
|
|
||||||
|
/* Scans a word: appends characters to the lexer buffer until EOF, a delimiter
 * or whitespace is read, then NUL-terminates the buffer.
 *
 * The terminating delimiter/whitespace character is pushed back onto the
 * stream. Returns lx->kind, which the caller has already set (lexer_next sets
 * it to TOK_WORD); it is overwritten with TOK_EOF only when the stream ended
 * before any word character was read.
 *
 * The previous version tested `lx->cursor == 0`, which is always true here
 * because lexer_next resets cursor and nothing in this file advances it, so a
 * word that ended exactly at EOF was reported as TOK_EOF and lost. */
static int scanword(Lx *lx) {
  int nbytes = 0; /* word characters appended so far */
  int next;

  for (;;) {
    next = ST_GETC(lx->stream);
    if (next == -1 || is_delimiter(next) || isspace(next))
      break;
    appendbyte(lx, next);
    nbytes++;
  }

  if (next == -1) {
    if (nbytes == 0)
      lx->kind = TOK_EOF;
  } else {
    /* Leave the terminator for the next call to lexer_next. */
    ST_UNGETC(next, lx->stream);
  }

  appendbyte(lx, 0);
  return lx->kind;
}
|
||||||
|
|
||||||
|
/* Scans the body of a `\x` string escape: up to six hexadecimal digits
 * terminated by ';'. The digits are parsed as a code point and appended to the
 * lexer buffer as UTF-8 via appendrune().
 *
 * Exits the process through errx(1, ...) on EOF, on a non-hex character, or
 * when more than six digits are supplied.
 *
 * NOTE(review): an empty sequence (`\x;`) parses as 0 and appends a NUL rune;
 * confirm whether that is intended. */
static void scanescape(Lx *lx) {
  /* Zero-initialized so the buffer is always a valid C string, including when
   * it is printed in the "unterminated" diagnostic below. */
  char escbuf[7] = {0};
  char *escptr = escbuf;
  int next;
  Rune tmp;

  for (;;) {
    next = ST_GETC(lx->stream);

    if (next == -1) {
      errx(1, "unterminated hex sequence '%s'", escbuf);
    } else if (next == ';') {
      *escptr = 0;
      break;
    } else if (!isxdigit((unsigned char)next)) {
      /* The cast keeps isxdigit() defined even if the stream hands back a
       * negative value for a high byte. */
      errx(1, "invalid hex digit '%c'", next);
    }

    if (escptr - escbuf >= 6) {
      errx(1, "hex sequence too long (6 chars max.)");
    } else {
      *(escptr++) = next;
    }
  }

  tmp = strtol(escbuf, &escptr, 16);
  if (*escptr == '\0')
    appendrune(lx, tmp);
  else
    errx(1, "invalid hex sequence '%s'", escbuf);
}
|
||||||
|
|
||||||
|
/* Scans the remainder of a string literal (the opening '"' has already been
 * consumed by the caller), appending the decoded bytes to the lexer buffer.
 *
 * Recognized escapes: \t \n \r \b \v \f \0 \e \\ \" and \x<hex>; (handled by
 * scanescape). On the closing '"' the buffer is NUL-terminated, lx->kind is
 * set to TOK_STRING and that value is returned. An unknown escape prints a
 * message and aborts; EOF inside the literal exits through errx(). */
static int scanstring(Lx *lx) {
  for (;;) {
    int ch = ST_GETC(lx->stream);

    if (ch == -1)
      break;

    if (ch == '"') {
      appendbyte(lx, 0);
      lx->kind = TOK_STRING;
      return lx->kind;
    }

    if (ch != '\\') {
      appendbyte(lx, ch);
      continue;
    }

    /* Backslash: translate the escape character that follows. */
    ch = ST_GETC(lx->stream);
    if (ch == -1)
      break;

    switch (ch) {
    case 't':
      ch = '\t';
      break;
    case 'n':
      ch = '\n';
      break;
    case 'r':
      ch = '\r';
      break;
    case 'b':
      ch = '\b';
      break;
    case 'v':
      ch = '\v';
      break;
    case 'f':
      ch = '\f';
      break;
    case '0':
      ch = '\0';
      break;
    case 'e':
      ch = '\x1b';
      break;
    case '\\':
    case '"':
      /* These stand for themselves. */
      break;
    case 'x':
      /* scanescape appends the decoded rune itself. */
      scanescape(lx);
      continue;
    default:
      fprintf(stderr, "unknown escape sequence '\\%c'\n", ch);
      abort();
    }
    appendbyte(lx, ch);
  }

  errx(1, "unterminated string literal");
  return 0;
}
|
||||||
|
|
||||||
|
/* Reads the next token from lx->stream.
 *
 * Stores the token kind in lx->kind and returns it: TOK_EOF (0) at end of
 * input, the delimiter character itself for ( ) [ ] { } ;, TOK_STRING for a
 * string literal, and otherwise whatever scanword() reports (TOK_WORD, or
 * TOK_EOF if the input ended). Leading whitespace is skipped, and a backslash
 * starts a comment that runs to the end of the line and is discarded. */
I lexer_next(Lx *lx) {
  int next;
  lx->cursor = 0;

  if (ST_EOF(lx->stream)) {
    lx->kind = TOK_EOF;
    return 0;
  }

  next = getc_ws(lx);

  switch (next) {
  case '\\':
    /* Skip the comment. Also stop at end of input: a comment on the last
     * line without a trailing newline would otherwise loop forever, since
     * the stream keeps returning -1. */
    while (next != '\n' && next != -1)
      next = ST_GETC(lx->stream);
    return lexer_next(lx);
  case '(':
  case ')':
  case '[':
  case ']':
  case '{':
  case '}':
  case ';':
    return (lx->kind = next);
  case '"':
    return scanstring(lx);
  default:
    /* Anything else (including -1 from getc_ws) goes through scanword, which
     * re-reads the character and decides between a word and EOF. */
    ST_UNGETC(next, lx->stream);
    lx->kind = TOK_WORD;
    return scanword(lx);
  }
}
|
||||||
33
src/lexer.h
Normal file
33
src/lexer.h
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
#ifndef LEXER_H
|
||||||
|
#define LEXER_H
|
||||||
|
|
||||||
|
#include "common.h"
|
||||||
|
#include "stream.h"
|
||||||
|
|
||||||
|
/* Token kinds stored in Lx.kind. Single-character tokens use the character
 * itself as their kind value. */
enum {
  /* Non-character kinds. */
  TOK_INVALID = -1,
  TOK_EOF = 0,

  /* Tokens that carry text. */
  TOK_WORD = 'a',
  TOK_STRING = '"',

  /* Punctuation. */
  TOK_SEMICOLON = ';',
  TOK_LPAREN = '(',
  TOK_RPAREN = ')',
  TOK_LBRACKET = '[',
  TOK_RBRACKET = ']',
  TOK_LBRACE = '{',
  TOK_RBRACE = '}',

  /* A backslash introduces a comment. */
  TOK_COMMENT = '\\',
};
|
||||||
|
|
||||||
|
typedef struct Lx {
|
||||||
|
I kind;
|
||||||
|
I cursor;
|
||||||
|
Stream *stream;
|
||||||
|
char *items;
|
||||||
|
Z count, capacity;
|
||||||
|
} Lx;
|
||||||
|
|
||||||
|
/* Presumably creates a lexer that reads from the given stream -- the
 * definition is not visible from this header; TODO confirm ownership. */
Lx *lexer_make(Stream *);
|
||||||
|
/* Scans the next token; returns its kind (a TOK_* value, TOK_EOF at end of
 * input) and also stores it in the lexer's `kind` field. */
I lexer_next(Lx *);
|
||||||
|
|
||||||
|
#endif
|
||||||
39
src/stream.c
Normal file
39
src/stream.c
Normal file
|
|
@ -0,0 +1,39 @@
|
||||||
|
#include "stream.h"
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
/* Stream getc callback for FILE-backed streams: `f` is a FILE *. */
static int filestream_getc(void *f) {
  FILE *fp = f;
  return fgetc(fp);
}
|
||||||
|
/* Stream ungetc callback for FILE-backed streams: pushes `c` back onto the
 * FILE * passed as `f` and returns ungetc()'s result. */
static int filestream_ungetc(int c, void *f) {
  FILE *fp = f;
  return ungetc(c, fp);
}
|
||||||
|
/* Stream eof callback for FILE-backed streams: reports feof() of the FILE *
 * passed as `f`. */
static int filestream_eof(void *f) {
  FILE *fp = f;
  return feof(fp);
}
|
||||||
|
|
||||||
|
static int bufstream_getc(void *f) {
|
||||||
|
Buf *b = f;
|
||||||
|
if (b->unread != -1) {
|
||||||
|
int c = b->unread;
|
||||||
|
b->unread = -1;
|
||||||
|
return c;
|
||||||
|
} else if (b->pos >= b->len) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
return b->data[b->pos++];
|
||||||
|
}
|
||||||
|
|
||||||
|
static int bufstream_ungetc(int c, void *f) { return ((Buf *)f)->unread = c; }
|
||||||
|
|
||||||
|
static int bufstream_eof(void *f) {
|
||||||
|
Buf *b = f;
|
||||||
|
if (b->unread != -1)
|
||||||
|
return 0;
|
||||||
|
return b->pos >= b->len;
|
||||||
|
}
|
||||||
|
|
||||||
|
// clang-format off
|
||||||
|
static const StreamVtable _filestream_vtable = {
|
||||||
|
filestream_getc, filestream_ungetc, filestream_eof
|
||||||
|
};
|
||||||
|
const StreamVtable *filestream_vtable = &_filestream_vtable;
|
||||||
|
|
||||||
|
static const StreamVtable _bufstream_vtable = {
|
||||||
|
bufstream_getc, bufstream_ungetc, bufstream_eof
|
||||||
|
};
|
||||||
|
const StreamVtable *bufstream_vtable = &_bufstream_vtable;
|
||||||
|
// clang-format on
|
||||||
30
src/stream.h
Normal file
30
src/stream.h
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
#ifndef STREAM_H
|
||||||
|
#define STREAM_H
|
||||||
|
|
||||||
|
/* Table of callbacks implementing a character stream. Each callback receives
 * the stream's opaque data pointer as its `void *` argument. */
struct StreamVtable {
  int (*__sgetc)(void *);        /* read one character */
  int (*__sungetc)(int, void *); /* push one character back */
  int (*__seof)(void *);         /* non-zero at end of input */
};
typedef struct StreamVtable StreamVtable;
|
||||||
|
|
||||||
|
typedef struct Stream {
|
||||||
|
const StreamVtable *vtable;
|
||||||
|
void *data;
|
||||||
|
} Stream;
|
||||||
|
|
||||||
|
/* State for an in-memory stream. Field order matters: the BUF() macro
 * initializes this struct positionally. */
typedef struct Buf {
  const char *data; /* bytes to read */
  int len;          /* number of bytes in data */
  int pos;          /* index of the next byte to read */
  int unread;       /* pushed-back character, or -1 when there is none */
} Buf;
|
||||||
|
|
||||||
|
/* Dispatch helpers that call a Stream's callbacks with its data pointer.
 * R is a Stream * and is evaluated twice, so it must not have side effects.
 * Every macro argument is parenthesized so any expression can be passed. */
#define ST_GETC(R) ((R)->vtable->__sgetc((R)->data))
#define ST_UNGETC(C, R) ((R)->vtable->__sungetc((C), (R)->data))
#define ST_EOF(R) ((R)->vtable->__seof((R)->data))
|
||||||
|
|
||||||
|
/* Builds a Buf over `s` with the read position at 0 and nothing pushed back.
 * The length is sizeof(s)-1, so `s` must be an array such as a string literal
 * (not a pointer); the final byte -- the NUL of a literal -- is excluded. */
#define BUF(s) ((Buf){s, sizeof(s)-1, 0, -1})
|
||||||
|
|
||||||
|
/* Callback table for streams whose data pointer is a FILE *. */
extern const StreamVtable *filestream_vtable;
|
||||||
|
/* Callback table for streams whose data pointer is a Buf *. */
extern const StreamVtable *bufstream_vtable;
|
||||||
|
|
||||||
|
#endif
|
||||||
0
subprojects/.wraplock
Normal file
0
subprojects/.wraplock
Normal file
22
subprojects/libutf/NOTICE
Normal file
22
subprojects/libutf/NOTICE
Normal file
|
|
@ -0,0 +1,22 @@
|
||||||
|
This is a Unix port of the Plan 9 UTF-8 library, by Rob Pike and Ken Thompson.
|
||||||
|
Please send comments about the packaging to Russ Cox <rsc@swtch.com>.
|
||||||
|
|
||||||
|
Copyright © 2021 Plan 9 Foundation
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in
|
||||||
|
all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||||
|
THE SOFTWARE.
|
||||||
41
subprojects/libutf/meson.build
Normal file
41
subprojects/libutf/meson.build
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
# Build definition for the bundled Plan 9 UTF-8 library.
project('libutf', 'c')

# Turn off a few warnings for this subproject (presumably because the
# vendored sources trigger them).
add_project_arguments(
  '-Wno-missing-braces',
  '-Wno-parentheses',
  '-Wno-sign-compare',
  language: 'c'
)

# Headers (utf.h) live next to the sources.
inc = include_directories('.')

utf_sources = [
  'rune.c',
  'runestrcat.c',
  'runestrchr.c',
  'runestrcmp.c',
  'runestrcpy.c',
  'runestrdup.c',
  'runestrecpy.c',
  'runestrlen.c',
  'runestrncat.c',
  'runestrncmp.c',
  'runestrncpy.c',
  'runestrrchr.c',
  'runestrstr.c',
  'runetype.c',
  'utfecpy.c',
  'utflen.c',
  'utfnlen.c',
  'utfrrune.c',
  'utfrune.c',
  'utfutf.c',
]

libutf = static_library('utf', utf_sources)

# Exported to the parent project via get_variable('libutf_dep').
libutf_dep = declare_dependency(
  include_directories: inc,
  link_with: libutf
)
|
||||||
|
|
||||||
28
subprojects/libutf/plan9.h
Normal file
28
subprojects/libutf/plan9.h
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
/*
 * USED(x) marks a value as deliberately unused (a compiler directive on
 * Plan 9); here it expands to an empty if/else.
 */
#ifndef USED
#define USED(x) if(x);else
#endif

/*
 * Short unsigned type names. They are first redirected to _utf-prefixed
 * names by the preprocessor so that the typedefs below are always defined.
 */
#define uchar _utfuchar
#define ushort _utfushort
#define uint _utfuint
#define ulong _utfulong
typedef unsigned char		uchar;
typedef unsigned short		ushort;
typedef unsigned int		uint;
typedef unsigned long		ulong;

/*
 * nil is plain 0 rather than ((void*)0) because in ANSI C it is also used
 * for function pointers.
 */
#undef	nil
#define	nil	0

/* Number of elements in an array (not valid for pointers). */
#undef	nelem
#define	nelem(x)	(sizeof (x)/sizeof (x)[0])
|
||||||
217
subprojects/libutf/rune.c
Normal file
217
subprojects/libutf/rune.c
Normal file
|
|
@ -0,0 +1,217 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
Bit1 = 7,
|
||||||
|
Bitx = 6,
|
||||||
|
Bit2 = 5,
|
||||||
|
Bit3 = 4,
|
||||||
|
Bit4 = 3,
|
||||||
|
Bit5 = 2,
|
||||||
|
|
||||||
|
T1 = ((1<<(Bit1+1))-1) ^ 0xFF, /* 0000 0000 */
|
||||||
|
Tx = ((1<<(Bitx+1))-1) ^ 0xFF, /* 1000 0000 */
|
||||||
|
T2 = ((1<<(Bit2+1))-1) ^ 0xFF, /* 1100 0000 */
|
||||||
|
T3 = ((1<<(Bit3+1))-1) ^ 0xFF, /* 1110 0000 */
|
||||||
|
T4 = ((1<<(Bit4+1))-1) ^ 0xFF, /* 1111 0000 */
|
||||||
|
T5 = ((1<<(Bit5+1))-1) ^ 0xFF, /* 1111 1000 */
|
||||||
|
|
||||||
|
Rune1 = (1<<(Bit1+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */
|
||||||
|
Rune2 = (1<<(Bit2+1*Bitx))-1, /* 0000 0000 0000 0111 1111 1111 */
|
||||||
|
Rune3 = (1<<(Bit3+2*Bitx))-1, /* 0000 0000 1111 1111 1111 1111 */
|
||||||
|
Rune4 = (1<<(Bit4+3*Bitx))-1, /* 0011 1111 1111 1111 1111 1111 */
|
||||||
|
|
||||||
|
Maskx = (1<<Bitx)-1, /* 0011 1111 */
|
||||||
|
Testx = Maskx ^ 0xFF, /* 1100 0000 */
|
||||||
|
|
||||||
|
Bad = Runeerror
|
||||||
|
};
|
||||||
|
|
||||||
|
int
|
||||||
|
chartorune(Rune *rune, char *str)
|
||||||
|
{
|
||||||
|
int c, c1, c2, c3;
|
||||||
|
long l;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* one character sequence
|
||||||
|
* 00000-0007F => T1
|
||||||
|
*/
|
||||||
|
c = *(uchar*)str;
|
||||||
|
if(c < Tx) {
|
||||||
|
*rune = c;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* two character sequence
|
||||||
|
* 0080-07FF => T2 Tx
|
||||||
|
*/
|
||||||
|
c1 = *(uchar*)(str+1) ^ Tx;
|
||||||
|
if(c1 & Testx)
|
||||||
|
goto bad;
|
||||||
|
if(c < T3) {
|
||||||
|
if(c < T2)
|
||||||
|
goto bad;
|
||||||
|
l = ((c << Bitx) | c1) & Rune2;
|
||||||
|
if(l <= Rune1)
|
||||||
|
goto bad;
|
||||||
|
*rune = l;
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* three character sequence
|
||||||
|
* 0800-FFFF => T3 Tx Tx
|
||||||
|
*/
|
||||||
|
c2 = *(uchar*)(str+2) ^ Tx;
|
||||||
|
if(c2 & Testx)
|
||||||
|
goto bad;
|
||||||
|
if(c < T4) {
|
||||||
|
l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
|
||||||
|
if(l <= Rune2)
|
||||||
|
goto bad;
|
||||||
|
*rune = l;
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* four character sequence
|
||||||
|
* 10000-10FFFF => T4 Tx Tx Tx
|
||||||
|
*/
|
||||||
|
if(UTFmax >= 4) {
|
||||||
|
c3 = *(uchar*)(str+3) ^ Tx;
|
||||||
|
if(c3 & Testx)
|
||||||
|
goto bad;
|
||||||
|
if(c < T5) {
|
||||||
|
l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
|
||||||
|
if(l <= Rune3)
|
||||||
|
goto bad;
|
||||||
|
if(l > Runemax)
|
||||||
|
goto bad;
|
||||||
|
*rune = l;
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* bad decoding
|
||||||
|
*/
|
||||||
|
bad:
|
||||||
|
*rune = Bad;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
runetochar(char *str, Rune *rune)
|
||||||
|
{
|
||||||
|
long c;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* one character sequence
|
||||||
|
* 00000-0007F => 00-7F
|
||||||
|
*/
|
||||||
|
c = *rune;
|
||||||
|
if(c <= Rune1) {
|
||||||
|
str[0] = c;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* two character sequence
|
||||||
|
* 00080-007FF => T2 Tx
|
||||||
|
*/
|
||||||
|
if(c <= Rune2) {
|
||||||
|
str[0] = T2 | (c >> 1*Bitx);
|
||||||
|
str[1] = Tx | (c & Maskx);
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* three character sequence
|
||||||
|
* 00800-0FFFF => T3 Tx Tx
|
||||||
|
*/
|
||||||
|
if(c > Runemax)
|
||||||
|
c = Runeerror;
|
||||||
|
if(c <= Rune3) {
|
||||||
|
str[0] = T3 | (c >> 2*Bitx);
|
||||||
|
str[1] = Tx | ((c >> 1*Bitx) & Maskx);
|
||||||
|
str[2] = Tx | (c & Maskx);
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* four character sequence
|
||||||
|
* 010000-1FFFFF => T4 Tx Tx Tx
|
||||||
|
*/
|
||||||
|
str[0] = T4 | (c >> 3*Bitx);
|
||||||
|
str[1] = Tx | ((c >> 2*Bitx) & Maskx);
|
||||||
|
str[2] = Tx | ((c >> 1*Bitx) & Maskx);
|
||||||
|
str[3] = Tx | (c & Maskx);
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
runelen(long c)
|
||||||
|
{
|
||||||
|
Rune rune;
|
||||||
|
char str[10];
|
||||||
|
|
||||||
|
rune = c;
|
||||||
|
return runetochar(str, &rune);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
runenlen(Rune *r, int nrune)
|
||||||
|
{
|
||||||
|
int nb, c;
|
||||||
|
|
||||||
|
nb = 0;
|
||||||
|
while(nrune--) {
|
||||||
|
c = *r++;
|
||||||
|
if(c <= Rune1)
|
||||||
|
nb++;
|
||||||
|
else
|
||||||
|
if(c <= Rune2)
|
||||||
|
nb += 2;
|
||||||
|
else
|
||||||
|
if(c <= Rune3 || c > Runemax)
|
||||||
|
nb += 3;
|
||||||
|
else
|
||||||
|
nb += 4;
|
||||||
|
}
|
||||||
|
return nb;
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
fullrune(char *str, int n)
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
|
||||||
|
if(n <= 0)
|
||||||
|
return 0;
|
||||||
|
c = *(uchar*)str;
|
||||||
|
if(c < Tx)
|
||||||
|
return 1;
|
||||||
|
if(c < T3)
|
||||||
|
return n >= 2;
|
||||||
|
if(UTFmax == 3 || c < T4)
|
||||||
|
return n >= 3;
|
||||||
|
return n >= 4;
|
||||||
|
}
|
||||||
25
subprojects/libutf/runestrcat.c
Normal file
25
subprojects/libutf/runestrcat.c
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
/*
 * Appends the zero-terminated rune string s2 to the end of s1 and
 * returns s1.
 */
Rune*
runestrcat(Rune *s1, Rune *s2)
{
	Rune *end;

	end = runestrchr(s1, 0);	/* terminator of s1 */
	runestrcpy(end, s2);
	return s1;
}
|
||||||
35
subprojects/libutf/runestrchr.c
Normal file
35
subprojects/libutf/runestrchr.c
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
/*
 * Returns a pointer to the first occurrence of c in the zero-terminated
 * rune string s, or 0 if c does not occur. Searching for 0 returns a
 * pointer to the terminator.
 */
Rune*
runestrchr(Rune *s, Rune c)
{
	for(;; s++) {
		/* tested first so that c == 0 matches the terminator */
		if(*s == c)
			return s;
		if(*s == 0)
			return 0;
	}
}
|
||||||
35
subprojects/libutf/runestrcmp.c
Normal file
35
subprojects/libutf/runestrcmp.c
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
int
|
||||||
|
runestrcmp(Rune *s1, Rune *s2)
|
||||||
|
{
|
||||||
|
Rune c1, c2;
|
||||||
|
|
||||||
|
for(;;) {
|
||||||
|
c1 = *s1++;
|
||||||
|
c2 = *s2++;
|
||||||
|
if(c1 != c2) {
|
||||||
|
if(c1 > c2)
|
||||||
|
return 1;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if(c1 == 0)
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
28
subprojects/libutf/runestrcpy.c
Normal file
28
subprojects/libutf/runestrcpy.c
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
/*
 * Copies the zero-terminated rune string s2, terminator included, to s1
 * and returns s1.
 */
Rune*
runestrcpy(Rune *s1, Rune *s2)
{
	Rune *dst;

	for(dst = s1; (*dst = *s2) != 0; dst++, s2++)
		;
	return s1;
}
|
||||||
30
subprojects/libutf/runestrdup.c
Normal file
30
subprojects/libutf/runestrdup.c
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
/*
 * Returns a malloc'ed copy of the zero-terminated rune string s, or 0 if
 * the allocation fails.
 */
Rune*
runestrdup(Rune *s)
{
	Rune *copy;

	copy = malloc(sizeof(Rune)*(runestrlen(s) + 1));
	if(copy != 0)
		return runestrcpy(copy, s);
	return 0;
}
|
||||||
32
subprojects/libutf/runestrecpy.c
Normal file
32
subprojects/libutf/runestrecpy.c
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
/*
 * Copies s2 into the buffer [s1, es1), always leaving the result
 * zero-terminated when the buffer is not empty. If s1 >= es1 nothing is
 * written and s1 is returned. When the copy is truncated the return value
 * points at the terminator; when s2 fits entirely it points one past the
 * terminator.
 */
Rune*
runestrecpy(Rune *s1, Rune *es1, Rune *s2)
{
	Rune r;

	if(s1 >= es1)
		return s1;

	for(;;) {
		r = *s2++;
		*s1++ = r;
		if(r == 0)
			break;
		if(s1 == es1){
			/* out of room: overwrite the last rune with the terminator */
			*--s1 = '\0';
			break;
		}
	}
	return s1;
}
|
||||||
24
subprojects/libutf/runestrlen.c
Normal file
24
subprojects/libutf/runestrlen.c
Normal file
|
|
@ -0,0 +1,24 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
long
|
||||||
|
runestrlen(Rune *s)
|
||||||
|
{
|
||||||
|
|
||||||
|
return runestrchr(s, 0) - s;
|
||||||
|
}
|
||||||
32
subprojects/libutf/runestrncat.c
Normal file
32
subprojects/libutf/runestrncat.c
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
/*
 * Appends at most n runes of s2 to the end of s1, then a zero terminator,
 * and returns s1.
 */
Rune*
runestrncat(Rune *s1, Rune *s2, long n)
{
	Rune *dst, r;

	dst = runestrchr(s1, 0);
	for(;;) {
		r = *s2++;
		*dst++ = r;
		if(r == 0)
			break;
		if(--n < 0) {
			/* limit exceeded: replace the rune just copied with the terminator */
			dst[-1] = 0;
			break;
		}
	}
	return s1;
}
|
||||||
37
subprojects/libutf/runestrncmp.c
Normal file
37
subprojects/libutf/runestrncmp.c
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
int
|
||||||
|
runestrncmp(Rune *s1, Rune *s2, long n)
|
||||||
|
{
|
||||||
|
Rune c1, c2;
|
||||||
|
|
||||||
|
while(n > 0) {
|
||||||
|
c1 = *s1++;
|
||||||
|
c2 = *s2++;
|
||||||
|
n--;
|
||||||
|
if(c1 != c2) {
|
||||||
|
if(c1 > c2)
|
||||||
|
return 1;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if(c1 == 0)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
33
subprojects/libutf/runestrncpy.c
Normal file
33
subprojects/libutf/runestrncpy.c
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
/*
 * Copy at most n runes from s2 into s1.  If s2 ends before n runes have
 * been copied, the remainder of s1 (up to n runes in total) is filled
 * with zero runes.  If s2 holds n or more runes, s1 is NOT
 * zero-terminated.  Returns s1.
 *
 * The index is a long, matching the type of n, so that counts larger
 * than INT_MAX do not overflow the loop counter.
 */
Rune*
runestrncpy(Rune *s1, Rune *s2, long n)
{
	long i;
	Rune ch;

	/* Copy runes up to and including the terminator, or n of them. */
	i = 0;
	while(i < n){
		ch = s2[i];
		s1[i] = ch;
		i++;
		if(ch == 0)
			break;
	}

	/* Zero-pad whatever is left of the n-rune window. */
	for(; i < n; i++)
		s1[i] = 0;

	return s1;
}
|
||||||
30
subprojects/libutf/runestrrchr.c
Normal file
30
subprojects/libutf/runestrrchr.c
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
/*
 * Return a pointer to the last occurrence of rune c in the
 * zero-terminated rune string s, or 0 if c does not occur.
 * Searching for rune 0 returns a pointer to the terminator.
 */
Rune*
runestrrchr(Rune *s, Rune c)
{
	Rune *hit, *last;

	if(c == 0)
		return runestrchr(s, 0);

	/* Walk forward from match to match, remembering the latest one. */
	last = 0;
	for(hit = runestrchr(s, c); hit != 0; hit = runestrchr(hit + 1, c))
		last = hit;
	return last;
}
|
||||||
44
subprojects/libutf/runestrstr.c
Normal file
44
subprojects/libutf/runestrstr.c
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return pointer to first occurrence of s2 in s1,
|
||||||
|
* 0 if none
|
||||||
|
*/
|
||||||
|
Rune*
|
||||||
|
runestrstr(Rune *s1, Rune *s2)
|
||||||
|
{
|
||||||
|
Rune *p, *pa, *pb;
|
||||||
|
int c0, c;
|
||||||
|
|
||||||
|
c0 = *s2;
|
||||||
|
if(c0 == 0)
|
||||||
|
return s1;
|
||||||
|
s2++;
|
||||||
|
for(p=runestrchr(s1, c0); p; p=runestrchr(p+1, c0)) {
|
||||||
|
pa = p;
|
||||||
|
for(pb=s2;; pb++) {
|
||||||
|
c = *pb;
|
||||||
|
if(c == 0)
|
||||||
|
return p;
|
||||||
|
if(c != *++pa)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
1151
subprojects/libutf/runetype.c
Normal file
1151
subprojects/libutf/runetype.c
Normal file
File diff suppressed because it is too large
Load diff
54
subprojects/libutf/utf.h
Normal file
54
subprojects/libutf/utf.h
Normal file
|
|
@ -0,0 +1,54 @@
|
||||||
|
#ifndef _UTF_H_
|
||||||
|
#define _UTF_H_ 1
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef unsigned int Rune; /* 32 bits */
|
||||||
|
|
||||||
|
enum
{
	/* maximum bytes per rune */
	UTFmax = 4,

	/* values below this cannot represent part of a UTF sequence */
	Runesync = 0x80,

	/* values below this are the same as a rune and as a UTF sequence */
	Runeself = 0x80,

	/* rune produced on a decoding error in UTF */
	Runeerror = 0xFFFD,

	/* maximum rune value */
	Runemax = 0x10FFFF
};
|
||||||
|
|
||||||
|
/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */
|
||||||
|
int chartorune(Rune *rune, char *str);
|
||||||
|
int fullrune(char *str, int n);
|
||||||
|
int isalpharune(Rune c);
|
||||||
|
int islowerrune(Rune c);
|
||||||
|
int isspacerune(Rune c);
|
||||||
|
int istitlerune(Rune c);
|
||||||
|
int isupperrune(Rune c);
|
||||||
|
int runelen(long c);
|
||||||
|
int runenlen(Rune *r, int nrune);
|
||||||
|
Rune* runestrcat(Rune *s1, Rune *s2);
|
||||||
|
Rune* runestrchr(Rune *s, Rune c);
|
||||||
|
int runestrcmp(Rune *s1, Rune *s2);
|
||||||
|
Rune* runestrcpy(Rune *s1, Rune *s2);
|
||||||
|
Rune* runestrdup(Rune *s) ;
|
||||||
|
Rune* runestrecpy(Rune *s1, Rune *es1, Rune *s2);
|
||||||
|
long runestrlen(Rune *s);
|
||||||
|
Rune* runestrncat(Rune *s1, Rune *s2, long n);
|
||||||
|
int runestrncmp(Rune *s1, Rune *s2, long n);
|
||||||
|
Rune* runestrncpy(Rune *s1, Rune *s2, long n);
|
||||||
|
Rune* runestrrchr(Rune *s, Rune c);
|
||||||
|
Rune* runestrstr(Rune *s1, Rune *s2);
|
||||||
|
int runetochar(char *str, Rune *rune);
|
||||||
|
Rune tolowerrune(Rune c);
|
||||||
|
Rune totitlerune(Rune c);
|
||||||
|
Rune toupperrune(Rune c);
|
||||||
|
char* utfecpy(char *to, char *e, char *from);
|
||||||
|
int utflen(char *s);
|
||||||
|
int utfnlen(char *s, long m);
|
||||||
|
char* utfrrune(char *s, long c);
|
||||||
|
char* utfrune(char *s, long c);
|
||||||
|
char* utfutf(char *s1, char *s2);
|
||||||
|
|
||||||
|
#if defined(__cplusplus)
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
32
subprojects/libutf/utfdef.h
Normal file
32
subprojects/libutf/utfdef.h
Normal file
|
|
@ -0,0 +1,32 @@
|
||||||
|
/*
|
||||||
|
* compiler directive on Plan 9
|
||||||
|
*/
|
||||||
|
#ifndef USED
|
||||||
|
#define USED(x) if(x);else
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* easiest way to make sure these are defined
|
||||||
|
*/
|
||||||
|
#define uchar _fmtuchar
|
||||||
|
#define ushort _fmtushort
|
||||||
|
#define uint _fmtuint
|
||||||
|
#define ulong _fmtulong
|
||||||
|
#define vlong _fmtvlong
|
||||||
|
#define uvlong _fmtuvlong
|
||||||
|
typedef unsigned char uchar;
|
||||||
|
typedef unsigned short ushort;
|
||||||
|
typedef unsigned int uint;
|
||||||
|
typedef unsigned long ulong;
|
||||||
|
typedef unsigned long long uvlong;
|
||||||
|
typedef long long vlong;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* nil cannot be ((void*)0) on ANSI C,
|
||||||
|
* because it is used for function pointers
|
||||||
|
*/
|
||||||
|
#undef nil
|
||||||
|
#define nil 0
|
||||||
|
|
||||||
|
#undef nelem
/*
 * Number of elements in the array x.  x must be a real array, not a
 * pointer: sizeof on a pointer yields the pointer size, not the array size.
 */
#define nelem(x) (sizeof(x) / sizeof((x)[0]))
|
||||||
38
subprojects/libutf/utfecpy.c
Normal file
38
subprojects/libutf/utfecpy.c
Normal file
|
|
@ -0,0 +1,38 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#define _BSD_SOURCE 1 /* memccpy */
|
||||||
|
#define _DEFAULT_SOURCE 1
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
/*
 * Copy the NUL-terminated UTF-8 string `from` into the buffer [to, e).
 *
 * The result is always NUL-terminated when the buffer has room for at
 * least one byte (to < e).  If `from` does not fit, it is truncated on
 * a rune boundary so that no partial UTF-8 sequence is left behind.
 *
 * Returns a pointer to the terminating NUL written into the buffer, or
 * `to` unchanged when the buffer is empty (to >= e).
 *
 * Only ISO C library calls are used, so no feature-test macros are
 * needed to compile this function.
 */
char*
utfecpy(char *to, char *e, char *from)
{
	size_t room, len;
	char *nul, *end;

	if(to >= e)
		return to;

	room = (size_t)(e - to);

	/* Fast path: the whole string, terminator included, fits. */
	nul = memchr(from, '\0', room);
	if(nul != NULL){
		len = (size_t)(nul - from);
		memcpy(to, from, len + 1);
		return to + len;
	}

	/*
	 * Truncation: fill the buffer, then place the terminator on the
	 * last byte.  If that byte is a UTF-8 continuation byte (10xxxxxx)
	 * the final rune was cut, so back up to its lead byte and
	 * terminate there instead.  The scan starts AT the last byte, so
	 * complete characters that fit are kept.
	 */
	memcpy(to, from, room);
	end = e - 1;
	while(end > to && ((unsigned char)*end & 0xC0) == 0x80)
		end--;
	*end = '\0';
	return end;
}
|
||||||
37
subprojects/libutf/utflen.c
Normal file
37
subprojects/libutf/utflen.c
Normal file
|
|
@ -0,0 +1,37 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
int
|
||||||
|
utflen(char *s)
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
long n;
|
||||||
|
Rune rune;
|
||||||
|
|
||||||
|
n = 0;
|
||||||
|
for(;;) {
|
||||||
|
c = *(uchar*)s;
|
||||||
|
if(c < Runeself) {
|
||||||
|
if(c == 0)
|
||||||
|
return n;
|
||||||
|
s++;
|
||||||
|
} else
|
||||||
|
s += chartorune(&rune, s);
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
}
|
||||||
41
subprojects/libutf/utfnlen.c
Normal file
41
subprojects/libutf/utfnlen.c
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
int
|
||||||
|
utfnlen(char *s, long m)
|
||||||
|
{
|
||||||
|
int c;
|
||||||
|
long n;
|
||||||
|
Rune rune;
|
||||||
|
char *es;
|
||||||
|
|
||||||
|
es = s + m;
|
||||||
|
for(n = 0; s < es; n++) {
|
||||||
|
c = *(uchar*)s;
|
||||||
|
if(c < Runeself){
|
||||||
|
if(c == '\0')
|
||||||
|
break;
|
||||||
|
s++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if(!fullrune(s, es-s))
|
||||||
|
break;
|
||||||
|
s += chartorune(&rune, s);
|
||||||
|
}
|
||||||
|
return n;
|
||||||
|
}
|
||||||
45
subprojects/libutf/utfrrune.c
Normal file
45
subprojects/libutf/utfrrune.c
Normal file
|
|
@ -0,0 +1,45 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
char*
|
||||||
|
utfrrune(char *s, long c)
|
||||||
|
{
|
||||||
|
long c1;
|
||||||
|
Rune r;
|
||||||
|
char *s1;
|
||||||
|
|
||||||
|
if(c < Runesync) /* not part of utf sequence */
|
||||||
|
return strrchr(s, c);
|
||||||
|
|
||||||
|
s1 = 0;
|
||||||
|
for(;;) {
|
||||||
|
c1 = *(uchar*)s;
|
||||||
|
if(c1 < Runeself) { /* one byte rune */
|
||||||
|
if(c1 == 0)
|
||||||
|
return s1;
|
||||||
|
if(c1 == c)
|
||||||
|
s1 = s;
|
||||||
|
s++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
c1 = chartorune(&r, s);
|
||||||
|
if(r == c)
|
||||||
|
s1 = s;
|
||||||
|
s += c1;
|
||||||
|
}
|
||||||
|
}
|
||||||
44
subprojects/libutf/utfrune.c
Normal file
44
subprojects/libutf/utfrune.c
Normal file
|
|
@ -0,0 +1,44 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
char*
|
||||||
|
utfrune(char *s, long c)
|
||||||
|
{
|
||||||
|
long c1;
|
||||||
|
Rune r;
|
||||||
|
int n;
|
||||||
|
|
||||||
|
if(c < Runesync) /* not part of utf sequence */
|
||||||
|
return strchr(s, c);
|
||||||
|
|
||||||
|
for(;;) {
|
||||||
|
c1 = *(uchar*)s;
|
||||||
|
if(c1 < Runeself) { /* one byte rune */
|
||||||
|
if(c1 == 0)
|
||||||
|
return 0;
|
||||||
|
if(c1 == c)
|
||||||
|
return s;
|
||||||
|
s++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
n = chartorune(&r, s);
|
||||||
|
if(r == c)
|
||||||
|
return s;
|
||||||
|
s += n;
|
||||||
|
}
|
||||||
|
}
|
||||||
41
subprojects/libutf/utfutf.c
Normal file
41
subprojects/libutf/utfutf.c
Normal file
|
|
@ -0,0 +1,41 @@
|
||||||
|
/*
|
||||||
|
* The authors of this software are Rob Pike and Ken Thompson.
|
||||||
|
* Copyright (c) 2002 by Lucent Technologies.
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose without fee is hereby granted, provided that this entire notice
|
||||||
|
* is included in all copies of any software which is or includes a copy
|
||||||
|
* or modification of this software and in all copies of the supporting
|
||||||
|
* documentation for such software.
|
||||||
|
* THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
|
||||||
|
* WARRANTY. IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
|
||||||
|
* ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
|
||||||
|
* OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||||
|
*/
|
||||||
|
#include <stdarg.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include "plan9.h"
|
||||||
|
#include "utf.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Return pointer to first occurrence of s2 in s1,
|
||||||
|
* 0 if none
|
||||||
|
*/
|
||||||
|
char*
|
||||||
|
utfutf(char *s1, char *s2)
|
||||||
|
{
|
||||||
|
char *p;
|
||||||
|
long f, n1, n2;
|
||||||
|
Rune r;
|
||||||
|
|
||||||
|
n1 = chartorune(&r, s2);
|
||||||
|
f = r;
|
||||||
|
if(f <= Runesync) /* represents self */
|
||||||
|
return strstr(s1, s2);
|
||||||
|
|
||||||
|
n2 = strlen(s2);
|
||||||
|
for(p=s1; p=utfrune(p, f); p+=n1)
|
||||||
|
if(strncmp(p, s2, n2) == 0)
|
||||||
|
return p;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue