119 lines
2.1 KiB
C
119 lines
2.1 KiB
C
|
|
#include <err.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
|
|
#include "wscm.h"
|
|
|
|
// helpers
|
|
// Returns non-zero when ch is a whitespace character the lexer skips
// between tokens: space, tab, newline, or carriage return.
// Carriage return is included so CRLF-terminated input does not leave a
// stray '\r' attached to the end of a word.
static int iswhite(int ch) {
  return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
}
|
|
|
|
// Reports whether ch ends a word: an opening or closing parenthesis,
// a single quote, or a semicolon. Returns 1 for those, 0 otherwise.
static int isdelim(int ch) {
  switch (ch) {
  case '(':
  case ')':
  case '\'':
  case ';':
    return 1;
  default:
    return 0;
  }
}
|
|
|
|
// Stores ch at the current cursor position of the lexer's token buffer and
// advances the cursor by one.
//
// If the buffer is already full, prints a diagnostic to stderr and aborts
// the process instead of writing.
//
// NOTE(review): the check treats LEXER_CAP as the number of usable slots,
// i.e. valid indices are 0 .. LEXER_CAP - 1. The previous `>` comparison
// allowed a write at index LEXER_CAP, which is one past the end if the
// buffer is declared with LEXER_CAP elements — confirm against wscm.h.
static inline void appendchar(Lx *lex, char ch) {
  if (lex->cursor >= LEXER_CAP) {
    fprintf(stderr, "lexer buffer overflow");
    abort();
  }
  lex->buffer[lex->cursor++] = ch;
}
|
|
|
|
// Reads from the lexer's input, discarding whitespace, and returns the
// first non-whitespace character. Returns EOF if the stream's end-of-file
// indicator is already set, or if the end of input is reached while
// skipping.
static int getcws(Lx *lex) {
  int c;

  if (feof(lex->input)) {
    return EOF;
  }

  do {
    c = getc(lex->input);
  } while (iswhite(c));

  return c;
}
|
|
|
|
// Accumulates a word into the lexer buffer, reading characters until end
// of input, whitespace, or a delimiter is seen.
//
// A terminating whitespace/delimiter character is pushed back onto the
// input so the next read sees it. If end of input is hit before any
// character was stored, the token kind is changed to TOK_EOF. In every
// case the buffer is NUL-terminated and the (possibly updated) lex->kind
// is returned; the caller is expected to have set lex->kind beforehand.
static int scanword(Lx *lex) {
  int c;

  for (c = getc(lex->input); c != EOF && !iswhite(c) && !isdelim(c);
       c = getc(lex->input)) {
    appendchar(lex, c);
  }

  if (c != EOF) {
    // Leave the terminator for the next token.
    ungetc(c, lex->input);
  } else if (lex->cursor == 0) {
    // Nothing was read at all: this is the end of the input.
    lex->kind = TOK_EOF;
  }

  appendchar(lex, 0);
  return lex->kind;
}
|
|
|
|
// Reads the body of a string literal into the lexer buffer; the opening
// double quote has already been consumed by the caller.
//
// Escape handling after a backslash:
//   \n  -> newline
//   \t  -> tab
//   any other character (including \" and \\) -> that character itself
// Previously only \n, \t and \" were handled and every other escape,
// notably \\, was silently dropped from the string.
//
// On the closing quote the buffer is NUL-terminated, lex->kind is set to
// TOK_STRING and that value is returned. If the input ends before the
// closing quote, the program exits via errx() with status 1.
static int scanstring(Lx *lex) {
  int ch;

  for (;;) {
    ch = getc(lex->input);
    switch (ch) {
    case EOF:
      goto eof;
    case '\\':
      ch = getc(lex->input);
      switch (ch) {
      case EOF:
        goto eof;
      case 'n':
        appendchar(lex, '\n');
        break;
      case 't':
        appendchar(lex, '\t');
        break;
      default:
        // Escaped quote, escaped backslash, or an unrecognised escape:
        // keep the character literally rather than discarding it.
        appendchar(lex, ch);
        break;
      }
      break;
    case '"':
      appendchar(lex, 0);
      return (lex->kind = TOK_STRING);
    default:
      appendchar(lex, ch);
      break;
    }
  }

eof:
  errx(1, "unterminated string literal");
}
|
|
|
|
// Scans the next token from lex->input.
//
// Resets the buffer cursor, skips leading whitespace, then dispatches on
// the first character:
//   ';'               comment: the text up to (not including) the newline
//                     or end of input is stored; kind is TOK_COMMENT
//   '(' ')' '.' '\''  single-character token; kind is the character itself
//   '"'               string literal, handled by scanstring()
//   anything else     word, handled by scanword() (which also reports
//                     TOK_EOF when the input is exhausted)
//
// Returns the token kind, which is also stored in lex->kind. When the
// stream's end-of-file indicator is already set on entry, lex->kind is set
// to TOK_EOF, the buffer is emptied, and 0 is returned.
// NOTE(review): that path returns the literal 0 rather than lex->kind —
// confirm TOK_EOF is 0 or that callers read lex->kind.
int nexttoken(Lx *lex) {
  int ch;

  lex->cursor = 0;

  if (feof(lex->input)) {
    lex->kind = TOK_EOF;
    *lex->buffer = 0;
    return 0;
  }

  ch = getcws(lex);
  switch (ch) {
  case ';':
    // Stop at EOF as well as newline: a comment on the last line of a file
    // with no trailing newline would otherwise append EOF forever until
    // the buffer overflow check aborts.
    for (; ch != '\n' && ch != EOF; ch = getc(lex->input)) {
      appendchar(lex, ch);
    }
    appendchar(lex, 0);
    return (lex->kind = TOK_COMMENT);
  case '(':
  case ')':
  case '.':
  case '\'':
    return (lex->kind = ch);
  case '"':
    return scanstring(lex);
  default:
    // Give the character back so scanword() sees the whole word. When ch
    // is EOF, ungetc() leaves the stream unchanged and scanword() sees EOF.
    ungetc(ch, lex->input);
    lex->kind = TOK_WORD;
    return scanword(lex);
  }
}
|