// wl/old/lex.c


#include <err.h>
#include <stdio.h>
#include <stdlib.h>

#include "wscm.h"

// helpers
// Report whether ch is one of the blanks the lexer skips: space, tab, or
// newline. Returns 1 for those three characters and 0 for anything else.
static int iswhite(int ch) {
  switch (ch) {
  case ' ':
  case '\t':
  case '\n':
    return 1;
  default:
    return 0;
  }
}

// Report whether ch terminates a word: an opening or closing parenthesis,
// a single quote, or a semicolon. Returns 1 for a delimiter, 0 otherwise.
static int isdelim(int ch) {
  if (ch == '(' || ch == ')')
    return 1;
  if (ch == '\'')
    return 1;
  return ch == ';';
}

// Store ch at the current cursor position of the token buffer and advance
// the cursor by one.
//
// If the buffer is already full, print "lexer buffer overflow" to stderr and
// abort the process; the function does not return in that case.
static inline void appendchar(Lx *lex, char ch) {
  // NOTE(review): assumes lex->buffer holds LEXER_CAP bytes (the declaration
  // lives in wscm.h and is not visible here), so the last writable index is
  // LEXER_CAP - 1. The previous `>` test let cursor == LEXER_CAP through and
  // wrote one element past that bound.
  if (lex->cursor >= LEXER_CAP) {
    fprintf(stderr, "lexer buffer overflow");
    abort();
  }
  lex->buffer[lex->cursor++] = ch;
}

// Read from lex->input, discarding spaces, tabs and newlines, and return the
// first character that is not one of those.
//
// Returns EOF without reading when the stream's end-of-file indicator is
// already set, and also when getc itself yields EOF.
static int getcws(Lx *lex) {
  int c;

  if (feof(lex->input))
    return EOF;

  do {
    c = getc(lex->input);
  } while (iswhite(c));

  return c;
}

// Collect a word into the token buffer, reading from lex->input until EOF,
// a blank (see iswhite) or a delimiter (see isdelim) is met.
//
// The terminating blank or delimiter is pushed back onto the stream so the
// next read sees it again. The buffer is always NUL-terminated before
// returning. If EOF arrives while the buffer is still empty, lex->kind is
// set to TOK_EOF; otherwise lex->kind is left as the caller set it.
//
// Returns lex->kind.
static int scanword(Lx *lex) {
  int c;

  for (;;) {
    c = getc(lex->input);
    if (c == EOF || iswhite(c) || isdelim(c))
      break;
    appendchar(lex, c);
  }

  if (c != EOF) {
    // Leave the terminator in the stream for the next token.
    ungetc(c, lex->input);
  } else if (lex->cursor == 0) {
    // Nothing was collected before the input ran out.
    lex->kind = TOK_EOF;
  }

  appendchar(lex, 0);
  return lex->kind;
}

// Read the body of a string literal from lex->input into the token buffer,
// up to and including the closing double quote (which is not stored).
//
// Escape handling after a backslash:
//   \n  -> newline
//   \t  -> tab
//   any other character (including '"' and '\\') stands for itself.
// Previously an unrecognised escape such as "\\" was dropped altogether, so
// a literal backslash could not be written inside a string.
//
// On success the buffer is NUL-terminated, lex->kind is set to TOK_STRING
// and that value is returned. If EOF is reached before the closing quote,
// the process exits through errx with "unterminated string literal".
static int scanstring(Lx *lex) {
  for (;;) {
    int ch = getc(lex->input);
    switch (ch) {
    case EOF:
      goto eof;
    case '\\':
      ch = getc(lex->input);
      switch (ch) {
      case EOF:
        goto eof;
      case 'n':
        appendchar(lex, '\n');
        break;
      case 't':
        appendchar(lex, '\t');
        break;
      default:
        // Covers '"' and '\\' as well as any unknown escape: keep the
        // escaped character rather than silently discarding it.
        appendchar(lex, ch);
        break;
      }
      break;
    case '"':
      appendchar(lex, 0);
      return (lex->kind = TOK_STRING);
    default:
      appendchar(lex, ch);
      break;
    }
  }

eof:
  errx(1, "unterminated string literal");
}

// Scan the next token from lex->input.
//
// Resets the buffer cursor, skips leading blanks, then dispatches on the
// first significant character:
//   ';'              comment: the text from ';' up to (not including) the
//                    end of the line is stored; kind is TOK_COMMENT
//   '(' ')' '.' '\'' single-character token; kind is the character itself
//                    (the buffer is not written in this case)
//   '"'              string literal, handled by scanstring
//   anything else    word, handled by scanword (which also reports TOK_EOF
//                    when the input ends before any character is read)
//
// Returns the token kind, which is also stored in lex->kind. When the
// stream's end-of-file indicator is already set on entry, lex->kind is set
// to TOK_EOF, the buffer is emptied, and 0 is returned.
int nexttoken(Lx *lex) {
  int ch;
  lex->cursor = 0;

  if (feof(lex->input)) {
    lex->kind = TOK_EOF;
    *lex->buffer = 0;
    return 0;
  }

  ch = getcws(lex);
  switch (ch) {
  case ';':
    // Stop at EOF as well as at newline: a comment on the final line of a
    // file without a trailing newline would otherwise keep appending EOF
    // until appendchar aborted with a buffer overflow.
    for (; ch != '\n' && ch != EOF; ch = getc(lex->input))
      appendchar(lex, ch);
    appendchar(lex, 0);
    return (lex->kind = TOK_COMMENT);
  case '(':
  case ')':
  case '.':
  case '\'':
    return (lex->kind = ch);
  case '"':
    return scanstring(lex);
  default:
    // ungetc(EOF, ...) is a no-op, so an EOF here falls through to scanword,
    // which reads EOF again and reports TOK_EOF.
    ungetc(ch, lex->input);
    lex->kind = TOK_WORD;
    return scanword(lex);
  }
}