/* -*-c-*-
 *
 * $Id: bascat.c,v 1.1 1997/07/23 01:19:33 mdw Exp mdw $
 *
 * Display BBC BASIC programs more or less anywhere
 *
 * (c) 1996, 1997 Matthew Wilcox and Mark Wooding
 */

/*----- Licensing notice --------------------------------------------------*
 *
 * This file is part of Bascat.
 *
 * Bascat is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Library General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * Bascat is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Library General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Bascat; if not, write to the Free Software Foundation,
 * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
 */

/*----- Revision history --------------------------------------------------*
 *
 * $Log: bascat.c,v $
 * Revision 1.1  1997/07/23 01:19:33  mdw
 * Initial revision
 *
 */

/*----- Header files ------------------------------------------------------*/

/* --- ANSI library headers --- */

#include <ctype.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* --- Operating system specific headers --- */

#ifdef HAVE_LIBTERMCAP
#  include <termcap.h>
#endif
#include <unistd.h>

/* --- Private headers --- */

#include "mdwopt.h"

/*----- Debugging support -------------------------------------------------*/

/* --- @D@ --- *
 *
 * Wraps debugging-only code: @D(x)@ expands to @x@ unless @NDEBUG@ is
 * defined, in which case it expands to nothing at all.
 */

#ifndef NDEBUG
#  define D(x) x
#else
#  define D(x)
#endif

/*----- Tokenisation tables -----------------------------------------------*
 *
 * These tables are from the BBC BASIC guide.  Additional verification
 * carried out on an A440 with RISC OS 3.1
 */

/* --- @bcTok__base@ --- *
 *
 * Keywords for single-byte tokens.  Entry @i@ is the keyword for token
 * byte @0x7F + i@ (the decoder indexes this table with @byte - 0x7F@), so
 * the table has one entry for each byte from 0x7F to 0xFF: the first row
 * is 0x7F on its own, and each following row holds eight consecutive
 * codes starting at 0x80.  The order of entries must not be changed.
 *
 * The entries reading "*" are placeholders for codes with no keyword in
 * this table: 0x8D, and the prefix bytes 0xC6, 0xC7 and 0xC8 which the
 * decoder treats as shifts into the two-byte token tables.
 */

static const char *bcTok__base[] = {
  "OTHERWISE",
  "AND", "DIV", "EOR", "MOD", "OR", "ERROR", "LINE", "OFF",
  "STEP", "SPC", "TAB(", "ELSE", "THEN", "*", "OPENIN", "PTR",
  "PAGE", "TIME", "LOMEM", "HIMEM", "ABS", "ACS", "ADVAL", "ASC",
  "ASN", "ATN", "BGET", "COS", "COUNT", "DEG", "ERL", "ERR",
  "EVAL", "EXP", "EXT", "FALSE", "FN", "GET", "INKEY", "INSTR(",
  "INT", "LEN", "LN", "LOG", "NOT", "OPENUP", "OPENOUT", "PI",
  "POINT(", "POS", "RAD", "RND", "SGN", "SIN", "SQR", "TAN",
  "TO", "TRUE", "USR", "VAL", "VPOS", "CHR$", "GET$", "INKEY$",
  "LEFT$(", "MID$(", "RIGHT$(", "STR$", "STRING$(", "EOF", "*", "*",
  "*", "WHEN", "OF", "ENDCASE", "ELSE", "ENDIF", "ENDWHILE", "PTR",
  "PAGE", "TIME", "LOMEM", "HIMEM", "SOUND", "BPUT", "CALL", "CHAIN",
  "CLEAR", "CLOSE", "CLG", "CLS", "DATA", "DEF", "DIM", "DRAW",
  "END", "ENDPROC", "ENVELOPE", "FOR", "GOSUB", "GOTO", "GCOL", "IF",
  "INPUT", "LET", "LOCAL", "MODE", "MOVE", "NEXT", "ON", "VDU",
  "PLOT", "PRINT", "PROC", "READ", "REM", "REPEAT", "REPORT", "RESTORE",
  "RETURN", "RUN", "STOP", "COLOUR", "TRACE", "UNTIL", "WIDTH", "OSCLI"
};

/* Keywords for two-byte tokens introduced by the prefix byte 0xC6.  Entry
 * @i@ corresponds to a second byte of @0x8E + i@.
 */

static const char *bcTok__c6[] = {
  "SUM", "BEAT"
};

/* Keywords for two-byte tokens introduced by the prefix byte 0xC7.  Entry
 * @i@ corresponds to a second byte of @0x8E + i@.
 */

static const char *bcTok__c7[] = {
  "APPEND", "AUTO",
  "CRUNCH", "DELETE", "EDIT", "HELP", "LIST", "LOAD", "LVAR", "NEW",
  "OLD", "RENUMBER", "SAVE", "TEXTLOAD", "TEXTSAVE", "TWIN", "TWINO",
    "INSTALL"
};

/* Keywords for two-byte tokens introduced by the prefix byte 0xC8.  Entry
 * @i@ corresponds to a second byte of @0x8E + i@.
 */

static const char *bcTok__c8[] = {
  "CASE", "CIRCLE",
  "FILL", "ORIGIN", "POINT", "RECTANGLE", "SWAP", "WHILE", "WAIT", "MOUSE",
  "QUIT", "SYS", "INSTALL", "LIBRARY", "TINT", "ELLIPSE", "BEATS", "TEMPO",
  "VOICES", "VOICE", "STEREO", "OVERLAY"
};

/* Number of elements in @array@.  Only meaningful when applied to a real
 * array object; applied to a pointer it yields a wrong answer.
 */

#define ITEMS(array) (sizeof(array) / sizeof((array)[0]))

/*----- Static variables --------------------------------------------------*/

/* --- Detokeniser states --- *
 *
 * The decoder is a small state machine driven one byte at a time.  The
 * current state is reset to @s_normal@ at the start of every program line.
 */

enum {
  s_keyword,				/* Just after ELSE, THEN or REPEAT */
  s_normal,				/* Normal state, reading input */
  s_comment,				/* In a comment (or literal *cmd) */
  s_quote,				/* Inside a quoted string */
  s_c6, s_c7, s_c8,			/* Seen prefix byte 0xC6/0xC7/0xC8 */
  s_dummy
};

/* The type is spelled out: `implicit int' is not valid since C99 */

static int bc__state = s_normal;	/* Current detokenisation state */

/* --- Option flags --- *
 *
 * Each flag occupies its own bit so that they can be combined freely in
 * @bc__flags@.
 */

enum {
  f_highlight   = 1 << 0,		/* Make keywords stand out */
  f_linenumbers = 1 << 1,		/* Print each line's number first */
  f_tty         = 1 << 2,		/* Output goes to a pager we opened */
  f_less        = 1 << 3,		/* Pager name contains `less' */
  f_dummy
};

static int bc__flags;			/* Bitwise OR of the flags above */

#ifdef HAVE_LIBTERMCAP
/* Storage handed to @tgetent@ for the terminal's capability entry */
static char bc__termcap[2048];
#endif

/* Pager command line; null until set by `-p', $PAGER or the default */
static char *bc__pager = NULL;

/*----- Main code ---------------------------------------------------------*/

/* --- @die@ --- *
 *
 * Arguments:   @const char *p@ = message to report
 *
 * Returns:     Always 1, so callers can write @return (die(...));@.
 *
 * Use:         Writes the message to standard error, prefixed with the
 *              program name and followed by a newline.  Despite the name
 *              it does not terminate the program: stopping is left to the
 *              caller.
 */

static int die(const char *p)
{
  enum { failed = 1 };

  (void) fprintf(stderr, "%s: %s\n", optprog, p);
  return (failed);
}

/* --- @bc__keyword@ --- *
 *
 * Arguments:   @const char *s@ = pointer to keyword string
 *              @FILE *fp@ = stream to write onto
 *
 * Returns:     --
 *
 * Use:         Writes a keyword, highlighted if that was asked for.
 *
 *              Without termcap support the keyword is written as is.
 *              With it, there are two ways of highlighting.  When both
 *              @f_less@ and @f_highlight@ are set, each character is
 *              written, backspaced over and written again, which `less'
 *              shows as bold.  Otherwise the keyword is bracketed by the
 *              terminal's `md' and `me' capability strings; these are
 *              looked up once, on the first call.  If either capability
 *              is unavailable the keyword is written plain, rather than
 *              passing a null pointer to @fputs@ or switching bold on with
 *              no way of switching it off again.
 */

static void bc__keyword(const char *s, FILE *fp)
{
#ifdef HAVE_LIBTERMCAP
  if ((~bc__flags & (f_less | f_highlight)) == 0) {
    for (; *s; s++) {
      putc(*s, fp);
      putc(8, fp);			/* Backspace: overstrike for `less' */
      putc(*s, fp);
    }
  } else {
    /* @tgetstr@ copies into the area without any bounds check, so leave
     * generous room for the two short capability strings.
     */

    static char buff[64];
    static char *p = buff;
    static const char *hs, *he;

    if (!hs) {
      if (bc__flags & f_highlight) {
        hs = tgetstr("md", &p);
        he = tgetstr("me", &p);
      }
      if (!hs || !he)
        hs = he = "";			/* No highlighting available */
    }
    fputs(hs, fp);
    fputs(s, fp);
    fputs(he, fp);
  }
#else
  fputs(s, fp);
#endif
}

/* --- @bc__mbtok@ --- *
 *
 * Arguments:   @int byte@ = the second byte of a two-byte token
 *              @const char *t[]@ = token table for the current prefix
 *              @int n@ = number of items in token table
 *              @FILE *fp@ = stream to write onto
 *
 * Returns:     0 if everything's OK, nonzero if the byte has no entry in
 *              the table
 *
 * Use:         Decodes the second byte of a multibyte token and writes the
 *              keyword.  Table entry 0 corresponds to byte 0x8E.  The
 *              decoder is returned to the normal state whether or not the
 *              byte was valid, so a bad token cannot leave it stuck in a
 *              shift state.
 */

static int bc__mbtok(int byte, const char *t[], int n, FILE * fp)
{
  int idx = byte - 0x8E;

  bc__state = s_normal;

  /* Bytes below 0x8E give a negative index; reject those as well */

  if (idx < 0 || idx >= n)
    return (die("Bad program: invalid multibyte token"));
  bc__keyword(t[idx], fp);
  return (0);
}

/* --- @bc__decode@ --- *
 *
 * Arguments:   @int byte@ = byte to decode
 *              @FILE *fp@ = stream to write onto
 *
 * Returns:     0 if everything's going OK; otherwise whatever
 *              @bc__mbtok@ returned for a bad two-byte token
 *
 * Use:         Feeds one byte of a program line through the detokeniser,
 *              writing any resulting text and updating @bc__state@.
 */

static int bc__decode(int byte, FILE * fp)
{
  const int st = bc__state;

  /* --- Second byte of a two-byte token --- */

  if (st == s_c6)
    return (bc__mbtok(byte, bcTok__c6, ITEMS(bcTok__c6), fp));
  if (st == s_c7)
    return (bc__mbtok(byte, bcTok__c7, ITEMS(bcTok__c7), fp));
  if (st == s_c8)
    return (bc__mbtok(byte, bcTok__c8, ITEMS(bcTok__c8), fp));

  /* --- Strings and comments are copied through untouched --- *
   *
   * A closing quote ends a string; nothing ends a comment before the end
   * of the line.
   */

  if (st == s_quote || st == s_comment) {
    if (st == s_quote && byte == '"')
      bc__state = s_normal;
    fputc(byte, fp);
    return (0);
  }

  /* --- Any other unexpected state is quietly ignored --- */

  if (st != s_keyword && st != s_normal)
    return (0);

  /* --- Straight after ELSE, THEN or REPEAT a `*' starts literal text --- */

  if (st == s_keyword)
    bc__state = (byte == '*') ? s_comment : s_normal;

  /* --- Plain characters --- */

  if (byte < 0x7F) {
    if (byte == '"')
      bc__state = s_quote;
    fputc(byte, fp);
    return (0);
  }

  /* --- Prefix bytes just select a table for the next byte --- */

  if (byte == 0xC6) {
    bc__state = s_c6;
    return (0);
  }
  if (byte == 0xC7) {
    bc__state = s_c7;
    return (0);
  }
  if (byte == 0xC8) {
    bc__state = s_c8;
    return (0);
  }

  /* --- Single-byte tokens: pick the next state, then write the word --- */

  if (byte == 0x8B || byte == 0x8C || byte == 0xF5)
    bc__state = s_keyword;		/* ELSE, THEN, REPEAT */
  else if (byte == 0xDC || byte == 0xF4)
    bc__state = s_comment;		/* DATA, REM */
  bc__keyword(bcTok__base[byte - 0x7F], fp);
  return (0);
}

/* --- @bc__line@ --- *
 *
 * Arguments:   @FILE *in@ = input stream to read
 *              @FILE *out@ = output stream to write
 *
 * Returns:     Zero if there's another line after this one; nonzero at
 *              the end-of-program marker or after an error (which has
 *              already been reported).
 *
 * Use:         Decodes one BASIC line.  The line is read as two bytes of
 *              line number (most significant first), a length byte, the
 *              line's contents and a closing 0x0D.  A first byte of 0xFF
 *              marks the end of the program instead.  The length byte
 *              counts four bytes more than the contents, so a value below
 *              4 is rejected; a byte which fails to decode also ends the
 *              line with an error.
 */

static int bc__line(FILE *in, FILE *out)
{
  int hi, lo;
  int len;
  int byte;

  /* --- Read the line number --- */

  hi = getc(in);
  D( fprintf(stderr, "ln_0 == %i\n", hi); )
  if (hi == EOF)
    goto eof;
  if (hi == 0xFF)
    return (1);

  lo = getc(in);
  D( fprintf(stderr, "ln_1 == %i\n", lo); )
  if (lo == EOF)
    goto eof;

  if (bc__flags & f_linenumbers)
    fprintf(out, "%5i", (hi << 8) + lo);

  /* --- Read and check the line length --- *
   *
   * Without the check a short length would go negative below and the
   * loop would eat the rest of the file.
   */

  len = getc(in);
  D( fprintf(stderr, "linelen == %i\n", len); )
  if (len == EOF)
    goto eof;
  if (len < 4)
    return (die("Bad program: invalid line length"));
  len -= 4;

  /* --- Decode the contents --- */

  bc__state = s_normal;
  while (len > 0) {
    byte = getc(in);
    D( fprintf(stderr, "state == %i, byte == %i\n",
               bc__state, byte); )
    if (byte == EOF)
      goto eof;
    if (bc__decode(byte, out))
      return (1);			/* Already reported */
    len--;
  }
  putc('\n', out);

  /* --- Check the end-of-line delimiter --- */

  byte = getc(in);
  D( fprintf(stderr, "eol == %i\n", byte); )
  if (byte == EOF)
    goto eof;
  if (byte != 0x0D)
    return (die("Bad program: expected end-of-line delimiter"));

  return (0);

eof:
  return (die("Bad program: unexpected end-of-file"));
}

/* --- @bc__file@ --- *
 *
 * Arguments:   @FILE *in@ = the input stream
 *              @FILE *out@ = the output stream
 *
 * Returns:     --
 *
 * Use:         Decodes an entire file.  The file must begin with a 0x0D
 *              byte; if it doesn't, the problem is reported and nothing
 *              is decoded.  Lines are then decoded until @bc__line@ says
 *              to stop, after which any remaining input is reported as
 *              trailing data.
 */

static void bc__file(FILE *in, FILE *out)
{
  /* --- Check for the initial newline char --- *
   *
   * @die@ only reports the problem, so stop explicitly: decoding from
   * the wrong offset would only produce rubbish and further errors.
   */

  if (getc(in) != 0x0D) {
    die("Bad program: doesn't start with a newline");
    return;
  }

  /* --- Now read the lines in one by one --- */

  while (!bc__line(in, out))
    ;

  /* --- Check that we're really at end-of-file --- */

  if (getc(in) != EOF)
    die("Found data after end of program");
}

/* --- @bc__sigPipe@ --- *
 *
 * Arguments:   @int s@ = signal number (unused)
 *
 * Returns:     Doesn't
 *
 * Use:         Handles SIGPIPE signals by ending the program quietly with
 *              a zero exit status.  It calls @_exit@ rather than @exit@:
 *              @exit@ flushes stdio buffers and runs exit handlers, which
 *              is not safe inside a signal handler that may have
 *              interrupted a stdio call.
 */

static void bc__sigPipe(int s)
{
  (void) s;
  _exit(0);				/* Gracefully, oh yes */
}

/* --- @bc__options@ --- *
 *
 * Arguments:   @int c@ = number of arguments
 *              @char *v[]@ = pointer to arguments
 *              @const char *s@ = pointer to short options
 *              @const struct option *o@ = pointer to long options
 *
 * Returns:     --
 *
 * Use:         Works through the options reported by @mdwopt@ until it
 *              returns -1, updating @bc__flags@ and @bc__pager@ to suit.
 *              The version and help options print their text and exit
 *              with status zero.  Option codes not listed here are
 *              ignored.
 */

static void bc__options(int c, char *v[], const char *s,
                        const struct option *o)
{
  int opt;

  while ((opt = mdwopt(c, v, s, o, 0, 0,
                       gFlag_negation | gFlag_envVar)) != -1) {

    if (opt == 'v' || opt == 'h') {

      /* --- Both start with the version banner; help adds the usage --- */

      printf("%s v. " VERSION " (" __DATE__ ")\n", optprog);
      if (opt == 'h') {
        printf("\n"
               "%s [-hv] [-n|+n] [-l|+l] [-p PAGER] [file...]\n"
               "\n"
               "Types BBC BASIC programs in a readable way.  Options "
               "currently supported are as\n"
               "follows:\n"
               "\n"
               "-h, --help:            Displays this evil help message\n"
               "-v, --version:         Displays the current version number\n"
               "-n, --line-numbers:    Displays line numbers for each line\n"
               "-l, --highlight:       Attempts to highlight keywords\n"
               "-p, --pager=PAGER:     Sets pager to use (default $PAGER)\n"
               "\n"
               "Prefix long options with `no-' to cancel them.  Use `+' to "
               "cancel short options.\n"
               "Options can also be placed in the `BASCAT' environment "
               "variable, if you don't\n"
               "like the standard settings.\n",
               optprog);
      }
      exit(0);
    }

    if (opt == 'n')
      bc__flags |= f_linenumbers;
    else if (opt == ('n' | gFlag_negated))
      bc__flags &= ~f_linenumbers;
    else if (opt == 'l')
      bc__flags |= f_highlight;
    else if (opt == ('l' | gFlag_negated))
      bc__flags &= ~f_highlight;
    else if (opt == 'p')
      bc__pager = optarg;
  }
}

/* --- @main@ --- *
 *
 * Arguments:   @int argc@ = number of arguments
 *              @char *argv[]@ = pointer to what the arguments are
 *
 * Returns:     0 if it all worked; nonzero if an input file couldn't be
 *              opened or there was nothing to read
 *
 * Use:         Displays BASIC programs.  Each file named on the command
 *              line is decoded in turn (`-' means standard input);
 *              standard input is read if no files are named.  When
 *              standard output is a terminal the output is piped through
 *              a pager.
 */

int main(int argc, char *argv[])
{
  static struct option opts[] = {
    { "help", 0, 0, 'h' },
    { "version", 0, 0, 'v' },
    { "line-numbers", gFlag_negate, 0, 'n' },
    { "highlight", gFlag_negate, 0, 'l' },
    { "pager", gFlag_argReq, 0, 'p' },
    { 0, 0, 0, 0 }
  };
  static const char *shortopts = "hvn+l+p:";

  FILE *in;
  FILE *out;
  int rc = 0;

  /* --- Parse the command line options --- */

  bc__options(argc, argv, shortopts, opts);

  /* --- Refuse to sit waiting for a program typed at the terminal --- */

  if (optind == argc && isatty(0)) {
    fprintf(stderr,
            "%s: No filenames given, and standard input is a tty\n"
            "To force reading from stdin, use `%s -'.  For help, type "
            "`%s --help'.\n",
            optprog, optprog, optprog);
    exit(EXIT_FAILURE);
  }

  /* --- If output is to a terminal, try paging --- *
   *
   * All programs which spew text should do this ;-)
   */

  if (isatty(1)) {
    if (!bc__pager)
      bc__pager = getenv("PAGER");
    if (!bc__pager)
      bc__pager = PAGER;		/* Worth a try */
    if (strstr(bc__pager, "less"))
      bc__flags |= f_less;		/* HACK!!! */
    out = popen(bc__pager, "w");
    if (!out)
      out = stdout;
    else {
      bc__flags |= f_tty;
      signal(SIGPIPE, bc__sigPipe);
    }
  } else
    out = stdout;

  /* --- Set up termcap highlighting if it's going to be used --- *
   *
   * Overstrike highlighting for `less' needs no terminal capabilities, so
   * the lookup is only done for the escape-sequence method.  If the
   * terminal type is unknown, highlighting is switched off rather than
   * trying to use capabilities which were never loaded.
   */

#ifdef HAVE_LIBTERMCAP
  if ((bc__flags & f_highlight) && !(bc__flags & f_less)) {
    const char *term = getenv("TERM");

    if (!term || tgetent(bc__termcap, term) <= 0)
      bc__flags &= ~f_highlight;
  }
#endif

  /* --- Now go through all the files --- */

  if (optind == argc)
    bc__file(stdin, out);
  else {
    for (; optind < argc; optind++) {
      if (strcmp(argv[optind], "-") == 0) {
        bc__file(stdin, out);
        continue;
      }
      in = fopen(argv[optind], "rb");
      if (!in) {
        fprintf(stderr,
                "%s: Couldn't open input file `%s': %s\n",
                optprog, argv[optind], strerror(errno));
        rc = EXIT_FAILURE;
        continue;
      }
      bc__file(in, out);
      fclose(in);
    }
  }

  /* --- Wait for the pager to finish, if there is one --- */

  if (bc__flags & f_tty)
    pclose(out);

  return (rc);
}

/*----- That's all, folks -------------------------------------------------*/
