/* wordref.c (emx+gcc) -- Copyright (c) 1996-1997 by Eberhard Mattes */

/* This sample program demonstrates how to use the BSD database
   library (B-trees) and how to handle signals.

   It's a sample program -- fgrep takes less disk space and is
   probably faster. */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <errno.h>
#include <fcntl.h>
#include <signal.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <db.h>
#include <sys/param.h>
#ifdef __EMX__
#include <sys/nls.h>
#endif

/* We use three B-tree files:

   object | file name       | key                  | data
   -------+-----------------+----------------------+----------
   DBF0   | "wordref.db0"   | word, fileno, lineno | -
   DBF1   | "wordref.db1"   | fileno               | file name
   DBF2   | "wordref.db2"   | file name            | fileno

   fileno and lineno are stored in the key of DBF0 to obtain output
   sorted by fileno and lineno. */

/* The key structure for DBF0.  A key is allocated with extra room
   behind the structure so that WORD can hold the complete
   null-terminated word (see add_word()).  compare0() orders keys by
   word first, then by fileno, then by lineno.  The structure is
   packed, so there is no padding between the members. */

struct key0
{
  unsigned fileno;              /* File number (see DBF1 and DBF2) */
  unsigned lineno;              /* Line number */
  char word[1];                 /* Null-terminated string */
} __attribute__ ((__packed__));

/* The key structure for DBF1.  The data stored under this key is the
   file name, without a terminating null character. */

struct key1
{
  unsigned fileno;              /* File number */
};

/* The data structure for DBF2.  The key of DBF2 is the file name,
   without a terminating null character. */

struct data2
{
  unsigned fileno;              /* File number */
};

/* This structure describes one of the three database files.  DB is
   NULL while the file is not open; COMPARE, DUP and CACHE_SIZE are
   passed to dbopen() by wordref_open_one(). */

struct dbfile
{
  DB *db;                       /* Database file */
  int (*compare)(const DBT*, const DBT*); /* Comparison function */
  int dup;                      /* Allow duplicates */
  unsigned cache_size;          /* Cache size */
  char fname[MAXPATHLEN];       /* File name of the database file */
};

/* Define an array holding the three database files and define
   shorthands for convenience.  DBF0 holds the (word, fileno, lineno)
   keys, DBF1 maps fileno to file name, and DBF2 maps file name to
   fileno. */

static struct dbfile dbfile[3];

#define DBF0    dbfile[0]
#define DBF1    dbfile[1]
#define DBF2    dbfile[2]

/* Define an array holding three temporary database files.
   wordref_close() closes them as well; wordref_atexit() removes the
   files whose fname member is not the empty string. */

static struct dbfile dbtemp[3];

/* To avoid looking up DBF1 and DBF2 over and over, we keep the last
   (file name, fileno) pair in these two variables.  The values are
   valid only if cur_fname is not the empty string.  The cache is
   shared by add_word() and show_record(). */

static char cur_fname[MAXPATHLEN];
static unsigned cur_fileno;

/* This is the set of signals to block while performing an operation
   that must not be interrupted.  See block() and unblock(). */

static sigset_t block_set;

/* This variable is non-zero if the database files should be closed
   during atexit() processing.  It is set by wordref_open() and
   cleared by wordref_close() and wordref_close_one(). */

static int cleanup;

/* This variable is set to 'l' or 'u' by the -l and -u options,
   respectively, of the `add', `load', and `find' commands.  If it's
   zero, the case of words won't be changed. */

static char word_case;

/* Character set to be used for case conversion.  CS_ISO_8859_1 is
   selected by -i; CS_NLS is selected by -n, which is accepted only
   when compiling with emx. */

static enum {CS_ASCII, CS_NLS, CS_ISO_8859_1} charset = CS_ASCII;

/* Convert Umlaut-a to ae, etc.  Set to true by the `-g' option;
   init_word_case() patches the ISO 8859-1 tables accordingly. */

static char german;

/* This variable is set to true by the `-c' option of the `add'
   command.  If true, `_' and (non-leading) digits belong to words. */

static char lang_c;

/* This variable is set to true by the `-d' option of the `add' and
   `load' commands.  If true, add_word() passes R_NOOVERWRITE to
   put(). */

static char avoid_dupes;

/* Base name of database files. */

static const char *basename = "wordref";


/* Evaluate to the minimum of A and B.  Each argument is evaluated
   more than once, so don't pass expressions having side effects. */

#define MIN(a,b) ((a) < (b) ? (a) : (b))


/* Print the command line synopsis on stdout and terminate the
   process with exit code 1.  The macro N contributes the `n' option
   letter to the option lists only when compiling with emx. */

#ifdef __EMX__
#define N "n"
#else
#define N ""
#endif

static void usage (void)
{
  static const char text[] =
    "Usage:\n"
    "  wordref clear                   Delete the database\n"
    "  wordref add [-cdgil"N"u] <file>   Add words of <file> to the database\n"
    "  wordref load [-dgil"N"u] <file>   Ditto, file contains word/name pairs\n"
    "  wordref find [-gil"N"u] <word>    Query database for <word>\n"
    "  wordref list                    List the database contents\n"
    "  wordref reorg                   Reorganize the database\n"
    "Options:\n"
    "  -c   words are C identifiers\n"
    "  -d   don't put duplicates into the database\n"
    "  -g   convert umlaut-a to ae, etc.\n"
    "  -i   use ISO 8859-1 for -l and -u\n"
#ifdef __EMX__
    "  -n   use current code page for -l and -u\n"
#endif
    "  -l   convert words to lower case\n"
    "  -u   convert words to upper case";

  /* puts() appends the final newline. */

  puts (text);
  exit (1);
}


/* Add the signals of BLOCK_SET to the signal mask of the process.
   The return value of sigprocmask() is deliberately ignored. */

static void block (void)
{
  (void)sigprocmask (SIG_BLOCK, &block_set, NULL);
}


/* Remove the signals of BLOCK_SET from the signal mask of the
   process, undoing block().  The return value of sigprocmask() is
   deliberately ignored. */

static void unblock (void)
{
  (void)sigprocmask (SIG_UNBLOCK, &block_set, NULL);
}


/* Close one database file.  Return a non-zero termination code if an
   error occurred; return 0 if successful. */

static int wordref_close_one (struct dbfile *d)
{
  int result;

  block ();
  result = 0; cleanup = 0;
  if (d->db != NULL)
    {
      if (d->db->close (d->db) != 0)
        {
          perror (d->fname);
          result = 2;
        }
      d->db = NULL;
    }
  unblock ();
  return result;
}


/* Close any open database files.  Return a non-zero termination code
   if an error occurred; return 0 if successful. */

static int wordref_close (void)
{
  int rc, t, i;

  rc = 0; cleanup = 0;
  for (i = 0; i < 3; ++i)
    {
      t = wordref_close_one (&dbfile[i]);
      if (t > rc)
        rc = t;
    }
  for (i = 0; i < 3; ++i)
    {
      t = wordref_close_one (&dbtemp[i]);
      if (t > rc)
        rc = t;
    }
  return rc;
}



/* Handle asynchronous signals which cause process termination, such
   as SIGINT and SIGTERM. */

static void term_sig (int signo)
{
  struct sigaction sa;
  sigset_t set;

  /* stderr is buffered, so this might happen to work. */

  fprintf (stderr, "Got signal %d, cleaning up...\n", signo);

  /* Close any open database files. */

  wordref_close ();

  /* Regenerate the signal SIGNO with default action installed to
     terminate the process.  Don't forget to unblock the signal. */

  sa.sa_handler = SIG_DFL;
  sa.sa_flags = 0;
  sigemptyset (&sa.sa_mask);
  if (sigaction (signo, &sa, NULL) != 0)
    perror ("sigaction");
  sigemptyset (&set);
  sigaddset (&set, signo);
  sigprocmask (SIG_UNBLOCK, &set, NULL);
  raise (signo);
  exit (3);
}


/* Handle sychronous process termination. */

static void wordref_atexit (void)
{
  int i;

  /* If we should close any open database files, do so. */

  if (cleanup)
    {
      fprintf (stderr, "Cleaning up...\n");
      wordref_close ();
      for (i = 0; i < 3; ++i)
        if (dbtemp[i].fname[0] != 0)
          remove (dbtemp[i].fname);
    }
}


/* Allocate SIZE bytes of memory and return a pointer to the block.
   Print a message and exit with termination code 2 on failure; this
   function never returns NULL.  The caller owns the block and should
   release it with free(). */

static void *xmalloc (size_t size)
{
  void *p;

  /* malloc (0) is allowed to return NULL, which would be mistaken
     for running out of memory.  Request at least one byte. */

  p = malloc (size != 0 ? size : 1);
  if (p == NULL)
    {
      fputs ("Out of memory\n", stderr);
      exit (2);
    }
  return p;
}


/* Call get() of the database file DBF with the signals of BLOCK_SET
   blocked.  KEY, DATA and FLAGS are passed to get() unchanged.  If
   get() returns -1, print a message and exit with termination code
   2.  Otherwise return the value returned by get(); callers treat 0
   as `found'. */

static int db_get (struct dbfile *dbf, DBT *key, DBT *data, unsigned flags)
{
  int rc, saved_errno;

  block ();
  rc = dbf->db->get (dbf->db, key, data, flags);

  /* unblock() calls sigprocmask() which may change errno; keep the
     value set by get() for perror(). */

  saved_errno = errno;
  unblock ();
  if (rc == -1)
    {
      errno = saved_errno;
      perror (dbf->fname);
      exit (2);
    }
  return rc;
}


/* Call seq() of the database file DBF with the signals of BLOCK_SET
   blocked.  KEY, DATA and FLAGS are passed to seq() unchanged.  If
   seq() returns -1, print a message and exit with termination code
   2.  Otherwise return the value returned by seq(); callers treat 0
   as `record available'. */

static int db_seq (struct dbfile *dbf, DBT *key, DBT *data, unsigned flags)
{
  int rc, saved_errno;

  block ();
  rc = dbf->db->seq (dbf->db, key, data, flags);

  /* unblock() calls sigprocmask() which may change errno; keep the
     value set by seq() for perror(). */

  saved_errno = errno;
  unblock ();
  if (rc == -1)
    {
      errno = saved_errno;
      perror (dbf->fname);
      exit (2);
    }
  return rc;
}


/* Call put() of the database file DBF with the signals of BLOCK_SET
   blocked.  KEY, DATA and FLAGS are passed to put() unchanged.  If
   put() returns -1, print a message and exit with termination code
   2.  Otherwise return the value returned by put(). */

static int db_put (struct dbfile *dbf, DBT *key, DBT *data, unsigned flags)
{
  int rc, saved_errno;

  block ();
  rc = dbf->db->put (dbf->db, key, data, flags);

  /* unblock() calls sigprocmask() which may change errno; keep the
     value set by put() for perror(). */

  saved_errno = errno;
  unblock ();
  if (rc == -1)
    {
      errno = saved_errno;
      perror (dbf->fname);
      exit (2);
    }
  return rc;
}


/* Compare two keys of DBF0. */

static int compare0 (const DBT *dbt1, const DBT *dbt2)
{
  int cmp;
  struct key0 k1, k2;

  cmp = strcmp (((const struct key0 *)dbt1->data)->word,
                ((const struct key0 *)dbt2->data)->word);
  if (cmp != 0)
    return cmp;

  /* Copy the structures to avoid alignment problems on certain
     architectures. */

  memcpy (&k1, dbt1->data, sizeof (k1));
  memcpy (&k2, dbt2->data, sizeof (k2));

  if (k1.fileno < k2.fileno)
    return -1;
  else if (k1.fileno > k2.fileno)
    return 1;
  else if (k1.lineno < k2.lineno)
    return -1;
  else if (k1.lineno > k2.lineno)
    return 1;
  else
    return 0;
}


/* Compare two keys of DBF1. */

static int compare1 (const DBT *dbt1, const DBT *dbt2)
{
  struct key1 k1, k2;

  /* Copy the structures to avoid alignment problems on certain
     architectures. */

  memcpy (&k1, dbt1->data, sizeof (k1));
  memcpy (&k2, dbt2->data, sizeof (k2));
  if (k1.fileno < k2.fileno)
    return -1;
  else if (k1.fileno > k2.fileno)
    return 1;
  else
    return 0;
}


/* Compare two keys of DBF2.  The keys are file names which are not
   null-terminated, therefore the sizes of the keys are used.  The
   common prefix is compared with memcmp(); if it is identical, the
   shorter key is the smaller one. */

static int compare2 (const DBT *dbt1, const DBT *dbt2)
{
  size_t common;
  int order;

  common = dbt1->size < dbt2->size ? dbt1->size : dbt2->size;
  order = memcmp (dbt1->data, dbt2->data, common);
  if (order != 0)
    return order;
  if (dbt1->size == dbt2->size)
    return 0;
  return dbt1->size < dbt2->size ? -1 : 1;
}


/* Open one database (B-tree) file, described by DBF, with the
   signals of BLOCK_SET blocked.  The comparison function, the
   duplicates flag and the cache size are taken from DBF.  If CREATE
   is non-zero, the file is opened for reading and writing and is
   created if it does not exist; otherwise it is opened for reading
   only.  On failure, print a message and exit with termination code
   2.  On success, store the handle to DBF->db. */

static void wordref_open_one (struct dbfile *dbf, int create)
{
  BTREEINFO info;
  DB *db;
  int oflags;

  /* Clear the entire structure first so that no member -- including
     those not assigned below -- is passed to dbopen() with an
     indeterminate value. */

  memset (&info, 0, sizeof (info));
  info.flags = dbf->dup ? R_DUP : 0;
  info.cachesize = dbf->cache_size;
  info.psize = 4096;
  info.lorder = 0;
  info.minkeypage = 0;
  info.compare = dbf->compare;
  info.prefix = NULL;
  oflags = create ? O_CREAT | O_RDWR : O_RDONLY;
  block ();
  db = dbopen (dbf->fname, oflags, S_IREAD | S_IWRITE, DB_BTREE, &info);
  if (db == NULL)
    {
      /* Report the error before unblock() gets a chance to change
         errno. */

      perror (dbf->fname);
      unblock ();
      exit (2);
    }
  dbf->db = db;
  unblock ();
}


/* Open all the database files.  Create non-existing files if CREATE
   is non-zero. */

static void wordref_open (int create)
{
  int i;

  cleanup = 1;
  for (i = 0; i < 3; ++i)
    wordref_open_one (&dbfile[i], create);
}


/* Conversion table for converting ASCII to upper case, to be used
   with conv_table().  The table is indexed by the character code;
   each entry is the replacement string.  `a' through `z' are
   replaced with `A' through `Z', all the other characters are kept
   as is.  Entry 0 is the empty string, that is, conv_table() drops
   null characters. */

static const char *conv_ascii_upper[256] =
{
  "\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007",
  "\010", "\011", "\012", "\013", "\014", "\015", "\016", "\017",
  "\020", "\021", "\022", "\023", "\024", "\025", "\026", "\027",
  "\030", "\031", "\032", "\033", "\034", "\035", "\036", "\037",
  " ",    "!",    "\"",   "#",    "$",    "%",    "&",    "'",
  "(",    ")",    "*",    "+",    ",",    "-",    ".",    "/",
  "0",    "1",    "2",    "3",    "4",    "5",    "6",    "7",
  "8",    "9",    ":",    ";",    "<",    "=",    ">",    "?",
  "@",    "A",    "B",    "C",    "D",    "E",    "F",    "G",
  "H",    "I",    "J",    "K",    "L",    "M",    "N",    "O",
  "P",    "Q",    "R",    "S",    "T",    "U",    "V",    "W",
  "X",    "Y",    "Z",    "[",    "\\",   "]",    "^",    "_",
  "`",    "A",    "B",    "C",    "D",    "E",    "F",    "G",
  "H",    "I",    "J",    "K",    "L",    "M",    "N",    "O",
  "P",    "Q",    "R",    "S",    "T",    "U",    "V",    "W",
  "X",    "Y",    "Z",    "{",    "|",    "}",    "~",    "\177",
  "\200", "\201", "\202", "\203", "\204", "\205", "\206", "\207",
  "\210", "\211", "\212", "\213", "\214", "\215", "\216", "\217",
  "\220", "\221", "\222", "\223", "\224", "\225", "\226", "\227",
  "\230", "\231", "\232", "\233", "\234", "\235", "\236", "\237",
  "\240", "\241", "\242", "\243", "\244", "\245", "\246", "\247",
  "\250", "\251", "\252", "\253", "\254", "\255", "\256", "\257",
  "\260", "\261", "\262", "\263", "\264", "\265", "\266", "\267",
  "\270", "\271", "\272", "\273", "\274", "\275", "\276", "\277",
  "\300", "\301", "\302", "\303", "\304", "\305", "\306", "\307",
  "\310", "\311", "\312", "\313", "\314", "\315", "\316", "\317",
  "\320", "\321", "\322", "\323", "\324", "\325", "\326", "\327",
  "\330", "\331", "\332", "\333", "\334", "\335", "\336", "\337",
  "\340", "\341", "\342", "\343", "\344", "\345", "\346", "\347",
  "\350", "\351", "\352", "\353", "\354", "\355", "\356", "\357",
  "\360", "\361", "\362", "\363", "\364", "\365", "\366", "\367",
  "\370", "\371", "\372", "\373", "\374", "\375", "\376", "\377"
};

/* Conversion table for converting ASCII to lower case, to be used
   with conv_table().  The table is indexed by the character code;
   each entry is the replacement string.  `A' through `Z' are
   replaced with `a' through `z', all the other characters are kept
   as is.  Entry 0 is the empty string, that is, conv_table() drops
   null characters. */

static const char *conv_ascii_lower[256] =
{
  "\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007",
  "\010", "\011", "\012", "\013", "\014", "\015", "\016", "\017",
  "\020", "\021", "\022", "\023", "\024", "\025", "\026", "\027",
  "\030", "\031", "\032", "\033", "\034", "\035", "\036", "\037",
  " ",    "!",    "\"",   "#",    "$",    "%",    "&",    "'",
  "(",    ")",    "*",    "+",    ",",    "-",    ".",    "/",
  "0",    "1",    "2",    "3",    "4",    "5",    "6",    "7",
  "8",    "9",    ":",    ";",    "<",    "=",    ">",    "?",
  "@",    "a",    "b",    "c",    "d",    "e",    "f",    "g",
  "h",    "i",    "j",    "k",    "l",    "m",    "n",    "o",
  "p",    "q",    "r",    "s",    "t",    "u",    "v",    "w",
  "x",    "y",    "z",    "[",    "\\",   "]",    "^",    "_",
  "`",    "a",    "b",    "c",    "d",    "e",    "f",    "g",
  "h",    "i",    "j",    "k",    "l",    "m",    "n",    "o",
  "p",    "q",    "r",    "s",    "t",    "u",    "v",    "w",
  "x",    "y",    "z",    "{",    "|",    "}",    "~",    "\177",
  "\200", "\201", "\202", "\203", "\204", "\205", "\206", "\207",
  "\210", "\211", "\212", "\213", "\214", "\215", "\216", "\217",
  "\220", "\221", "\222", "\223", "\224", "\225", "\226", "\227",
  "\230", "\231", "\232", "\233", "\234", "\235", "\236", "\237",
  "\240", "\241", "\242", "\243", "\244", "\245", "\246", "\247",
  "\250", "\251", "\252", "\253", "\254", "\255", "\256", "\257",
  "\260", "\261", "\262", "\263", "\264", "\265", "\266", "\267",
  "\270", "\271", "\272", "\273", "\274", "\275", "\276", "\277",
  "\300", "\301", "\302", "\303", "\304", "\305", "\306", "\307",
  "\310", "\311", "\312", "\313", "\314", "\315", "\316", "\317",
  "\320", "\321", "\322", "\323", "\324", "\325", "\326", "\327",
  "\330", "\331", "\332", "\333", "\334", "\335", "\336", "\337",
  "\340", "\341", "\342", "\343", "\344", "\345", "\346", "\347",
  "\350", "\351", "\352", "\353", "\354", "\355", "\356", "\357",
  "\360", "\361", "\362", "\363", "\364", "\365", "\366", "\367",
  "\370", "\371", "\372", "\373", "\374", "\375", "\376", "\377"
};

/* Conversion table for converting ISO 8859-1 to upper case, to be
   used with conv_table().  The table is indexed by the character
   code; each entry is the replacement string.  In addition to `a'
   through `z', the codes 0xe0 through 0xfe are replaced with 0xc0
   through 0xde, except for 0xf7 which is kept as is.  0xdf and 0xff
   are kept as is, too.  Entry 0 is the empty string, that is,
   conv_table() drops null characters.  The pointers of this table
   are not constant because init_word_case() replaces some of the
   entries for the -g option. */

static const char *conv_iso_8859_1_upper[256] =
{
  "\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007",
  "\010", "\011", "\012", "\013", "\014", "\015", "\016", "\017",
  "\020", "\021", "\022", "\023", "\024", "\025", "\026", "\027",
  "\030", "\031", "\032", "\033", "\034", "\035", "\036", "\037",
  " ",    "!",    "\"",   "#",    "$",    "%",    "&",    "'",
  "(",    ")",    "*",    "+",    ",",    "-",    ".",    "/",
  "0",    "1",    "2",    "3",    "4",    "5",    "6",    "7",
  "8",    "9",    ":",    ";",    "<",    "=",    ">",    "?",
  "@",    "A",    "B",    "C",    "D",    "E",    "F",    "G",
  "H",    "I",    "J",    "K",    "L",    "M",    "N",    "O",
  "P",    "Q",    "R",    "S",    "T",    "U",    "V",    "W",
  "X",    "Y",    "Z",    "[",    "\\",   "]",    "^",    "_",
  "`",    "A",    "B",    "C",    "D",    "E",    "F",    "G",
  "H",    "I",    "J",    "K",    "L",    "M",    "N",    "O",
  "P",    "Q",    "R",    "S",    "T",    "U",    "V",    "W",
  "X",    "Y",    "Z",    "{",    "|",    "}",    "~",    "\177",
  "\200", "\201", "\202", "\203", "\204", "\205", "\206", "\207",
  "\210", "\211", "\212", "\213", "\214", "\215", "\216", "\217",
  "\220", "\221", "\222", "\223", "\224", "\225", "\226", "\227",
  "\230", "\231", "\232", "\233", "\234", "\235", "\236", "\237",
  "\240", "\241", "\242", "\243", "\244", "\245", "\246", "\247",
  "\250", "\251", "\252", "\253", "\254", "\255", "\256", "\257",
  "\260", "\261", "\262", "\263", "\264", "\265", "\266", "\267",
  "\270", "\271", "\272", "\273", "\274", "\275", "\276", "\277",
  "\300", "\301", "\302", "\303", "\304", "\305", "\306", "\307",
  "\310", "\311", "\312", "\313", "\314", "\315", "\316", "\317",
  "\320", "\321", "\322", "\323", "\324", "\325", "\326", "\327",
  "\330", "\331", "\332", "\333", "\334", "\335", "\336", "\337",
  "\300", "\301", "\302", "\303", "\304", "\305", "\306", "\307",
  "\310", "\311", "\312", "\313", "\314", "\315", "\316", "\317",
  "\320", "\321", "\322", "\323", "\324", "\325", "\326", "\367",
  "\330", "\331", "\332", "\333", "\334", "\335", "\336", "\377"
};

/* Conversion table for converting ISO 8859-1 to lower case, to be
   used with conv_table().  The table is indexed by the character
   code; each entry is the replacement string.  In addition to `A'
   through `Z', the codes 0xc0 through 0xde are replaced with 0xe0
   through 0xfe, except for 0xd7 (multiplication sign) which is not a
   letter and is kept as is.  0xdf (sharp s) is kept as is, too.
   Entry 0 is the empty string, that is, conv_table() drops null
   characters.  The pointers of this table are not constant because
   init_word_case() replaces some of the entries for the -g
   option. */

static const char *conv_iso_8859_1_lower[256] =
{
  "\000", "\001", "\002", "\003", "\004", "\005", "\006", "\007",
  "\010", "\011", "\012", "\013", "\014", "\015", "\016", "\017",
  "\020", "\021", "\022", "\023", "\024", "\025", "\026", "\027",
  "\030", "\031", "\032", "\033", "\034", "\035", "\036", "\037",
  " ",    "!",    "\"",   "#",    "$",    "%",    "&",    "'",
  "(",    ")",    "*",    "+",    ",",    "-",    ".",    "/",
  "0",    "1",    "2",    "3",    "4",    "5",    "6",    "7",
  "8",    "9",    ":",    ";",    "<",    "=",    ">",    "?",
  "@",    "a",    "b",    "c",    "d",    "e",    "f",    "g",
  "h",    "i",    "j",    "k",    "l",    "m",    "n",    "o",
  "p",    "q",    "r",    "s",    "t",    "u",    "v",    "w",
  "x",    "y",    "z",    "[",    "\\",   "]",    "^",    "_",
  "`",    "a",    "b",    "c",    "d",    "e",    "f",    "g",
  "h",    "i",    "j",    "k",    "l",    "m",    "n",    "o",
  "p",    "q",    "r",    "s",    "t",    "u",    "v",    "w",
  "x",    "y",    "z",    "{",    "|",    "}",    "~",    "\177",
  "\200", "\201", "\202", "\203", "\204", "\205", "\206", "\207",
  "\210", "\211", "\212", "\213", "\214", "\215", "\216", "\217",
  "\220", "\221", "\222", "\223", "\224", "\225", "\226", "\227",
  "\230", "\231", "\232", "\233", "\234", "\235", "\236", "\237",
  "\240", "\241", "\242", "\243", "\244", "\245", "\246", "\247",
  "\250", "\251", "\252", "\253", "\254", "\255", "\256", "\257",
  "\260", "\261", "\262", "\263", "\264", "\265", "\266", "\267",
  "\270", "\271", "\272", "\273", "\274", "\275", "\276", "\277",
  "\340", "\341", "\342", "\343", "\344", "\345", "\346", "\347",
  "\350", "\351", "\352", "\353", "\354", "\355", "\356", "\357",
  "\360", "\361", "\362", "\363", "\364", "\365", "\366", "\327",
  "\370", "\371", "\372", "\373", "\374", "\375", "\376", "\337",
  "\340", "\341", "\342", "\343", "\344", "\345", "\346", "\347",
  "\350", "\351", "\352", "\353", "\354", "\355", "\356", "\357",
  "\360", "\361", "\362", "\363", "\364", "\365", "\366", "\367",
  "\370", "\371", "\372", "\373", "\374", "\375", "\376", "\377"
};


/* Convert the *LEN characters pointed to by S, replacing each
   character with the string found in TABLE (a table of 256 strings,
   indexed by the character code).  An empty string in TABLE drops
   the character.  Store the length of the result to *LEN.

   If all the replacement strings used are at most one character
   long, the conversion is done in place and S is returned.
   Otherwise, a new array is allocated for the result, S is left
   alone, and a pointer to the new array is returned; the caller
   should use free() to deallocate that array.  The result is not
   guaranteed to be null-terminated.  */

static char *conv_table (char *s, size_t *len, const char **table)
{
  size_t i, in_len, out_len, n;
  int expands;
  const char *t;
  char *d;

  in_len = *len;

  /* Pass 1: Compute the exact length of the result and find out
     whether any character is replaced with more than one character.
     Knowing the length in advance avoids growing the output array
     while filling it, which is where output could run past the end
     of the array. */

  out_len = 0; expands = 0;
  for (i = 0; i < in_len; ++i)
    {
      n = strlen (table[(unsigned char)s[i]]);
      if (n > 1)
        expands = 1;
      out_len += n;
    }

  /* The result cannot be longer than the input unless there is an
     expansion, so converting in place is safe in that case: the
     write position never gets ahead of the read position. */

  if (expands)
    d = xmalloc (out_len + 1);
  else
    d = s;

  /* Pass 2: Build the result. */

  out_len = 0;
  for (i = 0; i < in_len; ++i)
    {
      t = table[(unsigned char)s[i]];
      n = strlen (t);
      if (n == 1)
        d[out_len] = *t;
      else if (n > 1)
        memcpy (d + out_len, t, n);
      out_len += n;
    }

  /* There's room for a null character in a newly allocated array;
     callers must not rely on it, though, as there is none after an
     in-place conversion. */

  if (expands)
    d[out_len] = 0;

  *len = out_len;
  return d;
}


/* Convert the word pointed to by S to lower case, using the
   character set selected by CHARSET.  The word contains *LEN
   characters.  Return a pointer to the converted word and update the
   length pointed to by LEN.  If the return value does not equal S,
   the caller should use free() to deallocate the memory used by the
   converted word.  The process is aborted if CHARSET is not
   supported. */

static char *word_lower (char *s, size_t *len)
{
  if (charset == CS_ASCII)
    return conv_table (s, len, conv_ascii_lower);
  if (charset == CS_ISO_8859_1)
    return conv_table (s, len, conv_iso_8859_1_lower);
#ifdef __EMX__
  if (charset == CS_NLS)
    {
      /* _nls_strlwr() wants a null-terminated string, so we have to
         convert a copy of the word. */

      char *copy = xmalloc (*len + 1);

      memcpy (copy, s, *len);
      copy[*len] = 0;
      _nls_strlwr (copy);
      return copy;
    }
#endif
  abort ();
}


/* Convert the word pointed to by S to upper case, using the
   character set selected by CHARSET.  The word contains *LEN
   characters.  Return a pointer to the converted word and update the
   length pointed to by LEN.  If the return value does not equal S,
   the caller should use free() to deallocate the memory used by the
   converted word.  The process is aborted if CHARSET is not
   supported. */

static char *word_upper (char *s, size_t *len)
{
  if (charset == CS_ASCII)
    return conv_table (s, len, conv_ascii_upper);
  if (charset == CS_ISO_8859_1)
    return conv_table (s, len, conv_iso_8859_1_upper);
#ifdef __EMX__
  if (charset == CS_NLS)
    {
      /* _nls_strupr() wants a null-terminated string, so we have to
         convert a copy of the word. */

      char *copy = xmalloc (*len + 1);

      memcpy (copy, s, *len);
      copy[*len] = 0;
      _nls_strupr (copy);
      return copy;
    }
#endif
  abort ();
}


/* Case-convert the word of *LEN characters pointed to by S as
   selected by WORD_CASE: 'l' converts to lower case, 'u' converts to
   upper case, any other value keeps the word and *LEN as is.  If the
   return value does not equal S, the caller should use free() to
   deallocate the memory used by the converted word. */

static char *word_conv (char *s, size_t *len)
{
  if (word_case == 'l')
    return word_lower (s, len);
  if (word_case == 'u')
    return word_upper (s, len);
  return s;
}


/* Interpret the option letter C, which is one of the options `-g',
   `-i', `-l', `-n' (emx only), and `-u'.  Call usage() -- which
   terminates the process -- if the option letter is not known, if
   both `-l' and `-u' are given (or one of them twice), or if a
   character set has already been selected. */

static void set_word_case (int c)
{
  if (c == 'l' || c == 'u')
    {
      if (word_case != 0)
        usage ();
      word_case = (char)c;
    }
  else if (c == 'i')
    {
      if (charset != CS_ASCII)
        usage ();
      charset = CS_ISO_8859_1;
    }
#ifdef __EMX__
  else if (c == 'n')
    {
      if (charset != CS_ASCII)
        usage ();
      charset = CS_NLS;
    }
#endif
  else if (c == 'g')
    german = 1;
  else
    usage ();
}


/* Check the combination of the options for case conversion and
   initialize the conversion tables.  Print a message and exit with
   termination code 2 if `-g' is used without `-i', if `-g' is used
   without `-l' or `-u', or if a character set is selected without
   `-l' or `-u'.  To be called after processing all the options. */

static void init_word_case (void)
{
  if (german && charset != CS_ISO_8859_1)
    {
      fputs ("-g requires -i\n", stderr);
      exit (2);
    }
  if (german && word_case == 0)
    {
      /* TODO: Support -g without -l and -u */
      fputs ("-g requires -l or -u\n", stderr);
      exit (2);
    }
  if (charset != CS_ASCII && word_case == 0)
    {
#ifdef __EMX__
      fputs ("-i and -n require -l or -u\n", stderr);
#else
      fputs ("-i requires -l or -u\n", stderr);
#endif
      exit (2);
    }

  if (german)
    {
      /* Replace the umlauts (both upper case and lower case) and
         sharp s with their two-letter transcriptions. */

      conv_iso_8859_1_upper[0xc4] = "AE";
      conv_iso_8859_1_upper[0xd6] = "OE";
      conv_iso_8859_1_upper[0xdc] = "UE";
      conv_iso_8859_1_upper[0xe4] = "AE";
      conv_iso_8859_1_upper[0xf6] = "OE";
      conv_iso_8859_1_upper[0xfc] = "UE";
      conv_iso_8859_1_upper[0xdf] = "SS";
      conv_iso_8859_1_lower[0xc4] = "ae";
      conv_iso_8859_1_lower[0xd6] = "oe";
      conv_iso_8859_1_lower[0xdc] = "ue";
      conv_iso_8859_1_lower[0xe4] = "ae";
      conv_iso_8859_1_lower[0xf6] = "oe";
      conv_iso_8859_1_lower[0xfc] = "ue";

      /* Transcribe sharp s for -l as well, to match what is done for
         -u above; otherwise words would be found under different
         spellings depending on the case option.  Note that words
         containing sharp s which have been added with -g -l before
         this entry existed are stored with the sharp s; add them
         again to find them. */

      conv_iso_8859_1_lower[0xdf] = "ss";
    }
}


/* Add the word pointed to by WORD of WORD_LEN characters to the
   database.  WORD need not be null-terminated.  The word is in line
   LINENO of file FNAME (a null-terminated string).  The word is
   case-converted as selected by WORD_CASE; note that the array
   pointed to by WORD may be modified if WORD_CASE is 'u' or 'l'.

   If FNAME is not yet in the database, a new file number is assigned
   and records are added to DBF1 and DBF2.  The process is terminated
   with termination code 2 if FNAME is too long for CUR_FNAME or if a
   database operation fails. */

static void add_word (const char *fname, unsigned lineno,
                      char *word, size_t word_len)
{
  DBT key, data;
  struct key0 *pk0;
  struct key1 k1;
  struct data2 d2;
  char *cvt_word;
  size_t fname_len;

  /* Case-convert the word as requested. */

  cvt_word = word_conv (word, &word_len);

  /* If the fileno for the file name is cached, use the cached
     value. */

  if (cur_fname[0] == 0 || strcmp (cur_fname, fname) != 0)
    {
      /* The file name will be copied to CUR_FNAME no matter whether
         it's already in the database or not.  Check the length up
         front to avoid writing beyond the end of CUR_FNAME in either
         case. */

      fname_len = strlen (fname);
      if (fname_len >= sizeof (cur_fname))
        {
          fputs ("File name too long\n", stderr);
          exit (2);
        }

      /* Search DBF2 for the file name. */

      key.data = (void *)fname; key.size = fname_len;
      data.data = NULL; data.size = 0;
      if (db_get (&DBF2, &key, &data, 0) == 0)
        {
          /* Found.  Use the fileno from the database.  Copy the
             structure to avoid alignment problems on certain
             architectures. */

          memcpy (&d2, data.data, sizeof (d2));
          cur_fileno = d2.fileno;
          strcpy (cur_fname, fname);
        }
      else
        {
          /* Not found.  The file name is a new one, not yet in the
             database.  We have to add a new record to DBF1 and DBF2.
             First, we have to find a unique fileno; we use the
             biggest fileno in DBF1 plus 1. */

          key.data = NULL; key.size = 0;
          data.data = NULL; data.size = 0;
          if (db_seq (&DBF1, &key, &data, R_LAST) == 0)
            {
              /* Copy the structure to avoid alignment problems on
                 certain architectures. */
              memcpy (&k1, key.data, sizeof (k1));
              cur_fileno = k1.fileno + 1;
            }
          else
            cur_fileno = 1;

          /* Set CUR_FNAME; the length has been checked above. */

          strcpy (cur_fname, fname);

          /* Add a new fileno -> file name mapping to DBF1.  Note that
             the file name in DBF1 is not null-terminated! */

          k1.fileno = cur_fileno;
          key.data = &k1; key.size = sizeof (k1);
          data.data = (void *)fname; data.size = fname_len;
          db_put (&DBF1, &key, &data, 0);

          /* Add a new file name -> fileno mapping to DBF2.  Note that
             the file name in DBF2 is not null-terminated! */

          key.data = (void *)fname; key.size = fname_len;
          d2.fileno = cur_fileno;
          data.data = &d2; data.size = sizeof (d2);
          db_put (&DBF2, &key, &data, 0);
        }
    }

  /* OK, now we have a file number in CUR_FILENO.  Add the (word,
     fileno, lineno) key to DBF0.  There's no data.  struct key0
     already includes one character of the word, which makes room for
     the terminating null character. */

  pk0 = xmalloc (sizeof (struct key0) + word_len);
  memcpy (pk0->word, cvt_word, word_len);
  pk0->word[word_len] = 0;
  pk0->fileno = cur_fileno;
  pk0->lineno = lineno;
  key.data = pk0; key.size = sizeof (struct key0) + word_len;
  data.data = NULL; data.size = 0;
  db_put (&DBF0, &key, &data, avoid_dupes ? R_NOOVERWRITE : 0);
  free (pk0);

  /* word_conv() returns WORD itself unless it had to allocate a new
     array. */

  if (cvt_word != word)
    free (cvt_word);
}


/* Implement the `add' command for one file, FNAME: Add all the words
   of that file to the database.  A word is a sequence of letters; if
   LANG_C is true, words may also contain `_' and -- except for the
   first character -- digits.  The process is terminated with
   termination code 2 if the file cannot be opened or if we run out
   of memory.  Return the value returned by wordref_close(): 0 if
   successful, a non-zero termination code otherwise. */

static int wordref_add_file (const char *fname)
{
  FILE *f;
  size_t max_word_len, word_len;
  unsigned lineno, word_count;
  char *word;
  int c;

  /* Open the file and abort if we fail to do that. */

  f = fopen (fname, "r");
  if (f == NULL)
    {
      perror (fname);
      exit (2);
    }

  /* Print a progress message and open the database files. */

  printf ("Processing \"%s\"...\n", fname);
  wordref_open (1);

  /* This loop reads words, discarding characters which don't belong
     to words.  All words are added to the database by calling
     add_word().  The word is stored in the array pointed to by WORD;
     the array has a size of MAX_WORD_LEN characters.  Extend the
     array as needed.  The word has WORD_LEN characters.  Note that
     WORD is not null-terminated. */

  word = NULL; word_len = 0; max_word_len = 0; lineno = 1; word_count = 0;
  c = fgetc (f);
  while (c != EOF)
    {
      if (isalpha (c)
          || (lang_c && (c == '_' || (word_len > 0 && isdigit (c)))))
        {
          /* Character C belongs to a word.  Add the character to WORD
             and update WORD_LEN. */

          if (word_len >= max_word_len)
            {
              /* We have to extend WORD.  Overwriting WORD is OK as
                 we don't continue if realloc() fails. */

              max_word_len += 512;
              word = realloc (word, max_word_len);
              if (word == NULL)
                {
                  fputs ("Out of memory\n", stderr);
                  exit (2);
                }
            }
          word[word_len++] = (char)c;
        }
      else
        {
          /* Character C does not belong to a word.  If there's a word
             in WORD, add that word to the database. */

          if (word_len != 0)
            {
              add_word (fname, lineno, word, word_len);
              word_len = 0; ++word_count;
            }

          /* Update the line counter when reaching the end of a
             line. */

          if (c == '\n')
            ++lineno;
        }
      c = fgetc (f);
    }

  /* Add the last word if the last word of the file is directly
     followed by EOF. */

  if (word_len != 0)
    {
      add_word (fname, lineno, word, word_len);
      ++word_count;
    }

  /* Deallocate the word buffer; this function is called once per
     file, so the buffer must not be left behind.  WORD is NULL if
     the file does not contain any words; that's fine with free(). */

  free (word);

  /* Close the files and print another progress message. */

  fclose (f);
  printf ("\"%s\" contains %u words.\n", fname, word_count);
  return wordref_close ();
}


/* Implement the `add' command.  Parse the options in ARGV (ARGC
   elements) with getopt(), check them, and add the words of all the
   files named by the remaining arguments to the database.  usage()
   is called for a bad option or if no file is given.  The process is
   terminated with the termination code returned by
   wordref_add_file() as soon as processing a file fails. */

static void wordref_add (int argc, char *argv[])
{
  int opt, arg, status;

  for (;;)
    {
      opt = getopt (argc, argv, "cdgilnu");
      if (opt == -1)
        break;
      if (opt == 'c')
        lang_c = 1;
      else if (opt == 'd')
        avoid_dupes = 1;
      else if (opt == 'l' || opt == 'u' || opt == 'i' || opt == 'n'
               || opt == 'g')
        set_word_case (opt);
      else
        usage ();
    }
  init_word_case ();
  if (optind >= argc)
    usage ();
  for (arg = optind; arg < argc; ++arg)
    {
      status = wordref_add_file (argv[arg]);
      if (status != 0)
        exit (status);
    }
}


/* Implement the `load' command for one file, FNAME.  If FNAME is
   "-", standard input is read.  Each line of the file should consist
   of a word and a file name, separated by a tab character; the word
   is added to the database for that file name, with line number 0.
   Lines not containing a tab character are counted as bad lines and
   are ignored.  The process is terminated with termination code 2
   if the file cannot be opened or if we run out of memory.  Return
   the value returned by wordref_close(): 0 if successful, a non-zero
   termination code otherwise. */

static int wordref_load_file (const char *fname)
{
  FILE *f;
  size_t max_word_len, word_len, max_where_len, where_len;
  unsigned word_count, bad_count;
  char *word, *where;
  int c;

  if (strcmp (fname, "-") == 0)
    f = stdin;
  else
    {
      /* Open the file and abort if we fail to do that. */

      f = fopen (fname, "r");
      if (f == NULL)
        {
          perror (fname);
          exit (2);
        }
    }

  /* Print a progress message and open the database files. */

  printf ("Processing \"%s\"...\n", fname);
  wordref_open (1);

  /* This loop reads lines consisting of a word and a file name,
     each, separated by a tab character.  All words are added to the
     database by calling add_word().  The word is stored in the array
     pointed to by WORD; the array has a size of MAX_WORD_LEN
     characters.  Extend the array as needed.  The word has WORD_LEN
     characters.  Note that WORD is not null-terminated.  The file
     name is stored in the array pointed to by WHERE in the same way,
     but it is null-terminated before being used. */

  word_count = 0; bad_count = 0;
  max_word_len = 512; word = xmalloc (max_word_len); word_len = 0;
  max_where_len = 512; where = xmalloc (max_where_len); where_len = 0;
  c = fgetc (f);
  while (c != EOF)
    {
      while (c != EOF && c != '\n' && c != '\t')
        {
          /* Add the character to WORD and update WORD_LEN. */

          if (word_len >= max_word_len)
            {
              /* We have to extend WORD. */

              max_word_len += 512;
              word = realloc (word, max_word_len);
              if (word == NULL)
                {
                  fputs ("Out of memory\n", stderr);
                  exit (2);
                }
            }
          word[word_len++] = (char)c;
          c = fgetc (f);
        }
      if (c == '\t')
        {
          where_len = 0;
          c = fgetc (f);
          while (c != EOF && c != '\n')
            {
              /* Add the character to WHERE, always keeping room for
                 the terminating null character. */

              if (where_len + 1 >= max_where_len)
                {
                  /* We have to extend WHERE. */

                  max_where_len += 512;
                  where = realloc (where, max_where_len);
                  if (where == NULL)
                    {
                      fputs ("Out of memory\n", stderr);
                      exit (2);
                    }
                }
              where[where_len++] = (char)c;
              c = fgetc (f);
            }
          where[where_len] = 0;
          add_word (where, 0, word, word_len);
          word_len = 0; ++word_count;

          /* Skip the newline character. */

          if (c != EOF)
            c = fgetc (f);
        }
      else
        {
          /* Ignore line. */
          while (c != EOF && c != '\n')
            c = fgetc (f);
          if (c != EOF)
            c = fgetc (f);
          word_len = 0; ++bad_count;
        }
    }

  /* Deallocate the buffers; this function is called once per file,
     so they must not be left behind. */

  free (word);
  free (where);

  /* Close the files and print another progress message.  Don't close
     standard input: it's not ours, and "-" may be given more than
     once on the command line. */

  if (f != stdin)
    fclose (f);
  printf ("\"%s\" contains %u valid lines, %u bad lines.\n",
          fname, word_count, bad_count);
  return wordref_close ();
}


/* Implement the `load' command.  ARGC and ARGV describe the command
   line starting at the command name.  Option -d sets AVOID_DUPES, the
   options -g, -i, -l, -n, and -u are passed to set_word_case().  Each
   remaining argument is handed to wordref_load_file(); the program
   exits with that function's return value as soon as it is
   non-zero. */

static void wordref_load (int argc, char *argv[])
{
  int opt, arg, status;

  /* Parse the options. */

  for (;;)
    {
      opt = getopt (argc, argv, "dgilnu");
      if (opt == -1)
        break;
      if (opt == 'd')
        avoid_dupes = 1;
      else if (opt == 'g' || opt == 'i' || opt == 'l' || opt == 'n'
               || opt == 'u')
        set_word_case (opt);
      else
        usage ();
    }
  init_word_case ();

  /* There must be at least one file name after the options. */

  if (argc <= optind)
    usage ();

  /* Load the files one by one, giving up on the first failure. */

  arg = optind;
  while (arg < argc)
    {
      status = wordref_load_file (argv[arg]);
      if (status != 0)
        exit (status);
      ++arg;
    }
}


/* Show the DBF0 record (key) pointed to by K0.  Include the word if
   WORD is non-NULL. */

static void show_record (struct key0 *pk0, const char *word)
{
  DBT key, data;
  struct key1 k1;

  /* Get the file name for the file number.  If the file number is in
     the cache, just use the cached file name. */

  if (cur_fname[0] == 0 || pk0->fileno != cur_fileno)
    {
      /* The file number is not cached.  We have to search DBF1. */

      k1.fileno = pk0->fileno;
      key.data = &k1; key.size = sizeof (k1);
      data.data = NULL; data.size = 0;
      if (db_get (&DBF1, &key, &data, 0) != 0)
        {
          fprintf (stderr, "File number %u not found.\n", pk0->fileno);
          exit (2);
        }

      /* Update the cache.  Note that the file name in DBF1 is not
         null-terminated. */

      memcpy (cur_fname, data.data, data.size);
      cur_fname[data.size] = 0;
      cur_fileno = pk0->fileno;
    }

  /* Print the values. */

  if (word != NULL && pk0->lineno != 0)
    printf ("%s:%u:%s\n", cur_fname, pk0->lineno, word);
  else if (word != NULL)
    printf ("%s:%s\n", cur_fname, word);
  else if (pk0->lineno != 0)
    printf ("%s:%u\n", cur_fname, pk0->lineno);
  else
    printf ("%s\n", cur_fname);
}


/* Implement the `find' command. */

static void wordref_find (int argc, char *argv[])
{
  DBT key, data;
  struct key0 k0, *pk0;
  char *word, *cvt_word;
  int rc, c;
  size_t word_len;

  while ((c = getopt (argc, argv, "gilnu")) != -1)
    switch (c)
      {
      case 'l': case 'u': case 'i': case 'n': case 'g':
        set_word_case (c);
        break;
      default:
        usage ();
      }
  init_word_case ();
  if (argc - optind != 1)
    usage ();

  word = argv[optind];
  word_len = strlen (word);
  cvt_word = word_conv (word, &word_len);

  /* Open the database files, don't create them if they don't
     exist (this will result in an error message). */

  wordref_open (0);

  /* Search DBF0 for the smallest key greater than or equal to the key
     (word, 0, 0).  This will find the first line of the first file
     containing the word. */

  pk0 = xmalloc (sizeof (struct key0) + word_len);
  memcpy (pk0->word, cvt_word, word_len);
  pk0->word[word_len] = 0;
  pk0->fileno = 0; pk0->lineno = 0;
  key.data = pk0; key.size = sizeof (struct key0) + word_len;
  data.data = NULL; data.size = 0;
  rc = db_seq (&DBF0, &key, &data, R_CURSOR);
  free (pk0);
  while (rc == 0)
    {
      /* We found a (or another) key.  However, the key might not
         match the word.  Stop looping if the word doesn't match. */

      pk0 = (struct key0 *)key.data;
      if (strlen (pk0->word) != word_len
          || memcmp (pk0->word, cvt_word, word_len) != 0)
        break;

      /* Show the record and fetch the next one.  Copy the structure
         to avoid alignment problems on certain architectures. */

      memcpy (&k0, key.data, sizeof (k0));
      show_record (&k0, NULL);
      rc = db_seq (&DBF0, &key, &data, R_NEXT);
    }

  /* Close the database files and quit on error. */

  rc = wordref_close ();
  if (rc != 0)
    exit (rc);
  if (cvt_word != word)
    free (cvt_word);
}


/* Implement the `list' command. */

static void wordref_list (void)
{
  DBT key, data;
  struct key0 k0;
  int rc;

  /* Open the database files, don't create them if they don't
     exist (this will result in an error message). */

  wordref_open (0);

  /* Sequentially scan DBF0. */

  key.data = NULL; key.size = 0;
  data.data = NULL; data.size = 0;
  rc = db_seq (&DBF0, &key, &data, R_FIRST);
  while (rc == 0)
    {
      /* Show the record and fetch the next one.  Copy the structure
         to avoid alignment problems on certain architectures. */

      memcpy (&k0, key.data, sizeof (k0));
      show_record (&k0, ((const struct key0 *)key.data)->word);
      rc = db_seq (&DBF0, &key, &data, R_NEXT);
    }

  /* Close the database files and quit on error. */

  rc = wordref_close ();
  if (rc != 0)
    exit (rc);
}


/* Implement the `clear' command: remove the three database files.
   A message is printed with perror() for each file which cannot be
   removed; the confirmation is printed only if all the files have
   been removed. */

static void wordref_clear (void)
{
  int failures, idx;

  failures = 0;
  for (idx = 0; idx < 3; ++idx)
    {
      if (remove (dbfile[idx].fname) == 0)
        continue;
      perror (dbfile[idx].fname);
      ++failures;
    }
  if (failures == 0)
    printf ("Database deleted.\n");
}


/* Implement the `reorg' command: copy each database file
   sequentially to a temporary file and replace the database file
   with the temporary file.  The program exits with status 2 if one
   of the temporary files already exists or if a database file
   cannot be replaced. */

static void wordref_reorg (void)
{
  int rc, rc_temp, idx, existing;
  DBT key, data;
  char temp_name[MAXPATHLEN];

  /* Set up the names of the temporary database files. */

  for (idx = 0; idx < 3; ++idx)
    sprintf (dbtemp[idx].fname, "%s.tm%d", basename, idx);

  /* Refuse to run if any of the temporary files exists already. */

  existing = 0;
  for (idx = 0; idx < 3; ++idx)
    {
      if (access (dbtemp[idx].fname, 0) != 0)
        continue;
      printf ("%s: file exists.\n", dbtemp[idx].fname);
      ++existing;
    }
  if (existing != 0)
    {
      printf ("Database not reorganized; please delete the file%s "
              "listed above.\n", existing == 1 ? "" : "s");
      exit (2);
    }

  /* Reorganize the database files one by one. */

  for (idx = 0; idx < 3; ++idx)
    {
      /* CLEANUP is set only now, after the check for existing
         temporary files, and before the temporary file is
         created. */

      cleanup = 1;
      wordref_open_one (&dbfile[idx], 0);
      wordref_open_one (&dbtemp[idx], 1);

      /* Copy all the records sequentially from the source file to
         the temporary file.  With the BSD database library, copying
         a B-tree file sequentially will build an optimal tree. */

      data.data = NULL; data.size = 0;
      key.data = NULL; key.size = 0;
      for (rc = db_seq (&dbfile[idx], &key, &data, R_FIRST); rc == 0;
           rc = db_seq (&dbfile[idx], &key, &data, R_NEXT))
        db_put (&dbtemp[idx], &key, &data, 0);

      /* Always close both files.  Report the first failure, if
         any. */

      rc = wordref_close_one (&dbfile[idx]);
      rc_temp = wordref_close_one (&dbtemp[idx]);
      if (rc == 0)
        rc = rc_temp;
      if (rc != 0)
        exit (rc);

      /* Replace the database file with the temporary file.  The
         name of the temporary file is cleared in DBTEMP before the
         database file is removed to prevent the temporary file from
         being deleted. */

      strcpy (temp_name, dbtemp[idx].fname);
      dbtemp[idx].fname[0] = 0;
      if (remove (dbfile[idx].fname) != 0
          || rename (temp_name, dbfile[idx].fname) != 0)
        {
          perror (dbfile[idx].fname);
          exit (2);
        }
    }
  printf ("Database reorganized.\n");
}


/* Install a signal-catching function, term_sig (), for signal
   SIGNO. */

static void catch_sig (int signo)
{
  struct sigaction sa;

  sa.sa_handler = term_sig;
  sa.sa_flags = 0;
  sigemptyset (&sa.sa_mask);
  if (sigaction (signo, &sa, NULL) != 0)
    {
      perror ("sigaction");
      exit (2);
    }
}


/* Initialize this program: fill in the descriptions of the database
   files, set up signal handling and atexit() processing, and clear
   the file name cache. */

static void init (void)
{
  int i;

  /* Set up the database file descriptions. */

  DBF0.compare = compare0; DBF0.dup = 1;
  DBF0.cache_size = 2048 * 1024;
  DBF1.compare = compare1; DBF1.dup = 0;
  DBF1.cache_size = 64 * 1024;
  DBF2.compare = compare2; DBF2.dup = 0;
  DBF2.cache_size = 64 * 1024;

  /* The descriptions of the temporary files are copies of the
     descriptions of the database files, but they don't have a file
     name yet. */

  for (i = 0; i < 3; ++i)
    {
      dbfile[i].db = NULL;
      dbtemp[i] = dbfile[i];
      sprintf (dbfile[i].fname, "%s.db%d", basename, i);
      dbtemp[i].fname[0] = 0;
    }

  /* Set up signal processing.  BLOCK_SET holds the signals which
     terminate the program. */

  sigemptyset (&block_set);
  sigaddset (&block_set, SIGINT);
#ifdef SIGBREAK
  sigaddset (&block_set, SIGBREAK);
#endif
  sigaddset (&block_set, SIGTERM);
  sigaddset (&block_set, SIGHUP);

  /* Catch every signal of BLOCK_SET.  This includes SIGTERM: a
     signal which is not caught performs its default action, which
     terminates the process without calling the function registered
     with atexit() below. */

  catch_sig (SIGINT);
#ifdef SIGBREAK
  catch_sig (SIGBREAK);
#endif
  catch_sig (SIGTERM);
  catch_sig (SIGHUP);

  /* Set up atexit() processing. */

  atexit (wordref_atexit);

  /* Clear the cache. */

  cur_fname[0] = 0;

#ifdef __EMX__
  _nls_init ();
#endif
}


/* The program starts here.  The first argument selects the command:
   `clear', `list', and `reorg' take no further arguments; `add',
   `load', and `find' require at least one further argument.
   Anything else is answered by usage(). */

int main (int argc, char *argv[])
{
  const char *cmd;

  /* Support wildcards: `wordref add *.doc'. */

#ifdef __EMX__
  _wildcard (&argc, &argv);
#endif

  /* Initialize. */

  init ();

  /* Dispatch on the number of arguments first, then on the command
     name.  The commands taking arguments receive the command line
     starting at the command name. */

  if (argc == 2)
    {
      cmd = argv[1];
      if (strcmp (cmd, "clear") == 0)
        wordref_clear ();
      else if (strcmp (cmd, "list") == 0)
        wordref_list ();
      else if (strcmp (cmd, "reorg") == 0)
        wordref_reorg ();
      else
        usage ();
    }
  else if (argc >= 3)
    {
      cmd = argv[1];
      if (strcmp (cmd, "add") == 0)
        wordref_add (argc - 1, argv + 1);
      else if (strcmp (cmd, "load") == 0)
        wordref_load (argc - 1, argv + 1);
      else if (strcmp (cmd, "find") == 0)
        wordref_find (argc - 1, argv + 1);
      else
        usage ();
    }
  else
    usage ();

  /* We come here after successful processing of a command. */

  cleanup = 0;
  return 0;
}
