/* -*- Mode: Text -*- */
#ifndef lint
static char Rcs_Id[] =
    "$Id: buildhash.c,v 1.44 1992/01/07 10:04:46 geoff Exp $";
#endif

#define MAIN

/*
 * buildhash.c - make a hash table for ispell
 *
 * Pace Willisson, 1983
 *
 * Copyright 1987, 1988, 1989, by Geoff Kuenning, Manhattan Beach, CA
 * Permission for non-profit use is hereby granted.
 * All other rights reserved.
 * See "version.h" for a more complete copyright notice.
 */

/*
 * $Log: buildhash.c,v $
 * Revision 1.44  1992/01/07  10:04:46  geoff
 * Fix an unintentional infinite loop in the overflow processing.
 *
 * Revision 1.43  91/07/15  19:26:37  geoff
 * Provide the "canonical" parameter to all strtoichar and strtosichar calls.
 * 
 * Revision 1.42  91/07/11  19:51:53  geoff
 * Move the include files so that param.h gets included before stat.h.
 * Remove the include of stdio.h, since that's now done by ispell.h.
 * 
 * Revision 1.41  91/07/05  20:31:49  geoff
 * Fix some more lint complaints.
 * 
 * Revision 1.40  91/07/05  19:51:35  geoff
 * Fix a bunch of lint complaints.
 * 
 * Revision 1.39  91/07/03  18:20:13  geoff
 * Don't include types.h, dir.h, or param.h, since config.h now does that.
 * 
 * Revision 1.38  91/06/23  22:01:00  geoff
 * On non-USG systems, include sys/dir.h for MAXNAMLEN
 * 
 * Revision 1.37  91/05/27  21:32:54  geoff
 * Add code to write the string character type tables to the hash file.
 * 
 * Revision 1.36  91/01/27  00:43:29  geoff
 * Replace the old hard-to-interpret statistics report with a new one which
 * directly gives the number of probes, number of words taking that number
 * of probes, cumulative total number of words, and cumulative average
 * number of probes.
 * 
 * Revision 1.35  90/12/31  00:58:51  geoff
 * Reformat to follow a consistent convention throughout ispell
 * 
 * Revision 1.34  89/12/27  03:16:50  geoff
 * Move all messages to msgs.h so they can be reconfigured
 * 
 * Revision 1.33  89/10/20  00:11:04  geoff
 * Add version.h
 * 
 * Revision 1.32  89/07/11  00:18:38  geoff
 * Add Amiga support from luis@rice.edu.
 * 
 * Revision 1.31  89/06/09  15:51:32  geoff
 * Add support for the internal "character" type, ichar_t.
 * 
 * Revision 1.30  89/04/28  01:06:34  geoff
 * Change Header to Id;  nobody cares about my pathnames.
 * 
 * Revision 1.29  89/04/03  00:59:34  geoff
 * Fix a bunch of lint complaints (mostly casting unused return values to
 * void).  Also add support for the selectable flag marker.
 * 
 * Revision 1.28  89/02/20  22:09:34  geoff
 * Move a misplaced endif so we don't get into infinite loops if
 * capitalization is turned off.
 * 
 * Revision 1.27  89/01/30  22:54:55  geoff
 * Fix a misplaced right curly brace (Ken Stevens)
 * 
 * Revision 1.26  88/12/26  02:22:41  geoff
 * Add a copyright notice.
 * 
 * Revision 1.25  88/04/30  22:11:56  geoff
 * Fix some lint complaints.
 * 
 * Revision 1.24  88/03/27  00:56:48  geoff
 * Call combinecaps even when compiled without CAPITALIZATION.
 * 
 * Revision 1.23  88/03/23  00:40:43  geoff
 * Add the -s (silent) option.  Fix the reporting of table overflows
 * to be non-interactive, and to give only one message with the count.
 * Add a missing ifdef on CAPITALIZATION.  Get rid of an unused variable.
 * 
 * Revision 1.22  88/03/12  02:42:52  geoff
 * Correctly check for hash table overflow (>=, not >).  Also make the test
 * slightly more efficient by avoiding a multiply and add every loop trip.
 * Make sure the bumping of freepointer doesn't over-read the hash table.
 * 
 * Revision 1.21  88/02/20  23:09:35  geoff
 * Add the "-c" option to replace icombine.  Major changes to use the new
 * capitalization format.
 * 
 * Revision 1.20  87/09/26  15:52:35  geoff
 * Add code to check the maximum affix extension in the language table
 * and print a warning if there's a problem.
 * 
 * Revision 1.19  87/09/09  00:15:20  geoff
 * Speed up by a factor of five by calling malloc much less, and for larger
 * chunks of memory.  (This is somewhat, though not terribly, wasteful of
 * memory).
 * 
 * Revision 1.18  87/08/28  21:19:50  geoff
 * Change the argument order so that the hash file is last again.
 * 
 * Revision 1.17  87/07/20  23:20:31  geoff
 * Add code to parse and write out the language tables.  Delete the old
 * English-specific flag code.
 * 
 * Revision 1.16  87/06/09  00:15:21  geoff
 * Add a missing #else clause for CAPITALIZE
 * 
 * Revision 1.15  87/06/07  16:10:02  geoff
 * Fix a syntax error in the non-CAPITALIZE mode
 * 
 * Revision 1.14  87/05/30  13:00:55  geoff
 * Stefan Taxhet's missing-argument bugfix
 * 
 * Revision 1.13  87/04/24  20:31:54  geoff
 * Recognize slash as a word terminator in makedent.
 * 
 * Revision 1.12  87/04/21  23:28:49  geoff
 * Pad to a struct dent boundary with null bytes, not with near-garbage.
 * 
 * Revision 1.11  87/04/19  22:50:44  geoff
 * Add extensive code to handle capitalization.
 * 
 * Revision 1.10  87/04/01  15:21:53  geoff
 * Integrate Joe Orost's V7/register changes into the main branch
 * 
 * Revision 1.9  87/03/30  00:33:43  geoff
 * Move the include of stat.h so it compiles on BSD systems (Ken Yap).
 * 
 * Revision 1.8  87/03/27  17:20:22  geoff
 * Accept (but don't require) new-format dictionaries without excess slashes.
 * 
 * Revision 1.7  87/03/26  00:40:38  geoff
 * Don't include types.h on BSD systems (Jim Knutson)
 * 
 * Revision 1.6  87/03/26  00:30:23  geoff
 * Integrate Rich Salz's changes/improvements
 * 
 * Revision 1.5  87/03/23  00:07:27  geoff
 * Don Kark's don't-save-the-whole-dictionary bug fix
 * 
 * Revision 1.4  87/02/26  00:41:52  geoff
 * Integrate McQueer's and Ries's enhancements/fixes into the main branch
 * 
 * Revision 1.3  87/01/20  14:17:04  geoff
 * Return an explicit exit status
 * 
 * Revision 1.2  87/01/17  13:11:22  geoff
 * Add RCS ID keywords
 * 
 */

#include "config.h"
#include "ispell.h"
#include "msgs.h"
#include "version.h"
#include <ctype.h>
#include <string.h>
#ifdef AMIGA
#include <stat.h>
#else /* AMIGA */
#include <sys/stat.h>
#endif /* AMIGA */

#define NSTAT	100		/* Size probe-statistics table */

/*
 * Modification times of the dictionary and its count file are compared
 * in main() to decide whether the count must be regenerated.
 */
struct stat	dstat;		/* Result of stat-ing dict file */
struct stat	cstat;		/* Result of stat-ing count file */

int		hashsize;	/* Number of entries to go in hash table */

/* Library routines, declared K&R-style without prototypes */
extern char *	index ();
extern char *	calloc ();
extern char *	malloc ();
extern char *	realloc ();
extern char *	strcpy ();
extern void	exit ();

/* Routines used below but not defined in this file */
extern void	yyinit ();
extern void	upcase ();
#ifdef CAPITALIZATION
extern long	whatcap ();
#endif

struct hashheader hashheader;	/* Header of hash table being built */
struct dent *	hashtbl;	/* Entries to go in hash table */

/* File names chosen from the command line (or defaults) in main() */
char *		Dfile;		/* Name of dictionary file */
char *		Hfile;		/* Name of hash (output) file */
char *		Lfile;		/* Name of language file */

/* Derived in main() as Dfile with ".cnt" / ".stat" appended */
char		Cfile[MAXPATHLEN]; /* Name of count file */
char		Sfile[MAXPATHLEN]; /* Name of statistics file */
#ifdef OS2
/* Fallback names, tried when the long names above cannot be opened */
char		CfileFAT[MAXPATHLEN]; /* Name of count file - FAT system*/
char		SfileFAT[MAXPATHLEN]; /* Name of statistics file -FAT */
unsigned char   *sptr;  /* pointer for string copies */
#endif  /* OS2 */


/* Set by the -s option; checked before each progress/count message */
static int silent = 0;		/* NZ to suppress count reports */

/*
 * main - build an ispell hash file, or (with -c) combine a word list
 *
 * Arguments, as parsed below:
 *	[-s] [dict-file [lang-file [hash-file]]]
 *	[-s] -c [count] [lang-file]
 *
 * -s suppresses progress reports.  -c selects "combine" mode:  the
 * argument following -c, if there is one, is converted with atoi and
 * used as the hash table size, the dictionary is read from stdin, and
 * the combined result is written to stdout by combineout().  Option
 * letters other than c and s are silently ignored.
 *
 * In normal mode the count file (Dfile + ".cnt") is regenerated via
 * newcount() when it is missing or older than the dictionary, the
 * dictionary is loaded, probe statistics are written to Dfile + ".stat",
 * and the hash file is produced by filltable() and output().
 *
 * Exits with status 0 on success and 1 on any detected failure.
 */
main (argc, argv)
    int		argc;
    char *	argv[];
    {
    FILE *	countf;
    FILE *	statf;
    int		stats[NSTAT];
    int		i;
    int		combine = 0;

#ifdef AMIGA
    _Heapsize = 2*HEAPSIZE;
#endif /* AMIGA */

    /* Consume leading option arguments */
    while (argc > 1  &&  *argv[1] == '-')
	{
	argc--;
	argv++;
	switch (argv[0][1])
	    {
	    case 'c':
		if (argc > 1)
		    {
		    hashsize = atoi (argv[1]);
		    argc--;
		    argv++;
		    }
		combine = 1;
		break;
	    case 's':
		silent = 1;
		break;
	    }
	}
    /* Pick up the file names, falling back on the compiled-in defaults */
    if (combine)
	{
	if (argc > 1)
	    Lfile = argv[1];
	else
	    Lfile = DEFLANG;
	}
    else if (argc > 1)
	{
	++argv;
	Dfile = *argv;
	if (argc > 2)
	    {
	    ++argv;
	    Lfile = *argv;
	    if (argc > 3)
		{
		++argv;
		Hfile = *argv;
		}
	    else
		{
		Hfile = DEFHASH;
		}
	    }
	else
	    {
	    Lfile = DEFLANG;
	    Hfile = DEFHASH;
	    }
	}
    else
	{
	Dfile = DEFDICT;
	Lfile = DEFLANG;
	Hfile = DEFHASH;
	}

    if (yyopen (Lfile))			/* Open the language file */
	return 1;
    yyinit ();				/* Set up for the parse */
    if (yyparse ())			/* Parse the language tables */
	exit (1);

    if (!combine)
	{
	/*
	** Cfile, Sfile, and the OS2 FAT variants are fixed-size buffers
	** that receive Dfile plus a suffix of at most five characters.
	** Refuse names that would not fit rather than overrunning them.
	** NOTE(review): this message is a literal; it probably belongs
	** in msgs.h with the other messages.
	*/
	if (strlen (Dfile) + sizeof ".stat" > sizeof Sfile)
	    {
	    (void) fprintf (stderr,
	      "buildhash:  dictionary name too long:  %s\n", Dfile);
	    exit (1);
	    }
	(void) sprintf (Cfile, "%s.cnt", Dfile);
	(void) sprintf (Sfile, "%s.stat", Dfile);

#ifdef OS2        /* Make FAT names */
	strcpy(CfileFAT, Dfile);
	strcpy(SfileFAT, Dfile);

	/* Replace whatever follows the last '.', or append an extension */
	if ( (sptr = strrchr(CfileFAT, '.')) != NULL )
	    strcpy(sptr+1, "cnt\0");
	else
	    strcat(CfileFAT, ".cnt\0");


	if ( (sptr = strrchr(SfileFAT, '.')) != NULL)
	    strcpy(sptr+1, "st\0");
	else
	    strcat(SfileFAT, ".st\0");
#endif  /* OS2 */

	if (stat (Dfile, &dstat) < 0)
	    {
	    (void) fprintf (stderr, BHASH_C_NO_DICT, Dfile);
	    exit (1);
	    }
	/* Recount if the count file is missing or out of date */
	if (stat (Cfile, &cstat) < 0 || dstat.st_mtime > cstat.st_mtime)
	    newcount ();

	if ((countf = fopen (Cfile, "r")) == NULL)
	    {
#ifdef OS2
	    if ((countf = fopen (CfileFAT, "r")) == NULL)
		{
		(void) fprintf (stderr, BHASH_C_NO_COUNT);
		exit (1);
		}
#else
	    (void) fprintf (stderr, BHASH_C_NO_COUNT);
	    exit (1);
#endif  /* OS2 */
	    }
	hashsize = 0;
	(void) fscanf (countf, "%d", &hashsize);
	(void) fclose (countf);
	/* A failed scan leaves 0; a negative count is equally unusable */
	if (hashsize <= 0)
	    {
	    (void) fprintf (stderr, BHASH_C_BAD_COUNT);
	    exit (1);
	    }
	}
    readdict (combine);

    if (combine)
	combineout ();
    else
	{
	int		    avg;
	int		    j;

	if ((statf = fopen (Sfile, "w")) == NULL)
	    {
#ifdef OS2
	    (void) fprintf (stderr, CANT_CREATE_TRYING_FAT, Sfile, SfileFAT);
	    if ((statf = fopen (SfileFAT, "w")) == NULL)
		{
		(void) fprintf (stderr, CANT_CREATE, SfileFAT);
		exit (1);
		}
#else
	    (void) fprintf (stderr, CANT_CREATE, Sfile);
	    exit (1);
#endif  /* OS2 */
	    }

	/*
	** stats[k] counts the entries found at position k of a collision
	** chain; chains longer than NSTAT are lumped into the last bucket.
	*/
	for (i = 0; i < NSTAT; i++)
	    stats[i] = 0;
	for (i = 0; i < hashsize; i++)
	    {
	    struct dent *   dp;

	    dp = &hashtbl[i];
	    if ((dp->flagfield & USED) != 0)
		{
		for (j = 0;  dp != NULL;  j++, dp = dp->next)
		    {
		    if (j >= NSTAT)
			j = NSTAT - 1;
		    stats[j]++;
		    }
		}
	    }
	/*
	** One line per chain position:  position, entries at that
	** position, cumulative entries, and cumulative average position.
	*/
	for (i = 0, j = 0, avg = 0;  i < NSTAT;  i++)
	    {
	    j += stats[i];
	    avg += stats[i] * (i + 1);
	    if (j == 0)
		(void) fprintf (statf, "%d:\t%d\t0\t0.0\n", i + 1, stats[i]);
	    else
		(void) fprintf (statf, "%d:\t%d\t%d\t%f\n", i + 1, stats[i], j,
		  (double) avg / j);
	    }
	(void) fclose (statf);

	filltable ();
	output ();
	}
    exit(0);
    /* NOTREACHED */
    }

/*
 * combineout - write the in-memory dictionary to stdout in text form
 *
 * Visits every used slot of hashtbl and follows its collision chain,
 * handing each entry to toutent().  When CAPITALIZATION is defined, the
 * entries that follow one flagged MOREVARIANTS are stepped over after
 * the first of the group has been passed to toutent().
 */
combineout ()
    {
    register struct dent *  slot;
    register struct dent *  entry;
    register int	    remaining;

    for (remaining = hashsize, slot = hashtbl;
      remaining > 0;
      remaining--, slot++)
	{
	if ((slot->flagfield & USED) == 0)
	    continue;
	entry = slot;
	while (entry != NULL)
	    {
	    toutent (stdout, entry, 0);
#ifdef CAPITALIZATION
	    /* Advance to the last member of this variant group */
	    while (entry->flagfield & MOREVARIANTS)
		entry = entry->next;
#endif
	    entry = entry->next;
	    }
	}
    }

output ()
    {
    register FILE *		outfile;
    register struct dent *	dp;
    int				strptr;
    int				n;
    int				i;
    int				maxplen;
    int				maxslen;
    struct flagent *		fentry;

    if ((outfile = fopen (Hfile, "wb")) == NULL)
	{
	(void) fprintf (stderr, CANT_CREATE, Hfile);
	return;
	}
    hashheader.stringsize = 0;
    hashheader.lstringsize = 0;
    hashheader.tblsize = hashsize;
    (void) fwrite ((char *) &hashheader, sizeof hashheader, 1, outfile);
    strptr = 0;
    /*
    ** Put out the strings from the flags table.  This code assumes that
    ** the size of the hash header is a multiple of the size of ichar_t,
    ** and that any integer can be converted to an (ichar_t *) and back
    ** without damage.
    */
    maxslen = 0;
    for (i = numsflags, fentry = sflaglist;  --i >= 0;  fentry++)
	{
	if (fentry->stripl)
	    {
	    (void) fwrite ((char *) fentry->strip, fentry->stripl + 1,
	      sizeof (ichar_t), outfile);
	    fentry->strip = (ichar_t *) strptr;
	    strptr += (fentry->stripl + 1) * sizeof (ichar_t);
	    }
	if (fentry->affl)
	    {
	    (void) fwrite ((char *) fentry->affix, fentry->affl + 1,
	      sizeof (ichar_t), outfile);
	    fentry->affix = (ichar_t *) strptr;
	    strptr += (fentry->affl + 1) * sizeof (ichar_t);
	    }
	n = fentry->affl - fentry->stripl;
	if (n < 0)
	    n = -n;
	if (n > maxslen)
	    maxslen = n;
	}
    maxplen = 0;
    for (i = numpflags, fentry = pflaglist;  --i >= 0;  fentry++)
	{
	if (fentry->stripl)
	    {
	    (void) fwrite ((char *) fentry->strip, fentry->stripl + 1,
	      sizeof (ichar_t), outfile);
	    fentry->strip = (ichar_t *) strptr;
	    strptr += (fentry->stripl + 1) * sizeof (ichar_t);
	    }
	if (fentry->affl)
	    {
	    (void) fwrite ((char *) fentry->affix, fentry->affl + 1,
	      sizeof (ichar_t), outfile);
	    fentry->affix = (ichar_t *) strptr;
	    strptr += (fentry->affl + 1) * sizeof (ichar_t);
	    }
	n = fentry->affl - fentry->stripl;
	if (n < 0)
	    n = -n;
	if (n > maxplen)
	    maxplen = n;
	}
    /*
    ** Write out the string character type tables.
    */
    hashheader.strtypestart = strptr;
    for (i = 0;  i < hashheader.nstrchartype;  i++)
	{
	n = strlen (chartypes[i].name) + 1;
	(void) fwrite (chartypes[i].name, n, 1, outfile);
	strptr += n;
	for (n = 0;
	  chartypes[i].suffixes[n] != '\0';
	  n += strlen (&chartypes[i].suffixes[n]) + 1)
	    ;
	n++;
	(void) fwrite (chartypes[i].suffixes, n, 1, outfile);
	strptr += n;
	}
    hashheader.lstringsize = strptr;
    /* We allow one extra byte because missingletter() may add one byte */
    maxslen += maxplen + 1;
    if (maxslen > MAXAFFIXLEN)
	{
	(void) fprintf (stderr,
	  BHASH_C_BAFF_1 (MAXAFFIXLEN, maxslen - MAXAFFIXLEN));
	(void) fprintf (stderr, BHASH_C_BAFF_2);
	}
    /* Put out the dictionary strings */
    for (i = 0, dp = hashtbl;  i < hashsize;  i++, dp++)
	{
	if (dp->word == NULL)
	    dp->word = (char *) -1;
	else
	    {
	    n = strlen (dp->word) + 1;
	    (void) fwrite (dp->word, n, 1, outfile);
	    dp->word = (char *) strptr;
	    strptr += n;
	    }
	}
    /* Pad file to a struct dent boundary for efficiency. */
    n = (strptr + sizeof hashheader) % sizeof (struct dent);
    if (n != 0)
	{
	n = sizeof (struct dent) - n;
	strptr += n;
	while (--n >= 0)
	    (void) putc ('\0', outfile);
	}
    /* Put out the hash table itself */
    for (i = 0, dp = hashtbl;  i < hashsize;  i++, dp++)
	{
	if (dp->next != 0)
	    {
	    int		x;
	    x = dp->next - hashtbl;
	    dp->next = (struct dent *)x;
	    }
	else
	    {
	    dp->next = (struct dent *)-1;
	    }
	}
    (void) fwrite ((char *) hashtbl, sizeof (struct dent), hashsize, outfile);
    /* Put out the language tables */
    (void) fwrite ((char *) sflaglist,
      sizeof (struct flagent), numsflags, outfile);
    hashheader.stblsize = numsflags;
    (void) fwrite ((char *) pflaglist,
      sizeof (struct flagent), numpflags, outfile);
    hashheader.ptblsize = numpflags;
    /* Finish filling in the hash header. */
    hashheader.stringsize = strptr;
    rewind (outfile);
    (void) fwrite ((char *) &hashheader, sizeof hashheader, 1, outfile);
    (void) fclose (outfile);
    }

/*
 * filltable - move collision-chain entries into free slots of hashtbl
 *
 * readdict() hangs colliding words off their home slot in separately
 * allocated struct dents.  output() stores chain links as indexes into
 * hashtbl, so every chained entry has to live inside the table itself.
 * This routine copies each outside entry into the next unused slot and
 * relinks the chain to point there.
 *
 * If the table runs out of unused slots, the rest of that chain is left
 * where it is and counted; a single BHASH_C_OVERFLOW message reports the
 * total at the end.
 */
filltable ()
    {
    struct dent *freepointer, *nextword, *dp;
    struct dent *hashend;
    int i;
    int overflows;

    hashend = hashtbl + hashsize;
    /*
    ** Find the first unused slot.  The bounds test must come before the
    ** dereference:  when every slot is in use, freepointer reaches
    ** hashend, which is one past the last element and must not be read.
    */
    for (freepointer = hashtbl;
      freepointer < hashend  &&  (freepointer->flagfield & USED);
      freepointer++)
	;
    overflows = 0;
    for (nextword = hashtbl, i = hashsize; i != 0; nextword++, i--)
	{
	if ((nextword->flagfield & USED) == 0)
	    continue;
	/* A link that already points into the table needs no work */
	if (nextword->next >= hashtbl  &&  nextword->next < hashend)
	    continue;
	dp = nextword;
	while (dp->next)
	    {
	    if (freepointer >= hashend)
		{
		overflows++;
		break;
		}
	    else
		{
		/* Copy the outside entry into the table and relink */
		*freepointer = *(dp->next);
		dp->next = freepointer;
		dp = freepointer;

		/* Advance to the next unused slot, bounds test first */
		while (freepointer < hashend
		  &&  (freepointer->flagfield & USED))
		    freepointer++;
		}
	    }
	}
    if (overflows)
	(void) fprintf (stderr, BHASH_C_OVERFLOW, overflows);
    }

#if MALLOC_INCREMENT == 0
/*
 * mymalloc - allocate size bytes
 *
 * This variant is compiled when MALLOC_INCREMENT is 0; it passes the
 * request straight to malloc and returns whatever malloc returns.
 */
char * mymalloc (size)
    int		size;
    {

    return malloc (size);
    }

/*
 * myrealloc - resize a block obtained from mymalloc
 *
 * This variant is compiled when MALLOC_INCREMENT is 0; it passes ptr and
 * size to realloc.  oldsize is accepted only so that the signature
 * matches the other variant of this routine, and is not used here.
 */
/* ARGSUSED */
char * myrealloc (ptr, size, oldsize)
    char *	ptr;
    int		size;
    int		oldsize;
    {

    return realloc (ptr, size);
    }

/*
 * myfree - release a block obtained from mymalloc or myrealloc
 *
 * This variant is compiled when MALLOC_INCREMENT is 0; it hands ptr
 * to free.
 */
void myfree (ptr)
    char *	ptr;
    {

    free (ptr);
    }

#else

/*
 * mymalloc - fast, unfree-able variant of malloc
 *
 * Requests are carved sequentially out of a large pool obtained from
 * malloc.  Each request is raised to at least 4 bytes and then rounded
 * up to a multiple of 8.  When the current pool cannot satisfy a
 * request, a fresh pool of MALLOC_INCREMENT bytes (or the request size,
 * if that is larger) is obtained and whatever remained of the old pool
 * is abandoned.  Returns NULL if malloc fails.
 */
char * mymalloc (size)
    int			size;
    {
    static char *	pool;
    static int		poolleft = 0;
    char *		chunk;
    int			need;

    need = (size < 4) ? 4 : size;
    need = (need + 7) & ~7;	/* Assume doubleword boundaries are enough */
    if (need > poolleft)
	{
	if (need < MALLOC_INCREMENT)
	    poolleft = MALLOC_INCREMENT;
	else
	    poolleft = need;
	pool = malloc ((unsigned) poolleft);
	if (pool == NULL)
	    {
	    poolleft = 0;
	    return NULL;
	    }
	}
    chunk = pool;
    pool = chunk + need;
    poolleft -= need;
    return chunk;
    }

/*
 * myrealloc - resize a block obtained from mymalloc
 *
 * A new block of size bytes is taken from mymalloc and the old contents
 * are copied into it; the old block is not reclaimed.  Only the smaller
 * of oldsize and size is copied, so that shrinking a block cannot write
 * past the end of the new one.  A NULL ptr is treated as having no
 * contents to copy.
 *
 * Returns the new block, or NULL if mymalloc fails (the old block is
 * left untouched in that case).
 */
char * myrealloc (ptr, size, oldsize)
    char *		ptr;
    int			size;
    int			oldsize;
    {
    char *nptr;
    int copylen;

    nptr = mymalloc (size);
    if (nptr == NULL)
	return NULL;
    copylen = (oldsize < size) ? oldsize : size;
    if (ptr != NULL  &&  copylen > 0)
	(void) bcopy (ptr, nptr, copylen);
    return nptr;
    }

/*
 * myfree - counterpart of the pool-based mymalloc
 *
 * Deliberately does nothing:  the mymalloc compiled alongside this
 * variant hands out pieces of larger malloc'ed pools, so individual
 * pieces cannot be returned.
 */
/* ARGSUSED */
void myfree (ptr)
    char *		ptr;
    {
    }
#endif

/*
 * readdict - load the dictionary into the in-memory hash table
 *
 * combine	nonzero to read the word list from stdin; zero to read it
 *		from Dfile.
 *
 * Allocates hashtbl with room for hashsize entries, then converts each
 * input line to a struct dent with makedent() (lines it rejects are
 * skipped) and hashes the word.  An entry landing in an unused slot is
 * stored there.  Otherwise the collision chain is searched for an entry
 * whose word matches the upper-cased new word:  if one exists the two are
 * merged with combinecaps(), and if not a new entry is obtained from
 * mymalloc and appended to the chain.  With CAPITALIZATION defined, a
 * newly stored FOLLOWCASE word is additionally passed to addvheader().
 *
 * Unless silent is set, a running line count is printed to stderr every
 * 1000 lines.  Any failure (cannot open, out of memory, helper error)
 * terminates the program with status 1.
 */
readdict (combine)
    int			combine;
    {
    struct dent		d;
    register struct dent * dp;
    struct dent *	lastdp;
    char		lbuf[INPUTWORDLEN + MAXAFFIXLEN + 2 * MASKBITS];
    char		ucbuf[INPUTWORDLEN + MAXAFFIXLEN + 2 * MASKBITS];
    FILE *		dictf;
    int			i;
    int			h;

    if (combine)
	dictf = stdin;
    else
	{
	if ((dictf = fopen (Dfile, "r")) == NULL)
	    {
	    (void) fprintf (stderr, BHASH_C_CANT_OPEN_DICT);
	    exit (1);
	    }
	}

    hashtbl =
      (struct dent *) calloc ((unsigned) hashsize, sizeof (struct dent));
    if (hashtbl == NULL)
	{
	(void) fprintf (stderr, BHASH_C_NO_SPACE);
	exit (1);
	}

    i = 0;
    while (fgets (lbuf, sizeof lbuf, dictf) != NULL)
	{
	if (!silent  &&  (i % 1000) == 0)
	    {
	    (void) fprintf (stderr, "%d ", i);
	    /* Flush the stream the progress report was written to */
	    (void) fflush (stderr);
	    }
	i++;

	if (makedent (lbuf, &d) < 0)
	    continue;

	h = hash (strtosichar (d.word, 1), hashsize);

	dp = &hashtbl[h];
	if ((dp->flagfield & USED) == 0)
	    {
	    *dp = d;
#ifdef CAPITALIZATION
	    /*
	    ** If it's a followcase word, we need to make this a
	    ** special dummy entry, and add a second with the
	    ** correct capitalization.
	    */
	    if (captype (d.flagfield) == FOLLOWCASE)
		{
		if (addvheader (dp))
		  exit (1);
		}
#endif
	    }
	else
	    {

	    /*
	    ** Collision.  Skip to the end of the collision
	    ** chain, or to a pre-existing entry for this
	    ** word.  Note that d.word always exists at
	    ** this point.
	    */
	    (void) strcpy (ucbuf, d.word);
	    chupcase (ucbuf);
	    while (dp != NULL)
		{
		if (strcmp (dp->word, ucbuf) == 0)
		    break;
#ifdef CAPITALIZATION
		/* Step over the rest of this entry's variant group */
		while (dp->flagfield & MOREVARIANTS)
		    dp = dp->next;
#endif /* CAPITALIZATION */
		dp = dp->next;
		}
	    if (dp != NULL)
		{
		/*
		** A different capitalization is already in
		** the dictionary.  Combine capitalizations.
		*/
		if (combinecaps (dp, &d) < 0)
		    exit (1);
		}
	    else
		{
		/* Insert a new word into the dictionary */
		for (dp = &hashtbl[h];  dp->next != NULL;  )
		    dp = dp->next;
		lastdp = dp;
		dp = (struct dent *) mymalloc (sizeof (struct dent));
		if (dp == NULL)
		    {
		    (void) fprintf (stderr, BHASH_C_COLLISION_SPACE);
		    exit (1);
		    }
		*dp = d;
		lastdp->next = dp;
		dp->next = NULL;
#ifdef CAPITALIZATION
		/*
		** If it's a followcase word, we need to make this a
		** special dummy entry, and add a second with the
		** correct capitalization.
		*/
		if (captype (d.flagfield) == FOLLOWCASE)
		    {
		    if (addvheader (dp))
		      exit (1);
		    }
#endif
		}
	    }
	}
    if (!silent)
	(void) fprintf (stderr, "\n");
    if (!combine)
	(void) fclose (dictf);
    }

/*
 * newcount - count the hash entries Dfile will need and record the total
 *
 * Reads Dfile line by line, counting one entry per line.  With
 * CAPITALIZATION defined, each line is cut at the flag marker, converted
 * to ichar_t form and upper-cased, then compared with the previous
 * line's result (only adjacent lines are compared):
 *   - the first repeat of a word is counted one extra time, and
 *   - a FOLLOWCASE word that has not already had its extra count is
 *     counted one extra time.
 *
 * The total is written, as a decimal number, to Cfile (on OS2, to
 * CfileFAT if Cfile cannot be created).  Unless silent is set, progress
 * and the final count are reported on stderr.  Exits with status 1 if
 * the dictionary cannot be opened or the count file cannot be created.
 */
newcount ()
    {
    char		buf[INPUTWORDLEN + MAXAFFIXLEN + 2 * MASKBITS];
#ifdef CAPITALIZATION
    ichar_t		ibuf[INPUTWORDLEN + MAXAFFIXLEN + 2 * MASKBITS];
#endif
    register FILE *	d;
    register int	i;
#ifdef CAPITALIZATION
    ichar_t		lastibuf[sizeof ibuf / sizeof (ichar_t)];
    int			headercounted;
    int			followcase;
    register char *	cp;
#endif

    if (!silent)
	(void) fprintf (stderr, BHASH_C_COUNTING);

    if ((d = fopen (Dfile, "r")) == NULL)
	{
	(void) fprintf (stderr, BHASH_C_CANT_OPEN_DICT);
	exit (1);
	}

#ifdef CAPITALIZATION
    headercounted = 0;
    lastibuf[0] = 0;
#endif
    for (i = 0;  fgets (buf, sizeof buf, d);  )
	{
	if ((++i % 1000) == 0  &&  !silent)
	    {
	    (void) fprintf (stderr, "%d ", i);
	    /* Flush the stream the progress report was written to */
	    (void) fflush (stderr);
	    }
#ifdef CAPITALIZATION
	/* Compare on the bare word only:  drop the flags, if any */
	cp = index (buf, hashheader.flagmarker);
	if (cp != NULL)
	    *cp = '\0';
	strtoichar (ibuf, buf, 1);
	followcase = (whatcap (ibuf) == FOLLOWCASE);
	upcase (ibuf);
	if (icharcmp (ibuf, lastibuf) != 0)
	    headercounted = 0;
	else if (!headercounted)
	    {
	    /* First duplicate will take two entries */
	    if ((++i % 1000) == 0  &&  !silent)
		{
		(void) fprintf (stderr, "%d ", i);
		(void) fflush (stderr);
		}
	    headercounted = 1;
	    }
	if (!headercounted  &&  followcase)
	    {
	    /* It's followcase and the first entry -- count again */
	    if ((++i % 1000) == 0  &&  !silent)
		{
		(void) fprintf (stderr, "%d ", i);
		(void) fflush (stderr);
		}
	    headercounted = 1;
	    }
	(void) icharcpy (lastibuf, ibuf);
#endif
	}
    (void) fclose (d);
    if (!silent)
	(void) fprintf (stderr, BHASH_C_WORD_COUNT, i);
    if ((d = fopen (Cfile, "w")) == NULL)
	{
#ifdef OS2
	(void) fprintf (stderr, CANT_CREATE_TRYING_FAT, Cfile, CfileFAT);
	if ((d = fopen (CfileFAT, "w")) == NULL)
	    {
	    (void) fprintf (stderr, CANT_CREATE, CfileFAT);
	    exit (1);
	    }
#else
	/* CfileFAT exists only under OS2; the file that failed is Cfile */
	(void) fprintf (stderr, CANT_CREATE, Cfile);
	exit (1);
#endif  /* OS2 */
	}
    (void) fprintf (d, "%d\n", i);
    (void) fclose (d);
    }
