/*
    FLUIdS - local search system
    Copyright (C) 1998, 2000  VVK (valera@sbnet.ru), CNII Center, Moscow

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/


#include "zdefs.h"
#include <stdio.h>
#include "_pstring.h" /* <string.h> */
#include <assert.h>
#include "_pstat.h" /* <sys/stat.h> */

#include "zcontext.h"
#include "zalloc.h"
#include "zchars.h"
#include "zcharset.h"
#include "zerror.h"
#include "zfile.h"
#include "zstdlib.h"
#include "zstring.h"

#include "cfg.h"
#include "defs.h"
#include "error.h"
#include "html.h"
#include "indexer.h"
#include "structur.h"
#include "words.h"

#include "configur.h"
#include "indexjob.h"
#include "indexing.h"

/***************************************************************************/
/*                                                                         */
/*  Tagging                                                                */
/*                                                                         */
/***************************************************************************/

/* Size of a tag-name buffer, including the terminating NUL. */
#define TAGNAME_SIZE       128

/*
 * One entry of the table of tag names that getTagName() handles specially.
 *
 *   name      - tag name as it follows '<' (matched case-insensitively)
 *   length    - length of name, compared before the string comparison
 *   situation - parsing mode selected by the switch in getTagName():
 *                 0 - ordinary tag, skipped up to its closing '>'
 *                 1 - "<% ... %>" or "<? ... ?>" block
 *                 2 - "<!-- ... -->" comment
 *                 3 - element whose content is skipped up to the matching
 *                     end tag (style, script)
 */
struct sitdef_t
{
  const char *name;
  int length;
  int situation;
};

/*
 * Specially handled tag names.  The final NULL entry terminates the lookup
 * loop and, having situation 0, also serves as the descriptor of every
 * ordinary tag.
 */
static struct sitdef_t sitTable[] =
{
  { "%",      1,  1 },
  { "?",      1,  1 },
  { "!--",    3,  2 },
  { "style",  5,  3 },
  { "script", 6,  3 },
  { NULL,     0,  0 }
};

/* Number of named entries, i.e. the index of the NULL terminator entry. */
#define sitTableSize (sizeof( sitTable ) / sizeof( struct sitdef_t ) - 1)

/*
 * Skips the rest of a tag, up to and including its closing '>'.
 *
 *   stream - input positioned right after the character c
 *   c      - first character to examine (the one that ended the tag name)
 *
 * Attribute values are tracked so that a '>' inside a double-quoted value
 * does not terminate the tag.  An unquoted value ends at the first
 * white-space character.
 *
 * Returns True once the closing '>' has been consumed, False if the stream
 * ends first.
 */
static Boolean getTagEnd( FILE *stream, unsigned char c)
{
  Boolean startValue = False, isValue = False, quoted = False;

  do
  {
    if( startValue )
    {
      /* First character after '=': decides how the value is delimited. */
      startValue = False;
      if( c == '\"' )
        quoted = True;           /* value runs up to the matching quote */
      else if( c == '>' )
        return True;             /* "name=>": empty value, tag ends here */
    }
    else if( quoted )
    {
      /* Inside a quoted value only the closing quote is significant. */
      if( c == '\"' ){ quoted = False; isValue = False; }
    }
    else if( c == '>' )
      return True;
    else if( isValue )
    {
      if( _isSpace( c ) ) isValue = False;
    }
    else if( c == '=' )
      startValue = isValue = True;
  }while( fread( &c, 1, 1, stream) == 1 );

  return False;
}

/*
 * Reads and skips the construct that follows an already consumed '<'.
 *
 *   stream - input positioned right after the '<'
 *   tag    - buffer of at least TAGNAME_SIZE bytes that receives the name
 *
 * Returns:
 *   -1  the stream ended before the construct was complete;
 *    0  a "<% %>", "<? ?>" or "<!-- -->" construct was consumed; for a
 *       comment, tag holds "!--" followed by the comment text including the
 *       closing "-->" (as far as it fits into the buffer);
 *    1  with tag[0] == '<': the '<' did not start a tag; tag[1] holds the
 *       character that was read after it and tag is NOT NUL-terminated;
 *   >0  otherwise: the length of the NUL-terminated name stored in tag
 *       (a leading '/' or '!' is part of the name).  The tag has been
 *       consumed up to its '>'; for style and script the element content up
 *       to the matching end tag has been consumed as well.
 */
static int getTagName( FILE *stream, char *tag)
{
  struct sitdef_t *sd;
  unsigned char c, lasts[2];
  int tagLength = 0;

  /* Collect the name; c ends up holding the character that stopped it. */
  for( ;; )
  {
    if( fread( &c, 1, 1, stream) != 1 ) return -1;
    if( tagLength == 0 && (c == '%' || c == '?') )
    {
      tag[tagLength++] = c;
      break;
    }
    /* First character: letter, '/' or '!'.  Later characters: letters and
       digits, plus the two dashes that complete "!--".  Characters beyond
       the buffer capacity are read but not stored. */
    if( (tagLength == 0 && (_isAlpha( c ) || c == '/' || c == '!')) ||
        (tagLength > 0 && (_isAlnum( c ) || (c == '-' && tag[0] == '!' &&
         (tagLength == 1 || (tagLength == 2 && tag[1] == '-'))))) )
    {
      if( tagLength < TAGNAME_SIZE-1 ) tag[tagLength++] = c;
    }
    else
      break;
    /* "!--" is complete: the comment text follows immediately. */
    if( tagLength == 3 && tag[0] == '!' && tag[1] == '-' && tag[2] == '-' ) break;
  }

  /* Not a tag at all: hand the '<' and the offending character back. */
  if( tagLength == 0 )
  {
    tag[0] = '<';
    tag[1] = c;
    return 1;
  }

  tag[tagLength] = '\0';
  /* End tags are always ordinary: use the terminator entry (situation 0). */
  if( *tag == '/' )
    sd = &sitTable[sitTableSize];
  else
    for( sd = sitTable; sd->name != NULL; sd++)
      if( sd->length == tagLength && strcasecmp( sd->name, tag) == 0 ) break;
  /* An ordinary tag that was closed right after its name is already done. */
  if( c == '>' && sd->situation == 0 ) return tagLength;

  switch( sd->situation )
  {
    case 0:
      /* Ordinary tag: skip the attributes. */
      if( !getTagEnd( stream, c) ) return -1;
      return tagLength;

    case 1:
      /* "<% ... %>" / "<? ... ?>": ends at '>' preceded by the opening char. */
      for( lasts[0] = '\0'; ; )
      {
        if( fread( &c, 1, 1, stream) != 1 ) return -1;
	if( c == '>' && lasts[0] == *tag ) return 0;
	lasts[0] = c;
      }
      /* Never reached */

    case 2:
      /* Comment: keep its text in tag and stop at "-->".  The dashes of the
         opening "!--" do not count towards the closing sequence. */
      for( lasts[0] = lasts[1] = '\0'; ; )
      {
        if( fread( &c, 1, 1, stream) != 1 ) return -1;
        if( tagLength < TAGNAME_SIZE-1 ) tag[tagLength++] = c;
        if( c == '>' && lasts[0] == '-' && lasts[1] == '-' )
        {
          tag[tagLength] = '\0';
          return 0;
        }
	lasts[0] = lasts[1];
	lasts[1] = c;
      }
      /* Never reached */

    case 3:
      /* style / script: skip the start tag, then everything up to the
         matching end tag.  Every '<' met on the way is parsed by a
         recursive call, so tags and comments inside are consumed whole. */
      if( !getTagEnd( stream, c) )
        return -1;
      else
      {
        char buf[TAGNAME_SIZE];
        int i;

        for( ;; )
        {
          if( fread( &c, 1, 1, stream) != 1 ) return -1;
          if( c != '<' ) continue;
	  if( (i = getTagName( stream, buf)) < 0 ) return -1;
	  if( i == tagLength+1 && *buf == '/' && strcasecmp( tag, buf+1) == 0 )
            return tagLength;
        }
      }
      /* Never reached */

    default:
#ifdef CHECK
      assert( False );
#endif
      return 0;
  }
}

/***************************************************************************/
/*                                                                         */
/*  Indexing the text                                                      */
/*                                                                         */
/***************************************************************************/

/*
 * Tells whether a word matches one of the configured reject patterns.
 *
 *   word - NUL-terminated word to test
 *
 * Returns True as soon as zStringMatch() accepts the word for any entry of
 * E_RejectWords, False when the list is empty or nothing matches.
 */
static Boolean rejectWord( const char *word )
{
  int idx = 0;

  while( idx < E_RejectWords.count )
  {
    if( zStringMatch( word, E_RejectWords.list[idx], smfUseSequenceTable) )
      return True;
    idx++;
  }

  return False;
}

/*
 * MAX_SEQUENCE_SIZES is defined when at least one run-length limit
 * (digits, consonants or vowels) is configured.
 */
#if defined( MAX_DIGIT_SEQUENCE_SIZE ) || \
    defined( MAX_CONSONANT_SEQUENCE_SIZE ) || \
    defined( MAX_VOWEL_SEQUENCE_SIZE )
#define MAX_SEQUENCE_SIZES
#else
#undef MAX_SEQUENCE_SIZES
#endif

/*
 * IGNORE_ALL_SAME_CHARS_WORD is defined when words consisting of a single
 * character class (all digits, all consonants or all vowels) are to be
 * ignored for at least one of those classes.
 */
#if defined( IGNORE_ALL_DIGITS_WORD ) || \
    defined( IGNORE_ALL_CONSONANTS_WORD ) || \
    defined( IGNORE_ALL_VOWELS_WORD )
#define IGNORE_ALL_SAME_CHARS_WORD
#else
#undef IGNORE_ALL_SAME_CHARS_WORD
#endif

/*
 * Decides whether a word is to be indexed.
 *
 *   cnt  - context handed to checkValidWord() and checkStopWord()
 *   word - NUL-terminated candidate word
 *
 * Returns True if the word passes, False otherwise.  Checks, in order:
 *   1. an empty word is rejected;
 *   2. a word accepted by checkValidWord() passes at once, bypassing all
 *      later checks;
 *   3. a word reported by checkStopWord() is rejected;
 *   4. the length must lie within [E_MinWordLength, E_MaxWordLength];
 *   5. the word must not be found in E_BadWords nor match a reject pattern;
 *   6. compile-time optional checks: class of the first/last character,
 *      runs of equal letters, runs of digits/consonants/vowels, words made
 *      of one character class only, and words mixing high-bit characters
 *      with other letters.
 */
static Boolean testWord( struct zcontext_t *cnt, const char *word)
{
  int i, length;
#ifdef MAX_EQUAL_SEQUENCE_SIZE
  int thesame, lastChar;
#endif
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE ) && defined( SINGLE_LANGUAGE_WORDS )
  int russianCount, englishCount;
#endif
#ifdef IGNORE_ALL_DIGITS_WORD
  int digitCount;
#endif
#ifdef IGNORE_ALL_CONSONANTS_WORD
  int consonantCount;
#endif
#ifdef IGNORE_ALL_VOWELS_WORD
  int vowelCount;
#endif
#ifdef MAX_DIGIT_SEQUENCE_SIZE
  int digitSequenceCount;
#endif
#ifdef MAX_CONSONANT_SEQUENCE_SIZE
  int consonantSequenceCount;
#endif
#ifdef MAX_VOWEL_SEQUENCE_SIZE
  int vowelSequenceCount;
#endif

  /* Dictionary-based decisions come first. */
  if( word[0] == '\0' ) return False;
  if( checkValidWord( cnt, word) ) return True;
  if( checkStopWord( cnt, word) ) return False;

  length = strlen( word );
  if( length > E_MaxWordLength || length < E_MinWordLength ) return False;

  if( zStringCollectionFind( &E_BadWords, word) != NULL || rejectWord( word ) )
    return False;

#ifdef BEGIN_WORD_CHAR_BITS
  if( !testChar( word[0], BEGIN_WORD_CHAR_BITS) ) return False;
#endif
#ifdef END_WORD_CHAR_BITS
  if( !testChar( word[length-1], END_WORD_CHAR_BITS) ) return False;
#endif

  /* Reset whichever counters this build configuration uses. */
#ifdef MAX_EQUAL_SEQUENCE_SIZE
  lastChar = '\0';
  thesame = 0;
#endif
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE ) && defined( SINGLE_LANGUAGE_WORDS )
  russianCount = englishCount = 0;
#endif
#ifdef IGNORE_ALL_DIGITS_WORD
  digitCount = 0;
#endif
#ifdef IGNORE_ALL_CONSONANTS_WORD
  consonantCount = 0;
#endif
#ifdef IGNORE_ALL_VOWELS_WORD
  vowelCount = 0;
#endif

#ifdef MAX_DIGIT_SEQUENCE_SIZE
  digitSequenceCount = 0;
#endif
#ifdef MAX_CONSONANT_SEQUENCE_SIZE
  consonantSequenceCount = 0;
#endif
#ifdef MAX_VOWEL_SEQUENCE_SIZE
  vowelSequenceCount = 0;
#endif

  /* The per-character scan is compiled only if some check needs it. */
#if defined( MAX_SEQUENCE_SIZES ) || defined( IGNORE_ALL_SAME_CHARS_WORD ) || \
    defined( MAX_EQUAL_SEQUENCE_SIZE ) || \
    (defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE ) && defined( SINGLE_LANGUAGE_WORDS ))
  for( i = 0; i < length; i++)
  {
    register int c = ((unsigned char *) word)[i];

    /* Length of the current run of identical characters. */
#ifdef MAX_EQUAL_SEQUENCE_SIZE
    if( c == lastChar )
      ++thesame;
    else
      thesame = 1;
#endif

    /* Classify the character as digit, vowel, consonant or other; the run
       counter of its own class grows and the other run counters restart. */
#if defined( MAX_DIGIT_SEQUENCE_SIZE ) || defined( IGNORE_ALL_DIGITS_WORD )
    if( _isDigit( c ) )
    {
#ifdef IGNORE_ALL_DIGITS_WORD
      digitCount++;
#endif
#ifdef MAX_DIGIT_SEQUENCE_SIZE
      if( ++digitSequenceCount > MAX_DIGIT_SEQUENCE_SIZE ) return False;
#endif
#ifdef MAX_CONSONANT_SEQUENCE_SIZE
      consonantSequenceCount = 0;
#endif
#ifdef MAX_VOWEL_SEQUENCE_SIZE
      vowelSequenceCount = 0;
#endif
    }
    else
#endif
    {
#if defined( MAX_VOWEL_SEQUENCE_SIZE ) || defined( IGNORE_ALL_VOWELS_WORD )
      if( _isVowel( c ) )
      {
#ifdef IGNORE_ALL_VOWELS_WORD
        vowelCount++;
#endif
#ifdef MAX_VOWEL_SEQUENCE_SIZE
        if( ++vowelSequenceCount > MAX_VOWEL_SEQUENCE_SIZE ) return False;
#endif
#ifdef MAX_DIGIT_SEQUENCE_SIZE
        digitSequenceCount = 0;
#endif
#ifdef MAX_CONSONANT_SEQUENCE_SIZE
        consonantSequenceCount = 0;
#endif
      }
      else
#endif
#if defined( MAX_CONSONANT_SEQUENCE_SIZE ) || defined( IGNORE_ALL_CONSONANTS_WORD )
      if( _isConsonant( c ) )
      {
#ifdef IGNORE_ALL_CONSONANTS_WORD
        consonantCount++;
#endif
#ifdef MAX_CONSONANT_SEQUENCE_SIZE
        if( ++consonantSequenceCount > MAX_CONSONANT_SEQUENCE_SIZE ) return False;
#endif
#ifdef MAX_DIGIT_SEQUENCE_SIZE
        digitSequenceCount = 0;
#endif
#ifdef MAX_VOWEL_SEQUENCE_SIZE
        vowelSequenceCount = 0;
#endif
      }
      else
#endif
      {
        /* Neither digit, vowel nor consonant: every run is interrupted. */
#ifdef MAX_DIGIT_SEQUENCE_SIZE
        digitSequenceCount = 0;
#endif
#ifdef MAX_CONSONANT_SEQUENCE_SIZE
        consonantSequenceCount = 0;
#endif
#ifdef MAX_VOWEL_SEQUENCE_SIZE
        vowelSequenceCount = 0;
#endif
      }

      /* For non-digits: count high-bit characters versus other letters and
         enforce the limit on runs of identical letters. */
#if defined( MAX_EQUAL_SEQUENCE_SIZE ) || \
    (defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE ) && defined( SINGLE_LANGUAGE_WORDS ))
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
      if( (c & 0x80) != 0 ) /* XXX: if( _isRussian( c ) ) ??? */
      {
#ifdef SINGLE_LANGUAGE_WORDS
        russianCount++;
#endif
#ifdef MAX_EQUAL_SEQUENCE_SIZE
        if( thesame > MAX_EQUAL_SEQUENCE_SIZE ) return False;
#endif
      }
      else
#endif
      if( _isAlpha( c ) )
      {
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE ) && defined( SINGLE_LANGUAGE_WORDS )
        englishCount++;
#endif
#ifdef MAX_EQUAL_SEQUENCE_SIZE
        if( thesame > MAX_EQUAL_SEQUENCE_SIZE ) return False;
#endif
      }
#endif
    }

#ifdef MAX_EQUAL_SEQUENCE_SIZE
    lastChar = c;
#endif
  }
#endif

  /* Whole-word verdicts based on the totals gathered above. */
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE ) && defined( SINGLE_LANGUAGE_WORDS )
  if( englishCount > 0 && russianCount > 0 ) return False;
#endif

#ifdef IGNORE_ALL_DIGITS_WORD
  if( digitCount == length ) return False;
#endif
#ifdef IGNORE_ALL_CONSONANTS_WORD
  if( consonantCount == length ) return False;
#endif
#ifdef IGNORE_ALL_VOWELS_WORD
  if( vowelCount == length ) return False;
#endif

  return True;
}

#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
/*
 * Rewrites the letters of a word through torTable, but only if the word
 * contains at least one Russian letter.
 *
 *   word     - NUL-terminated word, modified in place
 *   torTable - replacement table indexed by the unsigned character value;
 *              a '\0' entry leaves the character unchanged
 *              (presumably maps look-alike Latin letters to Cyrillic ones -
 *              confirm against the code that builds the table)
 */
static void convertToRussian( char *word, const char *torTable)
{
  char *p;
  unsigned char ch;

  /* Leave the word alone unless it holds at least one Russian letter. */
  for( p = word; ; p++)
  {
    if( *p == '\0' ) return;
    if( isRussian( *p ) ) break;
  }

  /* Replace every letter that has a non-zero table entry. */
  p = word;
  while( *p != '\0' )
  {
    ch = *(unsigned char *) p;
    if( _isAlpha( ch ) && torTable[ch] != '\0' )
      *p = torTable[ch];
    p++;
  }
}
#endif

/*
 * Computes the weight bonus of a word from the markup it occurred in.
 *
 *   structure - markup state recorded for the word
 *
 * Returns 5 for a word inside the title; otherwise 3 for a word inside a
 * header plus 1 for a word inside an anchor (so 0, 1, 3 or 4).
 */
static int getStructureAddition( struct structuredef_t *structure )
{
  int headerBonus, anchorBonus;

  if( structure->titled > 0 ) return 5;

  headerBonus = (structure->headered > 0) ? 3 : 0;
  anchorBonus = (structure->anchored > 0) ? 1 : 0;

  return headerBonus + anchorBonus;
}

/*
 * Splits a string into words and adds every acceptable word to the indexer.
 *
 *   fi        - indexer that receives the words
 *   string    - NUL-terminated text; it is modified: word characters are
 *               compacted in place over any CHAR_SPECIAL characters that
 *               occur inside a word
 *   st        - area flags stored with each word (OR-ed with the word's
 *               characteristic value)
 *   structure - current markup state, updated by the CHAR_SPECIAL characters
 *               found in the string; NULL when the text carries no markup
 *   torTable  - table for convertToRussian(), or NULL; only used when
 *               Russian support is compiled in
 *
 * Returns False as soon as fluIndexerAddWord() fails, True otherwise.
 */
static Boolean indexString( struct flu_indexer_t *fi, char *string, _st_t st,
    struct structuredef_t *structure, const char *torTable)
{
  /* word..wordend is the word collected so far; NULL while between words. */
  char *word = NULL, *wordend = NULL;
  struct structuredef_t wordStructure;
  _st_t chic;

  for( ;; )
  {
    if( structure != NULL && testChar( *string, CHAR_SPECIAL) )
    {
      /* Markup marker: update the state; it does not end the current word. */
      applyStructure( *string, structure);
      if( word != NULL ) minimizeStructure( &wordStructure, structure);
    }
    else if( *string == '\0' || isSpace( *string ) ||
               !testChar( *string, WORD_CHAR_BITS) )
    {
      /* Delimiter (or end of string): finish the pending word, if any. */
      if( word != NULL )
      {
        /* Terminate the word temporarily; the byte is restored below. */
	int c = *wordend;
	*wordend = '\0';
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
        if( torTable != NULL ) convertToRussian( word, torTable);
#endif
        chic = getCharacteristicValue( word, False);
	if( testWord( fi->context, word) )
        {
          /* Once a title word is seen, st stays AREA_FLAG_TITLE for the
             rest of this call. */
          if( structure != NULL && structure->titled > 0 ) st = AREA_FLAG_TITLE;
          if( !fluIndexerAddWord( fi, word, structure == NULL ? 0 :
                 getStructureAddition( &wordStructure ), st | chic) ) return False;
        }
	word = NULL;
	*wordend = c;
      }
    }
    else if( word == NULL )
    {
      /* First character of a new word; remember the markup it starts in. */
      word = string;
      wordend = string+1;
      if( structure != NULL ) copyStructure( &wordStructure, structure);
    }
    else
      /* Append to the word, closing any gap left by skipped markers. */
      *wordend ++ = *string;

    if( *string == '\0' ) break;
    string++;
  }

  return True;
}

/***************************************************************************/
/*                                                                         */
/*  Current indexed file                                                   */
/*                                                                         */
/***************************************************************************/

/*
 * State of the file that is currently being indexed.
 */
struct curfile_t
{
  struct zcontext_t *context;           /* used to report errors */
  struct flu_indexer_t *fi;             /* indexer that receives the words */
  char title[MAX_TITLE_LENGTH*6+1];     /* collected title text */
  char content[MAX_CONTENT_LENGTH*6+1]; /* collected content text */
  Boolean allContent;                   /* set by indexFile(): content was not truncated */
  /* Declared in every build configuration: INIT_CUR_FILE and the
     index*File() functions reference this member unconditionally, so
     guarding it with RUSSIAN_SUPPORT / RUSSIAN_RELEASE broke all other
     builds.  indexString() consults the table only when Russian support
     is compiled in. */
  const char *torTable;
};

/*
 * Prepares a struct curfile_t for a new file: stores the context, the
 * indexer and the conversion table, empties the title and content buffers
 * and sets allContent to True.
 *
 * The parameter names end in '_' so that none of them equals a member name
 * (a parameter called fi would also rewrite the ->fi member access), and
 * the body is a single statement so the macro is safe in if/else.
 */
#define INIT_CUR_FILE(cf_,cnt_,fi_,tort_)   \
    do                                      \
    {                                       \
      (cf_)->context = (cnt_);              \
      (cf_)->fi = (fi_);                    \
      (cf_)->title[0] = 0;                  \
      (cf_)->content[0] = 0;                \
      (cf_)->torTable = (tort_);            \
      (cf_)->allContent = True;             \
    }while( 0 )


/*
 * Text-buffer helpers.  buf is a character array of size bytes and length
 * the number of characters stored so far; one byte is always kept free for
 * the terminator written by END_CHAR.  Arguments are evaluated more than
 * once, so they must be free of side effects.
 *
 * Every multi-token helper is wrapped in do { } while( 0 ) so that it forms
 * exactly one statement and cannot capture an "else" of the calling code.
 */

/*
 * Appends c to buf.  A blank, or a high-bit character that is not a Russian
 * letter, is stored as a single blank, and only if the buffer is not empty
 * and does not already end in a blank.
 */
#define PUSH_CHAR(buf,length,size,c)                \
    do                                              \
    {                                               \
      if( (length) < (size)-1 )                     \
      {                                             \
        if( (c) != ' ' && (((c) & 0x80) == 0 || _isRussian( (c) )) ) \
          (buf)[(length)++] = (c);                  \
        else if( (length) > 0 && (buf)[(length)-1] != ' ' ) \
          (buf)[(length)++] = ' ';                  \
      }                                             \
    }while( 0 )

#define PUSH_TITLE_CHAR(c)                          \
    PUSH_CHAR( cf->title, titleLength, sizeof( cf->title ), c)

#define PUSH_CONTENT_CHAR(c)                        \
    PUSH_CHAR( cf->content, contentLength, sizeof( cf->content ), c)

#define PUSH_WORD_CHAR(c)                           \
    PUSH_CHAR( word, wordLength, sizeof( word ), c)

/* Terminates the collected text. */
#define END_CHAR(buf,length)                        \
    (buf)[(length)] = '\0'


/* Appends one blank unless the buffer is empty, full or ends in a blank. */
#define INSERT_SPACE(buf,length,size)               \
    do                                              \
    {                                               \
      if( (length) > 0 && (length) < (size)-1 && (buf)[(length)-1] != ' ' ) \
        (buf)[(length)++] = ' ';                    \
    }while( 0 )

#define INSERT_TITLE_SPACE()                        \
    INSERT_SPACE( cf->title, titleLength, sizeof( cf->title ))

#define INSERT_CONTENT_SPACE()                      \
    INSERT_SPACE( cf->content, contentLength, sizeof( cf->content ))

#define INSERT_WORD_SPACE()                         \
    INSERT_SPACE( word, wordLength, sizeof( word ))


/*
 * Indexes the name of a file: the last path component without its
 * extension.
 *
 *   cf       - current-file state (supplies the indexer and the torTable)
 *   fileName - path of the file
 *   table    - table used to recode characters >= 128 (indexed by the
 *              character value minus 128), or NULL for no recoding
 *
 * Returns the result of indexString() for the prepared name.
 */
static Boolean indexFileName( struct curfile_t *cf, const char *fileName, const char *table)
{
  char name[1024];
  const char *base;
  char *dot, *p;
  unsigned int ch;

  /* Keep what follows the last path separator, or the whole path. */
  base = strrchr( fileName, SLASH);
  base = (base != NULL) ? base + 1 : fileName;
  strncpy( name, base, sizeof( name ));
  name[sizeof( name ) - 1] = '\0';

  /* Drop the extension, unless the last dot is the very first character. */
  dot = strrchr( name, '.');
  if( dot != NULL && dot != name ) *dot = '\0';

  /* Recode the name into the internal character set. */
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
  for( p = name; *p != '\0'; p++)
  {
    ch = *(unsigned char *) p;
    if( table != NULL && ch >= 128 ) ch = table[ch-128] & 0xff;
    *p = ztFixupTable[ch];
  }
#else
  if( table != NULL )
    for( p = name; *p != '\0'; p++)
    {
      ch = *(unsigned char *) p;
      if( ch >= 128 ) *p = table[ch-128] & 0xff;
    }
#endif

  return indexString( cf->fi, name, AREA_FLAG_FILENAME, NULL, cf->torTable);
}

/*
 * Indexes a plain-text file line by line and collects its text in
 * cf->content.
 *
 *   cf       - current-file state (indexer, content buffer, torTable)
 *   fileName - path of the file to read
 *   table    - table used to recode characters >= 128 (indexed by the
 *              character value minus 128), or NULL for no recoding
 *
 * Returns False only if indexString() fails.  A file that cannot be opened
 * is reported as a warning and yields True.  The stream is closed on every
 * path, including the failure path.
 */
static Boolean indexTextFile( struct curfile_t *cf, const char *fileName, const char *table)
{
  FILE *stream;
  char line[8*1024];
  int contentLength = 0;
  char fileBuffer[16*1024];
  Boolean success = True;

  if( (stream = fopen( fileName, READ_T_MODE)) == NULL )
  {
    cf->context->printError( cf->context, zefWarning | zerFileOpen, fileName);
    return True; /* XXX: !!! */
  }
  /* fileBuffer is a local array, so the stream must not outlive this call
     (presumably zSetFileBuffer() installs it as the stdio buffer - confirm). */
  zSetFileBuffer( stream, fileBuffer, sizeof(fileBuffer));

  while( fgets( line, sizeof( line ), stream) != NULL )
  {
    register unsigned int c;
    register char *ptr;

    /* Normalize the line in place and copy it to the content buffer:
       control characters become blanks, high characters are recoded. */
    for( ptr = line; (c = *(unsigned char *) ptr) != '\0'; ptr++)
    {
      if( c < ' ' )
        c = ' ';
      else if( c >= 128 && table != NULL )
        c = table[c-128] & 0xff;
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
      c = ztFixupTable[c];
#endif
      *ptr = c;
      PUSH_CONTENT_CHAR( c );
    }

    if( !indexString( cf->fi, line, AREA_FLAG_CONTENT, NULL, cf->torTable) )
    {
      /* Leave through the common exit so that the stream gets closed. */
      success = False;
      break;
    }
  }

  fclose( stream );

  END_CHAR( cf->content, contentLength);
  return success;
}

/*
 * Indexes an HTML file: skips the markup, passes the text to indexString()
 * and collects the title and the content text in cf.
 *
 *   cf       - current-file state (indexer, title/content buffers, torTable)
 *   fileName - path of the file to read
 *   table    - 128-entry table used to recode characters with the high bit
 *              set, or NULL for no recoding
 *
 * Text between "<!--noindex-->" and "<!--/noindex-->" comments is neither
 * indexed nor collected.
 *
 * Returns False only if indexString() fails.  A file that cannot be opened
 * is reported as a warning and yields True.  The stream is closed on every
 * path, including the failure paths.
 */
static Boolean indexHtmlFile( struct curfile_t *cf, const char *fileName, const char *table)
{
  Boolean endOfFile, isTitle, wasTitle;
  FILE *stream;
  char fileBuffer[16*1024];
  char word[8*1024], tag[TAGNAME_SIZE];
  struct structuredef_t structure;
  int wordLength, contentLength, titleLength;
  int lastCount, i, noIndex;
  int c, lastChar = ' ', tagChar;
  char s[1];

  if( (stream = fopen( fileName, READ_T_MODE)) == NULL )
  {
    cf->context->printError( cf->context, zefWarning | zerFileOpen, fileName);
    return True; /* XXX: !!! */
  }

  /* fileBuffer is a local array, so the stream must not outlive this call
     (presumably zSetFileBuffer() installs it as the stdio buffer - confirm). */
  zSetFileBuffer( stream, fileBuffer, sizeof( fileBuffer ));

  wordLength = contentLength = titleLength = 0;
  endOfFile = isTitle = wasTitle = False;
  initStructure( &structure );
  lastCount = 0;   /* > 0 while characters handed back by getTagName() are replayed */
  noIndex = 0;     /* nesting depth of noindex sections */

  do
  {
    /* Fetch the next character: a replayed one or a fresh one. */
    if( lastCount > 0 ) lastCount--;
    if( lastCount > 0 )
    {
      /* NOTE(review): the stand-in replayed for the literal '<' is '>'; the
         "lastCount != 2" guard below suggests '<' may have been intended -
         confirm before changing. */
      c = (lastCount == 2) ? '>' : lastChar;
    }
    else
    {
      if( fread( s, 1, 1, stream) != 1 )
      {
        /* A final blank flushes the pending word. */
        endOfFile = True;
        c = ' ';
      }
      else
      {
        c = *(unsigned char *) s;
        if( (c & 0x80) != 0 && table != NULL )
          c = table[ (c & 0x7f) ] & 0xff;
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
        c = ztFixupTable[c];
#endif
        if( c < ' ' ) c = ' ';
      }
    }

    if( c == '<' && lastCount != 2 )
    {
      /* The stream ended inside a tag: stop reading. */
      if( (i = getTagName( stream, tag)) < 0 ) break;
      if( i == 0 )
      {
        /* Handle the noindex comments. */
        if( tag[0] == '!' && tag[1] == '-' && tag[2] == '-' )
        {
          if( tag[3] == 'n' || tag[3] == 'N' )
          {
            if( strcasecmp( &tag[4], "oindex-->") == 0 ) noIndex++;
            if( noIndex == 1 )
            {
              /* Separate and flush the text collected so far. */
              if( isTitle ){ INSERT_TITLE_SPACE(); }else{ INSERT_CONTENT_SPACE(); }
              if( wordLength > 0 )
              {
                word[wordLength] = '\0';
                convertEntities( word );
                if( !indexString( cf->fi, word, AREA_FLAG_CONTENT, &structure, cf->torTable) )
                {
                  fclose( stream );
                  return False;
                }
                wordLength = 0;
              }
            }
          }
          else if( tag[3] == '/' && (tag[4] == 'n' || tag[4] == 'N') )
          {
            if( strcasecmp( &tag[5], "oindex-->") == 0 ) noIndex--;
            /* XXX: maybe if( noIndex > 0 ) noIndex--; */
          }
        }
        continue;
      }
      if( *tag == '<' )
      {
        /* Not a tag: replay a stand-in for the '<' and then the character
           that getTagName() consumed after it. */
        lastCount = 3;
        lastChar = ((unsigned char *) tag)[1];
        continue;
      }

      /* Tags other than style tags separate words. */
      if( !isStyleTag( tag ) )
      {
        INSERT_CONTENT_SPACE();
        INSERT_WORD_SPACE();
      }

      /* Put the tag's marker character into the word so that indexString()
         can follow the markup.  Any tag ends the title; a title end outside
         a title and a second title start are ignored. */
      tagChar = getTagChar( tag );
      if( isTitle )
      {
        isTitle = False;
        wasTitle = True;
        if( tagChar != SPECIAL_TITLE_END_CHAR )
          PUSH_WORD_CHAR( SPECIAL_TITLE_END_CHAR );
      }
      else if( tagChar == SPECIAL_TITLE_END_CHAR )
        tagChar = '\0';
      if( tagChar == SPECIAL_TITLE_START_CHAR )
      {
        if( wasTitle ) tagChar = '\0'; else isTitle = True;
      }
      if( tagChar != '\0' ) PUSH_WORD_CHAR( tagChar );
    }
    else if( noIndex <= 0 )
    {
      if( isTitle ){ PUSH_TITLE_CHAR( c ); }else{ PUSH_CONTENT_CHAR( c ); }

      /* White space ends the collected chunk: index it. */
      if( _isSpace( c ) && wordLength > 0 )
      {
        word[wordLength] = '\0';
        convertEntities( word );
        if( !indexString( cf->fi, word, AREA_FLAG_CONTENT, &structure, cf->torTable) )
        {
          fclose( stream );
          return False;
        }
        wordLength = 0;
      }
      else
        PUSH_WORD_CHAR( c );
    }
  }while( !endOfFile );

  fclose( stream );

  END_CHAR( cf->title, titleLength);
  END_CHAR( cf->content, contentLength);
  convertEntities( cf->title );
  convertEntities( cf->content );
  zCompressString( cf->title, False);
  zCompressString( cf->content, False);

  return True;
}

/***************************************************************************/
/*                                                                         */
/*  Indexing the file                                                      */
/*                                                                         */
/***************************************************************************/

/*
 * Obtains the size and the modification time of a file.
 *
 *   cnt           - context used to report a failure
 *   fileName      - path of the file
 *   psize         - receives the file size, or -1 on failure
 *   plastModified - receives zTime( st_mtime, 0), or 0 on failure
 *
 * Returns True on success; on failure prints a zerFileStat warning and
 * returns False.
 */
static Boolean getFileAttribs( struct zcontext_t *cnt,
    const char *fileName, zoff_t *psize, ztime_t *plastModified)
{
  struct stat info;

  if( stat( fileName, &info) == 0 )
  {
    *psize = info.st_size;
    *plastModified = zTime( info.st_mtime, 0);
    return True;
  }

  cnt->printError( cnt, zefWarning | zerFileStat, fileName);
  *psize = -1;
  *plastModified = 0;
  return False;
}

/*
 * Indexes one file as a document of the indexer.
 *
 *   fi          - indexer that receives the document
 *   ij          - indexing job; supplies the context
 *   fileName    - path of the file to read
 *   url         - document URL passed to fluIndexerDocumentStart()
 *   contentType - CTYPE_HTML, CTYPE_TEXT, or anything else to index the
 *                 file name only
 *   table       - character recoding table for the file, or NULL
 *   torTable    - table handed on to indexString(), or NULL
 *   pwordCount  - passed to fluIndexerDocumentEnd()
 *
 * Returns:
 *    0  the document was indexed, or the file attributes could not be read
 *       (the file is skipped);
 *    1  fluIndexerDocumentAlive() reported false after the document start;
 *   -1  the document could not be started, or indexing, storing the content
 *       or ending the document failed.
 */
int indexFile( struct flu_indexer_t *fi, struct indexjob_t *ij,
    const char *fileName, const char *url, int contentType,
    const char *table, const char *torTable,
    unsigned zint_t *pwordCount)
{
  struct zcontext_t *cnt = ij->context;
  struct curfile_t curFile;
  Boolean success;
  zoff_t size;
  ztime_t lastModified;

  if( !getFileAttribs( cnt, fileName, &size, &lastModified) ) return 0;

  /* Index the file. */
  if( !fluIndexerDocumentStart( fi, url, size, lastModified, 0) ) return -1;
  if( !fluIndexerDocumentAlive( fi ) ) return 1;
  INIT_CUR_FILE( &curFile, cnt, fi, torTable);

  if( contentType == CTYPE_HTML || contentType == CTYPE_TEXT )
  {
    if( contentType == CTYPE_HTML )
      success = indexHtmlFile( &curFile, fileName, table);
    else
      success = indexTextFile( &curFile, fileName, table);
#ifdef INDEX_CONTENT_FILE_NAMES
    if( success ) success = indexFileName( &curFile, fileName, table);
#endif
  }
  else
    success = indexFileName( &curFile, fileName, table);

  if( success )
  {
    /* Cut the collected texts to their limits and record whether the
       content had to be truncated.
       NOTE(review): assumes E_MaxContentLength is a valid index into
       curFile.content - confirm where the option is validated. */
    curFile.title[MAX_TITLE_LENGTH] = '\0';
    curFile.allContent = (Boolean) (strlen( curFile.content ) <= E_MaxContentLength);
    curFile.content[E_MaxContentLength] = '\0';
    success = fluIndexerSetContent( fi, curFile.title, curFile.content, curFile.allContent);
  }

  /* The document is always ended, successfully or not. */
  if( !fluIndexerDocumentEnd( fi, success, pwordCount) ) success = False;

  if( !success ) return -1;
  return 0;
}
