/*
    FLUIdS - local search system
    Copyright (C) 1998, 2000  VVK (valera@sbnet.ru), CNII Center, Moscow

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/


#include "zdefs.h"
#include "_pstdio.h" /* <stdio.h> */
#include "_pstring.h" /* <string.h> */
#include "_ptime.h" /* <time.h> */
#include "_pdir.h" /* <dirent.h> */
#include "_pstat.h" /* <stat.h> */

#include "zcontext.h"
#include "zchars.h"
#include "zcharset.h"
#include "zerror.h"
#include "zfile.h"
#include "zstdio.h"
#include "zstdlib.h"
#include "zstring.h"
#include "ztime.h"

#include "cfg.h"
#include "error.h"
#include "html.h"
#include "indxfile.h"
#include "indexer.h"
#include "words.h"

#include "configur.h"
#include "indexjob.h"
#include "indexing.h"
#include "index.h"

/***************************************************************************/
/*                                                                         */
/*  Selection rules                                                        */
/*                                                                         */
/***************************************************************************/

/*
 * Look NAME up in a selection list.
 *
 * Every entry of the list carries a one-byte marker in its first character,
 * followed by the pattern text.  The first entry whose pattern matches NAME
 * decides the outcome: the result is true when that marker byte is non-zero
 * and false when it is '\0'.  When no pattern matches, False is returned.
 */
static Boolean testSelectionRule( const struct zstrcoll_t *selectionList,
    const char *name, unsigned int flags)
{
  int idx = 0;

  while( idx < selectionList->count )
  {
    if( zStringMatch( name, &selectionList->list[idx][1], flags) )
    {
      /* First match wins; its marker byte is the verdict */
      return (Boolean) (selectionList->list[idx][0] != '\0');
    }
    idx++;
  }

  return False;
}

/*
 * Decide whether a file with the given bare NAME may be indexed.
 *
 * The job's own "ignore file names" list is consulted when the job has
 * selection rules; otherwise the list of the job's defaults (if any, and if
 * they have selection rules) is used.  Returns False when the chosen list
 * flags the name, True in every other case (including ij == NULL).
 */
static Boolean testFile( struct indexjob_t *ij, const char *name)
{
  const struct zstrcoll_t *rules = NULL;

  if( ij == NULL ) return True;

  if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_SELECTS) )
    rules = &ij->ignoreFileNames;
  else if( ij->defaults != NULL &&
      zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_SELECTS) )
    rules = &ij->defaults->ignoreFileNames;

  if( rules != NULL &&
      testSelectionRule( rules, name, ij->context->patternFlags) )
    return False;

  return True;
}

/*
 * Decide whether the directory DIR may be descended into.
 *
 * Uses the "ignore directory names" list of the job when the job has
 * selection rules, otherwise the corresponding list of the job's defaults
 * (when present and carrying selection rules).  Returns False when the
 * chosen list flags DIR, True otherwise (including ij == NULL).
 */
static Boolean testDir( struct indexjob_t *ij, const char *dir)
{
  const struct zstrcoll_t *rules = NULL;

  if( ij == NULL ) return True;

  if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_SELECTS) )
    rules = &ij->ignoreDirNames;
  else if( ij->defaults != NULL &&
      zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_SELECTS) )
    rules = &ij->defaults->ignoreDirNames;

  if( rules != NULL &&
      testSelectionRule( rules, dir, ij->context->patternFlags) )
    return False;

  return True;
}

/*
 * Decide whether a document with the given URL (NAME) may be indexed.
 *
 * Uses the "ignore file URLs" list of the job when the job has selection
 * rules, otherwise the corresponding list of the job's defaults (when
 * present and carrying selection rules).  Returns False when the chosen
 * list flags the URL, True otherwise (including ij == NULL).
 */
static Boolean testUrl( struct indexjob_t *ij, const char *name)
{
  const struct zstrcoll_t *rules = NULL;

  if( ij == NULL ) return True;

  if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_SELECTS) )
    rules = &ij->ignoreFileURLs;
  else if( ij->defaults != NULL &&
      zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_SELECTS) )
    rules = &ij->defaults->ignoreFileURLs;

  if( rules != NULL &&
      testSelectionRule( rules, name, ij->context->patternFlags) )
    return False;

  return True;
}

/***************************************************************************/
/*                                                                         */
/*  Content type                                                           */
/*                                                                         */
/***************************************************************************/

/*
 * Determine the content type of the file called NAME.
 *
 * The content-type table of the job is searched when the job defines one;
 * otherwise the table of the job's defaults is searched (when present and
 * defined).  Each table entry holds the type code in its first byte and a
 * name pattern after it; the code of the first matching entry is returned.
 * DEFAULT_CONTENT_TYPE is returned when no table applies or nothing matches.
 */
static int getContentType( struct indexjob_t *ij, const char *name)
{
  int k;

  if( ij == NULL ) return DEFAULT_CONTENT_TYPE;

  if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_CTYPES) )
  {
    /* The job's own table takes precedence; defaults are not consulted */
    for( k = 0; k < ij->ctypes.count; k++)
    {
      if( !zStringMatch( name, &ij->ctypes.list[k][1], ij->context->patternFlags) )
        continue;
      return ij->ctypes.list[k][0];
    }
    return DEFAULT_CONTENT_TYPE;
  }

  if( ij->defaults == NULL ||
      !zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_CTYPES) )
    return DEFAULT_CONTENT_TYPE;

  for( k = 0; k < ij->defaults->ctypes.count; k++)
  {
    if( !zStringMatch( name, &ij->defaults->ctypes.list[k][1], ij->context->patternFlags) )
      continue;
    return ij->defaults->ctypes.list[k][0];
  }

  return DEFAULT_CONTENT_TYPE;
}

/***************************************************************************/
/*                                                                         */
/*  Charset                                                                */
/*                                                                         */
/***************************************************************************/

#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
#define SOURCE_ENCODING_NAME "CharsetSourceEnc"

/*
 * Scan the per-directory access file for a source-encoding directive.
 *
 * The file DIR/ACCESSFILE is read line by line.  The first line that starts
 * (after optional white space, case-insensitively) with SOURCE_ENCODING_NAME
 * followed by white space and a non-empty value ends the scan:
 *   - if the value names a known charset, the pair (DIR + slash, charset)
 *     is added to ij->accessCharsets;
 *   - otherwise a zerUnknownCharset error is reported.
 *
 * Returns False only when the file cannot be opened (an error is reported),
 * True in every other case.
 */
static Boolean parseAccessFile( struct indexjob_t *ij,
    const char *accessFile, const char *dir)
{
  FILE *fp;
  char path[2*MAX_FILENAME_LENGTH+1];
  char text[1024];

  zsprintf( path, sizeof( path ), "%.1000s" SLASH_STRING "%.1000s", dir, accessFile);
  fp = fopen( path, READ_T_MODE);
  if( fp == NULL )
  {
    ij->context->printError( ij->context, zerFileOpen, path);
    return False;
  }

  while( fgets( text, sizeof( text ), fp) != NULL )
  {
    char *value, *end, marker;
    int charset;

    /* Skip leading blanks, then require the directive keyword */
    value = text;
    while( isSpace( *value ) ) value++;
    if( strncasecmp( value, SOURCE_ENCODING_NAME, sizeof(SOURCE_ENCODING_NAME)-1) != 0 )
      continue;

    /* The keyword must be followed by white space and a value */
    value += sizeof(SOURCE_ENCODING_NAME)-1;
    if( !isSpace( *value ) ) continue;
    while( isSpace( *value ) ) value++;
    if( *value == '\0' ) continue;

    /* Cut the value at the first white space character */
    end = value;
    while( *end != '\0' && !isSpace( *end ) ) end++;
    *end = '\0';

    charset = zCharsetType( value );
    if( charset == ZCHARSET_UNKNOWN )
      ij->context->printError( ij->context, zerUnknownCharset, value);
    else
    {
      /* Remember the charset under the directory prefix "DIR/" */
      marker = (char) charset;
      zsprintf( path, sizeof( path ), "%.1000s" SLASH_STRING, dir);
      zParamCollectionAdd( &ij->accessCharsets, path, &marker, 1);
    }

    /* Only the first well-formed directive is honoured */
    break;
  }

  fclose( fp );
  return True;
}

/*
 * States of the small HTML scanner used by getConvertTableByMetaTag().
 * (No trailing comma after the last enumerator: C90 does not allow it.)
 */
typedef enum
{
  htsText,      /* plain text: skipping forward to the next '<'          */
  htsTagBegin,  /* positioned on '<': classify the tag (comment / META)  */
  htsTag,       /* inside a tag: walking through its attributes          */
  htsTagEnd,    /* tag finished: check whether the charset was found     */
  htsComment    /* inside an HTML comment: looking for the closing "-->" */
} htstatus_t;

/*
 * Kind of the attribute currently being parsed inside a META tag.
 */
typedef enum
{
  htaUnknown,   /* any attribute the scanner is not interested in */
  htaContent,   /* CONTENT=...                                    */
  htaHttpEquiv  /* HTTP-EQUIV=...                                 */
} htattr_t;

static Boolean getConvertTableByMetaTag( struct indexjob_t *ij,
    const char *name, const char **ptable)
{
  FILE *stream;
  char buf[CHARSET_META_TAG_BYTES+1];
  int n;

  if( (stream = fopen( name, READ_B_MODE)) == NULL )
  {
    ij->context->printError( ij->context, zerFileOpen, name);
    return False;
  }

  if( (n = fread( buf, 1, sizeof(buf)-1, stream)) < 0 )
  {
    ij->context->printError( ij->context, zerFileRead, name);
    fclose( stream );
    return False;
  }

  buf[sizeof(buf)-1] = '\0';
  fclose( stream );

  {
    register char *b = buf, *p;
    register int i;
    char c;
    int charset;
    htstatus_t status;
    htattr_t attr;
    Boolean isMeta, isContentType, quoted;

    for( status = htsText, i = 0; i < n; )
      switch( status )
      {
        case htsText:
          while( i < n && b[i] != '<' ) i++;
          status = htsTagBegin;
          break;

        case htsTagBegin:
          i++;
          if( (n-i) >= 3 && b[i] == '!' && b[i+1] == '-' && b[i+2] == '-' )
            status = htsComment;
          else
          {
            while( i < n && isSpace( b[i] ) ) i++;
            if( (n-i) >= 5 &&
		toLower( b[i] ) == 'm' && toLower( b[i+1] ) == 'e' &&
		toLower( b[i+2] ) == 't' && toLower( b[i+3] ) == 'a' &&
                isSpace( b[i+4] ) )
              isMeta = True;
            else
              isMeta = False;
            isContentType = False;
            charset = -1;

            while( i < n && b[i] != '>' && !isSpace( b[i] ) ) i++;
            if( i >= n ) break;
            status = (b[i] == '>' ) ? htsTagEnd : htsTag;
          }
          break;

        case htsTag:
          for( ; i < n; attr = htaUnknown)
          {
            while( i < n && isSpace( b[i] ) ) i++;
            if( i >= n ) break;
            p = &b[i];
            while( i < n && !isSpace( b[i] ) && b[i] != '>' && b[i] != '=' ) i++;
            if( i >= n ) break;
            if( b[i] == '>' ) break;
            if( b[i] == '=' )
            {
              i++;
              if( i >= n ) break;

              attr = htaUnknown;
              if( isMeta )
                if( strncasecmp( p, "HTTP-EQUIV", 10) == 0 && p[10] == '=' )
                  attr = htaHttpEquiv;
                else if( strncasecmp( p, "CONTENT", 7) == 0 && p[7] == '=' )
                  attr = htaContent;

              quoted = False;
              if( i < n && b[i] == '\"' )
              {
                quoted = True;
                i++;
              }

              p = &b[i];
              if( quoted )
                while( i < n && b[i] != '\"' ) i++;
              else
                while( i < n && !isSpace( b[i] ) && b[i] != '>' ) i++;
              if( i >= n ) break;

              switch( attr )
              {
                case htaHttpEquiv:
                  if( quoted ) while( isSpace( *p ) ) p++;
                  if( strncasecmp( p, "Content-Type", 12) == 0 &&
                      (isSpace( p[12] ) ||
                      ((quoted && p[12] == '\"') || (!quoted && p[12] == '>'))) )
                    isContentType = True;
                  break;
                case htaContent:
                  c = b[i];
                  b[i] = '\0';
                  if( (p = strchr( p, ';')) != NULL )
                  {
                    ++p;
                    while( isSpace( *p ) ) p++;
                    if( strncasecmp( p, "charset=", 8) == 0 ) charset = zCharsetType( &p[8] );
                  }
                  b[i] = c;
                  break;
              }
              if( b[i] == '>' || (isContentType && charset >= 0) ) break;
              if( quoted ) i++;
            }
          }
          status = htsTagEnd;
          break;

        case htsTagEnd:
          if( isContentType && charset >= 0 )
          {
            *ptable = any2anyTables[charset][ij->context->localCharset];
            return True;
          }
          i++;
          status = htsText;
          break;

        case htsComment:
          for( i += 3; ; i++)
          {
            while( i < n && b[i] != '-' ) i++;
            if( i >= n ) break;
            if( (n-i) >= 3 && b[i+1] == '-' && b[i+2] == '>' ) break;
          }
          i += 3;
          status = htsText;
          break;
      }
  }

  return False;
}

/*
 * Find a conversion table for NAME among the charsets collected from
 * per-directory access files.
 *
 * Every entry of ij->accessCharsets holds a charset code in its first byte
 * followed by a path prefix.  The first entry whose prefix is a leading
 * part of NAME (compared with strnsyscmp) selects the table, which is
 * stored in *ptable.  Returns True when an entry matched, False otherwise.
 */
static Boolean getConvertTableByAccessFile( struct indexjob_t *ij,
    const char *name, const char **ptable)
{
  int k = 0;

  while( k < ij->accessCharsets.count )
  {
    if( strnsyscmp( &ij->accessCharsets.list[k][1], name,
          strlen( &ij->accessCharsets.list[k][1] )) == 0 )
    {
      *ptable = any2anyTables[ij->accessCharsets.list[k][0]][ij->context->localCharset];
      return True;
    }
    k++;
  }

  return False;
}

/*
 * Find a conversion table for NAME using the configured charset rules.
 *
 * The rule list of the job is used when the job has one, otherwise the
 * list of the job's defaults (when present and defined).  Each rule holds
 * a charset code in its first byte followed by a name pattern; the first
 * rule whose pattern matches NAME selects the table stored in *ptable.
 * Returns True when a rule matched, False otherwise.
 */
static Boolean getConvertTableByDirective( struct indexjob_t *ij,
    const char *name, const char **ptable)
{
  struct zparamcoll_t *rules = NULL;
  int k;

  if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_CHARSETS) )
    rules = &ij->charsets;
  else if( ij->defaults != NULL && zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_CHARSETS) )
    rules = &ij->defaults->charsets;

  if( rules == NULL ) return False;

  k = 0;
  while( k < rules->count )
  {
    if( zStringMatch( name, &rules->list[k][1], ij->context->patternFlags) )
    {
      *ptable = any2anyTables[rules->list[k][0]][ij->context->localCharset];
      return True;
    }
    k++;
  }

  return False;
}

/*
 * Select the charset conversion table for the file NAME of content type
 * CTYPE, or NULL when none applies.
 *
 * Nothing is done (NULL) when there is no job or the local charset is
 * unknown.  When the job (or, failing that, its defaults) defines an
 * ordered list of detection methods, the methods are tried in that order
 * up to the first CMETHOD_NONE; the META-tag method is attempted for HTML
 * files only.  Without such a list, the access-file method and then the
 * directive method are tried.
 */
static const char *getConvertTable( struct indexjob_t *ij, const char *name, int ctype)
{
  const char *table;
  int *methods = NULL;
  int k;

  if( ij == NULL || ij->context->localCharset == ZCHARSET_UNKNOWN ) return NULL;

  if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_CHARSET_METHODS) )
    methods = ij->charsetMethods;
  else if( ij->defaults != NULL && zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_CHARSET_METHODS) )
    methods = ij->defaults->charsetMethods;

  if( methods == NULL )
  {
    /* No explicit order configured: built-in sequence */
    if( getConvertTableByAccessFile( ij, name, &table) ) return table;
    if( getConvertTableByDirective( ij, name, &table) ) return table;
    return NULL;
  }

  for( k = 0; k < CMETHOD_LAST && methods[k] != CMETHOD_NONE; k++)
  {
    if( methods[k] == CMETHOD_BY_ACCESS_FILE )
    {
      if( getConvertTableByAccessFile( ij, name, &table) ) return table;
    }
    else if( methods[k] == CMETHOD_BY_META_TAG )
    {
      if( ctype == CTYPE_HTML && getConvertTableByMetaTag( ij, name, &table) ) return table;
    }
    else if( methods[k] == CMETHOD_BY_DIRECTIVE )
    {
      if( getConvertTableByDirective( ij, name, &table) ) return table;
    }
    /* any other method code is silently skipped */
  }

  return NULL;
}

/*
 * Return the "convert to Russian" table that applies to the job: the job's
 * own table when the job defines one, otherwise the table of the job's
 * defaults when those exist and define one, otherwise NULL.
 */
const char *getConvertToRussianTable( struct indexjob_t *ij )
{
  if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_CONVERT_TO_RUSSIAN) )
    return ij->torTable;

  if( ij->defaults == NULL )
    return NULL;

  if( !zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_CONVERT_TO_RUSSIAN) )
    return NULL;

  return ij->defaults->torTable;
}
#endif

/***************************************************************************/
/*                                                                         */
/*  Processing the directory list                                          */
/*                                                                         */
/***************************************************************************/

/*
 * Swap the indexer's data out when the indexer says it is ready for it.
 *
 * FINAL is passed through to the indexer and selects the wording of the
 * progress message.  At verbosity 1 and above the swap is announced with a
 * running number and, when available, the elapsed time.
 *
 * Returns False only when fluIndexerSwapping() fails; True when no swap
 * was needed or the swap succeeded.
 */
static Boolean swapping( struct zcontext_t *cnt, struct flu_indexer_t *fi, Boolean final)
{
  static int count = 0;                 /* number of announced swaps so far */
  struct ztimeval_t started, finished;
  char elapsed[40];

  if( !fluIndexerReadyForSwapping( fi, final) ) return True;

  if( cnt->verboseLevel >= 1 )
  {
    ++count;
    /* "%s" + "wapping": prints "Swapping" or "Final swapping" */
    zprintf( cnt, "%swapping (#%d) ... ", final ? "Final s" : "S", count);
    zInitTimeValue( &started );
  }

  if( !fluIndexerSwapping( fi, final) ) return False;

  if( cnt->verboseLevel < 1 ) return True;

  zInitTimeValue( &finished );
  if( *zGetTimeValue( elapsed, sizeof( elapsed ), &finished, &started) == '\0' )
    zprintf( cnt, "- OK\n");
  else
    zprintf( cnt, "- OK (%s)\n", elapsed);

  return True;
}

/*
 * Index the directory DIR and, recursively, its subdirectories.
 *
 * ij      - indexing job (selection rules, content types, replaces, ...)
 * fi      - indexer receiving the documents
 * dir     - path of the directory, without a trailing slash ("/" itself
 *           is accepted)
 * doCheck - when true, the "ignore directory containing file" rules are
 *           applied to this directory; the top-level caller passes False,
 *           recursive calls pass True
 *
 * The directory is read completely first (entries are collected into a
 * file list and a subdirectory list), then the files are indexed and
 * finally the subdirectories are processed.
 *
 * Returns False when swapping or indexing a file fails (here or in a
 * subdirectory); True otherwise.  A directory that cannot be opened or is
 * refused by the rules only produces a warning/message and counts as
 * success.
 */
static Boolean processDir( struct indexjob_t *ij, struct flu_indexer_t *fi,
    const char *dir, Boolean doCheck)
{
  struct zcontext_t *cnt = ij->context;
  DIR *dd;
  struct dirent *dp;
  struct stat statBuf;
  struct zstrcoll_t fileList[1], dirList[1];
  char buf[2*MAX_FILENAME_LENGTH+1];
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
  const char *accessFile;
  Boolean hasAccessFile = False;
#endif
  int i, j, rv;
  Boolean success = True;

  if( cnt->verboseLevel == 2 )
    zprintf( cnt, "Indexing the directory %s ...\n", dir);
  else if( cnt->verboseLevel > 2 )
    zprintf( cnt, "\nProcessing the directory %s:\n", dir);

  zStringCollectionInit( cnt, fileList, 0, 20, zcfSorted);
  zStringCollectionInit( cnt, dirList, 0, 10, zcfSorted);

#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
  /* Name of the per-directory access file, if one is configured */
  if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_ACCESS_FILE) )
    accessFile = ij->accessFile;
  else if( ij->defaults != NULL && zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_ACCESS_FILE) )
    accessFile = ij->defaults->accessFile;
  else
    accessFile = NULL;
#endif

  if( (dd = opendir( (char *) dir )) == NULL )
  {
    cnt->printError( cnt, zefWarning | zerDirOpen, dir);
    /* Release the collections on this exit path as on every other one */
    zStringCollectionFree( fileList );
    zStringCollectionFree( dirList );
    return True;
  }
  /* For the root directory use an empty prefix so that the paths built
     below come out as "/name" rather than "//name". */
  if( dir[0] == SLASH && dir[1] == '\0' ) dir = "";

  /* Pass 1: read the directory, sorting entries into files and subdirs */
  while( (dp = readdir( dd )) != NULL )
  {
    /* Skip "." and ".." */
    if( dp->d_name[0] == '.' && (dp->d_name[1] == '\0' ||
          (dp->d_name[1] == '.' && dp->d_name[2] == '\0')) ) continue;

#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
    /* The access file itself is noted but never indexed */
    if( accessFile != NULL && !hasAccessFile )
      if( strsyscmp( accessFile, dp->d_name) == 0 )
      {
        hasAccessFile = True;
        continue;
      }
#endif

    zsprintf( buf, sizeof( buf ), "%.1000s" SLASH_STRING "%.1000s", dir, dp->d_name);

#if defined( HAVE_LSTAT ) && defined( S_IFLNK )
    /* Symbolic links are skipped unless following links is enabled.
       The file type is extracted with S_IFMT, like the tests below. */
    if( !E_FollowLinks && lstat( buf, &statBuf) == 0 &&
         (statBuf.st_mode & S_IFMT) == S_IFLNK ) continue;
#endif

    if( stat( buf, &statBuf) != 0 )
    {
      cnt->printError( cnt, zefWarning | zerFileStat, buf);
      continue;
    }

    if( (statBuf.st_mode & S_IFMT) == S_IFDIR )
      (void) zStringCollectionAdd( dirList, dp->d_name, 0);
    else if( (statBuf.st_mode & S_IFMT) == S_IFREG )
      (void) zStringCollectionAdd( fileList, dp->d_name, 0);
    else
      cnt->printError( cnt, zefWarning | zerNotRegularFile, buf);
  }

  closedir( dd );

  /* Optional check: refuse the whole directory when it contains a file
     matched by a rule with a non-zero marker; a match on a rule with a
     zero marker accepts the directory immediately. */
  if( doCheck && ij != NULL )
  {
    struct zstrcoll_t *ignoreDirWithFile = NULL;
    if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_SELECTS) )
      ignoreDirWithFile = &ij->ignoreDirWithFile;
    else if( ij->defaults != NULL &&
             zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_SELECTS) )
      ignoreDirWithFile = &ij->defaults->ignoreDirWithFile;
    if( ignoreDirWithFile != NULL )
      for( j = 0; j < ignoreDirWithFile->count; j++)
        for( i = 0; i < fileList->count; i++)
          if( zStringMatch( fileList->list[i], &ignoreDirWithFile->list[j][1], cnt->patternFlags) )
          {
            if( ignoreDirWithFile->list[j][0] == '\0' )
              goto cok;
            else
            {
              if( cnt->verboseLevel >= 2 )
                zprintf( cnt, "    Refusing: the directory contains forbidden file '%s'\n",
                  fileList->list[i]);
              zStringCollectionFree( fileList );
              zStringCollectionFree( dirList );
              return True;
            }
          }
    cok: ;
  }

#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
  /* Pick up the charset declared for this directory before indexing */
  if( hasAccessFile ) (void) parseAccessFile( ij, accessFile, dir);
#endif

  /* Pass 2: index the regular files of this directory */
  for( i = 0; i < fileList->count; i++)
  {
    int contentType;
    unsigned zint_t wordCount;
    const char *table = NULL, *torTable = NULL, *url;

    if( !testFile( ij, fileList->list[i]) ) continue;
    if( (contentType = getContentType( ij, fileList->list[i])) == CTYPE_UNKNOWN )
      continue;

    if( !swapping( cnt, fi, False) )
    {
      success = False;
      break;
    }

    zsprintf( buf, sizeof( buf ), "%.1000s" SLASH_STRING "%.1000s", dir, fileList->list[i]);

    /* Rules applied to the full path of the file */
    if( ij != NULL )
    {
      struct zstrcoll_t *ignoreFullPathFiles = NULL;
      if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_SELECTS) )
        ignoreFullPathFiles = &ij->ignoreFullPathFiles;
      else if( ij->defaults != NULL &&
               zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_SELECTS) )
        ignoreFullPathFiles = &ij->defaults->ignoreFullPathFiles;
      if( ignoreFullPathFiles != NULL && testSelectionRule(
                     ignoreFullPathFiles, buf, cnt->patternFlags) ) continue;
    }

    /* Derive the document URL from the path and apply the URL rules */
    {
      struct zreplace_t *rpl = NULL;
      if( ij != NULL )
      {
        if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_REPLACES) )
          rpl = &ij->replaces;
        else if( ij->defaults != NULL &&
                 zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_REPLACES) )
          rpl = &ij->defaults->replaces;
      }
      url = (rpl != NULL) ? zReplaceApply( rpl, buf, 0) : buf;

      if( ij != NULL && !testUrl( ij, url) ) continue;
    }

    if( cnt->verboseLevel > 2 )
      zprintf( cnt, "    %s", fileList->list[i]);
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
    table = getConvertTable( ij, buf, contentType);
    torTable = getConvertToRussianTable( ij );
#endif
    /* rv < 0: failure; rv > 0: the file was already indexed */
    if( (rv = indexFile( fi, ij, buf, url, contentType, table, torTable, &wordCount)) < 0 )
    {
      success = False;
      break;
    }

    if( cnt->verboseLevel > 2 )
    {
      if( rv > 0 )
        zprintf( cnt, " (already indexed!)\n");
      else if( wordCount == 0 )
        zprintf( cnt, " (no words)\n");
      else
        zprintf( cnt, " (%" _ZINT_FORMAT "u word%s)\n",
          wordCount, (wordCount == 1) ? "" : "s");
    }
  }
  zStringCollectionFree( fileList );

  /* Pass 3: descend into the subdirectories (skipped after a failure) */
  if( success ) for( i = 0; i < dirList->count; i++)
  {
    zsprintf( buf, sizeof( buf ), "%.1000s" SLASH_STRING "%.1000s", dir, dirList->list[i]);

    if( !testDir( ij, buf) ) continue;

    if( !processDir( ij, fi, buf, True) )
    {
      success = False;
      break;
    }
  }
  zStringCollectionFree( dirList );

  return success;
}

/*
 * Index every object named in INDEXLIST on behalf of the job IJ.
 *
 * Each entry is copied, stripped of trailing slashes (an entry consisting
 * only of slashes becomes the root directory) and examined with stat():
 *   - a directory is handed to processDir() (without the
 *     "directory contains forbidden file" check);
 *   - a regular file is checked against the file-name rules, gets its
 *     content type and URL determined and is passed to indexFile();
 *   - anything else, and anything stat() fails on, yields a warning.
 *
 * Returns False as soon as processDir(), swapping() or indexFile() reports
 * a failure, True otherwise.
 */
static Boolean processIndexList( struct flu_indexer_t *fi,
    struct indexjob_t *ij, const struct zstrcoll_t *indexList)
{
  struct zcontext_t *cnt = ij->context;
  char path[MAX_FILENAME_LENGTH];
  struct stat st;
  int n, len;

  for( n = 0; n < indexList->count; n++)
  {
    /* Private, always terminated copy of the entry */
    strncpy( path, indexList->list[n], sizeof( path ));
    path[ sizeof( path ) - 1 ] = '\0';

    /* Drop trailing slashes; nothing left means the root directory */
    for( len = strlen( path ); len > 0 && path[len-1] == SLASH; )
      path[--len] = '\0';
    if( path[0] == '\0' )
    {
      path[0] = SLASH;
      path[1] = '\0';
    }

    if( stat( path, &st) != 0 )
    {
      cnt->printError( cnt, zefWarning | zerFileStat, path);
      continue;
    }

    /* Directory */
    if( (st.st_mode & S_IFMT) == S_IFDIR )
    {
      if( cnt->verboseLevel == 1 )
        zprintf( cnt, "Indexing directory \"%s\"\n", path);
      if( !processDir( ij, fi, path, False) ) return False;
      continue;
    }

    /* Neither a directory nor a regular file */
    if( (st.st_mode & S_IFMT) != S_IFREG )
    {
      cnt->printError( cnt, zefWarning | zerNotRegularFile, path);
      continue;
    }

    /* Regular file */
    {
      char *slash, *base;
      int contentType, rv;
      unsigned zint_t wordCount;
      const char *table = NULL, *torTable = NULL, *url;
      struct zreplace_t *rules = NULL;

      /* Split off the bare file name; the path is cut at the last slash
         while the name-based checks run and is restored afterwards. */
      slash = strrchr( path, SLASH);
      if( slash != NULL )
      {
        *slash = '\0';
        base = slash + 1;
      }
      else
        base = path;

      if( !testFile( ij, base) ) continue;
      if( !swapping( cnt, fi, False) ) return False;

      contentType = getContentType( ij, base);
      if( slash != NULL ) *slash = SLASH;

      if( cnt->verboseLevel >= 2 ) zprintf( cnt, "\n");
      if( cnt->verboseLevel >= 1 ) zprintf( cnt, "Indexing file \"%s\"", path);

      /* Map the path to the document URL */
      if( ij != NULL )
      {
        if( zCheckFlags( ij->flags, INDEXJOB_FLAG_HAVE_REPLACES) )
          rules = &ij->replaces;
        else if( ij->defaults != NULL &&
                 zCheckFlags( ij->defaults->flags, INDEXJOB_FLAG_HAVE_REPLACES) )
          rules = &ij->defaults->replaces;
      }
      url = (rules != NULL) ? zReplaceApply( rules, path, 0) : path;

#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
      table = getConvertTable( ij, path, contentType);
      torTable = getConvertToRussianTable( ij );
#endif
      /* rv < 0: failure; rv > 0: the file was already indexed */
      rv = indexFile( fi, ij, path, url, contentType, table, torTable, &wordCount);
      if( rv < 0 ) return False;

      if( cnt->verboseLevel >= 1 )
      {
        if( rv > 0 )
          zprintf( cnt, " (already indexed!)\n");
        else if( wordCount == 0 )
          zprintf( cnt, " (no words)\n");
        else
          zprintf( cnt, " (%" _ZINT_FORMAT "u word%s)\n",
            wordCount, (wordCount == 1) ? "" : "s");
      }
    }
  }

  return True;
}

/***************************************************************************/
/*                                                                         */
/*  Main indexing subroutine                                               */
/*                                                                         */
/***************************************************************************/

/*
 * Error callback handed to the indexer: copies the error state of the
 * indexer's context into the application context stored in fi->info
 * (via ZERROR_COPY) and reports the error through that context.
 */
static void iPrintError( struct flu_indexer_t *fi, unsigned zint_t errorCode, const char *name)
{
  struct zcontext_t *owner = (struct zcontext_t *) fi->info;

  ZERROR_COPY( owner, fi->context);
  owner->printError( owner, errorCode, name);
}

/*
 * Out-of-memory callback handed to the indexer: reports zerNoMemory for
 * PROG through the application context stored in fi->info.
 */
static void iMemoryFail( struct flu_indexer_t *fi, const char *prog)
{
  struct zcontext_t *owner = (struct zcontext_t *) fi->info;

  owner->printError( owner, zerNoMemory, prog);
}

/*
 * Word filter handed to fluIndexerClose().
 *
 * A word that occurs in FILECOUNT files is turned into a stop word (and
 * False is returned) when all of the following hold, tested in this order:
 *   - FILECOUNT reaches E_IgnoreFilesLimit;
 *   - E_IgnorePercentLimit does not exceed 100;
 *   - the word is not in the list of valid words;
 *   - the share of files containing the word, in percent of the total file
 *     count of the index, reaches E_IgnorePercentLimit.
 * Otherwise the word is kept and True is returned.
 */
static Boolean iCheckWord( struct flu_indexer_t *fi, const char *word, unsigned int fileCount)
{
  /* The guards are written as negations of the acceptance conditions so
     that the result for unordered floating-point values stays the same. */
  if( !(fileCount >= E_IgnoreFilesLimit) ) return True;
  if( !(E_IgnorePercentLimit <= 100) ) return True;
  if( isValidWord( fi->context, word) ) return True;
  if( !(((float) fileCount / fi->pif->header.fileCount) * 100.0 >= (float) E_IgnorePercentLimit) )
    return True;

  addStopWord( fi->context, word, WORD_PRESENT);
  return False;
}

/*
 * Progress callback handed to fluIndexerClose(): prints a message for the
 * reported STEP when the verbosity level is above zero.  PARAM carries the
 * count belonging to the step (words, stop words or files); unknown steps
 * print nothing.
 */
static void iTracer( struct flu_indexer_t *fi, int step, unsigned zint_t param)
{
  struct zcontext_t *cnt = (struct zcontext_t *) fi->info;
  const char *plural = (param == 1) ? "" : "s";

  if( !(cnt->verboseLevel > 0) ) return;

  switch( step )
  {
#if defined( FLUIDS43 )
    case fisStepStartDirlist:
      zprintf( cnt, "\nSorting document URLs... ");
      break;

    case fisStepEndDirlist:
      zprintf( cnt, "done.\n");
      break;
#endif

    case fisStepStartWordlist:
      zprintf( cnt, "Writing main index... ");
      break;

    case fisStepEndWordlist:
      /* "%[ ... %]" sections are zprintf conditionals, each driven by the
         flag argument that precedes its data */
      zprintf( cnt, "%[%" _ZINT_FORMAT "u unique word%s%]%[no words%] indexed.\n",
        (param != 0), param, plural, (param == 0));
      break;

    case fisStepStopWords:
      zprintf( cnt, "Stop words: %[%" _ZINT_FORMAT "u%]%[none%].\n",
        (param != 0), param, (param == 0));
      break;

    case fisStepStartDoclist:
      zprintf( cnt, "Writing file index... ");
      break;

    case fisStepEndDoclist:
      zprintf( cnt, "%[%" _ZINT_FORMAT "d file%s%]%[no files%] indexed.\n",
        (param != 0), param, plural, (param == 0));
      break;
  }
}

Boolean performIndex( struct zcontext_t *cnt, const char *indexFileName,
    const struct zstrcoll_t *indexList, unsigned int flags)
{
  struct ztimeval_t timer[2];
  struct indexjob_t *ij;
  Boolean success = True;
  char buf[ZMAX_FILE_NAME_SIZE];
  const char *indexFile;
  struct flu_indexer_t indexer;

/* ந樠㥬 㥬  ६ */
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
  zSetFlags( cnt->patternFlags, smfUseSequenceTable);
#endif
#ifdef HAVE_CASEINDEP_FILE_NAMES
  zSetFlags( cnt->patternFlags, smfCaseIndep | smfBraCaseIndep);
#endif
  markupTagChars();

/* 䨪㥬  砫  樨 */
  if( cnt->verboseLevel > 0 ) zInitTimeValue( &timer[0] );

/* । 䠩  ᮧ ᭮ 䠩 */
  if( zCheckFlags( flags, pifOverwrite) && !zCheckFlags( flags, pifReindexAll | pifUpdate) )
    indexFile = indexFileName;
  else if( (indexFile = zMakeTempName( cnt, buf, sizeof( buf ), indexFileName, True)) == NULL )
  {
    cnt->printError( cnt, cnt->errorCode, NULL);
    return False;
  }

/* ந樠㥬 , ஥  䠩  ⠭ 
   ࠬ */
  if( !fluIndexerInit( &indexer, iPrintError, iMemoryFail, E_TempDir, cnt) ||
      (E_MaxSwapFiles > 0 &&
       !fluIndexerSetSwapCount( &indexer, E_MaxSwapFiles, E_MaxMemoryVolum)) ||
      !fluIndexerAddStopWords( &indexer, (const char **) E_StopWords.list, E_StopWords.count) ||
      !fluIndexerAddValidWords( &indexer, (const char **) E_ValidWords.list, E_ValidWords.count) )
  {
    fluIndexerFree( &indexer );
    return False;
  }
  success = fluIndexerOpen( &indexer, indexFile, NULL, E_IndexName,
    E_IndexDescription, E_IndexPointer, E_IndexAdmin, fifInfoSafe);

/* ᫨  室 २஢ ,  ᭠砫 
    䠩 */
  if( success && zCheckFlags( flags, pifReindexAll | pifUpdate) )
    success = fluIndexerAddIndexFile( &indexer, indexFileName,
      zCheckFlags( flags, pifReindexAll) ? fifReindexAll : 0);

/* ந㥬  䠩 */
  if( success && indexList != NULL )
    if( zStringCollectionEmpty( indexList ) )
    {
      for( ij = indexJobs; ij != NULL; ij = ij->next)
        if( ij->defaults != NULL )
          if( !(success = processIndexList( &indexer, ij, &ij->objects)) ) break;
    }
    else
      success = processIndexList( &indexer, indexJobs, indexList);

/*  ᢮ */
  if( success ) success = swapping( cnt, &indexer, True);

/* ᨬ  樨    ஥  䠩 */
  if( success ) success = fluIndexerClose( &indexer, iCheckWord, iTracer, 0);
  fluIndexerFree( &indexer );

/* ६    䠩 */
  if( success )
    if( !zCheckFlags( flags, pifOverwrite) || zCheckFlags( flags, pifReindexAll | pifUpdate) )
    {
#if defined( __MSDOS__ ) || defined( __WIN32__ ) || defined( __OS2__ )
      unlink( indexFileName );
#endif
      if( rename( indexFile, indexFileName) != 0 )
      {
        cnt->errorPtr = indexFile;
        cnt->printError( cnt, zerTempFileRename, indexFileName);
        success = False;
      }
    }
#ifdef INDEX_PERMISIONS
  if( success ) chmod( indexFileName, INDEX_PERMISIONS);
#endif

/* ᫨ ந諠 訡  樨, 㭨⮦ ࠧ  䠩 */
  if( !success ) unlink( indexFile );

/* 騬  ६, 祭   樨 */
  if( success && cnt->verboseLevel > 0 )
  {
    char buf[256];
    zInitTimeValue( &timer[1] );
    if( *zGetTimeValue( buf, sizeof( buf ), &timer[1], &timer[0]) != '\0' )
      zprintf( cnt, "Running time: %s\n", buf);
    zprintf( cnt, "Done.\n");
  }

  return success;
}
