/*
    FLUIdS - local search system
    Copyright (C) 1998, 2000  VVK (valera@sbnet.ru), CNII Center, Moscow

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/


#include "zdefs.h"
#include "_pstdio.h" /* <stdio.h> */
#include "_pstring.h" /* <string.h> */
#include <stdlib.h>
#ifdef CHECK
#include <assert.h>
#endif

#include "zcontext.h"
#include "zalloc.h"
#include "zcharset.h"
#include "zstdlib.h"

#include "cfg.h"
#include "error.h"
#include "fludata.h"
#include "words.h"
#include "wtrack.h"
#include "indxfile.h"

/*
 * Read the fixed-size header and the section offset table from the
 * current position of pif->file, store them in pif->header and
 * pif->offsets, and validate them.
 *
 *   pif         index file; pif->file and pif->context must already be set
 *   dontRecode  when True no recoding tables are selected (only looked at
 *               when both RUSSIAN_SUPPORT and RUSSIAN_RELEASE are defined)
 *
 * Returns True on success.  Returns False when either read fails, or --
 * after reporting through the context's printError -- when the signature,
 * checksum, version, offsets (CHECK builds only) or file count is rejected.
 */
static Boolean ifReadHeader( struct indexfile_t *pif, Boolean dontRecode)
{
  struct zcontext_t *cnt = pif->context;
  unsigned char hdrStorage[INDEX_HEADER_SIZE];
  unsigned char offStorage[INDEX_OFFSET_COUNT*4];
  const unsigned char *hdr;
  const unsigned char *offs;
  int k;

  /* Use the pointers returned by the reader rather than the local
     buffers directly. */
  hdr = zFileObjectRead( pif->file, hdrStorage, INDEX_HEADER_SIZE, True);
  if( hdr == NULL )
    return False;

  offs = zFileObjectRead( pif->file, offStorage, 4 * INDEX_OFFSET_COUNT, True);
  if( offs == NULL )
    return False;

  /* Decode every header field first; validation follows below. */
  for( k = 0; k < 4; k++ )
    pif->header.id[k] = hdr[k];
  pif->header.version = hdr[4];
  pif->header.subversion = hdr[5];
  /* The checksum is recomputed over both buffers, not copied from a byte. */
  pif->header.checksum = ifChecksum( (unsigned char *) hdr, offs);
  pif->header.charset = hdr[7];
  pif->header.wordCount = zGetLong( &hdr[8], INDEX_FILE_ENDIAN);
  pif->header.fileCount = zGetLong( &hdr[12], INDEX_FILE_ENDIAN);
  pif->header.fileSize = (zoff_t) zGetLong( &hdr[16], INDEX_FILE_ENDIAN);
  pif->header.timeStamp = (ztime_t) zGetLong( &hdr[20], INDEX_FILE_ENDIAN);

  /* Signature must be the four bytes 'F' 'L' 'U' '\0'. */
  if( !(pif->header.id[0] == 'F' && pif->header.id[1] == 'L' &&
        pif->header.id[2] == 'U' && pif->header.id[3] == '\0') )
  {
    cnt->printError( cnt, errIndexFileId, pif->alias);
    return False;
  }

  /* A valid header makes the computed checksum come out as zero. */
  if( pif->header.checksum != 0 )
  {
    cnt->printError( cnt, errIndexChecksum, pif->alias);
    return False;
  }

  /* Only the exact version/subversion pair is accepted. */
  if( pif->header.version != INDEX_VERSION ||
      pif->header.subversion != INDEX_SUBVERSION )
  {
    cnt->printError( cnt, errIndexFileVersion, pif->alias);
    return False;
  }

  for( k = 0; k < INDEX_OFFSET_COUNT; k++ )
    pif->offsets[k] = (zoff_t) zGetLong( &offs[4*k], INDEX_FILE_ENDIAN);

#ifdef CHECK
  /* The first twelve offsets are treated as six (start, end) pairs: each
     pair must be ordered, start at or above MIN_OFFSET_VALUE and end no
     later than the file size recorded in the header. */
  for( k = 0; k < 12; k += 2 )
  {
    if( pif->offsets[k] > pif->offsets[k+1] ||
        pif->offsets[k+1] > pif->header.fileSize ||
        pif->offsets[k] < MIN_OFFSET_VALUE )
    {
      cnt->printError( cnt, errIndexOffsets, pif->alias);
      return False;
    }
  }

  /* No two of those ranges may overlap. */
  for( k = 0; k < 10; k += 2 )
  {
    int m;
    for( m = k + 2; m < 12; m += 2 )
    {
      if( pif->offsets[k+1] > pif->offsets[m] &&
          pif->offsets[m+1] > pif->offsets[k] )
      {
        cnt->printError( cnt, errIndexOffsets, pif->alias);
        return False;
      }
    }
  }
#endif

  if( pif->header.fileCount > MAX_FILENUM )
  {
    cnt->printError( cnt, errIndexFileCount, pif->alias);
    return False;
  }

#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
  /* Recoding tables are selected only when both charsets are known and
     the caller did not ask to skip recoding. */
  pif->charset = zToLocalCharset( pif->header.charset );
  if( pif->charset != ZCHARSET_UNKNOWN && !dontRecode &&
      cnt->localCharset != ZCHARSET_UNKNOWN )
  {
    pif->recodeTable = any2anyTables[pif->charset][cnt->localCharset];
    pif->convTable = any2anyTables[cnt->localCharset][pif->charset];
  }
  else
  {
    pif->recodeTable = NULL;
    pif->convTable = NULL;
  }
#endif

  return True;
}

static Boolean ifReadWordTrack( struct indexfile_t *pif )
{
  struct wordtrack_t *wt = &pif->wordTrack;
  zint_t maybeCount = (pif->offsets[OFFSET_WORDTRACK_END] -
    pif->offsets[OFFSET_WORDTRACK_START] - 225 * 2) / 5;
#ifdef CHECK
  unsigned zshort_t i;
#endif

  if( !ifSeek( pif, pif->offsets[OFFSET_WORDTRACK_START]) ) return False;
  if( !wtRead( wt, pif->file) ) return False;

  if( (zint_t) wt->deltas[224] != maybeCount )
  {
    pif->context->printError( pif->context, errIndexWordTrackSize, pif->alias);
    return False;
  }

#ifdef CHECK
  for( i = 0; i < wt->deltas[224]; i++)
  {
    zoff_t offset = (zoff_t) zGetLong( &wt->offsets[4*i], INDEX_FILE_ENDIAN);
    if( offset < pif->offsets[OFFSET_WORDS_START] ||
        offset > pif->offsets[OFFSET_WORDS_END] )
    {
      pif->context->printError( pif->context, errIndexWordTrackOffsets, pif->alias);
      return False;
    }
  }
#endif

  return True;
}

/*
 * Load the per-file offset table (four bytes per file) into
 * pif->fileOffsets.
 *
 * Nothing is read when the header reports zero files.  Otherwise the
 * section size divided by four must equal header.fileCount.
 *
 * Returns True on success (including the zero-file case).  Returns False
 * when the seek or read fails, or -- after reporting through printError --
 * when the section size does not match or (CHECK builds only) an offset
 * lies outside the files section.
 */
static Boolean ifReadFileOffsets( struct indexfile_t *pif )
{
  unsigned char *table;
#ifdef CHECK
  unsigned int n;
#endif

  if( pif->header.fileCount == 0 )
    return True;

  if( (zoff_t) pif->header.fileCount !=
      (pif->offsets[OFFSET_FILEOFFSETS_END] -
       pif->offsets[OFFSET_FILEOFFSETS_START]) / 4 )
  {
    pif->context->printError( pif->context, errIndexFileOffsetsCount, pif->alias);
    return False;
  }

  if( !ifSeek( pif, pif->offsets[OFFSET_FILEOFFSETS_START]) )
    return False;

  /* Keep whatever pointer the reader hands back as the table. */
  table = zFileObjectRead( pif->file, pif->fileOffsets,
            4 * (size_t) pif->header.fileCount, True);
  if( table == NULL )
    return False;
  pif->fileOffsets = table;

#ifdef CHECK
  /* XXX: the original author flagged this check; the note itself was lost. */
  for( n = 0; n < (unsigned int) pif->header.fileCount; n++ )
  {
    zoff_t filePos = (zoff_t) zGetLong( &pif->fileOffsets[4*n], INDEX_FILE_ENDIAN);

    if( filePos < pif->offsets[OFFSET_FILES_START] ||
        filePos > pif->offsets[OFFSET_FILES_END] )
    {
      pif->context->printError( pif->context, errIndexFileOffsets, pif->alias);
      return False;
    }
  }
#endif

  return True;
}

/*
 * Load the file-structures table (two bytes per file) into
 * pif->fileStructures.
 *
 * Nothing is read when the header reports zero files.  Otherwise the
 * section must be exactly 2 * header.fileCount bytes long.
 *
 * Returns True on success (including the zero-file case).  Returns False
 * when the seek or read fails, or -- after reporting through printError --
 * when the section size does not match.
 */
static Boolean ifReadFileStructures( struct indexfile_t *pif )
{
  unsigned char *table;

  if( pif->header.fileCount == 0 )
    return True;

  if( 2 * (zoff_t) pif->header.fileCount !=
      (pif->offsets[OFFSET_FILESTRUCTURES_END] -
       pif->offsets[OFFSET_FILESTRUCTURES_START]) )
  {
    pif->context->printError( pif->context, errIndexFileStructuresCount, pif->alias);
    return False;
  }

  if( !ifSeek( pif, pif->offsets[OFFSET_FILESTRUCTURES_START]) )
    return False;

  /* Keep whatever pointer the reader hands back as the table. */
  table = zFileObjectRead( pif->file, pif->fileStructures,
            2 * (int) pif->header.fileCount, True);
  if( table == NULL )
    return False;
  pif->fileStructures = table;

  return True;
}

static Boolean ifReadStopWords( struct indexfile_t *pif )
{
  if( !ifSeek( pif, pif->offsets[OFFSET_STOPWORDS_START]) ) return False;

  if( !readWordCollection( FLU(pif->context)->stopWords, pif->file,
        (zint_t) (pif->offsets[OFFSET_STOPWORDS_END] - pif->offsets[OFFSET_STOPWORDS_START]),
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
	pif->recodeTable
#else
	NULL
#endif
        ) )
  {
    pif->context->printError( pif->context, errIndexFileReadStopWords, pif->alias);
    return False;
  }

  return True;
}

static Boolean ifReadValidWords( struct indexfile_t *pif )
{
  if( !ifSeek( pif, pif->offsets[OFFSET_VALIDWORDS_START]) ) return False;

  if( !readWordCollection( FLU(pif->context)->validWords, pif->file,
	(zint_t) (pif->offsets[OFFSET_VALIDWORDS_END] - pif->offsets[OFFSET_VALIDWORDS_START]),
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
	pif->recodeTable
#else
	NULL
#endif
        ) )
  {
    pif->context->printError( pif->context, errIndexFileReadValidWords, pif->alias);
    return False;
  }

  return True;
}

/*
 * Read the free-form index information: up to four text fields separated
 * by '\n' and ended by '\0'.  In order they are stored in pif->indexName,
 * pif->indexDescription, pif->indexPointer and pif->indexAdmin.
 *
 * A section of four bytes or fewer is skipped entirely.  An empty field
 * leaves its pointer untouched.  Bytes beyond the capacity of the local
 * buffer are dropped silently.
 *
 * Returns True on success or when the section is skipped; returns False
 * when the seek or a single-byte read fails.
 */
static Boolean ifReadIndexInfo( struct indexfile_t *pif )
{
  char field[MAX_STRING_LENGTH], *copy;
  int used = 0, fieldNo = 0;
  unsigned char c;

  if( pif->offsets[OFFSET_INFO_END] - pif->offsets[OFFSET_INFO_START] <= 4 )
    return True;

  if( !ifSeek( pif, pif->offsets[OFFSET_INFO_START]) )
    return False;
  /* Set before any field is duplicated. */
  pif->infoAlloced = True;

  for( ;; )
  {
    if( zFileObjectRead( pif->file, &c, 1, False) == NULL )
      return False;

    /* Ordinary byte: collect it, keeping one slot for the terminator. */
    if( c != '\n' && c != '\0' )
    {
      if( used < sizeof( field )-1 )
        field[used++] = c;
      continue;
    }

    /* Separator or terminator: store the field if it is not empty. */
    if( used > 0 )
    {
      field[used] = '\0';
      copy = zStrdup( pif->context, field);
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
      if( pif->recodeTable != NULL )
      {
        /* Recode only bytes with the high bit set; the table is indexed
           by the low seven bits. */
        char *s, ch;
        for( s = copy; (ch = *s) != '\0'; s++ )
        {
          if( (ch & 0x80) != 0 )
            *s = pif->recodeTable[ (ch & 0x7f) ];
        }
      }
#endif
      if( fieldNo == 0 )
        pif->indexName = copy;
      else if( fieldNo == 1 )
        pif->indexDescription = copy;
      else if( fieldNo == 2 )
        pif->indexPointer = copy;
      else if( fieldNo == 3 )
        pif->indexAdmin = copy;

      used = 0;
    }

    /* Stop at the terminator or once four fields have been seen. */
    if( c == '\0' || ++fieldNo >= 4 )
      break;
  }

#ifdef CHECK
  assert( zFileObjectTell( pif->file ) == pif->offsets[OFFSET_INFO_END] );
#endif

  return True;
}

/*
 * Open an index file for reading and load the sections selected by flags.
 *
 *   cnt        context used for allocation and error reporting
 *   pif        index file object; it is (re)initialised with ifInit
 *   fileName   name of the file to open; a copy is kept in pif->name
 *   fileAlias  name used in error messages; when NULL, pif->name is used
 *   flags      iff* bits: iffDontRecode is forwarded to the header reader,
 *              each iffNo* bit suppresses the corresponding section
 *
 * Steps run in a fixed order and each one is skipped as soon as an earlier
 * one has failed: open, header, mapped-file tail check, size check, word
 * track, file memory preparation, file offsets, file structures, stop
 * words, valid words, index info.
 *
 * Returns True with the IS_OPEN and WAS_OPEN flags set on success.  On
 * any failure the index is closed through ifClose( pif, False) and False
 * is returned.
 */
Boolean ifReadOpen( struct zcontext_t *cnt, struct indexfile_t *pif,
    const char *fileName, const char *fileAlias, unsigned int flags)
{
  Boolean ok = True;
  zoff_t actualSize;

#ifdef CHECK
  assert( pif != NULL );
  assert( fileName != NULL );
#endif

  ifInit( cnt, pif);
  pif->name = zStrdup( cnt, fileName);
  if( fileAlias == NULL )
    pif->alias = pif->name;
  else
    pif->alias = zStrdup( cnt, fileAlias);
  zFileObjectInit( cnt, &pif->fileObject, pif->name, pif->alias, 0, ifErrorCodes);

  ok = zFileObjectOpen( &pif->fileObject, 0);
  if( ok )
  {
    pif->file = &pif->fileObject;
    ok = ifReadHeader( pif, (Boolean) zCheckFlags( flags, iffDontRecode));
  }

  /* When the file object has the zfoMapped flag set, the four bytes at the
     end of the words section must all be zero; anything else is reported
     as a format error.
     NOTE(review): the original comment was lost to an encoding error;
     presumably the zeros guard readers of the mapped data against running
     past the last word -- confirm. */
  if( ok && zCheckFlags( pif->file->flags, zfoMapped) )
  {
    unsigned char *tail = NULL;

    if( ifSeek( pif, pif->offsets[OFFSET_WORDS_END]) )
      tail = zFileObjectRead( pif->file, NULL, 4, True);

    if( tail == NULL )
    {
      ok = False;
    }
    else if( tail[0] != '\0' || tail[1] != '\0' ||
             tail[2] != '\0' || tail[3] != '\0' )
    {
      cnt->printError( cnt, errIndexFileFormat, pif->alias);
      ok = False;
    }
  }

  /* The size recorded in the header must equal the size from ifGetSize. */
  if( ok )
  {
    actualSize = ifGetSize( pif );
    if( actualSize < 0 )
    {
      ok = False;
    }
    else if( pif->header.fileSize != actualSize )
    {
      cnt->printError( cnt, errIndexFileSize, pif->alias);
      ok = False;
    }
  }
#ifdef CHECK
  if( ok )
    assert( pif->header.fileSize >= MIN_INDEX_SIZE );
#endif

  if( ok && (flags & iffNoWordTrack) == 0 )
    ok = ifReadWordTrack( pif );

  if( ok )
    ifPrepareFileMemory( pif, flags);

  if( ok && !zCheckFlags( flags, iffNoFileOffsets) )
    ok = ifReadFileOffsets( pif );

  if( ok && !zCheckFlags( flags, iffNoFileStructures) )
    ok = ifReadFileStructures( pif );

  if( ok && !zCheckFlags( flags, iffNoStopWords) )
    ok = ifReadStopWords( pif );

  if( ok && !zCheckFlags( flags, iffNoValidWords) )
    ok = ifReadValidWords( pif );

  if( ok && !zCheckFlags( flags, iffNoIndexInfo) )
    ok = ifReadIndexInfo( pif );

  if( ok )
  {
    zSetFlags( pif->flags, INDEXFILE_FLAG_IS_OPEN | INDEXFILE_FLAG_WAS_OPEN);
  }
  else
  {
    ifClose( pif, False);
  }

  return ok;
}

/*
 * Look up the offset of file number `filenum` (1-based) in the file
 * offset table of an open index.
 *
 *   pif      index file whose fileOffsets table has been loaded
 *   filenum  file number, valid in the range 1 .. header.fileCount
 *
 * Returns the offset on success.  Returns -1 after reporting an error
 * when filenum is out of range (errInvalidFilenoValue, with the number
 * stored in errorIntParam) or when the stored offset is not positive or
 * lies outside the files section (errIndexFileOffsets).
 */
zoff_t ifGetFileOffset( struct indexfile_t *pif, _fn_t filenum)
{
  zoff_t result;

#ifdef CHECK
  assert( pif != NULL );
  assert( pif->fileOffsets != NULL );
  assert( filenum > 0 );
#endif

  if( filenum <= 0 || filenum > (_fn_t) pif->header.fileCount )
  {
    pif->context->errorIntParam = (int) filenum;
    pif->context->printError( pif->context, errInvalidFilenoValue, "ifGetFileOffset");
    return -1;
  }

  /* Table entries are four bytes each; file numbers start at one. */
  result = (zoff_t) zGetLong( &pif->fileOffsets[4*(filenum-1)], INDEX_FILE_ENDIAN);

  if( result <= 0 ||
      result < pif->offsets[OFFSET_FILES_START] ||
      result > pif->offsets[OFFSET_FILES_END] )
  {
    pif->context->printError( pif->context, errIndexFileOffsets, pif->alias);
    return -1;
  }

  return result;
}

/*
 * Use the word track to find the part of the words section where `word`
 * may be stored, and position the index file at its start.
 *
 *   pif        open index file with a loaded word track
 *   word       word (or pattern) to look up
 *   isPattern  passed through to wtFollow unchanged
 *
 * Returns 0 when wtFollow returns 0 for the word, -1 when the offsets it
 * produced are inconsistent (reported as errIndexWordTrackOffsets) or the
 * seek fails, and otherwise the stop offset.
 */
zoff_t ifFollowWordTrack( struct indexfile_t *pif, const char *word, Boolean isPattern)
{
  zoff_t from, upTo;

  /* Start with the largest possible stop offset, the end of the words
     section; wtFollow receives its address and may change it. */
  upTo = pif->offsets[OFFSET_WORDS_END];

  /* A zero result from wtFollow is handed straight back to the caller. */
  from = wtFollow( &pif->wordTrack, word, &upTo, isPattern);
  if( from == 0 )
    return 0;

  /* Both offsets must be non-negative, strictly ordered, and inside
     pif->offsets[OFFSET_WORDS_START] ... pif->offsets[OFFSET_WORDS_END]
     (the start offset strictly before the end of the section). */
  if( from < 0 || upTo < 0 || from >= upTo ||
      from < pif->offsets[OFFSET_WORDS_START] ||
      from >= pif->offsets[OFFSET_WORDS_END] ||
      upTo < pif->offsets[OFFSET_WORDS_START] ||
      upTo > pif->offsets[OFFSET_WORDS_END] )
  {
    pif->context->printError( pif->context, errIndexWordTrackOffsets, pif->alias);
    return -1;
  }

  /* Position the file at the start offset and return the stop offset, so
     that a caller reading records from here knows where to stop. */
  if( !ifSeek( pif, from) )
    return -1;

  return upTo;
}
