/*
    FLUIdS - local search system
    Copyright (C) 1998, 2000  VVK (valera@sbnet.ru), CNII Center, Moscow

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/


#include "zdefs.h"
#include <stdio.h>
#include "_pstring.h" /* <string.h> */

#include "zcontext.h"
#include "zalloc.h"
#include "zchars.h"
#include "zcharset.h"
#include "zcoll.h"
#include "zstdio.h"
#include "zstdlib.h"

#include "cfg.h"
#include "defs.h"
#include "error.h"
#include "indxfile.h"
#include "indio.h"
#include "structur.h"

#include "dump.h"
#include "fileinfo.h"
#include "wordinfo.h"

/***************************************************************************/
/*                                                                         */
/*  Entry info                                                             */
/*                                                                         */
/***************************************************************************/

/*
 * Print one line per posting stored in a word entry: rank, structure
 * string and the URL of the referenced document.
 *
 * The postings live in we->buffer right after the word itself (starting at
 * index wordLength+1) and run up to we->filled.  Each posting is decoded as
 * a structure value, a file number delta and a rank; file numbers are
 * accumulated as "previous + delta + 1".
 *
 * When all postings are printed the index file is repositioned to `offset`
 * with ifSeek() (presumably because ifReadFileInfo() moves the file
 * position -- confirm) and an empty line is printed.
 *
 * Returns False as soon as a document record cannot be read, otherwise the
 * result of the final ifSeek().
 */
static Boolean printEntryInfo( struct indexfile_t *pif,
    struct wordentry_t *we, zoff_t offset)
{
  struct zcontext_t *cnt = pif->context;
  char structName[MAX_STRUCTURE_STRING_SIZE];
  struct flu_docentry_t doc;
  size_t pos = we->wordLength+1;
  _fn_t docNum = 0;
  Boolean restored;

  while( pos < we->filled )
  {
    unsigned int scratch;
    _st_t structure;
    _fn_t delta;
    _rn_t rank;

    /* The decode macros advance `pos` past the bytes they consume */
    IO_DECODE_STRUCTURE( structure, we->buffer, pos);
    IO_DECODE_NUMBER( delta, we->buffer, pos, scratch);
    IO_DECODE_NUMBER( rank, we->buffer, pos, scratch);
    docNum += delta + 1;

    if( !ifReadFileInfo( pif, docNum, &doc, False) ) return False;

    zprintf( cnt, SHIFT_STRING "%10" _ZINT_FORMAT "u  %s  \"%s\"\n",
      rank, getStructureString( structName, structure), doc.url);
  }

  restored = ifSeek( pif, offset);
  zprintf( cnt, "\n");
  return restored;
}

/*
 * Print the summary line for one index word:
 *   <word><marker> <count>/<length>
 * where the marker is '>' when tooLong is set and ':' otherwise.
 */
static void printIndexWord( struct zcontext_t *cnt, const char *word,
    Boolean tooLong, zint_t count, zoff_t length)
{
  int marker;

  if( tooLong )
    marker = '>';
  else
    marker = ':';

  zprintf( cnt, "%s%c %" _ZINT_FORMAT "d/%" _ZOFF_FORMAT "d\n",
    word, marker, count, length);
}

/*
 * Print the word of `we` together with its rank and structure string if
 * the entry contains a posting for document number `desiredNum`.
 *
 * Postings are decoded from we->buffer the same way as in printEntryInfo():
 * structure, file number delta, rank; the file number is accumulated as
 * "previous + delta + 1".  Only the first matching posting is printed.
 *
 * Always returns True.
 */
static Boolean printDocumentWord( struct zcontext_t *cnt,
    struct wordentry_t *we, Boolean tooLong, _fn_t desiredNum)
{
  char structName[MAX_STRUCTURE_STRING_SIZE];
  size_t pos = we->wordLength+1;
  _fn_t docNum = 0;

  while( pos < we->filled )
  {
    unsigned int scratch;
    _st_t structure;
    _fn_t delta;
    _rn_t rank;

    IO_DECODE_STRUCTURE( structure, we->buffer, pos);
    IO_DECODE_NUMBER( delta, we->buffer, pos, scratch);
    IO_DECODE_NUMBER( rank, we->buffer, pos, scratch);
    docNum += delta + 1;

    if( docNum == desiredNum )
    {
      zprintf( cnt, "%s%c", we->word, tooLong ? '>' : ':');
      /* NOTE(review): a precision on %c is undefined for the standard
         printf family; presumably zprintf treats "%.*c" as padding or
         repetition to align the columns -- confirm.  Also note that
         32 - wordLength is not positive for words of 32+ characters. */
      zprintf( cnt, "%.*c  %10" _ZINT_FORMAT "u  %s\n",
        32 - we->wordLength, ' ', rank, getStructureString( structName, structure));
      break;
    }
  }

  return True;
}

/***************************************************************************/
/*                                                                         */
/*  Word index                                                             */
/*                                                                         */
/***************************************************************************/

/*
 * Dump every word entry stored between OFFSET_WORDS_START and
 * OFFSET_WORDS_END of the index file.
 *
 * For each entry a summary line (word, file count, entry size in bytes) is
 * printed; when `all` is set the individual postings are printed as well.
 *
 * Returns True on success and False if seeking, reading or querying the
 * file position fails.  The word entry buffer is released on every path
 * once it has been initialised.
 */
Boolean dumpWordIndex( struct indexfile_t *pif, Boolean all)
{
  struct zcontext_t *cnt = pif->context;
  Boolean wordTooLong;
  Boolean success = False;
  zoff_t startOffset, endOffset;
  struct wordentry_t wordEntry;

  if( !ifSeek( pif, startOffset = pif->offsets[OFFSET_WORDS_START]) ) return False;
  ioInitWordEntry( cnt, &wordEntry);
  wordEntry.stopOffset = pif->offsets[OFFSET_WORDS_END];

  while( startOffset < wordEntry.stopOffset )
  {
    if( !ioReadWordEntry( &wordEntry, pif->file, pif->file->stream, pif->alias) ) goto cleanup;

    wordTooLong = (Boolean) (wordEntry.wordLength > MAX_WORD_SIZE);
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
    if( pif->recodeTable != NULL ) zRecode8( wordEntry.word, pif->recodeTable);
#endif

    /* The entry size is the distance between the positions before and
       after reading it */
    if( (endOffset = zFileObjectTell( pif->file )) < 0 ) goto cleanup;

    /* Called before fileCount is used -- presumably it fills it in */
    ioGetLastFilenum( &wordEntry );
    printIndexWord( cnt, wordEntry.word, wordTooLong, wordEntry.fileCount, endOffset - startOffset);
    if( all && !printEntryInfo( pif, &wordEntry, endOffset) ) goto cleanup;

    startOffset = endOffset;
  }

  success = True;

cleanup:
  /* NOTE(review): assumes IO_FREE_BUFFER is safe after a failed
     ioReadWordEntry() -- confirm against indio.h */
  IO_FREE_BUFFER( &wordEntry );

  if( success ) zprintf( cnt, "\n");
  return success;
}

/*
 * Lower-case `word` in place (pattern characters are left untouched) and
 * locate its first pattern character.
 *
 * Returns:
 *   -1  when SUBWORD_SEARCH is defined and the word starts with a pattern
 *       character;
 *    0  when no pattern character is found at an index greater than zero;
 *   >0  the index of the first pattern character found at an index greater
 *       than zero, i.e. the length of the literal prefix.
 *
 * Characters are always examined as unsigned char so that bytes above 0x7f
 * are never passed on as negative values.
 */
static int getWordLength( char *word )
{
  const unsigned char *bytes = (const unsigned char *) word;
  int length, i, c;

#ifdef SUBWORD_SEARCH
  length = zIsPatternChar( bytes[0] ) ? -1 : 0;
#else
  length = 0;
#endif

  for( i = 0; (c = bytes[i]) != '\0'; i++)
  {
    if( zIsPatternChar( c ) )
    {
      /* Remember only the first pattern position; a hit at index 0
         leaves length at 0 */
      if( length == 0 ) length = i;
    }
    else
      word[i] = _toLower(c);
  }

  return length;
}

/*
 * Dump the index entries matching each word (or word pattern) in `words`.
 *
 * Every input word is copied into a local buffer (truncated to
 * MAX_WORD_SIZE characters), lower-cased by getWordLength() and, in the
 * Russian release, recoded.  Depending on the value returned by
 * getWordLength():
 *   - negative (SUBWORD_SEARCH only): the whole word area is scanned and
 *     every entry is tested with zStringMatch();
 *   - positive: ifFollowWordTrack() is called with the literal prefix and
 *     entries are tested first by prefix, then with zStringMatch();
 *   - zero: ifFollowWordTrack() is called with the whole word and entries
 *     are compared with strcmp(); the scan stops at the first match.
 * For every matching entry the summary line and all postings are printed.
 *
 * Returns True on success and False on any seek/read/tell failure.  The
 * word entry buffer is released on every path.
 */
Boolean dumpWordInfo( struct indexfile_t *pif, struct zstrcoll_t *words)
{
  struct zcontext_t *cnt = pif->context;
  struct wordentry_t wordEntry;
  Boolean success = False;
  int i;

  ioInitWordEntry( cnt, &wordEntry);

  for( i = 0; i < words->count; i++)
  {
    char word[MAX_WORD_SIZE+1];
    zoff_t startOffset, endOffset;
    int length, shift, c = '\0';

    /* strncpy does not terminate on truncation, so terminate explicitly */
    strncpy( word, words->list[i], sizeof( word ));
    word[ sizeof( word ) - 1 ] = '\0';
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
    if( zCheckFlags( cnt->ioFlags, ZCONTEXT_IOFLAG_CHARSET_CONV) )
      zRecode8( word, cnt->convTable);
#endif
    length = getWordLength( word );

#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
    if( pif->convTable != NULL ) zRecode8( word, pif->convTable);
#endif

#ifdef SUBWORD_SEARCH
    if( length < 0 )
    {
      /* Leading pattern character: no usable prefix, scan all words */
      startOffset = pif->offsets[OFFSET_WORDS_START];
      wordEntry.stopOffset = pif->offsets[OFFSET_WORDS_END];
      if( startOffset == wordEntry.stopOffset ) break;
      if( !ifSeek( pif, startOffset) ) goto cleanup;
      shift = 0;
    }
    else
#endif
    {
      /* Temporarily cut the word at the first pattern character so that
         only the literal prefix is passed to ifFollowWordTrack() */
      if( length > 0 )
      {
        c = word[length];
        word[length] = '\0';
      }

      if( (wordEntry.stopOffset = ifFollowWordTrack( pif, word, (Boolean) (length > 0 ))) <= 0 ) continue;
      if( (startOffset = zFileObjectTell( pif->file )) < 0 ) goto cleanup;

      if( length > 0 ) word[length] = c;
      shift = length;
    }

    for( ;; )
    {
      int cmp;

      if( !ioReadWordEntry( &wordEntry, pif->file, pif->file->stream, pif->alias) ) goto cleanup;
      if( length != 0 )
      {
#ifdef SUBWORD_SEARCH
        if( length < 0 )
          cmp = 0;
        else
#endif
          cmp = strncmp( wordEntry.word, word, length);
        /* Prefix matches but the pattern does not: keep scanning */
        if( cmp == 0 && !zStringMatch( &wordEntry.word[shift], &word[shift], cnt->patternFlags) ) cmp = -1;
      }
      else
        cmp = strcmp( wordEntry.word, word);

      /* The scan stops at the first entry that compares greater */
      if( cmp > 0 ) break;
      if( (endOffset = zFileObjectTell( pif->file )) < 0 ) goto cleanup;

      if( cmp == 0 )
      {
        Boolean wordTooLong = (Boolean) (wordEntry.wordLength > MAX_WORD_SIZE);
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
        if( pif->recodeTable != NULL ) zRecode8( wordEntry.word, pif->recodeTable);
#endif

        ioGetLastFilenum( &wordEntry );
        printIndexWord( cnt, wordEntry.word, wordTooLong, wordEntry.fileCount, endOffset - startOffset);
        if( !printEntryInfo( pif, &wordEntry, endOffset) ) goto cleanup;
        /* An exact (non-pattern) word stops at its first match */
        if( length == 0 ) break;
      }

      if( (startOffset = endOffset) >= wordEntry.stopOffset ) break;
    }
  }

  success = True;

cleanup:
  /* NOTE(review): assumes IO_FREE_BUFFER is safe after a failed
     ioReadWordEntry() -- confirm against indio.h */
  IO_FREE_BUFFER( &wordEntry );

  if( success ) zprintf( cnt, "\n");
  return success;
}

/***************************************************************************/
/*                                                                         */
/*  Document words                                                         */
/*                                                                         */
/***************************************************************************/

/*
 * Dump all index words that have a posting for the document `docName`.
 *
 * The document number is looked up with findDocument(); a result of 0 is
 * reported as errNoSuchDocument.  Then every word entry between
 * OFFSET_WORDS_START and OFFSET_WORDS_END is read and handed to
 * printDocumentWord(), which prints it when it references that document.
 *
 * Returns True on success and False when the document is unknown or on
 * any seek/read/tell failure.  The word entry buffer is released on every
 * path once it has been initialised.
 */
Boolean dumpDocumentWords( struct indexfile_t *pif, const char *docName)
{
  struct zcontext_t *cnt = pif->context;
  Boolean wordTooLong;
  Boolean success = False;
  zoff_t startOffset, endOffset;
  struct wordentry_t wordEntry;
  _fn_t num;

  if( (num = findDocument( pif, docName)) == 0 )
  {
    cnt->errorStrParam = pif->alias;
    cnt->printError( cnt, errNoSuchDocument, docName);
    return False;
  }

  if( !ifSeek( pif, startOffset = pif->offsets[OFFSET_WORDS_START]) ) return False;
  ioInitWordEntry( cnt, &wordEntry);
  wordEntry.stopOffset = pif->offsets[OFFSET_WORDS_END];

  while( startOffset < wordEntry.stopOffset )
  {
    if( !ioReadWordEntry( &wordEntry, pif->file, pif->file->stream, pif->alias) ) goto cleanup;

    wordTooLong = (Boolean) (wordEntry.wordLength > MAX_WORD_SIZE);
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
    if( pif->recodeTable != NULL ) zRecode8( wordEntry.word, pif->recodeTable);
#endif

    if( (endOffset = zFileObjectTell( pif->file )) < 0 ) goto cleanup;

    if( !printDocumentWord( cnt, &wordEntry, wordTooLong, num) ) goto cleanup;

    startOffset = endOffset;
  }

  success = True;

cleanup:
  /* NOTE(review): assumes IO_FREE_BUFFER is safe after a failed
     ioReadWordEntry() -- confirm against indio.h */
  IO_FREE_BUFFER( &wordEntry );

  if( success ) zprintf( cnt, "\n");
  return success;
}

/***************************************************************************/
/*                                                                         */
/*  Word track                                                             */
/*                                                                         */
/***************************************************************************/

Boolean dumpWordTrack( struct indexfile_t *pif )
{
  struct zcontext_t *cnt = pif->context;
  unsigned zshort_t trackCount = pif->wordTrack.deltas[224];
  zoff_t offset, startOffset, endOffset;
  zint_t wordCount;
  unsigned int i, j, j1, j2;
  struct wordentry_t wordEntry;

  if( trackCount == 0 ) return True;
  if( !ifSeek( pif, offset = pif->offsets[OFFSET_WORDS_START]) ) return False;
  ioInitWordEntry( cnt, &wordEntry);
  wordEntry.stopOffset = pif->offsets[OFFSET_WORDS_END];

  for( i = 0; i < 224; i++)
  {
    if( (j1 = pif->wordTrack.deltas[i]) == (j2 = pif->wordTrack.deltas[i+1]) ) continue;

    for( j = j1; j < j2; j++)
    {
      startOffset = (zoff_t) zGetLong( &pif->wordTrack.offsets[4*j], INDEX_FILE_ENDIAN);
      endOffset = (j < trackCount-1) ? zGetLong( &pif->wordTrack.offsets[4*(j+1)],
        INDEX_FILE_ENDIAN) : wordEntry.stopOffset;
      if( startOffset < 0 || endOffset < 0 || startOffset >= endOffset ||
	  startOffset < pif->offsets[OFFSET_WORDS_START] ||
	  startOffset >= pif->offsets[OFFSET_WORDS_END] ||
	  endOffset < pif->offsets[OFFSET_WORDS_START] ||
	  endOffset > pif->offsets[OFFSET_WORDS_END] )
      {
	pif->context->printError( pif->context, errIndexWordTrackOffsets, pif->alias);
        return False;
      }
      if( startOffset != offset )
      {
        /* XXX: printWarning( war, );
                if( !ifSeek( pif, offset = startOffset) ) return False; */
        pif->context->printError( pif->context, errIndexFileFormat, pif->alias);
        return False;
      }
      if( startOffset >= endOffset ) continue;

      wordCount = 0;
      while( offset < endOffset )
      {
	if( !ioReadWordEntry( &wordEntry, pif->file, pif->file->stream, pif->alias) ) return False;
        wordCount++;
        if( (offset = zFileObjectTell( pif->file )) < 0 ) return False;
      }

      zprintf( cnt, "%c%c:  %" _ZINT_FORMAT "d/%" _ZINT_FORMAT "d\n",
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
	(pif->recodeTable != NULL) ? pif->recodeTable[i+32] : i+32,
#else
        i+32,
#endif
	(pif->wordTrack.letters[j] == '\0') ? ' ' :
#if defined( RUSSIAN_SUPPORT ) && defined( RUSSIAN_RELEASE )
	 ((pif->recodeTable != NULL) ? pif->recodeTable[pif->wordTrack.letters[j]] : pif->wordTrack.letters[j]),
#else
         pif->wordTrack.letters[j],
#endif
        wordCount, (zint_t) (endOffset - startOffset));
    }
  }

  IO_FREE_BUFFER( &wordEntry );

  zprintf( cnt, "\n");
  return True;
}
