/*
    FLUIdS - local search system
    Copyright (C) 1998, 2000  VVK (valera@sbnet.ru), CNII Center, Moscow

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/


#include "zdefs.h"
#include "_pstring.h" /* <string.h> */
#include <math.h>
#include <assert.h>

#include "zcontext.h"
#include "zalloc.h"
#include "zerror.h"
#include "zfile.h"

#include "indio.h"
#include "indxword.h"

/***************************************************************************/
/*                                                                         */
/*  Location memory                                                        */
/*                                                                         */
/***************************************************************************/

/*
 * Obtain a fresh struct iw_location_t through ZNEW and, before doing so,
 * add the size of one such node to the byte counter `vol` (which must be
 * a modifiable lvalue).  The whole expression evaluates to the ZNEW result.
 */
#define iwNewLocationMemory(ctx,vol) \
    ((vol) += sizeof(struct iw_location_t), ZNEW( (ctx), struct iw_location_t))

/*
 * Release every node of a singly linked chain of locations.
 *
 *   cnt - context handed to zFree() for each node
 *   lp  - head of the chain; NULL means an empty chain and nothing is freed
 */
static void iwFreeLocationChain( struct zcontext_t *cnt, struct iw_location_t *lp)
{
  struct iw_location_t *following;

  for( ; lp != NULL; lp = following)
  {
    /* pick up the link first: the node must not be touched once freed */
    following = lp->next;
    zFree( cnt, lp);
  }
}

/***************************************************************************/
/*                                                                         */
/*  Word list                                                              */
/*                                                                         */
/***************************************************************************/

/* Number of word slots reserved when a bucket's word array is first allocated
   (see the first-allocation branch of iwAddWord). */
#define WORD_INIT                      100

/*
 * Put a word-list table into its empty state by zero-filling all
 * WORD_LIST_SIZE bucket headers that `wordList` points at.
 */
void iwInitWordList( struct iw_wordlist_t *wordList )
{
  ZEROFILL( wordList, sizeof( *wordList ) * WORD_LIST_SIZE);
}

/*
 * Release everything owned by a word-list table and leave every bucket
 * empty (count 0, reserved 0, list NULL).
 *
 *   cnt      - context passed to the deallocation routines
 *   wordList - table of WORD_LIST_SIZE buckets
 *
 * For each bucket that has a word array, every used entry has its word
 * string released with _ZFREE and its location chain released with
 * iwFreeLocationChain(); then the array itself is freed.
 */
void iwFreeWordList( struct zcontext_t *cnt, struct iw_wordlist_t *wordList)
{
  int b;

  for( b = 0; b < WORD_LIST_SIZE; b++)
  {
    struct iw_wordlist_t *bucket = wordList + b;

    if( bucket->list != NULL )
    {
      int k = 0;

      while( k < bucket->count )
      {
        struct iw_indexword_t *entry = bucket->list + k;

        _ZFREE( cnt, entry->word);
        iwFreeLocationChain( cnt, entry->list);
        k++;
      }

      zFree( cnt, bucket->list);
    }

    /* reset the header whether or not an array was attached */
    bucket->reserved = 0;
    bucket->count = 0;
    bucket->list = NULL;
  }
}

/*
 * Record one occurrence of `word` for file `fnum` in the word-list table.
 *
 *   cnt        - context used for all allocations
 *   wordList   - bucket table; the bucket is chosen by (first byte - 32)
 *   word       - NUL-terminated word; ignored if its first byte is below 32
 *   fnum       - number of the file the word was seen in
 *   emphasized - emphasis amount added to the location's emphasis counter
 *   structure  - structure flags, passed through EXTEND_STRUCTURE before use
 *
 * Each bucket keeps its words in an array ordered by strcmp(); the word is
 * looked up with a binary search.  A known word either gets a new location
 * node appended (different file than the last one recorded) or has the
 * counters of its last location updated.  An unknown word is inserted at
 * its sorted position with a single location node.
 *
 * Returns the number of bytes this call added to the accounting counter:
 * location nodes, word-array growth and cnt->lastStringSize for a new word.
 * Returns 0 when the word is ignored.
 */
zint_t iwAddWord( struct zcontext_t *cnt, struct iw_wordlist_t *wordList,
    const char *word, _fn_t fnum, int emphasized, _st_t structure)
{
  int bucket = *((unsigned char *) word) - 32;
  int lo, hi, slot;
  Boolean found = False;
  zint_t memoryVolume = 0;
  struct iw_wordlist_t *wp;
  struct iw_indexword_t *entry;
  struct iw_location_t *loc;

  if( bucket < 0 ) return 0;
  wp = &wordList[bucket];
#ifdef CHECK
  assert( !zCheckFlags( structure, 0xff00u) );
#endif
  EXTEND_STRUCTURE( structure, structure);

  /* binary search; on a miss `lo` ends up at the insertion position */
  lo = 0;
  hi = wp->count-1;
  while( lo <= hi )
  {
    int mid = (lo + hi) >> 1;
    int order = strcmp( word, wp->list[mid].word);

    if( order > 0 )
      lo = mid + 1;
    else if( order < 0 )
      hi = mid - 1;
    else
    {
      found = True;
      lo = mid;
      break;
    }
  }
  slot = lo;

  if( found )
  {
    entry = &wp->list[slot];
    loc = entry->last;

    if( loc->filenum == fnum )
    {
      /* same file as the most recent location: just accumulate;
         emphasis is added unconditionally */
      loc->frequency++;
      loc->emphasized += emphasized;
      loc->structure |= structure;
    }
    else
    {
      /* first occurrence in this file: append a new node to the chain */
      loc = iwNewLocationMemory( cnt, memoryVolume);
      loc->filenum = fnum;
      loc->frequency = 1;
      loc->emphasized = emphasized;
      loc->structure = structure;
      loc->next = NULL;
      entry->last->next = loc;
      entry->last = loc;
    }

    return memoryVolume;
  }

  /* unknown word: make sure the array has room for one more entry */
  if( wp->list == NULL )
  {
    wp->reserved = WORD_INIT;
    wp->list = (struct iw_indexword_t *) zMalloc( cnt,
      wp->reserved * sizeof( struct iw_indexword_t ));
    memoryVolume += WORD_INIT * sizeof( struct iw_indexword_t );
  }
  else if( wp->count == wp->reserved )
  {
    /* grow by half of the current capacity, but by no more than 300 slots */
    int delta = wp->reserved / 2;

    if( delta > 300 ) delta = 300;
    wp->reserved = delta + wp->reserved;
    wp->list = (struct iw_indexword_t *) zRealloc( cnt, wp->list,
      wp->reserved * sizeof( struct iw_indexword_t ) );
    memoryVolume += delta * sizeof( struct iw_indexword_t );
  }

  /* open a gap at the sorted position */
  entry = &wp->list[slot];
  if( slot < wp->count )
    memmove( entry + 1, entry, (wp->count - slot) * sizeof( struct iw_indexword_t ));

  entry->word = zStrdup( cnt, word);
  memoryVolume += cnt->lastStringSize;

  loc = iwNewLocationMemory( cnt, memoryVolume);
  loc->filenum = fnum;
  loc->frequency = 1;
  loc->emphasized = emphasized;
  loc->structure = structure;
  loc->next = NULL;
  entry->list = loc;
  entry->last = loc;

  wp->count++;

  return memoryVolume;
}

/***************************************************************************/
/*                                                                         */
/*  Writing word list                                                      */
/*                                                                         */
/***************************************************************************/

/*
 * Compute the rank stored with a word location.
 *
 *   location  - supplies the `emphasized` and `frequency` counters
 *   wordCount - word count of the file the location refers to
 *
 * The raw value is
 *     10000 * log(1.8 + emphasized) * (1 + frequency) / (wordCount + 20)^(3/4)
 * with the intermediate results kept in float.  It is rounded to the
 * nearest integer and limited to the range 1 .. MAX_RANK.
 */
static _rn_t getRank( struct iw_location_t *location, unsigned zint_t wordCount)
{
  float root, divisor, weight, scaled;
  _rn_t rank;

  /* (wordCount + 20)^(3/4), built as sqrt(x) * sqrt(sqrt(x)) */
  root = sqrt( (float) wordCount + 20.0 );
  divisor = sqrt( root ) * root;

  weight = log(1.8 + (float) location->emphasized) *
      (1.0 + (float) location->frequency) / divisor;
  scaled = weight * 10000.0;

  if( scaled >= (float) MAX_RANK )
    return MAX_RANK;

  /* truncate, then round half up */
  rank = (_rn_t) scaled;
  if( (scaled - (float) rank) >= 0.5 ) rank++;
  if( rank <= 0 ) rank = 1;

  return rank;
}

/*
 * Merge the in-memory word lists with the entries of a previous swap file
 * and write the combined, strcmp()-ordered sequence to `outSwap`.
 *
 *   cnt          - context used for error reporting and for freeing memory
 *   wordList     - table of WORD_LIST_SIZE buckets filled by iwAddWord()
 *   inSwap       - previous swap file to merge with, or NULL for none;
 *                  entries are read from its stream until its `size` offset
 *   outSwap      - destination; its `size` is set to the final stream offset
 *   we1          - entry buffer used for entries read from `inSwap`
 *   we2          - entry buffer used for words present only in memory
 *   getWordCount - callback returning the word count of a file number;
 *                  locations whose count is 0 are not written
 *   info         - opaque argument forwarded to getWordCount
 *
 * Words are consumed from both sources in parallel.  When the same word is
 * present in both, the in-memory locations are appended to the entry read
 * from the swap file.  The location chain and the word string of every
 * in-memory word are freed as soon as the word has been processed; the
 * bucket arrays themselves are left for iwFreeWordList().
 *
 * Returns True on success.  Returns False if ftell() fails on either stream
 * (after reporting zerTempFileTell through cnt->printError) or if reading or
 * writing a word entry fails.
 */
Boolean iwWriteWordList( struct zcontext_t *cnt, struct iw_wordlist_t *wordList,
    struct ztempfile_t *inSwap, struct ztempfile_t *outSwap,
    struct wordentry_t *we1, struct wordentry_t *we2,
    iw_wordcount_t getWordCount, void *info)
{
  FILE *in = NULL, *out = outSwap->stream;
  Boolean done1 = True, done2 = True;
  /* currentList starts one position before the first bucket because the
     bucket-advance loop below increments it before looking at a bucket;
     starting at 0 would silently skip wordList[0] */
  int currentWord = -1, currentList = -1, cmp;
  zoff_t stopOffset = 0, pos;
  struct wordentry_t *we;
  struct iw_wordlist_t *wp = wordList;
  struct iw_indexword_t *w = NULL;

  if( inSwap != NULL )
  {
    in = inSwap->stream;
    stopOffset = inSwap->size;
  }

  for( ;; )
  {
    /* refill the swap-file side once its previous entry has been consumed */
    if( in != NULL && done1 )
    {
      if( (pos = ftell( in )) < 0L )
      {
        cnt->printError( cnt, zerTempFileTell, NULL);
        return False;
      }
      else if( pos >= stopOffset )
      {
        /* reached the recorded end of the input data */
        in = NULL;
#ifdef CHECK
        assert( pos == stopOffset );
#endif
      }
      else
      {
        if( !ioReadWordEntry( we1, NULL, in, NULL) ) return False;
        /* non-zero so that the fileCount test before writing lets it through */
        we1->fileCount = 1;
        done1 = False;
      }
    }

    /* advance the in-memory side once its previous word has been consumed */
    if( wp != NULL && done2 )
    {
      if( currentWord < 0 || currentWord >= wp->count-1 )
      {
        /* current bucket exhausted (or nothing selected yet):
           move on to the next non-empty bucket, if any */
        for( wp = NULL, currentList++; currentList < WORD_LIST_SIZE; currentList++)
          if( wordList[currentList].count > 0 )
          {
            wp = &wordList[currentList];
            currentWord = 0;
            break;
          }
      }
      else
        currentWord++;
      if( wp == NULL )
        w = NULL;
      else
        w = &wp->list[currentWord];
      done2 = False;
    }

    /* cmp < 0: swap-file word comes first; cmp > 0: in-memory word comes
       first; cmp == 0: same word on both sides */
    if( in == NULL && w == NULL )
      break;
    else if( in == NULL )
      cmp = 1;
    else if( w == NULL )
      cmp = -1;
    else
      cmp = strcmp( we1->word, w->word);

    if( cmp <= 0 )
    {
      we = we1;
      /* NOTE(review): presumably re-terminates the word part of the entry
         buffer before items are appended - confirm against indio */
      we->buffer[we->wordLength] = WORD_END;
      if( cmp == 0 ) ioGetLastFilenum( we );
#ifdef CHECK
      else ioGetLastFilenum( we );
#endif
    }
    else
      ioAddEntryWord( we = we2, w->word);

    if( cmp >= 0 )
    {
      /* append the in-memory locations to the entry, freeing them on the way */
      struct iw_location_t *location = w->list;

      while( location != NULL )
      {
        struct iw_location_t *tmp = location;
        unsigned zint_t wordCount = getWordCount( info, location->filenum);
        if( wordCount != 0 )
          ioAddEntryItem( we, location->structure, location->filenum,
            getRank( location, wordCount));
        location = location->next;
        zFree( cnt, tmp);
      }

      w->list = NULL;
      ZFREE( cnt, w->word);
    }

    if( we->fileCount > 0 )
      if( !ioWriteWordEntry( we, out, NULL) ) return False;
    if( cmp <= 0 ) done1 = True;
    if( cmp >= 0 ) done2 = True;
  }

  if( (outSwap->size = ftell( out )) < 0 )
  {
    cnt->printError( cnt, zerTempFileTell, NULL);
    return False;
  }

  return True;
}
