/*
    FLUIdS - local search system
    Copyright (C) 1998, 2000  VVK (valera@sbnet.ru), CNII Center, Moscow

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/


#include "zdefs.h"
#include "_pstdio.h" /* <stdio.h> */
#include "_pstring.h" /* <string.h> */
#include <assert.h>

#include "zcontext.h"
#include "zcoding.h"
#include "zcoll.h"
#include "zerror.h"
#include "zfile.h"

#include "error.h"
#include "indexer.h"
#include "indexer_.h"

/* One entry of the URL hash table (docHash): ties the CRC32 of a document
   URL to the number of the document currently registered for that URL. */
struct flu_doclist_footprint_t
{
  struct flu_doclist_footprint_t *next; /* next entry of the same hash chain */
  unsigned zint_t crc32; /* CRC32 of the document URL */
  _fn_t fnum; /* 1-based number of the document in the file list */
};

/* Allocation block for footprints: entries are handed out of block[] one
   at a time, and the blocks themselves are kept in a singly linked list. */
struct flu_doclist_block_t
{
  struct flu_doclist_block_t *next; /* block allocated before this one, or NULL */
  int count; /* number of block[] slots already handed out */
  struct flu_doclist_footprint_t block[FLU_INDEXER_DOCFOOTPRINT_BLOCK_SIZE];
};

/* Result of looking a URL up in the document list. */
struct flu_doclist_fingerprint_t
{
  struct flu_doclist_footprint_t *fp; /* footprint of the registered document; NULL if the URL is unknown */
  unsigned zint_t crc32; /* CRC32 of the looked-up URL */
  time_t lastModified; /* modification time of the registered document; 0 if none was found */
  zoff_t size; /* size of the registered document; -1 if none was found */
};

/***************************************************************************/
/*                                                                         */
/*  Document list                                                          */
/*                                                                         */
/***************************************************************************/

/* Releases one element of the index-file collection (it is handed to
   zDataCollectionInit as the element destructor): closes the element's
   index file and frees its map. */
static void freeIndexFile( struct zcontext_t *cnt, void *elem)
{
  struct flu_indexer_indexfile_t *entry = elem;

  ifClose( &entry->indexFile, False);
  dmapFree( cnt, &entry->map);
}

/* Prepares an empty document list bound to the context cnt.
   The structure is zero-filled first, then the document collection, the
   index-file collection and the swap temp file descriptor are initialised.
   Always returns True. */
Boolean _fluDoclistInit( struct zcontext_t *cnt, struct flu_doclist_t *dl)
{
  ZEROFILL( dl, sizeof(*dl));
  dl->context = cnt;

  /* NOTE(review): 1024/100 and 0/16/0 are presumably capacity/growth
     settings of the collections - confirm against zcoll.h. */
  zSetCollectionInit( cnt, &dl->files,
    sizeof(struct flu_indexer_doc_t), 1024, 100);
  zDataCollectionInit( cnt, &dl->indexes, 0, 16, 0,
    sizeof(struct flu_indexer_indexfile_t),
    NULL, freeIndexFile);

  zInitTempFile( &dl->docSwap );

  return True;
}

static void _fluFootPrintHeapFree( struct flu_doclist_t *dl );

/* Releases everything owned by the document list: both collections, the
   swap temp file, the footprint blocks and the item memory heap; then
   clears the hash table and detaches the list from its context.
   A list whose context is NULL is left untouched, so calling this twice
   in a row is harmless. */
void _fluDoclistFree( struct flu_doclist_t *dl )
{
  int slot;

  if( dl->context == NULL )
    return;

  zSetCollectionFree( &dl->files, NULL);
  zDataCollectionFree( &dl->indexes );
  zCloseTempFile( dl->context, &dl->docSwap);

  _fluFootPrintHeapFree( dl );
  _fluItemMemoryHeapFree( dl );

  /* the footprints are gone, so no hash chain may keep pointing at them */
  slot = FLU_INDEXER_DOCHASH_SIZE;
  while( slot > 0 )
    dl->docHash[--slot] = NULL;

  dl->context = NULL;
}

/***************************************************************************/
/*                                                                         */
/*  Document traces                                                        */
/*                                                                         */
/***************************************************************************/

/* Hands out the next unused footprint slot.
   Slots are taken from the block at the head of dl->docBlocks; when there
   is no block yet, or the head block is full, a fresh block is allocated
   and pushed to the front of the list. The returned slot is NOT
   initialised - the caller fills in every field. */
static struct flu_doclist_footprint_t *_fluFootPrintNew( struct flu_doclist_t *dl )
{
  struct flu_doclist_block_t *head = dl->docBlocks;

  if( head == NULL || head->count >= FLU_INDEXER_DOCFOOTPRINT_BLOCK_SIZE )
  {
    head = ZNEW( dl->context, struct flu_doclist_block_t);
    head->count = 0;
    head->next = dl->docBlocks;
    dl->docBlocks = head;
  }

  return &head->block[head->count++];
}

/* Frees every footprint block of the list and resets dl->docBlocks to
   NULL. All footprint pointers handed out earlier become invalid. */
static void _fluFootPrintHeapFree( struct flu_doclist_t *dl )
{
  struct flu_doclist_block_t *cur, *following;

  for( cur = dl->docBlocks; cur != NULL; cur = following )
  {
    /* remember the successor before the block itself is released */
    following = cur->next;
    zFree( dl->context, cur);
  }

  dl->docBlocks = NULL;
}

/* Loads the document entry of document number fnum (1-based) into de.
   - fnum out of range: reports errInvalidFilenoValue, returns False;
   - record marked as removed (its fnum field is 0): de is reset with
     fluDocEntryInit and True is returned;
   - record with inum == 0: the entry is read from the swap temp file
     using the stored offset/length;
   - otherwise: the entry is read from index file number |inum| of
     dl->indexes. A negative inum is replaced by its absolute value in the
     stored record as a side effect.
   Returns False when any of the reads or sanity checks fails. */
static Boolean _fluDoclistGetData( struct flu_doclist_t *dl,
    _fn_t fnum, struct flu_docentry_t *de)
{
  struct flu_indexer_doc_t *rec;

  if( fnum == 0 || fnum > zSetCollectionCount( &dl->files ) )
  {
    dl->context->errorIntParam = (int) fnum;
    dl->context->printError( dl->context, errInvalidFilenoValue, "_fluDoclistGetData");
    return False;
  }

  rec = (struct flu_indexer_doc_t *) zSetCollectionElem( &dl->files, fnum-1);

  /* removed document: hand back an empty entry */
  if( rec->fnum == 0 )
  {
    fluDocEntryInit( de );
    return True;
  }

  if( rec->inum != 0 )
  {
    /* the entry lives in one of the attached index files */
    struct flu_indexer_indexfile_t *indexEntry;

    if( rec->inum < 0 ) rec->inum = -rec->inum;
    indexEntry = (struct flu_indexer_indexfile_t *)
      zDataCollectionElem( &dl->indexes, rec->inum - 1);
    if( !ifReadFileInfo( &indexEntry->indexFile, rec->fnum, de, False) )
      return False;
    return True;
  }

  /* the entry was written to the swap temp file */
  ZCHECK_ALGR( dl->context, (rec->length > 0), return False);
  ZCHECK_ALGR( dl->context, (dl->docSwap.stream != NULL), return False);
  if( !fluDocEntryRead( dl->context, de, NULL, dl->docSwap.stream,
        rec->offset, rec->length, False, NULL) )
    return False;

  return True;
}

static Boolean _fluDoclistFingerPrint( struct flu_doclist_t *dl,
    const char *url, struct flu_doclist_fingerprint_t *docfp)
{
  unsigned zint_t crc32;
  struct flu_doclist_footprint_t *fp;
  struct flu_docentry_t docEntry;

  crc32 = zCodingCRC32( __ZINT(0), url, strlen( url ));
  docfp->fp = NULL;
  docfp->crc32 = crc32;
  docfp->lastModified = 0;
  docfp->size = -1;

/* ⠥   㬥  ᯨ᪥ 㦥 ந஢  crc32  url */
  for( fp = dl->docHash[FLU_INDEXER_DOCHASH(crc32)]; fp != NULL; fp = fp->next)
  {
    if( fp->crc32 != crc32 ) continue;

    /*  筮   url 㬥, crc32 ண ᮢ  訬.
       ⠥  . */
    if( !_fluDoclistGetData( dl, fp->fnum, &docEntry) ) return False;
    if( *docEntry.url == '\0' ) continue;

    /* ࠢ url' */
    if( strcmp( url, docEntry.url) == 0 )
    {
      docfp->fp = fp;
      docfp->size = docEntry.size;
      docfp->lastModified = docEntry.lastModified;
      return True;
    }
  }

  return True;
}

/***************************************************************************/
/*                                                                         */
/*  Indexed documents                                                      */
/*                                                                         */
/***************************************************************************/

/* Begins indexing of the document identified by url.
   Any document still open is completed first (as a success). If a
   document with the same URL is already registered, size/lastModified and
   the fifReindexLasts / fifReindexOlders flags decide whether it has to be
   indexed again; when it does not, the function returns True without
   opening a document (fluIndexerDocumentAlive then reports False).
   Otherwise a new record is appended to the file list and the "current
   document" state is initialised. With fifUrlSafe the url pointer is
   stored as is, otherwise a private copy is made.
   Returns False when finishing the previous document or the lookup fails,
   or when control comes back through the jump buffer. */
Boolean fluIndexerDocumentStart( struct flu_indexer_t *fi,
    const char *url, zoff_t size, ztime_t lastModified, unsigned int flags)
{
  struct flu_doclist_fingerprint_t docfp;
  struct flu_indexer_doc_t blank;
  struct flu_doclist_t *dl;
  struct flu_indexer_doccurrent_t *cur;

  /* close the document that may still be open */
  if( !fluIndexerDocumentEnd( fi, True, NULL) ) return False;

  dl = &fi->ptr->docList;
  cur = &fi->ptr->doc;

  /* landing point for longjmp on fi->ptr->jumpBuf
     (NOTE(review): presumably raised on memory shortage - confirm) */
  if( setjmp( fi->ptr->jumpBuf ) != 0 ) return False;

  /* is a document with this URL already known? */
  dl->generalCount++;
  if( !_fluDoclistFingerPrint( dl, url, &docfp) ) return False;

  if( docfp.fp != NULL )
  {
    Boolean reindex = False;

    if( lastModified != 0 )
    {
      if( docfp.lastModified < lastModified )
      {
        /* the registered copy is older than the offered one */
        reindex = True;
      }
      else if( docfp.lastModified == lastModified )
      {
        /* same time stamp: let the sizes decide */
        if( docfp.size < 0 )
        {
          if( size >= 0 || zCheckFlags( flags, fifReindexLasts) )
            reindex = True;
        }
        else if( size >= 0 && docfp.size != size )
        {
          reindex = True;
        }
      }
      else if( zCheckFlags( flags, fifReindexOlders) )
      {
        /* the registered copy is newer; re-index only on request */
        reindex = True;
      }
    }
    else if( docfp.lastModified == 0 )
    {
      /* no time stamp on either side: only sizes can be compared */
      if( size >= 0 )
      {
        if( docfp.size != size )
          reindex = True;
      }
      else if( docfp.size < 0 && zCheckFlags( flags, fifReindexLasts) )
      {
        reindex = True;
      }
    }

    if( !reindex ) return True;
  }

  /* append a fresh record for the document to the file list */
  ZEROFILL( &blank, sizeof(blank));
  blank.fnum = zSetCollectionCount( &dl->files ) + 1;
  blank.inum = 0;
  zSetCollectionAdd( &dl->files, &blank);

  /* initialise the state of the current document */
  cur->fnum = blank.fnum;
  cur->info = (struct flu_indexer_doc_t *)
    zSetCollectionElem( &dl->files, blank.fnum - 1);
  cur->lastModified = lastModified;
  cur->size = size;
  if( !zCheckFlags( flags, fifUrlSafe) )
  {
    /* the caller's string may go away: keep a private copy */
    cur->url = zStrdup( fi->context, url);
    cur->urlSafe = False;
  }
  else
  {
    cur->url = url;
    cur->urlSafe = True;
  }
  cur->fp = docfp.fp;
  cur->crc32 = docfp.crc32;
  cur->contentAll = True;

  return True;
}

Boolean fluIndexerDocumentAlive( struct flu_indexer_t *fi )
{
  return (Boolean) (fi->ptr->doc.fnum != 0);
}

/* Completes the document that is currently open.
   success == True : the document entry (size, lastModified, url, title,
     content) is written to the swap temp file, which is opened on first
     use, and the document is registered in the URL hash: a new footprint
     is created, or - when an older version was registered - that older
     record is marked as removed and its footprint is re-pointed to the
     new document.
   success == False: the new record is marked as removed; the call itself
     still returns True.
   *pwordCount (if not NULL) is set to 0 and, once the temp file is
   available, to the word count of the document as it was before being
   raised to the minimum of 100.
   Returns True when no document is open. Returns False when the temp file
   cannot be opened, the entry cannot be written, or control comes back
   through the jump buffer. */
Boolean fluIndexerDocumentEnd( struct flu_indexer_t *fi, Boolean success,
    unsigned zint_t *pwordCount)
{
  struct flu_indexer_doccurrent_t *doc = &fi->ptr->doc;
  struct flu_doclist_t *dl = &fi->ptr->docList;

  if( pwordCount != NULL ) *pwordCount = 0;
  if( doc->fnum == 0 ) return True;

  if( !success )
  {
    /* the caller gave the document up: mark its record as removed.
       XXX: the record stays in the file list and is still counted. */
    doc->info->fnum = 0;
    success = True;
  }
  else
  {
    struct flu_docentry_t docEntry;

    /* landing point for longjmp on fi->ptr->jumpBuf
       (NOTE(review): presumably raised on memory shortage - confirm) */
    if( setjmp( fi->ptr->jumpBuf ) != 0 ) return False;

    /* the swap file is created lazily */
    if( dl->docSwap.stream == NULL &&
        !zOpenTempFile( fi->context, &dl->docSwap, fi->tempDir) )
    {
      success = False;
    }

    /* store the description of the document in the swap file */
    if( success )
    {
      if( pwordCount != NULL ) *pwordCount = doc->info->wordCount;
      if( doc->info->wordCount < 100 ) doc->info->wordCount = 100;

      fluDocEntryAddAttribs( &docEntry, doc->size, doc->lastModified,
        doc->url, doc->title, doc->content, doc->contentAll, NULL);

      doc->info->length = fluDocEntryWrite( fi->context, &docEntry,
        dl->docSwap.stream, &doc->info->offset);
      if( doc->info->length < 0 ) success = False;
    }

    /* register the document under its URL */
    if( success )
    {
      if( doc->fp == NULL )
      {
        struct flu_doclist_footprint_t *entry = _fluFootPrintNew( dl );

        entry->fnum = doc->fnum;
        entry->crc32 = doc->crc32;
        entry->next = dl->docHash[FLU_INDEXER_DOCHASH(entry->crc32)];
        dl->docHash[FLU_INDEXER_DOCHASH(entry->crc32)] = entry;
      }
      else
      {
        /* retire the previously registered version and re-point its
           footprint to the new document */
        struct flu_indexer_doc_t *previous = (struct flu_indexer_doc_t *)
          zSetCollectionElem( &dl->files, doc->fp->fnum-1);

        previous->fnum = 0;
        doc->fp->fnum = doc->fnum;
      }
    }
  }

  /* drop the private URL copy and close the document */
  if( !doc->urlSafe ) zFree( fi->context, doc->url);
  doc->url = NULL;
  doc->fnum = 0;
  return success;
}

/* Completes the currently open document with data that already exists in
   an index file instead of writing a new entry to the swap file.

   fnum      - document number inside the index file; 0 means "discard the
               document" (its record is marked as removed)
   offset, length, structure, inum
             - stored verbatim in the document record; inum is the number
               of the index file the entry has to be read from

   For fnum != 0 the document is registered in the URL hash: a new
   footprint is created, or - when an older version of the same URL was
   registered - the record of that OLDER version is marked as removed and
   its footprint is re-pointed to the new document.

   Fix: the older record is now looked up through doc.fp->fnum, exactly as
   fluIndexerDocumentEnd does. The previous code used doc.fnum, i.e. the
   record that had just been filled in, so the new document was marked as
   removed while the stale one stayed alive.

   Always returns True; does nothing when no document is open. */
Boolean _fluIndexerDocumentFinish( struct flu_indexer_t *fi,
    _fn_t fnum, zoff_t offset, int length, _st_t structure, short inum)
{
  struct flu_indexer_doccurrent_t *doc = &fi->ptr->doc;

  if( doc->fnum == 0 ) return True;

  if( fnum == 0 )
  {
    /* XXX: the record stays in the file list and is still counted */
    doc->info->fnum = 0;
  }
  else
  {
    /* describe where the document entry can be found */
    doc->info->fnum = fnum;
    doc->info->offset = offset;
    doc->info->length = length;
    doc->info->structure = structure;
    doc->info->inum = inum;
    doc->info->wordCount = 100;

    /* register the document under its URL */
    if( doc->fp == NULL )
    {
      struct flu_doclist_footprint_t *newfp = _fluFootPrintNew( &fi->ptr->docList );

      newfp->fnum = doc->fnum;
      newfp->crc32 = doc->crc32;
      newfp->next = fi->ptr->docList.docHash[FLU_INDEXER_DOCHASH(newfp->crc32)];
      fi->ptr->docList.docHash[FLU_INDEXER_DOCHASH(newfp->crc32)] = newfp;
    }
    else
    {
      /* retire the previously registered version (the one the footprint
         still points at), then re-point the footprint to the new one */
      struct flu_indexer_doc_t *previous = (struct flu_indexer_doc_t *)
        zSetCollectionElem( &fi->ptr->docList.files, doc->fp->fnum-1);

      previous->fnum = 0;
      doc->fp->fnum = doc->fnum;
    }
  }

  /* drop the private URL copy and close the document */
  if( !doc->urlSafe ) zFree( fi->context, doc->url);
  doc->url = NULL;
  doc->fnum = 0;
  return True;
}

/* Stores title and content for the currently open document; both strings
   are truncated to the size of their buffers and always NUL-terminated.
   allContent is kept in the document's contentAll field.
   Does nothing when no document is open. Always returns True.
   title and content must not be NULL. */
Boolean fluIndexerSetContent( struct flu_indexer_t *fi, const char *title,
    const char *content, Boolean allContent)
{
  struct flu_indexer_doccurrent_t *doc = &fi->ptr->doc;

  if( doc->fnum == 0 )
    return True;

  /* strncpy does not terminate on truncation, so the last byte of each
     buffer is forced to NUL afterwards */
  strncpy( doc->title, title, sizeof(doc->title));
  doc->title[sizeof(doc->title)-1] = '\0';

  strncpy( doc->content, content, sizeof(doc->content));
  doc->content[sizeof(doc->content)-1] = '\0';

  doc->contentAll = allContent;

  return True;
}
