/*
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
**--------------------------------------------------------------------
*/
#include "swish.h"
#include "index.h"
#include "hash.h"
#include "mem.h"
#include "file.h"
#include "string.h"
#include "list.h"

/* Forward declarations for the file-local helpers defined below. */

/* Traversal of a directory tree or of a single file. */
static void indexadir(char *dir);
static void indexafile(char *path);

/* Walkers over the sort-entry trees built during traversal. */
static void printdirs(struct sortentry *e);
static void printfiles(struct sortentry *e);

/* Per-file predicates. */
static int isoktitle(char *title);
static int ishtml(char *filename);


/* File-system specific configuration parameters.
** Every list starts out empty and is filled in by fs_parseconfline().
*/
static struct swline *pathconlist = NULL;  /* "FileRules pathname contains": matched against full paths */
static struct swline *dirconlist = NULL;   /* "FileRules directory contains": one matching entry skips the directory */
static struct swline *fileconlist = NULL;  /* "FileRules filename contains" */
static struct swline *titconlist = NULL;   /* "FileRules title contains": matched against document titles */
static struct swline *fileislist = NULL;   /* "FileRules filename is" */
static struct swline *suffixlist = NULL;   /* "IndexOnly": handed to isoksuffix() */


/* Reports whether a file or directory has been seen before.
** A path is identified by the (device, inode) pair that stat()
** returns for it, so an object reached under several names -- for
** instance through symbolic links -- is indexed only once and link
** cycles cannot cause endless looping.
**
** Returns 1 if the object was recorded by an earlier call.  Returns 0
** if it is new (it is then remembered) or if stat() fails.  When
** NO_SYMBOLIC_FILE_LINKS is defined the check is compiled out and the
** function always returns 0.
*/

static int already_indexed(char *path)
{
#ifndef NO_SYMBOLIC_FILE_LINKS
        struct dev_ino {
                dev_t  dev;
                ino_t  ino;
                struct dev_ino *next;
        };
        /* Chained hash table of everything seen so far; nodes are
        ** never released by this function. */
        static struct dev_ino *inode_hash[BIGHASHSIZE];

        struct dev_ino *node;
        struct stat info;
        char key[34];           /* two 64-bit hex numbers, '/' and NUL */
        unsigned bucket;

        if ( stat( path, &info ) != 0 )
                return 0;

        /* The bucket is chosen by hashing a "device/inode" string. */
        sprintf( key, "%lx/%lx", (unsigned long)info.st_dev,
                (unsigned long)info.st_ino );
        bucket = bighash(key);

        for ( node = inode_hash[bucket]; node != NULL; node = node->next ) {
                if ( node->dev != info.st_dev || node->ino != info.st_ino )
                        continue;

                /* Same device and inode: this object was seen before. */
                if ( verbose == 3 )
                        printf( "Skipping %s:  %s\n",
                                path, "Already indexed." );
                return 1;
        }

        /* First sighting: push a new node onto the front of the chain. */
        node = (struct dev_ino*)emalloc(sizeof(struct dev_ino));
        node->dev = info.st_dev;
        node->ino = info.st_ino;
        node->next = inode_hash[bucket];
        inode_hash[bucket] = node;
#endif

        return 0;
}


/* Recursively goes into a directory and calls the word-indexing
** functions for each file that's found.
*/

static void indexadir(char *dir)
{
        int badfile;
        DIR *dfd;
#ifdef NEXTSTEP
        struct direct *dp;
#else
        struct dirent *dp;
#endif
        static char s[MAXFILELEN], title[MAXSTRLEN];
        struct sortentry *sortfilelist, *sortdirlist;
        struct swline *tmplist;

        sortfilelist = sortdirlist = NULL;

        if (islink(dir) && !followsymlinks)
                return;

        if ( already_indexed(dir) )
                return;

        if (dir[strlen(dir) - 1] == '/')
                dir[strlen(dir) - 1] = '\0';

        if ((dfd = opendir(dir)) == NULL)
                return;

        while ((dp = readdir(dfd)) != NULL && dirconlist != NULL) {
                badfile = 0;
                tmplist = dirconlist;
                while (tmplist != NULL) {
                        if (matchARegex(dp->d_name, tmplist->line)) {
                                badfile = 1;
                                break;
                        }
                        tmplist = tmplist->next;
                }
                if (badfile)
                        return;
        }
        closedir(dfd);
        dfd = opendir(dir);

        while ((dp = readdir(dfd)) != NULL) {

                if ((dp->d_name)[0] == '.')
                        continue;
                if (islink(dp->d_name) && !followsymlinks)
                        continue;

                badfile = 0;
                tmplist = fileislist;
                while (tmplist != NULL) {
                        if (matchARegex(dp->d_name, tmplist->line)) {
                                badfile = 1;
                                break;
                        }
                        tmplist = tmplist->next;
                }
                if (badfile)
                        continue;

                badfile = 0;
                tmplist = fileconlist;
                while (tmplist != NULL) {
                        if (matchARegex(dp->d_name, tmplist->line)) {
                                badfile = 1;
                                break;
                        }
                        tmplist = tmplist->next;
                }
                if (badfile)
                        continue;

                sprintf(s, "%s%s%s", dir, dir[strlen(dir) - 1] == '/' ?
                        "" : "/", dp->d_name);
                if (islink(s) && !followsymlinks)
                        continue;

                badfile = 0;
                tmplist = pathconlist;
                while (tmplist != NULL) {
                        if (matchARegex(s, tmplist->line)) {
                                badfile = 1;
                                break;
                        }
                        tmplist = tmplist->next;
                }
                if (badfile)
                        continue;

                if (!isdirectory(s)) {

                        if ( already_indexed(s) )
                                continue;

                        if (!isoksuffix(dp->d_name, suffixlist))
                                continue;

                        if (ishtml(s)) {
                                strcpy(title, (char *) parsetitle(s, s));
                                if (!isoktitle(title))
                                        continue;
                        }
                        else {
                                if (strrchr(s, '/') != NULL)
                                        strcpy(title, strrchr(s, '/') + 1);
                                else
                                        strcpy(title, s);
                        }
                        sortfilelist = (struct sortentry *)
                                addsortentry(sortfilelist, s, title);
                }
                else {
                        sortdirlist = (struct sortentry *)
                                addsortentry(sortdirlist, s, s);
                }
        }

        closedir(dfd);

        printfiles(sortfilelist);
        printdirs(sortdirlist);
}

/* Returns 1 if path matches any "filename is", "filename contains"
** or "pathname contains" rule, 0 otherwise.
** NOTE(review): the two filename rules are applied to the whole path
** here, whereas indexadir() applies them to the bare entry name --
** confirm which is intended.
*/
static int path_is_excluded(char *path)
{
        struct swline *rule;

        for (rule = fileislist; rule != NULL; rule = rule->next)
                if (matchARegex(path, rule->line))
                        return 1;

        for (rule = fileconlist; rule != NULL; rule = rule->next)
                if (matchARegex(path, rule->line))
                        return 1;

        for (rule = pathconlist; rule != NULL; rule = rule->next)
                if (matchARegex(path, rule->line))
                        return 1;

        return 0;
}

/* Calls the word-indexing function for a single file.
**
** path is modified in place: one trailing '/' is removed.  The file
** is skipped when it is a symbolic link and followsymlinks is off,
** when it was already indexed, when it matches one of the FileRules
** exclusion lists, when isoksuffix() rejects it, or -- for HTML
** files -- when its title matches a "title contains" rule.
**
** A match against "filename is" excludes the file, exactly as the
** same rule does in indexadir().
*/

static void indexafile(char *path)
{
        char *t, title[MAXSTRLEN];
        struct sortentry *fileentry;
        size_t len;

        if (islink(path) && !followsymlinks)
                return;

        if ( already_indexed(path) )
                return;

        /* Guarded so that an empty string is never indexed at -1 and
        ** "/" is not reduced to "". */
        len = strlen(path);
        if (len > 1 && path[len - 1] == '/')
                path[len - 1] = '\0';

        if (path_is_excluded(path))
                return;

        if (!isoksuffix(path, suffixlist))
                return;

        if (ishtml(path)) {
                t = (char *) parsetitle(path, path);
        }
        else {
                /* Use the last path component as the title. */
                t = strrchr(path, '/');
                t = (t != NULL) ? t + 1 : path;
        }

        /* Bounded copy; strncpy() does not terminate the destination
        ** when the source is too long. */
        strncpy(title, t, sizeof title - 1);
        title[sizeof title - 1] = '\0';

        if (ishtml(path) && !isoktitle(title))
                return;

        /* Single-node tree; printfiles() releases it after indexing. */
        fileentry = (struct sortentry *) emalloc(sizeof(struct sortentry));
        fileentry->filename = (char *) mystrdup(path);
        fileentry->title = (char *) mystrdup(title);
        fileentry->left = fileentry->right = NULL;

        printfiles(fileentry);
}

/* Indexes the words of every file in the tree rooted at e.
** The tree is walked in order (left subtree, node, right subtree) and
** each node, together with its filename and title strings, is freed
** once it has been visited.
**
** With verbose == 3 the file name and its word count are printed.  A
** file that cannot be opened is reported with "(no words)".
*/

static void printfiles(struct sortentry *e)
{
        int wordcount;
        char *s;
        FILE *fp;

        if (e != NULL) {
                printfiles(e->left);
                if (verbose == 3) {
                        if ((s = (char *) strrchr(e->filename, '/')) == NULL)
                                printf("  %s", e->filename);
                        else
                                printf("  %s", s + 1);
                }

                /* Must start at 0: the count is printed below even
                ** when fopen() fails and countwords() never runs. */
                wordcount = 0;
                if ((fp = fopen(e->filename, "r" )) != NULL ) {
                        /* Last argument is non-zero only when a
                        ** nocontentslist exists and isoksuffix()
                        ** accepts the file name against it. */
                        wordcount = countwords(fp, e->filename, e->title,
                                isoksuffix(e->filename, nocontentslist) && nocontentslist != NULL);
                        fclose(fp);
                }
                if (verbose == 3) {
                        if (wordcount)
                                printf(" (%d words)\n", wordcount);
                        else
                                printf(" (no words)\n");
                        fflush(stdout);
                }
                free(e->filename);
                free(e->title);
                printfiles(e->right);
                free(e);
        }
}

/* Walks the tree of collected subdirectories in order (left subtree,
** node, right subtree).  For each node the directory name is
** announced according to the verbosity level, indexadir() is called
** on it, and the node and its strings are then released.
*/

static void printdirs(struct sortentry *e)
{
        if (e == NULL)
                return;

        printdirs(e->left);

        if (verbose == 3) {
                printf("\nIn dir \"%s\":\n", e->filename);
        }
        else if (verbose == 2) {
                printf("Checking dir \"%s\"...\n", e->filename);
        }

        /* Recurse into the directory before its name is freed. */
        indexadir(e->filename);

        free(e->filename);
        free(e->title);

        printdirs(e->right);
        free(e);
}



/* This checks if a filename has an HTML suffix.
** The suffix is whatever follows the last '.' in filename.  It counts
** as HTML when it begins with "htm", "HTM", "shtml" or "SHTML", which
** covers "htm", "html", "shtml" and their upper-case forms.
**
** Returns 1 for an HTML suffix, 0 otherwise (including when there is
** no '.' or nothing after it).
**
** The suffix is compared in place, so its length is not limited by
** any fixed-size buffer.
*/

static int ishtml(char *filename)
{
        char *suffix;

        suffix = strrchr(filename, '.');
        if (suffix == NULL)
                return 0;

        suffix++;               /* step over the '.' */
        if (*suffix == '\0')
                return 0;

        if (!strncmp(suffix, "htm", 3) ||
            !strncmp(suffix, "HTM", 3) ||
            !strncmp(suffix, "shtml", 5) ||
            !strncmp(suffix, "SHTML", 5))
                return 1;

        return 0;
}

/* Check if a particular title should be ignored
** according to the settings in the configuration file.
**
** Returns 0 when title matches any pattern on the "title contains"
** list (titconlist), 1 otherwise.  An empty list accepts every title.
*/

static int isoktitle(char *title)
{
        struct swline *rule;

        for (rule = titconlist; rule != NULL; rule = rule->next) {
                if (matchARegex(title, rule->line))
                        return 0;
        }
        return 1;
}

/********************************************************/
/*                                      "Public" functions                                      */
/********************************************************/

/* Entry point of the file-system data source: indexes path as a
** directory tree if isdirectory() accepts it, otherwise as a single
** file if isfile() accepts it.  Anything else is silently ignored.
** A "Checking ..." line is printed first when verbose >= 2.
*/
void fs_indexpath(char *path)
{
        if (isdirectory(path)) {
                if (verbose >= 2)
                        printf("\nChecking dir \"%s\"...\n", path);
                indexadir(path);
                return;
        }

        if (!isfile(path))
                return;

        if (verbose >= 2)
                printf("\nChecking file \"%s\"...\n", path);
        indexafile(path);
}

/* Reads the next character from the data source.
** vp is the FILE * of the open file, passed as an opaque pointer.
** Returns whatever fgetc() returns for that stream.
*/
int fs_vgetc(void *vp)
{
        FILE *stream = (FILE *) vp;

        return fgetc(stream);
}


/* Returns the size of the file behind the data source.
** vp is the FILE * of the open file, passed as an opaque pointer.
** The result is st_size as reported by fstat(), converted to int,
** or -1 when fstat() fails.
*/
int fs_vsize(void *vp)
{
        struct stat info;
        FILE *stream = (FILE *) vp;

        if (fstat(fileno(stream), &info) != 0)
                return -1;

        return info.st_size;
}


/* Handles one configuration line for the file-system data source.
**
** "IndexOnly" lines fill suffixlist.  Lines containing "FileRules"
** fill the rule list named by their sub-command: "pathname contains",
** "directory contains", "filename contains", "title contains" or
** "filename is".  The sub-commands are tried in that order and the
** first one accepted by grabCmdOptions() wins.
**
** Returns 1 if the line was consumed, 0 if it is not one of ours.
*/
int fs_parseconfline(char *line)
{
        if (grabCmdOptions(line, "IndexOnly", &suffixlist))
                return 1;

        if (!lstrstr(line, "FileRules"))
                return 0;

        if (grabCmdOptions(line, "pathname contains", &pathconlist) ||
            grabCmdOptions(line, "directory contains", &dirconlist) ||
            grabCmdOptions(line, "filename contains", &fileconlist) ||
            grabCmdOptions(line, "title contains", &titconlist) ||
            grabCmdOptions(line, "filename is", &fileislist))
                return 1;

        return 0;
}

/* Descriptor that exposes the functions above as the file-system
** indexing data source.  The initializer is positional, so the order
** of the entries has to match the member order of
** struct _indexing_data_source_def, which is declared outside this file.
*/
struct _indexing_data_source_def FileSystemIndexingDataSource = {
  "File-System",        /* presumably the descriptive name -- confirm against the struct declaration */
  "fs",                 /* presumably the short identifier -- confirm against the struct declaration */
  fs_indexpath,         /* indexes a directory tree or a single file */
  fs_vgetc,             /* reads one character with fgetc() */
  fs_vsize,             /* file size from fstat(), -1 on failure */
  fs_parseconfline      /* consumes IndexOnly and FileRules lines */
};
