/*
** Copyright (C) 1995, 1996, 1997, 1998 Hewlett-Packard Company
** Originally by Kevin Hughes, kev@kevcom.com, 3/11/94
**
** This program and library is free software; you can redistribute it and/or
** modify it under the terms of the GNU (Library) General Public License
** as published by the Free Software Foundation; either version 2
** of the License, or any later version.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
** GNU (Library) General Public License for more details.
**
** You should have received a copy of the GNU (Library) General Public License
** along with this program; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
**-------------------------------------------------------------
** Changed getdefaults to allow metaNames in the user
** configuration file
** G.Hill 4/16/97 ghill@library.berkeley.edu
*/

#include "swish.h"
#include "file.h"
#include "mem.h"
#include "string.h"
#include "error.h"
#include "list.h"
#include "hash.h"
#include "index.h"

/* Is a file a directory?
**
** path: name of the file to examine.  It is handed to stat(), so a
**       symbolic link is judged by the file it refers to.
**
** Returns 1 if path names a directory, and 0 if it names anything
** else or if stat() fails (for example, the path does not exist).
*/

int isdirectory(path)
char *path;
{
	struct stat stbuf;
	
	if (stat(path, &stbuf) != 0)
		return 0;
	/* S_ISDIR() is the base POSIX file-type test; the S_IFMT and
	** S_IFDIR mask names are only XSI extensions.
	*/
	return S_ISDIR(stbuf.st_mode) ? 1 : 0;
}

/* Is a file a regular file?
**
** path: name of the file to examine.  It is handed to stat(), so a
**       symbolic link is judged by the file it refers to.
**
** Returns 1 if path names a regular file, and 0 if it names anything
** else or if stat() fails (for example, the path does not exist).
*/

int isfile(path)
char *path;
{
	struct stat stbuf;
	
	if (stat(path, &stbuf) != 0)
		return 0;
	/* S_ISREG() is the base POSIX file-type test; the S_IFMT and
	** S_IFREG mask names are only XSI extensions.
	*/
	return S_ISREG(stbuf.st_mode) ? 1 : 0;
}

/* Is a file a link?
**
** path: name of the file to examine.  It is handed to lstat(), which
**       reports on the link itself rather than on its target.
**
** Returns 1 if path names a symbolic link, and 0 if it names anything
** else or if lstat() fails.  When NO_SYMBOLIC_FILE_LINKS is defined
** the function always returns 0.
*/

int islink(path)
char *path;
{
#ifndef NO_SYMBOLIC_FILE_LINKS
	struct stat stbuf;
	
	if (lstat(path, &stbuf) != 0)
		return 0;
	/* The file type must be compared against the whole type field.
	** Masking st_mode with S_IFLNK alone only works as long as no
	** other type code happens to contain all of S_IFLNK's bits.
	*/
	return S_ISLNK(stbuf.st_mode) ? 1 : 0;
#else
	return 0;
#endif
}

/* Report how many bytes a file occupies.
**
** path: name of the file to measure (looked up with stat()).
**
** Returns the st_size value reported by stat(), converted to int,
** or -1 when stat() fails.
*/

int getsize(path)
char *path;
{
	struct stat info;
	
	if (stat(path, &info) == 0)
		return info.st_size;
	return -1;
}

/* Add an entry to the metaEntryList with the given value and the
** appropriate index.
**
** metaList:  address of the head pointer of the list; updated when
**            the list was empty.
** metaWord:  name to add.  It is lowercased IN PLACE before use.
** isDocProp: nonzero when the name comes from the PropertyNames tag,
**            zero when it comes from MetaNames (only consulted when
**            SUPPORT_DOC_PROPERTIES is defined).
**
** If the (lowercased) name is already in the list no new entry is
** made; only the property flags of the existing entry are updated.
** Otherwise a new entry is appended at the tail and given the next
** value of a function-static counter as its index.  Indexes start at
** 2, and the values 1 and every multiple of 128 are skipped
** (NOTE(review): presumably reserved by the index format - confirm).
*/

void addMetaEntry(metaList, metaWord, isDocProp)
struct metaEntry** metaList;
char* metaWord;
int isDocProp;
{
	static int counter;
	int i;
	struct metaEntry* newEntry;
	struct metaEntry* tmpEntry;
	struct metaEntry* lastEntry = NULL;
	
	if (counter == 0)
		counter = 2;
	/* "!counter % 128" would parse as "(!counter) % 128", which is
	** always 0 here; the remainder itself has to be tested.
	*/
	else if (counter == 1 || (counter % 128) == 0)
		counter++;

	/* tolower() requires a value representable as unsigned char */
	for (i = 0; metaWord[i]; i++)
		metaWord[i] = (char) tolower((unsigned char) metaWord[i]);

	/* Look for a duplicate, remembering the tail for the append below */
	for (tmpEntry = *metaList; tmpEntry; tmpEntry = tmpEntry->next)
	{
		if (strcmp(tmpEntry->metaName, metaWord) == 0)
		{
			#ifdef SUPPORT_DOC_PROPERTIES
			/*
			 * found a duplicate entry already in the list.
			 * Since there are two different config tags that can
			 * be used to get here (MetaNames and PropertyNames)
			 * and that might be using the same Meta tag name,
			 * we cannot assume that either one of these was
			 * called first.
			 * The semantics we want for the metaEntry are:
			 *	isDocProperty = 1 if in PropertyNames, else 0
			 *	isOnlyDocProperty = 1 if not in MetaNames, else 0
			 */
			if (isDocProp)
			{
				/* this is a DocumentProperty tag */
				if (!tmpEntry->isDocProperty)
				{
					tmpEntry->isDocProperty = 1;
				}
			}
			else
			{
				/* this is a MetaName tag */
				if (tmpEntry->isDocProperty)
				{
					tmpEntry->isOnlyDocProperty = 0;
				}
			}
			#endif

			return;
		}
		lastEntry = tmpEntry;
	}

	newEntry = (struct metaEntry*) emalloc(sizeof(struct metaEntry));

	#ifdef SUPPORT_DOC_PROPERTIES
	/* isDocProp is true when we see the PropertyNames config tag */
	newEntry->isDocProperty = isDocProp;
	newEntry->isOnlyDocProperty = isDocProp;
	#endif

	newEntry->metaName = (char*)mystrdup(metaWord);
	newEntry->index = counter++;
	newEntry->next = NULL;

	/* lastEntry is NULL only when the list was empty */
	if (lastEntry)
		lastEntry->next = newEntry;
	else
		*metaList = newEntry;
	
	return;
}

/*
 * Some handy routines for parsing the Configuration File
 */

/*
 * Parse a yes/no directive.
 *
 * line:       configuration line to examine
 * commandTag: directive name handed to getconfvalue()
 * yesNoValue: receives 1 when lstrstr() finds "yes" in the value,
 *             0 otherwise; left untouched when the tag is not matched
 *
 * Returns 1 when getconfvalue() matched commandTag, 0 otherwise.
 */
int grabYesNoField(line, commandTag, yesNoValue)
char* line;
char* commandTag;
int* yesNoValue;
{
	char answer[MAXSTRLEN];

	if (getconfvalue(line, commandTag, answer) == NULL)
		return 0;	/* commandTag not matched */

	if (lstrstr(answer, "yes"))
		*yesNoValue = 1;
	else
		*yesNoValue = 0;
	return 1;
}

/*
 * Parse a directive of the form "<commandTag> <stringValue>".
 *
 * line:        configuration line to examine
 * commandTag:  directive name handed to getconfvalue()
 * singleValue: destination buffer; the extracted value is copied
 *              into it with strcpy(), so the caller must supply
 *              enough room.  Left untouched when the tag is not
 *              matched.
 *
 * Returns 1 when getconfvalue() matched commandTag, 0 otherwise.
 */
int grabStringValueField(line, commandTag, singleValue)
char* line;
char* commandTag;
char* singleValue;
{
	char extracted[MAXSTRLEN];

	if (getconfvalue(line, commandTag, extracted) == NULL)
		return 0;	/* commandTag not matched */

	strcpy(singleValue, extracted);
	return 1;
}

/*
 * Parse a directive of the form "<commandTag> <integer>".
 *
 * line:        configuration line to examine
 * commandTag:  directive name
 * singleValue: receives atoi() of the value text
 * dontToIt:    when nonzero the directive is still recognised but
 *              *singleValue is not modified
 *
 * *singleValue is also left alone when the value text is empty or
 * begins with a newline.
 *
 * Returns 1 when commandTag was matched, 0 otherwise.
 */
int grabIntValueField(line, commandTag, singleValue, dontToIt)
char* line;
char* commandTag;
int* singleValue;
int dontToIt;
{
	char text[MAXSTRLEN];

	if (grabStringValueField(line, commandTag, text) == 0)
		return 0;

	/* matched, but nothing to store */
	if (dontToIt || text[0] == '\0' || text[0] == '\n')
		return 1;

	*singleValue = atoi(text);
	return 1;
}


int grabCmdOptionsMega(line, 
			       commandTag,
			       listOfWords, 
			       gotAny, dontToIt)
char* line;
char* commandTag;
struct swline **listOfWords;
int* gotAny;
int dontToIt;
{
	/*
	 * Parse the line if it contains commandTag
	 * (commandTag is not required to be the first token in the line).
	 * Grab all of the words after commandTag and add them to the list
	 * that *listOfWords points at (via addswline).
	 * If "gotAny" is not NULL then set it to 1 if we grabbed any words.
	 * If "dontToIt" is nonzero then the tag is still recognised but
	 * no words are grabbed.
	 * Line may be "<commandTag> <stringValue> .." but it could also
	 * be "<other commands> <commandTag> <stringValue> .."
	 *
	 * Returns 1 if commandTag was found in the line, 0 otherwise.
	 */
	line = lstrstr(line, commandTag);	/* includes main command tag? */
	if (line == NULL)
		return 0;
	line += strlen(commandTag);
	
	/* grab all words after the command tag */
	if (!dontToIt)
	{
		char value[MAXSTRLEN];
		int skiplen;
		while (1) 
		{
			/* getword hands back the next word and stores in
			 * skiplen how far to advance past it */
			strcpy(value, getword(line, &skiplen));
			/* stop when nothing was consumed or the word is empty */
			if (!skiplen || value[0] == '\0' || value[0] == '\n')
			{
				break;
			}
			else 
			{
				line += skiplen;
				*listOfWords = (struct swline *) addswline(*listOfWords, value);
				if (gotAny)
					*gotAny = 1;
			}
		}
	}
	return 1;
}

/*
 * Convenience form of grabCmdOptionsMega(): always collects the words
 * that follow commandTag and does not report whether any were found.
 *
 * Returns whatever grabCmdOptionsMega() returns.
 */
int grabCmdOptions(line, commandTag, listOfWords)
char* line;
char* commandTag;
struct swline **listOfWords;
{
	int matched;

	matched = grabCmdOptionsMega(line, commandTag, listOfWords, NULL, 0);
	return matched;
}

/* Reads the configuration file and puts all the right options
** in the right variables and structures.
**
** conffile:   name of the configuration file.
** hasdir:     in/out.  When nonzero on entry, IndexDir lines are
**             recognised but their words are not collected.  Set to
**             1 on return if IndexDir words were collected here.
** hasindex:   in/out.  Same as hasdir, for IndexFile lines.
** plimit:     receives the first number of an IgnoreLimit line.
** flimit:     receives the second number of an IgnoreLimit line.
** hasverbose: when nonzero, IndexReport lines do not change the
**             global verbose setting.
**
** Lines starting with '#' or consisting of a bare newline are
** skipped.  Every other line is tried against the directives below
** in the order written, and the first one that matches wins.  A line
** that matches none of them and is rejected by parseconfline() is
** reported; after the whole file has been read the program exits
** with status 1 if any such line was seen.
*/

void getdefaults(conffile, hasdir, hasindex, plimit, flimit, hasverbose)
char *conffile;
int *hasdir;
int *hasindex;
long *plimit;
long *flimit;
int hasverbose;
{
	int skiplen, gotdir, gotindex;
	char *c, line[MAXSTRLEN], value[MAXSTRLEN];
	FILE *fp;
	int linenumber = 0;
	int baddirective = 0;
	
	gotdir = gotindex = 0;
	
	if ((fp = fopen(conffile, "r")) == NULL  ||
		!isfile(conffile) ) 
	{
		sprintf(errorstr, "Couldn't open the configuration file \"%s\".", conffile);
		progerr(errorstr);
	}

	while (fgets(line, MAXSTRLEN, fp) != NULL) 
	{
		linenumber++;
		if (line[0] == '#' || line[0] == '\n')
			continue;
		if (grabCmdOptionsMega(line, "IndexDir", &dirlist, &gotdir, *hasdir)) {}
		else if (grabCmdOptions(line, "NoContents", &nocontentslist)) {}
		else if (grabCmdOptionsMega(line, "IndexFile", &indexlist, &gotindex, *hasindex)) {}
		else if (grabIntValueField(line, "IndexReport", &verbose, hasverbose))	{}
		else if (grabIntValueField(line, "MinWordLimit", &minwordlimit, 0))	{}
		else if (grabIntValueField(line, "IndexComments", &indexComments, 0))	{}
		else if (grabIntValueField(line, "MaxWordLimit", &maxwordlimit, 0))	{}
		else if (grabStringValueField(line, "WordCharacters", wordchars))	{}
		else if (grabStringValueField(line, "BeginCharacters", beginchars))	{}
		else if (grabStringValueField(line, "EndCharacters", endchars))	{}
		else if (grabStringValueField(line, "IgnoreLastChar", ignorelastchar))	{}
		else if (grabStringValueField(line, "IgnoreFirstChar", ignorefirstchar))	{}
		else if (grabCmdOptions(line, "ReplaceRules", &replacelist)) { checkReplaceList(); }
		else if (grabYesNoField(line, "FollowSymLinks", &followsymlinks))	{}
		else if (grabStringValueField(line, "IndexName", indexn))	{}
		else if (grabStringValueField(line, "IndexDescription", indexd))	{}
		else if (grabStringValueField(line, "IndexPointer", indexp))	{}
		else if (grabStringValueField(line, "IndexAdmin", indexa))	{}
		else if (grabYesNoField(line, "UseStemming", &applyStemmingRules))	{}	/* 11/24/98 MG */
		else if (grabYesNoField(line, "IgnoreTotalWordCountWhenRanking", &ignoreTotalWordCountWhenRanking))	{}	/* 11/24/98 MG */
		else if ((c = (char *) lstrstr(line, "MetaNames")) != NULL) 
		{
			/* every word after the tag becomes a meta name */
			c += strlen("MetaNames");
			while (1) 
			{
				strcpy(value, (char *) getword(c, &skiplen));
				if (!skiplen || value[0] == '\0' || value[0] == '\n')
				{
					break;
				}
				else 
				{
					c += skiplen;
					addMetaEntry(&metaEntryList, value, 0);
				}
			}
		}
		#ifdef SUPPORT_DOC_PROPERTIES
		else if ((c = (char *) lstrstr(line, "PropertyNames")) != NULL) 	/* 11/24/98 MG */
		{
			/* every word after the tag becomes a document property */
			c += strlen("PropertyNames");
			while (1) 
			{
				strcpy(value, (char *) getword(c, &skiplen));
				if (!skiplen || value[0] == '\0' || value[0] == '\n')
				{
					break;
				}
				else 
				{
					c += skiplen;
					addMetaEntry(&metaEntryList, value, 1);	/* isDocProp = 1 */
				}
			}
		}
		#endif
		else if ((c = (char *) lstrstr(line, "IgnoreWords")) != NULL) {
			/* each word is a stop word, except "SwishDefault",
			** which loads the built-in stop word list */
			c += strlen("IgnoreWords");
			while (1) {
				strcpy(value, (char *) getword(c, &skiplen));
				if (!skiplen || value[0] == '\0' || value[0] == '\n')
					break;
				else {
					c += skiplen;
					if (lstrstr(value, "SwishDefault"))
						readdefaultstopwords();
					else
						addstophash(value);
				}
			}
		}
		else if ((c = (char *) lstrstr(line, "IgnoreLimit")) != NULL) {
			/* two numbers are expected; a missing one leaves the
			** corresponding limit unchanged */
			c += strlen("IgnoreLimit");
			strcpy(value, (char *) getword(c, &skiplen));
			if (!skiplen || value[0] == '\0' || value[0] == '\n')
				continue;
			else {
				c += skiplen;
				*plimit = atol(value);	/* the limits are long */
			}
			strcpy(value, (char *) getword(c, &skiplen));
			if (!skiplen || value[0] == '\0' || value[0] == '\n')
				continue;
			else {
				c += skiplen;
				*flimit = atol(value);
			}
		}
		/* IndexVerbose is supported for backwards compatibility */
		else if ((c = (char *) lstrstr(line, "IndexVerbose")) != NULL) {
			c += strlen("IndexVerbose");
			strcpy(value, (char *) getword(c, &skiplen));
			verbose = (lstrstr(value, "yes")) ? 3 : 0;
		}
		else if (!parseconfline(line)) {
			printf("Bad directive on line #%d: %s", linenumber, line );
			baddirective = 1;
		}
	}
	fclose(fp);
	
	if (baddirective)
		exit(1);
	if (gotdir && !(*hasdir))
		*hasdir = 1;
	if (gotindex && !(*hasindex))
		*hasindex = 1;
}

/* Checks that all the regex in the replace list are correct.
**
** Walks the global replacelist.  An entry containing "append" or
** "prepend" is skipped together with the one entry after it.  An
** entry containing "replace" is followed by a pattern entry, which
** is compiled as a POSIX extended regular expression, and then by
** one more entry, which is skipped.  The walk stops quietly when the
** list ends in the middle of a rule.
**
** If a pattern does not compile, a message is printed and the
** program exits with status 1.
*/
void checkReplaceList() 
{
	struct swline *tmpReplace;
	char *rule, *patt;
	regex_t re;
	int status;
	
	tmpReplace = replacelist;
	while (tmpReplace) {
		/* keep the keyword entry; tmpReplace may move on below */
		rule = tmpReplace->line;
		
		/*  If it is not replace, just do nothing */
		if (lstrstr(rule,"append") || lstrstr(rule,"prepend") ) {
			if (tmpReplace->next){
				tmpReplace = tmpReplace->next;
			}
			else
				return;
		}
		if (lstrstr(rule,"replace")) {
			tmpReplace = tmpReplace->next;
			/* "replace" was the last entry: no pattern to check */
			if (tmpReplace == NULL) 
				return;
			patt = tmpReplace->line;
			status = regcomp(&re,patt, REG_EXTENDED);
			if (status != 0) {
				printf ("Illegal regular expression %s\n", patt);
				exit(1);
			}
			/* only validity matters; release the compiled form */
			regfree(&re);
			
			if (tmpReplace->next) 
				tmpReplace = tmpReplace->next;
			else
				return;
		}
		tmpReplace = tmpReplace->next;
	}
}

/* Checks that every entry of a list is a correct regex.
**
** list: head of the swline list to check; every entry is treated as
**       a pattern and compiled as a POSIX extended regular
**       expression.  (checkReplaceList() does the same job for the
**       replace list, whose entries are not all patterns.)
**
** If an entry does not compile, a message is printed and the
** program exits with status 1.
*/
void checkListRegex (list) 
struct swline *list;
{
	struct swline *tmpReplace;
	regex_t re;
	int status;
	
	/* walk the list that was passed in, not the global replacelist */
	for (tmpReplace = list; tmpReplace; tmpReplace = tmpReplace->next) {
		status = regcomp(&re, tmpReplace->line, REG_EXTENDED);
		if (status != 0) {
			printf ("Illegal regular expression %s\n", tmpReplace->line);
			exit(1);
		}
		/* only validity matters; release the compiled form */
		regfree(&re);
	}
}/* end of checkListRegex */




