#!/usr/bin/perl

# GPL
# author: Rohan Baxter, Ultimode Inc. rohan@ultimode.com
#
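# CGI-script args:
#    q= query string
#    np= zero-based page index (default: 0)
#    ps= number of results to show on a page (default: 20)
#    mode= match mode (all or any?); not read by this script, the navigation links pass m=all
#    t= url status constraint for search (inserted into the SQL as "AND url.tag = <t>")
#    ul= url filter for search  default: 0
#    ps_next= not read as a parameter; computed below as the result count shown on the "Next" link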
#
#
use strict;
use diagnostics;

use udmDB; 
use udmTemplate;
use udmParser;
use DBI();
use CGI();

# Emit the HTTP header straight away, before any template or database work.
print "Content-type: text/html\r\n\r\n";

# Set some defaults (can be overwritten in template file)
$ENV{MySQLDB} = 'udmsearch';
$ENV{MySQLHost} = 'localhost';
$ENV{MySQLUser} = 'root';
$ENV{MySQLPass} = '';
my $self = 'search.pl';          # script name used in generated navigation links
my $template_file = "search.htm";
my $search_page = "";            # the whole response body is accumulated here

# Read in Template (will set $ENV{} variables as side-effect)!
my $template_hashref = &udmTemplate::read_template( $template_file );
my %template = %$template_hashref;

# Template environment variables. This must be initialised from a list:
# assigning {} (a hash reference) to a hash warns "Reference found where
# even-sized list expected" and leaves a bogus "HASH(0x...)" key behind.
my %template_env = ( self => $self );

# Query Processing

# Query string is taken from command line
#my $query = $ARGV[0];
#my $np = 0;  
#my $ps = 20; # number of results to display on a page

# Query string is taken from CGI param
my $cgi = CGI->new;
my $query = $cgi->param("q");

# Page index: anything that is not a plain non-negative integer (including a
# missing parameter) falls back to the first page, so the page arithmetic
# further down never works on undef or on attacker-chosen text.
my $np = $cgi->param("np");
if (!defined($np) || $np !~ /\A\d+\z/){
	$np = 0;
}

# Page size: must be a positive integer, otherwise use the default of 20.
my $ps = $cgi->param("ps");
if (!defined($ps) || $ps !~ /\A\d+\z/ || $ps < 1){
	$ps = 20;
}

# A filter for urls to be searched. It is handed to udmDB unchanged.
# NOTE(review): presumably udmDB places it into SQL - confirm it is quoted there.
my $ul = $cgi->param("ul");

# A constraint on status of URL being searched i.e. 404 ,etc
# The value is placed unquoted into the SQL fragment below, so only an
# integer is accepted; anything else is discarded instead of being
# interpolated into the statement.
my $tag = $cgi->param("t");
my $tagstr = "";
if (defined($tag) && $tag =~ /\A-?\d+\z/){
	# As before, an empty or "0" tag means "no constraint".
	if ($tag) {
		$tagstr = "AND url.tag = $tag";
	}
} else {
	$tag = "";
}


# Hand the raw query to the parser. It returns the parsed query ($qu), the
# words string ($words) and an error message ($error, empty when all is well).
my ($qu, $words, $error) = &udmParser::parse( $query );

# Two situations end the request right here: a parse error (show the error
# page) or a query without any words (show just the page frame).
my $answered_early = 0;
if ($error ne ""){
	$search_page .= &udmTemplate::print_error($error, \%template,\%template_env);
	$answered_early = 1;
} elsif ($words eq ""){
	for my $section ("top", "bottom"){
		$search_page .= &udmTemplate::print_template(\%template,$section,\%template_env);
	}
	$answered_early = 1;
}
if ($answered_early){
	print $search_page;
	exit(0);
}

# Make the original query text available to the templates.
$template_env{query} = $query;

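# CGI issue: $query is stored in the template environment unescaped (nothing
# like PHP's htmlspecialchars is applied here). What should happen if the
# user supplies & " > < (i.e. &amp; &quot; &gt; &lt;)?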

 
# Connect to Database
my $dbh = &udmDB::dbconnect($ENV{MySQLDB},$ENV{MySQLHost},
			    $ENV{MySQLUser},$ENV{MySQLPass});
unless ($dbh){
	# Without a handle the first method call below would die with
	# "Can't call method ... on an undefined value" and the visitor would
	# get an empty page. Report the problem through the template instead.
	# NOTE(review): assumes dbconnect returns a false value on failure;
	# if it dies by itself this guard is simply never reached.
	$error = "could not connect to the search database.";
	$search_page .= &udmTemplate::print_error($error, \%template,\%template_env);
	print $search_page;
	exit(0);
}
$template_env{dbh} = $dbh;

# Look for stopwords in words, delete them.
# remove_stopwords returns the remaining words plus a words_info value
# that is stored in the template environment.
($words,$template_env{words_info}) = &udmDB::remove_stopwords( $dbh, $words );

#open FILE,">/tmp/tmp.arg";
#print FILE "words: $words \n";
#close(FILE);

if ($words eq "" && $qu ne ""){ # the only words in query are stopwords
  $error = "all query words are too common to search on.";
  $search_page .= &udmTemplate::print_error($error, \%template,\%template_env);
  $dbh->disconnect();
  print $search_page;
  exit(0);
}

# Extend words_info with the word count statistics of the remaining words.
$template_env{words_info} = &udmDB::get_word_stats( $dbh, $words, $template_env{words_info}, $ul, $tagstr );

# Ask the database how many hits the query has in total
# (the url filter and the tag constraint are passed along).
my $num_hits = &udmDB::get_num_of_hits($dbh,$qu,$words,$ul,$tagstr);
$template_env{found} = $num_hits;

# Page arithmetic.
#   $from    - offset of the first result of this page (used in the query)
#   $ps1     - one more than the page size (used in the query)
#   from1/to - numbers of the first and last document shown on the page
my $from = $np * $ps;
my $ps1 = $ps + 1;
my $last_on_page = ($np + 1) * $ps;
$template_env{'from1'} = $from + 1;
$template_env{'to'} = ($last_on_page > $num_hits) ? $num_hits : $last_on_page;

# Number of results announced on the "Next" link: whatever is left after
# this page, replaced by a full page when that is negative or too large.
my $ps_next = $num_hits - $template_env{'to'};
if ( ($ps_next < 0) || ($ps_next > $ps) ){
	$ps_next = $ps;
}

# Start outputting the template: the page top is always emitted.
$search_page .= &udmTemplate::print_template(\%template,"top",\%template_env);

# Finish if the words do not occur in the db: emit the "notfound" section,
# close the page, release the connection and stop.
unless ($num_hits > 0){
  for my $section ("notfound", "bottom"){
    $search_page .= &udmTemplate::print_template(\%template,$section,\%template_env);
  }
  $dbh->disconnect();
  print $search_page;
  exit(0);
}

# There are hits: open the results section.
$search_page .= &udmTemplate::print_template(\%template,"restop",\%template_env);

# Database Query: get urls with word counts
#
# $ps1 (= $ps + 1) is passed as the row count, so a result set that holds
# more than $ps rows tells us that a further page exists.

my $sth;
# $tagstr is always defined (it starts out as ""), so it has to be tested
# for content; testing defined($tagstr) made the unfiltered branch unreachable.
if ( defined($ul) || ($tagstr ne "") ){
	$sth = &udmDB::get_url_ids_using_filter( $dbh, $qu, $words, $from, $ps1, $ul, $tagstr );
} else {
	$sth = &udmDB::get_url_ids( $dbh, $qu, $words, $from, $ps1 );
}

# Collect at most one page of url ids. The rows are counted while fetching
# rather than via $sth->rows, which DBI does not guarantee to be meaningful
# for SELECT statements before all rows have been fetched.
my $isnext = 0;   # set to 1 when there are more results than fit on a page
my @url_ids = ();
while (my @row = $sth->fetchrow_array){
	if (scalar(@url_ids) >= $ps){
		# The page is full and yet another row arrived: a next page exists.
		$isnext = 1;
		last;
	}
	next unless defined $row[0];
	push @url_ids, $row[0];
}

# Comma separated id list for the document query; "-1" means "no ids found".
my $url_in = @url_ids ? join(",", @url_ids) : "-1";

	# Emit one "res" template section per document of the collected url ids
	# ("-1" in $url_in means that no id was collected).
	if ($url_in ne "-1"){
		# Template variable names, in the column order of the rows that
		# get_docs returns.
		my @res_fields = qw(url title text contype docsize lastmod
				    keyw desc crc rec_id rating query);

		my $doc_sth = &udmDB::get_docs($dbh, $qu, $words, $url_in);

		# ndoc is the running number of the document within the whole result.
		$template_env{'ndoc'} = $from + 1;
		while (my @doc = $doc_sth->fetchrow_array){
			@template_env{@res_fields} = @doc[0 .. $#res_fields];
			$search_page .= &udmTemplate::print_template(\%template,"res",\%template_env);
			#print "title:$template_env{title} :url=$template_env{url} \n";
			$template_env{'ndoc'}++;
		}
		$doc_sth->finish();
	}
	# Build the "Prev" / "Next" navigation links for the result page.
	my $prevp = 0;
	my $nextp = 0;
	my $mode = 'all'; # only echoed back as m=; nothing in this script reads it
	$template_env{nav} =  "&nbsp";

	# Percent-encode a value for use in a URL query string. Everything except
	# the RFC 3986 unreserved characters is encoded, which also makes the
	# result safe inside the double-quoted HREF attribute. An undefined value
	# becomes the empty string. (Spaces are sent as %20 instead of '+'.)
	my $url_escape = sub {
		my ($value) = @_;
		return "" unless defined $value;
		$value =~ s/([^A-Za-z0-9_.~-])/sprintf("%%%02X", ord($1))/ge;
		return $value;
	};

	# The request values are echoed into the links, so each one is encoded
	# instead of being interpolated raw.
	my $q_arg   = $url_escape->($query);
	my $ps_arg  = $url_escape->($ps);
	my $tag_arg = $url_escape->($tag);
	my $ul_arg  = $url_escape->($ul);

	# Returns the link target for the given page index.
	my $page_href = sub {
		my ($page) = @_;
		return "$self?q=$q_arg&np=$page&ps=$ps_arg&m=$mode&t=$tag_arg&ul=$ul_arg";
	};

	# Current page index; anything but a plain non-negative integer counts
	# as the first page.
	my $cur_page = (defined($np) && $np =~ /\A\d+\z/) ? $np : 0;

	if ($cur_page > 0){
		$prevp = $cur_page - 1;

		$template_env{nav} = "<A HREF=\"" . $page_href->($prevp) . "\">[&lt; &lt; Prev "
			. $cgi->escapeHTML($ps) . "]</A> &nbsp; ";
	}
	if ($isnext == 1){
		$nextp = $cur_page + 1;

		$template_env{nav} .= "<A HREF=\"" . $page_href->($nextp) . "\">[Next "
			. $cgi->escapeHTML($ps_next) . " &gt; &gt;]</A>";
	}
	
# Close the results section and then the page itself.
for my $section ("resbot", "bottom"){
	$search_page .= &udmTemplate::print_template(\%template,$section,\%template_env);
}

# Release the url-id statement handle and the database connection.
$sth->finish();
$dbh->disconnect();

# The page is sent in one piece. It might be better to "dribble" the output
# so that results can be seen straightaway.
print $search_page;
