package udmDB;
# GPL
# author: Rohan Baxter, Ultimode Inc. rohan@ultimode.com

#**************************************************************************
#
# connect to database and return its handle
#
#**************************************************************************
# Arguments (positional):
#   $db     - database name, second-to-last component of the DSN
#   $host   - database host, last component of the DSN
#   $user   - user name handed to DBI->connect
#   $passwd - password handed to DBI->connect
#
# Returns the handle produced by DBI->connect.  The connection is opened
# with RaiseError => 1, so DBI reports failures by dying rather than by
# returning undef.
sub dbconnect
{
  my ($db, $host, $user, $passwd) = @_;

  # Nothing in this module loads DBI at file scope, so make sure it is
  # available here instead of relying on the caller having loaded it.
  # require is a no-op when DBI is already loaded.
  require DBI;

  my $dbh = DBI->connect("dbi:mysql:$db:$host", $user, $passwd,
                         { RaiseError => 1 });
  return $dbh;
}

# Count the url_id groups that match a search expression, restricted by a
# URL substring filter.
#
# Arguments (positional):
#   $dbh    - database handle (must provide prepare())
#   $qu     - SQL expression selected under the alias "qu"; only groups
#             with qu > 0 are counted
#   $w      - SQL list spliced into "dict.word in (...)"
#   $ul     - text spliced into the pattern url.url LIKE '%...%'
#   $tagstr - extra SQL appended right after the LIKE condition
#
# Returns the value reported by $sth->rows() after the query has run.
#
# NOTE(review): every argument is interpolated into the SQL text as-is;
# presumably callers quote/escape them beforehand - confirm at call sites.
#
# The sub used to be declared with an empty prototype ("()"), which makes
# perl reject any normally compiled call that passes arguments.
sub get_num_of_hits {
  my ($dbh, $qu, $w, $ul, $tagstr) = @_;

  my $query = "SELECT ($qu) as qu , dict.url_id
             FROM dict, url
             WHERE url.rec_id = dict.url_id
               AND url.url LIKE '%$ul%' $tagstr
               AND dict.word in ($w)
             GROUP BY dict.url_id
             HAVING qu >0";

  my $sth = $dbh->prepare($query);
  $sth->execute();

  # Read the count first, then release the result set explicitly instead
  # of leaving an active statement handle to be cleaned up implicitly.
  my $hits = $sth->rows();
  $sth->finish();
  return $hits;
}

# Run the ranked url_id query for one page of results.
#
# Arguments (positional):
#   $dbh  - database handle (must provide prepare())
#   $qu   - SQL expression selected under the alias "qu"; groups are kept
#           only when qu > 0
#   $w    - SQL list spliced into "dict.word in (...)"; also used to
#           compute the rank column "r"
#   $from - first LIMIT argument (row offset)
#   $ps1  - second LIMIT argument (row count)
#
# Returns the executed statement handle; fetching rows (url_id, r, qu,
# ordered by r descending) is left to the caller.
#
# NOTE(review): all arguments are interpolated into the SQL text as-is;
# presumably callers sanitise them - confirm at call sites.
#
# The sub used to be declared with an empty prototype ("()"), which makes
# perl reject any normally compiled call that passes arguments.
sub get_url_ids {
  my ($dbh, $qu, $w, $from, $ps1) = @_;

  my $query = "SELECT dict.url_id,
                 sum(dict.word in ($w)) as r,
                 ($qu) as qu
               FROM dict
               WHERE dict.word in ($w)
               GROUP BY url_id
               HAVING qu>0
               ORDER BY r DESC LIMIT $from,$ps1";

  my $sth = $dbh->prepare($query);
  $sth->execute();
  return $sth;
}

# Run the ranked url_id query for one page of results, restricted to URLs
# matching a substring filter (plus optional extra conditions).
#
# Arguments (positional):
#   $dbh    - database handle (must provide prepare())
#   $qu     - SQL expression selected under the alias "qu"; groups are
#             kept only when qu > 0
#   $words  - SQL list spliced into "dict.word in (...)"; also used to
#             compute the rank column "r"
#   $from   - first LIMIT argument (row offset)
#   $ps1    - second LIMIT argument (row count)
#   $ul     - text spliced into the pattern url.url LIKE '%...%'
#   $tagstr - extra SQL appended right after the LIKE condition
#
# Returns the executed statement handle; fetching is left to the caller.
#
# NOTE(review): all arguments are interpolated into the SQL text as-is;
# presumably callers sanitise them - confirm at call sites.
#
# The sub used to be declared with an empty prototype ("()"), which makes
# perl reject any normally compiled call that passes arguments.
sub get_url_ids_using_filter {
  my ($dbh, $qu, $words, $from, $ps1, $ul, $tagstr) = @_;

  # Debug trace inherited from the original code: best effort only, so a
  # failed open is skipped instead of printing to a closed handle.
  # NOTE(review): a fixed, predictable file name in /tmp is unsafe on a
  # shared host - consider dropping this trace altogether.
  if (open(my $trace, '>', '/tmp/tmp.filter')) {
    print {$trace} "tagstr: $tagstr ul:$ul \n";
    close $trace;
  }

  my $query = "SELECT dict.url_id,
                 sum(dict.word in ($words)) as r,
                 ($qu) as qu
               FROM url, dict
               WHERE url.rec_id = dict.url_id
                 AND url.url LIKE '%$ul%' $tagstr
                 AND dict.word in ($words)
               GROUP by url_id
               HAVING qu > 0
               ORDER BY r DESC LIMIT $from, $ps1";

  my $sth = $dbh->prepare($query);
  $sth->execute();
  return $sth;
}

# Fetch the url columns for a given set of url_ids, ranked by word hits.
#
# Arguments (positional):
#   $dbh    - database handle (must provide prepare())
#   $qu     - SQL expression selected under the alias "qu"; groups are
#             kept only when qu > 0
#   $w      - SQL list spliced into "dict.word in (...)"; also used to
#             compute the rank column "r"
#   $url_in - SQL list spliced into "dict.url_id in (...)"
#
# Returns the executed statement handle.  Each row carries, in order:
# url, title, txt, content_type, docsize, last_modified, keywords,
# description, crc, url_id, r, qu - sorted by r descending.
#
# NOTE(review): all arguments are interpolated into the SQL text as-is;
# presumably callers sanitise them - confirm at call sites.
#
# The sub used to be declared with an empty prototype ("()"), which makes
# perl reject any normally compiled call that passes arguments.
sub get_docs {
  my ($dbh, $qu, $w, $url_in) = @_;

  my $query = "SELECT
                 url.url, url.title, url.txt, url.content_type, url.docsize,
                 url.last_modified, url.keywords, url.description, url.crc,
                 dict.url_id,
                 sum(dict.word in ($w)) as r,
                 ($qu) as qu
               FROM dict,url
               WHERE dict.word in ($w)
                 AND dict.url_id in ($url_in)
                 AND dict.url_id=url.rec_id
               GROUP BY url_id HAVING qu>0
               ORDER BY r DESC";

  my $sth = $dbh->prepare($query);
  $sth->execute();
  return $sth;
}

# Look up the other url rows that share a crc with a given record.
#
# Arguments (positional):
#   $dbh    - database handle (must provide prepare())
#   $crc    - crc value to match against url.crc
#   $rec_id - rec_id of the record to leave out of the result
#
# Returns the executed statement handle; each row carries url,
# content_type and last_modified.
#
# Both values are bound through placeholders rather than pasted into
# quoted SQL literals, so a stray quote in either one can no longer break
# (or alter) the statement.
#
# The sub used to be declared with an empty prototype ("()"), which makes
# perl reject any normally compiled call that passes arguments.
sub get_clones {
  my ($dbh, $crc, $rec_id) = @_;

  my $query = "SELECT url, content_type, last_modified
               FROM url
               WHERE crc = ?
                 AND rec_id <> ?";

  my $sth = $dbh->prepare($query);
  $sth->execute($crc, $rec_id);
  return $sth;
}


# Strip stopwords from a comma-separated list of quoted words.
#
# Arguments (positional):
#   $dbh   - database handle, used to read the "stopword" table
#   $words - comma-separated words, each optionally wrapped in single
#            quotes (all single quotes are stripped before splitting)
#
# Returns a two-element list ($new_words, $word_info):
#   $new_words - the surviving words, each re-wrapped in single quotes and
#                joined with commas; undef when no word survives
#   $word_info - "<word>: stopword " repeated for every removed word;
#                undef when nothing was removed
#
# Fixes relative to the previous version:
#   * defined(%hash) is a compile-time error on perl >= 5.22, which kept
#     the whole module from loading there
#   * the stopword table is now really cached between calls (the old hash
#     was a per-call lexical, so it was reloaded every time despite the
#     "only the first time" comment)
#   * the result strings are lexicals; they used to be package globals
#     and kept growing across calls in a persistent interpreter
#   * the stray debug print to a never-opened FILE handle is gone
#   * the empty prototype ("()") that rejected calls with arguments is gone
{
  # Stopwords loaded so far; shared by all calls within this process.
  # While it is empty (nothing loaded yet, or an empty table) the next
  # call queries the database again.
  my %stopword_list;

  sub remove_stopwords {
    my ($dbh, $words) = @_;
    $words = '' unless defined $words;

    if (!%stopword_list) {
      my $sth = $dbh->prepare("SELECT word FROM stopword");
      $sth->execute();

      # Collect into a scratch hash first so a failure halfway through
      # the fetch cannot leave a partially filled cache behind.
      my %loaded;
      while (my @row = $sth->fetchrow_array()) {
        $loaded{ $row[0] } = 1;
      }
      $sth->finish();
      %stopword_list = %loaded;
    }

    $words =~ s/'//g;    # remove ' from around words...

    my @kept;
    my $word_info;
    foreach my $word (split(',', $words)) {
      if (exists $stopword_list{$word}) {
        $word_info .= "$word: stopword ";
      }
      else {
        push @kept, "'$word'";
      }
    }

    # Stay undef (not '') when nothing survives, as a first call did before.
    my $new_words = @kept ? join(',', @kept) : undef;
    return ($new_words, $word_info);
  }
}

# Append per-word occurrence counts to an info string.
#
# Arguments (positional):
#   $dbh        - database handle (must provide prepare())
#   $words      - SQL list spliced into "word IN (...)"
#   $words_info - text the statistics are appended to
#   $ul         - text spliced into the pattern url.url LIKE '%...%'
#   $tagstr     - extra SQL appended right after the LIKE condition
#
# When either $ul or $tagstr is true the counts are taken from dict joined
# with url under that filter; otherwise from dict alone.
#
# Returns $words_info followed by "word: count" entries separated by ", "
# and terminated by " &nbsp; &nbsp; ".  When the query yields no rows,
# $words_info is returned untouched.
#
# NOTE(review): $words, $ul and $tagstr are interpolated into the SQL text
# as-is; presumably callers sanitise them - confirm at call sites.
#
# The sub used to be declared with an empty prototype ("()"), which makes
# perl reject any normally compiled call that passes arguments.
sub get_word_stats {
  my ($dbh, $words, $words_info, $ul, $tagstr) = @_;

  my $query;
  if ($ul || $tagstr) {
    $query = "SELECT word,
                count(*) as c
              FROM dict, url
              WHERE word IN ($words)
                AND url.rec_id=dict.url_id
                AND url.url LIKE '%$ul%' $tagstr
              GROUP BY word
              ORDER BY word";
  }
  else {
    $query = "SELECT word,
                count(*) as c
              FROM dict
              WHERE word IN ($words)
              GROUP BY word
              ORDER BY word";
  }

  # Debug trace inherited from the original code: best effort only, so a
  # failed open is skipped instead of printing to a closed handle.
  # NOTE(review): a fixed, predictable file name in /tmp is unsafe on a
  # shared host - consider dropping this trace altogether.
  if (open(my $trace, '>', '/tmp/tmp.stat')) {
    print {$trace} "$query \n";
    close $trace;
  }

  my $sth = $dbh->prepare($query);
  $sth->execute();

  # Fetch until the result set is exhausted; DBI does not guarantee that
  # rows() is meaningful for a SELECT before all rows have been fetched.
  my @stats;
  while (my @row = $sth->fetchrow_array()) {
    push @stats, "$row[0]: $row[1]";
  }
  $sth->finish();

  $words_info .= join(', ', @stats) . ' &nbsp; &nbsp; ' if @stats;
  return $words_info;
}

1;
