package de.hawlitzek.logparser;

import java.util.*;
import java.io.*;
import de.hawlitzek.util.text.*;

/**
 * Simple log analyzer that reads a given HTTP log file and counts how many
 * requests come from each browser or search robot.
 * Afterwards it prints a small statistic, once ordered by browser name and
 * once ordered by {@link CountedEntryComparator}.
 * 
 * @author Florian Hawlitzek, Hawlitzek IT-Consulting GmbH
 */
public class FHBrowserAnalyzingHttpLogParser {
	
	public static final String VERSION = "1.0.3";
	
	/**
	 * Results: maps the browser string returned by
	 * {@link LogEntry#parseLine} to its {@link BrowserEntry}.
	 * A sorted map is used so the entries can be listed in key order.
	 */
	private SortedMap stats = new TreeMap();
	
	/** number of log entries that have been counted in {@link #stats} */
	private int nrOcc = 0;
	
	/**
	 * Prints the browser statistics to standard output: first the total
	 * count, then all entries in key order, then all entries ordered by
	 * {@link CountedEntryComparator}.
	 */
	private void displayStats()
	{
		System.out.println("All access count: " + nrOcc + "\n");
		
		System.out.println("browser statistics (in alphabetic order):\n");
		
		Iterator it = stats.values().iterator();
		while (it.hasNext())
		{
			BrowserEntry be = (BrowserEntry)it.next();
			// must run before the entry is printed here and in the second listing below
			be.calcNrOccurencesRelative(nrOcc);
			System.out.println(be);
		}
		System.out.println("\nbrowser statistics (per frequency):\n");
		
		BrowserEntry[] hs = (BrowserEntry[])stats.values().toArray(new BrowserEntry[0]);
		Arrays.sort(hs, new CountedEntryComparator());
		for (int i=0; i<hs.length; i++)
		{
			System.out.println(hs[i]);
		}
	}
	/**
	 * Program entry point: reads the given log file and prints the statistics.
	 * Prints a usage message and exits with status 1 if the argument is
	 * missing or the file does not exist or is not readable.
	 * 
	 * @param args args[0] is the path of the http log file
	 */
	public static void main(String[] args) {
		// showUsage() terminates the JVM, so execution only continues with a usable argument
		if (args.length == 0 || args[0] == null) showUsage();
		File file = new File(args[0]);
		if (!file.exists() || !file.canRead()) showUsage();
		
		FHBrowserAnalyzingHttpLogParser runner = new FHBrowserAnalyzingHttpLogParser();
		runner.readFile(file);
		runner.displayStats();
		// Sample log lines (kept for manual testing):
		// "195.219.78.43 - - [01/Apr/2002:00:31:13 +0200] \"GET /Java_Downloads/java_downloads.html HTTP/1.0\" 304 - www.hawlitzek-consulting.de \"http://www.google.com/search?q=%22java+downloads%22&hl=en&start=10&sa=N\" \"Mozilla/4.0 (compatible; MSIE 5.0; Windows 98; DigExt)\" \"202.124.207.180\""
		// "134.102.218.51 - - [22/Mar/2002:11:32:23 +0100] \"GET /Veroffentlichungen/Nitty_Gritty_Java/nitty_gritty_java.html HTTP/1.0\" 200 1407 www.hawlitzek-consulting.de \"http://www.google.de/search?q=nitty+gritty+java&ie=ISO-8859-1&oe=ISO-8859-1&hl=de&meta=\" \"Mozilla/4.79 [en] (X11; U; SunOS 5.8 sun4u)\" \"134.102.201.61\""

	}
	/** 
	 * Reads a log file line by line and adds every parsed entry to the
	 * statistics.
	 * <p>
	 * Lines rejected by {@link LogEntry#parseLine} are reported on standard
	 * error and skipped; they are not counted. An I/O error stops reading,
	 * but everything counted up to that point is kept. The file is always
	 * closed before this method returns.
	 * 
	 * @param file the http log file to read
	 */
	public void readFile(File file)
	{
		BufferedReader br = null;
		try
		{
			br = new BufferedReader(new FileReader(file));
			String line;
			while ((line = br.readLine()) != null)
			{
				String browser;
				try
				{
					browser = LogEntry.parseLine(line);
				}
				catch (InvalidLogEntryException e)
				{
					// one malformed line must not abort the whole analysis
					System.err.println(e.getMessage());
					continue;
				}
				
				// count as we go so that nrOcc always matches the content of stats,
				// even after an I/O error or when several files are read
				nrOcc++;
				BrowserEntry entry = (BrowserEntry)stats.get(browser);
				if (entry == null)
				{
					// browser is not yet known -> add
					stats.put(browser, new BrowserEntry(browser));
				}
				else
				{
					// browser is already known -> increment
					entry.addAnzVorkommen();
				}
			}
		}
		catch (IOException e)
		{
			e.printStackTrace();
		}
		finally
		{
			if (br != null)
			{
				try
				{
					br.close();
				}
				catch (IOException ignored)
				{
					// the data has already been read; a failure on close cannot be handled usefully
				}
			}
		}
	}
	/**
	 * Prints the command line usage and the version, then terminates the
	 * JVM with exit status 1.
	 */
	private static void showUsage()
	{
		System.out.println("Usage: java de.hawlitzek.logparser.FHBrowserAnalyzingHttpLogParser <file>");
		System.out.println("version " + VERSION);
		System.exit(1);
	}
}
