package de.hawlitzek.logparser;
import java.util.*;
import de.hawlitzek.util.text.*;

/**
 * This class represents one line in the http log. It parses the line into the fields
 * for date, ip address, referrer etc.
 * It also provides functionality to filter log entries by type, occurring names
 * and search engines.
 * 
 * @author Florian Hawlitzek, Hawlitzek IT-Consulting GmbH
 */
public class LogEntry {
	/** @see #getClientIP() */
	private String clientIP;
	/** @see #getDate() */
	private java.util.Date date;
	/** @see #getHttpMethod() */
	private String httpMethod;
	/** @see #getPage() */
	private String page;
	/** @see #getReturncode() */
	private String returncode;
	/** Byte size of the served file, as raw text from the log line. @see #getBytecount() */
	private String bytecount;
	/** @see #getBrowser() */
	private String browser;
	/** @see #getReferrer() */
	private String referrer;

	/**
	 * Format of the time stamp found between '[' and ']' in the header part of a log line.
	 * Note: SimpleDateFormat is not thread-safe; this shared instance is used without
	 * synchronization, so lines must not be parsed into LogEntry instances concurrently.
	 */
	private static final java.text.SimpleDateFormat dateFormat = new java.text.SimpleDateFormat("dd/MMM/yyyy:HH:mm:ss z", java.util.Locale.US);


	/**
	 * checks, if a log entry is referring to an html page, i.e. the requested
	 * page ends with "html", "htm" (in any letter case) or "/".
	 * @return false if no page has been parsed yet
	 */
	public boolean isHtml() {
		if (page == null) {
			return false;
		}
		// fixed locale: the comparison must not depend on the default locale
		String lower = page.toLowerCase(java.util.Locale.US);
		return lower.endsWith("html") || lower.endsWith("htm") || lower.endsWith("/");
	}

	/**
	 * checks, if a log entry is referring to an image, i.e. the last three
	 * characters of the requested page are "gif", "jpg" or "png" (in any letter case).
	 * @return false if no page has been parsed yet or the page is shorter than three characters
	 */
	public boolean isImage() {
		if (page == null || page.length() < 3) {
			return false;
		}
		// fixed locale: "GIF".toLowerCase() would not yield "gif" in e.g. a Turkish default locale
		String suffix = page.substring(page.length() - 3).toLowerCase(java.util.Locale.US);
		return suffix.equals("gif") || suffix.equals("jpg") || suffix.equals("png");
	}

	/**
	 * special function for NetObjects fusion generated sites
	 * checks, if a log entry is main html page or a sub frame
	 * (a sub frame contains "/left_", "/body_" or "/header_" in its path)
	 */
	public boolean isMainPage() {
		// isHtml() already returns false for a missing page, so page is non-null below
		return isHtml()
			&& page.indexOf("/left_") == -1
			&& page.indexOf("/body_") == -1
			&& page.indexOf("/header_") == -1;
	}

	/**
	 * checks, if a log entry matches to a given filter
	 * @param type type of the search: HTML:   only html pages,
	 *                                 MAIN:   only main html pages
	 *                                 IMG:    only images
	 *                                 REF:    look in referrer, not in requested resource
	 *                                 null:   in all resources
	 *             (any other value is treated like null)
	 * @param filter set of AND combined search items; null means "no search items"
	 * @return true if the entry passes the type check and contains every search item;
	 *         false if search items are given but the searched field has not been parsed yet
	 */
	public boolean matches(String type, String[] filter) {
		boolean result = true;
		boolean referrerSearch = false;

		if (type != null) {
			if (type.equals(LogEntryFilterTypes.HTML))
				result = isHtml();
			else if (type.equals(LogEntryFilterTypes.MAIN))
				result = isMainPage();
			else if (type.equals(LogEntryFilterTypes.IMG))
				result = isImage();
			else if (type.equals(LogEntryFilterTypes.REF))
				referrerSearch = true;
		}

		if (!result || filter == null) {
			return result;
		}

		// search either the referrer or the requested resource
		String haystack = referrerSearch ? referrer : page;
		for (int i = 0; i < filter.length; i++) {
			if (haystack == null || haystack.indexOf(filter[i]) == -1) {
				return false;
			}
		}
		return true;
	}

	/**
	 * checks, if a log entry belongs to a http GET request
	 * @return false if no http method has been parsed yet
	 */
	public boolean isGet() {
		return "GET".equals(httpMethod);
	}

	/**
	 * Gets the browser
	 * @return Returns a String
	 */
	public String getBrowser() {
		return browser;
	}


	/**
	 * Gets the bytecount, i.e. the text following the return code in the log line.
	 * The value is stored as found and is not checked to be numeric.
	 * @return Returns a String, or null if the log line had no such field
	 */
	public String getBytecount() {
		return bytecount;
	}


	/**
	 * Gets the clientIP
	 * @return Returns a String
	 */
	public String getClientIP() {
		return clientIP;
	}


	/**
	 * Gets the date
	 * @return Returns a java.util.Date, or null if the time stamp of the line could not be parsed
	 */
	public java.util.Date getDate() {
		return date;
	}

	/**
	 * Gets the httpMethod
	 * @return Returns a String
	 */
	public String getHttpMethod() {
		return httpMethod;
	}


	/**
	 * Gets the page
	 * @return Returns a String
	 */
	public String getPage() {
		return page;
	}


	/**
	 * Gets the referrer exactly as it was read from the log line
	 * @return Returns a String
	 */
	public String getReferrer() {
		return referrer;
	}


	/**
	 * Gets the returncode
	 * @return Returns a String
	 */
	public String getReturncode() {
		return returncode;
	}

	/**
	 * checks, if the requested page could be delivered (return code 200 or 304)
	 * @return false if no return code has been parsed yet
	 */
	public boolean isOk() {
		return "200".equals(returncode) || "304".equals(returncode);
	}


	/**
	 * parses a line in the (log entry) and returns the used browser type
	 * (fast method without creating LogEntry instances, only for detecting the browser type)
	 *
	 * @param line one line in an http log file
	 * @return String browser
	 * @throws InvalidLogEntryException if the line cannot be parsed
	 */
	public static String parseLine(String line) throws InvalidLogEntryException
	{
		return parseLine(line, null);
	}

	/**
	 * parses a line in the (log entry) and returns the used browser type
	 * If a LogEntry instance is provided, the instance is filled with the entries detail data
	 *
	 * @param line one line in an http log file
	 * @param logEntry object to be filled, may be null
	 * @return String browser
	 * @throws InvalidLogEntryException if any step of the parsing fails
	 */
	public static String parseLine(String line, LogEntry logEntry) throws InvalidLogEntryException
	{
		try {
			// separate the line at the double quotes
			StringTokenizer tok = new StringTokenizer(line, "\"");
			String header = tok.nextToken();
			String method = tok.nextToken();
			String result = tok.nextToken();
			String referrer = tok.nextToken();
			// NOTE(review): this drops the trailing '%' AND the character before it;
			// kept as is - confirm whether only the dangling '%' should be removed.
			if (referrer.endsWith("%"))
				referrer = referrer.substring(0, referrer.length()-2);
			// skip the text between the referrer and the browser field
			tok.nextToken();
			String browser = tok.nextToken().trim();
			browser = BrowserEntry.getBrowserType(browser);

			// inspect the details
			if (logEntry != null)
				logEntry.init(header, method, result, referrer, browser);

			return browser;
		}
		catch (Exception e) {
			// every failure (missing token, bad index, null line, ...) marks the line as invalid
			throw new InvalidLogEntryException();
		}
	}

	/**
	 * initializes the detail data of a log entry.
	 * All values are extracted first and assigned afterwards, so a malformed line
	 * (which makes this method throw a runtime exception) leaves the entry unchanged.
	 */
	private void init(String header, String method, String result, String referrer, String browser)
	{
		// extract the client address: everything up to the first blank
		String newClientIP = header.substring(0, header.indexOf(' '));

		// extract the date; an unparsable time stamp yields null instead of
		// keeping the date of a previously parsed line
		String tempDat = header.substring(header.indexOf('[') + 1, header.indexOf(']'));
		java.util.Date newDate;
		try {
			newDate = dateFormat.parse(tempDat);
		}
		catch (java.text.ParseException e) {
			newDate = null;
		}

		// extract the HTTP method
		int methodEnd = method.indexOf(' ');
		String newHttpMethod = method.substring(0, methodEnd);

		// extract the requested page; the protocol is the last "HTTP/" of the
		// request, the requested page itself may contain that text as well
		int protocolStart = method.lastIndexOf("HTTP/");
		String newPage = method.substring(methodEnd + 1, protocolStart - 1);

		// extract the result code (success/failure) of the web server
		// and the byte count following it
		StringTokenizer fields = new StringTokenizer(result);
		String newReturncode = fields.nextToken();
		String newBytecount = fields.hasMoreTokens() ? fields.nextToken() : null;

		// everything could be extracted: take over the values
		this.clientIP = newClientIP;
		this.date = newDate;
		this.httpMethod = newHttpMethod;
		this.page = newPage;
		this.returncode = newReturncode;
		this.bytecount = newBytecount;
		this.referrer = referrer;
		this.browser = browser;
	}

	/**
	 * searches for google keywords
	 * The stored referrer is not modified.
	 * @return the search items, or null if the referrer is missing, does not
	 *         contain "google" or contains no query
	 */
	public String[] parseGoogleKeywords() {
		// filter only google searches
		if (referrer == null) return null;
		int engineIndex = referrer.indexOf("google");
		if (engineIndex < 0) return null;

		String url = referrer;
		boolean truncated = false;

		// special case for indirect references (image search)
		int index = url.indexOf("imgrefurl");
		boolean isGoogleImageSearch = (index >= 0);
		if (isGoogleImageSearch) {
			// skip "imgrefurl" plus the following character
			url = url.substring(Math.min(index + 10, url.length()));
			truncated = true;
		}

		// special case for indirect references (translations)
		index = url.indexOf("translate");
		boolean isGoogleTranslation = (index >= 0);
		if (isGoogleTranslation) {
			index = url.indexOf("&u=", index);
			if (index >= 0) {
				url = url.substring(index + 3);
				truncated = true;
			}
		}

		// decode URL
		url = StringDecoder.decodeUrl(url);
		if (isGoogleImageSearch || isGoogleTranslation) {
			try {
				url = StringDecoder.decodeUrl(url); // double encoding
			}
			catch (Exception e) {
				// best effort: continue with the URL that was decoded once
				e.printStackTrace();
			}
		}

		// extract search items; the position of "google" is meaningless once
		// the front of the referrer has been cut off
		return baseParseKeywords(url, truncated ? 0 : engineIndex);
	}

	/**
	 * searches for fireball keywords
	 * The stored referrer is not modified.
	 * @return the search items, or null if the referrer is missing, does not
	 *         contain "fireball" or contains no query
	 */
	public String[] parseFireballKeywords() {
		return parseEngineKeywords("fireball");
	}

	/**
	 * searches for altavista keywords
	 * The stored referrer is not modified.
	 * @return the search items, or null if the referrer is missing, does not
	 *         contain "altavista" or contains no query
	 */
	public String[] parseAltavistaKeywords() {
		return parseEngineKeywords("altavista");
	}

	/**
	 * extracts the search items from the referrer if it contains the given search engine name
	 */
	private String[] parseEngineKeywords(String engineName) {
		// filter only the given search engine
		if (referrer == null) return null;
		int engineIndex = referrer.indexOf(engineName);
		if (engineIndex < 0) return null;

		// decode URL and extract search items
		return baseParseKeywords(StringDecoder.decodeUrl(referrer), engineIndex);
	}

	/**
	 * base search for search engines with queries beginning with "q=" or "?p="
	 * @param referrer decoded referrer to search in
	 * @param startIndex position from which the query marker is searched
	 * @return the search items between the marker and the next "&amp;" (or the end
	 *         of the referrer), or null if no marker is found
	 */
	 private String[] baseParseKeywords(String referrer, int startIndex) {
		// find the query marker and remember its length
		int markerLength = 2;
		int queryStart = referrer.indexOf("q=", startIndex);
		if (queryStart < 0) {
			queryStart = referrer.indexOf("?p=", startIndex);
			if (queryStart < 0)
				return null;
			markerLength = 3;
		}
		// find end of search items
		int queryEnd = referrer.indexOf("&", queryStart);
		if (queryEnd < 0)
			queryEnd = referrer.length();
		String searchString = referrer.substring(queryStart + markerLength, queryEnd);

		return StringDecoder.seperate(searchString);
	 }
}

