import java.util.*;
import java.io.*;
import java.net.*;

public final class loader implements Runnable
{
 // Queue priority given to the initial requests that init() puts on the queue.
 private final static float MAX_QPRIO=9999.99f;
 // Size in bytes of the buffered streams and of the copy buffer used by saver().
 public static final int IO_BUFFER=4096;
 
 // Program identification, printed in the start-up banner and sent in the User-Agent header.
 public static final String VERSION="0.18";
 public static final String NAME="Smart Cache Loader";
 public static final String COPYRIGHT="Copyright (c) Radim Kolar 1998-2000. Open source software; There is NO warranty.\n"+
 "See the GNU General Public Licence version 2 or later for copying conditions.";

 // Default number of concurrent worker threads.
 public static final int  THREADS=4; // like in Netscape
 
 // Leading characters that give a command line argument (or include-file line) a special meaning.
 public static final char EXPANDCHAR='@';   // include this file as argz
 public static final char STARTURLCHAR='^'; // start URL for known location
 public static final char CONFIGCHAR='#'; // also comment in include file
 public static final char OPTIONCHAR='-'; // command line option
 public static final char VISITEDCHAR=':'; // already visited
 public static final char DEFAULTURLCHAR='%'; // configure this url as default
 
 // Configuration file loaded by main() when none is named on the command line.
 public static final String DEFAULTCFG="loader.cnf";
 
 // A failed request is re-queued by run() while its retry counter is below this value.
 public static byte maxretry=3;
 // Queue priority used by run() when a failed request is re-queued.
 public static float retryprio=-1f;

 // Locations known to the loader; set from the configuration and extended by init().
 public static location loc[];
 // Default location; receives the option values given on the command line.
 public static location def;
 
 // Proxy address and port; when proxyserver is null run() connects to the target host directly.
 public static InetAddress proxyserver;
 public static int proxyport;

 // Copied from store.isReadOnly() in init(); when true run() does not save fetched data.
 public static boolean readonly;
 // Local store that run() asks for the stored copy of each URL.
 public static localstore store;
 
 // Queue of pending requests; also used as the monitor guarding the 'now' counter.
 public static priorityqueue pq;
 // Strings already seen (URLs and include-file names); key and value are the same string.
 private static Hashtable visited;
 
 // thread control
 // Upper bound on concurrently running workers, checked by start().
 public static short maxThreads;
 // Number of workers currently running; changed only inside synchronized(pq) blocks.
 public static volatile int now;
 
 
 // worker non-static data
 // Request processed by this worker.
 public request r; 
 // Socket opened by sendHTTPrequest(); stays null when the data comes from the local store.
 Socket s;
  
 /**
  * Program entry point: prints the banner, loads the configuration, lets
  * init() process the arguments and queue the start requests, then runs the
  * download loop.
  *
  * The default configuration file is loaded here unless the first argument
  * starts with CONFIGCHAR; in that case init() loads the named file.
  *
  * @param argv command line arguments
  * @throws IOException declared by the original signature (presumably raised
  *         by store.close() - confirm against localstore)
  */
 public static void main(String argv[]) throws IOException
 {
  System.err.println(NAME+" "+VERSION+"\n"+COPYRIGHT+"\n");

  // The length test keeps an empty first argument ("") from raising
  // StringIndexOutOfBoundsException in charAt(0); such an argument cannot
  // name a configuration file, so the default one is loaded.
  boolean configOnCommandLine=
      argv.length>0 && argv[0].length()>0 && argv[0].charAt(0)==CONFIGCHAR;
  if(!configOnCommandLine)
    configloader(DEFAULTCFG);

  init(argv);
  try
  {
    // peek() is used only as an emptiness probe: the catch below handles
    // the NoSuchElementException raised when nothing was queued.
    pq.peek();
    start();
    store.close();
    System.err.println(NAME+" "+VERSION+" - end of job.");
  }
  catch (NoSuchElementException nse)
  {
    System.err.println("\n[LOADER] No active servers - nothing done.");
  }
 }
 
 /**
  * Creates a worker bound to one queued request.
  *
  * @param r the request that run() will process
  */
 public loader(request r)
 {
  this.r = r;
 }
 
 /**
  * Initialises the loader state, interprets the command line and queues one
  * request for every start URL of every active (non-passive) location.
  *
  * Arguments are first expanded ("@file" includes a file, ":@file" includes a
  * file whose entries are all marked as visited) and then interpreted one by
  * one:
  *   "-..."      options, not supported yet (an error is printed)
  *   "#file"     load this configuration file
  *   ":url"      mark the URL as already visited
  *   "...://..." inject a new active location with this start URL
  *   "name=val"  set a value on the default location or on the loader
  *   "name"      activate the configured location called name
  *   "!name"     make that location passive
  *
  * @param argv command line arguments
  */
 public static final void init(String argv[])
 {
  // program init
  pq=new priorityqueue(5,10);
  visited=new Hashtable();
  // main() may already have loaded the default configuration; keep its
  // thread count instead of unconditionally overwriting it with THREADS.
  if(maxThreads<=0) maxThreads=THREADS;
  now=0;

  // expand arguments
  Vector argz=new Vector(argv.length,5);
  for(int i=0;i<argv.length;i++)
  {
    String s=argv[i];
    if(s.length()==0) continue;
    char lead=s.charAt(0);
    if(lead==EXPANDCHAR)
      insertFile(s.substring(1),argz,false);
    else if(lead==VISITEDCHAR)
    {
      if(s.length()<2) continue;
      if(s.charAt(1)==EXPANDCHAR)
        insertFile(s.substring(2),argz,true);
      else
        argz.addElement(s); // plain ":url" - was silently dropped before
    }
    else // normal argument
      argz.addElement(s);
  }

  argscan:for(int i=0;i<argz.size();i++)
  {
   String s=(String)argz.elementAt(i);
   if(s==null || s.length()==0) continue;

   /* special chars stuff */
   if(s.charAt(0)==OPTIONCHAR)
   {
     System.err.println("[PARAMETER_ERROR] Options are not YET supported.");
     continue; // TODO options are not YET supported
   }
   if(s.charAt(0)==CONFIGCHAR)
   {
     if(i!=0) System.err.println("[PARAMETER_ERROR] Config file must be the first parameter. All previous parameters are overwritten by it.");
     configloader(s.substring(1));
     continue;
   }
   if(s.charAt(0)==VISITEDCHAR)
   {
     String z=s.substring(1);
     try
     {
       new URL(z); // validation only
       visited.put(z,z);
     }
     catch (MalformedURLException ignored)
     {
       // not a URL: skipped without a message, as before
     }
     continue;
   }

   /* URL on commandline ? */
   if(s.indexOf("://")>0)
   {
     // NOTE(review): the original had an empty placeholder block here for
     // arguments starting with STARTURLCHAR; nothing was ever done for them.

     // inject new site
     location site;
     if(s.endsWith("/"))
       site=createNewLocation(s);
     else
     {
       try
       {
         /* strip filename from location */
         // NOTE(review): an explicit port in the URL is not carried over
         // into the location base - confirm whether that is intended.
         URL u=new URL(s);
         site=createNewLocation(u.getProtocol()+"://"+u.getHost()+util.getDirname(u.getFile()));
       }
       catch (MalformedURLException grrr)
       {
         continue;
       }
     }

     site.passive=false;
     site.masks=site.content=site.action=location.INCLUDING_DEFAULTS;

     // add pending DNS aliases
     site.transferAliases(def);

     // start URLs: the one just given plus those pending on the default location
     site.addStartURL(s);
     site.transferStartURL(def);

     loc=util.addLocationToArray(site,loc);
     continue; // new site injected
   }

   /* option on commandline ? */
   if(s.indexOf("=",0)>-1)
   {
     try
     {
       StringTokenizer st=new StringTokenizer(s);
       String rawOpt=st.nextToken("=");
       String opt=rawOpt.toLowerCase().trim();

       // Everything after the '=' that ends the option name. Needed where
       // the value may itself contain '=' or is tokenized with other
       // delimiters.
       int valueAt=s.indexOf(rawOpt)+rawOpt.length();
       String rest=valueAt<s.length() ? s.substring(valueAt+1).trim() : "";

       if(opt.equals("scandepth") || opt.equals("depth"))
         def.setDepth((short)Integer.parseInt(st.nextToken().trim()));
       else if(opt.equals("threads"))
       {
         maxThreads=(short)Integer.parseInt(st.nextToken().trim());
         if(maxThreads<=0) maxThreads=THREADS;
       }
       else if(opt.equals("retry"))
         maxretry=(byte)Integer.parseInt(st.nextToken().trim());
       else if(opt.equals("retrypriority"))
         retryprio=Float.valueOf(st.nextToken().trim()).floatValue();
       else if(opt.equals("options"))
         def.serveroptions(new options(s));
       else if(opt.equals("priority"))
         def.setPriority(Float.valueOf(st.nextToken().trim()).floatValue());
       else if(opt.equals("locationalias") || opt.equals("alias"))
       {
         // Tokenize the value only. Re-using st with new delimiters would
         // return the first alias with the leading '=' still attached.
         StringTokenizer names=new StringTokenizer(rest," ,&\t\r\n");
         do
         {
           def.addAlias(names.nextToken());
         }
         while(names.hasMoreTokens());
       }
       else if(opt.equals("starturl"))
       {
         // The whole remainder is the URL; st.nextToken() would cut it at
         // the first '=' of a query string.
         if(rest.length()==0) throw new NoSuchElementException();
         def.addStartURL(rest);
       }
       else if(opt.equals("log"))
         def.addActions(new options(s),true);
       else if(opt.equals("upd"))
         def.addActions(new options(s),true);
       else
         System.err.println("[PARAMETER_ERROR] Unknown option "+opt);
     }
     catch (NumberFormatException badNumber)
     {
       // previously an uncaught exception that aborted start-up
       System.err.println("[PARAMETER_ERROR] Invalid numeric value in '"+s+"'");
     }
     catch (NoSuchElementException missingValue)
     {
       System.err.println("[PARAMETER_ERROR] Missing value in '"+s+"'");
     }
     continue;
   }

   // scan for location name; a leading '!' makes the location passive
   boolean neg=false;
   if(s.charAt(0)=='!') { neg=true; s=s.substring(1); }
   for(int j=loc.length-1;j>=0;j--)
   {
     if(s.equalsIgnoreCase(loc[j].name)) { loc[j].passive=neg; continue argscan; }
   }
   System.err.println("[PARAMETER_ERROR] Location named '"+s+"' was not found.");
  } /* argscan loop */

  if(store==null)
  {
    // No store (e.g. the configuration could not be read): leave the queue
    // empty so that main() reports "nothing done" instead of failing with a
    // NullPointerException.
    System.err.println("[CONFIG_ERROR] No local store configured.");
    return;
  }
  readonly=store.isReadOnly();

  // add ACTIVE sites to QUEUE
  for(int j=loc.length-1;j>=0;j--)
  {
    if(loc[j].passive) continue;
    if(loc[j].starturl==null)
      addToQueue(new request(loc[j].locbase,loc[j]),MAX_QPRIO);
    else
      for(int z=loc[j].starturl.length-1;z>=0;z--)
        addToQueue(new request(loc[j].starturl[z],loc[j]),MAX_QPRIO);
  }
 }

/**
 * Appends the arguments listed in a text file, one per line, to v.
 *
 * Blank lines and lines starting with CONFIGCHAR ('#') are skipped. A line
 * starting with VISITEDCHAR marks its entry as visited; a line starting with
 * EXPANDCHAR includes another file (each file name is included at most once,
 * tracked through the visited table). Visited entries are stored with a
 * leading VISITEDCHAR so that init() can recognise them.
 *
 * @param filename file to read
 * @param v        vector receiving the arguments (as Strings)
 * @param vis      true when every entry of this file counts as visited
 */
private final static void insertFile(String filename,Vector v,boolean vis)
{
 BufferedReader br=null;
 try
 {
   br=new BufferedReader(new FileReader(filename));
   String l;
   while((l=br.readLine())!=null)
   {
     boolean vv=vis;
     // Trim before testing for emptiness: a whitespace-only line used to
     // pass the length check and then fail in charAt(0).
     l=l.trim();
     if(l.length()==0) continue;
     if(l.charAt(0)==CONFIGCHAR) continue; // comment
     if(l.charAt(0)==VISITEDCHAR)
     {
       l=l.substring(1).trim();
       if(l.length()==0) continue; // missing URL
       vv=true;
     }
     if(l.charAt(0)==EXPANDCHAR)
     {
       String fn=l.substring(1);
       if(!visited.containsKey(fn))
       {
         visited.put(fn,fn);
         insertFile(fn,v,vv);
       }
       continue;
     }

     if(vv) v.addElement(VISITEDCHAR+l);
     else   v.addElement(l);
   }
 }
 catch (IOException z)
 {
   // Still best effort (the remaining arguments are processed), but an
   // unreadable file is now reported.
   System.err.println("[PARAMETER_ERROR] Error reading argument file "+filename);
 }
 finally
 {
   // close the reader on every path, including read errors
   if(br!=null)
   {
     try { br.close(); } catch (IOException ignored) {}
   }
 }
}
/*     m a i n    l o o p      */ 

 /**
  * Worker body: processes the request r.
  *
  * 1. Decides whether the document is read from the local store or fetched
  *    over HTTP (directly or through the proxy).
  * 2. For HTTP, parses the status line and headers, queues "Location:"
  *    redirects and, unless saving is disabled, opens the store for writing.
  * 3. Either just copies the data (no parsing wanted) or scans the HTML for
  *    links and hands every new URL to r.loc.processURL().
  *
  * Every exit path decrements the running-worker counter exactly once and
  * notifies the scheduler: early exits through done(), all others through the
  * synchronized block at the end. On an exception the request is re-queued
  * while its retry counter is below maxretry.
  */
 public final void run()
 {
  try
  {
    DataInputStream dis;
    DataOutputStream dos=null;
    URL target=new URL(r.url);
    String line;
    localurl local;
    if(r.log==mask.LOG_SERVERDEFAULT)
      r.log=(r.loc.defaultmask.log==mask.LOG_SERVERDEFAULT?
         mask.LOG_DEFAULT:r.loc.defaultmask.log);

    log("Processing",mask.LOG_QUEUE);

    local=store.getURL(r.url);

    // Decide whether to read from the local store or from the server.
    // NOTE(review): the second test uses the stored copy when
    // getDate()+updatelimit lies in the PAST, which looks inverted for a
    // freshness check - confirm the meaning of getDate()/updatelimit.
    if(
        /* norefresh / noreparse */
        ((r.update==mask.UPD_NOREFRESH || r.update==mask.UPD_NOREPARSE) && local.exists())  ||
        /* update / forceupdate */
        ((r.update==mask.UPD_UPDATE || r.update==mask.UPD_FORCEUPDATE) &&
          (local.exists() && local.getDate()+r.updatelimit<System.currentTimeMillis())
        )
      )
    {
      try
      {
        /* Loading from local filesystem */
        log("Stored",mask.LOG_STORED);
        if(local.getLocation()!=null)
        {
          // stored redirect: queue its target as well
          request nr=(request)r.clone();
          nr.url=new URL(target,local.getLocation()).toString();
          addToQueue(nr,r.loc.priority);
        }
        if(r.update==mask.UPD_NOREPARSE || !local.isParseable())
          { done();return;} // no need to load it

        dis=new DataInputStream(
              new BufferedInputStream(local.getInputStream(),IO_BUFFER));
      }
      catch (IOException iof)
      {
        System.err.println("Reading from localfile failed, turning update off.");
        r.update=mask.UPD_LOAD; // the retry will go to the server
        throw iof;
      }
    }
    else
    {
      log("Loading",mask.LOG_LOAD);
      // connect to TCP/IP data source
      if(proxyserver==null||r.act==mask.ACT_NOPROXY)
      {
        // Direct connection to remote server
        String proto=target.getProtocol();

        if(!proto.equalsIgnoreCase("http"))
        {
          log("Unsupported protocol",mask.LOG_FATALERR);
          done();
          return;
        }
        int p=target.getPort();
        sendHTTPrequest(InetAddress.getByName(target.getHost()),p==-1? 80: p,target.getFile());
      }
      else
        // Send request to proxy
        sendHTTPrequest(proxyserver,proxyport,r.url);

      if(r.act==mask.ACT_FASTCLOSE) {s.close();done();return;}

      // open the data input stream from the HTTP server
      dis=new DataInputStream(new BufferedInputStream(s.getInputStream(),IO_BUFFER));

      /* HTTP-HEADER PARSING START */
      int ctsize=-1; // parsed but not used anywhere in this method
      int httprc;
      line=dis.readLine(); /* HTTP/1.0 XX OK */
      // A connection closed before any data arrived used to end in a
      // NullPointerException inside StringTokenizer; report it as I/O failure.
      if(line==null) throw new EOFException("empty HTTP response");

      /* read the HTTP result code */
      StringTokenizer st=new StringTokenizer(line);
      try
      {
        st.nextToken(); /* http/1.0 - not interesting */
        httprc=Integer.valueOf(st.nextToken()).intValue();
      }
      catch (Exception http09)
      {
        // a status line without a numeric code is treated as HTTP 0.9
        log("HTTP 0.9 response",mask.LOG_FATALERR);
        s.close();done();return;
      }

      /* read the headers */
      while(true)
      {
        int j;
        String s1,s2;
        line=dis.readLine();
        if(line==null) break;
        if(line.length()==0) break; // blank line ends the header section

        j=line.indexOf(':',0);
        if(j==-1) continue;
        s1=line.substring(0,j).toLowerCase();
        s2=line.substring(j+1).trim();
        if(s1.equals("content-length"))
        {
          try
          {
            ctsize=Integer.valueOf(s2).intValue();
          }
          catch (Exception ignore)
          {
            // malformed length: ignored
          }
          continue;
        }

        if(s1.equals("content-type") && !s2.toLowerCase().startsWith("text/html"))
        {
          if(r.act==mask.ACT_CLOSE) {s.close();done();return;}
          else
            r.act=mask.ACT_NOPARSE;
        }
        if(s1.equals("location"))
        {
          /* Location: handler - queue the redirect target */
          request nr=(request)r.clone();
          nr.url=new URL(target,s2).toString();
          addToQueue(nr,r.loc.priority);
          continue;
        }
      } /* headers */

      if(httprc!=200)
      {
        s.close();
        done();
        log("Error "+httprc,mask.LOG_ERR);
        return;
      }

      /* ***** SAVE as ******** */
      if( r.act!=mask.ACT_NOSAVE && readonly==false)
      {
        try
        {
          dos=new DataOutputStream(new BufferedOutputStream(local.getOutputStream(),IO_BUFFER));
          log("Saving",mask.LOG_SAVE);
        }
        catch (IOException iof)
        {
          // saving failed: continue without a local copy
          log("Save error",mask.LOG_ERR);
          dos=null;
        }
      } /* open file for saving */
    } /* end of local-vs-server decision */

    // the data can be processed now
    if(r.act==mask.ACT_NOPARSE || r.depth==-1)
    {
      // just store it and finish
      saver(dis,dos);
      done();
      return;
    }

    /* **** P A R S E     E N G I N E **** */
    /* (hacked from watchit)               */

    htmlscanner hscan=new htmlscanner(dis,dos);
    log("Parsing",mask.LOG_PARSE);

    Vector urls=new Vector(); // link targets found (URL objects, later Strings)
    Vector srcs=new Vector(); // parallel to urls: name of the tag that gave the link
    boolean anyframe=false;
    while(true)
    {
      Hashtable tag;
      String attr; // was named 's' and shadowed the socket field
      try
      {
        tag=hscan.getElement();
        if(tag==null) break; // EOF?
        line=(String)tag.get(""); // the tag name is stored under the empty key
        if(line==null) continue; // null tag?

        if(line.equals("FRAME")) anyframe=true;

        /* META - REFRESH HANDLER */
        if(line.equals("META"))
        {
          attr=(String)tag.get("HTTP-EQUIV");
          if(attr==null) continue;
          attr=attr.trim();
          if(!attr.equalsIgnoreCase("Refresh")) continue;
          attr=(String)tag.get("CONTENT");
          if(attr==null) continue;
          attr=attr.trim();
          int j=attr.indexOf(';');
          if(j==-1) continue;
          try
          {
            j=Integer.valueOf(attr.substring(0,j)).intValue();
          }
          catch (NumberFormatException z)
          {
            continue;
          }
          /* refresh delays above 45 seconds are ignored */
          if(j>45) continue;
          j=attr.indexOf('=');
          if(j==-1) continue;

          attr=attr.substring(j+1).trim();
          URL url2;
          try
          {
            url2=new URL(target,attr);
          }
          catch (MalformedURLException e)
          {
            continue;
          }

          urls.addElement(url2);
          srcs.addElement("REFRESH");
          anyframe=true;
          continue;
        } /* META html redirect */
        else if(line.equals("BODY"))
        {
          attr=(String)tag.get("BACKGROUND");
          if(attr==null) continue;
          line="IMG"; /* CHECK: cheat it as IMG ?! */
        }
        else
        {
          /* generic SRC / HREF handler */
          attr=(String)tag.get("SRC");
          if(attr==null)
          {
            attr=(String)tag.get("HREF");
            if(attr==null) continue;
          }
        }

        URL url2=null;
        try
        {
          url2=new URL(target,attr);
        }
        catch (MalformedURLException e)
        {
          continue;
        }

        urls.addElement(url2);
        srcs.addElement(line);
      }
      catch (EOFException e) {break;}
    }
    hscan.close();
    /* HTML parsing done */

    if(anyframe==true) r.depth++;

    /* Clean up the collected URLs:
         drop mailto: links,
         map this location's DNS aliases back to its base,
         cut the URL at '#', CR, LF or space,
         and replace each URL object by its String form. */
    for(int i=urls.size()-1;i>=0;i--)
    {
      line=urls.elementAt(i).toString();
      if(line.startsWith("mailto"))
        { urls.removeElementAt(i);srcs.removeElementAt(i);continue;}
      if(r.loc.aliases!=null)
      {
        // unaliasing
        for(int a=r.loc.aliases.length-1;a>=0;a--)
          if(line.startsWith(r.loc.aliases[a]))
          {
            line=r.loc.locbase+line.substring(r.loc.aliases[a].length());
            break;
          }
      } /* dealiasing */

      // Compare whole chars. The deprecated getBytes(int,int,byte[],int)
      // keeps only the low byte of each char, so unrelated non-Latin
      // characters could be mistaken for '#' or a blank.
      int ln=line.length();
      for(int z=0;z<ln;z++)
      {
        char c=line.charAt(z);
        if(c=='#' || c=='\r' || c=='\n' || c==' ')
        {
          line=line.substring(0,z);
          break;
        }
      }

      urls.setElementAt(line,i);
    }

    // process URLs
    for(int i=urls.size()-1;i>=0;i--)
    {
      String mybase=target.getProtocol()+"://"+target.getHost();
      String mydir=util.getDirname(target.getFile());
      line=(String)urls.elementAt(i);
      // test-and-mark as one step so that two workers cannot both claim
      // the same URL
      synchronized(visited)
      {
        if(visited.get(line)!=null) continue; // already visited
        visited.put(line,line);
      }
      r.loc.processURL(mybase,mydir,r.depth,r.depthset,line,(String)srcs.elementAt(i));
    }
  }
  catch (Exception ignore)
  {
   System.err.print("Loader got "+ignore+" when loading "+r.url);
   if(!(ignore instanceof java.io.IOException)) ignore.printStackTrace();
   if(r.retry++<maxretry)
   {
     System.err.println(", re-inserting to queue for retry");
     // same monitor as every other access to the queue
     synchronized(pq)
     {
       pq.push(r,retryprio);
     }
   }
   else
     System.err.println("");
  }

  // Make sure the connection is released on the failure path as well;
  // closing an already closed socket is harmless.
  if(s!=null)
  {
    try { s.close(); } catch (IOException ignored) {}
  }

  // release lock
  synchronized(pq)
  {
   now--;
   pq.notify();
  }
 }
 
/**
 * Opens the connection (stored in the field s) and sends an HTTP/1.0 GET
 * request. "Pragma: no-cache" is added when the request's update mode is
 * UPD_RELOAD or UPD_FORCEUPDATE.
 *
 * @param adr     address to connect to (target host or proxy)
 * @param port    TCP port to connect to
 * @param request request target: a path for direct connections, the full
 *                URL when talking to the proxy
 * @throws IOException when connecting or sending fails; the socket is closed
 *         before the exception is passed on
 */
private final void sendHTTPrequest(InetAddress adr,int port,String request) throws IOException
{
   s=new Socket(adr,port);
   try
   {
     // open the data output stream
     DataOutputStream dos=
         new DataOutputStream(new BufferedOutputStream(s.getOutputStream(),1024));

     // A URL without a path (e.g. "http://host") has an empty file part;
     // "GET  HTTP/1.0" is not a valid request line, so ask for the root.
     if(request.length()==0) request="/";

     // send request
     StringBuffer sb=new StringBuffer(1024);
     sb.append("GET ");
     sb.append(request);
     sb.append(" HTTP/1.0\r\nAccept: */*\r\nUser-Agent: Mozilla/3.01 (Java Virtual Machine; "+NAME+" "+VERSION+")\r\n");
     if(r.update==mask.UPD_RELOAD|| r.update==mask.UPD_FORCEUPDATE)
       sb.append("Pragma: no-cache\r\n");
     sb.append("\r\n");
     dos.writeBytes(sb.toString());
     dos.flush();
   }
   catch (IOException sendFailed)
   {
     // do not leave a half-used connection open when the request failed
     try { s.close(); } catch (IOException ignored) {}
     throw sendFailed;
   }
}
 
/**
 * Records that one worker has finished: decrements the running-worker
 * counter and wakes a thread waiting on the queue monitor.
 */
private final static void done()
{
  synchronized(pq)
  {
    now=now-1;
    pq.notify();
  }
}
 
 /**
  * Queues a request unless it is already in the queue or its URL has been
  * seen before; a queued URL is recorded in the visited table.
  *
  * Called from init() and from worker threads, so the test and the update
  * are done under the queue monitor - the same monitor start(), done() and
  * run() use - to keep two workers from queueing the same URL twice.
  *
  * @param r    request to queue
  * @param prio queue priority for the request
  */
 private final static void addToQueue(request r,float prio)
 {
  synchronized(pq)
  {
    if(pq.search(r)!=-1) return;          // already queued
    if(visited.get(r.url)!=null) return;  // already seen
    visited.put(r.url,r.url);
    pq.push(r,prio);
  }
 }
 
 /**
  * Scheduler loop, run on the calling thread: pops requests from the queue
  * and starts one worker thread per request, never more than maxThreads at a
  * time. Returns when the queue is empty and no worker is running.
  *
  * The queue monitor is held for the whole loop and released only while
  * waiting for a worker to finish.
  */
 public final static void start()
 {
  Thread.currentThread().setPriority(Thread.MAX_PRIORITY-2);
  synchronized(pq)
  {
    for(;;)
    {
      // every worker slot is taken: sleep until one of them notifies
      if(now==maxThreads)
      {
        try
        {
          pq.wait();
        }
        catch (InterruptedException leaveUsAlonePlease) {}
        continue;
      }

      // a slot is free: fetch the next request
      request next;
      try
      {
        next=(request)pq.pop();
      }
      catch (NoSuchElementException queueEmpty)
      {
        if(now<=0) break; /* no runners anymore */
        // queue is empty but workers are still running and may add more
        try
        {
          pq.wait();
        }
        catch (InterruptedException leaveUsAlonePlease) {}
        continue; // nothing to start this round
      }

      if(next.act==mask.ACT_REJECT) continue; // ignore it

      // run the new request
      Thread worker=new Thread(new loader(next));
      worker.setPriority(Thread.NORM_PRIORITY);
      worker.start();
      now++;
    }
  }
 }
 
 /**
  * Copies everything from sin to out (when out is not null) and closes both
  * streams. With a null out the input is only read to its end.
  *
  * Both streams are closed on failure too, and sin is closed even when
  * closing out fails.
  *
  * @param sin source stream
  * @param out destination stream, or null to discard the data
  * @throws IOException when reading, writing or closing fails
  */
 private final static void saver(DataInputStream sin,DataOutputStream out) throws IOException
 {
  // one buffer for the whole copy instead of a new one per iteration
  byte b[]=new byte[IO_BUFFER];
  try
  {
    int rb;
    while((rb=sin.read(b))!=-1) /* -1: end of data */
    {
      if(out!=null) out.write(b,0,rb);
    }
  }
  finally
  {
    try
    {
      if(out!=null) out.close();
    }
    finally
    {
      sin.close();
    }
  }
 }
 
 /**
  * Prints a progress line for this worker's request to standard output when
  * the request's log mask has the given bit set. With LOG_URLONLY set in the
  * mask only the URL is printed, otherwise "what: url".
  *
  * @param what short description of the event
  * @param msk  log mask bit that enables this message
  */
 private final void log(String what,short msk)
 {
   // message class not enabled for this request
   if( (this.r.log&msk)<=0 ) return;

   boolean urlOnly=(this.r.log&mask.LOG_URLONLY)>0;
   if(urlOnly)
   {
     System.out.println(r.url);
     return;
   }
   System.out.println(what+": "+r.url);
 }
 
 /**
  * Reads a configuration file and copies its settings into the loader's
  * static state: locations, default location, thread and retry limits,
  * proxy and local store.
  *
  * When the file cannot be read an error is printed, loc becomes an empty
  * array and nothing else is changed.
  *
  * @param cfgfile name of the configuration file
  */
 private final static void configloader(String cfgfile)
 {
  configloader parsed;
  try
  {
    parsed=new configloader(cfgfile);
  }
  catch (IOException unreadable)
  {
    System.out.println("[CONFIG_ERROR] Error reading config file "+cfgfile);
    loc=new location[0];
    return;
  }

  /* locations and limits */
  loc=parsed.getLocations();
  def=parsed.getDefaultLocation();
  maxThreads=parsed.getThreads();
  maxretry=parsed.getMaxretry();
  retryprio=parsed.getRetryPriority();

  /* proxy */
  proxyserver=parsed.getProxyServer();
  proxyport=parsed.getProxyPort();

  /* local store */
  store=parsed.getLocalStore();
 }
 
 /**
  * Builds a location for the given base URL.
  *
  * The first configured location whose base is a prefix of baseurl serves as
  * the template (its aliases are shared with the new location); when none
  * matches, the default location is the template.
  *
  * @param baseurl base URL of the new location
  * @return the newly created location
  */
 private final static location createNewLocation(String baseurl)
 {
   int i=0;
   while(i<loc.length)
   {
     location known=loc[i];
     if(baseurl.indexOf(known.locbase)==0)
     {
       System.out.println("Location "+baseurl+" configured as "+known.locbase);
       location derived=new location(baseurl,known);
       derived.aliases=known.aliases;
       return derived;
     }
     i++;
   }
   // no configured location covers this URL
   return new location(baseurl,def);
 }
 
}
