
/*********************************************************************
 * Squid 2 ClamAV redirector                                         *
 *********************************************************************/

/*********************************************************************
 *                                                                   *
 * ---------+-------------------------------------+--------+-------- *
 *  Version | Change                              | Date   | Author  *
 * ---------+-------------------------------------+--------+-------- *
 *   0.1    | Program build                       |11.01.05| me      *
 *          |                                     |        |         *
 * ---------+-------------------------------------+--------+-------- *
 *   0.2    | Added Database reloading            |17.01.05| me      *
 *          | fixed some bugs                     |        |         *
 * ---------+-------------------------------------+--------+-------- *
 *   0.3    | Added some optional debugging output|19.01.05| me      *
 *          | to fix a strange bug                |        |         *
 * ---------+-------------------------------------+--------+-------- *
 *   0.4    | see changelog for further changes   |22.01.05| me      *
 * ---------+-------------------------------------+--------+-------- *  
 *********************************************************************/

/*********************************************************************
 * Copyright (C) 2005 Daniel Lord (squidclam At users DoT sf Dot net)*
 *                                                                   *
 * This is free software; you can redistribute it and/or modify      *
 * it under the terms of the GNU General Public License as published *
 * by the Free Software Foundation; either version 2 of the License, *
 * or (at your option) any later version.                            *
 *                                                                   *
 * This software is distributed in the hope that it will be useful,  *
 * but WITHOUT ANY WARRANTY; without even the implied warranty of    *
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the     *
 * GNU General Public License for more details.                      *
 *                                                                   *
 * You should have received a copy of the GNU General Public License *
 * along with this software; if not, write to the Free Software      *
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston,            *
 * MA  02111-1307, USA.                                              *
 *********************************************************************/

/* return codes
 * 0 - Success 
 * 1 - Could not make stdout line buffered
 * 2 - problem with clamav
 * 3 - problem with temp files
 * 4 - problem with curl
 */

/* includes  */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <syslog.h>
#include <clamav.h>
#include <curl/curl.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

/* defines: compile-time configuration */
#define ERROR    "http://your.server.here/virus.php"    /* base URL written to stdout (followed by ?url=...&virus=...) for blocked or failed requests */
#define MY_PROXY "http://127.0.0.1:3128"                /* proxy libcurl fetches through */
#define TMPF     "/tmpdata/squidclamXXXXXXXX"           /* mkstemp() template of the scan file, preferably on a ramdisk */
#define RLDB  (100)            /* base number of scanned URLs after which the program exits; a random 0-255 is added at startup */
#define FSIZE (102400)         /* URLs whose reported Content-Length exceeds this many bytes (100 KiB) are answered without scanning */

// #define ERRIGNORE   /* if defined, answer with an empty line instead of the ERROR url when fetching or temp-file handling fails */
// #define DEBUG       /* if defined, log additional per-URL details to syslog */

/* nothing below this line needs to be configured */
#define TRUE  1
#define FALSE 0

/*
 * Growable buffer collecting the data of one download.
 *
 * memory: heap block managed with realloc()/free(), or NULL while empty
 * size:   number of payload bytes stored; one terminating 0 byte follows
 *         the payload and is not counted
 */
struct MemoryStruct {
    char *memory;
    size_t size;
};

/*
 * Write callback that appends a received chunk to a struct MemoryStruct.
 *
 * ptr:   start of the chunk
 * size:  size of one element in bytes
 * nmemb: number of elements in the chunk
 * data:  the struct MemoryStruct to append to
 *
 * Returns the number of bytes stored (size * nmemb). Returns 0 when the
 * byte count would overflow or the buffer cannot be grown; the data stored
 * so far stays valid and owned by the struct in that case. A return value
 * differing from size * nmemb makes libcurl abort the transfer.
 */
size_t
WriteMemoryCallback(void *ptr, size_t size, size_t nmemb, void *data)
{
    struct MemoryStruct *mem = (struct MemoryStruct *)data;
    size_t realsize;
    char *grown;

    /* refuse chunks whose byte count does not fit into size_t */
    if (size != 0 && nmemb > (size_t)-1 / size) {
        return 0;
    }
    realsize = size * nmemb;

    /* refuse growth where payload + new chunk + terminating 0 would wrap */
    if (realsize >= (size_t)-1 - mem->size) {
        return 0;
    }

    /* grow through a temporary so the old block is not lost on failure */
    grown = realloc(mem->memory, mem->size + realsize + 1);
    if (grown == NULL) {
        return 0;
    }
    mem->memory = grown;

    /* an empty chunk carries nothing to copy */
    if (realsize != 0) {
        memcpy(&(mem->memory[mem->size]), ptr, realsize);
    }
    mem->size += realsize;
    mem->memory[mem->size] = 0;

    return realsize;
}

/*
 * Read one random byte from /dev/urandom.
 *
 * Returns the byte (0-255) on success and (unsigned int)-1 on failure.
 * When the device cannot be opened the function waits one second before
 * returning. The descriptor is closed on every path.
 */
unsigned int 
get_random_number (void)
{
    unsigned char byte = 0;
    ssize_t got;
    int rf;

    /* open random source */
    rf = open("/dev/urandom", O_RDONLY);
    if (rf == -1) {
        /* sleep(1) rather than usleep(1000000): POSIX only permits
         * usleep arguments below one million */
        sleep(1);
        return (unsigned int)-1;
    }

    /* get one random byte, then release the descriptor in any case */
    got = read(rf, &byte, 1);
    close(rf);

    if (got != 1) {
        return (unsigned int)-1;
    }

    return byte;
}

/* one bloated main for the moment  */
int 
main (void)
{
    const char *dbdir;
    const char *virname;
    struct cl_node *root = NULL;
    struct cl_limits limits;

    struct request {
        char url[10000];
        char src[255];
        char ident[255];
        char method[31];
    } rq;

    CURL *eh;
    double dsize=0;
    
    struct MemoryStruct mem;

    int  ret=0;
    unsigned long int size = 0;
    int  fd;
    char tt[] = { TMPF };
    char erbuf[CURL_ERROR_SIZE+1];
    
    unsigned int count=0;
    unsigned int rand=0;
    unsigned int reload=RLDB;

    /* init struct */
    mem.memory=NULL; /* we expect realloc(NULL, size) to work */
    mem.size = 0;    /* no data at this point */
    
    /* get random number */
    if ((rand = get_random_number()) == -1) {
        syslog(LOG_ERR, "random number failed");
        rand=10;
    }

    /* initialice reload (RLDB + (0-255)) */
    reload += rand;

    /* start sysloging */
    openlog("squidclam", LOG_PID, LOG_DAEMON);
    
    /* now sleep a bit to distribute the load at startup */ 
    usleep((rand%10)*1000);

    /* info that we go up */
    syslog(LOG_INFO,"squidclam starting up now. reload after %i URLs", reload);

    /* make stdout line buffered */
    if (setvbuf(stdout, NULL, _IOLBF, 0) != 0) {
        syslog(LOG_ERR, "stdout not line buffered exiting");
        usleep(1000000); /* don't do DoS */
        return 1;
    }

    /* clamav init (load database) */
    dbdir = cl_retdbdir();
    if ((ret=cl_loaddbdir(dbdir, &root, NULL))) {
        syslog(LOG_ERR, "libclamav: %s", cl_strerror(ret));
        usleep(1000000); /* don't do DoS */
        return 2;
    }
    
    /* build database */
    if((ret = cl_build(root))) {
        syslog(LOG_ERR,"libclamav database initialization error: %s", cl_strerror(ret));
        usleep(1000000); /* don't do DoS */
        return 2;
    }

    /* set up archive limits */
    memset(&limits, 0, sizeof(struct cl_limits));
    limits.maxfiles = 20;             /* max files */
    limits.maxfilesize = 2097152;     /* maximal archived file size == 2 Mb */
    limits.maxreclevel = 5;           /* maximal recursion level */
    limits.maxratio = 100;            /* maximal compression ratio */
    limits.archivememlim = 0;         /* unlimited memory for bzip2 scanner */
    
    /* curl init */
    curl_global_init(CURL_GLOBAL_ALL);

    /* get an easy handle */
    if ((eh = curl_easy_init()) == NULL) {
        syslog(LOG_ERR,"curl_easy_init failed\n");
        usleep(1000000); /* don't do DoS */
        return 4;
    }

    /* set the proxy */
    curl_easy_setopt(eh, CURLOPT_PROXY, MY_PROXY);

    /* send all data to this function  */
    curl_easy_setopt(eh, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);

    /* we pass our 'chunk' struct to the callback function */
    curl_easy_setopt(eh, CURLOPT_WRITEDATA, (void *)&mem);

    /* get usefull error messages */
    curl_easy_setopt(eh, CURLOPT_ERRORBUFFER, erbuf);

    /* get a filehandle preferable in a ram disk */
    if ((fd = mkstemp(tt)) == -1) {
        syslog(LOG_ERR,"Could not get a tmpfile handle");
        usleep(1000000); /* don't do DoS */
        return 3;
    }

    /* loop to parse the given URLs */
    while (scanf("%10000s %255s %255s %31s", rq.url, rq.src, rq.ident, rq.method) != EOF){
        /* show which url we're handling */
#ifdef DEBUG
        syslog(LOG_INFO,"handle url (%s)\n", rq.url);
#endif
       
        /* clean struct */
        free(mem.memory);
        mem.memory = NULL;
        mem.size = 0;    /* no data at this point */
    
        /* set the url to retrive */
        curl_easy_setopt(eh, CURLOPT_URL, rq.url);

        /* only get the head */
        curl_easy_setopt(eh, CURLOPT_NOBODY, TRUE);
        
        /* actually get the header */
        if (curl_easy_perform(eh) != 0) {
            /* no header so just go back to squid */
#ifdef ERRIGNORE
            fprintf(stdout,"\n");
#else
            fprintf(stdout,"%s?url=%s&virus=%s %s %s %s\n", ERROR, rq.url, 
                    "squidclam_get_head_failed", rq.src, rq.ident, rq.method);
#endif
#ifdef DEBUG
            syslog(LOG_INFO,"could not get HEAD (%s) error was (%s)\n", rq.url, erbuf);
#endif
            continue;
        }
        
        /* get the CONTENT_LENGTH out of the header */
        if (curl_easy_getinfo(eh, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &dsize) != CURLE_OK) {
            /* not ok --> squid should handle this */
#ifdef ERRIGNORE
            fprintf(stdout,"\n");
#else
            fprintf(stdout,"%s?url=%s&virus=%s %s %s %s\n", ERROR, rq.url, 
                    "squidclam_get_size_of_file_failed", rq.src, rq.ident, rq.method);
#endif
#ifdef DEBUG
            syslog(LOG_INFO,"could not get size from (%s) error was (%s)\n", rq.url, erbuf);
#endif
            continue;
        }

        /* file too big --> don't scan */
        if (dsize > FSIZE) {
            fprintf(stdout,"\n");
#ifdef DEBUG
            syslog(LOG_INFO,"file too big to scan. %s has %.0f bytes\n", rq.url, dsize);
#endif
            continue;
        }
        
        /* now get the complete file */
        curl_easy_setopt(eh, CURLOPT_NOBODY, FALSE);
        curl_easy_setopt(eh, CURLOPT_HTTPGET, TRUE);

        /* get the file */
        if (curl_easy_perform(eh) != 0) {
            /* if error, give back to squid */
#ifdef ERRIGNORE
            fprintf(stdout,"\n");
#else
            fprintf(stdout,"%s?url=%s&virus=%s %s %s %s\n", ERROR, rq.url, 
                    "squidclam_get_file_failed", rq.src, rq.ident, rq.method);
#endif
#ifdef DEBUG
            syslog(LOG_INFO,"could not get (%s) error was (%s)\n", rq.url, erbuf);
#endif
            continue;
        }
        
        /* reset file handle to the beginning */
        lseek(fd,SEEK_SET,SEEK_SET);

        /* get a file out of the memory chunk */
        if (write(fd,mem.memory, mem.size) != mem.size) {
#ifdef ERRIGNORE
            fprintf(stdout,"\n");
#else
            fprintf(stdout,"%s?url=%s&virus=%s %s %s %s\n", ERROR, rq.url, 
                    "squidclam_writing_tempfile_failed", rq.src, rq.ident, rq.method);
#endif
            syslog(LOG_INFO,"error writing data to tempfile");
            continue;
        }

        /* make sure, there is no extra data in tempfile */
        if (ftruncate(fd, mem.size) != 0) {
#ifdef ERRIGNORE
            fprintf(stdout,"\n");
#else
            fprintf(stdout,"%s?url=%s&virus=%s %s %s %s\n", ERROR, rq.url, 
                    "squidclam_truncate_tempfile_failed", rq.src, rq.ident, rq.method);
#endif
            syslog(LOG_INFO,"error truncating tempfile");
            continue;
        }

        /* scan the file with clamav */
        /* we sadly have to use files for clamav to work properly */
        /* but we can put them into a ramdisk or tmpfs ;) */
        /* then we only have to deal with insecure tmpfile creation */
        if((ret = cl_scandesc(fd, &virname, &size, root, &limits, CL_SCAN_STDOPT)) == CL_VIRUS) {
            fprintf(stdout,"%s?url=%s&virus=%s %s %s %s\n", ERROR, rq.url, virname, rq.src, rq.ident, rq.method);
            syslog(LOG_WARNING, "INFECTED url=%s virus=%s", rq.url, virname);
#ifdef DEBUG
            syslog(LOG_WARNING, "Size of file: %i\n", mem.size);
#endif
        }
        else {
            /* file seems clean */
            fprintf(stdout,"\n");
#ifdef DEBUG
            syslog(LOG_INFO,"Scanned but no Virus found at: (%s)\n", rq.url);
            syslog(LOG_INFO,"Size of file: %i\n", mem.size);
#endif
            if(ret != CL_CLEAN) {
                syslog(LOG_ERR, "%s\n", cl_perror(ret));
            }
        }
        
        if (count++ >= reload) {
#ifdef DEBUG
            syslog(LOG_INFO,"Reloading DB by restarting squidclam at %i's URL", reload);
#endif
            closelog();
            free(mem.memory);
            curl_global_cleanup();
            cl_free(root);
            unlink(tt);
            return 0;
            /* update signature db this is done easyly be reloading squidclam :) */
        }
    }
    
    /* clean up  */
    closelog();
    free(mem.memory);
    curl_global_cleanup();
    cl_free(root);
    unlink(tt);
    return 0;
}
