/*
    BFilter - a smart ad-filtering web proxy
    Copyright (C) 2002-2005  Joseph Artsimovich <joseph_a@mail.ru>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "pch.h"

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "FilterTryList.h"
#include "ResponseFilterChain.h"
#include "HttpResponseMetadata.h"
#include "HttpRequestMetadata.h"
#include "HttpStatusLine.h"
#include "HttpHeader.h"
#include "HttpHeaderStructure.h"
#include "HttpHeaderElement.h"
#include "HttpHeadersCollection.h"
#include "IntrusivePtr.h"
#include "StringUtils.h"
#include "ServiceContext.h"
#include "AcceptEncoding.h"
#include "TE.h"
#include "RegexFilterDescriptor.h"
#include "HtmlResponseFilter.h"
#include "AnalyzeResponseFilter.h"
#include "CompressorResponseFilter.h"
#include "DecompressorResponseFilter.h"
#include "RegexResponseFilter.h"
#include "BString.h"
#include <string>
#include <list>

using namespace std;

// Constructs a FilterTryList.  The constructor body is empty.
FilterTryList::FilterTryList()
{
}

// Destroys a FilterTryList.  The destructor body is empty.
FilterTryList::~FilterTryList()
{
}

// Classifies a response by the value of its Content-Type header.
//
// The header value is tested with StringUtils::ciStartsWith() against two
// prefixes, in this order:
//   "text/html"             -> CTYPE_HTML
//   "application/xhtml+xml" -> CTYPE_XHTML
// Anything else yields CTYPE_OTHER.
FilterTryList::ContentType
FilterTryList::resolveContentType(HttpResponseMetadata const& metadata)
{
	BString const html_prefix("text/html");
	BString const xhtml_prefix("application/xhtml+xml");
	
	BString content_type = metadata.headers().getHeader(BString("Content-Type")).getValue();
	
	ContentType result = CTYPE_OTHER;
	if (StringUtils::ciStartsWith(
			content_type.begin(), content_type.end(),
			html_prefix.begin(), html_prefix.end())) {
		result = CTYPE_HTML;
	} else if (StringUtils::ciStartsWith(
			content_type.begin(), content_type.end(),
			xhtml_prefix.begin(), xhtml_prefix.end())) {
		result = CTYPE_XHTML;
	}
	return result;
}

bool
FilterTryList::isContentFilterApplicable(ResponseFilterChain& chain, HttpResponseMetadata const& metadata)
{
	if (!metadata.hasBody()) {
		return false;
	}
	int status_code = metadata.statusLine().getCode();
	if (status_code < 400 && status_code != 200) {
		return false;
	}
	BString const cache_control("Cache-Control");
	BString const no_transform("no-transform");
	
	if (HttpHeader const* hdr = metadata.headers().getHeaderPtr(cache_control)) {
		// check response headers for "Cache-Control: no-transform"
		HttpHeaderStructure structure(*hdr);
		if (structure.hasElement(no_transform)) {
			return false;
		}
	}
	if (HttpHeader const* hdr = chain.getRequest().headers().getHeaderPtr(cache_control)) {
		HttpHeaderStructure structure(*hdr);
		if (structure.hasElement(no_transform)) {
			return false;
		}
	}
	return true;
}

// Decides whether the response may be passed through an HTML filter.
//
// The response must first satisfy isContentFilterApplicable(); if it
// doesn't, false is returned and *ctype is left untouched.  Otherwise
// the content type is resolved with resolveContentType(), stored into
// *ctype (when ctype is non-null), and true is returned only for
// CTYPE_HTML and CTYPE_XHTML.
bool
FilterTryList::isHtmlFilterApplicable(ResponseFilterChain& chain,
	HttpResponseMetadata const& metadata, ContentType* ctype)
{
	if (!isContentFilterApplicable(chain, metadata)) {
		return false;
	}
	
	ContentType const resolved = resolveContentType(metadata);
	if (ctype != 0) {
		*ctype = resolved;
	}
	
	switch (resolved) {
		case CTYPE_HTML:
		case CTYPE_XHTML:
			return true;
		default:
			return false;
	}
}

// Appends an HtmlResponseFilter to the chain if the response qualifies.
//
// The response must pass isHtmlFilterApplicable(), and removeAllEncodings()
// must succeed.  The filter is told whether the content type was resolved
// as CTYPE_XHTML.
void
FilterTryList::tryHtmlFilter(
	ResponseFilterChain& chain, HttpResponseMetadata const& metadata)
{
	ContentType resolved_type = CTYPE_HTML;
	if (!isHtmlFilterApplicable(chain, metadata, &resolved_type)) {
		return;
	}
	if (!removeAllEncodings(chain, metadata)) {
		return;
	}
	
	bool const is_xhtml = (resolved_type == CTYPE_XHTML);
	IntrusivePtr<AbstractResponseFilter> html_filter(
		new HtmlResponseFilter(
			chain, chain.getContext().htmlProcessor(),
			chain.getRequestPtr(), is_xhtml
		)
	);
	chain.appendFilter(html_filter);
}

// Appends an AnalyzeResponseFilter to the chain if the response qualifies.
//
// The response qualifies if it passes isContentFilterApplicable() or if
// its status line reports a redirect.  In addition, removeAllEncodings()
// must succeed.  ignore_size is handed over to the filter as is.
void
FilterTryList::tryAnalyzeFilter(
	ResponseFilterChain& chain, HttpResponseMetadata const& metadata, bool ignore_size)
{
	bool const worth_analyzing = (
		isContentFilterApplicable(chain, metadata) ||
		metadata.statusLine().isRedirect()
	);
	if (!worth_analyzing) {
		return;
	}
	if (!removeAllEncodings(chain, metadata)) {
		return;
	}
	
	IntrusivePtr<AbstractResponseFilter> analyzer(
		new AnalyzeResponseFilter(chain, ignore_size)
	);
	chain.appendFilter(analyzer);
}

// Appends a RegexResponseFilter for filter_desc if the response qualifies.
//
// If the descriptor has a content type pattern, the response must pass
// isContentFilterApplicable() and its Content-Type header value must be
// matched by the descriptor.  If the descriptor has no such pattern,
// the response must pass isHtmlFilterApplicable() instead.
// In both cases removeAllEncodings() must succeed as well.
void
FilterTryList::tryRegexFilter(
	ResponseFilterChain& chain, HttpResponseMetadata const& metadata,
	IntrusivePtr<RegexFilterDescriptor const> filter_desc)
{
	if (filter_desc->contentTypePattern().get()) {
		if (!isContentFilterApplicable(chain, metadata)) {
			return;
		}
		BString content_type(metadata.headers().getHeader(BString("Content-Type")).getValue());
		if (!filter_desc->contentTypeMatches(content_type)) {
			return;
		}
	} else if (!isHtmlFilterApplicable(chain, metadata)) {
		return;
	}
	
	// url match is checked elsewhere
	
	if (!removeAllEncodings(chain, metadata)) {
		return;
	}
	
	IntrusivePtr<AbstractResponseFilter> regex_filter(
		new RegexResponseFilter(chain, filter_desc)
	);
	chain.appendFilter(regex_filter);
}

// Appends a CompressorResponseFilter to the chain if the response qualifies.
//
// All of the following must hold, checked in this order:
//  - the Content-Type header value starts with "text/"
//    (tested with StringUtils::ciStartsWith());
//  - the response passes isContentFilterApplicable();
//  - both the Content-Encoding and the Transfer-Encoding header values
//    of the response are empty;
//  - "gzip" is accepted according to the request's Accept-Encoding header.
void
FilterTryList::tryCompressorFilter(
	ResponseFilterChain& chain, HttpResponseMetadata const& metadata)
{
	BString const text_prefix("text/");
	BString content_type = metadata.headers().getHeader(BString("Content-Type")).getValue();
	bool const is_textual = StringUtils::ciStartsWith(
		content_type.begin(), content_type.end(),
		text_prefix.begin(), text_prefix.end()
	);
	if (!is_textual) {
		return;
	}
	
	if (!isContentFilterApplicable(chain, metadata)) {
		return;
	}
	
	bool const no_encodings = (
		metadata.headers().getHeader(BString("Content-Encoding")).getValue().empty() &&
		metadata.headers().getHeader(BString("Transfer-Encoding")).getValue().empty()
	);
	if (!no_encodings) {
		// some encoding is already applied, so don't add another one
		return;
	}
	
	AcceptEncoding accept_encoding(
		chain.getRequest().headers().getHeader(BString("Accept-Encoding"))
	);
	if (!accept_encoding.isAccepted(BString("gzip"))) {
		return;
	}
	
	IntrusivePtr<AbstractResponseFilter> compressor(
		new CompressorResponseFilter(chain)
	);
	chain.appendFilter(compressor);
}

// Arranges for decoding of those transfer encodings of the response
// that the request's TE header doesn't accept.
//
// Does nothing if the response has no Transfer-Encoding header.
// Otherwise the header's elements are examined front to back: elements
// accepted according to TE are dropped from a local list, and the
// examination stops at the first element that isn't accepted.
// One DecompressorResponseFilter is then appended to the chain for each
// element left in the list.  If an accepted element turns out not to be
// supported by DecompressorResponseFilter, the function returns without
// appending anything.
void
FilterTryList::ensureTransferEncodingUnderstood(
	ResponseFilterChain& chain, HttpResponseMetadata const& metadata)
{
	HttpHeader const* const te_header =
		metadata.headers().getHeaderPtr(BString("Transfer-Encoding"));
	if (!te_header) {
		return;
	}
	
	TE te(chain.getRequest().headers().getHeader(BString("TE")));
	HttpHeaderStructure structure(*te_header);
	std::list<HttpHeaderElement>& encodings = structure.elements();
	
	// Once an encoding that isn't accepted is found, it and all of the
	// following encodings have to be decoded.
	while (!encodings.empty() && te.isAccepted(encodings.front().getName())) {
		if (!DecompressorResponseFilter::isSupported(encodings.front().getName())) {
			return; // should not happen, as we specify only what we support in TE
		}
		encodings.pop_front();
	}
	
	// one decompressor per encoding that is left
	std::list<HttpHeaderElement>::const_iterator pending = encodings.begin();
	for (; pending != encodings.end(); ++pending) {
		chain.appendFilter(IntrusivePtr<AbstractResponseFilter>(
			new DecompressorResponseFilter(chain)
		));
	}
}

// Arranges for decoding of those content encodings of the response
// that the request's Accept-Encoding header doesn't accept.
//
// Does nothing if the response has no Content-Encoding header.
// Otherwise the header's elements are examined front to back: elements
// accepted according to Accept-Encoding are dropped from a local list,
// and the examination stops at the first element that isn't accepted.
// One DecompressorResponseFilter is then appended to the chain for each
// element left in the list.  If an accepted element turns out not to be
// supported by DecompressorResponseFilter, the function returns without
// appending anything.
void
FilterTryList::ensureContentEncodingUnderstood(
	ResponseFilterChain& chain, HttpResponseMetadata const& metadata)
{
	HttpHeader const* const ce_header =
		metadata.headers().getHeaderPtr(BString("Content-Encoding"));
	if (!ce_header) {
		return;
	}
	
	AcceptEncoding accept_encoding(
		chain.getRequest().headers().getHeader(BString("Accept-Encoding"))
	);
	HttpHeaderStructure structure(*ce_header);
	std::list<HttpHeaderElement>& encodings = structure.elements();
	
	// Once an encoding that isn't accepted is found, it and all of the
	// following encodings have to be decoded.
	while (!encodings.empty() && accept_encoding.isAccepted(encodings.front().getName())) {
		if (!DecompressorResponseFilter::isSupported(encodings.front().getName())) {
			return; // should not happen, as we specify only what we support in Accept-Encoding
		}
		encodings.pop_front();
	}
	
	// one decompressor per encoding that is left
	std::list<HttpHeaderElement>::const_iterator pending = encodings.begin();
	for (; pending != encodings.end(); ++pending) {
		chain.appendFilter(IntrusivePtr<AbstractResponseFilter>(
			new DecompressorResponseFilter(chain)
		));
	}
}

bool
FilterTryList::removeAllEncodings(
	ResponseFilterChain& chain, HttpResponseMetadata const& metadata)
{
	HttpHeader const* teh = metadata.headers().getHeaderPtr(BString("Transfer-Encoding"));
	HttpHeader const* ceh = metadata.headers().getHeaderPtr(BString("Content-Encoding"));
	int num_encodings = 0;
	if (teh) {
		HttpHeaderStructure structure(*teh);
		list<HttpHeaderElement>::const_iterator it = structure.elements().begin();
		list<HttpHeaderElement>::const_iterator const end = structure.elements().end();
		for (; it != end; ++it) {
			if (!DecompressorResponseFilter::isSupported(it->getName())) {
				return false;
			}
			++num_encodings;
		}
	}
	if (ceh) {
		HttpHeaderStructure structure(*ceh);
		list<HttpHeaderElement>::const_iterator it = structure.elements().begin();
		list<HttpHeaderElement>::const_iterator const end = structure.elements().end();
		for (; it != end; ++it) {
			if (!DecompressorResponseFilter::isSupported(it->getName())) {
				return false;
			}
			++num_encodings;
		}
	}
	
	for (; num_encodings > 0; --num_encodings) {
		chain.appendFilter(IntrusivePtr<AbstractResponseFilter>(
			new DecompressorResponseFilter(chain)
		));
	}
	
	return true;
}
