/*
    BFilter - a smart ad-filtering web proxy
    Copyright (C) 2002-2005  Joseph Artsimovich <joseph_a@mail.ru>

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/

#include "pch.h"

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

#include "URI.h"
#include "DataChunk.h"
#include "SplittableBuffer.h"
#include "DataChunk.h"
#include "SBOutStream.h"
#include "StringUtils.h"
#include "InsensitiveEqual.h"
#include "types.h"
#include <memory>
#include <sstream>
#include <cstring>
#include <cctype>
#include <cassert>
#include <stddef.h>

using namespace std;

/*
Scans [begin, end) backwards and returns a pointer to the last '/' found
in that range. If the range is empty or contains no slash, <begin> is
returned. The returned pointer therefore marks the end of everything
that precedes the last path component.
*/
inline char const*
URI::skipLastPathComponent(char const* begin, char const* end)
{
	for (char const* pos = end; pos != begin; ) {
		--pos;
		if (*pos == '/') {
			return pos;
		}
	}
	return begin;
}

/*
Constructs a URI by parsing <str>.
The object starts out flagged as good with no explicit port (-1);
parse() then fills in the components and adjusts the flags.
*/
URI::URI(BString const& str)
:	m_flags(IS_GOOD),
	m_port(-1)
{
	this->parse(str);
}

/*
Constructs a URI by resolving <relative> against <base>.

The components are taken in order (scheme, authority, path, query,
fragment), and the first component that <relative> provides makes
<relative> the source of everything that follows:
 - <relative> is absolute:      the result is a copy of <relative>.
 - <relative> has an authority: scheme from <base>, the rest from <relative>.
 - <relative> has an absolute path (or <base> has neither an authority nor
   an absolute path): scheme and authority from <base>, the rest from
   <relative>.
 - otherwise: the path of <relative> is merged onto the path of <base>;
   query and fragment come from <relative>.

IS_GOOD is cleared if the inherited scheme or the resulting host
fails validation.
*/
URI::URI(URI const& base, URI const& relative)
:	m_flags(IS_GOOD),
	m_port(-1)
{
	// --- scheme ---
	if (relative.isAbsolute()) {
		*this = relative;
		return;
	} else if (base.isAbsolute()) {
		setFlag(IS_ABSOLUTE);
		m_scheme = base.m_scheme;
		if (!validateScheme(m_scheme)) {
			clearFlag(IS_GOOD);
		}
	} else {
		// Neither URI has a scheme, so the result must not claim to be
		// absolute. Flagging it absolute with an empty scheme would make
		// toStream() emit a bare leading ':'.
		clearFlag(IS_ABSOLUTE);
		// m_scheme.clear();
	}
	
	assert(!relative.isAbsolute());
	
	// --- authority ---
	if (relative.hasAuthority()) {
		// Everything from the authority onwards comes from <relative>.
		setFlag(HAS_AUTHORITY);
		setFlag(HAS_USER_INFO, relative.hasUserInfo());
		m_rawUserInfo = relative.m_rawUserInfo;
		m_host = relative.m_host;
		m_port = relative.m_port;
		setFlag(IS_RELATIVE_PATH, relative.isRelativePath());
		m_rawPath = relative.m_rawPath;
		setFlag(HAS_QUERY, relative.hasQuery());
		m_rawQuery = relative.m_rawQuery;
		setFlag(HAS_FRAGMENT, relative.hasFragment());
		m_rawFragment = relative.m_rawFragment;
		if (!validateHost(m_host)) {
			clearFlag(IS_GOOD);
		}
		return;
	} else if (base.hasAuthority()) {
		setFlag(HAS_AUTHORITY);
		setFlag(HAS_USER_INFO, base.hasUserInfo());
		m_rawUserInfo = base.m_rawUserInfo;
		m_host = base.m_host;
		m_port = base.m_port;
		if (!validateHost(m_host)) {
			clearFlag(IS_GOOD);
		}
	} else {
		clearFlag(HAS_AUTHORITY);
		clearFlag(HAS_USER_INFO);
	}
	
	assert(!relative.hasAuthority());
	
	// --- path ---
	if (!relative.isRelativePath() || (!base.hasAuthority() && base.isRelativePath())) {
		// Nothing to merge with: take the path (and what follows) verbatim.
		setFlag(IS_RELATIVE_PATH, relative.isRelativePath());
		m_rawPath = relative.m_rawPath;
		setFlag(HAS_QUERY, relative.hasQuery());
		m_rawQuery = relative.m_rawQuery;
		setFlag(HAS_FRAGMENT, relative.hasFragment());
		m_rawFragment = relative.m_rawFragment;
		return;
	} else {
		// <relative> has a relative path, and <base> has an absolute path
		// or an authority: the merged path is absolute.
		clearFlag(IS_RELATIVE_PATH);
		m_rawPath = mergePaths(base.m_rawPath, relative.m_rawPath);
	}
	
	// --- query and fragment ---
	setFlag(HAS_QUERY, relative.hasQuery());
	m_rawQuery = relative.m_rawQuery;
	setFlag(HAS_FRAGMENT, relative.hasFragment());
	m_rawFragment = relative.m_rawFragment;
}

/*
Percent-encodes <str>.
Alphanumeric characters and '-', '_', '.' are copied as is; every other
byte is replaced with "%xx", where xx is its value in lowercase hex.
If nothing needs escaping, <str> itself is returned.
*/
BString
URI::escape(BString const& str)
{
	struct CharTest
	{
		static bool isAllowed(char ch) {
			// <cctype> functions require a value representable as
			// unsigned char; a negative plain char is undefined behaviour.
			return (isalnum(static_cast<unsigned char>(ch))
				|| ch == '-' || ch == '_' || ch == '.');
		}
	};
	
	static char const hexchars[] = "0123456789abcdef";
	char const* p = str.begin();
	char const* const end = str.end();
	for (; p != end && CharTest::isAllowed(*p); ++p) {
		// skip allowed
	}
	if (p == end) {
		// nothing to escape
		return str;
	}
	
	// worst case: every byte expands to 3 characters
	SBOutStream strm(str.size() * 3);
	strm.write((char const*)str.begin(), p - str.begin());
	do {
		for (; p != end && !CharTest::isAllowed(*p); ++p) {
			// Must be unsigned: with a negative char, (ch >> 4) would be
			// negative and index outside of hexchars.
			unsigned char const ch = static_cast<unsigned char>(*p);
			strm << '%' << hexchars[ch >> 4] << hexchars[ch & 0xf];
		}
		char const* old_pos = p;
		for (; p != end && CharTest::isAllowed(*p); ++p) {
			// skip allowed
		}
		// copy the run of allowed characters in one go
		strm.write((char const*)old_pos, p - old_pos);
	} while (p != end);
	
	return strm.data().toBString();
}

/*
Decodes percent-encoded sequences in <str>.
"%XX" (two hex digits) becomes the byte with that value. A '%' that is
not followed by two characters accepted by parseUnsignedHex is copied
as is. If <decode_plus> is true, '+' is additionally turned into a space.
If <str> contains nothing to decode, <str> itself is returned.
*/
BString
URI::unescape(BString const& str, bool decode_plus)
{
	struct Special
	{
		// true for characters that may need decoding
		static bool test(char ch, bool plus_is_special) {
			return ch == '%' || (plus_is_special && ch == '+');
		}
	};
	
	char const* const first = str.begin();
	char const* const last = str.end();
	char const* cur = first;
	while (cur != last && !Special::test(*cur, decode_plus)) {
		++cur;
	}
	if (cur == last) {
		return str;
	}
	
	SBOutStream out(str.size());
	out.write(first, cur - first);
	do {
		// decode a run of special characters
		while (cur != last && Special::test(*cur, decode_plus)) {
			char const ch = *cur;
			bool decoded = false;
			if (ch == '+') {
				// only reachable when decode_plus is set
				out << ' ';
				decoded = true;
			} else if (last - cur >= 3) {
				// ch is '%' and there is room for 2 hex chars
				char const* hex_end = cur + 3;
				uint8_t const byte =
					StringUtils::parseUnsignedHex<uint8_t>(cur + 1, hex_end);
				if (hex_end == cur + 3) {
					out << byte;
					cur += 2; // the '%' itself is skipped below
					decoded = true;
				}
			}
			if (!decoded) {
				// a stray '%': pass it through unchanged
				out << ch;
			}
			++cur;
		}
		
		// copy a run of ordinary characters in one go
		char const* const run_begin = cur;
		while (cur != last && !Special::test(*cur, decode_plus)) {
			++cur;
		}
		out.write(run_begin, cur - run_begin);
	} while (cur != last);
	
	return out.data().toBString();
}

/*
Replaces the scheme and marks the URI as absolute.
IS_GOOD is cleared if the new scheme fails validateScheme();
it is never set back by this method.
*/
void
URI::setScheme(BString const& scheme)
{
	setFlag(IS_ABSOLUTE);
	m_scheme = scheme;
	bool const scheme_ok = validateScheme(m_scheme);
	if (!scheme_ok) {
		clearFlag(IS_GOOD);
	}
}

/*
Stores <user_info> verbatim (no escaping is done here)
and flags the URI as having a user info component.
*/
void
URI::setRawUserInfo(BString const& user_info)
{
	setFlag(HAS_USER_INFO);
	m_rawUserInfo = user_info;
}

/*
Replaces the host and flags the URI as having an authority component.
IS_GOOD is cleared if the new host fails validateHost();
it is never set back by this method.
*/
void
URI::setHost(BString const& host)
{
	m_host = host;
	bool const host_ok = validateHost(m_host);
	if (!host_ok) {
		clearFlag(IS_GOOD);
	}
	setFlag(HAS_AUTHORITY);
}

int
URI::guessPort() const
{
	if (m_port >= 0) {
		return m_port;
	} else {
		InsensitiveEqual ieq;
		if (ieq(m_scheme, BString("http"))) {
			return 80;
		} else if (ieq(m_scheme, BString("https"))) {
			return 443;
		}
	}
	return -1;
}

void
URI::setPort(int port)
{
	m_port = port;
}

/*
Replaces the path with <path> and marks it as absolute.
The leading slash is not stored as a part of the path, so a single
leading '/' in <path>, if present, is dropped.
*/
void
URI::setAbsoluteRawPath(BString const& path)
{
	m_rawPath = path;
	bool const has_leading_slash = (!m_rawPath.empty() && m_rawPath[0] == '/');
	if (has_leading_slash) {
		m_rawPath.trimFront(1);
	}
	clearFlag(IS_RELATIVE_PATH);
}

/*
Stores <query> verbatim (without the leading '?')
and flags the URI as having a query component.
*/
void
URI::setRawQuery(BString const& query)
{
	m_rawQuery = query;
	setFlag(HAS_QUERY);
}

/*
Drops the query component: the HAS_QUERY flag is cleared
and the stored query string is emptied.
*/
void
URI::removeQuery()
{
	clearFlag(HAS_QUERY);
	m_rawQuery.clear();
}

/*
Stores <fragment> verbatim (without the leading '#')
and flags the URI as having a fragment component.
*/
void
URI::setRawFragment(BString const& fragment)
{
	setFlag(HAS_FRAGMENT);
	m_rawFragment = fragment;
}

/*
Drops the fragment component: the HAS_FRAGMENT flag is cleared
and the stored fragment string is emptied.
*/
void
URI::removeFragment()
{
	clearFlag(HAS_FRAGMENT);
	m_rawFragment.clear();
}

/*
Strips the scheme and the whole authority (user info, host, port),
leaving only the path, query and fragment. The relative / absolute
status of the path itself is not touched.
*/
void
URI::makeRelative()
{
	m_scheme.clear();
	m_rawUserInfo.clear();
	m_host.clear();
	m_port = -1;
	clearFlags(IS_ABSOLUTE|HAS_AUTHORITY|HAS_USER_INFO);
}

/*
Writes the textual form of the URI to <strm>.
 - S_RELATIVE: the scheme and the authority are omitted.
 - S_START_WITH_HOST: the scheme, the "//" and the user info are omitted,
   so the output starts with the host (provided there is an authority).
 - any other style: all present components are written.
The path, query and fragment are always written. The raw components are
written as stored; no escaping is done here.
*/
void
URI::toStream(std::ostream& strm, Style style) const
{
	bool const host_first = (style == S_START_WITH_HOST);
	
	if (style != S_RELATIVE) {
		if (isAbsolute() && !host_first) {
			strm << m_scheme << ':';
		}
		if (hasAuthority()) {
			if (!host_first) {
				strm << "//";
				if (hasUserInfo()) {
					strm << m_rawUserInfo << '@';
				}
			}
			strm << m_host;
			if (m_port != -1) {
				strm << ':' << m_port;
			}
		}
	}
	
	// the leading slash is not stored in m_rawPath
	if (!isRelativePath()) {
		strm << '/';
	}
	strm << m_rawPath;
	
	if (hasQuery()) {
		strm << '?' << m_rawQuery;
	}
	
	if (hasFragment()) {
		strm << '#' << m_rawFragment;
	}
}

/*
Returns what toStream() produces for <style>, as a std::string.
*/
string
URI::toString(Style style) const
{
	ostringstream out;
	this->toStream(out, style);
	return out.str();
}

/*
Returns what toStream() produces for <style>, as a BString.
*/
BString
URI::toBString(Style style) const
{
	SBOutStream out(100);
	this->toStream(out, style);
	return out.data().toBString();
}

/*
Parses <str> into scheme, authority, path, query and fragment.
Both ends of the input are first trimmed with StringUtils::ltrim() /
rtrim() (presumably stripping whitespace). Each parseLeadingXXX() step
then consumes its component from the front of the remaining range and
returns the position where the next step continues.
*/
void
URI::parse(BString const& str)
{
	// ltrim() has to be given the whole string. With an empty range
	// (begin, begin) it has nothing to skip, and the front of the
	// input would never get trimmed.
	char const* begin = StringUtils::ltrim(str.begin(), str.end());
	char const* end = StringUtils::rtrim(begin, str.end());
	begin = parseLeadingScheme(str, begin, end);
	begin = parseLeadingAuthority(str, begin, end);
	begin = parseLeadingPath(str, begin, end);
	begin = parseLeadingQuery(str, begin, end);
	parseLeadingFragment(str, begin, end);
}

/*
Tries to consume "scheme:" from the front of [begin, end).
Everything up to the first ':' is taken as a candidate; it is accepted
only if validateScheme() approves it. On success IS_ABSOLUTE is set and
the position just past the colon is returned. Otherwise the scheme is
emptied, IS_ABSOLUTE is cleared and <begin> is returned.
*/
char const*
URI::parseLeadingScheme(BString const& str, char const* begin, char const* end)
{
	char const* const colon = StringUtils::find(begin, end, ':');
	
	bool have_scheme = false;
	if (colon != end) {
		m_scheme = BString(str, begin, colon);
		have_scheme = validateScheme(m_scheme);
	}
	
	if (!have_scheme) {
		m_scheme.clear();
		clearFlag(IS_ABSOLUTE);
		return begin;
	}
	
	setFlag(IS_ABSOLUTE);
	return colon + 1;
}

/*
Tries to consume "//authority" from the front of [begin, end).
If the range starts with "//", the authority extends up to the first
'/', '?' or '#' (or to <end>). HAS_AUTHORITY is set, the authority is
handed to parseAuthority(), and the position where it ends is returned.
Otherwise the host, user info and port are reset, the related flags are
cleared and <begin> is returned.
*/
char const*
URI::parseLeadingAuthority(BString const& str, char const* begin, char const* end)
{
	BString const slash2("//");
	if (!StringUtils::startsWith(begin, end, slash2.begin(), slash2.end())) {
		m_host.clear();
		m_rawUserInfo.clear();
		m_port = -1;
		clearFlags(HAS_AUTHORITY|HAS_USER_INFO);
		return begin;
	}
	
	begin += 2;
	
	// The authority is terminated by whichever of '/', '?', '#' comes
	// first. Searching for '/' over the whole range first would let a
	// slash inside the query or fragment (as in "//host?q=a/b") be
	// mistaken for the end of the authority.
	char const* auth_end = begin;
	for (; auth_end != end; ++auth_end) {
		char const ch = *auth_end;
		if (ch == '/' || ch == '?' || ch == '#') {
			break;
		}
	}
	
	setFlag(HAS_AUTHORITY);
	parseAuthority(str, begin, auth_end);
	return auth_end;
}

/*
Consumes the path from the front of [begin, end).
A leading '/' makes the path absolute; the slash itself is skipped and
not stored. The path extends up to the first '?' or '#' (or to <end>).
Returns the position where the path ends.
*/
char const*
URI::parseLeadingPath(BString const& str, char const* begin, char const* end)
{
	if (begin != end && *begin == '/') {
		clearFlag(IS_RELATIVE_PATH);
		++begin; // the leading slash is not a part of the path
	} else {
		setFlag(IS_RELATIVE_PATH);
	}
	
	// The path is terminated by whichever of '?', '#' comes first.
	// Searching for '?' over the whole range first would let a question
	// mark inside the fragment (as in "a#b?c") be mistaken for the start
	// of the query, leaving the '#' inside the path.
	char const* path_end = begin;
	for (; path_end != end; ++path_end) {
		char const ch = *path_end;
		if (ch == '?' || ch == '#') {
			break;
		}
	}
	
	m_rawPath = BString(str, begin, path_end);
	return path_end;
}

/*
Tries to consume "?query" from the front of [begin, end).
If the range starts with '?', the query extends up to the first '#'
(or to <end>); it is stored without the '?', HAS_QUERY is set and the
position where the query ends is returned. Otherwise the query is
emptied, HAS_QUERY is cleared and <begin> is returned.
*/
char const*
URI::parseLeadingQuery(BString const& str, char const* begin, char const* end)
{
	bool const starts_with_qmark = (begin != end && *begin == '?');
	if (!starts_with_qmark) {
		m_rawQuery.clear();
		clearFlag(HAS_QUERY);
		return begin;
	}
	
	char const* const query_begin = begin + 1;
	char const* const query_end = StringUtils::find(query_begin, end, '#');
	m_rawQuery = BString(str, query_begin, query_end);
	setFlag(HAS_QUERY);
	return query_end;
}

/*
Tries to consume "#fragment" from the front of [begin, end).
If the range starts with '#', everything after it up to <end> is stored
as the fragment and HAS_FRAGMENT is set. Otherwise the fragment is
emptied and HAS_FRAGMENT is cleared.
*/
void
URI::parseLeadingFragment(BString const& str, char const* begin, char const* end)
{
	bool const starts_with_hash = (begin != end && *begin == '#');
	if (!starts_with_hash) {
		m_rawFragment.clear();
		clearFlag(HAS_FRAGMENT);
		return;
	}
	
	m_rawFragment = BString(str, begin + 1, end);
	setFlag(HAS_FRAGMENT);
}

/*
Splits the authority [begin, end) into "userinfo@" and "host[:port]".
The user info is whatever precedes the first '@'; without an '@' there
is no user info and the whole range goes to parseHostAndPort().
*/
void
URI::parseAuthority(BString const& str, char const* begin, char const* end)
{
	char const* const at_sign = StringUtils::find(begin, end, '@');
	bool const has_user_info = (at_sign != end);
	
	if (has_user_info) {
		setFlag(HAS_USER_INFO);
		m_rawUserInfo = BString(str, begin, at_sign);
	} else {
		clearFlag(HAS_USER_INFO);
		m_rawUserInfo.clear();
	}
	
	char const* const host_begin = has_user_info ? at_sign + 1 : begin;
	parseHostAndPort(str, host_begin, end);
}

/*
Splits [begin, end) into the host and an optional ":port".
The text after the last ':' is taken as a port only if it is non-empty
and parseUnsigned() consumes all of it. Otherwise the whole range is
the host and the port is set to -1. If there is no ':' at all, the port
is left untouched. IS_GOOD is cleared if the host fails validateHost().
*/
void
URI::parseHostAndPort(BString const& str, char const* begin, char const* end)
{
	char const* host_end = StringUtils::rfind(begin, end, ':');
	
	if (host_end != end) {
		char const* const digits_begin = host_end + 1;
		char const* digits_end = end; // updated by parseUnsigned()
		unsigned const parsed =
			StringUtils::parseUnsigned<unsigned>(digits_begin, digits_end);
		bool const is_port = (digits_begin != digits_end && digits_end == end);
		if (is_port) {
			m_port = parsed;
		} else {
			// probably an IPV6 without a port. Example: http://[::1]/
			m_port = -1;
			host_end = end;
		}
	}
	
	m_host = BString(str, begin, host_end);
	bool const host_ok = validateHost(m_host);
	if (!host_ok) {
		clearFlag(IS_GOOD);
	}
}

/*
Merges the relative path <rel> onto the absolute path <abs>.

Remember that the leading slash is not a part of a path in URIs, it
just acts as a separator. So, <abs> doesn't start with a slash.

The last component of <abs> is dropped first. Then leading "./" and
"../" prefixes are stripped from <rel>, with each "../" dropping one
more trailing component of <abs>. What is left of <abs> and <rel> is
joined with a '/'. If nothing is left of <abs>, the result is just the
remainder of <rel>.
*/
BString
URI::mergePaths(BString const& abs, BString const& rel)
{
	BString const parent_prefix("../");
	BString const current_prefix("./");
	
	char const* const base_begin = abs.begin();
	char const* base_end = skipLastPathComponent(base_begin, abs.end());
	char const* tail = rel.begin();
	char const* const tail_end = rel.end();
	
	for (;;) {
		if (tail == tail_end || *tail != '.') {
			break;
		}
		if (StringUtils::startsWith(
		    tail, tail_end, parent_prefix.begin(), parent_prefix.end())) {
			tail += 3;
			base_end = skipLastPathComponent(base_begin, base_end);
			continue;
		}
		if (StringUtils::startsWith(
		    tail, tail_end, current_prefix.begin(), current_prefix.end())) {
			tail += 2;
			continue;
		}
		break; // starts with a dot, but it's neither "./" nor "../"
	}
	
	if (base_begin == base_end) {
		// nothing is left of <abs>, so there is nothing to prepend
		return BString(rel, tail, tail_end);
	}
	
	size_t const base_len = base_end - base_begin;
	size_t const tail_len = tail_end - tail;
	
	// layout: <remaining abs> '/' <remaining rel>
	auto_ptr<DataChunk> chunk(DataChunk::create(base_len + 1 + tail_len));
	char* const out = chunk->getDataAddr();
	memcpy(out, base_begin, base_len);
	out[base_len] = '/';
	memcpy(out + base_len + 1, tail, tail_len);
	return BString(chunk);
}

/*
Checks that <scheme> is non-empty, starts with a letter, and otherwise
consists only of letters, digits, '+', '-' and '.'.
Returns true if it does.
*/
bool
URI::validateScheme(BString const& scheme)
{
	if (scheme.empty()) {
		return false;
	}
	
	// <cctype> functions require a value representable as unsigned char.
	// Passing a negative plain char (a byte >= 0x80 where char is signed)
	// is undefined behaviour, hence the conversions below.
	if (!isalpha(static_cast<unsigned char>(scheme[0]))) {
		return false;
	}
	char const* p = scheme.begin() + 1;
	char const* const end = scheme.end();
	for (; p != end; ++p) {
		unsigned char const ch = static_cast<unsigned char>(*p);
		if (!isalnum(ch) && ch != '+' && ch != '-' && ch != '.') {
			return false;
		}
	}
	return true;
}

/*
Checks that <host> is non-empty and contains only acceptable characters.
A host enclosed in square brackets is treated as a literal IPV6 address:
only letters, digits and ':' are allowed between the brackets.
Any other host may contain only letters, digits, '-' and '.'.
Returns true if the host passes. This is a character-level check only;
the structure of the address or the name is not verified.
*/
bool
URI::validateHost(BString const& host)
{
	if (host.empty()) {
		return false;
	}
	
	// <cctype> functions require a value representable as unsigned char.
	// Passing a negative plain char (a byte >= 0x80 where char is signed)
	// is undefined behaviour, hence the conversions below.
	char const* p = host.begin();
	char const* end = host.end();
	if (p[0] == '[' && end[-1] == ']') {
		// a literal IPV6, as described in RFC 2732
		++p;
		--end;
		for (; p != end; ++p) {
			unsigned char const ch = static_cast<unsigned char>(*p);
			if (!isalnum(ch) && ch != ':') {
				return false;
			}
		}
	} else {
		for (; p != end; ++p) {
			unsigned char const ch = static_cast<unsigned char>(*p);
			if (!isalnum(ch) && ch != '-' && ch != '.') {
				return false;
			}
		}
	}
	return true;
}
