/* tokenizer.cpp - Implementation of the Tokenizer
 * Programmed by Kilbert <kilbert@inside3d.com>
 */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#include "util.h"
#include "tokenizer.h"

/*************************************
 * FIXME: these need to be rewritten *
 *************************************/

/*
 * Tokenizer
 */

// A fresh tokenizer has no error message recorded.
Tokenizer::Tokenizer()
	: m_error(NULL)
{
}

// Releases the error message buffer (delete[] on NULL is a no-op).
Tokenizer::~Tokenizer()
{
	delete[] m_error;
}

/*
 * Records an error message of the form "<line>: error: <text>", replacing
 * any message stored before.
 *
 * line  - line number placed at the front of the message
 * error - message text; NULL or "" clears the stored error instead
 */
void Tokenizer::setError(int line, const char *error)
{
	char *formatted = NULL;

	if(error && *error)
	{
		// 64 spare bytes cover the "%i: error: " prefix and the terminator
		formatted = new char[strlen(error) + 64];
		sprintf(formatted, "%i: error: %s", line, error);
	}

	// The old buffer is released only after the new message has been built:
	// this keeps m_error valid if the allocation throws, and stays correct
	// when `error` points into the message that is currently stored.
	delete[] m_error;
	m_error = formatted;
}

/*
 * SimpleTokenizer
 */

/*
 * Sets up a tokenizer over the buffer returned by Util_StrCopy(str)
 * (presumably a new[]-allocated duplicate, since the destructor delete[]s
 * it - confirm in util.h). Scanning starts at the first character and the
 * end marker sits on the terminating NUL.
 */
SimpleTokenizer::SimpleTokenizer(const char *str)
	: m_line_number(0), m_pos_start(NULL), m_pos_end(NULL), m_buffer(NULL)
{
	m_buffer = Util_StrCopy(str);

	// start with an empty token
	memset(m_token, 0, 1024);

	m_pos_start = m_buffer;
	m_pos_end = m_pos_start + strlen(m_pos_start);
}

// Frees the buffer obtained in the constructor.
SimpleTokenizer::~SimpleTokenizer()
{
	delete [] m_buffer;
}

/*
 * Rewinds the tokenizer to the beginning of its buffer: clears the stored
 * error, resets the line number to 0 and restores both scan pointers.
 */
void SimpleTokenizer::reset()
{
	// a NULL message makes setError() drop the current error
	setError(0, NULL);

	m_line_number = 0;
	m_pos_start = m_buffer;
	m_pos_end = m_pos_start + strlen(m_pos_start);
}

/*
 * Extracts the next token from the buffer into m_token.
 *
 * Returns m_token on success. Returns NULL when
 *   - the input is exhausted (the error is then set to "done"),
 *   - an error was already set by an earlier call, or
 *   - a token or string does not fit into m_token, or a string is never
 *     closed (an error is set in both cases).
 *
 * Whitespace, "//" line comments and block comments are skipped. A
 * double-quoted string is returned without its surrounding quotes (so ""
 * yields an empty token); a quote directly preceded by a backslash does not
 * end the string and the backslash stays in the token. Any other token runs
 * until whitespace, a quote, a "//" or the end of the buffer.
 *
 * m_line_number is only read here (for error messages); it is never advanced.
 */
const char *SimpleTokenizer::next()
{
	if(m_pos_start == m_pos_end)
	{
		memset(m_token, 0, 1024);
		setError(m_line_number, "done");
		return NULL;
	}
	else if(errorSet())
		return NULL;

	// m_pos_end points at the buffer's terminating NUL, so looking one
	// character ahead of any position before m_pos_end is always in bounds.
	for(;;)
	{
		// eat whitespace; the cast keeps isspace() defined for bytes >= 0x80
		while(m_pos_start != m_pos_end && isspace((unsigned char)*m_pos_start))
			m_pos_start++;

		// nothing but whitespace/comments was left: stop here rather than
		// re-testing the same position forever
		if(m_pos_start == m_pos_end)
			break;

		// eat line comments; the newline itself is consumed as whitespace on
		// the next pass, together with any indentation that follows it
		if(m_pos_start[0] == '/' && m_pos_start[1] == '/')
		{
			while(m_pos_start != m_pos_end && *m_pos_start != '\n')
				m_pos_start++;
			continue;
		}

		// eat comment blocks; scanning starts after the opening marker so
		// that its '*' cannot double as the start of the closing marker
		if(m_pos_start[0] == '/' && m_pos_start[1] == '*')
		{
			m_pos_start += 2;
			while(m_pos_start != m_pos_end &&
				!(m_pos_start[0] == '*' && m_pos_start[1] == '/'))
				m_pos_start++;
			if(m_pos_start != m_pos_end)
				m_pos_start += 2;
			// an unterminated comment swallows the rest of the input
			continue;
		}

		memset(m_token, 0, 1024);
		char *t = m_token;

		// quoted string
		if(*m_pos_start == '\"')
		{
			m_pos_start++;
			for(;;)
			{
				if(m_pos_start == m_pos_end)
				{
					setError(m_line_number, "Unterminated string");
					return NULL;
				}
				// NOTE(review): an escaped backslash right before the
				// closing quote (\\") is still read as an escaped quote
				if(*m_pos_start == '\"' && *(m_pos_start-1) != '\\')
				{
					m_pos_start++;
					return m_token;
				}
				if(t - m_token >= 1023) // keep room for the terminator
				{
					setError(m_line_number, "String exceeds 1024 byte limit");
					return NULL;
				}
				*t++ = *m_pos_start++;
			}
		}

		// must be normal token; the checks above guarantee that at least
		// one character is copied
		while(m_pos_start != m_pos_end &&
			!isspace((unsigned char)*m_pos_start) && *m_pos_start != '\"')
		{
			if(m_pos_start[0] == '/' && m_pos_start[1] == '/')
				break;
			if(t - m_token >= 1023) // keep room for the terminator
			{
				setError(m_line_number, "Token exceedes 1024 byte limit");
				return NULL;
			}
			*t++ = *m_pos_start++;
		}
		return m_token;
	}

	// ran out of input while skipping: report it like the check on entry
	memset(m_token, 0, 1024);
	setError(m_line_number, "done");
	return NULL;
}

/*
 * Copies `token` into m_token.
 *
 * Only the token buffer is written; the read position is left alone, so
 * this does not by itself make next() hand the token out again.
 *
 * m_token is treated as a 1024-byte buffer throughout this file, so longer
 * input is truncated to 1023 characters. A NULL token is ignored.
 */
void SimpleTokenizer::putback(const char *token)
{
	if(!token)
		return;

	size_t len = strlen(token);
	if(len > 1023)
		len = 1023;

	// memmove because callers may pass back the pointer next() returned,
	// which is m_token itself
	memmove(m_token, token, len);
	m_token[len] = '\0';
}

/*
 * AdvancedTokenizer
 */
/*
 * Multi-character operators, closed by a NULL sentinel.
 * NOTE(review): isToken(char, char) compares a two-character string against
 * these entries, so the three-character ">>=" and "<<=" can never match there.
 */
const char *AdvancedTokenizer::double_token_set[] =
{
	// three characters
	">>=", "<<=",
	// compound assignment
	"+=", "-=", "*=", "/=", "%=", "&=", "^=", "|=",
	// shifts, increment / decrement, logical
	">>", "<<", "++", "--", "&&", "||",
	// comparison
	"<=", ">=", "==", "!=",
	// end of table
	NULL
};

// Single-character tokens; the literal's terminating NUL ends the table.
const char AdvancedTokenizer::single_token_set[] =
	";{},:=()[].&!~-"
	"+*/%<>^|?";

/*
 * Returns true when the two characters c1 c2, taken together, are equal to
 * one of the entries of double_token_set.
 */
bool AdvancedTokenizer::isToken(char c1, char c2)
{
	const char pair[3] = { c1, c2, '\0' };

	// walk the table up to its NULL sentinel
	const char *const *entry = double_token_set;
	while(*entry != NULL)
	{
		if(strcmp(pair, *entry) == 0)
			return true;
		entry++;
	}
	return false;
}

// Returns true when c is one of the characters listed in single_token_set.
bool AdvancedTokenizer::isToken(char c)
{
	// '\0' only terminates the table; strchr() would otherwise find it
	if(c == '\0')
		return false;

	return strchr(single_token_set, c) != NULL;
}

/*
 * Sets up a tokenizer over the buffer returned by Util_StrCopy(str)
 * (presumably a new[]-allocated duplicate, since the destructor delete[]s
 * it - confirm in util.h). Scanning starts at the first character and the
 * end marker sits on the terminating NUL.
 */
AdvancedTokenizer::AdvancedTokenizer(const char *str)
	: m_buffer(NULL), m_pos_start(NULL), m_pos_end(NULL),
	  m_line_number(0)
{
	// start with an empty token
	memset(m_token, 0, 1024);

	m_buffer = Util_StrCopy(str);
	m_pos_start = m_buffer;
	m_pos_end = m_buffer + strlen(m_buffer);
}

// Frees the buffer obtained in the constructor.
AdvancedTokenizer::~AdvancedTokenizer()
{
	delete [] m_buffer;
}

/*
 * Rewinds the tokenizer to the beginning of its buffer: clears the stored
 * error and restores both scan pointers. m_line_number is left as it is.
 */
void AdvancedTokenizer::reset()
{
	// a NULL message makes setError() drop the current error
	setError(0, NULL);

	m_pos_start = m_buffer;
	m_pos_end = m_pos_start + strlen(m_pos_start);
}

/*
 * Extracts the next token from the buffer into m_token.
 *
 * Returns m_token on success. Returns NULL when
 *   - the input is exhausted (the error is then set to "done"),
 *   - an error was already set by an earlier call, or
 *   - a token or string does not fit into m_token, or a string is never
 *     closed (an error is set in both cases).
 *
 * Whitespace and block comments are skipped. A "//" line comment discards
 * everything up to the end of the buffer. A double-quoted string is returned
 * without its surrounding quotes (so "" yields an empty token); a quote
 * directly preceded by a backslash does not end the string and the backslash
 * stays in the token. Two-character and one-character operators accepted by
 * isToken() are returned as tokens of their own. Any other token runs until
 * whitespace, an operator, a quote or the end of the buffer.
 *
 * m_line_number is only read here (for error messages); it is never advanced.
 */
const char *AdvancedTokenizer::next()
{
	if(m_pos_start == m_pos_end)
	{
		memset(m_token, 0, 1024);
		setError(m_line_number, "done");
		return NULL;
	}
	else if(errorSet())
		return NULL;

	// m_pos_end points at the buffer's terminating NUL, so looking one
	// character ahead of any position before m_pos_end is always in bounds.
	for(;;)
	{
		// eat whitespace; the cast keeps isspace() defined for bytes >= 0x80
		while(m_pos_start != m_pos_end && isspace((unsigned char)*m_pos_start))
			m_pos_start++;

		// nothing but whitespace/comments was left: stop here rather than
		// re-testing the same position forever
		if(m_pos_start == m_pos_end)
			break;

		// eat line comments: the remainder of the buffer is dropped
		// NOTE(review): this skips to the end of the buffer, not to the next
		// newline - confirm the buffer is meant to hold a single line
		if(m_pos_start[0] == '/' && m_pos_start[1] == '/')
		{
			m_pos_start = m_pos_end;
			break;
		}

		// eat comment blocks; scanning starts after the opening marker so
		// that its '*' cannot double as the start of the closing marker
		if(m_pos_start[0] == '/' && m_pos_start[1] == '*')
		{
			m_pos_start += 2;
			while(m_pos_start != m_pos_end &&
				!(m_pos_start[0] == '*' && m_pos_start[1] == '/'))
				m_pos_start++;
			if(m_pos_start != m_pos_end)
				m_pos_start += 2;
			// an unterminated comment swallows the rest of the input
			continue;
		}

		memset(m_token, 0, 1024);
		char *t = m_token;

		// quoted string
		if(*m_pos_start == '\"')
		{
			m_pos_start++;
			for(;;)
			{
				if(m_pos_start == m_pos_end)
				{
					setError(m_line_number, "Unterminated string");
					return NULL;
				}
				// NOTE(review): an escaped backslash right before the
				// closing quote (\\") is still read as an escaped quote
				if(*m_pos_start == '\"' && *(m_pos_start-1) != '\\')
				{
					m_pos_start++;
					return m_token;
				}
				if(t - m_token >= 1023) // keep room for the terminator
				{
					setError(m_line_number, "String exceeds 1024 byte limit");
					return NULL;
				}
				*t++ = *m_pos_start++;
			}
		}

		// detect double tokens
		if(isToken(m_pos_start[0], m_pos_start[1]))
		{
			*t++ = *m_pos_start++;
			*t++ = *m_pos_start++;
			return m_token;
		}

		// detect single char tokens
		if(isToken(*m_pos_start))
		{
			*t++ = *m_pos_start++;
			return m_token;
		}

		// must be normal token; the checks above guarantee that at least
		// one character is copied
		while(m_pos_start != m_pos_end &&
			!isspace((unsigned char)*m_pos_start) &&
			!isToken(*m_pos_start) &&
			!isToken(m_pos_start[0], m_pos_start[1]) &&
			*m_pos_start != '\"')
		{
			if(t - m_token >= 1023) // keep room for the terminator
			{
				setError(m_line_number, "Token exceedes 1024 byte limit");
				return NULL;
			}
			*t++ = *m_pos_start++;
		}
		return m_token;
	}

	// ran out of input while skipping: report it like the check on entry
	memset(m_token, 0, 1024);
	setError(m_line_number, "done");
	return NULL;
}

/*
 * Copies `token` into m_token.
 *
 * Only the token buffer is written; the read position is left alone, so
 * this does not by itself make next() hand the token out again.
 *
 * m_token is treated as a 1024-byte buffer throughout this file, so longer
 * input is truncated to 1023 characters. A NULL token is ignored.
 */
void AdvancedTokenizer::putback(const char *token)
{
	if(!token)
		return;

	size_t len = strlen(token);
	if(len > 1023)
		len = 1023;

	// memmove because callers may pass back the pointer next() returned,
	// which is m_token itself
	memmove(m_token, token, len);
	m_token[len] = '\0';
}
