/* antlrx.c
 *
 * SOFTWARE RIGHTS
 *
 * We reserve no LEGAL rights to the Purdue Compiler Construction Tool
 * Set (PCCTS) -- PCCTS is in the public domain.  An individual or
 * company may do whatever they wish with source code distributed with
 * PCCTS or the code generated by PCCTS, including the incorporation of
 * PCCTS, or its output, into commerical software.
 * 
 * We encourage users to develop software with PCCTS.  However, we do ask
 * that credit is given to us for developing PCCTS.  By "credit",
 * we mean that if you incorporate our source code into one of your
 * programs (commercial product, research project, or otherwise) that you
 * acknowledge this fact somewhere in the documentation, research report,
 * etc...  If you like PCCTS and have developed a nice tool with the
 * output, please mention that you developed it using PCCTS.  In
 * addition, we ask that this header remain intact in our source code.
 * As long as these guidelines are kept, we expect to continue enhancing
 * this system and expect to make other tools available as they are
 * completed.
 *
 * ANTLR 1.20
 * Terence Parr
 * Purdue University
 * With AHPCRC, University of Minnesota
 * 1989-1994
 */
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <stdio.h>

/* Placeholder definition of TokenType.
 * C++ has no forward declaration for enums, so a complete (dummy)
 * definition is placed here, ahead of the #include lines that follow.
 */
enum TokenType {
	TER_HATES_CPP,
	ITS_UTTER_GARBAGE,
	WITH_SOME_GOOD_IDEAS
};

#include "AToken.h"
#include "ATokenStream.h"
#include "antlrx.h"

/* Sizing of the pre-read token buffers, in entries:
 * the initial capacity, and the amount added each time they fill up.
 * (The type is spelled out; "static const NAME = ..." relied on implicit int.)
 */
static const int zzINF_DEF_TOKEN_BUFFER_SIZE = 2000;
static const int zzINF_BUFFER_TOKEN_CHUNK_SIZE = 1000;

                 /* L o o k a h e a d  M a c r o s */

/* Both macros name the parser members token[], token_type[], lap and LLk
 * directly, so they can only be used inside an ANTLRParser member function.
 * Each one expands to an lvalue.
 */
/* NLT: the token[] slot selected by lap, wrapped with LLk-1 */
#define NLT			(token[(lap) & (LLk - 1)])
/* NLA: the matching token_type[] slot --> next LA */
#define NLA			(token_type[(lap) & (LLk - 1)])

/* Table of single-bit masks: bitmask[n] has only bit n set.
 * It is dimensioned with one slot per bit of SetWordType (8 bits per byte
 * assumed), but only the first eight slots are given values here.
 */
SetWordType ANTLRParser::bitmask[sizeof(SetWordType)*8] = {
	1 << 0, 1 << 1, 1 << 2, 1 << 3,
	1 << 4, 1 << 5, 1 << 6, 1 << 7
};

/* Release the k-token lookahead arrays and, when they were built, the
 * buffers holding the pre-read input.
 */
ANTLRParser::
~ANTLRParser()
{
	delete [] token_type;
	delete [] token;

	/* inf_token and _inf_line are assigned only by fill_inf_look(), which
	 * prime_lookahead() runs solely when can_use_inf_look is set.  Guarding
	 * on the same flag keeps free() away from pointers that may never have
	 * been assigned.  They come from calloc/realloc, hence free().
	 */
	if ( can_use_inf_look ) {
		free(inf_token);
		free(_inf_line);		// line-number buffer was previously leaked
	}
}

/* Build a parser that reads from 'lexer' and fill its lookahead buffer.
 *   k            - lookahead depth, stored in LLk.  The buffers are indexed
 *                  with "& (LLk-1)" throughout this file, so k is expected
 *                  to be a power of two.
 *   use_inf_look - nonzero: prime_lookahead() reads the whole input up front
 *   dlook        - nonzero: LA()/LT() index relative to labase instead of lap
 *   ssize        - number of SetWordType words in a token set (bsetsize)
 */
ANTLRParser::
ANTLRParser(ANTLRTokenStream *lexer,
			int k=1,
			int use_inf_look=0,
			int dlook=0,
			int ssize=1)
{
	LLk = k;
	can_use_inf_look = use_inf_look;
	demand_look = dlook;
	bsetsize = ssize;

	guessing = 0;
	inf_lap=0;
	inf_labase = 0;
	inf_last=0;
	token_tbl = NULL;
	eofToken = (TokenType)1;

	// the destructor hands inf_token to free(); give the pre-read buffers
	// a defined value in case fill_inf_look() never runs
	inf_token = NULL;
	_inf_line = NULL;

	// consume() decrements dirty while priming, so start it from a
	// defined value (prime_lookahead() resets it to 0 afterwards)
	dirty = 0;

	// allocate lookahead buffer
	token_type = new TokenType[LLk];
	// no parentheses around the type: a parenthesized type-id may not
	// carry a non-constant array bound
	token = new ANTLRTokenBase *[LLk];
	lap = 0;
	labase = 0;

	/* prime lookahead buffer, point to lexer */
	this->lexer = lexer;
	prime_lookahead();
}

/* Enter guessing mode: snapshot the parser into *st via saveState(), set
 * the guessing flag, and record a jump target in guess_start.state.
 * Returns whatever setjmp() returns: 0 on the direct call, otherwise the
 * value passed to the longjmp() that lands back here.
 *
 * NOTE(review): the jump buffer is filled inside this function, which then
 * returns.  The C standard leaves longjmp() undefined once the function
 * that called setjmp() has returned -- confirm how callers use guess_start
 * before relying on this.
 */
int ANTLRParser::
guess(ANTLRParserState *st)
{
	saveState(st);
	guessing = 1;
	return setjmp(guess_start.state);
}


/* Hand out the next token from the pre-read buffer.
 * While inf_labase is below inf_last, the token stored at inf_labase is
 * returned and inf_labase advances.  After that, every call returns the
 * address of a function-static stand-in whose type is set to eofToken.
 */
ANTLRTokenBase *
ANTLRParser::
inf_gettok()
{
	static ANTLRTokenBase eofStandIn;

	eofStandIn.setType(eofToken);
	if ( inf_labase < inf_last ) {
		ANTLRTokenBase *next = inf_token[inf_labase];
		inf_labase++;
		return next;
	}
	return &eofStandIn;
}

/* Copy the parser's guessing and lookahead state into *buf so that
 * restoreState() can put it back later.
 */
void ANTLRParser::
saveState(ANTLRParserState *buf)
{
	int slot;

	/* the LLk-entry lookahead buffers */
	for (slot = 0; slot < LLk; ++slot) {
		buf->token_type[slot] = token_type[slot];
		buf->token[slot] = token[slot];
	}
	buf->lap = lap;
	buf->labase = labase;
	buf->dirty = dirty;

	/* position within the pre-read input */
	buf->inf_labase = inf_labase;
	buf->inf_last = inf_last;

	/* guess bookkeeping */
	buf->guessing = guessing;
	buf->guess_start = guess_start;
}

/* Load the parser's guessing and lookahead state back from *buf
 * (the counterpart of saveState()).
 */
void ANTLRParser::
restoreState(ANTLRParserState *buf)
{
	int slot;

	/* the LLk-entry lookahead buffers */
	for (slot = 0; slot < LLk; ++slot) {
		token_type[slot] = buf->token_type[slot];
		token[slot] = buf->token[slot];
	}
	lap = buf->lap;
	labase = buf->labase;
	dirty = buf->dirty;

	/* position within the pre-read input */
	inf_labase = buf->inf_labase;
	inf_last = buf->inf_last;

	/* guess bookkeeping */
	guessing = buf->guessing;
	guess_start = buf->guess_start;
}

/* Get the next symbol from the input stream; put it into lookahead buffer;
 * fill token_type[] fast reference cache also.  NLA is the next place where
 * a lookahead ANTLRTokenBase should go.
 */
void ANTLRParser::
consume()
{
	if ( can_use_inf_look ) NLT = inf_gettok();
	else NLT = lexer->nextToken();
	NLA = NLT->getType();
	dirty--;
	lap = (lap+1)&(LLk-1);
}

/* Make k symbols of lookahead available.
 * dirty is incremented by _match() and decremented by consume(), so
 * LLk-dirty slots are current on entry; the k-(LLk-dirty) missing ones
 * are fetched with consume().
 */
void
ANTLRParser::
look(int k)
{
	/* Compute the count once.  consume() decrements dirty, so testing
	 * against k-(LLk-dirty) on every pass shrank the bound while looping
	 * and fetched too few tokens.
	 */
	const int needed = k - (LLk - dirty);
	int i;

	for (i=1; i<=needed; i++) consume();
}

/* Type of the i-th lookahead token (i counts from 1).
 * The slot is counted from labase in demand-lookahead mode and from lap
 * otherwise, wrapping with LLk-1.
 */
TokenType ANTLRParser::
LA(int i)
{
	int origin;

	if ( demand_look ) origin = labase;
	else origin = lap;
	return token_type[(origin + i - 1) & (LLk-1)];
}

/* The i-th lookahead token itself (i counts from 1).
 * Uses the same slot selection as LA(): counted from labase in
 * demand-lookahead mode and from lap otherwise, wrapping with LLk-1.
 */
ANTLRTokenBase *ANTLRParser::
LT(int i)
{
	int origin;

	if ( demand_look ) origin = labase;
	else origin = lap;
	return token[(origin + i - 1) & (LLk-1)];
}

/* Read the entire token stream from the lexer into two parallel heap arrays:
 *   inf_token[] - the token pointers returned by lexer->nextToken()
 *   _inf_line[] - the value of lexer->line() sampled just BEFORE the
 *                 corresponding token was fetched
 * Both arrays start with zzINF_DEF_TOKEN_BUFFER_SIZE entries and grow by
 * zzINF_BUFFER_TOKEN_CHUNK_SIZE entries whenever they are full.  Reading
 * stops after the token whose type equals eofToken has been stored.
 * On return inf_labase is 0 and inf_last is the index of that last entry.
 * Any allocation failure goes to ANTLRPanic().
 */
void ANTLRParser::
fill_inf_look(void)
{
	int tok, line;
	int inf_token_buffer_size = zzINF_DEF_TOKEN_BUFFER_SIZE;
	/* NOTE(review): this local hides the inf_lap that the constructor
	 * zeroes; that outer inf_lap is not updated by this function.
	 */
	int inf_lap = 0;
	ANTLRTokenBase *next;

	inf_token = (ANTLRTokenBase **) calloc(inf_token_buffer_size,sizeof(ANTLRTokenBase *));
	if ( inf_token == NULL )
	{
		ANTLRPanic(eMsgd("cannot allocate token buffer (%d tokens)\n",
						 inf_token_buffer_size));
	}
	_inf_line = (int *) calloc(inf_token_buffer_size,sizeof(int));
	if ( _inf_line == NULL )
	{
		ANTLRPanic(eMsgd("cannot allocate line buffer (%d ints)\n",
						 inf_token_buffer_size));
	}

	/* get tokens */
	do {
		/* line number is taken before the token is requested */
		line = lexer->line();
/*** NeXT Hack ***/
/* Only compiled when Next is defined: skip every token for which
 * is_fileMarker() is true; then, if the token is a Tok_pragma, keep
 * fetching while the peeked token is on the same line as the current one.
 */
#ifdef Next
	   do {
	      next = lexer->nextToken();
	   }while(next->is_fileMarker());
	   if (next->getType() == Tok_pragma)
	      {
		 do {
		    if
		       (((TokenStream*)lexer)->tokenLine(((TokenStream*)lexer)->peekToken()) == ((TokenStream*)lexer)->tokenLine(next))
		       next = lexer->nextToken();
		    else
		       break;
		 }while(1);
	      }
#else
		next = lexer->nextToken();
#endif
		/* grow both arrays until slot inf_lap exists; realloc's result
		 * replaces the pointer directly, a NULL result ends in ANTLRPanic()
		 */
		while ( inf_lap>=inf_token_buffer_size )
		{
			inf_token_buffer_size += zzINF_BUFFER_TOKEN_CHUNK_SIZE; 
			inf_token = (ANTLRTokenBase **) realloc(inf_token,
										 inf_token_buffer_size*sizeof(ANTLRTokenBase *));
			if ( inf_token == NULL )
			{
				ANTLRPanic(eMsgd("cannot allocate token buffer (%d tokens)\n",
								 inf_token_buffer_size));
			}
			_inf_line = (int *) realloc(_inf_line,
										 inf_token_buffer_size*sizeof(int));
			if ( _inf_line == NULL )
			{
				ANTLRPanic(eMsgd("cannot allocate line buffer (%d ints)\n",
								 inf_token_buffer_size));
			}
		}
		/* record token of input and line # symbol */
		inf_token[inf_lap] = next;
        _inf_line[inf_lap] = line;
		tok = next->getType();
		inf_lap++;
	} while (tok!=eofToken);
	inf_labase = 0;
	/* inf_lap is one past the last stored entry, so this is its index */
	inf_last = inf_lap-1;
}

/* Load the lookahead buffer with LLk symbols (demand-lookahead mode
 * included).  When can_use_inf_look is set, the whole input is read first
 * through fill_inf_look().  The ring positions and dirty are reset to 0
 * afterwards.
 */
void
ANTLRParser::
prime_lookahead()
{
	int remaining = LLk;

	if ( can_use_inf_look ) {
		fill_inf_look();
	}
	while ( remaining > 0 ) {
		consume();
		remaining--;
	}
	labase = 0;
	lap = 0;
	dirty = 0;
}

/* Test whether the current input symbol, LA(1), is '_t'.
 * Returns 1 on a match, after bumping dirty and advancing labase.
 * Returns 0 on a mismatch, after storing _t in *MissTok, LT(1) in *BadTok
 * and NULL in *MissSet.  MissText is accepted but never written here.
 *
 * One token is consumed first when dirty has reached LLk.  Outside
 * demand-lookahead mode dirty is always 0, so that step never runs and
 * the same routine serves both modes.
 */
int ANTLRParser::
_match(TokenType _t, ANTLRChar **MissText,
	   TokenType *MissTok, ANTLRTokenBase **BadTok,
	   SetWordType **MissSet)
{
	if ( dirty==LLk ) consume();

	if ( LA(1)==_t ) {
		++dirty;
		// labase maintained even if !demand look
		labase = (labase+1)&(LLk-1);
		return 1;
	}

	*MissTok = _t;
	*BadTok = LT(1);
	*MissSet = NULL;
	return 0;
}

/* Error recovery: skip input until LA(1) is in the resynch set or is
 * eofToken.  A token type t counts as "in the set" when wd[t]&mask is
 * nonzero.
 *
 * 'consumed' is a function-static flag (shared by every call) recording
 * whether the previous resynch moved the input forward.
 */
void ANTLRParser::
resynch(SetWordType *wd,SetWordType mask)
{
	static int consumed = 1;

	/* if you enter here without having consumed a token from last resynch
	 * force a token consumption.
	 */
	if ( !consumed ) {
		consume();
		/* a token has now been consumed; without recording that, the
		 * flag stayed 0 forever and every later call consumed exactly
		 * one token instead of scanning for the resynch set
		 */
		consumed = 1;
		return;
	}

	/* if current token is in resynch set, we've got what we wanted */
	if ( wd[LA(1)]&mask || LA(1) == eofToken ) {consumed=0; return;}

	/* scan until we find something in the resynch set */
	while ( !(wd[LA(1)]&mask) && LA(1) != eofToken ) {consume();}
	consumed=1;
}

/* Standard syntax-error report, written to stderr; redefine in a subclass
 * to change it or if you use your own scanner.
 *   tok    - offending token; its text is quoted in the message
 *   egroup - name of the enclosing group, appended as " in <egroup>" when
 *            non-NULL and non-empty
 *   eset   - set of expected tokens (may be NULL)
 *   etok   - single expected token, used when eset holds no elements
 *   k      - 1 selects the " missing ..." wording, anything else the
 *            "; "<text>" not [in] ..." wording
 * The line number comes from inf_line(1) when the input was pre-read and
 * from the lexer otherwise.
 */
void ANTLRParser::
syn(ANTLRTokenBase *tok, ANTLRChar *egroup, SetWordType *eset,
	TokenType etok, int k)
{
	int line;
	const ANTLRChar *text;

	if ( can_use_inf_look ) line = inf_line(1);
	else line = lexer->line();

	/* Quote the offending token itself.  lexer->lextext() is presumably
	 * the text of whatever the lexer scanned last, which is not this
	 * token once the whole input has been pre-read or when more than one
	 * token of lookahead is buffered.  Fall back to it only if no token
	 * was supplied.
	 */
	if ( tok != NULL ) text = tok->getText();
	else text = lexer->lextext();

	fprintf(stderr, "line %d: syntax error at \"%s\"", line, text);
	if ( !etok && !eset ) {fprintf(stderr, "\n"); return;}
	if ( k==1 ) fprintf(stderr, " missing");
	else
	{
		fprintf(stderr, "; \"%s\" not", text);
		if ( set_deg(eset)>1 ) fprintf(stderr, " in");
	}
	if ( set_deg(eset)>0 ) edecode(eset);
	else fprintf(stderr, " %s", token_tbl[etok]);
	/* tolerate a missing group name instead of handing NULL to strlen() */
	if ( egroup != NULL && egroup[0] != '\0' ) fprintf(stderr, " in %s", egroup);
	fprintf(stderr, "\n");
}

/* Set membership test: nonzero when token type b is in set p.
 * DIVWORD(b) picks the word of the set and MODWORD(b) the bit within it.
 */
int ANTLRParser::
set_el(TokenType b, SetWordType *p)
{
	const SetWordType word = p[DIVWORD(b)];
	const SetWordType bit = bitmask[MODWORD(b)];

	return( word & bit );
}

/* Degree of a set: the number of elements present in the bsetsize words
 * starting at a.  Every bit that appears in the bitmask table is tested,
 * so all word bits are assumed to be in use.  A NULL set has degree 0.
 */
int ANTLRParser::
set_deg(SetWordType *a)
{
	/* check before doing any pointer arithmetic on a */
	if ( a == NULL ) return 0;

	const SetWordType *const endp = a + bsetsize;
	const SetWordType *const lastBit = bitmask + sizeof(SetWordType)*8;
	int degree = 0;

	for (const SetWordType *p = a; p < endp; p++)
	{
		for (const SetWordType *b = bitmask; b < lastBit; b++)
		{
			if ( *p & *b ) ++degree;
		}
	}

	return(degree);
}

/* Print the members of set a to stderr as " name name ...", taking each
 * name from token_tbl[].  The list is wrapped in " {" and " }" when the
 * set has more than one element.  The element counter advances once per
 * bitmask entry and is a SetWordType, as before.
 */
void ANTLRParser::
edecode(SetWordType *a)
{
	SetWordType *word = a;
	SetWordType *const stop = &(a[bsetsize]);
	SetWordType *const lastBit = &(bitmask[sizeof(SetWordType)*8]);
	SetWordType e = 0;
	const int braces = ( set_deg(a)>1 );

	if ( braces ) fprintf(stderr, " {");
	do {
		const SetWordType bits = *word;
		SetWordType *b = &(bitmask[0]);
		do {
			if ( bits & *b ) fprintf(stderr, " %s", token_tbl[e]);
			e++;
			b++;
		} while ( b < lastBit );
		word++;
	} while ( word < stop );
	if ( braces ) fprintf(stderr, " }");
}

/* input looks like:
 *      zzFAIL(k, e1, e2, ...,&zzMissSet,&zzMissText,&zzBadTok,&zzBadText,&zzErrk)
 * i.e. k lookahead sets (SetWordType *) followed by five output pointers.
 * The outputs are set here to describe the token that did not match
 * (and which set it wasn't a member of).
 *
 * The text of LT(1)..LT(i) is collected, space separated, in a static
 * buffer; *miss_text points into that buffer when a mismatching position
 * is found, so it is only valid until the next call.
 * At most 20 sets are supported; more goes to ANTLRPanic().
 */
void
ANTLRParser::FAIL(int k, ...)
{
    static ANTLRChar text[1000];
    static SetWordType *f[20];
    const size_t text_cap = sizeof(text)/sizeof(text[0]);
    SetWordType **miss_set;
    ANTLRChar **miss_text;
    ANTLRTokenBase **bad_tok;
    ANTLRChar **bad_text;
    unsigned *err_k;
    int i;
    va_list ap;

    va_start(ap, k);

    text[0] = '\0';
	if ( k>20 ) ANTLRPanic("FAIL: overflowed buffer");
    for (i=1; i<=k; i++)    /* collect all lookahead sets */
    {
        f[i-1] = va_arg(ap, SetWordType *);
    }
    for (i=1; i<=k; i++)    /* look for offending token */
    {
        /* bounded appends: overlong token text is truncated rather than
         * written past the end of text[]
         */
        if ( i>1 ) strncat(text, " ", text_cap - strlen(text) - 1);
        strncat(text, LT(i)->getText(), text_cap - strlen(text) - 1);
        if ( !set_el(LA(i), f[i-1]) ) break;
    }
    miss_set = va_arg(ap, SetWordType **);
    miss_text = va_arg(ap, ANTLRChar **);
    bad_tok = va_arg(ap, ANTLRTokenBase **);
    bad_text = va_arg(ap, ANTLRChar **);
    err_k = va_arg(ap, unsigned *);
    va_end(ap);             /* every argument has been read */

    if ( i>k )
    {
        /* bad; lookahead is permutation that cannot be matched,
         * but, the ith token of lookahead is valid at the ith position
         * (The old LL sub 1 (k) versus LL(k) parsing technique)
         */
        *miss_set = NULL;
        *miss_text = lexer->lextext();
        *bad_tok = LT(1);
        /* bad_tok is a pointer to the caller's token pointer */
        *bad_text = (*bad_tok)->getText();
        *err_k = k;
        return;
    }
/*  fprintf(stderr, "%s not in %dth set\n", zztokens[LA(i)], i);*/
    *miss_set = f[i-1];
    *miss_text = text;
    *bad_tok = LT(i);
    *bad_text = (*bad_tok)->getText();
    if ( i==1 ) *err_k = 1;
    else *err_k = k;
}

/* Scratch buffer shared by eMsgd(), eMsg() and eMsg2(); each call
 * overwrites whatever the previous one produced.
 */
static ANTLRChar eMsgBuffer[500];

/* Format 'err' (a printf-style format taking one int) with d into the
 * shared message buffer and return that buffer.  Output that does not fit
 * is truncated instead of running past the end of the buffer.
 */
ANTLRChar *ANTLRParser::
eMsgd(ANTLRChar *err,int d)
{
	snprintf(eMsgBuffer, sizeof(eMsgBuffer), err, d);
	return eMsgBuffer;
}

/* Format 'err' (a printf-style format taking one string) with s into the
 * file's shared message buffer and return that buffer.  Output that does
 * not fit is truncated instead of running past the end of the buffer.
 */
ANTLRChar *ANTLRParser::
eMsg(ANTLRChar *err, ANTLRChar *s)
{
	snprintf(eMsgBuffer, sizeof(eMsgBuffer), err, s);
	return eMsgBuffer;
}

/* Format 'err' (a printf-style format taking two strings) with s and t
 * into the file's shared message buffer and return that buffer.  Output
 * that does not fit is truncated instead of running past the end of the
 * buffer.
 */
ANTLRChar *ANTLRParser::
eMsg2(ANTLRChar *err,ANTLRChar *s, ANTLRChar *t)
{
	snprintf(eMsgBuffer, sizeof(eMsgBuffer), err, s, t);
	return eMsgBuffer;
}

/* Fatal error: print "ANTLR panic: <msg>" on stderr and terminate the
 * program with exit status -1.  Does not return.
 */
void ANTLRParser::
ANTLRPanic(ANTLRChar *msg)
{
	static const char panicFormat[] = "ANTLR panic: %s\n";
	const int panicStatus = -1;

	fprintf(stderr, panicFormat, msg);
	exit(panicStatus);
}
