/* Constant multiply code generator for pcc.  To use in pcc, just delete
   the driver -- main and testrc. cm() becomes the main program.
   L. M. Breed  12/85
*/

#include <stdio.h>
#include <stdlib.h>

/* Listing control and statistics for the instruction emitters. */
static int display=1,	/* nonzero: print each instruction as it is generated */
	   instcount;	/* instructions generated (not merely counted) so far */

/* reg[] simulates register contents so that the "# cm" comment printed with
   each instruction can show the value computed so far.  regsn[r] is the
   sign-inversion flag for register r: nonzero means the register holds the
   negation of the value it stands for (see adors).
*/
static int reg[16], regsn[16];
static int regno[3];	/* register numbers, indexed by rs / rd / rt below */
static int valin, valto;	/* Source and target for code generation */

/* Register roles: indices into regno[], and the values held by valin/valto */
#define rs 0
#define rd 1
#define rt 2

/* Operation codes for adors(); its switch depends on these exact values */
#define Add 0
#define Sub 1
#define Sf 2

/* These routines are called before their definitions appear.  Declaring them
   here keeps them from being implicitly declared as external functions and
   then redefined as static.
*/
static int cm1(), prsh();
static int add(), sub(), sf(), sli(), slpi(), twoc(), cas();


/* Return 1 if the registers in roles ra and rb form a pair, i.e. their
   register numbers differ only in the low-order bit; otherwise return 0.
*/
static int paired(ra,rb)
register int ra,rb;
{
	register int differ = regno[ra] ^ regno[rb];

	return differ == 1;
}

static int sign;	/* not referenced in the visible part of this file */
static int bc[32];	/* Lengths of successive strings of 1's and 0's */
static int bci;		/* index into bc: fillbc leaves it at the entry count */
static int counting;	/* = 1 if counting instrs, not generating them */
static int icount;	/* the count */

/* Make the register in role ra hold the register in role rb shifted left
   n places.  The sign-inversion flag travels with the value, except that
   when a separate move is needed anyway and the source is inverted, the
   move is done with a twoc so the copy comes out with its sign rectified.
*/

static mash(ra,rb,n)
register int ra,rb,n;
{
	register int dst=regno[ra], src=regno[rb];

	regsn[dst]=regsn[src];

	if (dst==src) {			/* same register: shift in place */
	    sli(dst,n);
	    return;
	}
	if (paired(ra,rb)) {		/* paired shift lands in the other half */
	    slpi(src,n);
	    return;
	}
	if (n==1 && src!=0) {		/* x<<1 is x+x: one cas moves and doubles */
	    cas(dst,src,src);
	    return;
	}

	/* General case: move (negating if that fixes the sign), then shift */
	if (regsn[src]) {
	    twoc(dst,src);
	    regsn[dst]=0;
	}
	else
	    cas(dst,src,0);
	sli(dst,n);
}
	
/* Combine the register in role rb into the register in role ra.  op says
   what is wanted in terms of the values the registers stand for:
   Add (ra+rb), Sub (ra-rb) or Sf (rb-ra).  The instruction actually emitted
   also depends on the two sign-inversion flags, and ra's flag is updated to
   describe the result.  On return "valin" names ra as the role that now
   holds the value.
*/

static adors(op,ra,rb)
	register int op,ra,rb;
{
	register int dst=regno[ra], src=regno[rb];
	/* key = op*4 + 2*(dst inverted) + (src inverted);
	   relies on Add=0, Sub=1, Sf=2 */
	register int key=(op<<2) + (regsn[dst]<<1) + regsn[src];

	if (key==0 || key==3 || key==5 || key==6) {
	    /* add ra rb, add -ra -rb, sub ra -rb, sub -ra rb: flag unchanged */
	    add(dst,src);
	}
	else if (key==1 || key==4) {
	    /* add ra -rb, sub ra rb */
	    sub(dst,src);
	}
	else if (key==2 || key==7 || key==8) {
	    /* add -ra rb, sub -ra -rb, sf ra rb: result is not inverted */
	    sf(dst,src);
	    regsn[dst]=0;
	}
	else if (key==9 || key==10) {
	    /* sf ra -rb, sf -ra rb: the sum carries the opposite flag */
	    add(dst,src);
	    regsn[dst]=!regsn[dst];
	}
	else if (key==11) {
	    /* sf -ra -rb */
	    sub(dst,src);
	    regsn[dst]=0;
	}
	valin=ra;
}

/* Fill bc with counts of lengths of 0 strings and 1 strings, scanning con
   from its low-order bit upward: bc[0] is the length of the low-order string
   of 0's, bc[1] the string of 1's above it, and so on alternately.  bci is
   left equal to the number of entries stored.  con must be odd; this assures
   bc[0]==0.

   The bits are scanned in an unsigned copy so that the right shifts bring in
   zeroes.  Shifting a negative int may instead propagate the sign bit, in
   which case the value becomes all ones, the 1's loop never terminates and
   bc is overrun.
*/
static fillbc(con)
	register int con;
{
	register unsigned bits = con;
	register int n;

	bci=0;
	while (bits) {
	    n=0;
	    while (!(bits&1)) { n++; bits >>= 1;}	/* string of 0's */
	    bc[bci++] = n;
	    n=0;
	    while (bits&1) {n++; bits>>=1;}		/* string of 1's */
	    bc[bci++] = n;
	}
}

static int cost(con)
	register int con;
{
	register int svalin = valin;
	register int sregsn1=regsn[regno[rd]],
	             sregsn2=regsn[regno[rt]];

	counting=1;
	icount=0;
	cm1(con);
	counting=0;
	valin=svalin;	/* k l u g e */
	regsn[regno[rd]]=sregsn1;
	regsn[regno[rt]]=sregsn2;
	return icount;
}

/* The main program.  srs, drs, trs are strings representing source, destination
and temp registers; the register number is read from the text following the
first character of each string.  source==dest, source==temp are possible and
affect what code is generated; dest and temp may be a register pair, which is
recognized and may permit code improvement.  con is the constant multiplier.

The generated code leaves the product in the destination register.  The
work is done in four steps: strip the power of 2 from con, pick which factors
of the form 2**n +/- 1 to peel off, generate the remaining "kernel" with cm1,
then apply the factors, the power of 2 and any outstanding sign correction.
*/
cm(srs,drs,trs,con)
	register char *srs, *drs, *trs;
	register int con;
{
	int twos=0,	/* power of 2 extracted from con */
	    td=30,	/* exponent n of the 2**n +/- 1 factor being tried */
	    fti=0,	/* number of entries in ft */
	    ft[13],	/* 15, 5, ... 5 is max no. of factors */
			/* entry is +n for 2**n + 1, -n for 2**n - 1 */
	    mfti, mcon, mcost, prcost;	/* best partition so far, trial cost */

	regno[rd]=atoi(drs+1);
	regno[rt]=atoi(trs+1);
	regno[rs]=atoi(srs+1);
	valin=rs;

	reg[regno[rs]]= 1; /* generated comments parallel computed values */
	
/* Record operand sign.  If multiply code doesn't rectify negation,
   mopup code will.  The negation goes through unsigned arithmetic because
   plain -con overflows for -2**31; that value is left with its bit pattern
   unchanged and is handled by the (unsigned) shift below.
*/
	if (con<0) {
	    con = (int)(0 - (unsigned)con);
	    regsn[regno[rs]]=1;
	}
	else
	    regsn[regno[rs]]=0;

	if (con==0) {
	    sub(regno[rd],regno[rd]);	/* rd - rd: product is zero */
	    return;
	}

/* Choose target register for bitwise multiply sequence -- generally rd,
   unless rd==rs or we can save an instruction via a paired rs and rt.
*/

	if (regno[rs]==regno[rd] || (paired(rs,rt)))
	     valto=rt;
	else valto=rd;

/* Extract factor of 2**n.  (unsigned) is for -2**31. */

	while (!(con&1)) { twos++; con = (unsigned)con >> 1;}

/* Extract factors of 2**n + 1 and 2**n - 1, starting with largest.  
   Note which partitioning into factors will produce the shortest code
   sequence.  (Only a few, easy partitions are considered.)
   Each factor is charged two instructions, or three when rd and rt are
   not a pair.
*/
	mfti=0;
	mcon=con;
	mcost=cost(con);

	while (td>1) {
	    while (0==con%((1<<td)+1)) {
		con = con/((1<<td)+1);
		ft[fti++] = td;	
		prcost = cost(con) + fti+fti+(paired(rd,rt)?0:fti);
		if (prcost < mcost) {
		    mcost=prcost;
		    mfti=fti;
		    mcon=con;
		}
	    }
	    while (0==con%((1<<td)-1)) {
		con = con/((1<<td)-1);
		ft[fti++] = -td;	
		prcost = cost(con) + fti+fti+(paired(rd,rt)?0:fti);
		if (prcost < mcost) {
		    mcost=prcost;
		    mfti=fti;
		    mcon=con;
		}
	    }
	    td--;
	}
	fti=mfti;

/* Now 	ft[0..fti-1] represents factors of 2**n +/- 1 that we handle separately;
   mcon represents the kernel to be multiplied by bit-string methods.
   Build that kernel in the dest register, unless it's also the source or
   if we might save an instruction through use of a paired source and temp.
   The dest and temp registers are never the same.
*/
   
	cm1(mcon);

	/* Apply the chosen factors, last one found first.  The op codes
	   passed to adors are 0 = Add, 1 = Sub, 2 = Sf.
	*/
	while (--fti>=0) {
	    if (regno[valin]==regno[rd]) {
		/* value already in rd: shift a copy into rt, combine into rd */
		mash(rt,valin,abs(ft[fti]));
		adors(ft[fti]<0?2:0,rd,rt);
	    }
	    else if (valin==rs && paired(rs,rt)) {
		/* no code generated yet and rs/rt are a pair */
		mash(rt,rs,abs(ft[fti]));
		if (ft[fti]>0) {
		    cas(regno[rd],regno[rt],regno[rs]);
			/* rs can't be r0, since r0's pair never participates */
		    regsn[regno[rd]]=regsn[regno[rt]];
		    valin=rd;
		}
		else adors(1,rt,rs);
	    }
	    else {
		/* shift a copy into rd, then add or subtract the original */
		mash(rd,valin,abs(ft[fti]));
		adors(ft[fti]<0,rd,valin);
	    }
	}

	/* Mop up: power of 2, outstanding sign inversion, final move to rd */
	if (twos) {
	    mash(rd,valin,twos);
	    valin=rd;
	}
	if (regsn[regno[valin]]) {
	    twoc(regno[rd],regno[valin]);
	    regsn[regno[rd]]=0;
	    valin=rd;
	}
	if (regno[valin]!=regno[rd])
	    cas(regno[rd],regno[valin],0);
}

/* Generate multiply code based on strings of 1's and 0's in con.
   con is known to be odd.  The source is always rs; the target,
   either rd or rt, was chosen by cm().  rs's sign may be inverted
   because con was originally negative; cm1 will attempt to rectify it.
   As a side effect of code generation, valin = target.  If valin==rs,
   no code was generated.

   The strings are taken from bc[] (filled by fillbc) starting with the
   highest-numbered entry, i.e. from the high-order end of con down to
   bit 0.  Every step is a prsh(): shift what has been built so far left,
   then add or subtract rs.  The last argument of prsh is nonzero on the
   step that reaches bit 0.
*/

static cm1(con)
	register int con;
{
	register int n,		/* length of the string just fetched from bc */
	    p			/* length of the string before it (shift count) */
	;

	fillbc(con);

/* For each string of 1's, generate either a single add (one 1)
   or an add at the left end and a subtract at the right end (multiple 1's)
*/

	/* Looking at initial ones string */
	if (1==(n=bc[--bci])) {		/*  10...	case		*/
	    if (0==(n=bc[--bci])) 	/*  +				*/
		return;			/*  identity case; no code.	*/
	    p=n;			/* p = no. of zeroes below the leading 1 */
	    goto C;
	}
	p=n;
A:	/* p = no. of ones; looking at zeroes string */
	n=bc[--bci];
	if (p==2 && valin==rs && regsn[regno[rs]]==0) {
					/* 110...       case		*/
	    prsh(1,Add,n==0);		/* First step best done with a	*/
	    				/*    cas, add if possible	*/
	}
	else if (n==1) {		/*  111101...1	case		*/
	    /* a lone 0 between two strings of 1's: subtract at the 0's
	       position and carry on with the next string of 1's */
	    prsh(p+1,Sub,0);		/* +    -    -			*/
	    p=bc[--bci];
	    goto A;
	}
	else 				/*  1...1000	case		*/
	    prsh(p,Sub,n==0);		/* +    -    -			*/
	/* only bc[0] can be a zero-length string of 0's: bit 0 reached */
	if (0==(p=n)) return;
	
	
C:	/* Looking at ones string */
	/* p = no. of zeroes just passed */
	if (1!=(n=bc[--bci])) {
	    /* multiple 1's: add at the left end now; the subtract at the
	       right end is generated at A */
	    prsh(p,Add,0);
	    p=n;
	    goto A;
	}
	/* single 1: step over the zeroes and the 1 together, then add */
	n=bc[--bci];
	prsh(p+1,Add,n==0);

	/* Just passed a singleton one, looking at following zeroes	*/
	if (0!=(p=n)) goto C;
}	

static prsh(ct,op,final)
	register int ct, op, final;

{
	register int rno=regno[valto], rns=regno[rs];

	mash(valto,valin,ct);
	if (final) {	/* last-ditch attempt to put result in rd */
	    if (rns==regno[rd]) /* and therefore valto is rt */
		adors(op*2,rd,valto);	/* Add->Add; Sub->Sf */
	    else if (((op==Add)^regsn[rno]^regsn[rns]) && (rno!=0 || rns!=0)) {
		if (rno!=0)
		    cas(regno[rd],rns,rno);
		else  
		    cas(regno[rd],rno,rns);
		regsn[regno[rd]]=regsn[rno];
		valin=rd;
	    }
	    else adors(op,valto,rs);
	}
	else
	    adors(op,valto,rs);
}


/* Emit "a ra,rb" (ra = ra + rb).  While counting, only icount is bumped;
   otherwise the simulated registers and instcount are updated and the
   instruction is printed if display is set.
*/
static add(ra,rb)
	register int ra,rb;
{
	if (counting) {
	    icount++;
	    return;
	}
	reg[ra] += reg[rb];
	instcount++;
	if (!display)
	    return;
	printf("    a      r%d,r%d \t\t # cm %d\n",ra,rb,reg[ra]);
}


/* Emit "s ra,rb" (ra = ra - rb).  While counting, only icount is bumped;
   otherwise the simulated registers and instcount are updated and the
   instruction is printed if display is set.
*/
static sub(ra,rb)
	register int ra,rb;
{
	if (counting) {
	    icount++;
	    return;
	}
	reg[ra] -= reg[rb];
	instcount++;
	if (!display)
	    return;
	printf("    s      r%d,r%d \t\t # cm %d\n",ra,rb,reg[ra]);
}

/* Emit "sf ra,rb", subtract-from (ra = rb - ra).  While counting, only
   icount is bumped; otherwise the simulated registers and instcount are
   updated and the instruction is printed if display is set.
*/
static sf(ra,rb)
	register int ra,rb;
{
	register int diff;

	if (counting) {
	    icount++;
	    return;
	}
	diff = reg[rb]-reg[ra];
	reg[ra] = diff;
	instcount++;
	if (!display)
	    return;
	printf("    sf     r%d,r%d \t\t # cm %d\n",ra,rb,reg[ra]);
}

/* Emit a shift of ra left by n places, in place.  Shifts of 16 or more are
   printed as "sli16" with n-16 as the operand.  While counting, only icount
   is bumped.
*/
static sli(ra,n)
	register int ra,n;
{
	if (counting) {
	    icount++;
	    return;
	}
	reg[ra] <<= n;
	instcount++;
	if (!display)
	    return;
	if (n>=16)
	    printf("    sli16  r%d,%d \t\t # cm %d\n",ra,n-16,reg[ra]);
	else
	    printf("    sli    r%d,%d \t\t # cm %d\n",ra,n,reg[ra]);
}

/* Emit a paired shift: ra shifted left n places is placed in the other
   register of ra's pair (register number ra^1).  Shifts of 16 or more are
   printed as "slpi16" with n-16 as the operand.  While counting, only icount
   is bumped.
*/
static slpi(ra,n)
	register int ra,n;
{
	register int mate;

	if (counting) {
	    icount++;
	    return;
	}
	mate = ra^1;
	reg[mate] = reg[ra]<<n;
	instcount++;
	if (!display)
	    return;
	if (n>=16)
	    printf("    slpi16 r%d,%d \t\t # cm %d\n",ra,n-16,reg[mate]);
	else
	    printf("    slpi   r%d,%d \t\t # cm %d\n",ra,n,reg[mate]);
}

/* Emit "twoc ra,rb" (ra = -rb).  While counting, only icount is bumped;
   otherwise the simulated registers and instcount are updated and the
   instruction is printed if display is set.
*/
static twoc(ra,rb)
	register int ra,rb;
{
	if (counting) {
	    icount++;
	    return;
	}
	reg[ra] = 0-reg[rb];
	instcount++;
	if (!display)
	    return;
	printf("    twoc   r%d,r%d \t\t # cm %d\n",
	    ra,rb,reg[ra]);
}

/* Emit "cas ra,rb,rc" (ra = rb + rc).  A register number of 0 given as rc
   contributes zero instead of the contents of register 0, so cas(ra,rb,0)
   is a plain move.  While counting, only icount is bumped.
*/
static cas(ra,rb,rc)
	register int ra,rb,rc;
{
	register int addend;

	if (counting) {
	    icount++;
	    return;
	}
	addend = 0;
	if (rc)
	    addend = reg[rc];
	reg[ra] = reg[rb]+addend;
	instcount++;
	if (!display)
	    return;
	printf("    cas    r%d,r%d,r%d \t\t # cm %d\n",
	    ra,rb,rc,reg[ra]);
}
