/*---------------------------------------------------------------------
 *        [ Copyright (c) 1999 Alpha Processor Inc.] - Unpublished Work
 *          All rights reserved
 * 
 *    This file contains source code written by Alpha Processor, Inc.
 *    It may not be used without express written permission. The
 *    expression of the information contained herein is protected under
 *    federal copyright laws as an unpublished work and all copying
 *    without permission is prohibited and may be subject to criminal
 *    and civil penalties. Alpha Processor, Inc.  assumes no
 *    responsibility for errors, omissions, or damages caused by the use
 *    of these programs or from use of the information contained herein.
 *  
 *-------------------------------------------------------------------*/


#include <alpha/regdef.h>

        .arch   ev6

        .text
	.align	4
/*
 * vgafill - write an address pattern over a 32K window.
 *
 * Reads no argument registers.  The start address is loaded from the
 * quadword at vgabaseaddr; the end address is start + 32K.  Every
 * longword in the window is written (stl) with the low 32 bits of its
 * own address, eight longwords (32 bytes) per loop iteration.
 *
 * The loop test is at the bottom, so at least one 32-byte block is
 * always written.
 *
 * Registers written: t0-t9.  v0 is not written.
 *
 * NOTE(review): vgabaseaddr is addressed through gp and this routine
 * contains no ldgp, so it appears to rely on gp already being valid on
 * entry - confirm against the callers.
 */
        .globl  vgafill
        .ent    vgafill
vgafill:
	/* generate the address */
	ldq	t0, vgabaseaddr(gp)	 /* base address now in t0 */

	lda	t8, 32(zero)
	sll	t8, 10, t8		/* 32 << 10 = 32K window size */
	addq	t8, t0, t8		/* t8 = end address (base + 32K) */
	
vgafill_loop:
	/* t0 = start of current 32-byte block; t1-t7 = t0+4 .. t0+28.
	 * Each store writes the low 32 bits of the target address. */
        stl     t0, 0(t0)
        addq    t0, 4, t1
        stl     t1, 0(t1)
        addq    t0, 8, t2
        stl     t2, 0(t2)
        addq    t0, 12, t3
        stl     t3, 0(t3)
        addq    t0, 16, t4
        stl     t4, 0(t4)
        addq    t0, 20, t5
        stl     t5, 0(t5)
        addq    t0, 24, t6
        stl     t6, 0(t6)
        addq    t0, 28, t7
        stl     t7, 0(t7)
        addq    t0, 32, t0		/* advance to the next 32-byte (8-longword) block */

        subq    t8, t0, t9		/* t9 = bytes remaining (signed) */
        bgt     t9, vgafill_loop         /* go around if not finished */

        ret     (ra)
        .end    vgafill

	.data
/* Start address of the window written by vgafill (loaded with ldq).
 * NOTE(review): the low bits are 0xB8000; presumably the upper bits
 * select a platform-specific mapping of the VGA buffer - confirm. */
vgabaseaddr:
	.quad	0xFFFF8800000B8000


/*----------------------------------------------------------------------*/
/* Combined bash of PCI bus and memory bus - writes 32K to video card (a1)
 * and 64K to a memory location (passed in a0) */
/* combinedfill2 performs only the I/O (video card) writes; the memory
 * writes are disabled and replaced by NOPs */
	.text
	.align	4
/*
 * combinedfill2 - I/O-only variant of combinedfill.
 *
 * In:  a0 = seed for the data pattern (the memory address in the other
 *           variants; nothing is stored to it here)
 *      a1 = base address of the 32K I/O window
 *
 * Writes every longword in [a1, a1 + 32K) with stl, eight per loop
 * iteration.  The data written is the low 32 bits of a running value
 * that starts at a0 and advances by 8 per store (64 per iteration).
 * The memory stores are commented out and each is replaced by
 * "bis t10, t10, t10", which leaves t10 unchanged - presumably to keep
 * the instruction sequence the same shape as combinedfill.
 *
 * The loop test is at the bottom, so at least one block is written.
 * Registers written: t0-t10.  v0 is not written.
 */
	.globl	combinedfill2
	.ent	combinedfill2
combinedfill2:
        /* generate the address */
	bis	a1, zero, t0		/* t0 = I/O pointer (passed in via a1) */

	bis	a0, zero, t8		/* t8 = scratch copy of a0 */

        lda     t10, 32(zero)
        sll     t10, 10, t10              /* 32 << 10 = 32K */
        addq    t10, t0, t10              /* t10 = end of I/O window (a1 + 32K) */

combinedfill2_loop:
	stl	t8, 0(t0)		/* I/O write 1 */
	/*stq	t8, 0(t8) */ bis t10, t10, t10	/* NOP in place of mem write 1 */
	addq	t8, 8, t1

	stl	t1, 4(t0)		/* write 2 */
	/* stq	t1, 0(t1) */ bis t10, t10, t10	/* NOP */
	addq	t8, 16, t2

	stl	t2, 8(t0)		/* write 3 */
	/* stq	t2, 0(t2) */ bis t10, t10, t10	/* NOP */
	addq	t8, 24, t3

	stl	t3, 12(t0)		/* write 4 */
	/* stq	t3, 0(t3) */ bis t10, t10, t10	/* NOP */
	addq	t8, 32, t4

	stl	t4, 16(t0)		/* write 5 */
	/* stq	t4, 0(t4) */ bis t10, t10, t10	/* NOP */
	addq	t8, 40, t5

	stl	t5, 20(t0)		/* write 6 */
	/* stq	t5, 0(t5) */ bis t10, t10, t10	/* NOP */
	addq	t8, 48, t6

	stl	t6, 24(t0)		/* write 7 */
	/* stq	t6, 0(t6) */ bis t10, t10, t10	/* NOP */
	addq	t8, 56, t7

	stl	t7, 28(t0)		/* write 8 */
	/* stq	t7, 0(t7) */ bis t10, t10, t10	/* NOP */
	addq	t0, 32, t0		/* I/O pointer += 32 bytes */
	addq	t8, 64, t8		/* data pattern += 64 */

        subq    t10, t0, t9		/* t9 = I/O bytes remaining (signed) */
        bgt     t9, combinedfill2_loop   /* go around if not finished */

        ret     (ra)
	.end	combinedfill2


/*----------------------------------------------------------------------*/
/* Combined bash of PCI bus and memory bus - writes 32K to video card (a1)
 * and 64K to a memory location (passed in a0) */
/* combinedfill3 performs only the memory writes; the I/O (video card)
 * writes are disabled and replaced by NOPs */
	.text
	.align	4
/*
 * combinedfill3 - memory-only variant of combinedfill.
 *
 * In:  a0 = base address of the 64K memory region
 *      a1 = I/O base; used here only as the start value of the loop
 *           counter, nothing is stored to it
 *
 * Writes every quadword in [a0, a0 + 64K) with its own address (stq),
 * eight per loop iteration.  The I/O stores are commented out and each
 * is replaced by "bis t10, t10, t10", which leaves t10 unchanged -
 * presumably to keep the instruction sequence the same shape as
 * combinedfill.
 *
 * The loop runs while t0 (stepping by 32 from a1) is below a1 + 32K,
 * with the test at the bottom, so at least one block is written.
 * Registers written: t0-t10.  v0 is not written.
 */
	.globl	combinedfill3
	.ent	combinedfill3
combinedfill3:
        /* generate the address */
	bis	a1, zero, t0		/* t0 = loop counter, starts at a1 */

	bis	a0, zero, t8		/* t8 = memory pointer (scratch copy of a0) */

        lda     t10, 32(zero)
        sll     t10, 10, t10              /* 32 << 10 = 32K */
        addq    t10, t0, t10              /* t10 = loop limit (a1 + 32K) */

combinedfill3_loop:
	/* stl	t8, 0(t0) */ bis t10, t10, t10	/* NOP in place of I/O write 1 */
	stq	t8, 0(t8)		/* mem write 1 */
	addq	t8, 8, t1

	/* stl	t1, 4(t0) */ bis t10, t10, t10	/* write 2 */
	stq	t1, 0(t1)
	addq	t8, 16, t2

	/* stl	t2, 8(t0) */ bis t10, t10, t10	/* write 3 */
	stq	t2, 0(t2)
	addq	t8, 24, t3

	/* stl	t3, 12(t0) */ bis t10, t10, t10	/* write 4 */
	stq	t3, 0(t3)
	addq	t8, 32, t4

	/* stl	t4, 16(t0) */ bis t10, t10, t10	/* write 5 */
	stq	t4, 0(t4)
	addq	t8, 40, t5

	/* stl	t5, 20(t0) */ bis t10, t10, t10	/* write 6 */
	stq	t5, 0(t5)
	addq	t8, 48, t6

	/* stl	t6, 24(t0) */ bis t10, t10, t10	/* write 7 */
	stq	t6, 0(t6)
	addq	t8, 56, t7

	/* stl	t7, 28(t0) */ bis t10, t10, t10	/* write 8 */
	stq	t7, 0(t7)
	addq	t0, 32, t0		/* loop counter += 32 */
	addq	t8, 64, t8		/* memory pointer += 64 bytes */

        subq    t10, t0, t9		/* t9 = count remaining (signed) */
        bgt     t9, combinedfill3_loop   /* go around if not finished */

        ret     (ra)
	.end	combinedfill3


/*----------------------------------------------------------------------*/
/* Combined bash of PCI bus and memory bus - writes 32K to video card (a1)
 * and 64K to a memory location (passed in a0) */
/* combinedfill4 does memory and byte-width ISA io */

	.text
	.align	4
/*
 * combinedfill4 - memory quadword writes interleaved with byte-wide
 * I/O writes to a single fixed address.
 *
 * In:  a0 = base address of the 64K memory region
 *      a1 = I/O address
 *
 * Each loop iteration issues eight "stb zero" stores to the address in
 * t11 and eight stq stores to memory.  t11 is copied from a1 once and
 * never advanced, so every byte store goes to the same address.  The
 * memory side writes every quadword in [a0, a0 + 64K) with its own
 * address.
 *
 * t0 (stepping by 32 from a1 up to a1 + 32K) only counts iterations;
 * nothing is stored through it.  The test is at the bottom, so at
 * least one block is written.
 * Registers written: t0-t11.  v0 is not written.
 */
	.globl	combinedfill4
	.ent	combinedfill4
combinedfill4:
        /* generate the address */
	bis	a1, zero, t0		/* t0 = loop counter, starts at a1 */
	bis	a1, zero, t11		/* t11 = fixed I/O address (a1), never advanced */
	bis	a0, zero, t8		/* t8 = memory pointer (scratch copy of a0) */

        lda     t10, 32(zero)
        sll     t10, 10, t10              /* 32 << 10 = 32K */
        addq    t10, t0, t10              /* t10 = loop limit (a1 + 32K) */

combinedfill4_loop:
	stb	zero, 0(t11) 		/* I/O write 1 */
	stq	t8, 0(t8)		/* mem write 1 */
	addq	t8, 8, t1

	stb	zero, 0(t11) 		/* I/O write 2 */
	stq	t1, 0(t1)
	addq	t8, 16, t2

	stb	zero, 0(t11) 		/* I/O write 3 */
	stq	t2, 0(t2)
	addq	t8, 24, t3

	stb	zero, 0(t11) 		/* I/O write 4 */
	stq	t3, 0(t3)
	addq	t8, 32, t4

	stb	zero, 0(t11) 		/* I/O write 5 */
	stq	t4, 0(t4)
	addq	t8, 40, t5

	stb	zero, 0(t11) 		/* I/O write 6 */
	stq	t5, 0(t5)
	addq	t8, 48, t6

	stb	zero, 0(t11) 		/* I/O write 7 */
	stq	t6, 0(t6)
	addq	t8, 56, t7

	stb	zero, 0(t11) 		/* I/O write 8 */
	stq	t7, 0(t7)
	addq	t0, 32, t0		/* loop counter += 32 */
	addq	t8, 64, t8		/* memory pointer += 64 bytes */

        subq    t10, t0, t9		/* t9 = count remaining (signed) */
        bgt     t9, combinedfill4_loop   /* go around if not finished */

        ret     (ra)
	.end	combinedfill4



/*----------------------------------------------------------------------*/
/* Combined bash of PCI bus and memory bus - writes 32K to video card (a1)
 * and 64K to a memory location (passed in a0) */

/* fill5 does combined VGA byte-writes and memory quadword writes */

	.text
	.align	4
/*
 * combinedfill5 - byte writes to the I/O window interleaved with
 * quadword writes to memory.
 *
 * In:  a0 = base address of the 64K memory region
 *      a1 = base address of the 32K I/O window
 *
 * Each loop iteration issues eight stb stores at offsets 0, 4, ..., 28
 * from the I/O pointer (one byte in every longword; the other three
 * bytes are not written) and eight stq stores to memory.  Each byte
 * written is the low byte of the memory address used by the paired
 * stq; each quadword in [a0, a0 + 64K) is written with its own
 * address.
 *
 * The I/O pointer advances 32 bytes and the memory pointer 64 bytes
 * per iteration; the loop ends when the I/O pointer reaches a1 + 32K.
 * The test is at the bottom, so at least one block is written.
 * Registers written: t0-t10.  v0 is not written.
 */
	.globl	combinedfill5
	.ent	combinedfill5
combinedfill5:
        /* generate the address */
	bis	a1, zero, t0		/* t0 = I/O pointer (passed in via a1) */

	bis	a0, zero, t8		/* t8 = memory pointer (scratch copy of a0) */

        lda     t10, 32(zero)
        sll     t10, 10, t10              /* 32 << 10 = 32K */
        addq    t10, t0, t10              /* t10 = end of I/O window (a1 + 32K) */

combinedfill5_loop:
	stb	t8, 0(t0)		/* I/O write 1 */
	stq	t8, 0(t8)		/* mem write 1 */
	addq	t8, 8, t1

	stb	t1, 4(t0)		/* write 2 */
	stq	t1, 0(t1)
	addq	t8, 16, t2

	stb	t2, 8(t0)		/* write 3 */
	stq	t2, 0(t2)
	addq	t8, 24, t3

	stb	t3, 12(t0)		/* write 4 */
	stq	t3, 0(t3)
	addq	t8, 32, t4

	stb	t4, 16(t0)		/* write 5 */
	stq	t4, 0(t4)
	addq	t8, 40, t5

	stb	t5, 20(t0)		/* write 6 */
	stq	t5, 0(t5)
	addq	t8, 48, t6

	stb	t6, 24(t0)		/* write 7 */
	stq	t6, 0(t6)
	addq	t8, 56, t7

	stb	t7, 28(t0)		/* write 8 */
	stq	t7, 0(t7)
	addq	t0, 32, t0		/* I/O pointer += 32 bytes */
	addq	t8, 64, t8		/* memory pointer += 64 bytes */

        subq    t10, t0, t9		/* t9 = I/O bytes remaining (signed) */
        bgt     t9, combinedfill5_loop   /* go around if not finished */

        ret     (ra)
	.end	combinedfill5


/*----------------------------------------------------------------------*/
/* Combined bash of PCI bus and memory bus - writes 32K to video card (a1)
 * and 64K to a memory location (passed in a0) */
	.text
	.align	4
/*
 * combinedfill - zero-fill an I/O window and a memory region with the
 * two store streams interleaved.
 *
 * In:  a0 = base address of the 64K memory region
 *      a1 = base address of the 32K I/O window
 *
 * Each loop iteration stores eight zero longwords (stl) to the I/O
 * window and eight zero quadwords (stq) to memory, alternating one I/O
 * store with one memory store.  Over the whole run this zeroes
 * [a1, a1 + 32K) and [a0, a0 + 64K).
 *
 * The I/O pointer advances 32 bytes and the memory pointer 64 bytes
 * per iteration; the loop ends when the I/O pointer reaches a1 + 32K.
 * The test is at the bottom, so at least one block is written.
 * Registers written: t0-t10.  v0 is not written.
 */
	.globl	combinedfill
	.ent	combinedfill
combinedfill:
        /* generate the address */
	bis	a1, zero, t0		/* t0 = I/O pointer (passed in via a1) */

	bis	a0, zero, t8		/* t8 = memory pointer (scratch copy of a0) */

        lda     t10, 32(zero)
        sll     t10, 10, t10              /* 32 << 10 = 32K */
        addq    t10, t0, t10              /* t10 = end of I/O window (a1 + 32K) */

combinedfill_loop:
	stl	zero, 0(t0)		/* I/O write 1 */
	stq	zero, 0(t8)		/* mem write 1 */
	addq	t8, 8, t1		/* t1-t7 = addresses of the next seven quadwords */

	stl	zero, 4(t0)		/* write 2 */
	stq	zero, 0(t1)
	addq	t8, 16, t2

	stl	zero, 8(t0)		/* write 3 */
	stq	zero, 0(t2)
	addq	t8, 24, t3

	stl	zero, 12(t0)		/* write 4 */
	stq	zero, 0(t3)
	addq	t8, 32, t4

	stl	zero, 16(t0)		/* write 5 */
	stq	zero, 0(t4)
	addq	t8, 40, t5

	stl	zero, 20(t0)		/* write 6 */
	stq	zero, 0(t5)
	addq	t8, 48, t6

	stl	zero, 24(t0)		/* write 7 */
	stq	zero, 0(t6)
	addq	t8, 56, t7

	stl	zero, 28(t0)		/* write 8 */
	stq	zero, 0(t7)
	addq	t0, 32, t0		/* I/O pointer += 32 bytes */
	addq	t8, 64, t8		/* memory pointer += 64 bytes */

        subq    t10, t0, t9		/* t9 = I/O bytes remaining (signed) */
        bgt     t9, combinedfill_loop   /* go around if not finished */

        ret     (ra)
	.end	combinedfill
