/* bALib.s - asm language buffer subroutine library */

	.data
	.asciz	"Copyright 1984-1988, Wind River Systems, Inc."

/*
modification history
--------------------
*/

/*
DESCRIPTION
This library contains routines to manipulate buffers, which are just
variable length byte arrays.  The routine bcopy copies between two buffers.
Overlapping buffers are handled correctly.
The routine bfill fills a buffer with a constant byte.

These routines are highly optimized.
Operations are performed on long words where possible, even though the 
buffer lengths are specified in terms of bytes.
This particular optimization will only occur if source and
destination buffers are aligned such that either both start on an
odd address, or both start at an even address.  If one is even and one is odd,
operations must be done a byte at a time (because of alignment problems
inherent in the 68000) and the process will be slower.

Certain applications, such as byte-wide memory-mapped peripherals,
may require that only byte operations be performed.  For this purpose,
the routines cpybytes and filbytes provide the same functions
as bcopy and bfill but only using byte-at-a-time operations.
*/

#define ASMLANGUAGE
#include "UniWorks.h"
#include "asm.h"

	.globl	_bcopy
	.globl	_cpybytes
	.globl	_bzero
	.globl	_bfill
	.globl	_filbytes
	.globl	_bcmp

	.text
	.even

/***********************************************************************
*
* bcopy - copy one buffer to another
*
* This routine copies the first nbytes characters from from_buf to to_buf.
* Overlapping buffers are handled correctly: when the destination starts
* inside the source region the copy runs backwards from the end of both
* buffers, otherwise it runs forwards.  The overlap test compares the
* addresses as unsigned quantities.
* A count that is zero or negative copies nothing.
*
* This routine will optimize the copy by copying long words (4 bytes)
* at a time if possible (see cpybytes (2) for copying a byte at a time only):
*   - fewer than 8 bytes, or one address even and the other odd:
*     byte moves only;
*   - is68020 non-zero: the source pointer is brought to a long word
*     boundary and the bulk is moved in unrolled 32 byte blocks;
*   - is68020 zero: both pointers are made even and the bulk is moved
*     one long word at a time.
*
* Register use: a0 = source, a1 = destination, d1 = remaining count,
* d0 = scratch.  d0 is cleared before returning.
*
* SEE ALSO: cpybytes (2)
*

* VOID bcopy (from_buf, to_buf, nbytes)
*     char *from_buf;	/* pointer to source buffer *
*     char *to_buf;	/* pointer to destination buffer *
*     int nbytes;	/* number of bytes to copy *

*/

_bcopy:
	link	a6,#0
	movel	d2,a7@-		/* save d2 */

	tstb	is68020		/* test for 68020 */
	jeq	9f		/* jump if not 020 */

	/* 68020 version: unaligned long accesses are tolerated */

	movl	a6@(ARG1),a0	/* source */
	movl	a6@(ARG2),a1	/* destination */
	movw	a0,d0		/* check for one addr even & one addr odd */
	movw	a1,d1
	eorw	d1,d0
	andw	#1,d0		/* d0 = 1 if the parities differ, else 0 */
	movl	a6@(ARG3),d1	/* count (the move sets N and Z) */
	jle	bcopyDone	/* zero or negative count: nothing to copy */
	cmpl	a0,a1		/* check for nasty overlap (dest - source) */
	jls	forward0	/* dest <= source, copy in forward dir */
	addl	d1,a0		/* point a0 just past end of source string */
	cmpl	a0,a1		/* see if strings really overlap */
	jcc	1f		/* dest >= source end (unsigned): no */
	addl	d1,a1		/* yes, point a1 just past end of dest string */
	jra	backward0	/* overlapping, must do backwards */

    1:				/* no overlap */
	subl	d1,a0		/* point back at beginning of source string */

forward0:
	cmpl	#8,d1		/* if less than 8 bytes just copy bytes */
	jlt	4f
	tstw	d0		/* if one addr even & one addr odd copy bytes */
	jne	4f
	movw	a0,d0		/* check for odd addrs */
	lsrw	#1,d0		/* carry = bit 0 of source address */
	jcc	1f
	subql	#1,d1		/* both addrs odd, copy 1 byte */
	movb	a0@+,a1@+	/*  and then both addrs will be even */
	movw	a0,d0		/* a0 moved up: its bit 1 may have changed, */
	lsrw	#1,d0		/*  so fetch the address again */
    1:	lsrw	#1,d0		/* carry = bit 1 of current source address */
	jcc	1f
	subql	#2,d1		/* copy 1 word */
	movw	a0@+,a1@+	/*  and then the source is long aligned */
    1:	movl	d1,d0		/* save byte count */
	lsrl	#5,d1		/* change to 32 byte block count (truncated) */
	jra	2f		/* jump into long copy loop */
    1:	movl	a0@+,a1@+	/* 32 byte block: eight long moves */
	movl	a0@+,a1@+
	movl	a0@+,a1@+
	movl	a0@+,a1@+
	movl	a0@+,a1@+
	movl	a0@+,a1@+
	movl	a0@+,a1@+
	movl	a0@+,a1@+
    2:	dbf	d1,1b		/* entry point of long copy loop */
	addqw	#1,d1		/* propagate borrow thru all 32 bits: */
	subql	#1,d1		/*  dbf only counts the low 16 bits */
	jgt	1b
	movw	d0,d1		/* restore byte count */
	andl	#0x1F,d1	/* 0-31 bytes left */
	jra	4f		/* jump into byte copy loop */
    1:	movb	a0@+,a1@+	/* byte copy loop */
    4:	dbf	d1,1b		/* entry point of byte copy loop */
	addqw	#1,d1		/* propagate borrow thru all 32 bits */
	subql	#1,d1
	jgt	1b
	movq	#0,d0		/* tidy */
	movel	a7@+,d2		/* restore d2 */
	unlk	a6
	rts

backward0:
	cmpl	#8,d1		/* if less than 8 bytes just copy bytes */
	jlt	4f
	tstw	d0		/* if one addr even & one addr odd copy bytes */
	jne	4f

	/* a0 and a1 point just past the ends here.  Stepping an odd
	 * address down by one does not change its bit 1, so a single
	 * read of the address serves both alignment tests. */

	movw	a0,d0		/* check for odd addrs */
	lsrw	#1,d0		/* carry = bit 0 of source end address */
	jcc	1f
	subql	#1,d1		/* both addrs odd, copy 1 byte */
	movb	a0@-,a1@-	/*  and then both addrs will be even */
    1:	lsrw	#1,d0		/* carry = bit 1 of source end address */
	jcc	1f
	subql	#2,d1		/* align to long word, copy 1 word */
	movw	a0@-,a1@-	/*  and then the source is long aligned */
    1:	movl	d1,d0		/* save byte count */
	lsrl	#5,d1		/* change to 32 byte block count (truncated) */
	jra	2f		/* jump into block copy loop */
    1:	movl	a0@-,a1@-	/* 32 byte block: eight long moves */
	movl	a0@-,a1@-
	movl	a0@-,a1@-
	movl	a0@-,a1@-
	movl	a0@-,a1@-
	movl	a0@-,a1@-
	movl	a0@-,a1@-
	movl	a0@-,a1@-
    2:	dbf	d1,1b		/* entry point of block copy loop */
	addqw	#1,d1		/* propagate borrow thru all 32 bits */
	subql	#1,d1
	jgt	1b
	movw	d0,d1		/* restore byte count */
	andl	#0x1f,d1	/* 0-31 bytes left */
	jra	4f		/* jump into byte copy loop */
    1:	movb	a0@-,a1@-	/* byte copy loop */
    4:	dbf	d1,1b		/* entry point of byte copy loop */
	addqw	#1,d1		/* propagate borrow thru all 32 bits */
	subql	#1,d1
	jgt	1b
	movq	#0,d0		/* tidy */
	movel	a7@+,d2		/* restore d2 */
	unlk	a6
	rts

	/* NOT 68020: word and long accesses must be on even addresses */

    9:	movl	a6@(ARG1),a0	/* source */
	movl	a6@(ARG2),a1	/* destination */

	movw	a0,d0		/* check for one addr even & one addr odd */
	movw	a1,d1
	eorw	d1,d0
	andw	#1,d0		/* d0 = 1 if the parities differ, else 0 */

	movl	a6@(ARG3),d1	/* count (the move sets N and Z) */
	jle	bcopyDone	/* zero or negative count: nothing to copy */

	cmpl	a0,a1		/* check for nasty overlap (dest - source) */
	jls	forward		/* dest <= source, copy in forward dir */
	addl	d1,a0		/* point a0 just past end of source string */
	cmpl	a0,a1		/* see if strings really overlap */
	jcc	1f		/* dest >= source end (unsigned): no */
	addl	d1,a1		/* yes, point a1 just past end of dest string */
	jra	backward	/* overlapping, must do backwards */

    1:				/* no overlap */
	subl	d1,a0		/* point back at beginning of source string */

forward:
	cmpl	#8,d1		/* if less than 8 bytes just copy bytes */
	jlt	4f
	tstw	d0		/* if one addr even & one addr odd copy bytes */
	jne	4f

	movw	a0,d0		/* check for odd addrs */
	lsrw	#1,d0
	jcc	1f
	subql	#1,d1		/* both addrs odd, copy 1 byte */
	movb	a0@+,a1@+	/*  and then both addrs will be even */

    1:				/* both addrs even, get ready to copy longs */
	movl	d1,d0		/* save byte count */
	lsrl	#2,d1		/* change to long count (truncated) */
	jra	3f		/* jump into long copy loop */

    2:	movl	a0@+,a1@+	/* long copy loop */
    3:	dbf	d1,2b		/* entry point of long copy loop */
	addqw	#1,d1		/* propagate borrow thru all 32 bits */
	subql	#1,d1
	jgt	2b

	movl	d0,d1		/* restore byte count */
	andl	#3,d1		/* 0-3 bytes left */
	jra	4f		/* jump into byte copy loop */

    1:	movb	a0@+,a1@+	/* byte copy loop */
    4:	dbf	d1,1b		/* entry point of byte copy loop */
	addqw	#1,d1		/* propagate borrow thru all 32 bits */
	subql	#1,d1
	jgt	1b

	movq	#0,d0		/* tidy */
	movel	a7@+,d2		/* restore d2 */
	unlk	a6
	rts

backward:
	cmpl	#8,d1		/* if less than 8 bytes just copy bytes */
	jlt	4f
	tstw	d0		/* if one addr even & one addr odd copy bytes */
	jne	4f

	movw	a0,d0		/* check for odd addrs */
	lsrw	#1,d0
	jcc	1f
	subql	#1,d1		/* both addrs odd, copy 1 byte */
	movb	a0@-,a1@-	/*  and then both addrs will be even */

    1:				/* both addrs even, get ready to copy longs */
	movl	d1,d0		/* save byte count */
	lsrl	#2,d1		/* change to long count (truncated) */
	jra	3f		/* jump into long copy loop */

    2:	movl	a0@-,a1@-	/* long copy loop */
    3:	dbf	d1,2b		/* entry point of long copy loop */
	addqw	#1,d1		/* propagate borrow thru all 32 bits */
	subql	#1,d1
	jgt	2b

	movl	d0,d1		/* restore byte count */
	andl	#3,d1		/* 0-3 bytes left */
	jra	4f		/* jump into byte copy loop */

    1:	movb	a0@-,a1@-	/* byte copy loop */
    4:	dbf	d1,1b		/* entry point of byte copy loop */
	addqw	#1,d1		/* propagate borrow thru all 32 bits */
	subql	#1,d1
	jgt	1b

bcopyDone:			/* also the target for count <= 0 */
	movq	#0,d0		/* tidy */
	movel	a7@+,d2		/* restore d2 */
	unlk	a6
	rts

/***********************************************************************
*
* cpybytes - copy one buffer to another a byte at a time
*
* This routine copies the first nbytes characters from from_buf to to_buf
* using byte moves only.  This may be desirable if one of the buffers
* can only be accessed with byte instructions, as in certain byte-wide
* memory-mapped peripherals.
*
* Note that the argument order differs from bcopy: the destination is
* the first argument and the source is the second.  The copy always runs
* from the lowest address upwards; there is no backward path for
* overlapping buffers.
*
* The count is split into two 16 bit halves because dbf only counts
* 16 bits: the low half drives the inner loop and the high half says how
* many further passes of 64K bytes the outer loop must make.
*
* SEE ALSO: bcopy (2)

* VOID cpybytes (to_buf, from_buf, nbytes)
*     char *from_buf;	/* pointer to source buffer *
*     char *to_buf;	/* pointer to destination buffer *
*     int nbytes;	/* number of bytes to copy *

*/

_cpybytes:
	link	a6,#0
	movl	d2,a7@-			/* d2 is pushed and popped but not used */

	/* fetch the arguments: a0 = source, a1 = destination, d0 = count */

	movl	a6@(16),d0		/* third argument: nbytes */
	movl	a6@(12),a0		/* second argument: from_buf */
	movl	a6@(8),a1		/* first argument: to_buf */

fwdBytes:
	movl	d0,d1			/* d1 low word <- upper half of count, */
	swap	d1			/*  the number of extra 64K passes */
	jra	3f			/* enter at the test so 0 copies nothing */

    1:	movl	#0xffff,d0		/* reload inner counter for a 64K pass */

    2:	movb	a0@+,a1@+		/* one byte, both pointers advance */
    3:	dbf	d0,2b			/* inner loop: low 16 bits of count */

	dbf	d1,1b			/* outer loop: one pass per 64K left */

	movl	a7@+,d2			/* pop d2 */
	unlk	a6
	rts

/***********************************************************************
*
* bzero - fill buffer with 0s
*
* This routine fills the first nbytes characters of the specified buffer
* with 0.  A count that is zero or negative writes nothing.
*
* When is68020 is non-zero, buffers of fewer than 8 bytes are cleared a
* byte at a time; longer ones are brought to a long word boundary and
* cleared in unrolled 32 byte blocks, followed by 0-31 single bytes.
* When is68020 is zero, the pointer is made even and the buffer is
* cleared one long word at a time, followed by 0-3 single bytes.
*
* Register use: a0 = buffer pointer, d1 = remaining count,
* a1 = saved byte count, d0 = scratch, then the zero being stored.
*
* SEE ALSO: filbytes(2), bfill(2)

* VOID bzero (buf, nbytes)
*     FAST char *buf;		/* pointer to buffer *
*     FAST int nbytes;		/* number of bytes to copy *

*/

_bzero:
	link	a6,#0
	moveml	d2-d3,a7@-		/* save regs */

	tstb	is68020		/* test for 68020 */
	jeq	9f		/* jump if not 68020 */
	movl	a6@(ARG1),a0	/* base */
	movl	a6@(ARG2),d1	/* length (the move sets N and Z) */
	jle	7f		/* ought to be positive */
	cmpl	#8,d1		/* if less than 8 bytes just zero bytes */
	jlt	8f		/* short copy */
	movw	a0,d0		/* check for odd address */
	lsrw	#1,d0		/* carry = bit 0 of address */
	jcc	1f
	clrb	a0@+
	subql	#1,d1
	jle	7f
	movw	a0,d0		/* a0 moved up: its bit 1 may have changed, */
	lsrw	#1,d0		/*  so fetch the address again */
    1:	lsrw	#1,d0		/* check for long aligned address */
	jcc	1f
	clrw	a0@+
	subql	#2,d1
	jle	7f
    1:	movq	#0,d0 		/* we now have long address, positive count */
	movl	d1,a1		/* save byte count */
	lsrl	#5,d1		/* convert to 32 byte block count (truncated) */
	jra	2f

    1:	movl	d0,a0@+		/* 32 byte block: eight long stores */
	movl	d0,a0@+
	movl	d0,a0@+
	movl	d0,a0@+
	movl	d0,a0@+
	movl	d0,a0@+
	movl	d0,a0@+
	movl	d0,a0@+
    2:	dbf	d1,1b		/* entry point of block copy loop */
	addqw	#1,d1		/* propagate borrow thru all 32 bits: */
	subql	#1,d1		/*  dbf only counts the low 16 bits */
	jge	1b
	movw	a1,d1		/* restore byte count */
	andw	#0x1F,d1	/* only interested in bottom 5 bits */
	jra	2f
    1:	movb	d0,a0@+		/* zero last 0-31 bytes */
    2:	dbf	d1,1b
    7:	moveml	a7@+,d2-d3	/* restore regs */
	unlk	a6
	rts
    1:	clrb	a0@+
    8:	dbf	d1,1b		/* short length zero ... */
	moveml	a7@+,d2-d3	/* restore regs */
	unlk	a6
	rts

	/* NOT 68020: long stores must be on even addresses */

    9:	movl	a6@(ARG1),a0	/* base */
	movl	a6@(ARG2),d1	/* length (the move sets N and Z) */
	jle	7f		/* ought to be positive */
	movw	a0,d0		/* check for odd address */
	lsrw	#1,d0
	jcc	1f
	clrb	a0@+
	subql	#1,d1
	jle	7f
    1:	movq	#0,d0		/* we now have even address, positive count */
	movl	d1,a1		/* save byte count */
	lsrl	#2,d1		/* convert to long count (truncated) */
	jra	2f
    1:	movl	d0,a0@+		/* long copy loop */
    2:	dbf	d1,1b		/* entry point of long copy loop */
	addqw	#1,d1		/* propagate borrow thru all 32 bits */
	subql	#1,d1
	jge	1b
	movw	a1,d1		/* restore byte count */
	andw	#3,d1		/* only interested in bottom 2 bits */
	jra	2f
    1:	movb	d0,a0@+		/* zero last 0-3 bytes */
    2:	dbf	d1,1b
    7:	moveml	a7@+,d2-d3	/* restore regs */
	unlk	a6
	rts

/***********************************************************************
*
* bfill - fill buffer with character
*
* This routine fills the first nbytes characters of the specified buffer
* with the specified character.  Only the low byte of the ch argument is
* used, so a sign-extended character fills correctly.
* A count that is zero or negative writes nothing.
*
* This routine will optimize the fill by filling long words (4 bytes)
* at a time if possible (see filbytes (2) for filling a byte at a time only).
* Buffers of fewer than 20 bytes are filled a byte at a time.  Longer
* ones get one leading byte if the address is odd, then long word
* stores, then the 0-3 bytes that are left over.
*
* Register use: a0 = buffer pointer, d0 = byte count / remainder,
* d1 = fill pattern, d2 = scratch and outer loop counter,
* d3 = long word count.  d2 and d3 are saved and restored.
*
* SEE ALSO: filbytes(2)

* VOID bfill (buf, nbytes, ch)
*     FAST char *buf;		/* pointer to buffer *
*     FAST int nbytes;		/* number of bytes to copy *
*     FAST char ch;		/* char with which to fill buffer *

*/

_bfill:
	link	a6,#0
	moveml	d2-d3,a7@-		/* save regs */

	/* put buf in a0, ch in d1, and nbytes in d0; nbytes is loaded
	 * last so that the condition codes describe the count */

	movel	a6@(ARG1),a0		/* get buf */
	movel	a6@(ARG3),d1		/* ch */
	movel	a6@(ARG2),d0		/* nbytes */
	ble	fb6			/* zero or negative: nothing to fill */

	/* if length is less than 20, cheaper to do a byte fill */

	cmpl	#20,d0			/* test count */
	blt	fb5			/* do byte fill */

	/* Put ch in all four bytes of d1, so we can fill 4 bytes at a crack.
	 * Each step overwrites rather than ORs, so whatever the caller
	 * left in the upper 24 bits of the ch argument is discarded. */

	moveb	d1,d2			/* d2 low byte = ch */
	lslw	#8,d1			/* move ch into 2nd byte of d1 */
	moveb	d2,d1			/* put ch back into 1st byte of d1 */
	movew	d1,d2			/* d2 low word = ch-ch */
	swap	d1			/* get ch-ch into high word of d1 */
	movew	d2,d1			/* put ch-ch back into low word of d1 */

	/* If the buffer is odd-aligned, fill the first byte */

	movew	a0,d2
	btst	#0,d2			/* d2 has low word of buf address */
	beq	fb0			/* if even-aligned */

	moveb	d1,a0@+			/* fill the byte */
	subl	#1,d0			/* decrement count by 1 */

	/* Since we're filling 4 bytes at a crack, divide count by 4.
	 * Keep the remainder in d0, so we can do those bytes at the
	 * end of the loop. */

fb0:
	movel	d0,d3
	andl	#3,d0			/* remainder in d0 */
	asrl	#2,d3			/* count /= 4 */

	/* The fastest way to do the fill is with a dbra loop, but dbra
	 * uses only a 16 bit counter.  Therefore, break up count into
	 * two pieces, to be used as an inner loop and an outer loop */

	movel	d3,d2			/* Set up d2 as the outer loop ctr */
	swap	d2			/* get upper word into dbra counter */
	bra	fb3			/* do the test first */

fb1:	movel	#0xffff,d3		/* set to fill another 64K */

fb2:	movel	d1,a0@+			/* move 4 bytes */
fb3:	dbra	d3,fb2			/* inner loop test */

	dbra	d2,fb1			/* outer loop test */

	/* do the extras at the end */

	bra	fb5			/* do the test first */
fb4:	moveb	d1,a0@+			/* move 1 byte */
fb5:	dbra	d0,fb4			/* inner loop test */

fb6:	moveml	a7@+,d2-d3		/* restore regs */
	unlk	a6
	rts

/***********************************************************************
*
* filbytes - fill buffer with character a byte at a time
*
* This routine fills the first n characters of the specified buffer
* with the specified character, using byte stores only.  It does the
* same job as bfill (2) without any long word stores, which may be
* desirable if the buffer can only be accessed with byte instructions,
* as in certain byte-wide memory-mapped peripherals.
*
* The count is split into two 16 bit halves because dbf only counts
* 16 bits: the low half drives the inner loop and the high half says how
* many further passes of 64K bytes the outer loop must make.
*
* Register use: a0 = buffer pointer, d0 = inner counter, d1 = ch,
* d2 = outer counter.  d2 is kept in a1 for the duration of the call
* instead of being pushed on the stack.
*
* SEE ALSO: bfill(2)

* VOID filbytes (buf, nbytes, ch)
*     FAST char *buf;		/* pointer to buffer *
*     FAST int nbytes;		/* number of bytes to copy *
*     FAST char ch;		/* char with which to fill buffer *

*/

_filbytes:
	link	a6,#0
	movl	d2,a1			/* keep the caller's d2 in a1 */

	/* fetch the arguments: a0 = buffer, d0 = count, d1 = fill char */

	movl	a6@(ARG3),d1		/* ch; only the low byte is stored */
	movl	a6@(ARG1),a0		/* buf */
	movl	a6@(ARG2),d0		/* nbytes */

	movl	d0,d2			/* d2 low word <- upper half of count, */
	swap	d2			/*  the number of extra 64K passes */
	jra	3f			/* enter at the test so 0 fills nothing */

    1:	movl	#0xffff,d0		/* reload inner counter for a 64K pass */

    2:	movb	d1,a0@+			/* store one byte and advance */
    3:	dbf	d0,2b			/* inner loop: low 16 bits of count */

	dbf	d2,1b			/* outer loop: one pass per 64K left */

	movl	a1,d2			/* give the caller's d2 back */
	unlk	a6
	rts

/****************************************************************************
*
* bcmp
*
* This routine compares one buffer to another, a byte at a time,
* starting at the lowest address of each and stopping at the first
* byte that differs.
*
* The loop counter in d0 starts at len - 1 because dbne stops when its
* 16 bit counter reaches -1.  dbne only counts 16 bits, so each time the
* low word runs out a borrow is taken from the upper word by hand.
*
* RETURNS
*   0 if first nbytes of b1 and b2 are identical (or len is 0)
*   non-zero otherwise

* int bcmp (src_buf, dest_buf, len)
*     FAST char *src_buf;
*     FAST char *dest_buf;
*     FAST int len;

*/

_bcmp:
	link	a6,#0			/* d2 is not used, so it is not saved */

	movel	a6@(ARG2),a1		/* dest_buf */
	movel	a6@(ARG1),a0		/* src_buf */
	movel	a6@(ARG3),d0		/* len; loaded last so Z reflects it */
	beq	bcmpExit		/* len is 0: return the 0 already in d0 */

	subql	#1,d0			/* counter = len - 1 */

bcmpNext:
	cmpb	a0@+,a1@+		/* compare one byte, advance both */
	dbne	d0,bcmpNext		/* stop on a difference or low word -1 */
	bne	bcmpDiff		/* flags are still those of the cmpb */
	addqw	#1,d0			/* low word -1 -> 0 ... */
	subql	#1,d0			/*  ... then borrow from the upper word */
	bge	bcmpNext		/* not yet negative: another 64K to go */

bcmpDiff:
	addql	#1,d0			/* counter -1 (all equal) becomes 0 */

bcmpExit:
	unlk	a6
	rts
