/******************************************************************************/
#include "gcc_app.h"

/******************************************************************************/
/* Variables in zero-page: it seems we have to set them by hand */

	.global r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,r13,r14,r15,t

; Pseudo-registers r0..r15, laid out back to back starting at $50.
; Each one is 3 bytes wide; the routines in this file treat byte +0 as the
; low byte and byte +2 as the high (sign) byte.
r0	=	$50
r1	=	r0+3
r2	=	r1+3
r3	=	r2+3
r4	=	r3+3
r5	=	r4+3
r6	=	r5+3
r7	=	r6+3
r8	=	r7+3
r9	=	r8+3
r10	=	r9+3
r11	=	r10+3
r12	=	r11+3
r13	=	r12+3
r14	=	r13+3
r15	=	r14+3

; t: scratch area placed directly after r15.  The routines in this file use
; t, t+1 and t+2 for operands, shift counts and intermediate results.
t	=	r15+3

/******************************************************************************/

	.code

	.global	lib_ind_jump
; Indirect jump to the address passed in A (low byte) and X (high byte).
; The address is parked in t / t+1 so that jmp (t) can reach it.
lib_ind_jump
	stx	t+1		; high byte of the target
	sta	t		; low byte of the target
	jmp	(t)

	.global	lib_exit_pop16_r8
; Pull two bytes off the hardware stack into r8 (the first byte pulled goes
; to r8, the second to r8+1), then return through the next address found on
; the stack.
; NOTE(review): the two pla instructions consume the topmost stack bytes, so
; this is presumably entered with jmp rather than jsr - confirm with callers.
lib_exit_pop16_r8
	pla
	sta	r8
	pla
	sta	r8+1
	rts

/******************************************************************************/
; A * Y -> YXA  (8-bit result)
;
; Shift-and-add multiply, most significant multiplier bit first.
; In:   A = multiplicand, Y = multiplier
; Out:  A = low 8 bits of the product, X = 0, Y = 0
; Uses: t (multiplier, shifted out to 0), t+1 (multiplicand)

	.global	__mul_8_by_8
__mul_8_by_8
	sty	t		; t   = multiplier, consumed from bit 7 down
	sta	t+1		; t+1 = multiplicand
	lda	#0		; A   = running product
	ldy	#8		; one pass per multiplier bit
lloop8
	asl			; product *= 2
	asl	t		; next multiplier bit -> carry
	bcc	lnext8
	clc
	adc	t+1		; bit was set: product += multiplicand
lnext8
	dey
	bne	lloop8
	ldx	t		; t has been shifted out completely, so X = 0
	ldy	#0
	rts

/******************************************************************************/
; YXA * T -> YXA (24-bit result)
;
; Shift-and-add multiply, most significant multiplier bit first.
; In:   A / X / Y = low / middle / high byte of the first operand,
;       t..t+2    = second operand (low byte first)
; Out:  A / X / Y = low / middle / high byte of the low 24 product bits;
;       the same value is left in t..t+2
; Stack: six bytes are pushed as working storage and dropped before return

	.global	__mul_24_by_24
__mul_24_by_24
	pha
	txa
	pha
	tya
	pha
	lda	t+2
	pha
	lda	t+1
	pha
	lda	t
	pha
	tsx			; X indexes the stacked operands from here on
/* now A is in 0106,x   X in 0105,x  Y in 0104,x  and  T in 0101,x  */
	lda	#0
	sta	t		; t..t+2 becomes the running product
	sta	t+1
	sta	t+2
	ldy	#24		; one pass per multiplier bit
lloop24
	asl	t		; product *= 2
	rol	t+1
	rol	t+2
	asl	$0106,x		; shift the stacked AXY operand left,
	rol	$0105,x
	rol	$0104,x		; its top bit ends up in carry
	bcc	lnext24
	clc			; bit was set: product += stacked copy of T
	lda	t
	adc	$0101,x
	sta	t
	lda	t+1
	adc	$0102,x
	sta	t+1
	lda	t+2
	adc	$0103,x
	sta	t+2
lnext24
	dey
	bne	lloop24
	pla			; drop the six working bytes
	pla
	pla
	pla
	pla
	pla
	lda	t		; return the product in YXA
	ldx	t+1
	ldy	t+2
	rts

/******************************************************************************/
; XA * T -> YXA (16-bit result)
;
; Shift-and-add multiply, most significant multiplier bit first.
; In:   A / X   = low / high byte of the first operand,
;       t, t+1  = second operand (low byte first)
; Out:  A / X   = low / high byte of the low 16 product bits, Y = 0;
;       the same 16-bit value is left in t, t+1
; Stack: four bytes are pushed as working storage and dropped before return

	.global	__mul_16_by_16
__mul_16_by_16
	pha
	txa
	pha
	lda	t+1
	pha
	lda	t
	pha
	tsx			; X indexes the stacked operands from here on
/* now A is in 0104,x   X in 0103,x  and  T in 0101,x  */
	lda	#0
	sta	t		; t, t+1 becomes the running product
	sta	t+1
	ldy	#16		; one pass per multiplier bit
lloop16
	asl	t		; product *= 2
	rol	t+1
	asl	$0104,x		; shift the stacked XA operand left,
	rol	$0103,x		; its top bit ends up in carry
	bcc	lnext16
	clc			; bit was set: product += stacked copy of T
	lda	t
	adc	$0101,x
	sta	t
	lda	t+1
	adc	$0102,x
	sta	t+1
lnext16
	dey
	bne	lloop16
	pla			; drop the four working bytes
	pla
	pla
	pla
	lda	t		; return the product in XA, Y cleared
	ldx	t+1
	ldy	#0
	rts

/******************************************************************************/
; A * XY -> YXA (16-bit result)
;
; Shift-and-add multiply, most significant multiplier bit first.
; In:   A = 8-bit multiplier,
;       Y = low byte and X = high byte of the 16-bit operand
; Out:  A / X = low / high byte of the low 16 product bits, Y = 0;
;       the same 16-bit value is left in t, t+1
; Stack: three bytes are pushed as working storage and dropped before return

	.global	__mul_8_by_16
__mul_8_by_16
	pha
	txa
	pha
	tya
	pha
	tsx			; X indexes the stacked operands from here on
/* now A is in 0103,x   X in 0102,x    Y in 0101,x */
	lda	#0
	sta	t		; t, t+1 becomes the running product
	sta	t+1
	ldy	#8		; one pass per bit of the 8-bit multiplier
lloop816
	asl	t		; product *= 2
	rol	t+1
	asl	$0103,x		; next multiplier bit -> carry
	bcc	lnext816
	clc			; bit was set: product += 16-bit operand
	lda	t
	adc	$0101,x		; low byte (entry Y)
	sta	t
	lda	t+1
	adc	$0102,x		; high byte (entry X)
	sta	t+1
lnext816
	dey
	bne	lloop816
	pla			; drop the three working bytes
	pla
	pla
	lda	t		; return the product in XA, Y cleared
	ldx	t+1
	ldy	#0
	rts

/******************************************************************************/
; YXA (lo, hi) * 3 -> YXA
;
; Computes 2*value + value on a 24-bit quantity.
; In:   A / X / Y = low / middle / high byte
; Out:  A / X / Y = low / middle / high byte of value * 3 (low 24 bits)
; Uses: t..t+2 holds 2*value

	.global	__mul_24_by_con_3
__mul_24_by_con_3
	sta	t
	stx	t+1
	sty	t+2
	asl	t		; t..t+2 = 2 * value
	rol	t+1
	rol	t+2
	clc
	adc	t		; A still holds the original low byte
	pha			; park the low result; pha leaves carry alone
	txa
	adc	t+1
	tax
	tya
	adc	t+2
	tay
	pla			; low result back into A
	rts

/******************************************************************************/
; XA (lo, hi) * 3 -> YXA
;
; Computes 2*value + value on a 16-bit quantity.
; In:   A / X = low / high byte
; Out:  A / X = low / high byte of value * 3 (low 16 bits), Y = 0
; Uses: t, t+1 holds 2*value

	.global	__mul_16_by_con_3
__mul_16_by_con_3
	sta	t
	stx	t+1
	asl	t		; t, t+1 = 2 * value
	rol	t+1
	clc
	adc	t		; A still holds the original low byte
	tay			; park the low result in Y; carry is untouched
	txa
	adc	t+1
	tax
	tya			; low result back into A
	ldy	#0
	rts

/******************************************************************************/
; A (lo) * 3 -> YXA
;
; Computes 2*A + A in the accumulator.  Only the low 8 bits are kept;
; X and Y are returned as zero.  t is used to remember the original value.

	.global	__mul_8_by_con_3
__mul_8_by_con_3
	sta	t		; t = 1 * A
	asl			; A = 2 * A
	clc
	adc	t		; A = 2 * A + A
	ldy	#0		; upper result bytes are always zero
	ldx	#0
	rts

/******************************************************************************/
; A << Y -> A
;
; In:   A = value, Y = shift count
; Out:  A = value shifted left Y times, Y = 0
; The lrts label below is also the early-exit target of other shift routines
; in this file, so it must stay on an rts.

	.global	__lsl8
__lsl8
	cpy	#0
	beq	lrts		; count 0: nothing to do

lloopl8	asl
	dey
	bne	lloopl8

lrts	rts

/******************************************************************************/
; XA << Y -> XA
;
; In:   A / X = low / high byte, Y = shift count
; Out:  A / X = value shifted left Y times, Y = 0
; Uses: t as the working copy of the high byte

	.global	__lsl16
__lsl16
	cpy	#0
	beq	lrts		; count 0: return unchanged (rts inside __lsl8)

	stx	t

lloopl16
	asl			; low byte, top bit -> carry
	rol	t		; carry -> high byte
	dey
	bne	lloopl16

	ldx	t
	rts

/******************************************************************************/
; YXA << T -> YXA
;
; In:   A / X / Y = low / middle / high byte, t = shift count
; Out:  A / X / Y = value shifted left by the count
; Uses: t+1 / t+2 as working copies of the middle / high byte;
;       t itself is counted down to 0 by the loop

	.global	__lsl24
__lsl24
	stx	t+1
	ldx	t		; test the count (X is reloaded before return)
	beq	lend		; count 0: A and Y are untouched, restore X

	sty	t+2

lloopl24
	asl			; low byte, top bit -> carry
	rol	t+1
	rol	t+2
	dec	t
	bne	lloopl24

	ldy	t+2
lend	ldx	t+1
	rts

/******************************************************************************/
; A >> Y -> A   (logical, zero filled)
;
; In:   A = value, Y = shift count
; Out:  A = value shifted right Y times, Y = 0

	.global	__lsr8
__lsr8
	cpy	#0
	beq	lrts		; count 0: return unchanged (rts inside __lsl8)

lloopr8	lsr
	dey
	bne	lloopr8

	rts

/******************************************************************************/
; XA >> Y -> XA   (logical, zero filled)
;
; In:   A / X = low / high byte, Y = shift count
;       (the count is taken from Y, not from t)
; Out:  A / X = value shifted right Y times, Y = 0
; Uses: t as the working copy of the high byte

	.global	__lsr16
__lsr16
	cpy	#0
	beq	lrts		; count 0: return unchanged (rts inside __lsl8)

	stx	t

lloopr16
	lsr	t		; high byte, bottom bit -> carry
	ror			; carry -> low byte
	dey
	bne	lloopr16

	ldx	t
	rts

/******************************************************************************/
; YXA >> T -> YXA   (logical, zero filled)
;
; In:   A / X / Y = low / middle / high byte, t = shift count
; Out:  A / X / Y = value shifted right by the count
; Uses: t+1 / t+2 as working copies of the middle / high byte;
;       t is left unchanged

	.global	__lsr24
__lsr24
	stx	t+1
	ldx	t		; test the count (X is reloaded before return)
	beq	lend24		; count 0: A and Y are untouched, restore X
	sty	t+2

	ldy	t		; Y = loop counter
	cpy	#23
	beq	lfast_23	; shift by 23 has a shortcut

lloopr24
	lsr	t+2		; high byte, bottom bit -> carry
	ror	t+1
	ror			; carry -> low byte
	dey
	bne	lloopr24

	ldy	t+2
lend24	ldx	t+1
	rts

; Shift by 23: the only bit that survives is bit 23, which is bit 7 of the
; HIGH byte.  A still holds the low byte at this point, so the high byte has
; to be fetched from t+2 before its top bit is extracted.
lfast_23
	lda	t+2
	asl			; bit 7 -> carry
	rol			; carry -> bit 0
	and	#1		; keep just that bit
	ldx	#0
	ldy	#0
	rts

/******************************************************************************/
; A >> Y -> A   (arithmetic, sign bit replicated)
;
; In:   A = value, Y = shift count
; Out:  A = value shifted right Y times with bit 7 preserved, Y = 0
; The lrts2 label below is also the early-exit target of __asr16.

	.global	__asr8
__asr8
	cpy	#0
	beq	lrts2		; count 0: nothing to do

lloopa8	cmp	#$80		; carry = bit 7 of A (the sign)
	ror			; shift right, sign comes back in at the top
	dey
	bne	lloopa8

lrts2	rts

/******************************************************************************/
; XA >> Y -> XA   (arithmetic, sign bit replicated)
;
; In:   A / X = low / high byte, Y = shift count
;       (the count is taken from Y, not from t)
; Out:  A / X = value shifted right Y times with the sign preserved, Y = 0
; Uses: t as the working copy of the low byte

	.global	__asr16
__asr16
	cpy	#0
	beq	lrts2		; count 0: return unchanged (rts inside __asr8)

	sta	t		; low byte lives in t during the loop
	txa			; high byte is worked on in A

lloopa16
	cmp	#$80		; carry = sign bit of the high byte
	ror			; high byte, sign comes back in at the top
	ror	t		; carry -> low byte
	dey
	bne	lloopa16

	tax			; high byte back to X
	lda	t		; low byte back to A
	rts

/******************************************************************************/
; YXA >> T -> YXA   (arithmetic, sign bit replicated)
;
; In:   A / X / Y = low / middle / high byte, t = shift count
; Out:  A / X / Y = value shifted right by the count, sign preserved
; Uses: t+1 / t+2 as working copies of the middle / high byte;
;       t is left unchanged
;
; The shift count stays in t and is counted in X, the same convention as
; __lsl24 and __lsr24.  The low byte stays in A for the whole routine so the
; count in t is never overwritten.

	.global	__asr24
__asr24
	stx	t+1
	ldx	t		; X = loop counter
	beq	lenda24		; count 0: A and Y are untouched, restore X

	sty	t+2

lloopa24
	cpy	#$80		; carry = sign bit; Y keeps the original high
				; byte and the sign never changes while shifting
	ror	t+2		; high byte, sign comes back in at the top
	ror	t+1
	ror			; carry -> low byte
	dex
	bne	lloopa24

	ldy	t+2
lenda24	ldx	t+1
	rts

/******************************************************************************/
; (r0) <- (r1) for r2 bytes
;
; In:   r0 = destination address, r1 = source address, r2 = byte count
;       (only the low 16 bits of each are used)
; The copy direction is chosen by a 16-bit unsigned compare of r1 and r0:
; when source >= destination the bytes are copied upwards from the start,
; otherwise downwards from the end.
; r0 and r1 are modified by every path except the short lfast loop.
; Clobbers A, X, Y.

	.global	memcpy_c2c
memcpy_c2c

	ldy	r2		; Y = count low
	ldx	r2+1		; X = count high
	bne	l130
	tya
	bne	l130
lrtscpy
	rts			; count == 0 (also the common exit label)

l130	lda	r1		; 16-bit compare: carry set when r1 >= r0
	cmp	r0
	lda	r1+1
	sbc	r0+1
	bcs	l190		; source >= destination: copy upwards

; Source below destination: copy downwards.
; First move both pointers one past the end of their regions.
l140	clc
	tya
	adc	r1
	sta	r1
	txa
	adc	r1+1
	sta	r1+1

l150	clc
	tya
	adc	r0
	sta	r0
	txa
	adc	r0+1
	sta	r0+1

l160	txa			; done when the 16-bit count in X:Y is 0
	bne	l165
	tya
	beq	lrtscpy

l165	tya			; 16-bit decrement of the count
	bne	l166
	dex
l166	dey

l170	lda	r0		; 16-bit pre-decrement of the destination
	bne	l171
	dec	r0+1
l171	dec	r0

	lda	r1		; 16-bit pre-decrement of the source
	bne	l172
	dec	r1+1
l172	dec	r1

	lda	(r1)
	sta	(r0)

l180	bra	l160

	rts			; never reached: the bra above always branches

l190	txa
	bne	l200		; 256 bytes or more: general upward loop

/* Less than 256 - do it quickly */

	tya
	tax			; X = byte count (non-zero here)
	ldy	#-1		; Y is incremented before use, so it starts at 0

lfast	iny
	lda	(r1),y
	sta	(r0),y
	dex
	bne	lfast
	rts

l200	txa			; done when the 16-bit count in X:Y is 0
	bne	l205
	tya
	beq	lrtscpy

l205	tya			; 16-bit decrement of the count
	bne	l206
	dex
l206	dey

l210	lda	(r1)
	sta	(r0)

	inc	r0		; 16-bit post-increment of the destination
	bne	l211
	inc	r0+1
l211

	inc	r1		; 16-bit post-increment of the source
	bne	l220
	inc	r1+1

l220	bra	l200

/******************************************************************************/
; (r0) <- r1 for r2 bytes
;
; In:   r0 = destination address, r1 = fill value (low byte is used),
;       r2 = byte count (low 16 bits are used)
; Counts below 256 use an indexed loop and leave r0 unchanged; larger counts
; advance r0 one byte at a time.
; Clobbers A, X, Y.

	.global	memset_c2c
memset_c2c
	ldx	r2+1
	bne	lbig		; 256 bytes or more

	ldx	r2
	beq	lrtsset		; count == 0
	ldy	#-1		; Y is incremented before use, so it starts at 0
	lda	r1

lsmall_loop
	iny
	sta	(r0),y
	dex
	bne	lsmall_loop

lrtsset	rts

lbig	ldy	r2		; X:Y = 16-bit count, X is non-zero here
;;	beq	lrtsset

lbig_loop
	lda	r1
	sta	(r0)

	inc	r0		; 16-bit increment of the destination
	bne	l10
	inc	r0+1

l10	tya			; 16-bit decrement of the count
	bne	l20
	dex
l20	dey

	bne	lbig_loop	; loop while the low count byte is non-zero...
	txa
	bne	lbig_loop	; ...or the high count byte is non-zero

	rts

/******************************************************************************/

	.global	setjmp_c2c
; Save the current context into the 6-byte buffer addressed by r0.
; Buffer layout: [0]=A  [1]=X  [2]=Y  [3]=stack pointer as it will be after
; this routine has returned  [4],[5]=return address bytes as found on the
; stack (low byte first).
; Returns with the low byte of r0 set to 0.  A, X and Y are clobbered.
; NOTE(review): only r0 itself is cleared; r0+1 and r0+2 still hold the upper
; bytes of the buffer address - confirm the return value is 8 bits wide.
setjmp_c2c

	sta	(r0)		; buf[0] = a

	phy			; park Y, it is needed as the buffer index

	ldy	#1		; buf[1] = x
	txa
	sta	(r0),y

	iny			; buf[2] = y
	pla
	sta	(r0),y

	iny			; buf[3] = sp
	tsx
	txa
	ina			; +2 skips the return address, giving the
	ina			; stack pointer value after the rts below
	sta	(r0),y

	iny			; buf[4,5] = rtn addr
	pla
	tax			; keep the low byte so it can be pushed back
	sta	(r0),y
	iny
	pla
	sta	(r0),y
	pha			; put the return address back on the stack
	phx

	stz	r0		; return 0
	rts

	.global	longjmp_c2c
; Restore the context saved by setjmp_c2c from the buffer addressed by r0
; and resume after the matching setjmp_c2c call.
; In:   r0 = buffer address, r1 = value to hand back (low byte is used)
; Out:  stack pointer, A, X and Y as saved in the buffer; low byte of r0 set
;       to the low byte of r1.  Does not return to its own caller.
; Uses: t, t+1 for the resume address
longjmp_c2c

	ldy	#3		; old sp
	lda	(r0),y
	tax
	txs

	iny			; old rtn
	lda	(r0),y
	sta	t
	iny
	lda	(r0),y
	sta	t+1
	inc	t		; the saved value is a stacked return address,
	bne	l15		; which is one less than the address to resume
	inc	t+1		; at; add 1 so jmp (t) lands on the right spot

l15	ldy	#2		; old y
	lda	(r0),y
	pha			; parked on the stack, Y is still the index

	dey			; old x
	lda	(r0),y
	tax

	lda	(r0)		; old a

	ply

	pha			; rtn value to r0
	lda	r1
	sta	r0
	pla

	jmp	(t)

/******************************************************************************/
; r0_24 / r1_24 -> r0_24
;
; Signed 24-bit division built on __udiv_c2c.
; In:   r0 = dividend, r1 = divisor (sign taken from bit 7 of byte +2)
; Out:  r0 = quotient; negated when the operand signs differ
; r1 is left as its absolute value.  t..t+2 is used by __udiv_c2c.

	.global	__div_c2c
__div_c2c
	lda	r0+2
	eor	r1+2		; bit 7 = sign of the quotient
	pha
	jsr	lunsign
	jsr	__udiv_c2c
	pla			; pla sets N from the saved sign byte
	bmi	lresign		; signs differed: negate the quotient
	rts

; Replace r0 and r1 by their absolute values (0 - value when negative).
; Shared with __rem_c2c.
lunsign
	lda	r0+2
	bpl	lunsign1
	sec
	lda	#0
	sbc	r0
	sta	r0
	lda	#0
	sbc	r0+1
	sta	r0+1
	lda	#0
	sbc	r0+2
	sta	r0+2
lunsign1
	lda	r1+2
	bpl	lunsign2
	sec
	lda	#0
	sbc	r1
	sta	r1
	lda	#0
	sbc	r1+1
	sta	r1+1
	lda	#0
	sbc	r1+2
	sta	r1+2
lunsign2
	rts

; Negate r0 in place (r0 = 0 - r0) and return.  Shared with __rem_c2c.
lresign
	sec
	lda	#0
	sbc	r0
	sta	r0
	lda	#0
	sbc	r0+1
	sta	r0+1
	lda	#0
	sbc	r0+2
	sta	r0+2
	rts
	
	.global	__udiv_c2c
; Unsigned 24-bit restoring division.
; In:   r0 = dividend, r1 = divisor
; Out:  r0 = quotient, t..t+2 = remainder (low byte first)
; Clobbers A, X, Y.  A zero divisor is not checked for.
;
; r0 doubles as dividend and quotient: every pass shifts the next dividend
; bit out of its top and the new quotient bit into its bottom.  The loop
; counter travels on the stack because A, X and Y are all needed for the
; trial subtraction; pla / tax / dex / txa leave carry alone, so the dividend
; bit shifted out at the end of one pass is still in carry at the next.
__udiv_c2c
	lda	#0
	sta	t		; remainder = 0
	sta	t+1
	sta	t+2

	lda	#24		; one pass per dividend bit
	asl	r0		; first dividend bit -> carry
	rol	r0+1
	rol	r0+2
lloopdiv
	pha			; park the loop counter
	rol	t		; remainder = remainder * 2 + dividend bit
	rol	t+1
	rol	t+2		; all three bytes must take part in the shift,
				; otherwise remainders of $10000 and up are lost
	sec
	lda	t		; trial subtraction: remainder - divisor
	sbc	r1
	tax
	lda	t+1
	sbc	r1+1
	tay
	lda	t+2
	sbc	r1+2
	bcc	lnextdiv	; borrow: divisor does not fit, quotient bit 0
	sta	t+2		; it fits: keep the difference, quotient bit 1
	sty	t+1		; (the stores leave carry set)
	stx	t
lnextdiv
	rol	r0		; quotient bit in, next dividend bit out
	rol	r0+1
	rol	r0+2
	pla
	tax
	dex
	txa
	bne	lloopdiv
	rts

/******************************************************************************/
; r0_24 % r1_24 -> r0_24
;
; Signed 24-bit remainder built on __urem_c2c.
; In:   r0 = dividend, r1 = divisor
; Out:  r0 = remainder, carrying the sign of the dividend
; r1 is left as its absolute value (see lunsign).

	.global	__rem_c2c
__rem_c2c
	lda	r0+2		; remember the sign of the dividend
	pha
	jsr	lunsign
	jsr	__urem_c2c
	pla			; pla sets N from the saved sign byte
	bmi	lresign		; dividend was negative: negate the remainder
	rts

	.global	__urem_c2c
; Unsigned 24-bit remainder: r0 % r1 -> r0.
; The remainder is picked up from t..t+2 after calling __udiv_c2c, copied
; into r0, and also returned in A (low), X (middle) and Y (high).
__urem_c2c
	jsr	__udiv_c2c
	lda	t		; low byte
	sta	r0
	ldx	t+1		; middle byte
	stx	r0+1
	ldy	t+2		; high byte
	sty	r0+2
	rts
/******************************************************************************/
