@ vim: set ft=armv4 noet:

@ RGB: packs three colour components into one value, red in bits 0-4,
@ green shifted left by 5 and blue shifted left by 10.
@ Takes effect only when the file is run through the C preprocessor.
@ Each argument is parenthesised so that an argument containing a
@ lower-precedence operator (for example 1|2) is shifted as a whole.
#define RGB(r,g,b) ((r)|((g)<<5)|((b)<<10))

.arch armv4
.cpu  arm7tdmi

.section .text.crt0,"ax",%progbits

.arm

@ _start: first instruction of the .text.crt0 section, assembled in ARM state.
@ It does nothing but branch to main, which loads the base registers and
@ hands over to the Thumb code.
.type _start,%function
.globl _start
_start:
	b main

@ payload: start of the Thumb routine that main$thumb copies to 0x03000000
@ and jumps to. This first part programs the display control registers,
@ writes one palette entry and asks the BIOS (swi 0x10) to expand the
@ embedded tile data to the address held in r7.
@ In (as left by main$thumb):
@   r1 = 0x03000000 (address this code was copied to)
@   r4 = 0x04000000 (MMIO base)
@   r7 = 0x06000000 (VRAM base)
@ Out: r5 = 0x05000000, r6 = 0x80, then branches over the embedded data
@ to graphics_end.
.type payload,%function
.globl payload
.thumb
.thumb_func
payload:
	mov  r3, r1

	@ set display mode
	@ a single 32-bit load fetches dispcntval (low half) and bg2cntval (high half)
	ldr  r0, dispcntval
	strh r0, [r4] @ DISPCNT @ this one's very dark, so I'm putting it in a place where it's not visible for long
	@ r5 = 5 << 24 = 0x05000000, built in two steps interleaved with the BG2CNT setup
	mov  r5, #5
	@ r0 = bg2cntval
	lsr  r0, #16
	lsl  r5, #24
	strh r0, [r4,#0x0c] @ BG2CNT
	mov  r1, #1

	@ldr r5, =0x05000000 @ CGRAM
	@ the negated BG2CNT value doubles as the colour stored at palette offset 2 (entry 1)
	neg  r0, r0 @ looks nice enough
	strh r0, [r5,#2]
	@bl  newcol
	@ r1 = 1 << 13, ORed into the current BG3CNT value (read-modify-write)
	lsl  r1, #13
	ldrh r0, [r4,#0x0e] @ BG3CNT
	orr  r0, r1
	strh r0, [r4,#0x0e]
	@ r6 = 0x80, shifted to 0x800 in graphics_end to form the tilemap offset
	mov  r6, #0x80

	@ unpack graphics into VRAM
	@ Both adds below form a pc-relative address from the label that follows
	@ the instruction. NOTE(review): Thumb add-with-pc uses the word-aligned
	@ pc, so this form presumably only yields the exact label address while
	@ each add sits at an address that is 2 mod 4 - confirm before inserting
	@ or removing any instruction above this point.
	@ldr r0, =graphics
	add r0, pc, #(graphics-.Lnextin0)
.Lnextin0:
	mov r1, r7
	@ldr r2, =unpackdata
	add r2, pc, #(unpackdata-.Lnextin1)
.Lnextin1:
	@ BIOS call 0x10 (BitUnPack in GBATEK naming): r0 = source,
	@ r1 = destination, r2 = parameter block
	swi 0x10

	@ skip the data that is embedded in the instruction stream
	b graphics_end

@ unpackdata: parameter block whose address payload passes in r2 to swi 0x10.
@ The field meanings below follow the BitUnPack description in GBATEK.
@ It sits inside the instruction stream, so payload branches around it.
.type unpackdata,%object
unpackdata:
	.2byte graphics_end-graphics @ source data length in bytes
	.byte  1 @ source unit width: 1 bit per pixel (2 color input)
	.byte  8 @ destination unit width: 8 bits per pixel (256 color output)
	@ last field: data offset 0 with the top (zero-data) flag bit clear
	.4byte 0
@ graphics: source bitmap for swi 0x10, five tiles of eight bytes each
@ (one byte per pixel row, one bit per pixel). With the 1-to-8 bit expansion
@ requested in unpackdata every tile grows to 0x40 bytes, which matches the
@ destination offsets quoted in the tile comments below.
@ The length used by unpackdata is graphics_end - graphics.
.type graphics,%object
graphics:
	@ tile 0 (xxxh -> 000h) : all bits clear
	.8byte 0
	@ tile 1 (800h -> 040h) : TOP LEFT
	.byte 0b00000000
	.byte 0b10110111
	.byte 0b10010111
	.byte 0b10110101
	.byte 0b00000000
	.byte 0b11101010
	.byte 0b11100100
	.byte 0b10101010
	@ tile 2 (801h -> 080h) : TOP RIGHT
	.byte 0b00000000
	.byte 0b10101101
	.byte 0b01001101
	.byte 0b01010110
	.byte 0b00000000
	.byte 0b11000100
	.byte 0b01001110
	.byte 0b01101010
	@ tile 3, 4 (810h -> 0C0h, 811h -> 100h) : BOTTOM LEFT, RIGHT
	@ first eight bytes: tile 3 (BOTTOM LEFT)
	.byte 0b00000000
	.byte 0b00100101
	.byte 0b01110111
	.byte 0b01010101
	.byte 0b00000000
	.byte 0b01010111
	.byte 0b01110101
	.byte 0b01110101

	@ next eight bytes: tile 4 (BOTTOM RIGHT)
	.byte 0b00000000
	.byte 0b10101111
	.byte 0b01001111
	.byte 0b01000101
	.byte 0b00000000
	.byte 0b00110101
	.byte 0b00110010
	.byte 0b01010010
@ graphics_end: execution resumes here after the embedded tile data.
@ Writes four tilemap entries, sets the BG2 scale and the blend registers,
@ and initialises the counters used by mainloop and vbl.
@ In:  r4 = MMIO base, r6 = 0x80, r7 = VRAM base (set up by main$thumb / payload)
@ Out: r1 = bldcnt value, r6 = 0 (scroll counter), r7 = 1 (scroll direction)
graphics_end:

	@ set up the tilemap
	@ r6 = (0x80 << 4) + r7 = VRAM base + 0x800
	lsl  r6, #4
	add  r6, r7
	ldr  r0, tilemap@=0x04030201
	@ low halfword 0x0201 goes to the start of the map
	strh r0, [r6]
	@ high halfword 0x0403 goes 0x10 bytes further on
	lsr  r0, #16
	strh r0, [r6,#0x10]

	@ NOTE(review): r3 is not read again in the visible code, so this add
	@ presumably only matters for the byte layout - confirm before removing.
	add r3, r3 @ WRAM->VRAM
	@ r3 == twice its previous value (0x06000000 if swi 0x10 left r3 intact)
	@ r4 == MMIO
	@ r5 == CGRAM (until mainloop reuses r5 as scratch)
	@ r1 = 1 << 5 = 0x20
	mov  r1, #1
	lsl  r1, #5
	@ the word store puts 0x20 in the low halfword and 0 in the high halfword
	str  r1, [r4, #0x20] @ BG2PA, PB
	@ NOTE(review): per GBATEK offset 0x26 is BG2PD alone. BG2PC (0x24) is
	@ not written here. The original comment read BG2PC, PD.
	strh r1, [r4, #0x26] @ BG2PD

	ldr r1, bldcnt@=((1<<3)|(1<<6)|(1<<13)|(11<<16)|(5<<24))
	@ NOTE(review): per GBATEK the word at 0x50 covers BLDCNT (0x50) and
	@ BLDALPHA (0x52). BLDY is at 0x54 and is not written. The original
	@ comment read BLDCNT, BLDY.
	str r1, [r4, #0x50] @ BLDCNT, BLDALPHA

	@ r6 = scroll counter, r7 = step added to it in vbl
	mov r6, #0
	mov r7, #1
@ mainloop: busy-polls the halfword at MMIO offset 6 (the scanline counter).
@ When it reads 160 control goes to vbl. On every other newly seen value
@ r2 grows by 8 and BG2X is rewritten, so each scanline gets its own X origin:
@   r7 == 1 : BG2X = -r6 + r2 - 0x400
@   else    : BG2X = -r6 - r2 + 0x400
@ Registers: r1 = last scanline value handled (initially the bldcnt value),
@ r2 = per-line accumulator (cleared in vbl), r6 = scroll counter,
@ r7 = direction, r0 and r5 = scratch.
@ The test for 160 comes before the same-line test, so vbl is entered again
@ on every poll for as long as the counter reads 160.
.thumb_func
mainloop:
	@ wait for vbl
	ldrh r0, [r4, #6]
	cmp r0, #160
	beq vbl

	@ same scanline as on the previous pass: nothing to do yet
	cmp r0, r1
	beq mainloop
	mov r1, r0

	add r2, #0x8
	neg r0, r6

	cmp r7, #1
	beq .Lpos
	sub r0, r2

	@ r5 = 1 << 10 = 0x400 (this overwrites the CGRAM pointer held in r5)
	mov r5, #1
	lsl r5, #10
	add r0, r5
	b .Lend
.Lpos:
	add r0, r2

	@ r5 = 1 << 10 = 0x400
	mov r5, #1
	lsl r5, #10
	sub r0, r5
.Lend:
	str r0, [r4, #0x28] @ BG2X

	b mainloop

@ vbl: reached from mainloop whenever the scanline counter reads 160.
@ Updates two BG3 registers, steps the scroll counter r6 by r7, flips the
@ sign of r7 when r6 reaches 0 or 0xC00, clears r2 and returns to mainloop.
@ Clobbers r0.
vbl:
	@ bleh scrolling
	@ NOTE(review): GBATEK lists the affine parameter registers as write-only,
	@ so the value read here is presumably not the value last written -
	@ confirm the intent.
	ldrh r0, [r4, #0x34] @ BG3PC
	lsr r0, #2
	strh r0, [r4, #0x34]
	@ NOTE(review): per GBATEK offset 0x38 is BG3X (BG3Y is 0x3C). The
	@ original comment read BG3Y.
	str r6, [r4, #0x38] @ BG3X

	@ the bne relies on this add updating the flags: flip direction when r6 hits 0
	add r6, r7
	bne .Lnop1
	neg r7, r7
.Lnop1:
	@ The next two instructions cancel out and leave r6 unchanged.
	@ NOTE(review): by my count the add sits at byte offset 0xB2 from _start
	@ and its low byte is the immediate 0x96, which would be the fixed
	@ header value the comment refers to - confirm against the linked image
	@ and do not change the size of any code before this point.
	add r6, #0x96 @ fixed required magic bytes
	sub r6, #0x96 @ lol
	@ r0 = 0xC0 << 4 = 0xC00, the upper turning point of the scroll counter
	mov r0, #0xc0
	lsl r0, #4
	cmp r6, r0
	bne .Lnop0
	neg r7, r7
.Lnop0:

	@ restart the per-line accumulator used by mainloop
	mov r2, #0
	b mainloop

@ main: ARM-state setup reached from _start.
@ Loads the base addresses, computes the address of payload and the number
@ of words to copy, then switches to Thumb state at main$thumb.
@ Out: r7 = 0x03000000, r4 = 0x04000000, r1 = address of payload,
@      r2 = (payload_end - payload) >> 2, r0 = address of main$thumb with bit 0 set
.arm
.type main,%function
.globl main
main:
	mov r7, #0x03000000
	mov r4, #0x04000000
	@ in ARM state pc reads as this instruction + 8, which is .Lnextins + 4,
	@ hence the extra -4 in the offset
	add r1, pc, #(payload-.Lnextins-4)
.Lnextins:
	@ pc + 5 = address just past the bx below (main$thumb) with bit 0 set,
	@ which makes the bx enter Thumb state
	add r0, pc, #5
	@ length of the region to copy, in 32-bit words
	mov r2, #((payload_end - payload) >> 2)
	bx  r0

@ main$thumb: Thumb continuation of main.
@ Copies the region starting at payload to 0x03000000 with BIOS call 0x0C
@ (CpuFastSet in GBATEK naming), rebuilds the base registers and jumps to
@ the copy.
@ In:  r1 = address of payload, r2 = word count, r7 = 0x03000000
@ Out: r1 = 0x03000000, r7 = 0x06000000, r4 = 0x04000000, pc = 0x03000000
.thumb
.thumb_func
.type main$thumb,%function
main$thumb:
	@ r0 = source, r1 = destination, r2 = word count (already set by main)
	mov r0, r1
	mov r1, r7
	swi 0x0C
	@ r1 = 0x03000000 again: jump target and the value payload copies into r3
	mov r1, r7
	@ r7 = 0x03000000 << 1 = 0x06000000
	lsl r7, #1
	@ r4 = 0x40 << 20 = 0x04000000, rebuilt after the BIOS call
	mov r4, #0x40
	lsl r4, #20
	mov pc, r1

@	.balign 4
@ dispcntval / bg2cntval: two adjacent halfwords that payload fetches with a
@ single 32-bit pc-relative ldr. dispcntval therefore has to stay
@ word-aligned and bg2cntval has to follow it directly.
.type dispcntval,%object
dispcntval:
	.2byte 2 | (1<<10) | (1<<11) @ mode 2, enable bg2 and 3
.type bg2cntval,%object
bg2cntval:
	@ NOTE(review): only bits 7 and 8 are set in this value. Bit 13 is ORed
	@ into BG3CNT by payload and is not part of this constant - confirm the
	@ inf. tiling remark below.
	.2byte (1<<7) | (1<<8) @ 256color, map base 1, inf. tiling

@ tilemap: tile numbers 1, 2, 3, 4 packed into one little-endian word
@ (presumably one byte per map entry). graphics_end stores the low halfword
@ (0x0201) at VRAM base + 0x800 and the high halfword (0x0403) 0x10 bytes
@ further on. Loaded with a pc-relative ldr, so it has to stay word-aligned.
.type tilemap,%object
tilemap:
	.4byte 0x04030201
@ bldcnt: 32-bit value that graphics_end stores to MMIO offset 0x50.
@ Low halfword  = (1<<3)|(1<<6)|(1<<13)
@ High halfword = 11 | (5<<8)
@ NOTE(review): per GBATEK the low half lands in BLDCNT and the high half in
@ BLDALPHA, which would make 11 and 5 the two blend coefficients - confirm.
@ Loaded with a pc-relative ldr, so it has to stay word-aligned.
.type bldcnt,%object
bldcnt:
	.4byte ((1<<3)|(1<<6)|(1<<13)|(11<<16)|(5<<24))

@ Literal pool flush point. Every ldr-equals form in the visible code is
@ commented out, so presumably nothing is emitted here.
.pool

@ payload_end: end marker used by main to size the copy of payload
@ ((payload_end - payload) >> 2 words). The string after the label is not
@ part of that count.
.globl payload_end
payload_end:
	.asciz "HNY ~pcy"
@	.asciz "poro/K2^TTN HNY"

