; run length de-coder
; from 10000
; to 400000
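; input requirements -  a0 is output address
;			d0 is input object number, used as an index into the table
;			   pointed to by rlepoint, which then gives :
;		:	a1 is input address
;		:	a2 is limit address - new idea, to prevent crashes through bus failure
;		:	(a1/a2 as described apply to the old 68000 decoder kept under .if 0
;		:	 further down; the GPU version reads only the input address from
;		:	 the table and passes it on together with a0)
; on exit	:	a0 is now output as the follow-on address, to allow sequential decodes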

.text

; rldin - run-length decode one object by handing the job to the GPU.
; in :	d0.w = object number, scaled by 4 to index the longword table
;	       whose base address is stored at rlepoint
;	a0   = output (target) address
; out:	a0   = follow-on address read back from blitsem
;	d0-d1/a1-a2 are saved on entry and restored on exit
rldin::	movem.l	d0/d1/a1/a2,-(sp)
	movea.l	rlepoint.w,a2	; a2 = base of the object table
	lsl.w	#2,d0		; object number * 4 = byte offset of its entry
	adda.w	d0,a2		; a2 -> entry for this object (word offset, sign-extended)
	lea	Gpu_data,a1	; a1 -> parameter area handed to the GPU
	move.l	(a2)+,(a1)+	; parameter 0 : source address taken from the table
	move.l	a0,(a1)+	; parameter 1 : target address supplied by the caller

	RunGPU	15		; macro defined elsewhere - presumably starts GPU routine 15
	waitgpu			; macro defined elsewhere - presumably waits for completion
	move.l	blitsem.w,d0	; value left in blitsem is the follow-on address
	bpl.b	.valid		; non-negative : accept it
	illegal			; negative : stop on an illegal instruction trap
.valid:	movea.l	d0,a0		; return the follow-on address in a0
	clr.l	blitsem.w	; leave blitsem zeroed for the next user
.ended:	movem.l	(sp)+,d0/d1/a1/a2
	rts


; new addition not yet implemented : marker word before any kind of data storage : $B783
; chosen because it's non-existent in the ROM so far, and would be a totally useless command
; if it were in the program.
; this will be followed by a type identifier, for various types of data including RLE (5 types) and
; RELOC (1 type) and possibly LHZ or similar, and possibly some specialist encoding types.

.if 0		; temporary test area, used for time-comparing two decoders (68000 vs GPU)
; rld_test - timing harness, normally assembled out.
; Decodes object 16 into Exdisp 3000 times by driving the GPU directly, then
; 3000 more times by calling rldin, with a border macro call before, between
; and after the two loops (presumably a visible colour change to time against -
; the border macro is defined elsewhere).
; NOTE(review): rldin itself now uses the GPU, so as written both loops time
; the GPU path; the 68000 figure quoted below predates that change.
; All registers used (d0-d1/a0-a2) are saved and restored.
rld_test::

	movem.l	d0-d1/a0-a2,-(sp)
	border	red,1
	move.l	#3000-1,d1	; dbra executes count+1 passes : 2999 gives exactly 3000
lp1:	move.l	#Exdisp,a0	; target address, reloaded every pass
	move.l	#Gpu_data,a1	; a1 -> GPU parameter area
	movea.l	rlepoint.w,a2	; a2 = base of the object table
	lea	16*4(a2),a2	; a2 -> entry for object 16 (4 bytes per entry)
	move.l	(a2)+,(a1)+	; parameter 0 : source address
	move.l	a0,(a1)+	; parameter 1 : target address
	RunGPU	15
	waitgpu
	clr.l	blitsem.w	; discard the follow-on address (same addressing as rldin)
	dbra	d1,lp1
	border	-1,1
	move.l	#3000-1,d1	; again exactly 3000 passes
lp2:	move.l	#Exdisp,a0	; rldin advances a0, so reset the target each pass
	move.l	#16,d0		; object number 16, as in the first loop
	bsr	rldin
	dbra	d1,lp2
	border	0,1
	movem.l	(sp)+,d0-d1/a0-a2
	rts
.endif

; timing results : 3000 iterations of same graphic decode 
; 68000 : 3m48s		GPU : 0m33s
;    or : 228s              : 33s
; which is almost 7 times faster

; old code taken out when GPU version inserted
.if 0
; Original 68000 decode loop, kept for reference only (never assembled).
; This is a fragment : it has no entry label of its own and branches to
; .ended, which is not defined here (a label of that name exists in rldin).
;
; Register use, as read from the code below :
;	a2	on entry points at two consecutive longwords
;	a1	loaded with the first longword  - read pointer into the packed data
;	a2	loaded with the second longword - limit compared against a1
;	a0	write pointer, advanced by every word stored
;	d0	packet header / loop counter
;	d1	word value being repeated
;
; Packet format, as decoded below :
;	header word with bit 15 clear : the next word is written header+1 times
;	header word with bit 15 set   : the low 15 bits give the number of
;					literal words that follow and are copied
;	header word of exactly $8000  : end of data
	movea.l	(a2)+,a1
	movea.l	(a2)+,a2

	moveq.l	#0,d0		; clear the upper words so the long arithmetic on d0 below is clean
	moveq.l	#0,d1

; NOTE(review): ble is a signed test on a2-a1 - fine while both addresses
; have bit 31 clear; confirm if this code is ever revived.
; NOTE(review): the limit is only tested here, between packets, so a single
; packet can still read past it.
.din:	cmpa.l	a1,a2
	ble.b	.ended		; drop out when over run - this must imply a failed decode, but at least stops massive destruction
	move.w	(a1)+,d0	; fetch packet header
	btst	#15,d0
	bne.b	.singles	; bit 15 set : literal words (or terminator)

	move.w	(a1)+,d1	; bit 15 clear : fetch the word to repeat

.wloop:	move.w	d1,(a0)+	; dbra on d0 stores the word header+1 times
;	addq.l	#2,a0		; alternative for debugging
	dbra	d0,.wloop

	bra.b	.din

.singles:
	sub.l	#$8000,d0	; strip the flag bit, leaving the literal count
	beq.b	.ended		; count of zero (header $8000) ends the decode
	subq.l	#1,d0		; dbra needs count-1

.wl2:	move.w	(a1)+,(a0)+	; copy one literal word
;	addq.l	#2,a0		; alternative for debugging
;	addq.l	#2,a1
	dbra	d0,.wl2

	bra.b	.din
.endif

