; multi-purpose run length encoding
; 1. convert old style 16 bit rle into new 8 bit paletised rle [variable size palette]
; 2. generate straight 8 bit paletised rle 
; 3. decode paletised rle - note that palette is SEPARATE from rle data, to allow multiple palettes

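; encoding as follows (every run code is followed by palette entry bytes) :
; 0 = use word for repeats - next two bytes are the count, high byte first, n+1 copies
; 80 = end of list
; 1-7f = n+1 copies
; 81-ff = n-80 singles - long groups of singles must be split (ugh!)
; 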

; rle2rlp - convert old style 16 bit rle into 8 bit palettised rle.
;
; Input stream (words, read from $100000) as interpreted by this routine:
;   bit 15 clear : repeat count, followed by one colour word
;   bit 15 set   : low 15 bits = number of single colour words that follow
;   $8000        : end of data
; Output stream (bytes, written from $180000):
;   0,hi,lo,entry : repeat with 16 bit count
;   1-$7f,entry   : repeat with 8 bit count (count copied unchanged from input)
;   $81-$ff,...   : (code-$80) single entries follow, at most $7f per group
;   $80           : end of list
; Each colour word is replaced by the entry number add_to_palette returns;
; the palette itself is built as words from $1f0000 upwards.
;
; in:       nothing - all three areas are at fixed addresses
; out:      a1 = byte after the end marker, a2 = first free palette slot
; clobbers: d0-d2/a0/a3 (a3 via add_to_palette)

rle2rlp::

	movea.l	#$100000,a0	; input area
	movea.l	#$180000,a1	; output area
	movea.l	#$1f0000,a2	; palette area
	moveq.l	#0,d0

in_loop:
	move.w	(a0)+,d0	; get old rle code
	bmi.b	singles		; bit 15 set = singles group or end marker
	beq.b	big		; count 0 must use the long form: a lone 0 byte
				; is the 'word count follows' prefix

mult:	cmpi.w	#$80,d0
	bmi.b	small		; 1-$7f fits in the one byte code

big:	move.b	#0,(a1)+	; long form prefix
	move.w	d0,d1
	lsr.w	#8,d1
	move.b	d1,(a1)+	; count, high byte

small:	move.b	d0,(a1)+	; count, low byte (the whole code in short form)

	move.w	(a0)+,d0	; read colour
	bsr	add_to_palette	; adds, if required, and returns entry number in d0.b
	move.b	d0,(a1)+

	bra	in_loop

singles:
	bclr	#15,d0		; zap the singles marker bit
	tst.w	d0
	beq.b	end_marker	; $8000 = end of input

next_part:
	cmpi.w	#$80,d0
	bmi.b	small2		; $7f or fewer left - one final group

	move.w	d0,d2		; use d2 to hold full counter
	move.w	#$7e,d0		; set counter for part of whole (dbra count, so $7f items)
	sub.w	#$7f,d2		; remove a chunk of same size
	move.b	#$ff,(a1)+	; write out '7f single items follow'
	bsr.b	colour_write
	move.w	d2,d0		; remainder is at least 1 here
	bra	next_part

small2:	bset	#7,d0		; set new type of marker bit
	move.b	d0,(a1)+	; $80+n = n singles follow
	bclr	#7,d0
	subq.b	#1,d0		; n-1 for dbra in colour_write
	bsr.b	colour_write
	bra	in_loop


; colour_write - convert d0.w+1 colour words from (a0)+ into palette entry
; bytes at (a1)+.  clobbers d0/d1 (and a3 via add_to_palette)
colour_write:
	move.w	d0,d1		; move counter out of way
write:	move.w	(a0)+,d0
	bsr	add_to_palette
	move.b	d0,(a1)+
	dbra	d1,write
	rts

end_marker:
	move.b	#$80,(a1)+	; end of list code
	; 'message' is a macro defined outside this file section
	message	"Conversion complete"	
	rts			; return here - previously fell through into
				; add_to_palette, which could append colour 0
				; to the palette before returning

; add_to_palette - look colour d0.w up in the palette, appending it if absent.
;
; The palette is a list of words starting at $1f0000; a2 points at the first
; free slot.  It is searched linearly from the start.
;
; in:       d0.w = colour, a2 = first free palette slot
; out:      d0.w = entry number of the colour
;           a2 advanced by one word if the colour had to be added
; clobbers: a3 (d1 is saved and restored)
;
; The callers in this file store the entry number with move.b, so entry
; numbers above $ff cannot be represented.  Asking for a 257th distinct colour
; therefore traps with 'illegal' instead of silently wrapping to entry 0.
add_to_palette:
	move.l	d1,-(sp)
	movea.l	#$1f0000,a3	; scanning address
	moveq.l	#0,d1		; d1 = entry number of the slot at a3

.b1:	cmpa.l	a2,a3
	beq.b	add_it		; reached the free slot - colour not present
	
	cmp.w	(a3)+,d0
	beq.b	got_it
	addq.l	#1,d1
	bra	.b1

add_it:	cmpi.l	#$100,d1	; would the new entry number still fit in a byte ?
	bcs.b	.room
	illegal			; palette overflow - more than 256 colours
.room:	move.w	d0,(a2)+	; add to palette
got_it:	move.w	d1,d0		; return entry number
	move.l	(sp)+,d1
	rts


	; rlpin - decode palettised rle by handing the job to the GPU
	; input data : a0 = output, a1 = input, a2 = palette
	; output data : a0 = follow on output - worth considering making a1 follow on input ?
	; all other registers are preserved (d0-d1/a1-a2 are saved and restored here)
	; RunGPU, waitgpu, Gpu_data and blitsem are defined outside this section

rlpin::	movem.l	d0-d1/a1-a2,-(sp)

	; build a three longword parameter block at Gpu_data
	move.l	a0,d0		; park output address while a0 is used as a pointer
	movea.l	#Gpu_data,a0
	move.l	a1,(a0)+	; start with input data address
	move.l	a2,(a0)+	; then palette address
	move.l	d0,(a0)+	; and finally the output address

	; presumably starts GPU routine 15 and waits for it to finish - confirm
	; against the macro definitions
	RunGPU	15
	waitgpu
	move.l	blitsem.w,d0	; recover follow on address
	bpl.b	.ok		; a negative value is treated as a failure
	illegal			; trap
.ok:	movea.l	d0,a0		; return follow on output address
	clr.l	blitsem.w	; and clear
	movem.l	(sp)+,d0-d1/a1-a2
	rts


.if 0	; code removed when GPU decode implemented
	; (not assembled) 68000 version of the decoder
	; a0 = output (words), a1 = input (bytes), a2 = palette (words)
	; uses d0 as run counter and d1 for the palette lookup

	move.w	(a1)+,d0	; input must start with the RLPmark word
	cmpi.w	#RLPmark,d0
	bne.b	.ended		; no marker - nothing is decoded

.pin:	moveq.l	#0,d0
	move.b	(a1)+,d0	; next run code
	beq.b	.long		; 0 = word count follows
	bclr	#7,d0		; tests bit 7, then clears it leaving the low 7 bits
	bne.b	.singles	; bit 7 was set = singles group (or end of list)
	bra.b	.use		; 1-7f = short repeat

.long:	move.b	(a1)+,d0	; count high byte
	asl.w	#8,d0
	move.b	(a1)+,d0	; count low byte

.use:	moveq.l	#0,d1
	move.b	(a1)+,d1	; colour entry
	asl.w	#1,d1		; entry number to word offset
	move.w	(a2,d1.w),d1	; fetch colour from palette

.wloop:	move.w	d1,(a0)+	; dbra loop - writes d0+1 copies
	dbra	d0,.wloop

	bra	.pin

.singles:
	tst.b	d0
	beq.b	.ended		; code was 80 = end of list
	subq.l	#1,d0		; d0 singles follow, adjust for dbra

.wl2:	moveq.l	#0,d1
	move.b	(a1)+,d1	; colour entry
	asl.w	#1,d1		; entry number to word offset
	move.w	(a2,d1.w),(a0)+	; copy one colour from palette to output
	dbra	d0,.wl2

	bra	.pin

.ended:
.endif

