	.include	'jaguar.s'
	.include	'blit.inc'
	.extern	VideoIni

	testobject equ $14000	;this is just a llama
	sines equ $15000	;a sine table
	p_sines equ $15100
;	ud1 equ GPU_RAM+$600
;	ud2 equ GPU_RAM+$900
	ud1 equ $78000
	ud2 equ $7c000
;	gpu_sem equ GPU_RAM+$ffc	;semaphore for GPU running
	gpu_sem EQU $14800
	screen1 equ $18000	;a 16bit screen 320x200
	screen2 equ $38000	;same again
	screen3 equ $58000	;going to put the girl here
	rmw_obj equ $78000
	xmax equ 320-160		;limits for the
	ymax equ 200-50		;bouncing thangs
	in_buf equ GPU_RAM+$f60


.text

	; ---- 68000 entry point: one-off initialisation ----
	; Sets up memory and the stack, loads the GPU program, builds the
	; positive sine table and the object lists, installs the Frame
	; interrupt, clears the three screens, draws the test pattern and
	; finally lets the main loop start launching the GPU.
	move.w	MEMCON1,d0	;start up DRAM
	or.w	#$60,d0
	move.w	d0,MEMCON1

	move.l #stack,a7	;stack pointer starts at the 'stack' label
	move.l #-1,ud1	;undraw values invalid for 1st pass of gpu
	move.l #-1,ud2	;undraw values invalid for 1st pass of gpu


	jsr VideoIni		;external routine (not in this file)
	move #0,BG
	bsr GetGpuCode		;copy the GPU code to GPU RAM.
	move.b #1,gpu_data	;parameter to pass to GPU
	clr screen_ready
	move.l #$800000,rot_scale
	move.l #-1,gpu_sem	;GPU semaphore: gpu_run treats a negative
				;value as "GPU busy".  Held at -1 while
				;the 68K uses the blitter to draw the test
				;pattern; cleared to 0 at the end of set-up

	lea sines,a0
	lea p_sines,a1		;make a positive-only sine table
	move #255,d0		;for use by the gpu (256 entries)
mpstab:	move.b (a0)+,d1
	ext d1			;signed byte -> signed word
	add #$80,d1		;bias by $80 so the stored byte is unsigned
	move.b d1,(a1)+
	dbra d0,mpstab

;	move.l #$807f807f,d0
;	lea rmw_obj,a0
;	move #319,d7
;makermw: move.l d0,(a0)+
;	dbra d7,makermw

	lea testdata,a0		;copy a test object to phrase aligned ram
	move.l #testobject,d0
	and.l #$fffffff8,d0	;to phrase align
	move.l d0,a1
	move #31,d0		;32 longwords = 128 bytes
copyobj: move.l (a0)+,(a1)+	;copy object to phrase aligned buffer
	dbra d0,copyobj
	bsr InitLists
	move.l dlist,d0		;put a list on the OLP
	move.w #0,ODP
	swap d0			;address is written word-swapped
	move.l d0,OLP		;activate the list

	bsr InitBeasties	;some stuff to move about

 
	move.l	#Frame,104	;Initialise level 2 interrupt
				;(address 104 = $68, level 2 autovector)
	move.w	#21,VI
	move.w	#1,INT1
	move.w	sr,d0
	and.w	#$f8ff,d0	;clear the interrupt mask bits in SR
	move.w	d0,sr



	lea screen1,a0
	clr d0			;Clear screen #1
	clr d1	
	move #320,d2
	move #200,d3
	move #0,d4
	bsr BlitBlock

	lea screen2,a0
	clr d0			;Clear screen #2
	clr d1	
	move #320,d2
	move #200,d3
	move #0,d4
	bsr BlitBlock

	lea screen3,a0
	clr d0			;Clear screen #3
	clr d1	
	move #320,d2
	move #200,d3
	move #0,d4
	bsr BlitBlock

	bsr TestPattern		;Blitter test pattern to screen1
;	bsr GetGirl		;get CRY piccie of a girl
	move.w	#$C1,VMODE		; Turn on the display
;	bsr LeonardsMandy
	move.l #0,gpu_sem	;allow the GPU to start up

;
; This loop runs the GPU/Blitter code.  I found that if you
; started up the GPU/Blitter pair from inside the FRAME
; Interrupt, the system would fall over if they got really heavily
; loaded.  MAINLOOP just waits for a sync from the FRAME routine,
; launches the GPU, then loops waiting for another sync.


; mainloop: foreground loop.  Cycles the border colour while waiting for
; Frame to clear 'sync', then sets sync=1 and runs one GPU pass via
; gpu_run.  Never exits.
mainloop: add #$2020,counter
	move counter,BORD1	;visible activity indicator in the border
 	tst sync
	bne mainloop		;sync still set: Frame has not consumed
				;the previous screen yet
	move #1,sync		;claim the next pass
	bsr gpu_run
	bra mainloop	

;	bra wibble
	illegal				;drop out to de old bugger
					;(unreachable: follows the bra above)

Wibble:
;
; Endless warp demo (only referenced from a commented-out branch).
; Loads the register set Funky is called with - d0/d1 = 0, d2 = 320,
; d3 = 200, d4/d5 = 0, a0 = screen1, a1 = screen2 - then calls Funky
; forever, stepping the three warp phase variables between calls.
; Each phase is kept in the range 0..$1ff.  Never returns.

	lea	screen2,a1
	lea	screen1,a0
	clr.w	d0
	clr.w	d1
	move.w	#320,d2
	move.w	#200,d3
	clr.w	d4
	clr.w	d5

funkloop:
	bsr	Funky			;one full warped pass

	addq.l	#8,xinc_a2		;phase for the X offset wobble
	and.l	#$1ff,xinc_a2
	add.l	#9,xinc_a		;phase for the X scale wobble
	and.l	#$1ff,xinc_a
	add.l	#10,yinc_a		;phase for the Y step wobble
	and.l	#$1ff,yinc_a
	bra	funkloop

; TestPattern: draw a blitter test picture on screen1.
; First a stack of nested filled rectangles (each one pixel smaller on
; every side, colour stepping down by $0101), then four fans of lines
; radiating from the screen centre (160,100) to points spaced 5 pixels
; apart along the top, right, bottom and left edges.
; Uses d0-d7 and a0 (BlitBlock/BlitLine trash d5-d7).
TestPattern:	lea screen1,a0
	clr d0			;Test blitter block draw
	clr d1	
	move #320,d2
	move #200,d3
	move #$ffff,d4
bb_loop: bsr BlitBlock
	sub #$0101,d4		;next colour
	addq #1,d0		;origin moves in by one pixel
	addq #1,d1
	subq #2,d2		;size shrinks by two
	subq #2,d3
	bne bb_loop		;stop when the height reaches zero

	move #160,d0
	move #100,d1		;do blitter draw line
	move #0,d2
	move #0,d3
	move #$ff80,d4
tbl:	bsr BlitLine		;centre -> top edge, x = 0,5,10,...
	add.b #2,d4		;vary low byte of colour only
	add #5,d2
	cmp #320,d2
	blt tbl

	move #320,d2
	move #0,d3
	move #$3f80,d4
tb2: 	bsr BlitLine		;centre -> right edge, y = 0,5,10,...
	add.b #2,d4
	add #5,d3
	cmp #200,d3
	blt tb2

	move #320,d2
	move #200,d3
	move #$7f80,d4
tb3: 	bsr BlitLine		;centre -> bottom edge, x = 320,315,...
	add.b #2,d4
	sub #5,d2
	bpl tb3

	move #0,d2
	move #200,d3
	move #$cf80,d4
tb4: 	bsr BlitLine		;centre -> left edge, y = 200,195,...
	add.b #2,d4
	sub #5,d3
	bpl tb4
	rts

; Frame: interrupt handler installed at vector address 104 by the
; start-up code.  Per interrupt it:
;   1. swaps cscreen/dscreen if the GPU has finished a screen
;      (screen_ready set) and the main loop has claimed a pass (sync set),
;      then clears both flags;
;   2. patches the data pointer of the first two bitmap objects of the
;      current display list (dlist) to point at cscreen;
;   3. hands dlist to the object processor;
;   4. builds the next list into blist (RunBeasties) and swaps the two
;      list pointers;
;   5. advances the animation variables.
; BORD1 is set to 0 on entry and $FFFF on exit as a timing bar.
; All registers are saved and restored.
Frame: movem.l d0-d7/a0-a6,-(a7)	;simple thang to make
	move #0,BORD1		;for a timing bar

	tst screen_ready	;is GPU ready with a new screen
	beq no_new_screen	;no
	tst sync
	beq no_new_screen
	move.l cscreen,d1
	move.l dscreen,cscreen
 	move.l d1,dscreen	;swap screens
	clr screen_ready
	clr sync		;lets mainloop start the next pass

no_new_screen:	move.l dlist,d0
	move.l d0,a0		;address of current DL
	move #1,d7		;loop for # of displayed double buffered screens
				;(dbra count 1 = two objects)
setdb:	lea 8(a0),a0		;skip first object which is a cond.
	move.l cscreen,d6	;get address of current displayed screen
	and.l #$fffffff8,d6	;lose three LSB's
	move #9,d5		;shift counter
	lsl.l d5,d6		;move to correct bit position
				;(same shift MakeScaledObject uses)
	move.l (a0),d1		;get first word of the BMO
	and.l #$fff,d1		;clear data pointer
	or.l d6,d1		;mask in new pointer
	move.l d1,(a0)		;replace in OL
	lea 24(a0),a0		;skip to nxt object
	dbra d7,setdb		;loop for all DB backgrounds


	move.w #0,ODP
	swap d0			;d0 still holds dlist from no_new_screen
	move.l d0,OLP		;activate the display list

 	bsr RunBeasties		;build the next one

	move.l dlist,d0
 	move.l blist,dlist
	move.l d0,blist		;swap dlists
	bsr run_rotate_vars	;wibble some things around 
;	bsr run_tf_vars
	bsr run_pat_vars
 	move #$FFFF,BORD1	;end of timing baa
 	movem.l (a7)+,d0-d7/a0-a6
	move.w	#$101,INT1	;do interrupt stuff
				;NOTE(review): presumably acknowledges and
				;re-enables the video interrupt - confirm
	move.w	#0,INT2
	rte

; gpu_run: run one GPU pass and wait for it to complete.
; If gpu_sem is already negative the launch is skipped and the routine
; just waits.  Otherwise it passes dscreen and the effect parameters to
; GPU RAM, marks the semaphore busy (-1), sets the GPU program counter
; and starts the GPU.  It then spins until gpu_sem is non-negative,
; waits for the blitter and sets screen_ready for the Frame handler.
; Values written to GPU registers/RAM are word-swapped first.
; Trashes d0, d1 and d7 (via pat_stuff / WaitBlit).
gpu_run: tst.l gpu_sem		;has GPU finished?
	bmi still_running	;no	
	move.l dscreen,d0	;screen the GPU is to draw into
	swap d0
	move.l d0,GPU_RAM+$fec
;	bsr gradstuff
	bsr f_rotatestuff
;	bra tf_stuff
	bsr pat_stuff
 	move.l #-1,gpu_sem	;flag GPU running
	move.l #$403000,d0	;start address of GPU program
	swap d0			;for GPU endian-ness
	move.l d0,GPU_PC	;set GPU's PC
	move.l #$11,d0		;GPU_GO is 1
	swap d0
	move.l d0,GPU_CTRL	;fire up GPU	
still_running: tst.l gpu_sem
	bmi still_running	;wait for GPU stop
	bsr WaitBlit		;wait for Blitter stop
	move #1,screen_ready	;tell int routine the screen is ready
				;for display
 	rts 

run_pat_vars:
; Advance the pattern-effect variables by their fixed per-frame steps
; (called from the Frame interrupt).  pointer, step1 and step2 are
; stepped as longwords; step3-step6 are stepped as words at their base
; address, i.e. only the upper word of each longword changes.
; No registers are used.
	addq.l	#2,step2
	addq.l	#3,step1
	add.l	#$200,pointer
	add.w	#$90,step5
	addq.w	#5,step4
	add.w	#$80,step3
	addq.w	#4,step6
	rts

pat_stuff:
; Hand the pattern-effect parameters to the GPU through in_buf.
; Every slot receives a longword holding the 16-bit value in its upper
; word and zero in its lower word:
;	in_buf+0   low word of pointer
;	in_buf+4   low word of step1
;	in_buf+8   low word of step2
;	in_buf+12  address ud1, word-swapped
;	in_buf+16  word at step3	in_buf+20  word at step4
;	in_buf+24  word at step5	in_buf+28  word at step6
; Trashes d0 and d1.

	move.w	pointer+2,d0		;low word of the long
	swap	d0			;move it to the upper half
	clr.w	d0			;lower half = 0
	move.l	d0,in_buf

	move.w	step1+2,d0
	swap	d0
	clr.w	d0
	move.l	d0,in_buf+4

	move.w	step2+2,d0
	swap	d0
	clr.w	d0
	move.l	d0,in_buf+8

	move.w	step3,d0		;step3-6: word at the base address
	swap	d0
	clr.w	d0
	move.l	d0,in_buf+16

	move.w	step4,d0
	swap	d0
	clr.w	d0
	move.l	d0,in_buf+20

	move.w	step5,d0
	swap	d0
	clr.w	d0
	move.l	d0,in_buf+24

	move.w	step6,d0
	swap	d0
	clr.w	d0
	move.l	d0,in_buf+28

; Undraw buffer address.  Only ud1 is passed; the alternation with ud2
; (and the GPU-RAM variants at $403600/$403900) is disabled.
	move.l	#ud1,d1
pstuff1:
	swap	d1
	move.l	d1,in_buf+12
	rts


; run_tf_vars: move tx_pos/ty_pos by tx_vel/ty_vel (longword values),
; bouncing between 0 and an upper limit ($1200000 for X, $a80000 for Y).
; On a bounce the velocity is negated and applied once more, which puts
; the position back where it was before the step.
; Trashes d0 and d1.
; NOTE: the routine returns at the first rts.  The Z bounce and the CLUT
; fill that follow it are unreachable as the code stands.
run_tf_vars: move.l tx_pos,d0
	move.l tx_vel,d1
	add.l d1,d0
	bpl tx_ok1		;still >= 0: go check the upper limit
tx_bounce: neg.l d1		;reverse direction
	add.l d1,d0		;undo the step
	bra tx_ok2
tx_ok1: cmp.l #$1200000,d0
	bge tx_bounce
tx_ok2: move.l d0,tx_pos
	move.l d1,tx_vel
 	move.l ty_pos,d0
	move.l ty_vel,d1
	add.l d1,d0
	bpl ty_ok1
ty_bounce: neg.l d1
	add.l d1,d0
	bra ty_ok2
ty_ok1: cmp.l #$a80000,d0
	bge ty_bounce
ty_ok2: move.l d0,ty_pos
	move.l d1,ty_vel
	rts
; ---- unreachable from here on (no label is branched to from outside) ----
; Word-sized Z bounce between 0 and $5f0, then a fill of 256 CLUT
; entries: high byte = entry index, low byte derived from the sine table
; starting at offset (pointer & $ff).
 	move tz_pos,d0
	move tz_vel,d1
	add d1,d0
	bpl tz_ok1
tz_bounce: neg d1
	add d1,d0
	bra tz_ok2
tz_ok1: cmp #$5f0,d0
	bge tz_bounce
tz_ok2: move d0,tz_pos
	move d1,tz_vel
	add #1,pointer
	move pointer,d0
	and #$ff,d0		;sine table index
	lea CLUT,a0
	lea sines,a1
	move #0,d1		;d1 = CLUT entry number
	move #8,d3		;shift count
spall: move d1,d2
	lsl d3,d2		;entry number into the high byte
 	move.b 0(a1,d0.w),d4
	ext d4
	add #$80,d4		;sine to 0..255
	asr #1,d4		;halve
	add #$80,d4	
	or d4,d2
 	move d2,(a0)+
	add.b #1,d0		;byte add: index wraps at 256
	add #1,d1
	cmp #256,d1
	blt spall
	rts
	

; tf_stuff: per-pixel GPU effect (its only call site in gpu_run is
; commented out).  Clears the draw screen with cls8, then walks a block
; of 26 x 32 bytes read from screen1 at a position derived from
; tx_pos/ty_pos.  For every source byte it writes four word-swapped
; parameters into GPU RAM ($fe8, $fe4, $fdc, $fe0), starts the GPU at
; $403000 and spins until gpu_sem is non-negative.  Sets screen_ready
; when done.
; Trashes d0-d7, a0, a1.
tf_stuff: clr d0
	move.l dscreen,a0
	bsr cls8		;remember changed to 8bit!
	move tz_pos,tzp		;snapshot of Z for this pass
	move.l ty_pos,d1
	lsl.l #4,d1
	swap d1
	move d1,d0
	and #$0f,d1		;d1 = low 4 bits (fraction)
 	asr #4,d0		;d0 = integer Y
	move.l tx_pos,d3
	lsl.l #4,d3
	swap d3
	move d3,d2
	asr #4,d2		;d2 = integer X
;	asl #1,d2
	mulu #320,d0		;Y * bytes per line
	lea screen1,a0
	add.l d0,a0
	lea 0(a0,d2.w),a0	;a0 -> first source byte
	move #$200,d6
	or d1,d6		;put in Z frac
	and #$0f,d3
	move d3,d1
	move #32,d3		;row counter
gpix: 	move #26,d2		;X loop
	move.l a0,a1
	move #$d0,d7
	or d1,d7		;X frac
gpix2: 	move.b (a1)+,d4	;get pixel colour

	move d7,d0
	swap d0
	clr d0
	move.l d0,GPU_RAM+$fe8
	move d6,d0
	swap d0
	clr d0
	move.l d0,GPU_RAM+$fe4
	move d4,d0
;	asr #8,d0
	and #$ff,d0
	swap d0
	clr d0
	move.l d0,GPU_RAM+$fdc
	move d4,d0		;get colour
;	asr #8,d0		;extract colour vector
	and #$ff,d0
	neg d0
	move tzp,d4
	asr #2,d4
	add #$40,d4
	add d4,d0
	ext.l d0		;make signed long
	swap d0
	move.l d0,GPU_RAM+$fe0
 	move.l #-1,gpu_sem	;flag GPU running
	move.l #$403000,d0	;start address of GPU program
	swap d0			;for GPU endian-ness
	move.l d0,GPU_PC	;set GPU's PC
	move.l #$11,d0		;GPU_GO is 1
	swap d0
	move.l d0,GPU_CTRL	;fire up GPU	
s_running: tst.l gpu_sem
	bmi s_running	;wait for GPU stop
;	move d7,-(a7)
;	bsr WaitBlit			
;	move (a7)+,d7
	sub #16,d7		;step the X parameter
	sub #1,d2
	bne gpix2
	lea 320(a0),a0		;next source row
	sub #16,d6		;step the row parameter
	sub #1,d3
	bne gpix	
	move #1,screen_ready
	rts 

; rotatestuff: compute parameters for the rotate effect and write them
; to GPU RAM (no caller is visible in this file).
;   GPU_RAM+$ff4 : value derived from sines[gpu_data], in the upper word
;   GPU_RAM+$ff8, +$ff0, +$fe8 : three points, each packed by WriteGPU as
;       X in the upper word and Y in the lower word.
; Every point is (160,100) plus sine-table values scaled by
; (word at rot_scale)+$20 and shifted right by 7.  d7 holds the table
; index; byte-sized adds make it wrap at 256.
; The third point falls through into WriteGPU, whose rts ends the
; routine.  Trashes d0-d5, d7, a0, a1.
;
; WriteGPU: store (d0.w << 16) | d1.w at (a0).  Trashes d2.
rotatestuff: move.b gpu_data,d0
	and #$ff,d0			;these bits only for
	lea sines,a1			;rotate and tilt
	move.b 0(a1,d0.w),d0		;
	ext d0				;
	add #$81,d0			;
	asl #1,d0			;
	and.l #$fff,d0			;
	swap d0				
	move.l d0,GPU_RAM+$ff4
	
	move #160,d3		;centre point of rotation
	move #100,d4
	move rot_scale,d5	;upper word of the rot_scale longword
	add #$20,d5
	move pointer,d7		;upper word of the pointer longword
	asl #1,d7
	and #$ff,d7
	move.b 0(a1,d7.w),d0
	ext d0
	ext.l d0
	muls d5,d0
	asr.l #7,d0
	add d3,d0
	add.b #$40,d7		;advance the index by $40 (of 256)
	move.b 0(a1,d7.w),d1
	ext d1
	ext.l d1
	muls d5,d1
	asr.l #7,d1
	add d4,d1
	move.l #GPU_RAM+$ff8,a0	;first point
	bsr WriteGPU

	sub.b #$80,d7

	move.b 0(a1,d7.w),d0
	ext d0
	ext.l d0
	muls d5,d0
	asr.l #7,d0
	add d3,d0
	add.b #$40,d7
	move.b 0(a1,d7.w),d1
	ext d1
	ext.l d1
	muls d5,d1
	asr.l #7,d1
	add d4,d1		;second point
	move.l #GPU_RAM+$ff0,a0
	bsr WriteGPU
	add.b #$40,d7
	move.b 0(a1,d7.w),d0
	ext d0
	ext.l d0
	muls d5,d0
	asr.l #7,d0
	add d3,d0
	add.b #$40,d7
	move.b 0(a1,d7.w),d1
	ext d1
	ext.l d1
	muls d5,d1
	asr.l #7,d1
	add d4,d1		;third point	
	move.l #GPU_RAM+$fe8,a0
				;fall through into WriteGPU
WriteGPU: move d0,d2
	swap d2			;X to the upper word
	move d1,d2		;Y in the lower word
	move.l d2,(a0)
	rts


; f_rotatestuff: fixed (non-rotating) variant used by gpu_run.
; Advances tx_pos/ty_pos with run_tf_vars, then writes to GPU RAM, each
; longword word-swapped:
;	+$ff4	constant 80 (in the upper word)
;	+$ff8	tx_pos			+$ff0	ty_pos
;	+$fe8	tx_pos + (32<<16)	+$fe4	ty_pos
;	+$fe0	tx_pos			+$fdc	ty_pos + (20<<16)
; The original sine/scale maths is kept as commented-out code next to
; the constants that replaced it.
; Trashes d0, d1, a1.
f_rotatestuff: move.b gpu_data,d0
	and #$ff,d0			;these bits only for
	lea sines,a1			;rotate and tilt
	move.b 0(a1,d0.w),d0		;
	ext d0				;
	add #$81,d0			;
	asr #3,d0			;
	add #3,d0
;	move #100,d0
	move #80,d0			;overrides the value computed above
	and.l #$ffff,d0			;	
	swap d0				
	move.l d0,GPU_RAM+$ff4

	bsr run_tf_vars			;step tx_pos / ty_pos

;	move #160,d3			;centre point x and y
;	move #100,d4
;	swap d3
;	clr d3
;	swap d4
;	clr d4				;to 16:16
;	move rot_scale,d5
;	asl #4,d5
;	move pointer2,d7
;	move #0,d7
;	and #$ff,d7
;	move.b 0(a1,d7.w),d0
;	ext d0
;	ext.l d0
;	muls d5,d0
;	asl.l #5,d0
;	add.l d3,d0
	move.l #0,d0
	add.l tx_pos,d0			;d0 = tx_pos
	swap d0
	move.l d0,GPU_RAM+$ff8
;	add.b #$40,d7
;	move.b 0(a1,d7.w),d1
;	ext d1
;	ext.l d1
;	muls d5,d1
;	asl.l #5,d1
;	add.l d4,d1
	move.l #0,d1
	add.l ty_pos,d1			;d1 = ty_pos
	swap d1
	move.l d1,GPU_RAM+$ff0

;	sub.b #$80,d7
;	move.b 0(a1,d7.w),d0
;	ext d0
;	ext.l d0
;	muls d5,d0
;	asl.l #5,d0
;	add.l d3,d0
	move.l #32,d0
	swap d0				;32 into the upper (integer) word
	add.l tx_pos,d0			;d0 = tx_pos + 32.0
	swap d0
	move.l d0,GPU_RAM+$fe8
;	add.b #$40,d7
;	move.b 0(a1,d7.w),d1
;	ext d1
;	ext.l d1
;	muls d5,d1
;	asl.l #5,d1
;	add.l d4,d1		;second point
;	swap d1
	move.l #0,d1
	add.l ty_pos,d1
	swap d1
	move.l d1,GPU_RAM+$fe4

;	add.b #$40,d7
;	move.b 0(a1,d7.w),d0
;	ext d0
;	ext.l d0
;	muls d5,d0
;	asl.l #5,d0
;	add.l d3,d0
	move.l #0,d0
	add.l tx_pos,d0
	swap d0
	move.l d0,GPU_RAM+$fe0
;	add.b #$40,d7
;	move.b 0(a1,d7.w),d1
;	ext d1
;	ext.l d1
;	muls d5,d1
;	asl.l #5,d1
;	add.l d4,d1		;third point	
	move.l #20,d1
	swap d1				;20 into the upper (integer) word
	add.l ty_pos,d1			;d1 = ty_pos + 20.0
	swap d1
	move.l d1,GPU_RAM+$fdc
	rts


gradstuff:
;
; Transfers line gradients for the horizontal line scale
; GPU thang
;
; For each of grad and grad2: take |value - $1ff| (clamped to 0 if the
; negation overflows), subtract $ff, sign-extend, scale (<<10 for grad,
; <<4 for grad2), word-swap and store in GPU RAM.
; Its call in gpu_run is commented out.  Trashes d0.

	move grad,d0
	sub #$1ff,d0
	bpl grad1
	neg d0			;absolute value
	bpl grad1
	clr d0			;still negative after neg: clamp
grad1:	sub #$ff,d0
	ext.l d0
	asl.l #8,d0
	asl.l #2,d0 		;total shift of 10
	swap d0
	move.l d0,GPU_RAM+$ff0	;initial gradient of map edge
	move grad2,d0
	sub #$1ff,d0
	bpl grad3
	neg d0
	bpl grad3
	clr d0
grad3:	sub #$ff,d0
	ext.l d0
	asl.l #4,d0
	swap d0
	move.l d0,GPU_RAM+$ff8	;change of gradient
	rts

run_rotate_vars:
; Per-frame update of the rotate-effect variables (called from Frame):
;   pointer2      += 1 (word)
;   rot_scale_add += $100
;   rot_scale     -= rot_scale_add; if that goes negative, rot_scale_add
;                    is negated and subtracted again, which restores the
;                    previous rot_scale and reverses the direction
;   gpu_data      += 1 (byte, wraps)
; Trashes d0 and d1.
	addq.w	#1,pointer2

	move.l	rot_scale_add,d1
	add.l	#$100,d1		;d1 = new step
	move.l	rot_scale,d0
	sub.l	d1,d0			;apply it
	bpl	rgpuv1			;still non-negative: keep
	neg.l	d1			;went below zero: reverse step
	sub.l	d1,d0			;and take the move back
rgpuv1:
	move.l	d0,rot_scale
	move.l	d1,rot_scale_add

	addq.b	#1,gpu_data
	rts


run_grad_vars:
; Step the gradient-effect variables:
;   grad  = (grad  + 3) & $3ff
;   grad2 = (grad2 + 2) & $3ff
;   gpu_data (byte) counts 0..199 and then restarts at 0
; Trashes the low word of d0.
	addq.w	#3,grad			;change vars in gpu-space
	and.w	#$3ff,grad
	addq.w	#2,grad2
	and.w	#$3ff,grad2

	clr.w	d0
	move.b	gpu_data,d0		;d0.w = current count, 0..255
	addq.w	#1,d0
	cmp.w	#200,d0
	blt	glok			;below 200: store as is
	clr.w	d0			;otherwise wrap to 0
glok:
	move.b	d0,gpu_data
	rts



InitBeasties:
; Fill the 'beasties' table with start values.  Each record is 32 bytes:
;	+0  xpos (long)		+4  ypos (long)
;	+8  delta x (long)	+12 delta y (long, set to 0)
;	+16..31 left untouched
; The first record gets xpos 0, ypos $280000, delta x $10000; every
; following record adds $20000, $20000 and $1000 respectively.
; nbeasties is used directly as the dbra counter, so nbeasties+1 records
; are written.  Trashes d0-d2, d7, a0.
	lea	beasties,a0
	move.w	nbeasties,d7
	moveq	#0,d0			;xpos
	move.l	#$280000,d1		;ypos
	move.l	#$10000,d2		;delta x
ibeasts:
	move.l	d0,(a0)
	move.l	d1,4(a0)
	move.l	d2,8(a0)
	clr.l	12(a0)			;delta y
	lea	32(a0),a0		;next record
	add.l	#$20000,d0
	add.l	#$20000,d1
	add.l	#$1000,d2
	dbra	d7,ibeasts
	rts

; RunBeasties: build the next object list into the buffer at blist
; (called from Frame).  Emits four 200-line, 16-bit objects with
; MakeScaledObject and then terminates the list via StopList:
;	1. cscreen at (0,66)		scale word $2020
;	2. cscreen at (160,66+200)	scale word $2020
;	3. screen1 at (0,66+200)	scale word $1010
;	4. screen1 at (160,66)		scale word $1010
; ($1010 is the value MakeScaledObject special-cases for half scale.)
; Frame later re-patches the data pointers of the first two objects.
; NOTE: the 'bra StopList' after the fourth object makes the bouncing
; beasties code below it unreachable.
; Trashes d0-d7, a0, a1, a6.
RunBeasties: move.l blist,a0		;will use MakeScaledObject to build a list

 clr d0				;a 16-bit background area
 move #66,d1
 move.l cscreen,a1
 clr d2				;no offset for 16 bit
 move.l #$20200050,d3		;scale bytes $20/$20, width word $50 (80)
 move #200,d4			;200 lines
 move #4,d5			;depth is 16bit
 bsr MakeScaledObject			;make background object

 move #160,d0				;a 16-bit background area
 move #66+200,d1
 move.l cscreen,a1
 clr d2				;no offset for 16 bit
 move.l #$20200050,d3		;scale bytes $20/$20, width word $50 (80)
 move #200,d4			;200 lines
 move #4,d5			;depth is 16bit
 bsr MakeScaledObject

 clr d0				;a 16-bit background area
 move #66+200,d1
 move.l #screen1,a1
 clr d2				;no offset for 16 bit
 move.l #$10100050,d3		;scale bytes $10/$10, width word $50 (80)
 move #200,d4			;200 lines
 move #4,d5			;depth is 16bit
 bsr MakeScaledObject

 move #160,d0				;a 16-bit background area
 move #66,d1
 move.l #screen1,a1
 clr d2				;no offset for 16 bit
 move.l #$10100050,d3		;scale bytes $10/$10, width word $50 (80)
 move #200,d4			;200 lines
 move #4,d5			;depth is 16bit
 bsr MakeScaledObject
 bra StopList			;terminate the list and return


; ---- unreachable from here on (follows the unconditional bra) ----
; Moves every beastie record: X bounces between 0 and xmax, Y bounces at
; ymax with $0800 added to delta y each frame, and one object per
; beastie is emitted.
 lea beasties,a2
 move nbeasties,d7

RBeasts: move.l 8(a2),d0	;bounce some thangs around
 add.l d0,(a2)
 bmi XBounce
 cmp #xmax,(a2)			;compares the integer (upper) word
 blt YMove
XBounce: neg.l d0
 add.l d0,(a2)
 move.l d0,8(a2)
YMove: move.l 12(a2),d0
 add.l d0,4(a2)
 cmp #ymax,4(a2)
 blt DoGravity
 neg.l d0
 add.l d0,4(a2)
DoGravity: add.l #$0800,d0
 move.l d0,12(a2)

 move (a2),d0			;display them with MakeScaledObject
 move 4(a2),d1
 asl #1,d1			;halflines

 move.l cscreen,a1
 clr d2				;no offset for 16 bit
 move.l #$20200028,d3		;scale 1, 40 phrases/line
 move #100,d4			;100 lines
 move #4,d5			;depth is 16bit
 move d7,-(a7)			;MakeScaledObject trashes d7
 bsr MakeScaledObject
 move (a7)+,d7

; move d7,d2			;index into palette
; lea testobject,a1
; move d1,d3			;copy Y position
; asr #4,d3			;v. roughly 0-16
; or #$2000,d3			;y scale of one for now
; swap d3
; move #1,d3			;size in phrases
 
; move #16,d4
; clr d5
; move d7,-(a7)
; bsr MakeScaledObject
; move (a7)+,d7
  
 lea 32(a2),a2		;do 'em all 
 dbra d7,RBeasts
 
 bra StopList		;put a stopobject on the end  


*------------ routines

InitLists:
;
; Prepare both object-list buffers.
; For list1 and then list2: round the buffer address down to a multiple
; of 32 (quad phrase) and step back 8 bytes, so that after the 8-byte
; conditional object that starts every entry the bitmap object itself
; lands on a quad-phrase boundary.  The adjusted addresses are stored in
; dlist (list1) and blist (list2) and each buffer is filled with the
; test list.  Returns through MakeTestList.

	move.l	#list1,d0
	and.b	#$e0,d0			;clear the low five address bits
	subq.l	#8,d0			;room for the leading cond object
	movea.l	d0,a0
	move.l	d0,dlist
	bsr	MakeTestList

	move.l	#list2,d0		;and the same again for list 2
	and.b	#$e0,d0
	subq.l	#8,d0
	movea.l	d0,a0
	move.l	d0,blist
	bra	MakeTestList

MakeTestList:
;
; enter with a0.l set to the start of the list you wanna build
;
; Builds three copies of the test object at (50,100), (130,100) and
; (210,100), each one phrase wide, 16 lines high, depth code 0, and then
; appends a stop object.
; Relies on MakeScaledObject preserving d0, d1, d3, d4, d5 and a1
; between calls.  On exit a0 points past the stop object.
; Trashes d0-d7, a1, a6.
;
; StopList (also entered directly by RunBeasties): write a stop object
; (longwords 0 and 4) at (a0)+ and return.

	move #50,d0
	move #100,d1		;X and Y
	clr d2			;CLUT index 0
	move.l #testobject,d6	;d6 is scratch (MakeScaledObject reloads it)
	and.l #$ffffffe0,d6	;address of the object
				;NB SCALED BM OBJECTS MUST START
				;ON A QUAD PHRASE BOUNDARY!!!!!
				;(The mask used to be applied to d2, which
				;had just been cleared, so it did nothing.
				;The start-up copy only phrase-aligns
				;testobject, so keep testobject itself on
				;a 32-byte boundary.)
	move.l d6,a1
	move.l #$20080001,d3		;one phrase wide, scale=1
	move #16,d4		;16 scanlines high
	move #0,d5			;one bit per pixel
	bsr MakeScaledObject		;make a Thang
 	add #80,d0		;next copy 80 pixels to the right
	bsr MakeScaledObject
	add #80,d0
	bsr MakeScaledObject
StopList:	move.l #0,(a0)+
	move.l #4,(a0)+		;make a stopobject
	rts

MakeScaledObject:
;
; Make an OL entry for an object. a0 --> current pos in OL being built
; (assumes a0 is already phrase aligned)
;	d0-d1 = x,y
;	d2 = index into CLUT
;	d3 = X size, in phrases
;	d4 = Y size, in scan lines
;	d5 = Object depth
;	a1.l = address of data
;
; uses all d-regs, a0 must have OL position, a6 used internally
;
; Adding new stuff: d3 high word now=Yscale:Xscale
;
; Register effects as the code stands: d6, d7 and a6 are overwritten,
; d2 is cleared (so the INDEX passed in d2 is currently ignored),
; d0, d1, d3, d4, d5 and a1 are preserved, and a0 is advanced by
; 32 bytes (one 8-byte conditional object + three 8-byte phrases).


* First, make a conditional object because of the bug in OLP

	move.l a0,d6
	add.l #32,d6		;length of scaled BM object + this object
	and.l #$fffffff8,d6	;lose 3 lsb's (see below)
	move.l d6,a6		;save it for later on...
	lsl.l #5,d6		;put the bits in the right place
				;(It is 5 and not 8, because it's a
				;phrase address, so the three low bits are
				;not used)
	clr (a0)+		;first word is unused
	move.l d6,(a0)+		;write branch address

* Now, calculate where the branch is gonna be...

	move.l d3,d7		;get Y stuff
	swap d7			;get Yscale and Xscale
	cmp #$1010,d7		;hack so i can use half scale obs
	bne scalup
	move d4,d7		;half scale: span = height as given
	bra dscale

scalup:	lsr #8,d7		;get Y scale
	lsr #5,d7		;ignore fractional bits (for now)
	mulu d4,d7		;multiply y size
	asl #1,d7		;x2 coz it's in half lines

dscale:	move d1,d6		;Y start
	add d7,d6		;Y start + displayed height
	lsl #3,d6		;shift to correct bit position
	or #%1000000000000011,d6	;mask in CC and type
	move d6,(a0)+		;and this completes the cond obj.


* Now the funky bit. Build a scaled bitmapped object.
* It is scaled, because of yet another bug in OLP.
* We are now on a quadphrase boundary too.

	move.l a1,d6		;get copy of data pointer
	and.l #$fffffff8,d6	;lose three LSB's
	move #9,d7		;shift counter
	lsl.l d7,d6		;move to correct bit position
	move.l d6,(a0)+		;put it in the list

* The link address is the same as for the preceding conditional
* object, and remember i saved it in a1, so...
; (the saved link pointer is actually held in a6, not a1)

	move.l a6,d6		;get back link-pointer
	lsl.l #5,d6		;make it aligned OK (bott. 3 bits are 0)
	swap d6			;get top word of the address..
	or d6,-2(a0)		;and or it into place on the previous word.

* That's the first longword done, and the remaining byte of the
* link address is in d6 high already.

	move.l #0,d7		;make d7 empty
	move d4,d7		;get a copy of the height...
	ror.l #2,d7		;put 2 lowest bits in hi word...
	swap d6			;get back rest of link
	or d7,d6		;OR in top 8-bits of height..
	move d6,(a0)+		;top word now completed..
	swap d7			;get 2 lowest bits of height at top of d7..
	move d1,d6		;copy of Y pos..
	lsl #3,d6		;align...
	or d7,d6		;mask in those 2 bits of height..
	bset #0,d6		;set the type=1
	move d6,(a0)+		;..which completes the first phrase.

* Now on to the next phrase.

	clr (a0)+		;assume Firstpix=0, RELEASE not asserted
	clr d2			;forces INDEX to 0 whatever the caller passed
	move d2,d7		;get INDEX
	lsl #6,d7		;align it
	move.l #0,d6		;empty d6
	move d3,d6		;get X size in phrases
	ror.l #4,d6		;shift low 4 bits into hi word
	bset #15,d6		;set transparency
	or d7,d6		;mask in Index
	move d6,(a0)+		;ignores Reflect, RMW

* First long of phrase 2 is done...

	move d3,d7		;Assume dwidth=iwidth for simplicity
	lsl #2,d7		;align
	swap d6			;get top of d6, which has rest of iwidth
	or d7,d6		;mask in dwidth	
	move d6,(a0)+		;put it in, assume pitch is 1
	move d5,d6		;get copy of depth
	ror #4,d6		;put it in top byte
	or d0,d6		;Hallelujah! X doesn't need shifting!
	bset #15,d6		;(PITCH=1)
	move d6,(a0)+		;Here endeth the Second Phrase.

* Now the third and final phrase.

	move.l #0,(a0)+		;Not used.....
	move.l d3,d6
	clr.l d7
 	swap d6			;get x- and y-scale
	move d6,d7		;copy scales
	lsr #8,d7		;d7.w = Y scale byte
	swap d7			;Y scale byte into the upper word
 	move d6,d7		;recombine remainder with d7
	move.l d7,(a0)+	


* Outta here.

	rts

cls8:
;
; clear an 8bpp screen (a0) to colour (d0)
;
; Sets the blitter up for a 320-wide, 8-bit, phrase-mode pattern fill of
; 320 x 200 pixels starting at pixel (0,0), starts it and returns
; through WaitBlit once the blit has finished.
; The word in d0 is copied into all four words of the pattern register,
; so for a solid colour the caller presumably wants the colour byte in
; both halves of d0.w (the only caller passes 0).
; Register values are word-swapped before being written.  Trashes d7.

	move.l #PITCH1|PIXEL8|WIDTH320|XADDPHR,d7
	swap d7			;for right Endian-ness
	move.l d7,BLIT_A1FLAGS
	move.l a0,d7
	swap d7
	move.l d7,BLIT_A1BASE	;base of dest screen
	clr.l d7
	move.l d7,BLIT_A1PIXEL	;start at (0,0)
	move.l d7,BLIT_A1PIXELF
	move #-320,d7
	swap d7
	move #1,d7		;X and Y Step
	move.l d7,BLIT_A1STEP	;after each line: X -320, Y +1
	move.l #0,BLIT_A1STEPF	;no fraction of step
	move #$140,d7		;$140 = 320
	swap d7
	move #200,d7		;Inner and outer loop count
	move.l d7,BLIT_COUNT
	move d0,d7		;get colour
	swap d7
	move d0,d7		;duplicate
	move.l d7,BLIT_PATD
	move.l d7,BLIT_PATD+4	;fill up phrase wide pattern register
	move.l #PATDSEL|UPDA1,d7
	swap d7
	move.l d7,BLIT_CMD	;do the thang
	bra WaitBlit

BlitBlock:
;
; use the Blitter to draw a block, origin d0/d1, size d2/d3, colour d4,
; on the 320-pixel wide bitmap addressed at a0.
;
; In:	a0 = bitmap base, d0.w/d1.w = X/Y origin,
;	d2.w/d3.w = width/height in pixels, d4.w = 16-bit colour
; Out:	returns only when the blit has finished (the code falls through
;	into WaitBlit, which must directly follow this routine)
; Trashes d7; d0-d4 and a0 are preserved.
; Coordinate pairs are built with X in the upper word and Y in the
; lower word; flag, base and command values are word-swapped.

	move.l #PITCH1|PIXEL16|WIDTH320|XADDINC,d7
	swap d7			;for right Endian-ness
	move.l d7,BLIT_A1FLAGS
	move.l a0,d7
	swap d7
	move.l d7,BLIT_A1BASE	;base of dest screen
	move d0,d7
	swap d7
	move d1,d7		;X and Y destination start
	move.l d7,BLIT_A1PIXEL
	move.l #0,BLIT_A1PIXELF
	move #1,d7
	swap d7
	clr d7			;X and Y Increment values
	move.l d7,BLIT_A1INC	;per pixel: X +1, Y +0
	move.l #0,BLIT_A1INCF	;No fractional parts of increment
	move d2,d7
	neg d7
	swap d7
	move #1,d7		;X and Y Step
	move.l d7,BLIT_A1STEP	;per line: X -width, Y +1
	move.l #0,BLIT_A1STEPF	;no fraction of step
	move d2,d7
	swap d7
	move d3,d7		;Inner and outer loop count
	move.l d7,BLIT_COUNT
	move d4,d7		;get colour
	swap d7
	move d4,d7		;duplicate
	move.l d7,BLIT_PATD
	move.l d7,BLIT_PATD+4	;fill up phrase wide pattern register
	move.l #PATDSEL|UPDA1,d7
	swap d7
	move.l d7,BLIT_CMD	;do the thang

; WaitBlit: spin until bit 11 of the word-swapped BLIT_CMD read-back is
; set, then return.  Also the fall-through tail of BlitBlock.
; Trashes d7.  'rrts' labels a bare rts.
WaitBlit: move.l BLIT_CMD,d7	;get Blitter status regs
	swap d7
	btst #11,d7
	beq WaitBlit		;wait until outer loop is idle
rrts:	rts



BlitLine:
;
; draw a line with the blitter from d0/d1 to d2/d3, colour d4
; on a 320-pixel-wide 16-bit screen at a0.
;
; In:	a0 = screen base, (d0.w,d1.w) = start, (d2.w,d3.w) = end,
;	d4.w = 16-bit colour
; Out:	returns when the blitter has finished (every path ends in
;	WaitBlit)
; Trashes d5, d6, d7.  d0-d4 hold the same word values on exit; on the
; horizontal/vertical paths d0-d3 are saved and restored with a
; word-sized movem, which reloads them sign-extended to 32 bits.
;
; Method: the longer axis becomes the blitter inner-loop count and steps
; by +/-1 per pixel; the shorter axis steps by a signed 16.16 fraction
; (shorter delta / longer delta).  Vertical, horizontal, exactly
; diagonal and zero-length lines are handled as special cases.

* first set up the usual gubbins

	move.l #PITCH1|PIXEL16|WIDTH320|XADDINC,d7
	swap d7			;for right Endian-ness
	move.l d7,BLIT_A1FLAGS
	move.l a0,d7
	swap d7
	move.l d7,BLIT_A1BASE	;base of dest screen
	move d0,d7
	swap d7
	move d1,d7
	move.l d7,BLIT_A1PIXEL
	move.l #0,BLIT_A1PIXELF	;set startpoint of line

	move d2,d5
	sub d0,d5	;delta x
	beq do_vline	;do trivial case if delta x is 0

	swap d5		;keep signed dx in the upper word of d5
	move d3,d5
	sub d1,d5	;delta y
	beq do_hline	;do hline if delta y is 0

	move d5,d7
	bpl blin1
	neg d7		;d7 holds abs value of delta y
blin1:	swap d5		;d5.w = signed dx again, dy in the upper word
	move d5,d6
	bpl blin2
	neg d6		;d6 holds abs value of delta x
blin2:  cmp d7,d6	;see which is larger
	beq do_diagline
	bgt x_larger	;go do x greater version

y_larger: swap d7	;y greater, so is the inner loop count
	move #1,d7	;outer loop count is one
	move.l d7,BLIT_COUNT
	swap d7		;restore delta Y!

	move d5,d6	;get signed dx...
	swap d6
	clr d6		;effectively <<16
	asr.l #1,d6	;halve so the signed quotient fits in a word
	divs d7,d6	;divide by absolute integer Y-difference 	
	ext.l d6	;get sign in top of d6
	swap d6		;contains 0 or 1, sign of integer part
			;(d6.w is 0 or -1; the quotient is now in the
			;upper word)

	move d6,d7	;signed integer X increment
	swap d7
	move #1,d7	;Y-integer is one...	
	swap d5		;get back signed dy
	tst d5
	bpl y_larg1
	neg d7		;make it the right sign
y_larg1: move.l d7,BLIT_A1INC	;set integer parts of the increment
	and.l #$ffff0000,d6	
	asl.l #1,d6		;undo the earlier halving
	move.l d6,BLIT_A1INCF	;set frac parts of increment	
	bra setrest

x_larger: swap d6	;x greater, make it the inner loop count
	move #1,d6
	move.l d6,BLIT_COUNT 
	swap d6		;restore delta X!

	swap d5
	move d5,d7	;get signed dy...
	swap d7
	clr d7		;effectively <<16
	asr.l #1,d7	;halve so the signed quotient fits in a word
	divs d6,d7	;divide by absolute integer X-difference 	
	ext.l d7
	swap d7		;contains 0 or 1, sign of integer part

	move #1,d6	;X-integer is one...	
	swap d5		;get back signed dx
	tst d5
	bpl x_larg1
	neg d6		;make it the right sign
x_larg1: swap d6
	move d7,d6	;sign from d7
	move.l d6,BLIT_A1INC	;set integer parts of the increment
	swap d7		;get frac part
	asl #1,d7	;undo the earlier halving
	and.l #$ffff,d7	
	move.l d7,BLIT_A1INCF	

; setrest: common tail.  Loads the colour into the pattern register,
; starts the blit and waits for it.  Also called as a subroutine by the
; hline/vline path below.
setrest: move d4,d5		;set colour
	swap d5
	move d4,d5		;dupe
	move.l d5,BLIT_PATD
	move.l d5,BLIT_PATD+4
	move.l #PATDSEL,d7
	swap d7
	move.l d7,BLIT_CMD 	;go
	bra WaitBlit		;don't return until blitter is ready

; Vertical line: always drawn top to bottom, length = |y2 - y1| pixels.
do_vline: movem d0-d3,-(a7)	;word-sized save of d0-d3
 	cmp d3,d1
	beq zlength		;zero length
	blt noswap_v
	exg d3,d1		;swap regs
noswap_v: move d0,d7
	swap d7
	move d1,d7
	move.l d7,BLIT_A1PIXEL
	move.l #1,BLIT_A1INC		;only increment Y
	move.l #0,BLIT_A1INCF
	move d3,d7
	sub d1,d7		;pixel count
	bra linego

; Horizontal line: always drawn left to right, length = |x2 - x1| pixels.
do_hline: movem d0-d3,-(a7)	;word-sized save of d0-d3
	cmp d2,d0
	beq zlength		;zero length
	blt noswap_h
	exg d2,d0		;swap regs
noswap_h: move d0,d7
	swap d7
	move d1,d7
	move.l d7,BLIT_A1PIXEL
	move.l #$10000,BLIT_A1INC		;only increment X
	move.l #0,BLIT_A1INCF
	move d2,d7
	sub d0,d7		;pixel count
linego:	swap d7
	move #1,d7		;one pass of the outer loop
	move.l d7,BLIT_COUNT
	bsr setrest		;draw, returning here when finished
zlength: movem (a7)+,d0-d3	;word restore (sign-extends each register)
 	rts

do_diagline: move #1,d6		;handle trivial case where dx=dy
	move d2,d5
	sub d0,d5
	bpl d_dl1
	neg d6			;X step becomes -1
d_dl1:	swap d6
	move #1,d6
	move d3,d5
	sub d1,d5
	bpl d_dl2
	neg d6			;Y step becomes -1
	neg d5			;d5 = |dy| = pixel count
d_dl2: move.l d6,BLIT_A1INC
	move.l #0,BLIT_A1INCF
	swap d5
	move #1,d5
	move.l d5,BLIT_COUNT
	move d0,d5
	swap d5
	move d1,d5
	move.l d5,BLIT_A1PIXEL
	bra setrest
			
CopyBlock:
;
; Copy from screen at a0 to screen at a1
; d0/d1=origin of sourceblock
; d2/d3=width and height of block to copy
; copy from blitter a1 to a2.
; d4/d5=destination XY
;
; This simple routine will assume both screens are the same width
;
; Using this blitter is a piece of piss.
;
; Both screens are treated as 320 pixels wide, 16 bits per pixel.
; Returns through WaitBlit when the copy has finished.  Trashes d7.
; No caller is visible in this file.

	move.l #PITCH1|PIXEL16|WIDTH320|XADDINC,d7
	swap d7	
	move.l d7,BLIT_A1FLAGS	;a1 (Source) Gubbins

	move.l #PITCH1|PIXEL16|WIDTH320|XADDPIX|YADD1,d7
	swap d7
	move.l d7,BLIT_A2FLAGS	;a2 (Dest) Gubbins

	move d2,d7
	swap d7
	move d3,d7
	move.l d7,BLIT_COUNT 	;set inner and outer loop counts

	move d0,d7
	swap d7
	move d1,d7
	move.l d7,BLIT_A1PIXEL	;origin of source

	move d4,d7
	swap d7
	move d5,d7
	move.l d7,BLIT_A2PIXEL	;origin of destination

	move.l #0,BLIT_A1PIXELF


 	move.l #$00010000,BLIT_A1INC	;source steps X +1, Y +0 per pixel
	move.l #$0,BLIT_A1INCF

	move d2,d7
	neg d7
	swap d7
	move #1,d7
	move.l d7,BLIT_A1STEP	;per line: X -width, Y +1
	move.l d7,BLIT_A2STEP		;set loop steps

	move.l a0,d7
	swap d7
	move.l d7,BLIT_A1BASE
	move.l a1,d7
	swap d7
	move.l d7,BLIT_A2BASE		;set screen window bases

	move.l #SRCEN|UPDA1|UPDA2|DSTA2|LFUFUNC_A|LFUFUNC_AN,d7
	swap d7
	move.l d7,BLIT_CMD
	bra WaitBlit

GetGirl:
;
; this transfers the CRY picture of a girlie, which some
; eejit made in a non-blitter-friendly size, into the
; middle of screen1.
;
; The picture is read from the memory at screen2 and copied one line at
; a time: 200 blits of 192 pixels each, source X offset 2, destination
; X = 60 and Y = 0..199.  The source base advances 400 bytes per line.
; Its call in the start-up code is commented out.
; Trashes d5, d7, a0.

	move.l #PITCH1|PIXEL16|WIDTH256|XADDINC,d7
	swap d7	
	move.l d7,BLIT_A1FLAGS	;a1 (Source) Gubbins

	move.l #PITCH1|PIXEL16|WIDTH320|XADDPIX|YADD1,d7
	swap d7
	move.l d7,BLIT_A2FLAGS	;a2 (Dest) Gubbins

	move #0,d5		;d5 = destination line
	lea screen2,a0		;a0 = source line address

	move.l #screen1,d7
	swap d7
	move.l d7,BLIT_A2BASE		;set screen window bases

ggrl:	move #192,d7
	swap d7
	move #1,d7
	move.l d7,BLIT_COUNT 	;set inner and outer loop counts

	move #2,d7
	swap d7
	move #0,d7
	move.l d7,BLIT_A1PIXEL	;origin of source

	move #60,d7
	swap d7
	move d5,d7
	move.l d7,BLIT_A2PIXEL	;origin of destination

	move.l #0,BLIT_A1PIXELF


 	move.l #$00010000,BLIT_A1INC
	move.l #$0,BLIT_A1INCF

	move.l a0,d7
	swap d7
	move.l d7,BLIT_A1BASE	;source base moves with each line

	move.l #SRCEN|DSTA2|LFUFUNC_A|LFUFUNC_AN,d7
	swap d7
	move.l d7,BLIT_CMD
	bsr WaitBlit
	lea 400(a0),a0		;next source line
	add #1,d5
	cmp #200,d5
	blt ggrl
	rts

Funky:

;	try some odd stuff
;
; Warped copy of the screen at a0 to the screen at a1, one line per
; blit (200 lines).  d2.w = pixels per line.
; Per-line state:
;	d6   = source X increment, 16.16 (starts at 1.0)
;	d5   = source Y position, 16.16 (starts at 0)
;	d4   = destination line number
;	yinc = source Y step, 16.16 (starts at 1.0)
;	xinc = source X start, 16.16 (starts at 0)
; After each line d6, yinc and xinc are changed by amounts derived from
; xinc_a, yinc_a and xinc_a2: each is reduced by $ff and negated if the
; result is negative.
; Trashes d4-d7.  Called in a loop by Wibble.

	move.l #$00010000,d6	;initial X scale
	move #0,d4		;dest line
	move.l #0,d5		;srce line
	move.l #$00010000,yinc	
	clr.l xinc

	move.l #PITCH1|PIXEL16|WIDTH320|XADDINC,d7
	swap d7	
	move.l d7,BLIT_A1FLAGS	;a1 (Source) Gubbins

	move.l #PITCH1|PIXEL16|WIDTH320|XADDPIX|YADD1,d7
	swap d7
	move.l d7,BLIT_A2FLAGS	;a2 (Dest) Gubbins

	move.l a0,d7
	swap d7
	move.l d7,BLIT_A1BASE
	move.l a1,d7
	swap d7
	move.l d7,BLIT_A2BASE		;set screen window bases



bloop:
	move d2,d7
	swap d7
	move #1,d7
	move.l d7,BLIT_COUNT 	;set inner and outer loop counts

	move xinc,d7		;integer part of the X start
;	move #0,d7
	swap d7
	swap d5			;get int part of source Y
	move d5,d7
	move.l d7,BLIT_A1PIXEL	;origin of source

;	move #0,d7
	move xinc+2,d7		;fractional part of the X start
	swap d7
	swap d5			;back to the fractional part of Y
	move d5,d7
	move.l d7,BLIT_A1PIXELF	;fractional part of y srce origin

	move #0,d7
	swap d7
	move d4,d7
	move.l d7,BLIT_A2PIXEL	;origin of destination

	swap d6		;int part of X scale
	move d6,d7
	swap d7
	clr d7
	move.l d7,BLIT_A1INC
	swap d6		;frac part of X scale
	move d6,d7
	swap d7
	clr d7
	move.l d7,BLIT_A1INCF


	move.l #SRCEN|DSTA2|LFUFUNC_A|LFUFUNC_AN,d7
	swap d7
	move.l d7,BLIT_CMD
	bsr WaitBlit
	move.l xinc_a,d7
	sub.l #$ff,d7
	bpl xiok
	beq xiok		;never taken: zero already satisfies bpl
	neg.l d7
xiok:	sub.l d7,d6		;shrink the X scale a little
	move.l yinc,d7
	add.l d7,d5		;advance the source line
	move.l yinc_a,d7
	sub.l #$ff,d7
	bpl yiok
	beq yiok		;never taken (see above)
	neg.l d7
yiok:	sub.l d7,yinc
	move.l xinc_a2,d7
	sub.l #$ff,d7
	bpl xiok2
	beq xiok2		;never taken (see above)
	neg.l d7
xiok2:  asl.l #6,d7
	add.l d7,xinc
	add #1,d4
	cmp #200,d4
	blt bloop
 	rts

; GetGpuCode: stop the GPU and copy its program into GPU local RAM.
; The image at 'gpucode' starts with two header longwords: the first is
; skipped, the second is the program length in bytes.  The program
; follows and is copied, a longword at a time, to GPU_RAM.
; The byte length is rounded UP to whole longwords and exactly that many
; are copied.  (Previously the rounded-down longword count was used
; directly as the dbra counter, so one longword too many was copied
; whenever the length was a multiple of four, and a zero length still
; copied one longword.)
; dbra counts in 16 bits, so the image must be under 65536 longwords.
; Trashes d0, a0, a1.
GetGpuCode: moveq #8,d0
	swap d0			;control value is written word-swapped
	move.l d0,GPU_CTRL	;make sure it is stopped
 	lea GPU_RAM,a0	;base of GPU local RAM
	lea gpucode,a1		;where the linker includes GPU code
	move.l (a1)+,d0		;throw away first long
	move.l (a1)+,d0		;length of prog in bytes
	addq.l #3,d0		;round up to a whole number of longwords
	lsr.l #2,d0		;convert to longwords
	beq ggc_done		;nothing to copy
	subq.l #1,d0		;dbra loops count+1 times
getgpu: move.l (a1)+,(a0)+
	dbra d0,getgpu		;copy the code in
ggc_done: rts


; LeonardsMandy / Mandle: render a WIDTH x HEIGHT image into screen1,
; one byte per pixel, using the GPU for the per-pixel calculation.
; For each pixel the 68K writes xpos (and, once per row, ypos) into
; INBUF, clears gpu_sem, starts the GPU and waits for gpu_sem to become
; non-zero; the low byte of that value is stored as the pixel.
; GPU_RAM+$fd8 is set to -1 for the duration and back to 0 afterwards.
; jx/jy are passed once in INBUF+8 and INBUF+12.  All values handed to
; the GPU are word-swapped.
; The call in the start-up code is commented out.
; Trashes d0-d2, a0, a1.
LeonardsMandy:
	INBUF		equ	GPU_RAM+$fc0

	WIDTH		equ	320
	HEIGHT		equ	200

; The X numbers are shifted right by 13 before use
; These numbers are in units of 1/8192

; The commented out numbers do this SciAm cover
	XINC		equ	1
	XSTART		equ	$fffffa86

;	XSTART		equ	((-2)<<13)
;	XINC		equ	((10<<11)/WIDTH)

;	XSTART		equ	$ffffec1a
;	XINC		equ	1

; The Y numbers are shifted right by 13 before use
; These numbers are in units of 1/8192

; The commented out numbers do this SciAm cover
	YINC		equ	1
	YSTART		equ	$ffffde9a	

;	YSTART		equ	((-19)<<9)
;	YINC		equ	((6<<12)/WIDTH)

;	YSTART		equ	$ffffea00
;	YINC		equ	1

Mandle:

	move.l #-1,GPU_RAM+$fd8	;set GPU prog to mandy inner loop
	move.l	#0,jx
	move.l	#0,jy

	movea.l	#screen1,a1	; a1 = output pointer

	move.l	#YSTART,ypos	; Initialize y position

	move.l	#HEIGHT-1,d2	; row counter for dbra

	movea.l	#INBUF+8,a0

	move.l	jx,d0
	swap	d0
	move.l	d0,(a0)+	; INBUF+8  = jx

	move.l	jy,d0
	swap	d0
	move.l	d0,(a0)		; INBUF+12 = jy

outer:
	movea.l	#INBUF+4,a0

	move.l	ypos,d0
	swap	d0
	move.l	d0,(a0)		; INBUF+4 = ypos for this row

	move.l	#WIDTH-1,d1	; column counter for dbra
	move.l	#XSTART,xpos	; Initialize x position

inner:
	movea.l	#INBUF,a0

	move.l	xpos,d0
	swap	d0
	move.l	d0,(a0)		; INBUF = xpos for this pixel

	movea.l	#gpu_sem,a0
	move.l	#0,d0
	move.l	d0,(a0)		; clear the semaphore/result

	move.l	#$30000040,$00d02110	; GPU Program counter gets $403000

	move.l	#$00110000,$00d02114	; Set the GPU going

wait:	move.l	(a0),d0
	beq	wait		; spin until the semaphore is non-zero
	
;	swap	d0
	move.b	d0,(a1)+	; low byte of the result is the pixel
;	move.b #$80,(a1)+

	add.l	#XINC,xpos

	dbra	d1,inner


	add.l	#YINC,ypos

	dbra	d2,outer
	move.l #0,GPU_RAM+$fd8

	rts


*----------- thangs

.even
.data

		dcb.l	32,0		;slack below list1: InitLists rounds the
					;list address down and steps back 8 bytes
list1:	dcb.l	1024,0			;object list buffer 1

		dcb.l	32,0		;slack below list2 (same reason)
list2:	dcb.l	1024,0			;object list buffer 2

blist: dc.l list1			;list being built (reset by InitLists)
dlist: dc.l list2			;list being displayed (reset by InitLists)

nbeasties: dc.w 0			;used directly as a dbra counter
beasties: dcb.l 1024,0			;beastie records, 32 bytes each

 	dcb.l 64,0			;stack space (256 bytes)
stack: dc.l 0				;initial stack pointer

; Test object bitmap: 32 longwords, copied to 'testobject' at start-up
testdata: dc.l $00ff0000,$00000000
	  dc.l $ffff0000,$00000000
	  dc.l $00ff0000,$00000000
	  dc.l $00ff0000,$00000000
	  dc.l $00ff0000,$00000000
	  dc.l $00ff0000,$00000000
	  dc.l $00ffffff,$ffffffff
	  dc.l $00fff0ff,$ffff00ff
	  dc.l $00ffffff,$ffff0000
	  dc.l $00ffffff,$ffff0000
	  dc.l $00ffff00,$00ff0000
	  dc.l $00ff0000,$00ff0000
	  dc.l $00ff0000,$00ff0000
	  dc.l $00ff0000,$00ff0000
	  dc.l $00ff0000,$00ff0000
	  dc.l $ffff0000,$ffff0000
counter: dc.w 0				;border colour cycle (mainloop)
pointer: dc.l 0				;pattern position, +$200 per frame
cscreen: dc.l screen2			;screen currently displayed
dscreen: dc.l screen3			;screen the GPU draws into
rot_scale: dc.l 0			;set to $800000 at start-up
rot_scale_add: dc.l 0

; Funky / Wibble warp state
yinc: dc.l 0
yinc_a: dc.l 0
xinc_a: dc.l 0
xinc: dc.l 0
xinc_a2: dc.l 0

grad: dc.w 8
grad2: dc.w 4
sync: dc.w 0				;1 = mainloop has claimed a GPU pass
gpu_data: dc.w $0101			;accessed as a byte at this address
screen_ready: dc.w 0			;non-zero = GPU finished a screen
pointer2: dc.w 0

; Pattern-effect steps (see run_pat_vars / pat_stuff)
step1: dc.l 0
step2: dc.l 0
step3: dc.l 0
step4: dc.l 0
step5: dc.l 0
step6: dc.l 0
ud: dc.w 0				;only used by commented-out code

;tx_pos: dc.l 0
;tx_vel: dc.l $2a00
;ty_pos: dc.l 0
;ty_vel: dc.l $3b00

; Bouncing position and velocity, 16.16 (see run_tf_vars)
tx_pos: dc.l 0
tx_vel: dc.l $12a00
ty_pos: dc.l 0
ty_vel: dc.l $fb00


tz_pos: dc.w 0
tz_vel: dc.w 4
tzp: dc.w 0				;copy of tz_pos taken by tf_stuff
; Mandelbrot state (see Mandle)
xpos:	dc.l	1
ypos:	dc.l	1
jx:	dc.l	1
jy:	dc.l	1

gpucode:
