Compare commits
3 commits
05133aabdd
...
829d2860e8
| Author | SHA1 | Date | |
|---|---|---|---|
| 829d2860e8 | |||
| f996c3cbcd | |||
| 405cec6d51 |
2 changed files with 115 additions and 32 deletions
75
imul8xe.s
Normal file
75
imul8xe.s
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
; Fixed addresses used by this file
FR0          = $d4      ; float48
PORTB        = $d301    ; imul8xe stores its bank-switch value here
EXTENDED_RAM = $4000    ; 16KiB bank on the XE
|
||||||
|
|
||||||
|
; Lookup table: top byte of the table address -> PORTB value for bank-switch.
; Indexed by the full 8-bit value; only its top two bits (i & $c0) select the
; bank, so each run of 64 consecutive entries holds the same PORTB value.
; Page-aligned so the indexed lookup never crosses a page.
;
; The two selector bits are shifted down by 4 so that they land in bits 2-3,
; the bank-select field of PORTB on the 130XE. (Shifting by 5 would place them
; in bits 1-2: only two of the four banks reachable, and the BASIC bit toggled.)
;
; NOTE(review): the fixed mask $c1 leaves bits 1, 4 and 5 clear -- confirm this
; is the intended BASIC / CPU-access / ANTIC-access setting.
.align 256
bankswitch:
    .repeat 256, i
        .byte ((i & $c0) >> 4) | $c1
    .endrepeat
|
||||||
|
|
||||||
|
; imul8xe dest, arg1, arg2
;
; 8-bit x 8-bit -> 16-bit multiply through a lookup table in bank-switched RAM.
;   dest        receives the result (low byte at dest, high byte at dest + 1)
;   arg1, arg2  the two 8-bit factors
;
; The 2-byte table entry is addressed by arg1 with its LSB cleared (offset
; within a page) and by arg2 (low 6 bits -> page inside the $4000 window,
; top 2 bits -> bank via the bankswitch table). When arg1 is odd, arg2 is
; added once more to account for the bit that was dropped from the index.
;
; 59-77 cycles, going by the per-instruction counts below; those are the
; zero-page timings for dest/arg1/arg2, and the taken `beq done` costs 3
; clobbers a, x, y, dest to dest + 3
; leaves the selected bank mapped in: PORTB is NOT restored (see note below)
.macro imul8xe dest, arg1, arg2
    .local done
    .local output
    .local ptr

    output = dest
    ptr = dest + 2              ; scratch space assumed

    ; bottom 14 bits except the LSB are the per-bank table index
    ; add $4000 for the bank pointer
    lda arg1                    ; 3 cyc
    and #$fe                    ; 2 cyc
    sta ptr                     ; 3 cyc
    lda arg2                    ; 3 cyc
    and #$3f                    ; 2 cyc
    clc                         ; 2 cyc
    adc #>EXTENDED_RAM          ; 2 cyc - high byte of $4000, i.e. $40
    sta ptr + 1                 ; 3 cyc

    ; top 2 bits are the table bank selector
    ldx arg2                    ; 3 cyc
    lda bankswitch,x            ; 4 cyc
    sta PORTB                   ; 4 cyc

    ; copy the entry into output
    ldy #0                      ; 2 cyc
    lda (ptr),y                 ; 5 cyc
    sta output                  ; 3 cyc
    iny                         ; 2 cyc
    lda (ptr),y                 ; 5 cyc
    sta output+1                ; 3 cyc

    ; note: we are not restoring memory to save 6 cycles!
    ; this means those 16kb have to be switched back to base RAM
    ; if we need to use them anywhere else
    ;;; restore memory
    ;;lda #$81                  ; 2 cyc - disabled
    ;;sta PORTB                 ; 4 cyc - disabled

    ; check that 1 bit we skipped to fit into space
    lda arg1                    ; 3 cyc
    and #1                      ; 2 cyc
    beq done                    ; 2 cyc (3 when taken)

    ; add the second param one last time for the skipped bit
    clc                         ; 2 cyc
    lda arg2                    ; 3 cyc
    adc output                  ; 3 cyc
    sta output                  ; 3 cyc
    lda #0                      ; 2 cyc
    adc output+1                ; 3 cyc - propagate the carry into the high byte
    sta output+1                ; 3 cyc

done:
.endmacro
|
||||||
|
|
||||||
|
; imul8xe_init: currently an empty routine that returns immediately.
; NOTE(review): presumably the place where the imul8xe lookup tables are meant
; to be generated -- nothing is set up here yet.
.proc imul8xe_init
    rts
.endproc
|
||||||
72
mandel.s
72
mandel.s
|
|
@ -372,51 +372,59 @@ fill_masks:
|
||||||
.local under256
|
.local under256
|
||||||
.local next
|
.local next
|
||||||
.local small_product
|
.local small_product
|
||||||
|
; circa 92 cycles? this doesn't seem right
|
||||||
|
; 81-92 cycles
|
||||||
.scope
|
.scope
|
||||||
mul_factor_a = arg1
|
mul_factor_a = arg1
|
||||||
mul_factor_x = arg2
|
mul_factor_x = arg2
|
||||||
mul_product_lo = dest
|
mul_product_lo = dest
|
||||||
mul_product_hi = dest + 1
|
mul_product_hi = dest + 1
|
||||||
|
|
||||||
lda mul_factor_a ; setup: 6 cycles
|
lda mul_factor_a ; 3 cyc
|
||||||
;ldx mul_factor_x
|
|
||||||
|
|
||||||
clc ; (a + x)^2/2: 23 cycles
|
; (a + x)^2/2
|
||||||
adc mul_factor_x
|
clc ; 2 cyc
|
||||||
tax
|
adc mul_factor_x ; 3 cyc
|
||||||
bcc under256
|
tax ; 2 cyc
|
||||||
lda mul_hibyte512,x
|
bcc under256 ; 2 cyc
|
||||||
bcs next
|
lda mul_hibyte512,x ; 4 cyc
|
||||||
|
bcs next ; 2 cyc
|
||||||
under256:
|
under256:
|
||||||
lda mul_hibyte256,x
|
lda mul_hibyte256,x ; 4 cyc
|
||||||
sec
|
sec ; 2 cyc
|
||||||
next:
|
next:
|
||||||
sta mul_product_hi
|
sta mul_product_hi ; 3 cyc
|
||||||
lda mul_lobyte256,x
|
lda mul_lobyte256,x ; 4 cyc
|
||||||
|
|
||||||
ldx mul_factor_a ; - a^2/2: 20 cycles
|
; - a^2/2
|
||||||
sbc mul_lobyte256,x
|
ldx mul_factor_a ; 3 cyc
|
||||||
sta mul_product_lo
|
sbc mul_lobyte256,x ; 4 cyc
|
||||||
lda mul_product_hi
|
sta mul_product_lo ; 3 cyc
|
||||||
sbc mul_hibyte256,x
|
lda mul_product_hi ; 3 cyc
|
||||||
sta mul_product_hi
|
sbc mul_hibyte256,x ; 4 cyc
|
||||||
|
sta mul_product_hi ; 3 cyc
|
||||||
|
|
||||||
ldx mul_factor_x ; + x & a & 1: 22 cycles
|
; + x & a & 1:
|
||||||
txa ; (this is a kludge to correct a
|
; (this is a kludge to correct a
|
||||||
and mul_factor_a ; roundoff error that makes odd * odd too low)
|
; roundoff error that makes odd * odd too low)
|
||||||
and #1
|
ldx mul_factor_x ; 3 cyc
|
||||||
|
txa ; 2 cyc
|
||||||
|
and mul_factor_a ; 3 cyc
|
||||||
|
and #1 ; 2 cyc
|
||||||
|
|
||||||
clc
|
clc ; 2 cyc
|
||||||
adc mul_product_lo
|
adc mul_product_lo ; 3 cyc
|
||||||
bcc small_product
|
bcc small_product ; 2 cyc
|
||||||
inc mul_product_hi
|
inc mul_product_hi ; 5 cyc
|
||||||
|
|
||||||
|
; - x^2/2
|
||||||
small_product:
|
small_product:
|
||||||
sec ; - x^2/2: 25 cycles
|
sec ; 2 cyc
|
||||||
sbc mul_lobyte256,x
|
sbc mul_lobyte256,x ; 4 cyc
|
||||||
sta mul_product_lo
|
sta mul_product_lo ; 3 cyc
|
||||||
lda mul_product_hi
|
lda mul_product_hi ; 3 cyc
|
||||||
sbc mul_hibyte256,x
|
sbc mul_hibyte256,x ; 4 cyc
|
||||||
sta mul_product_hi
|
sta mul_product_hi ; 3 cyc
|
||||||
.endscope
|
.endscope
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue