woo
This commit is contained in:
parent
4b1001bfdc
commit
2d67cdc498
1 changed files with 14 additions and 6 deletions
20
mandel.s
20
mandel.s
|
@ -115,7 +115,7 @@ plus:
|
||||||
shr 4, arg
|
shr 4, arg
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; 24 to 49 cycles
|
; 24 to 44 cycles
|
||||||
.macro bitmul16 arg1, arg2, result, bitnum
|
.macro bitmul16 arg1, arg2, result, bitnum
|
||||||
.local next
|
.local next
|
||||||
|
|
||||||
|
@ -137,12 +137,18 @@ plus:
|
||||||
beq next ; 2 cyc
|
beq next ; 2 cyc
|
||||||
|
|
||||||
; 16-bit add on the top bits
|
; 16-bit add on the top bits
|
||||||
add16 result + 2, arg2 ; 20 cyc
|
lda result + 2 ; 3 cyc
|
||||||
|
adc arg2 ; 3 cyc
|
||||||
|
sta result + 2 ; 3 cyc
|
||||||
|
lda result + 3 ; 3 cyc
|
||||||
|
adc arg2 + 1 ; 3 cyc
|
||||||
|
ror a ; 2 cyc - get a jump on the shift
|
||||||
|
sta result + 3 ; 3 cyc
|
||||||
|
|
||||||
next:
|
|
||||||
; Shift the 32-bit result down by one bit,
|
; Shift the 32-bit result down by one bit,
|
||||||
; saving the previous carry.
|
; saving the previous carry.
|
||||||
ror result + 3 ; 5 cyc
|
ror result + 3 ; 5 cyc
|
||||||
|
next:
|
||||||
ror result + 2 ; 5 cyc
|
ror result + 2 ; 5 cyc
|
||||||
ror result + 1 ; 5 cyc
|
ror result + 1 ; 5 cyc
|
||||||
.if bitnum >= 8
|
.if bitnum >= 8
|
||||||
|
@ -164,7 +170,8 @@ next:
|
||||||
positive:
|
positive:
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; 579 to 725 cycles
|
; min 454 cycles
|
||||||
|
; max 756 cycles
|
||||||
.proc imul16
|
.proc imul16
|
||||||
arg1 = FR0 ; 16-bit arg (clobbered)
|
arg1 = FR0 ; 16-bit arg (clobbered)
|
||||||
arg2 = FR1 ; 16-bit arg (clobbered)
|
arg2 = FR1 ; 16-bit arg (clobbered)
|
||||||
|
@ -183,9 +190,10 @@ positive:
|
||||||
|
|
||||||
; unrolled loop for maximum speed, at the cost
|
; unrolled loop for maximum speed, at the cost
|
||||||
; of a larger routine
|
; of a larger routine
|
||||||
|
; 424 to 672 cycles
|
||||||
.repeat 16, bitnum
|
.repeat 16, bitnum
|
||||||
; first half: 24 to 45 cycles
|
; first half: 24 to 40 cycles
|
||||||
; second half: 29 to 49 cycles
|
; second half: 29 to 44 cycles
|
||||||
bitmul16 arg1, arg2, result, bitnum
|
bitmul16 arg1, arg2, result, bitnum
|
||||||
.endrepeat
|
.endrepeat
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue