diff --git a/mandel.s b/mandel.s index 39571b3..b706dc5 100644 --- a/mandel.s +++ b/mandel.s @@ -115,7 +115,7 @@ plus: shr 4, arg .endmacro -; 24 to 49 cycles +; 24 to 44 cycles .macro bitmul16 arg1, arg2, result, bitnum .local next @@ -137,12 +137,18 @@ plus: beq next ; 2 cyc ; 16-bit add on the top bits - add16 result + 2, arg2 ; 20 cyc + lda result + 2 ; 3 cyc + adc arg2 ; 3 cyc + sta result + 2 ; 3 cyc + lda result + 3 ; 3 cyc + adc arg2 + 1 ; 3 cyc + ror a ; 2 cyc - get a jump on the shift + sta result + 3 ; 3 cyc -next: ; Shift the 32-bit result down by one bit, ; saving the previous carry. ror result + 3 ; 5 cyc +next: ror result + 2 ; 5 cyc ror result + 1 ; 5 cyc .if bitnum >= 8 @@ -164,7 +170,8 @@ next: positive: .endmacro -; 579 to 725 cycles +; min 454 cycles +; max 756 cycles .proc imul16 arg1 = FR0 ; 16-bit arg (clobbered) arg2 = FR1 ; 16-bit arg (clobbered) @@ -183,9 +190,10 @@ positive: ; unrolled loop for maximum speed, at the cost ; of a larger routine + ; 424 to 672 cycles .repeat 16, bitnum - ; first half: 24 to 45 cycles - ; second half: 29 to 49 cycles + ; first half: 24 to 40 cycles + ; second half: 29 to 44 cycles bitmul16 arg1, arg2, result, bitnum .endrepeat