diff --git a/mandel.s b/mandel.s index 4444274..9b569e2 100644 --- a/mandel.s +++ b/mandel.s @@ -30,18 +30,15 @@ FRX = $ec .endmacro ; inner loop for imul16 -; bitnum < 8: 25 or 41 cycles -; bitnum >= 8: 30 or 46 cycles +; 24 to 44 cycles .macro bitmul16 arg1, arg2, result, bitnum - .local zero - .local one .local next ; does 16-bit adds ; arg1 must be 0 or positive ; arg2 must be 0 or positive - ; 7 cycles up to the branch + clc ; 2 cyc ; check if arg1 has 0 or 1 bit in this place ; 5 cycles either way @@ -52,29 +49,21 @@ FRX = $ec lda arg1 + 1 ; 3 cyc and #(1 << (bitnum - 8)) ; 2 cyc .endif - bne one ; 2 cyc + beq next ; 2 cyc -zero: ; 18 cyc, 23 cyc - lsr result + 3 ; 5 cyc - ror result + 2 ; 5 cyc - ror result + 1 ; 5 cyc - .if bitnum >= 8 - ; we can save 5 cycles * 8 bits = 40 cycles total by skipping this byte - ; when it's all uninitialized data - ror result ; 5 cyc - .endif - jmp next ; 3 cyc - -one: ; 32 cyc, 37 cyc ; 16-bit add on the top bits - clc ; 2 cyc lda result + 2 ; 3 cyc adc arg2 ; 3 cyc sta result + 2 ; 3 cyc lda result + 3 ; 3 cyc adc arg2 + 1 ; 3 cyc - ror a ; 2 cyc + ror a ; 2 cyc - get a jump on the shift sta result + 3 ; 3 cyc + + ; Shift the 32-bit result down by one bit, + ; saving the previous carry. + ror result + 3 ; 5 cyc +next: ror result + 2 ; 5 cyc ror result + 1 ; 5 cyc .if bitnum >= 8 @@ -82,9 +71,6 @@ one: ; 32 cyc, 37 cyc ; when it's all uninitialized data ror result ; 5 cyc .endif - -next: - .endmacro ; 5 to 25 cycles @@ -121,8 +107,8 @@ positive: ; of a larger routine ; 424 to 672 cycles .repeat 16, bitnum - ; first half: 22 to 40 cycles - ; second half: 29 to 47 cycles + ; first half: 24 to 40 cycles + ; second half: 29 to 44 cycles bitmul16 arg1, arg2, result, bitnum .endrepeat @@ -167,7 +153,7 @@ loop: .proc start -looplong: +loop: ; FR0 = 5 ; FR1 = -3 lda #5 @@ -182,6 +168,5 @@ looplong: jsr imul16 ; should have 32-bit -15 in FR2 -loop: jmp loop .endproc