diff --git a/mandel.s b/mandel.s
index 487ceb1..d8b25e1 100644
--- a/mandel.s
+++ b/mandel.s
@@ -30,16 +30,18 @@ FRX = $ec
 .endmacro
 
 ; inner loop for imul16
-; 24 to 44 cycles
+; arg1 bit clear: 25 cycles (bitnum < 8) or 30 (bitnum >= 8)
+; arg1 bit set:   40 cycles (bitnum < 8) or 45 (bitnum >= 8)
 .macro bitmul16 arg1, arg2, result, bitnum
-    .local one
     .local zero
+    .local one
+    .local next
 
     ; does 16-bit adds
     ; arg1 must be 0 or positive
     ; arg2 must be 0 or positive
 
-    clc                         ; 2 cyc
+    ; 7 cycles up to the branch (8 when it is taken)
 
     ; check if arg1 has 0 or 1 bit in this place
     ; 5 cycles either way
@@ -50,24 +52,10 @@ FRX = $ec
     lda arg1 + 1                ; 3 cyc
     and #(1 << (bitnum - 8))    ; 2 cyc
     .endif
-    beq zero                    ; 2 cyc
+    bne one                     ; 2 cyc (3 if taken)
 
-one:
-    ; 16-bit add on the top bits
-    lda result + 2              ; 3 cyc
-    adc arg2                    ; 3 cyc
-    sta result + 2              ; 3 cyc
-    lda result + 3              ; 3 cyc
-    adc arg2 + 1                ; 3 cyc
-    ror a                       ; 2 cyc - get a jump on the shift
-    sta result + 3              ; 3 cyc
-    jmp oneb                    ; 3 cyc
-
-    ; Shift the 32-bit result down by one bit,
-    ; saving the previous carry.
-zero:
-    ror result + 3              ; 5 cyc
-oneb:
+zero:                           ; 18 cyc / 23 cyc
+    lsr result + 3              ; 5 cyc
     ror result + 2              ; 5 cyc
     ror result + 1              ; 5 cyc
     .if bitnum >= 8
@@ -75,6 +63,25 @@ oneb:
     ; when it's all uninitialized data
     ror result                  ; 5 cyc
     .endif
+    jmp next                    ; 3 cyc
+
+one:                            ; 32 cyc / 37 cyc
+    ; 16-bit add on the top bits
+    clc                         ; 2 cyc
+    lda result + 2              ; 3 cyc
+    adc arg2                    ; 3 cyc
+    sta result + 2              ; 3 cyc
+    lda result + 3              ; 3 cyc
+    adc arg2 + 1                ; 3 cyc
+    ror a                       ; 2 cyc - get a jump on the shift
+    sta result + 3              ; 3 cyc
+    ror result + 2              ; 5 cyc
+    ror result + 1              ; 5 cyc
+    .if bitnum >= 8
+    ror result                  ; 5 cyc
+    .endif
+next:
+
 .endmacro
 
 ; 5 to 25 cycles
@@ -157,7 +164,7 @@ loop:
 
 
 .proc start
-loop:
+looplong:
     ; FR0 = 5
     ; FR1 = -3
     lda #5
@@ -172,5 +179,6 @@ loop:
     jsr imul16
     ; should have 32-bit -15 in FR2
 
+loop:
     jmp loop
 .endproc