diff --git a/mandel.s b/mandel.s
index 665ea70..7701090 100644
--- a/mandel.s
+++ b/mandel.s
@@ -208,7 +208,7 @@ negative:
     sta arg + 2 ; 3 cyc
     lda arg + 3 ; 3 cyc
     sbc #0 ; 2 cyc
-    lda arg + 3 ; 3 cyc
+    sta arg + 3 ; 3 cyc
     jmp next ; 3 cyc
 
 positive:
@@ -225,6 +225,75 @@ next:
 
 .endmacro
 
+.macro round16_addsub_copy arg, dest
+    ; Round the top 16 bits of the 32-bit fixed-point number at arg
+    ; (bytes arg + 2 and arg + 3) and store the 16-bit result at dest
+    ; (low byte at dest, high byte at dest + 1). arg is not modified.
+    ;
+    ; Rounding rule: when bit 7 of arg + 1 is set, the top word is
+    ; incremented if arg + 3 is non-negative and decremented if it is
+    ; negative; otherwise the top word is copied unchanged.
+    ;
+    ; NOTE(review): for two's-complement input, decrementing on the
+    ; negative path moves away from the nearest value -- confirm this
+    ; matches the number format the callers rely on.
+    ;
+    ; Clobbers: A and the processor flags.
+    .local zero
+    .local one
+    .local positive
+    .local negative
+    .local next
+
+    ; no round - 17 cycles
+    ; round, positive - 31 cycles
+    ; round, negative - 31 cycles
+    ; average = 17 / 2 + (31 + 31) / 4
+    ;         = 17 / 2 + 62 / 4
+    ;         = 24 cycles average
+    ;
+    ; compare with 13.75 cyc in-place plus three copies at 12 cycles
+    ; 13.75 + 36 = 49.75 (41 - 64)
+    ; versus three rounds+copies: 72 (51 - 93)
+
+    ; bit 7 of arg + 1 is the most significant discarded bit
+    lda arg + 1 ; 3 cyc
+    bpl zero ; 2 cyc
+
+one:
+    ; check sign bit
+    lda arg + 3 ; 3 cyc
+    bpl positive ; 2 cyc
+
+negative:
+    sec ; 2 cyc
+    lda arg + 2 ; 3 cyc
+    sbc #1 ; 2 cyc
+    sta dest ; 3 cyc
+    lda arg + 3 ; 3 cyc
+    sbc #0 ; 2 cyc
+    jmp next ; 3 cyc
+
+positive:
+    clc ; 2 cyc
+    lda arg + 2 ; 3 cyc
+    adc #1 ; 2 cyc
+    sta dest ; 3 cyc
+    lda arg + 3 ; 3 cyc
+    adc #0 ; 2 cyc
+    jmp next ; 3 cyc
+
+zero:
+    lda arg + 2 ; 3 cyc
+    sta dest ; 3 cyc
+    lda arg + 3 ; 3 cyc
+
+next:
+    ; every path arrives here with the result's high byte in A
+    sta dest + 1 ; 3 cyc
+
+.endmacro
+
 .proc iter
     ; (cx and cy should be pre-scaled to 6.26 fixed point)