diff --git a/mandel.s b/mandel.s
index 4ac8d4d..787243f 100644
--- a/mandel.s
+++ b/mandel.s
@@ -310,18 +310,21 @@ viewport_oy:
     .endrepeat
 .endmacro
 
+; 20 cycles (2 + 9 * 2)
 .macro add16 dest, arg1, arg2
     add 2, dest, arg1, arg2
 .endmacro
 
+; 38 cycles (2 + 9 * 4); dest = arg1 + arg2, same operand order as add16
 .macro add32 dest, arg1, arg2
-    add 4, dest, arg2, dest
+    add 4, dest, arg1, arg2
 .endmacro
 
+; 8 cycles (zero-page timings); folds the carry flag into one byte
 .macro add_carry dest
-    lda dest
-    adc #0
-    sta dest
+    lda dest ; 3 cyc
+    adc #0 ; 2 cyc
+    sta dest ; 3 cyc
 .endmacro
 
 ; 2 + 9 * byte cycles
@@ -334,29 +337,35 @@ viewport_oy:
     .endrepeat
 .endmacro
 
+; 20 cycles (2 + 9 * 2)
 .macro sub16 dest, arg1, arg2
     sub 2, dest, arg1, arg2
 .endmacro
 
+; 38 cycles (2 + 9 * 4)
 .macro sub32 dest, arg1, arg2
     sub 4, dest, arg1, arg2
 .endmacro
 
+; 5 * bytes cycles: one asl plus (bytes - 1) rol, each 5 cyc on a zero-page operand
 .macro shl bytes, arg
-    asl arg
+    asl arg ; 5 cyc
     .repeat bytes-1, i
-        rol arg + 1 + i
+        rol arg + 1 + i ; 5 cyc
     .endrepeat
 .endmacro
 
+; 10 cycles
 .macro shl16 arg
     shl 2, arg
 .endmacro
 
+; 15 cycles
 .macro shl24 arg
     shl 3, arg
 .endmacro
 
+; 20 cycles
 .macro shl32 arg
     shl 4, arg
 .endmacro
@@ -369,14 +378,17 @@ viewport_oy:
     .endrepeat
 .endmacro
 
+; 12 cycles (6 per byte)
 .macro copy16 dest, arg
     copy 2, dest, arg
 .endmacro
 
+; 24 cycles (6 per byte)
 .macro copy32 dest, arg
     copy 4, dest, arg
 .endmacro
 
+; 36 cycles (6 per byte)
 .macro copyfloat dest, arg
     copy 6, dest, arg
 .endmacro
@@ -401,9 +413,10 @@ viewport_oy:
     neg 4, arg
 .endmacro
 
+; 20 * shift cycles for the shifts, plus the cost of round16
 .macro shift_round_16 arg, shift
     .repeat shift
-        shl32 arg
+        shl32 arg ; 20 cycles
     .endrepeat
     round16 arg
 .endmacro
@@ -806,6 +819,7 @@ arg2_pos:
     sqr16_impl 1
 .endproc
 
+; 11-27 cycles per the case tallies in the body, which count each branch as 2 cyc; a taken branch costs 3
 .macro round16 arg
     ; Round top 16 bits of 32-bit fixed-point number in-place
     .local increment
@@ -818,21 +832,28 @@ arg2_pos:
     ; round down if negative
     ; < $8000: round down
 
-    lda arg + 1
-    cmp #$80
-    beq high_half
-    bpl increment
-    bmi next
+    ; $8000 17
+    ; $8001 27
+    ; $8100 21
+    ; $7fff 11
+
+    lda arg + 1 ; 3 cyc
+    cmp #$80 ; 2 cyc
+    beq high_half ; 2 cyc; arg + 1 == $80, the low byte decides
+
+    bpl increment ; 2 cyc; arg + 1 > $80, round up
+
+    bmi next ; 2 cyc; arg + 1 < $80, round down
 
 high_half:
-    lda arg
-    beq check_sign
-    bpl increment
-    bmi next
+    lda arg ; 3 cyc
+    beq check_sign ; 2 cyc; low 16 bits are exactly $8000
+
+    jmp increment ; 3 cyc; low 16 bits > $8000, always round up
 
 check_sign:
-    lda arg + 3
-    bmi next
+    lda arg + 3 ; 3 cyc
+    bmi next ; 2 cyc; exact half on a negative value is not incremented
 
 increment: ; 5-10 cyc
     inc arg + 2 ; 5 cyc