Patch for mandel.s (ca65 syntax, 6502).
Everything above the first "diff --git" line is commentary; patch(1) and
git-apply skip it.

What this patch changes:
  * z_buffer_len 8 -> 16 and max_fill_level 3 -> 6, re-enabling the three
    widest fill_masks entries so the table again has six rows.
  * Drops the cycle-count annotations from the add/sub/shl/copy/round helpers.
  * Rewrites the branch tails of round16 without blank separator lines.

Review fix folded into the round16 hunk (high_half path):
  The previous revision of this patch replaced
      beq check_sign / jmp increment
  with
      beq check_sign / bpl increment / bmi next
  After "lda arg" the N flag is bit 7 of the low byte, so low 16 bits in
  $8080..$80FF branched to "next" and were rounded down, while the pre-image
  rounded up for any non-zero low byte. The tail is now "bne increment",
  which is always taken at that point because Z is clear after the untaken
  beq.

Review notes:
  * Line breaks, blank context lines and indentation were re-derived from the
    hunk line counts; exact whitespace may differ from the working tree, so
    apply with whitespace tolerance or regenerate against the real file.
  * The post-image blob id on the "index" line predates the fix above.
  * NOTE(review): add32 expands to "add 4, dest, arg2, dest" and never uses
    arg1, unlike add16 and sub32. Left untouched here because the add macro
    and its call sites are not visible in this patch - confirm whether every
    caller passes dest as arg1.

diff --git a/mandel.s b/mandel.s
index fc30532..4ac8d4d 100644
--- a/mandel.s
+++ b/mandel.s
@@ -262,10 +262,7 @@ palette_chroma_entries = 15
 
 .code
 
-;z_buffer_len = 16 ; 10.863 ms/px
-;z_buffer_len = 12 ; 10.619 ms/px
-z_buffer_len = 8 ; 10.612 ms/px
-;z_buffer_len = 4 ; 12.395 ms/px
+z_buffer_len = 16
 z_buffer_mask = z_buffer_len - 1
 z_buffer:
   ; the last N zx/zy values
@@ -276,12 +273,11 @@ z_buffer:
 
 .export start
 
-;max_fill_level = 6
-max_fill_level = 3
+max_fill_level = 6
 fill_masks:
-;  .byte %00011111
-;  .byte %00001111
-;  .byte %00000111
+  .byte %00011111
+  .byte %00001111
+  .byte %00000111
   .byte %00000011
   .byte %00000001
   .byte %00000000
@@ -314,21 +310,18 @@ viewport_oy:
   .endrepeat
 .endmacro
 
-; 20 cycles
 .macro add16 dest, arg1, arg2
   add 2, dest, arg1, arg2
 .endmacro
 
-; 38 cycles
 .macro add32 dest, arg1, arg2
   add 4, dest, arg2, dest
 .endmacro
 
-; 8 cycles
 .macro add_carry dest
-  lda dest ; 3 cyc
-  adc #0 ; 2 cyc
-  sta dest ; 3 cyc
+  lda dest
+  adc #0
+  sta dest
 .endmacro
 
 ; 2 + 9 * byte cycles
@@ -341,35 +334,29 @@ viewport_oy:
   .endrepeat
 .endmacro
 
-; 20 cycles
 .macro sub16 dest, arg1, arg2
   sub 2, dest, arg1, arg2
 .endmacro
 
-; 38 cycles
 .macro sub32 dest, arg1, arg2
   sub 4, dest, arg1, arg2
 .endmacro
 
-; 3 + 5 * bytes cycles
 .macro shl bytes, arg
-  asl arg ; 3 cyc
+  asl arg
   .repeat bytes-1, i
-    rol arg + 1 + i ; 5 cyc
+    rol arg + 1 + i
   .endrepeat
 .endmacro
 
-; 13 cycles
 .macro shl16 arg
   shl 2, arg
 .endmacro
 
-; 18 cycles
 .macro shl24 arg
   shl 3, arg
 .endmacro
 
-; 23 cycles
 .macro shl32 arg
   shl 4, arg
 .endmacro
@@ -382,17 +369,14 @@ viewport_oy:
   .endrepeat
 .endmacro
 
-; 12 cycles
 .macro copy16 dest, arg
   copy 2, dest, arg
 .endmacro
 
-; 24 cycles
 .macro copy32 dest, arg
   copy 4, dest, arg
 .endmacro
 
-; 36 cycles
 .macro copyfloat dest, arg
   copy 6, dest, arg
 .endmacro
@@ -417,20 +401,18 @@ viewport_oy:
   neg 4, arg
 .endmacro
 
-; 11-27 + 23 * shift cycles
-; 103-119 cycles for shift=4
 .macro shift_round_16 arg, shift
   .repeat shift
-    shl32 arg ; 23 cycles
+    shl32 arg
   .endrepeat
-  round16 arg ; 11-27 cycles
+  round16 arg
 .endmacro
 
 .macro imul16_round dest, arg1, arg2, shift
   copy16 FR0, arg1 ; 12 cyc
   copy16 FR1, arg2 ; 12 cyc
   jsr imul16_func ; ? cyc
-  shift_round_16 FR2, shift ; 103-119 cycles for shift=4
+  shift_round_16 FR2, shift
   copy16 dest, FR2 + 2 ; 12 cyc
 .endmacro
 
@@ -438,7 +420,7 @@ viewport_oy:
   ;imul16_round dest, arg, arg, shift
   copy16 FR0, arg ; 12 cyc
   jsr sqr16_func ; ? cyc
-  shift_round_16 FR2, shift ; 103-119 cycles for shift=4
+  shift_round_16 FR2, shift
   copy16 dest, FR2 + 2 ; 12 cyc
 .endmacro
 
@@ -824,7 +806,6 @@ arg2_pos:
   sqr16_impl 1
 .endproc
 
-; 11-27 cycles
 .macro round16 arg
   ; Round top 16 bits of 32-bit fixed-point number in-place
   .local increment
@@ -837,28 +818,21 @@ arg2_pos:
   ; round down if negative
   ; < $8000: round down
 
-  ; $8000 17
-  ; $8001 27
-  ; $8100 21
-  ; $7fff 11
-
-  lda arg + 1 ; 3 cyc
-  cmp #$80 ; 2 cyc
-  beq high_half ; 2 cyc
-
-  bpl increment ; 2 cyc
-
-  bmi next ; 2 cyc
+  lda arg + 1
+  cmp #$80
+  beq high_half
+  bpl increment
+  bmi next
 
 high_half:
-  lda arg ; 3 cyc
-  beq check_sign ; 2 cyc
-
-  jmp increment ; 3 cyc
+  lda arg
+  beq check_sign
+  ; non-zero low byte => fraction > $8000: always round up (Z is clear here)
+  bne increment
 
 check_sign:
-  lda arg + 3 ; 3 cyc
-  bmi next ; 2 cyc
+  lda arg + 3
+  bmi next
 
 increment: ; 5-10 cyc
   inc arg + 2 ; 5 cyc