From 67649d47434b8b30a9c6a3319616e6531d3ba6a5 Mon Sep 17 00:00:00 2001 From: Brooke Vibber Date: Mon, 30 Dec 2024 19:17:02 -0800 Subject: [PATCH 1/4] annotations, tweak --- mandel.s | 55 ++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/mandel.s b/mandel.s index 4ac8d4d..787243f 100644 --- a/mandel.s +++ b/mandel.s @@ -310,18 +310,21 @@ viewport_oy: .endrepeat .endmacro +; 20 cycles .macro add16 dest, arg1, arg2 add 2, dest, arg1, arg2 .endmacro +; 38 cycles .macro add32 dest, arg1, arg2 add 4, dest, arg2, dest .endmacro +; 8 cycles .macro add_carry dest - lda dest - adc #0 - sta dest + lda dest ; 3 cyc + adc #0 ; 2 cyc + sta dest ; 3 cyc .endmacro ; 2 + 9 * byte cycles @@ -334,29 +337,35 @@ viewport_oy: .endrepeat .endmacro +; 20 cycles .macro sub16 dest, arg1, arg2 sub 2, dest, arg1, arg2 .endmacro +; 38 cycles .macro sub32 dest, arg1, arg2 sub 4, dest, arg1, arg2 .endmacro +; 5 * bytes cycles .macro shl bytes, arg - asl arg + asl arg ; 5 cyc .repeat bytes-1, i - rol arg + 1 + i + rol arg + 1 + i ; 5 cyc .endrepeat .endmacro +; 10 cycles .macro shl16 arg shl 2, arg .endmacro +; 15 cycles .macro shl24 arg shl 3, arg .endmacro +; 20 cycles .macro shl32 arg shl 4, arg .endmacro @@ -369,14 +378,17 @@ viewport_oy: .endrepeat .endmacro +; 12 cycles .macro copy16 dest, arg copy 2, dest, arg .endmacro +; 24 cycles .macro copy32 dest, arg copy 4, dest, arg .endmacro +; 36 cycles .macro copyfloat dest, arg copy 6, dest, arg .endmacro @@ -401,9 +413,10 @@ viewport_oy: neg 4, arg .endmacro +; 20 * shift .macro shift_round_16 arg, shift .repeat shift - shl32 arg + shl32 arg ; 20 cycles .endrepeat round16 arg .endmacro @@ -806,6 +819,7 @@ arg2_pos: sqr16_impl 1 .endproc +; 11-27 cycles .macro round16 arg ; Round top 16 bits of 32-bit fixed-point number in-place .local increment @@ -818,21 +832,28 @@ arg2_pos: ; round down if negative ; < $8000: round down - lda arg + 1 - cmp #$80 - beq high_half - bpl 
increment - bmi next + ; $8000 17 + ; $8001 27 + ; $8100 21 + ; $7fff 11 + + lda arg + 1 ; 3 cyc + cmp #$80 ; 2 cyc + beq high_half ; 2 cyc + + bpl increment ; 2 cyc + + bmi next ; 2 cyc high_half: - lda arg - beq check_sign - bpl increment - bmi next + lda arg ; 3 cyc + beq check_sign ; 2 cyc + + jmp increment ; 3 cyc check_sign: - lda arg + 3 - bmi next + lda arg + 3 ; 3 cyc + bmi next ; 2 cyc increment: ; 5-10 cyc inc arg + 2 ; 5 cyc From ec42f672d43ab8aecb863791ec55b22569436524 Mon Sep 17 00:00:00 2001 From: Brooke Vibber Date: Mon, 30 Dec 2024 19:48:28 -0800 Subject: [PATCH 2/4] use an 8-item z buffer for slightly fasterness --- mandel.s | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mandel.s b/mandel.s index 787243f..39e71b0 100644 --- a/mandel.s +++ b/mandel.s @@ -262,7 +262,10 @@ palette_chroma_entries = 15 .code -z_buffer_len = 16 +;z_buffer_len = 16 ; 10.863 ms/px +;z_buffer_len = 12 ; 10.619 ms/px +z_buffer_len = 8 ; 10.612 ms/px +;z_buffer_len = 4 ; 12.395 ms/px z_buffer_mask = z_buffer_len - 1 z_buffer: ; the last N zx/zy values From 0a7293d8bca6cc56182c356c993002ae1482f017 Mon Sep 17 00:00:00 2001 From: Brooke Vibber Date: Mon, 30 Dec 2024 19:52:35 -0800 Subject: [PATCH 3/4] do 4x4 2x2 1x1 only in prep for bigger pixels --- mandel.s | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mandel.s b/mandel.s index 39e71b0..b88105b 100644 --- a/mandel.s +++ b/mandel.s @@ -276,11 +276,12 @@ z_buffer: .export start -max_fill_level = 6 +;max_fill_level = 6 +max_fill_level = 3 fill_masks: - .byte %00011111 - .byte %00001111 - .byte %00000111 +; .byte %00011111 +; .byte %00001111 +; .byte %00000111 .byte %00000011 .byte %00000001 .byte %00000000 From b56dc1e98bfeb3c18c4f90df0e0d19fbe5362cde Mon Sep 17 00:00:00 2001 From: Brooke Vibber Date: Mon, 30 Dec 2024 20:38:33 -0800 Subject: [PATCH 4/4] notes --- mandel.s | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mandel.s b/mandel.s index 
b88105b..fc30532 100644 --- a/mandel.s +++ b/mandel.s @@ -417,19 +417,20 @@ viewport_oy: neg 4, arg .endmacro -; 20 * shift +; 11-27 + 20 * shift cycles +; 91-107 cycles for shift=4 .macro shift_round_16 arg, shift .repeat shift shl32 arg ; 20 cycles .endrepeat - round16 arg + round16 arg ; 11-27 cycles .endmacro .macro imul16_round dest, arg1, arg2, shift copy16 FR0, arg1 ; 12 cyc copy16 FR1, arg2 ; 12 cyc jsr imul16_func ; ? cyc - shift_round_16 FR2, shift + shift_round_16 FR2, shift ; 91-107 cycles for shift=4 copy16 dest, FR2 + 2 ; 12 cyc .endmacro @@ -437,7 +438,7 @@ viewport_oy: ;imul16_round dest, arg, arg, shift copy16 FR0, arg ; 12 cyc jsr sqr16_func ; ? cyc - shift_round_16 FR2, shift + shift_round_16 FR2, shift ; 91-107 cycles for shift=4 copy16 dest, FR2 + 2 ; 12 cyc .endmacro