From fa0de6dc776a875ed97f74c4261d91629ee58fb7 Mon Sep 17 00:00:00 2001 From: Brooke Vibber Date: Tue, 16 Sep 2025 21:29:40 -0700 Subject: [PATCH 1/2] WIP savings of half a cycle per imul8_xe Uses X to cache arg1, which is always used, instead of arg2, which is only used on odds. Should save half a cycle per imul8_xe, untested --- mandel.s | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/mandel.s b/mandel.s index b0f9c28..ec9f17f 100644 --- a/mandel.s +++ b/mandel.s @@ -461,7 +461,7 @@ input_max: sub 4, dest, arg1, arg2 .endmacro -; 3 + 5 * bytes cycles +; 3 + 5 * (bytes - 1) cycles .macro shl bytes, arg asl arg ; 3 cyc .repeat bytes-1, i @@ -469,17 +469,17 @@ input_max: .endrepeat .endmacro -; 13 cycles +; 8 cycles .macro shl16 arg shl 2, arg .endmacro -; 18 cycles +; 13 cycles .macro shl24 arg shl 3, arg .endmacro -; 23 cycles +; 18 cycles .macro shl32 arg shl 4, arg .endmacro @@ -529,11 +529,11 @@ input_max: neg 4, arg .endmacro -; 11-27 + 23 * shift cycles -; 103-119 cycles for shift=4 +; 11-27 + 18 * shift cycles +; 65-81 cycles for shift=3 .macro shift_round_16 arg, shift .repeat shift - shl32 arg ; 23 cycles + shl32 arg ; 18 cycles .endrepeat round16 arg ; 11-27 cycles .endmacro @@ -588,7 +588,7 @@ bank_switch_table: .macro imul8 dest, arg1, arg2, xe .if xe ; using 64KB lookup table - ; 51-70 cycles + ; 50-70 cycles ; clobbers x, y, dest, ptr .scope output = dest @@ -600,13 +600,13 @@ bank_switch_table: ; bottom 14 bits except the LSB are the per-bank table index ; add $4000 for the bank pointer - txa ; 2 cyc and #$3f ; 2 cyc ora #$40 ; 2 cyc sta ptr + 1 ; 3 cyc ; copy the entry into output lda arg1 ; 3 cyc + tax ; 2 cyc and #$fe ; 2 cyc tay ; 2 cyc lda (ptr),y ; 5 cyc @@ -623,13 +623,13 @@ bank_switch_table: ;;sta PORTB ; 4 cyc - disabled ; check that 1 bit we skipped to fit into space - lda arg1 ; 3 cyc + txa ; 2 cyc and #1 ; 2 cyc beq done ; 2 cyc ; add arg2 one last time for the skipped bit clc ; 2 cyc - txa ; 2 
cyc + lda arg2 ; 3 cyc adc output ; 3 cyc sta output ; 3 cyc lda #0 ; 2 cyc From 6479cf530c1c584f33b96f2b19885d02415863bb Mon Sep 17 00:00:00 2001 From: Brooke Vibber Date: Tue, 16 Sep 2025 21:29:40 -0700 Subject: [PATCH 2/2] update some timings --- mandel.s | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mandel.s b/mandel.s index b0f9c28..b52f24a 100644 --- a/mandel.s +++ b/mandel.s @@ -461,7 +461,7 @@ input_max: sub 4, dest, arg1, arg2 .endmacro -; 3 + 5 * bytes cycles +; 3 + 5 * (bytes - 1) cycles .macro shl bytes, arg asl arg ; 3 cyc .repeat bytes-1, i @@ -469,17 +469,17 @@ input_max: .endrepeat .endmacro -; 13 cycles +; 8 cycles .macro shl16 arg shl 2, arg .endmacro -; 18 cycles +; 13 cycles .macro shl24 arg shl 3, arg .endmacro -; 23 cycles +; 18 cycles .macro shl32 arg shl 4, arg .endmacro @@ -529,11 +529,11 @@ input_max: neg 4, arg .endmacro -; 11-27 + 23 * shift cycles -; 103-119 cycles for shift=4 +; 11-27 + 18 * shift cycles +; 65-81 cycles for shift=3 .macro shift_round_16 arg, shift .repeat shift - shl32 arg ; 23 cycles + shl32 arg ; 18 cycles .endrepeat round16 arg ; 11-27 cycles .endmacro