From 7f70f14fc229eec7b41c5acfbc52737f7b3e3727 Mon Sep 17 00:00:00 2001
From: Brion Vibber <>
Date: Thu, 5 Jan 2023 11:17:13 -0800
Subject: [PATCH] one last round sketch

combining copy and round
 mandel.s | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/mandel.s b/mandel.s
index 665ea70..7701090 100644
--- a/mandel.s
+++ b/mandel.s
@@ -208,7 +208,7 @@ negative:
     sta arg + 2  ; 3 cyc
     lda arg + 3  ; 3 cyc
     sbc #0       ; 2 cyc
-    lda arg + 3  ; 3 cyc
+    sta arg + 3  ; 3 cyc
     jmp next     ; 3 cyc
@@ -225,6 +225,63 @@ next:
+.macro round16_addsub_copy arg, dest
+    ; Round top 16 bits of the 32-bit fixed-point number at arg and copy
+    ; them to dest (low byte) / dest + 1 (high byte); arg is only read.
+    ; Clobbers A and the processor flags.
+    .local zero
+    .local positive
+    .local negative
+    .local next
+    ; no round        - 17 cycles
+    ; round, positive - 31 cycles
+    ; round, negative - 31 cycles
+    ; average = 17 / 2 + (31 + 31) / 4
+    ;         = 17 / 2 + 62 / 4
+    ;         = 24 cycles average
+    ; (counts every branch as 2 cycles; a taken branch costs 1 more)
+    ;
+    ; compare with 13.75 cyc in-place plus three copies at 12 cycles
+    ;              13.75 + 36 = 49.75 (41 - 64)
+    ; versus three rounds+copies: 72 (51 - 93)
+
+    ; bit 7 of arg + 1 clear: nothing to round, take the plain copy
+    lda arg + 1  ; 3 cyc
+    bpl zero     ; 2 cyc
+    ; check sign bit
+    lda arg + 3  ; 3 cyc
+    bpl positive ; 2 cyc
+negative:
+    ; sign bit set: dest = top 16 bits - 1
+    sec          ; 2 cyc
+    lda arg + 2  ; 3 cyc
+    sbc #1       ; 2 cyc
+    sta dest     ; 3 cyc
+    lda arg + 3  ; 3 cyc
+    sbc #0       ; 2 cyc
+    sta dest + 1 ; 3 cyc
+    jmp next     ; 3 cyc
+
+positive:
+    ; sign bit clear: dest = top 16 bits + 1
+    clc          ; 2 cyc
+    lda arg + 2  ; 3 cyc
+    adc #1       ; 2 cyc
+    sta dest     ; 3 cyc
+    lda arg + 3  ; 3 cyc
+    adc #0       ; 2 cyc
+    sta dest + 1 ; 3 cyc
+    jmp next     ; 3 cyc
+
+zero:
+    ; no rounding: copy the top 16 bits unchanged
+    lda arg + 2  ; 3 cyc
+    sta dest     ; 3 cyc
+    lda arg + 3  ; 3 cyc
+    sta dest + 1 ; 3 cyc
+next:
+.endmacro
+
 .proc iter
     ; (cx and cy should be pre-scaled to 6.26 fixed point)