From 7f70f14fc229eec7b41c5acfbc52737f7b3e3727 Mon Sep 17 00:00:00 2001
From: Brion Vibber <brion@pobox.com>
Date: Thu, 5 Jan 2023 11:17:13 -0800
Subject: [PATCH] one last round sketch

combining copy and round
---
 mandel.s | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 58 insertions(+), 1 deletion(-)

diff --git a/mandel.s b/mandel.s
index 665ea70..7701090 100644
--- a/mandel.s
+++ b/mandel.s
@@ -208,7 +208,7 @@ negative:
     sta arg + 2  ; 3 cyc
     lda arg + 3  ; 3 cyc
     sbc #0       ; 2 cyc
-    lda arg + 3  ; 3 cyc
+    sta arg + 3  ; 3 cyc
     jmp next     ; 3 cyc
 
 positive:
@@ -225,6 +225,63 @@ next:
 
 .endmacro
 
+.macro round16_addsub_copy arg, dest
+    ; Round top 16 bits of 32-bit fixed-point number and copy it
+    .local zero
+    .local one
+    .local positive
+    .local negative
+    .local neg2
+    .local next
+
+    ; no round        - 17 cycles
+    ; round, positive - 31 cycles
+    ; round, negative - 31 cycles
+    ; average = 17 / 2 + (31 + 31) / 4
+    ;         = 17 / 2 + 62 / 4
+    ;         = 24 cycles average
+    ;
+    ; compare with 13.75 cyc in-place plus three copies at 12 cycles
+    ;              13.75 + 36 = 49.75 (41 - 64)
+    ; versus three rounds+copies: 72 (51 - 93)
+
+    lda arg + 1  ; 3 cyc
+    bpl zero     ; 2 cyc
+
+one:
+    ; check sign bit
+    lda arg + 3  ; 3 cyc
+    bpl positive ; 2 cyc
+
+negative:
+    sec          ; 2 cyc
+    lda arg + 2  ; 3 cyc
+    sbc #1       ; 2 cyc
+    sta dest     ; 3 cyc
+    lda arg + 3  ; 3 cyc
+    sbc #0       ; 2 cyc
+    jmp next     ; 3 cyc
+
+positive:
+    clc          ; 2 cyc
+    lda arg + 2  ; 3 cyc
+    adc #1       ; 2 cyc
+    sta dest     ; 3 cyc
+    lda arg + 3  ; 3 cyc
+    adc #0       ; 2 cyc
+    jmp next     ; 3 cyc
+
+zero:
+    lda arg + 2  ; 3 cyc
+    sta dest     ; 3 cyc
+    lda arg + 3  ; 3 cyc
+
+next:
+    sta dest + 2 ; 3 cyc
+
+
+.endmacro
+
 
 .proc iter
     ; (cx and cy should be pre-scaled to 6.26 fixed point)