precision cleanup

using 4.12 and 8.24 consistently
2023-01-22 11:17:51 -08:00 · 2023-01-22 11:17:51 -08:00 · 1bef004ccd
commit 1bef004ccd
parent ae9dd0674d
1 changed file with 22 additions and 46 deletions
--- a/mandel.s
+++ b/mandel.s
@ -1,17 +1,17 @@
 ; Our zero-page vars
 ; fixed4.12 values are 16-bit (2 bytes each); fixed8.24 values are
 ; 32-bit (4 bytes each), as the address spacing below shows.
 ; NOTE(review): ox/cx share $84 and oy/cy share $86 -- presumably the
 ; center point is converted in place into c; confirm against callers.
 sx    = $80     ; i16: screen pixel x
 sy    = $82     ; i16: screen pixel y
-ox    = $84     ; fixed3.13: center point x
+ox    = $84     ; fixed4.12: center point x
-oy    = $86     ; fixed3.13: center point y
+oy    = $86     ; fixed4.12: center point y
-cx    = $84     ; fixed3.13: c_x
+cx    = $84     ; fixed4.12: c_x
-cy    = $86     ; fixed3.13: c_y
+cy    = $86     ; fixed4.12: c_y
-zx    = $88     ; fixed3.13: z_x
+zx    = $88     ; fixed4.12: z_x
-zy    = $8a     ; fixed3.13: z_y
+zy    = $8a     ; fixed4.12: z_y
-zx_2  = $90     ; fixed6.26: z_x^2
+zx_2  = $90     ; fixed8.24: z_x^2
-zy_2  = $94     ; fixed6.26: z_y^2
+zy_2  = $94     ; fixed8.24: z_y^2
-zx_zy = $98     ; fixed6.26: z_x * z_y
+zx_zy = $98     ; fixed8.24: z_x * z_y
-dist  = $9c     ; fixed6.26: z_x^2 + z_y^2
+dist  = $9c     ; fixed8.24: z_x^2 + z_y^2
 iter  = $a0     ; u8: iteration count
 zoom  = $a1     ; u8: zoom shift level
@ -42,8 +42,6 @@ half_height = height >> 1
 width = 160
 half_width = width >> 1
 stride = width >> 2 ; width / 4
 width_ratio_3_13 = (5 << 11) ; 5/4 in fixed3.13 (5 * 2^11 / 2^13)
 height_ratio_3_13 = (3 << 11) ; 3/4 in fixed3.13 (3 * 2^11 / 2^13)
 DMACTL = $D400
 DLISTL = $D402
@ -101,18 +99,12 @@ aspect:
    ; 184h is the equiv of 220.8h at square pixels
    ; 320 / 220.8 = 1.45 display aspect ratio
 aspect_x:
-    .word 5 << (13 - 2) ; 5/4 in fixed3.13
+    .word 5 << (12 - 2) ; 5/4 in fixed4.12 (5 * 2^10 / 2^12)
 aspect_y:
-    .word 3 << (13 - 2) ; 3/4 in fixed3.13
+    .word 3 << (12 - 2) ; 3/4 in fixed4.12 (3 * 2^10 / 2^12)
 bit_masks:
    ; Four 2-bit masks, one per 2-bit field of a byte.
    ; NOTE(review): presumably indexed by pixel position within a byte
    ; (stride = width >> 2 implies 4 pixels per byte) -- confirm in pset.
    .byte 3      ; %00000011
    .byte 3 << 2 ; %00001100
    .byte 3 << 4 ; %00110000
    .byte 3 << 6 ; %11000000
 display_list_start:
    ; 24 lines overscan
    .repeat 3
@ -244,21 +236,6 @@ color_map:
    neg 4, arg
 .endmacro
 .macro extend_8_16 dest, src
    ; Sign-extend the signed byte at src into the 16-bit little-endian
    ; word at dest: low byte = src, high byte = $00 or $ff.
    ; clobbers A, X
    ; 14-15 cycles with zero-page operands: the positive path takes the
    ; branch (3 cyc, +1 if it crosses a page), the negative path falls
    ; through (2 cyc) and pays for the dex.
    .local positive
    .local negative
    ldx #0       ; 2 cyc - assume high byte $00
    lda src      ; 3 cyc - sets N from bit 7 of src
    sta dest     ; 3 cyc - sta leaves the flags from lda intact
    bpl positive ; 2 cyc (3 if taken)
 negative:
    dex          ; 2 cyc - X = $ff for a negative source
 positive:
    stx dest + 1 ; 3 cyc
 .endmacro
 ; inner loop for imul16
 ; bitnum < 8: 25 or 41 cycles
 ; bitnum >= 8: 30 or 46 cycles
@ -307,7 +284,6 @@ next:
        ror result ; 5 cyc
    .endif
 .endmacro
 ; 5 to 25 cycles
@ -330,11 +306,18 @@ positive:
    copy32 dest, FR2  ; 24 cyc
 .endmacro
-.macro imul16_round dest, arg1, arg2
+.macro shift_round_16 arg, shift
    ; Applies shl32 to arg `shift` times, then round16 once.
    ; `shift` must be an assembly-time constant: .repeat unrolls the
    ; shl32 expansions inline, so code size grows with `shift`.
    ; NOTE(review): shl32/round16 are defined elsewhere; presumably a
    ; 1-bit 32-bit left shift and a round to the upper 16 bits -- confirm.
    .repeat shift
        shl32 arg
    .endrepeat
    round16 arg
 .endmacro
 .macro imul16_round dest, arg1, arg2, shift
    ; dest = upper 16 bits of ((arg1 * arg2) << shift), rounded.
    ; Copies arg1/arg2 into FR0/FR1, calls imul16_func, adjusts FR2 with
    ; shift_round_16 and stores the word at FR2 + 2 into dest.
    ; With two fixed4.12 operands the product is fixed8.24; shift = 4
    ; moves a fixed4.12 result into the upper word.
    ; NOTE(review): assumes imul16_func leaves the 32-bit product in FR2
    ; -- confirm against its definition.
    copy16 FR0, arg1  ; 12 cyc
    copy16 FR1, arg2  ; 12 cyc
    jsr imul16_func   ; 470-780 cyc
-    round16 FR2       ; 5-28 cyc
+    shift_round_16 FR2, shift
    copy16 dest, FR2 + 2  ; 12 cyc
 .endmacro
@ -422,13 +405,6 @@ next:
 .endmacro
 .macro shift_round_16 arg, shift
    ; Applies shl32 to arg `shift` times, then round16 once.
    ; `shift` must be an assembly-time constant: .repeat unrolls the
    ; shl32 expansions inline, so code size grows with `shift`.
    ; NOTE(review): shl32/round16 are defined elsewhere; presumably a
    ; 1-bit 32-bit left shift and a round to the upper 16 bits -- confirm.
    .repeat shift
        shl32 arg
    .endrepeat
    round16 arg
 .endmacro
 .proc mandelbrot
    ; input:
    ; cx: position scaled to 4.12 fixed point - -8..+7.9
@ -514,7 +490,7 @@ enough:
    ; cy = cy * (3 / 4)
    ; cx = cx * (5 / 4)
-    imul16_round dest, dest, aspect
+    imul16_round dest, dest, aspect, 4
 .endmacro
 .proc pset