3.13 / 6.26 gives nicer results!

2025-01-01 15:37:12 -08:00 · 2025-01-01 15:37:12 -08:00 · 65fcb44934
commit 65fcb44934
parent c424f1b8bc
3 changed files with 48 additions and 46 deletions
--- a/mandel.s
+++ b/mandel.s
@ -1,16 +1,16 @@
 ; Our zero-page vars
-ox              = $80 ; fixed8.24: center point x
-oy              = $84 ; fixed8.24: center point y
-cx              = $88 ; fixed8.24: c_x
-cy              = $8c ; fixed8.24: c_y
+ox              = $80 ; fixed6.26: center point x
+oy              = $84 ; fixed6.26: center point y
+cx              = $88 ; fixed6.26: c_x
+cy              = $8c ; fixed6.26: c_y

-zx              = $90 ; fixed8.24: z_x
-zy              = $94 ; fixed8.24: z_y
-zx_2            = $98 ; fixed8.24: z_x^2
-zy_2            = $9c ; fixed8.24: z_y^2
+zx              = $90 ; fixed6.26: z_x
+zy              = $94 ; fixed6.26: z_y
+zx_2            = $98 ; fixed6.26: z_x^2
+zy_2            = $9c ; fixed6.26: z_y^2

-zx_zy           = $a0 ; fixed8.24: z_x * z_y
-dist            = $a4 ; fixed8.24: z_x^2 + z_y^2
+zx_zy           = $a0 ; fixed6.26: z_x * z_y
+dist            = $a4 ; fixed6.26: z_x^2 + z_y^2
 sx              = $a8 ; i16: screen pixel x
 sy              = $aa ; i16: screen pixel y
 z_buffer_active = $ac ; boolean: 1 if we triggered the lake, 0 if not
@ -189,11 +189,11 @@ aspect:
    ;
    ; 184h is the equiv of 220.8h at square pixels
    ; 320 / 220.8 = 1.45 display aspect ratio
-aspect_x: ; fixed4.16 5/4
-    .word 5 << (12 - 2)
+aspect_x: ; fixed3.13: 5/4 (1.25)
+    .word 5 << (13 - 2) ; 5 * 2^13 / 4 = $2800

-aspect_y: ; fixed4.16 3/4
-    .word 3 << (12 - 2)
+aspect_y: ; fixed3.13: 3/4 (0.75)
+    .word 3 << (13 - 2) ; 3 * 2^13 / 4 = $1800

 ms_per_frame: ; float48 16.66666667
    .byte 64  ; exponent/sign
@ -291,25 +291,28 @ pixel_masks:
    .byte %11000000

 viewport_zoom:
-    .byte 1
-    .byte 6
-    .byte 8
-    .byte 6
+    .byte 0
+    .byte 5
+    .byte 7
+    .byte 5
+    .byte 7 ; index 4 (key 5); the .byte 8 that follows is index 5 (key 6)
    .byte 8

 viewport_ox:
-    .dword $00000000
-    .dword $ff110000
-    .dword $ff110000
-    .dword $fe400000
-    .dword $fe3b0000
+    .dword ($00000000 & $3fffffff) << 2 ; old 8.24 constant rescaled to 6.26: drop top 2 bits, shift left 2
+    .dword ($ff110000 & $3fffffff) << 2
+    .dword ($ff110000 & $3fffffff) << 2
+    .dword ($fe400000 & $3fffffff) << 2
+    .dword ($fe3b0000 & $3fffffff) << 2
+    .dword ($fe3b0000 & $3fffffff) << 2 ; index 5 (key 6) - NOTE(review): placeholder, reuses index 4's center x; confirm intended value

 viewport_oy:
-    .dword $00000000
-    .dword $ffb60000
-    .dword $ffbe0000
-    .dword $00000000
-    .dword $fffe0000
+    .dword ($00000000 & $3fffffff) << 2 ; same 8.24 -> 6.26 rescale as viewport_ox
+    .dword ($ffb60000 & $3fffffff) << 2
+    .dword ($ffbe0000 & $3fffffff) << 2
+    .dword ($00000000 & $3fffffff) << 2
+    .dword ($fffe0000 & $3fffffff) << 2
+    .dword ($fffe0000 & $3fffffff) << 2 ; index 5 (key 6) - NOTE(review): placeholder, reuses index 4's center y; confirm intended value

 ; 2 + 9 * byte cycles
 .macro add bytes, dest, arg1, arg2
@ -883,8 +884,8 @@ next:

 .proc mandelbrot
    ; input:
-    ; cx: position scaled to 8.24 fixed point - -128..+127.9
-    ; cy: position scaled to 8.24
+    ; cx: position scaled to 6.26 fixed point, range -32..+31.9
+    ; cy: position scaled to 6.26
    ;
    ; output:
    ; iter: iteration count at escape or 0
@ -942,7 +943,7 @@ loop:
 keep_going:

    .macro quick_exit arg, max
-        ; arg: fixed8.24
+        ; arg: fixed6.26
        ; max: integer
        .local positive
        .local negative
@ -955,12 +956,12 @@ keep_going:
        bmi negative

    positive:
-        cmp #max
+        cmp #(max << 2)
        bmi all_done ; 'less than'
        jmp exit_path

    negative:
-        cmp #(256 - max)
+        cmp #(256 - (max << 2))
        beq first_equal ; 'equal' on first byte
        bpl all_done    ; 'greater than'

@ -980,7 +981,7 @@ keep_going:
    all_done:
    .endmacro

-    ; 8.24: (-128 .. 127.9)
+    ; 6.26: (-32 .. 31.9)
    ; zx = zx_2  - zy_2  + cx
    sub32 zx, zx_2, zy_2
    add32 zx, zx, cx
@ -991,9 +992,9 @@ keep_going:
    add32 zy, zy, cy
    quick_exit zy, 2

-    ; convert 8.24 -> 4.12: (-8 .. +7.9)
-    shift_round_16 zx, 4
-    shift_round_16 zy, 4
+    ; convert 6.26 -> 3.13: (-4 .. +3.9)
+    shift_round_16 zx, 3
+    shift_round_16 zy, 3

    ; zx_2 = zx * zx
    sqr16 zx_2, zx + 2
@ -1115,9 +1116,9 @@ enough:
 .endmacro

 .macro zoom_factor dest, src, aspect
-    ; output: dest: fixed8.24
-    ; input: src: fixed4.12
-    ; aspect: fixed4.12
+    ; output: dest: fixed6.26
+    ; input: src: fixed3.13
+    ; aspect: fixed3.13
    ; clobbers A, X, flags, etc
    copy16 dest, src
    scale_zoom dest
@ -1426,6 +1427,8 @@ number_keys:
    beq four
    cpy #KEY_5
    beq five
+    cpy #KEY_6
+    beq six
    jmp skip_char

 one:
@ -1442,6 +1445,9 @@ four:
    jmp load_key_viewport
 five:
    ldx #4
+    jmp load_key_viewport
+six:
+    ldx #5
    ; fall through
 load_key_viewport:
    jsr load_viewport
--- a/readme.md
+++ b/readme.md
@ -18,7 +18,7 @@ Enjoy! I'll probably work on this off and on for the next few weeks until I've g

 ## Current state

-Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 4 preset viewports via the number keys.
+Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 6 preset viewports via the number keys.

 The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.

@ -27,7 +27,7 @@ The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 3
 * when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications
 * without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication

-The mandelbrot calculations are done using 4.12-precision fixed point numbers with 8.24-precision intermediates. It may be possible to squish this down to 3.13/6.26.
+The Mandelbrot calculations are done using 3.13-precision fixed point numbers with 6.26-precision intermediates.

 Iterations are capped at 255.

--- a/todo.md
+++ b/todo.md
@ -5,10 +5,6 @@ things to try:

 * patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D

-* try 3.13 fixed point instead of 4.12 for more precision
-  * can we get away without the extra bit?
-  * since exit compare space would be 6.26 i think so
-
 * y-axis mirror optimization

 * extract viewport for display & re-input via keyboard