Switch the Mandelbrot fixed-point math from 4.12/8.24 to 3.13/6.26 and add a
sixth preset viewport on the 6 key. Each viewport_* table carries six entries
so the new preset has a zoom level, an x center and a y center.

diff --git a/mandel.s b/mandel.s
index 1f5a06f..9996c53 100644
--- a/mandel.s
+++ b/mandel.s
@@ -1,16 +1,16 @@
 ; Our zero-page vars
-ox = $80 ; fixed8.24: center point x
-oy = $84 ; fixed8.24: center point y
-cx = $88 ; fixed8.24: c_x
-cy = $8c ; fixed8.24: c_y
+ox = $80 ; fixed6.26: center point x
+oy = $84 ; fixed6.26: center point y
+cx = $88 ; fixed6.26: c_x
+cy = $8c ; fixed6.26: c_y
 
-zx = $90 ; fixed8.24: z_x
-zy = $94 ; fixed8.24: z_y
-zx_2 = $98 ; fixed8.24: z_x^2
-zy_2 = $9c ; fixed8.24: z_y^2
+zx = $90 ; fixed6.26: z_x
+zy = $94 ; fixed6.26: z_y
+zx_2 = $98 ; fixed6.26: z_x^2
+zy_2 = $9c ; fixed6.26: z_y^2
 
-zx_zy = $a0 ; fixed8.24: z_x * z_y
-dist = $a4 ; fixed8.24: z_x^2 + z_y^2
+zx_zy = $a0 ; fixed6.26: z_x * z_y
+dist = $a4 ; fixed6.26: z_x^2 + z_y^2
 sx = $a8 ; i16: screen pixel x
 sy = $aa ; i16: screen pixel y
 z_buffer_active = $ac ; boolean: 1 if we triggered the lake, 0 if not
@@ -189,11 +189,11 @@ aspect:
     ;
     ; 184h is the equiv of 220.8h at square pixels
     ; 320 / 220.8 = 1.45 display aspect ratio
-aspect_x: ; fixed4.16 5/4
-    .word 5 << (12 - 2)
+aspect_x: ; fixed3.13 5/4
+    .word 5 << (13 - 2)
 
-aspect_y: ; fixed4.16 3/4
-    .word 3 << (12 - 2)
+aspect_y: ; fixed3.13 3/4
+    .word 3 << (13 - 2)
 
 ms_per_frame: ; float48 16.66666667
     .byte 64 ; exponent/sign
@@ -291,25 +291,30 @@ pixel_masks:
     .byte %11000000
 
+; Preset viewports: the number keys load X = 0..5 and call load_viewport, so each
+; viewport_* table needs six entries. NOTE(review): confirm load_viewport indexes all three by X.
 viewport_zoom:
-    .byte 1
-    .byte 6
-    .byte 8
-    .byte 6
+    .byte 0
+    .byte 5
+    .byte 7
+    .byte 5
+    .byte 7
     .byte 8
 
 viewport_ox:
-    .dword $00000000
-    .dword $ff110000
-    .dword $ff110000
-    .dword $fe400000
-    .dword $fe3b0000
+    .dword ($00000000 & $3fffffff) << 2
+    .dword ($ff110000 & $3fffffff) << 2
+    .dword ($ff110000 & $3fffffff) << 2
+    .dword ($fe400000 & $3fffffff) << 2
+    .dword ($fe3b0000 & $3fffffff) << 2
+    .dword ($fe3b0000 & $3fffffff) << 2 ; preset 6: reuses the preset 5 center x
 
 viewport_oy:
-    .dword $00000000
-    .dword $ffb60000
-    .dword $ffbe0000
-    .dword $00000000
-    .dword $fffe0000
+    .dword ($00000000 & $3fffffff) << 2
+    .dword ($ffb60000 & $3fffffff) << 2
+    .dword ($ffbe0000 & $3fffffff) << 2
+    .dword ($00000000 & $3fffffff) << 2
+    .dword ($fffe0000 & $3fffffff) << 2
+    .dword ($fffe0000 & $3fffffff) << 2 ; preset 6: reuses the preset 5 center y
 
 ; 2 + 9 * byte cycles
 .macro add bytes, dest, arg1, arg2
@@ -883,8 +888,8 @@ next:
 
 .proc mandelbrot
     ; input:
-    ; cx: position scaled to 8.24 fixed point - -128..+127.9
-    ; cy: position scaled to 8.24
+    ; cx: position scaled to 6.26 fixed point - -32..+31.9
+    ; cy: position scaled to 6.26
     ;
     ; output:
     ; iter: iteration count at escape or 0
@@ -942,7 +947,7 @@ loop:
 keep_going:
 
     .macro quick_exit arg, max
-        ; arg: fixed8.24
+        ; arg: fixed6.26
         ; max: integer
         .local positive
         .local negative
@@ -955,12 +960,12 @@ keep_going:
         bmi negative
 
     positive:
-        cmp #max
+        cmp #(max << 2)
         bmi all_done ; 'less than'
         jmp exit_path
 
     negative:
-        cmp #(256 - max)
+        cmp #(256 - (max << 2))
         beq first_equal ; 'equal' on first byte
         bpl all_done ; 'greater than'
 
@@ -980,7 +985,7 @@ keep_going:
     all_done:
     .endmacro
 
-    ; 8.24: (-128 .. 127.9)
+    ; 6.26: (-32 .. 31.9)
     ; zx = zx_2 - zy_2 + cx
     sub32 zx, zx_2, zy_2
     add32 zx, zx, cx
@@ -991,9 +996,9 @@ keep_going:
     add32 zy, zy, cy
     quick_exit zy, 2
 
-    ; convert 8.24 -> 4.12: (-8 .. +7.9)
-    shift_round_16 zx, 4
-    shift_round_16 zy, 4
+    ; convert 6.26 -> 3.13: (-4 .. +3.9)
+    shift_round_16 zx, 3
+    shift_round_16 zy, 3
 
     ; zx_2 = zx * zx
     sqr16 zx_2, zx + 2
@@ -1115,9 +1120,9 @@ enough:
 .endmacro
 
 .macro zoom_factor dest, src, aspect
-    ; output: dest: fixed8.24
-    ; input: src: fixed4.12
-    ; aspect: fixed4.12
+    ; output: dest: fixed6.26
+    ; input: src: fixed3.13
+    ; aspect: fixed3.13
     ; clobbers A, X, flags, etc
     copy16 dest, src
     scale_zoom dest
@@ -1426,6 +1431,8 @@ number_keys:
     beq four
     cpy #KEY_5
     beq five
+    cpy #KEY_6
+    beq six
     jmp skip_char
 
 one:
@@ -1442,6 +1449,9 @@ four:
     jmp load_key_viewport
 five:
     ldx #4
+    jmp load_key_viewport
+six:
+    ldx #5
     ; fall through
 load_key_viewport:
     jsr load_viewport
diff --git a/readme.md b/readme.md
index d60644c..2c9efc1 100644
--- a/readme.md
+++ b/readme.md
@@ -18,7 +18,7 @@ Enjoy! I'll probably work on this off and on for the next few weeks until I've g
 
 ## Current state
 
-Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 4 preset viewports via the number keys.
+Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 6 preset viewports via the number keys.
 
 The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
 
@@ -27,7 +27,7 @@ The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 3
 * when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications
 * without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication
 
-The mandelbrot calculations are done using 4.12-precision fixed point numbers with 8.24-precision intermediates. It may be possible to squish this down to 3.13/6.26.
+The mandelbrot calculations are done using 3.13-precision fixed point numbers with 6.26-precision intermediates.
 
 Iterations are capped at 255.
 
diff --git a/todo.md b/todo.md
index 7ab092b..1d46281 100644
--- a/todo.md
+++ b/todo.md
@@ -5,10 +5,6 @@ things to try:
 
 * patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D
 
-* try 3.13 fixed point instead of 4.12 for more precision
-  * can we get away without the extra bit?
-  * since exit compare space would be 6.26 i think so
-
 * y-axis mirror optimization
 
 * extract viewport for display & re-input via keyboard