3.13 / 6.26 gives nicer results!

This commit is contained in:
Brooke Vibber 2025-01-01 15:37:12 -08:00
parent c424f1b8bc
commit 65fcb44934
3 changed files with 48 additions and 46 deletions

View file

@ -1,16 +1,16 @@
; Our zero-page vars ; Our zero-page vars
ox = $80 ; fixed8.24: center point x ox = $80 ; fixed6.26: center point x
oy = $84 ; fixed8.24: center point y oy = $84 ; fixed6.26: center point y
cx = $88 ; fixed8.24: c_x cx = $88 ; fixed6.26: c_x
cy = $8c ; fixed8.24: c_y cy = $8c ; fixed6.26: c_y
zx = $90 ; fixed8.24: z_x zx = $90 ; fixed6.26: z_x
zy = $94 ; fixed8.24: z_y zy = $94 ; fixed6.26: z_y
zx_2 = $98 ; fixed8.24: z_x^2 zx_2 = $98 ; fixed6.26: z_x^2
zy_2 = $9c ; fixed8.24: z_y^2 zy_2 = $9c ; fixed6.26: z_y^2
zx_zy = $a0 ; fixed8.24: z_x * z_y zx_zy = $a0 ; fixed6.26: z_x * z_y
dist = $a4 ; fixed8.24: z_x^2 + z_y^2 dist = $a4 ; fixed6.26: z_x^2 + z_y^2
sx = $a8 ; i16: screen pixel x sx = $a8 ; i16: screen pixel x
sy = $aa ; i16: screen pixel y sy = $aa ; i16: screen pixel y
z_buffer_active = $ac ; boolean: 1 if we triggered the lake, 0 if not z_buffer_active = $ac ; boolean: 1 if we triggered the lake, 0 if not
@ -189,11 +189,11 @@ aspect:
; ;
; 184h is the equiv of 220.8h at square pixels ; 184h is the equiv of 220.8h at square pixels
; 320 / 220.8 = 1.45 display aspect ratio ; 320 / 220.8 = 1.45 display aspect ratio
aspect_x: ; fixed4.16 5/4 aspect_x: ; fixed3.13 5/4
.word 5 << (12 - 2) .word 5 << (13 - 2)
aspect_y: ; fixed4.16 3/4 aspect_y: ; fixed3.13 3/4
.word 3 << (12 - 2) .word 3 << (13 - 2)
ms_per_frame: ; float48 16.66666667 ms_per_frame: ; float48 16.66666667
.byte 64 ; exponent/sign .byte 64 ; exponent/sign
@ -291,25 +291,26 @@ pixel_masks:
.byte %11000000 .byte %11000000
viewport_zoom: viewport_zoom:
.byte 1 .byte 0
.byte 6 .byte 5
.byte 8 .byte 7
.byte 6 .byte 5
.byte 7
.byte 8 .byte 8
viewport_ox: viewport_ox:
.dword $00000000 .dword ($00000000 & $3fffffff) << 2
.dword $ff110000 .dword ($ff110000 & $3fffffff) << 2
.dword $ff110000 .dword ($ff110000 & $3fffffff) << 2
.dword $fe400000 .dword ($fe400000 & $3fffffff) << 2
.dword $fe3b0000 .dword ($fe3b0000 & $3fffffff) << 2
viewport_oy: viewport_oy:
.dword $00000000 .dword ($00000000 & $3fffffff) << 2
.dword $ffb60000 .dword ($ffb60000 & $3fffffff) << 2
.dword $ffbe0000 .dword ($ffbe0000 & $3fffffff) << 2
.dword $00000000 .dword ($00000000 & $3fffffff) << 2
.dword $fffe0000 .dword ($fffe0000 & $3fffffff) << 2
; 2 + 9 * byte cycles ; 2 + 9 * byte cycles
.macro add bytes, dest, arg1, arg2 .macro add bytes, dest, arg1, arg2
@ -883,8 +884,8 @@ next:
.proc mandelbrot .proc mandelbrot
; input: ; input:
; cx: position scaled to 8.24 fixed point - -128..+127.9 ; cx: position scaled to 6.26 fixed point - -32..+31.9
; cy: position scaled to 8.24 ; cy: position scaled to 6.26
; ;
; output: ; output:
; iter: iteration count at escape or 0 ; iter: iteration count at escape or 0
@ -942,7 +943,7 @@ loop:
keep_going: keep_going:
.macro quick_exit arg, max .macro quick_exit arg, max
; arg: fixed8.24 ; arg: fixed6.26
; max: integer ; max: integer
.local positive .local positive
.local negative .local negative
@ -955,12 +956,12 @@ keep_going:
bmi negative bmi negative
positive: positive:
cmp #max cmp #(max << 2)
bmi all_done ; 'less than' bmi all_done ; 'less than'
jmp exit_path jmp exit_path
negative: negative:
cmp #(256 - max) cmp #(256 - (max << 2))
beq first_equal ; 'equal' on first byte beq first_equal ; 'equal' on first byte
bpl all_done ; 'greater than' bpl all_done ; 'greater than'
@ -980,7 +981,7 @@ keep_going:
all_done: all_done:
.endmacro .endmacro
; 8.24: (-128 .. 127.9) ; 6.26: (-32 .. 31.9)
; zx = zx_2 - zy_2 + cx ; zx = zx_2 - zy_2 + cx
sub32 zx, zx_2, zy_2 sub32 zx, zx_2, zy_2
add32 zx, zx, cx add32 zx, zx, cx
@ -991,9 +992,9 @@ keep_going:
add32 zy, zy, cy add32 zy, zy, cy
quick_exit zy, 2 quick_exit zy, 2
; convert 8.24 -> 4.12: (-8 .. +7.9) ; convert 6.26 -> 3.13: (-4 .. +3.9)
shift_round_16 zx, 4 shift_round_16 zx, 3
shift_round_16 zy, 4 shift_round_16 zy, 3
; zx_2 = zx * zx ; zx_2 = zx * zx
sqr16 zx_2, zx + 2 sqr16 zx_2, zx + 2
@ -1115,9 +1116,9 @@ enough:
.endmacro .endmacro
.macro zoom_factor dest, src, aspect .macro zoom_factor dest, src, aspect
; output: dest: fixed8.24 ; output: dest: fixed6.26
; input: src: fixed4.12 ; input: src: fixed3.13
; aspect: fixed4.12 ; aspect: fixed3.13
; clobbers A, X, flags, etc ; clobbers A, X, flags, etc
copy16 dest, src copy16 dest, src
scale_zoom dest scale_zoom dest
@ -1426,6 +1427,8 @@ number_keys:
beq four beq four
cpy #KEY_5 cpy #KEY_5
beq five beq five
cpy #KEY_6
beq six
jmp skip_char jmp skip_char
one: one:
@ -1442,6 +1445,9 @@ four:
jmp load_key_viewport jmp load_key_viewport
five: five:
ldx #4 ldx #4
jmp load_key_viewport
six:
ldx #5
; fall through ; fall through
load_key_viewport: load_key_viewport:
jsr load_viewport jsr load_viewport

View file

@ -18,7 +18,7 @@ Enjoy! I'll probably work on this off and on for the next few weeks until I've g
## Current state ## Current state
Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 4 preset viewports via the number keys. Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 6 preset viewports via the number keys.
The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered. The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
@ -27,7 +27,7 @@ The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 3
* when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications * when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications
* without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication * without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication
The mandelbrot calculations are done using 4.12-precision fixed point numbers with 8.24-precision intermediates. It may be possible to squish this down to 3.13/6.26. The mandelbrot calculations are done using 3.13-precision fixed point numbers with 6.26-precision intermediates.
Iterations are capped at 255. Iterations are capped at 255.

View file

@ -5,10 +5,6 @@ things to try:
* patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D * patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D
* try 3.13 fixed point instead of 4.12 for more precision
* can we get away without the extra bit?
* since exit compare space would be 6.26 i think so
* y-axis mirror optimization * y-axis mirror optimization
* extract viewport for display & re-input via keyboard * extract viewport for display & re-input via keyboard