3.13 / 6.26 gives nicer results!
This commit is contained in:
parent
c424f1b8bc
commit
65fcb44934
3 changed files with 48 additions and 46 deletions
86
mandel.s
86
mandel.s
|
@ -1,16 +1,16 @@
|
||||||
; Our zero-page vars
|
; Our zero-page vars
|
||||||
ox = $80 ; fixed8.24: center point x
|
ox = $80 ; fixed6.26: center point x
|
||||||
oy = $84 ; fixed8.24: center point y
|
oy = $84 ; fixed6.26: center point y
|
||||||
cx = $88 ; fixed8.24: c_x
|
cx = $88 ; fixed6.26: c_x
|
||||||
cy = $8c ; fixed8.24: c_y
|
cy = $8c ; fixed6.26: c_y
|
||||||
|
|
||||||
zx = $90 ; fixed8.24: z_x
|
zx = $90 ; fixed6.26: z_x
|
||||||
zy = $94 ; fixed8.24: z_y
|
zy = $94 ; fixed6.26: z_y
|
||||||
zx_2 = $98 ; fixed8.24: z_x^2
|
zx_2 = $98 ; fixed6.26: z_x^2
|
||||||
zy_2 = $9c ; fixed8.24: z_y^2
|
zy_2 = $9c ; fixed6.26: z_y^2
|
||||||
|
|
||||||
zx_zy = $a0 ; fixed8.24: z_x * z_y
|
zx_zy = $a0 ; fixed6.26: z_x * z_y
|
||||||
dist = $a4 ; fixed8.24: z_x^2 + z_y^2
|
dist = $a4 ; fixed6.26: z_x^2 + z_y^2
|
||||||
sx = $a8 ; i16: screen pixel x
|
sx = $a8 ; i16: screen pixel x
|
||||||
sy = $aa ; i16: screen pixel y
|
sy = $aa ; i16: screen pixel y
|
||||||
z_buffer_active = $ac ; boolean: 1 if we triggered the lake, 0 if not
|
z_buffer_active = $ac ; boolean: 1 if we triggered the lake, 0 if not
|
||||||
|
@ -189,11 +189,11 @@ aspect:
|
||||||
;
|
;
|
||||||
; 184h is the equiv of 220.8h at square pixels
|
; 184h is the equiv of 220.8h at square pixels
|
||||||
; 320 / 220.8 = 1.45 display aspect ratio
|
; 320 / 220.8 = 1.45 display aspect ratio
|
||||||
aspect_x: ; fixed4.16 5/4
|
aspect_x: ; fixed3.13 5/4
|
||||||
.word 5 << (12 - 2)
|
.word 5 << (13 - 2)
|
||||||
|
|
||||||
aspect_y: ; fixed4.16 3/4
|
aspect_y: ; fixed3.13 3/4
|
||||||
.word 3 << (12 - 2)
|
.word 3 << (13 - 2)
|
||||||
|
|
||||||
ms_per_frame: ; float48 16.66666667
|
ms_per_frame: ; float48 16.66666667
|
||||||
.byte 64 ; exponent/sign
|
.byte 64 ; exponent/sign
|
||||||
|
@ -291,25 +291,26 @@ pixel_masks:
|
||||||
.byte %11000000
|
.byte %11000000
|
||||||
|
|
||||||
viewport_zoom:
|
viewport_zoom:
|
||||||
.byte 1
|
.byte 0
|
||||||
.byte 6
|
.byte 5
|
||||||
.byte 8
|
.byte 7
|
||||||
.byte 6
|
.byte 5
|
||||||
|
.byte 7
|
||||||
.byte 8
|
.byte 8
|
||||||
|
|
||||||
viewport_ox:
|
viewport_ox:
|
||||||
.dword $00000000
|
.dword ($00000000 & $3fffffff) << 2
|
||||||
.dword $ff110000
|
.dword ($ff110000 & $3fffffff) << 2
|
||||||
.dword $ff110000
|
.dword ($ff110000 & $3fffffff) << 2
|
||||||
.dword $fe400000
|
.dword ($fe400000 & $3fffffff) << 2
|
||||||
.dword $fe3b0000
|
.dword ($fe3b0000 & $3fffffff) << 2
|
||||||
|
|
||||||
viewport_oy:
|
viewport_oy:
|
||||||
.dword $00000000
|
.dword ($00000000 & $3fffffff) << 2
|
||||||
.dword $ffb60000
|
.dword ($ffb60000 & $3fffffff) << 2
|
||||||
.dword $ffbe0000
|
.dword ($ffbe0000 & $3fffffff) << 2
|
||||||
.dword $00000000
|
.dword ($00000000 & $3fffffff) << 2
|
||||||
.dword $fffe0000
|
.dword ($fffe0000 & $3fffffff) << 2
|
||||||
|
|
||||||
; 2 + 9 * byte cycles
|
; 2 + 9 * byte cycles
|
||||||
.macro add bytes, dest, arg1, arg2
|
.macro add bytes, dest, arg1, arg2
|
||||||
|
@ -883,8 +884,8 @@ next:
|
||||||
|
|
||||||
.proc mandelbrot
|
.proc mandelbrot
|
||||||
; input:
|
; input:
|
||||||
; cx: position scaled to 8.24 fixed point - -128..+127.9
|
; cx: position scaled to 6.26 fixed point, range -32..+31.9
|
||||||
; cy: position scaled to 8.24
|
; cy: position scaled to 6.26
|
||||||
;
|
;
|
||||||
; output:
|
; output:
|
||||||
; iter: iteration count at escape or 0
|
; iter: iteration count at escape or 0
|
||||||
|
@ -942,7 +943,7 @@ loop:
|
||||||
keep_going:
|
keep_going:
|
||||||
|
|
||||||
.macro quick_exit arg, max
|
.macro quick_exit arg, max
|
||||||
; arg: fixed8.24
|
; arg: fixed6.26
|
||||||
; max: integer
|
; max: integer
|
||||||
.local positive
|
.local positive
|
||||||
.local negative
|
.local negative
|
||||||
|
@ -955,12 +956,12 @@ keep_going:
|
||||||
bmi negative
|
bmi negative
|
||||||
|
|
||||||
positive:
|
positive:
|
||||||
cmp #max
|
cmp #(max << 2)
|
||||||
bmi all_done ; 'less than'
|
bmi all_done ; 'less than'
|
||||||
jmp exit_path
|
jmp exit_path
|
||||||
|
|
||||||
negative:
|
negative:
|
||||||
cmp #(256 - max)
|
cmp #(256 - (max << 2))
|
||||||
beq first_equal ; 'equal' on first byte
|
beq first_equal ; 'equal' on first byte
|
||||||
bpl all_done ; 'greater than'
|
bpl all_done ; 'greater than'
|
||||||
|
|
||||||
|
@ -980,7 +981,7 @@ keep_going:
|
||||||
all_done:
|
all_done:
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; 8.24: (-128 .. 127.9)
|
; 6.26: (-32 .. 31.9)
|
||||||
; zx = zx_2 - zy_2 + cx
|
; zx = zx_2 - zy_2 + cx
|
||||||
sub32 zx, zx_2, zy_2
|
sub32 zx, zx_2, zy_2
|
||||||
add32 zx, zx, cx
|
add32 zx, zx, cx
|
||||||
|
@ -991,9 +992,9 @@ keep_going:
|
||||||
add32 zy, zy, cy
|
add32 zy, zy, cy
|
||||||
quick_exit zy, 2
|
quick_exit zy, 2
|
||||||
|
|
||||||
; convert 8.24 -> 4.12: (-8 .. +7.9)
|
; convert 6.26 -> 3.13: (-4 .. +3.9)
|
||||||
shift_round_16 zx, 4
|
shift_round_16 zx, 3
|
||||||
shift_round_16 zy, 4
|
shift_round_16 zy, 3
|
||||||
|
|
||||||
; zx_2 = zx * zx
|
; zx_2 = zx * zx
|
||||||
sqr16 zx_2, zx + 2
|
sqr16 zx_2, zx + 2
|
||||||
|
@ -1115,9 +1116,9 @@ enough:
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
.macro zoom_factor dest, src, aspect
|
.macro zoom_factor dest, src, aspect
|
||||||
; output: dest: fixed8.24
|
; output: dest: fixed6.26
|
||||||
; input: src: fixed4.12
|
; input: src: fixed3.13
|
||||||
; aspect: fixed4.12
|
; aspect: fixed3.13
|
||||||
; clobbers A, X, flags, etc
|
; clobbers A, X, flags, etc
|
||||||
copy16 dest, src
|
copy16 dest, src
|
||||||
scale_zoom dest
|
scale_zoom dest
|
||||||
|
@ -1426,6 +1427,8 @@ number_keys:
|
||||||
beq four
|
beq four
|
||||||
cpy #KEY_5
|
cpy #KEY_5
|
||||||
beq five
|
beq five
|
||||||
|
cpy #KEY_6
|
||||||
|
beq six
|
||||||
jmp skip_char
|
jmp skip_char
|
||||||
|
|
||||||
one:
|
one:
|
||||||
|
@ -1442,6 +1445,9 @@ four:
|
||||||
jmp load_key_viewport
|
jmp load_key_viewport
|
||||||
five:
|
five:
|
||||||
ldx #4
|
ldx #4
|
||||||
|
jmp load_key_viewport
|
||||||
|
six:
|
||||||
|
ldx #5
|
||||||
; fall through
|
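; fall through -- NOTE(review): X=5 selects a sixth preset; viewport_zoom lists six bytes but viewport_ox / viewport_oy each show only five .dword rows in this change, so index 5 may read past those tables -- confirm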
|
||||||
load_key_viewport:
|
load_key_viewport:
|
||||||
jsr load_viewport
|
jsr load_viewport
|
||||||
|
|
|
@ -18,7 +18,7 @@ Enjoy! I'll probably work on this off and on for the next few weeks until I've g
|
||||||
|
|
||||||
## Current state
|
## Current state
|
||||||
|
|
||||||
Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 4 preset viewports via the number keys.
|
Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 6 preset viewports via the number keys.
|
||||||
|
|
||||||
The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
|
The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 3
|
||||||
* when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications
|
* when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications
|
||||||
* without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication
|
* without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication
|
||||||
|
|
||||||
The mandelbrot calculations are done using 4.12-precision fixed point numbers with 8.24-precision intermediates. It may be possible to squish this down to 3.13/6.26.
|
The Mandelbrot calculations are done using 3.13-precision fixed-point numbers (range -4 .. +3.9) with 6.26-precision intermediates (range -32 .. +31.9).
|
||||||
|
|
||||||
Iterations are capped at 255.
|
Iterations are capped at 255.
|
||||||
|
|
||||||
|
|
4
todo.md
4
todo.md
|
@ -5,10 +5,6 @@ things to try:
|
||||||
|
|
||||||
* patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D
|
* patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D
|
||||||
|
|
||||||
* try 3.13 fixed point instead of 4.12 for more precision
|
|
||||||
* can we get away without the extra bit?
|
|
||||||
* since exit compare space would be 6.26 i think so
|
|
||||||
|
|
||||||
* y-axis mirror optimization
|
* y-axis mirror optimization
|
||||||
|
|
||||||
* extract viewport for display & re-input via keyboard
|
* extract viewport for display & re-input via keyboard
|
||||||
|
|
Loading…
Reference in a new issue