This commit is contained in:
Brooke Vibber 2024-12-31 07:19:45 -08:00
parent 0d086a179c
commit 4a1e35699a
2 changed files with 49 additions and 24 deletions

View file

@ -433,6 +433,13 @@ viewport_oy:
copy16 dest, FR2 + 2 ; 12 cyc copy16 dest, FR2 + 2 ; 12 cyc
.endmacro .endmacro
.macro imul16 dest, arg1, arg2
; Multiply arg1 by arg2 through imul16_func and store the whole product in dest.
; input: arg1, arg2: copied with copy16 into FR0 and FR1 respectively
; output: dest: copied with copy32 from FR2; no rounding or shifting is done here
; e.g. zoom_factor passes fixed4.12 operands and documents its dest as fixed8.24
; NOTE(review): overwrites FR0, FR1 and FR2; CPU registers/flags clobbered depend on
; imul16_func and the copy macros, which are not visible here -- confirm
copy16 FR0, arg1 ; 12 cyc
copy16 FR1, arg2 ; 12 cyc
jsr imul16_func ; ? cyc
copy32 dest, FR2 ; 24 cyc
.endmacro
.macro sqr16_round dest, arg, shift .macro sqr16_round dest, arg, shift
;imul16_round dest, arg, arg, shift ;imul16_round dest, arg, arg, shift
copy16 FR0, arg ; 12 cyc copy16 FR0, arg ; 12 cyc
@ -441,6 +448,12 @@ viewport_oy:
copy16 dest, FR2 + 2 ; 12 cyc copy16 dest, FR2 + 2 ; 12 cyc
.endmacro .endmacro
.macro sqr16 dest, arg
; Square arg through sqr16_func and store the whole result in dest.
; input: arg: copied with copy16 into FR0
; output: dest: copied with copy32 from FR2; no rounding or shifting is done here
; NOTE(review): overwrites FR0 and FR2; CPU registers/flags clobbered depend on
; sqr16_func and the copy macros, which are not visible here -- confirm
copy16 FR0, arg ; 12 cyc
jsr sqr16_func ; ? cyc
copy32 dest, FR2 ; 24 cyc
.endmacro
; clobbers a, x ; clobbers a, x
.macro sqr8 dest, arg .macro sqr8 dest, arg
ldx arg ldx arg
@ -870,8 +883,8 @@ next:
.proc mandelbrot .proc mandelbrot
; input: ; input:
; cx: position scaled to 4.12 fixed point - -8..+7.9 ; cx: position scaled to 8.24 fixed point - -128..+127.9
; cy: position scaled to 4.12 ; cy: position scaled to 8.24
; ;
; output: ; output:
; iter: iteration count at escape or 0 ; iter: iteration count at escape or 0
@ -909,10 +922,6 @@ next:
sta zy_2 + 1 sta zy_2 + 1
sta zy_2 + 2 sta zy_2 + 2
sta zy_2 + 3 sta zy_2 + 3
sta zx_zy
sta zx_zy + 1
sta zx_zy + 2
sta zx_zy + 3
sta dist sta dist
sta dist + 1 sta dist + 1
sta dist + 2 sta dist + 2
@ -929,6 +938,8 @@ loop:
keep_going: keep_going:
.macro quick_exit arg, max .macro quick_exit arg, max
; arg: fixed8.24
; max: integer
.local positive .local positive
.local negative .local negative
.local nope_out .local nope_out
@ -936,16 +947,16 @@ keep_going:
.local all_done .local all_done
; check sign bit ; check sign bit
lda arg + 1 lda arg + 3
bmi negative bmi negative
positive: positive:
cmp #((max) << 4) cmp #max
bmi all_done ; 'less than' bmi all_done ; 'less than'
jmp exit_path jmp exit_path
negative: negative:
cmp #(256 - ((max) << 4)) cmp #(256 - max)
beq first_equal ; 'equal' on first byte beq first_equal ; 'equal' on first byte
bpl all_done ; 'greater than' bpl all_done ; 'greater than'
@ -953,34 +964,44 @@ keep_going:
jmp exit_path jmp exit_path
first_equal: first_equal:
; following bytes all 0 shows it's really 'equal'
lda arg + 2
bne all_done
lda arg + 1
bne all_done
lda arg lda arg
beq nope_out ; 2nd byte 0 shows it's really 'equal' bne all_done
jmp exit_path
all_done: all_done:
.endmacro .endmacro
; 4.12: (-8 .. +7.9) ; 8.24: (-128 .. 127.9) / (-8 .. +7.9)
; zx = zx_2 - zy_2 + cx ; zx = zx_2 - zy_2 + cx
sub16 zx, zx_2, zy_2 sub32 zx, zx_2, zy_2
add16 zx, zx, cx add32 zx, zx, cx
quick_exit zx, 2 quick_exit zx, 2
; zy = zx_zy + zx_zy + cy ; zy = zx_zy + zx_zy + cy
add16 zy, zx_zy, zx_zy add32 zy, zx_zy, zx_zy
add16 zy, zy, cy add32 zy, zy, cy
quick_exit zy, 2 quick_exit zy, 2
; convert 8.24 -> 4.12
shift_round_16 zx, 4
shift_round_16 zy, 4
; zx_2 = zx * zx ; zx_2 = zx * zx
sqr16_round zx_2, zx, 4 sqr16 zx_2, zx + 2
; zy_2 = zy * zy ; zy_2 = zy * zy
sqr16_round zy_2, zy, 4 sqr16 zy_2, zy + 2
; zx_zy = zx * zy ; zx_zy = zx * zy
imul16_round zx_zy, zx, zy, 4 imul16 zx_zy, zx + 2, zy + 2
; dist = zx_2 + zy_2 ; dist = zx_2 + zy_2
add16 dist, zx_2, zy_2 add32 dist, zx_2, zy_2
quick_exit dist, 4 quick_exit dist, 4
; if may be in the lake, look for looping output with a small buffer ; if may be in the lake, look for looping output with a small buffer
@ -1090,13 +1111,17 @@ enough:
.endmacro .endmacro
.macro zoom_factor dest, src, zoom, aspect .macro zoom_factor dest, src, zoom, aspect
; Copies src into dest, applies scale_zoom to dest, then multiplies dest by aspect.
; output: dest: fixed8.24
; input: src: fixed4.12
; input: zoom: u8 ???
; input: aspect: fixed4.12
; NOTE(review): the zoom parameter is not referenced in this body; presumably
; scale_zoom reads the zoom level itself -- confirm
; clobbers A, X, flags, etc ; clobbers A, X, flags, etc
copy16 dest, src copy16 dest, src
scale_zoom dest scale_zoom dest
; cy = cy * (3 / 4) ; cy = cy * (3 / 4)
; cx = cx * (5 / 4) ; cx = cx * (5 / 4)
imul16_round dest, dest, aspect, 4 imul16 dest, dest, aspect
.endmacro .endmacro
.proc pset .proc pset
@ -1567,9 +1592,9 @@ not_skipped_mask:
; run the fractal! ; run the fractal!
zoom_factor cx, sx, zoom, aspect_x zoom_factor cx, sx, zoom, aspect_x
add16 cx, cx, ox add32 cx, cx, ox
zoom_factor cy, sy, zoom, aspect_y zoom_factor cy, sy, zoom, aspect_y
add16 cy, cy, oy add32 cy, cy, oy
jsr mandelbrot jsr mandelbrot
jsr pset jsr pset

View file

@ -3,7 +3,7 @@ things to try:
* skip add on the top-byte multiply in sqr8/mul8 * skip add on the top-byte multiply in sqr8/mul8
* should save a few cycles, suggestion by jamey * should save a few cycles, suggestion by jamey
* perform the zx += zx^s + cx in 32-bit space, before rounding * perform the zx_next = zx^s + cx in 32-bit space, before rounding
* should improve precision on max zoom, might cost a few cycles * should improve precision on max zoom, might cost a few cycles
* patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D * patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D