precision cleanup

using 4.12 and 8.24 consistently
This commit is contained in:
Brooke Vibber 2023-01-22 11:17:51 -08:00
parent ae9dd0674d
commit 1bef004ccd

View file

@ -1,17 +1,17 @@
; Our zero-page vars ; Our zero-page vars
sx = $80 ; i16: screen pixel x sx = $80 ; i16: screen pixel x
sy = $82 ; i16: screen pixel y sy = $82 ; i16: screen pixel y
ox = $84 ; fixed3.13: center point x ox = $84 ; fixed4.12: center point x
oy = $86 ; fixed3.13: center point y oy = $86 ; fixed4.12: center point y
cx = $84 ; fixed3.13: c_x cx = $84 ; fixed4.12: c_x
cy = $86 ; fixed3.13: c_y cy = $86 ; fixed4.12: c_y
zx = $88 ; fixed3.13: z_x zx = $88 ; fixed4.12: z_x
zy = $8a ; fixed3.13: z_y zy = $8a ; fixed4.12: z_y
zx_2 = $90 ; fixed6.26: z_x^2 zx_2 = $90 ; fixed8.24: z_x^2
zy_2 = $94 ; fixed6.26: z_y^2 zy_2 = $94 ; fixed8.24: z_y^2
zx_zy = $98 ; fixed6.26: z_x * z_y zx_zy = $98 ; fixed8.24: z_x * z_y
dist = $9c ; fixed6.26: z_x^2 + z_y^2 dist = $9c ; fixed8.24: z_x^2 + z_y^2
iter = $a0 ; u8: iteration count iter = $a0 ; u8: iteration count
zoom = $a1 ; u8: zoom shift level zoom = $a1 ; u8: zoom shift level
@ -42,8 +42,6 @@ half_height = height >> 1
width = 160 width = 160
half_width = width >> 1 half_width = width >> 1
stride = width >> 2 stride = width >> 2
width_ratio_3_13 = (5 << 11) ; 5/4
height_ratio_3_13 = (3 << 11) ; 3/4
DMACTL = $D400 DMACTL = $D400
DLISTL = $D402 DLISTL = $D402
@ -101,18 +99,12 @@ aspect:
; 184h is the equiv of 220.8h at square pixels ; 184h is the equiv of 220.8h at square pixels
; 320 / 220.8 = 1.45 display aspect ratio ; 320 / 220.8 = 1.45 display aspect ratio
aspect_x: aspect_x:
.word 5 << (13 - 2) .word 5 << (12 - 2)
aspect_y: aspect_y:
.word 3 << (13 - 2) .word 3 << (12 - 2)
bit_masks:
.byte 3
.byte 3 << 2
.byte 3 << 4
.byte 3 << 6
display_list_start: display_list_start:
; 24 lines overscan ; 24 lines overscan
.repeat 3 .repeat 3
@ -244,21 +236,6 @@ color_map:
neg 4, arg neg 4, arg
.endmacro .endmacro
.macro extend_8_16 dest, src
; Sign-extend the 8-bit value at `src` into the 16-bit location at `dest`:
; low byte is copied from src, high byte becomes $00 (non-negative) or
; $FF (negative), decided by bit 7 of the source byte.
; clobbers A, X
; 13-15 cycles
.local positive
.local negative
ldx #0 ; 2 cyc ; assume non-negative: high byte = $00
lda src ; 3 cyc ; sets N flag from bit 7 of the value
sta dest ; 3 cyc ; sta leaves flags untouched, N still from lda
bpl positive ; 2 cyc ; 3 cyc when taken (source of the 13-15 range)
negative:
dex ; 2 cyc ; X: $00 -> $FF for a negative value
positive:
stx dest + 1 ; 3 cyc ; write the sign-extension high byte
.endmacro
; inner loop for imul16 ; inner loop for imul16
; bitnum < 8: 25 or 41 cycles ; bitnum < 8: 25 or 41 cycles
; bitnum >= 8: 30 or 46 cycles ; bitnum >= 8: 30 or 46 cycles
@ -307,7 +284,6 @@ next:
ror result ; 5 cyc ror result ; 5 cyc
.endif .endif
.endmacro .endmacro
; 5 to 25 cycles ; 5 to 25 cycles
@ -330,11 +306,18 @@ positive:
copy32 dest, FR2 ; 24 cyc copy32 dest, FR2 ; 24 cyc
.endmacro .endmacro
.macro imul16_round dest, arg1, arg2 .macro shift_round_16 arg, shift
.repeat shift
shl32 arg
.endrepeat
round16 arg
.endmacro
.macro imul16_round dest, arg1, arg2, shift
copy16 FR0, arg1 ; 12 cyc copy16 FR0, arg1 ; 12 cyc
copy16 FR1, arg2 ; 12 cyc copy16 FR1, arg2 ; 12 cyc
jsr imul16_func ; 470-780 cyc jsr imul16_func ; 470-780 cyc
round16 FR2 ; 5-28 cyc shift_round_16 FR2, shift
copy16 dest, FR2 + 2 ; 12 cyc copy16 dest, FR2 + 2 ; 12 cyc
.endmacro .endmacro
@ -422,13 +405,6 @@ next:
.endmacro .endmacro
.macro shift_round_16 arg, shift
; Shift the 32-bit value at `arg` left by `shift` bits (shl32 emitted
; `shift` times at assembly time via .repeat), then apply round16 so the
; top 16 bits can be taken as a rounded fixed-point result.
; `shift` must be an assemble-time constant (it is a .repeat count).
; NOTE(review): register/flag clobbers depend on the shl32 and round16
; macros defined elsewhere in this file — confirm before relying on state.
.repeat shift
shl32 arg
.endrepeat
round16 arg
.endmacro
.proc mandelbrot .proc mandelbrot
; input: ; input:
; cx: position scaled to 4.12 fixed point - -8..+7.9 ; cx: position scaled to 4.12 fixed point - -8..+7.9
@ -514,7 +490,7 @@ enough:
; cy = cy * (3 / 4) ; cy = cy * (3 / 4)
; cx = cx * (5 / 4) ; cx = cx * (5 / 4)
imul16_round dest, dest, aspect imul16_round dest, dest, aspect, 4
.endmacro .endmacro
.proc pset .proc pset