Compare commits
No commits in common. "57975b7158c8dae6ee2d10519a4bca36f3b6dac1" and "d36667938d8a3b11ac9c1cb7ecaf80125a1c19ba" have entirely different histories.
57975b7158
...
d36667938d
1 changed files with 87 additions and 66 deletions
153
mandel.s
153
mandel.s
|
@ -1,17 +1,17 @@
|
||||||
; Our zero-page vars
|
; Our zero-page vars
|
||||||
sx = $80 ; i16: screen pixel x
|
sx = $80 ; i16: screen pixel x
|
||||||
sy = $82 ; i16: screen pixel y
|
sy = $82 ; i16: screen pixel y
|
||||||
ox = $84 ; fixed4.12: center point x
|
ox = $84 ; fixed3.13: center point x
|
||||||
oy = $86 ; fixed4.12: center point y
|
oy = $86 ; fixed3.13: center point y
|
||||||
cx = $84 ; fixed4.12: c_x
|
cx = $84 ; fixed3.13: c_x
|
||||||
cy = $86 ; fixed4.12: c_y
|
cy = $86 ; fixed3.13: c_y
|
||||||
zx = $88 ; fixed4.12: z_x
|
zx = $88 ; fixed3.13: z_x
|
||||||
zy = $8a ; fixed4.12: z_y
|
zy = $8a ; fixed3.13: z_y
|
||||||
|
|
||||||
zx_2 = $90 ; fixed8.24: z_x^2
|
zx_2 = $90 ; fixed6.26: z_x^2
|
||||||
zy_2 = $94 ; fixed8.24: z_y^2
|
zy_2 = $94 ; fixed6.26: z_y^2
|
||||||
zx_zy = $98 ; fixed8.24: z_x * z_y
|
zx_zy = $98 ; fixed6.26: z_x * z_y
|
||||||
dist = $9c ; fixed8.24: z_x^2 + z_y^2
|
dist = $9c ; fixed6.26: z_x^2 + z_y^2
|
||||||
|
|
||||||
iter = $a0 ; u8: iteration count
|
iter = $a0 ; u8: iteration count
|
||||||
zoom = $a1 ; u8: zoom shift level
|
zoom = $a1 ; u8: zoom shift level
|
||||||
|
@ -42,6 +42,8 @@ half_height = height >> 1
|
||||||
width = 160
|
width = 160
|
||||||
half_width = width >> 1
|
half_width = width >> 1
|
||||||
stride = width >> 2
|
stride = width >> 2
|
||||||
|
width_ratio_3_13 = (5 << 11) ; 5/4
|
||||||
|
height_ratio_3_13 = (3 << 11) ; 3/4
|
||||||
|
|
||||||
DMACTL = $D400
|
DMACTL = $D400
|
||||||
DLISTL = $D402
|
DLISTL = $D402
|
||||||
|
@ -99,12 +101,18 @@ aspect:
|
||||||
; 184h is the equiv of 220.8h at square pixels
|
; 184h is the equiv of 220.8h at square pixels
|
||||||
; 320 / 220.8 = 1.45 display aspect ratio
|
; 320 / 220.8 = 1.45 display aspect ratio
|
||||||
aspect_x:
|
aspect_x:
|
||||||
.word 5 << (12 - 2)
|
.word 5 << (13 - 2)
|
||||||
|
|
||||||
aspect_y:
|
aspect_y:
|
||||||
.word 3 << (12 - 2)
|
.word 3 << (13 - 2)
|
||||||
|
|
||||||
|
|
||||||
|
bit_masks:
|
||||||
|
.byte 3
|
||||||
|
.byte 3 << 2
|
||||||
|
.byte 3 << 4
|
||||||
|
.byte 3 << 6
|
||||||
|
|
||||||
display_list_start:
|
display_list_start:
|
||||||
; 24 lines overscan
|
; 24 lines overscan
|
||||||
.repeat 3
|
.repeat 3
|
||||||
|
@ -160,7 +168,7 @@ color_map:
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
.macro add32 dest, arg1, arg2
|
.macro add32 dest, arg1, arg2
|
||||||
add 4, dest, arg2, dest
|
add 2, dest, arg2, dest
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; 2 + 9 * byte cycles
|
; 2 + 9 * byte cycles
|
||||||
|
@ -236,6 +244,21 @@ color_map:
|
||||||
neg 4, arg
|
neg 4, arg
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
|
.macro extend_8_16 dest, src
|
||||||
|
; clobbers A, X
|
||||||
|
; 13-15 cycles
|
||||||
|
.local positive
|
||||||
|
.local negative
|
||||||
|
ldx #0 ; 2 cyc
|
||||||
|
lda src ; 3 cyc
|
||||||
|
sta dest ; 3 cyc
|
||||||
|
bpl positive ; 2 cyc
|
||||||
|
negative:
|
||||||
|
dex ; 2 cyc
|
||||||
|
positive:
|
||||||
|
stx dest + 1 ; 3 cyc
|
||||||
|
.endmacro
|
||||||
|
|
||||||
; inner loop for imul16
|
; inner loop for imul16
|
||||||
; bitnum < 8: 25 or 41 cycles
|
; bitnum < 8: 25 or 41 cycles
|
||||||
; bitnum >= 8: 30 or 46 cycles
|
; bitnum >= 8: 30 or 46 cycles
|
||||||
|
@ -254,10 +277,10 @@ color_map:
|
||||||
; 5 cycles either way
|
; 5 cycles either way
|
||||||
.if bitnum < 8
|
.if bitnum < 8
|
||||||
lda arg1 ; 3 cyc
|
lda arg1 ; 3 cyc
|
||||||
and #(1 << (bitnum)) ; 2 cyc
|
and #(1 << bitnum) ; 2 cyc
|
||||||
.else
|
.else
|
||||||
lda arg1 + 1 ; 3 cyc
|
lda arg1 + 1 ; 3 cyc
|
||||||
and #(1 << ((bitnum) - 8)) ; 2 cyc
|
and #(1 << (bitnum - 8)) ; 2 cyc
|
||||||
.endif
|
.endif
|
||||||
bne one ; 2 cyc
|
bne one ; 2 cyc
|
||||||
|
|
||||||
|
@ -284,6 +307,7 @@ next:
|
||||||
ror result ; 5 cyc
|
ror result ; 5 cyc
|
||||||
.endif
|
.endif
|
||||||
|
|
||||||
|
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; 5 to 25 cycles
|
; 5 to 25 cycles
|
||||||
|
@ -306,18 +330,11 @@ positive:
|
||||||
copy32 dest, FR2 ; 24 cyc
|
copy32 dest, FR2 ; 24 cyc
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
.macro shift_round_16 arg, shift
|
.macro imul16_round dest, arg1, arg2
|
||||||
.repeat shift
|
|
||||||
shl32 arg
|
|
||||||
.endrepeat
|
|
||||||
round16 arg
|
|
||||||
.endmacro
|
|
||||||
|
|
||||||
.macro imul16_round dest, arg1, arg2, shift
|
|
||||||
copy16 FR0, arg1 ; 12 cyc
|
copy16 FR0, arg1 ; 12 cyc
|
||||||
copy16 FR1, arg2 ; 12 cyc
|
copy16 FR1, arg2 ; 12 cyc
|
||||||
jsr imul16_func ; 470-780 cyc
|
jsr imul16_func ; 470-780 cyc
|
||||||
shift_round_16 FR2, shift
|
round16 FR2 ; 5-28 cyc
|
||||||
copy16 dest, FR2 + 2 ; 12 cyc
|
copy16 dest, FR2 + 2 ; 12 cyc
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
|
@ -421,60 +438,71 @@ next:
|
||||||
; dist = 0
|
; dist = 0
|
||||||
; iter = 0
|
; iter = 0
|
||||||
lda #00
|
lda #00
|
||||||
ldx #(iter - zx + 1)
|
ldx iter - zx
|
||||||
initloop:
|
initloop:
|
||||||
sta zx - 1,x
|
sta zx,x
|
||||||
dex
|
dex
|
||||||
bne initloop
|
bne initloop
|
||||||
|
|
||||||
loop:
|
loop:
|
||||||
; iter++ & max-iters break
|
; 1939 - 3007 cyc
|
||||||
inc iter
|
|
||||||
bne keep_going
|
; iter++ & max-iters break = 7 cyc
|
||||||
|
inc iter ; 5 cyc
|
||||||
|
bne keep_going ; 2 cyc
|
||||||
rts
|
rts
|
||||||
keep_going:
|
keep_going:
|
||||||
|
|
||||||
.macro quick_exit arg
|
|
||||||
.local keep_going
|
|
||||||
lda arg + 1
|
|
||||||
cmp #(4 << 4)
|
|
||||||
bmi keep_going
|
|
||||||
rts
|
|
||||||
keep_going:
|
|
||||||
.endmacro
|
|
||||||
|
|
||||||
; 4.12: (-8 .. +7.9)
|
; 4.12: (-8 .. +7.9)
|
||||||
; zx = zx_2 - zy_2 + cx
|
; zx = zx_2 - zy_2 + cx = 3 * 20 = 60 cyc
|
||||||
sub16 zx, zx_2, zy_2
|
sub16 zx, zx_2, zy_2
|
||||||
add16 zx, zx, cx
|
add16 zx, zx, cx
|
||||||
quick_exit zx
|
|
||||||
|
|
||||||
; zy = zx_zy + zx_zy + cy
|
; zy = zx_zy + zx_zy + cy = 3 * 20 = 60 cyc
|
||||||
add16 zy, zx_zy, zx_zy
|
sub16 zy, zx_zy, zx_zy
|
||||||
add16 zy, zy, cy
|
add16 zy, zy, cy
|
||||||
|
|
||||||
; zx_2 = zx * zx
|
; 8.24: (-128 .. +127.9)
|
||||||
imul16_round zx_2, zx, zx, 4
|
; zx_2 = zx * zx = 518 - 828 cyc
|
||||||
quick_exit dist
|
imul16 zx_2, zx, zx
|
||||||
|
|
||||||
; zy_2 = zy * zy
|
; zy_2 = zy * zy = 518 - 828 cyc
|
||||||
imul16_round zy_2, zy, zy, 4
|
imul16 zy_2, zy, zy
|
||||||
quick_exit dist
|
|
||||||
|
|
||||||
; zx_zy = zx * zy
|
; zx_zy = zx * zy = 518 - 828 cyc
|
||||||
imul16_round zx_zy, zx, zy, 4
|
imul16 zx_zy, zx, zy
|
||||||
quick_exit dist
|
|
||||||
|
|
||||||
; dist = zx_2 + zy_2
|
; dist = zx_2 + zy_2 = 38 cyc
|
||||||
add16 dist, zx_2, zy_2
|
add32 dist, zx_2, zy_2
|
||||||
quick_exit dist
|
|
||||||
|
; if dist >= 4 break, else continue iterating = 7 cyc
|
||||||
|
lda dist + 3 ; 3 cyc
|
||||||
|
cmp #4 ; 2 cyc
|
||||||
|
bmi still_in ; 2 cyc
|
||||||
|
rts
|
||||||
|
still_in:
|
||||||
|
|
||||||
|
; shift and round zx_2 to 4.12 = (60 + 5) - (60 + 28) = 65 - 88 cyc
|
||||||
|
.repeat 4 ; 60 cyc
|
||||||
|
shl24 zx_2 ; 15 cyc
|
||||||
|
.endrepeat
|
||||||
|
round16 zx_2 ; 5-28 cycles
|
||||||
|
|
||||||
|
; shift and round zy_2 to 4.12 = (60 + 5) - (60 + 28) = 65 - 88 cyc
|
||||||
|
.repeat 4 ; 60 cyc
|
||||||
|
shl24 zy_2 ; 15 cyc
|
||||||
|
.endrepeat
|
||||||
|
round16 zy_2 ; 5-28 cycles
|
||||||
|
|
||||||
|
; shift and round zx_zy to 4.12 = (60 + 5) - (60 + 28) = 65 - 88 cyc
|
||||||
|
.repeat 4 ; 60 cyc
|
||||||
|
shl24 zx_zy ; 15 cyc
|
||||||
|
.endrepeat
|
||||||
|
round16 zx_zy ; 5-28 cycles
|
||||||
|
|
||||||
; if may be in the lake, look for looping output with a small buffer
|
; if may be in the lake, look for looping output with a small buffer
|
||||||
; as an optimization vs running to max iters
|
; as an optimization vs running to max iters
|
||||||
jmp loop
|
jmp loop ; 3 cycles
|
||||||
|
|
||||||
peace_out:
|
|
||||||
rts
|
|
||||||
|
|
||||||
.endproc
|
.endproc
|
||||||
|
|
||||||
|
@ -495,7 +523,7 @@ enough:
|
||||||
|
|
||||||
; cy = cy * (3 / 4)
|
; cy = cy * (3 / 4)
|
||||||
; cx = cx * (5 / 4)
|
; cx = cx * (5 / 4)
|
||||||
imul16_round dest, dest, aspect, 4
|
imul16_round dest, dest, aspect
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
.proc pset
|
.proc pset
|
||||||
|
@ -556,9 +584,6 @@ point:
|
||||||
; pixel_mask <<= pixel_shift (shifting in ones)
|
; pixel_mask <<= pixel_shift (shifting in ones)
|
||||||
and #3
|
and #3
|
||||||
sta pixel_shift
|
sta pixel_shift
|
||||||
lda #3
|
|
||||||
sec
|
|
||||||
sbc pixel_shift
|
|
||||||
tax
|
tax
|
||||||
shift_loop:
|
shift_loop:
|
||||||
beq shift_done
|
beq shift_done
|
||||||
|
@ -612,13 +637,9 @@ done:
|
||||||
sta ox + 1
|
sta ox + 1
|
||||||
sta oy
|
sta oy
|
||||||
sta oy + 1
|
sta oy + 1
|
||||||
|
|
||||||
; zoom = 2x
|
|
||||||
lda #1
|
|
||||||
sta zoom
|
sta zoom
|
||||||
|
|
||||||
; Disable display DMA
|
; Disable display DMA
|
||||||
lda #0
|
|
||||||
sta DMACTL
|
sta DMACTL
|
||||||
|
|
||||||
; zero the range from framebuffer_top to framebuffer_end
|
; zero the range from framebuffer_top to framebuffer_end
|
||||||
|
|
Loading…
Reference in a new issue