From dbbec8ed6d648ab2984a8fb0513d14f60b30ca80 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sun, 22 Jan 2023 09:34:42 -0800 Subject: [PATCH 1/5] ok two things wrong: 1) bit masks are backwards 2) iter always returning 0 --- mandel.s | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mandel.s b/mandel.s index be1f59e..9d3a2a6 100644 --- a/mandel.s +++ b/mandel.s @@ -438,9 +438,9 @@ next: ; dist = 0 ; iter = 0 lda #00 - ldx iter - zx + ldx #(iter - zx + 1) initloop: - sta zx,x + sta zx - 1,x dex bne initloop @@ -705,6 +705,7 @@ loop_sx: zoom_factor cx, sx, zoom, aspect_x zoom_factor cy, sy, zoom, aspect_y jsr mandelbrot + inc iter jsr pset clc From b4721ae46b0399deeffed8cf2eca6f3eb9feed5f Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sun, 22 Jan 2023 09:37:37 -0800 Subject: [PATCH 2/5] fix pixel shift --- mandel.s | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mandel.s b/mandel.s index 9d3a2a6..df80ed7 100644 --- a/mandel.s +++ b/mandel.s @@ -584,6 +584,9 @@ point: ; pixel_mask <<= pixel_shift (shifting in ones) and #3 sta pixel_shift + lda #3 + sec + sbc pixel_shift tax shift_loop: beq shift_done From ae9dd0674d002e726566a20e7756c3619244576d Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sun, 22 Jan 2023 10:42:37 -0800 Subject: [PATCH 3/5] corrupt! 
but it produces pixels --- mandel.s | 64 ++++++++++++++++++++++++-------------------------------- 1 file changed, 27 insertions(+), 37 deletions(-) diff --git a/mandel.s b/mandel.s index df80ed7..b4baad5 100644 --- a/mandel.s +++ b/mandel.s @@ -168,7 +168,7 @@ color_map: .endmacro .macro add32 dest, arg1, arg2 - add 2, dest, arg2, dest + add 4, dest, arg2, dest .endmacro ; 2 + 9 * byte cycles @@ -422,6 +422,13 @@ next: .endmacro +.macro shift_round_16 arg, shift + .repeat shift + shl32 arg + .endrepeat + round16 arg +.endmacro + .proc mandelbrot ; input: ; cx: position scaled to 4.12 fixed point - -8..+7.9 @@ -445,64 +452,48 @@ initloop: bne initloop loop: - ; 1939 - 3007 cyc - - ; iter++ & max-iters break = 7 cyc - inc iter ; 5 cyc - bne keep_going ; 2 cyc + ; iter++ & max-iters break + inc iter + bne keep_going rts keep_going: ; 4.12: (-8 .. +7.9) - ; zx = zx_2 - zy_2 + cx = 3 * 20 = 60 cyc - sub16 zx, zx_2, zy_2 + ; zx = zx_2 - zy_2 + cx + sub16 zx, zx_2 + 2, zy_2 + 2 add16 zx, zx, cx - ; zy = zx_zy + zx_zy + cy = 3 * 20 = 60 cyc - sub16 zy, zx_zy, zx_zy + ; zy = zx_zy + zx_zy + cy + add16 zy, zx_zy + 2, zx_zy + 2 add16 zy, zy, cy ; 8.24: (-128 .. 
+127.9) - ; zx_2 = zx * zx = 518 - 828 cyc + ; zx_2 = zx * zx imul16 zx_2, zx, zx - ; zy_2 = zy * zy = 518 - 828 cyc + ; zy_2 = zy * zy imul16 zy_2, zy, zy - ; zx_zy = zx * zy = 518 - 828 cyc + ; zx_zy = zx * zy imul16 zx_zy, zx, zy - ; dist = zx_2 + zy_2 = 38 cyc + ; dist = zx_2 + zy_2 add32 dist, zx_2, zy_2 - ; if dist >= 4 break, else continue iterating = 7 cyc - lda dist + 3 ; 3 cyc - cmp #4 ; 2 cyc - bmi still_in ; 2 cyc + ; if dist >= 4 break, else continue iterating + lda dist + 3 + cmp #4 + bmi still_in rts still_in: - ; shift and round zx_2 to 4.12 = (60 + 5) - (60 + 28) = 65 - 88 cyc - .repeat 4 ; 60 cyc - shl24 zx_2 ; 15 cyc - .endrepeat - round16 zx_2 ; 5-28 cycles - - ; shift and round zy_2 to 4.12 = (20 + 5) - (20 + 28) = 65 - 88 cyc - .repeat 4 ; 60 cyc - shl24 zy_2 ; 15 cyc - .endrepeat - round16 zy_2 ; 5-28 cycles - - ; shift and round zx_zy to 4.12 = (20 + 5) - (20 + 28) = 65 - 88 cyc - .repeat 4 ; 60 cyc - shl24 zx_zy ; 15 cyc - .endrepeat - round16 zx_zy ; 5-28 cycles + shift_round_16 zx_2, 4 + shift_round_16 zy_2, 4 + shift_round_16 zx_zy, 4 ; if may be in the lake, look for looping output with a small buffer ; as an optimization vs running to max iters - jmp loop ; 3 cycles + jmp loop .endproc @@ -708,7 +699,6 @@ loop_sx: zoom_factor cx, sx, zoom, aspect_x zoom_factor cy, sy, zoom, aspect_y jsr mandelbrot - inc iter jsr pset clc From 1bef004ccd08d5716f181417870ea837d7d96cb6 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sun, 22 Jan 2023 11:17:51 -0800 Subject: [PATCH 4/5] precision cleanup using 4.12 and 8.24 consistently --- mandel.s | 68 ++++++++++++++++++-------------------------------------- 1 file changed, 22 insertions(+), 46 deletions(-) diff --git a/mandel.s b/mandel.s index b4baad5..10782b8 100644 --- a/mandel.s +++ b/mandel.s @@ -1,17 +1,17 @@ ; Our zero-page vars sx = $80 ; i16: screen pixel x sy = $82 ; i16: screen pixel y -ox = $84 ; fixed3.13: center point x -oy = $86 ; fixed3.13: center point y -cx = $84 ; fixed3.13: c_x -cy 
= $86 ; fixed3.13: c_y -zx = $88 ; fixed3.13: z_x -zy = $8a ; fixed3.13: z_y +ox = $84 ; fixed4.12: center point x +oy = $86 ; fixed4.12: center point y +cx = $84 ; fixed4.12: c_x +cy = $86 ; fixed4.12: c_y +zx = $88 ; fixed4.12: z_x +zy = $8a ; fixed4.12: z_y -zx_2 = $90 ; fixed6.26: z_x^2 -zy_2 = $94 ; fixed6.26: z_y^2 -zx_zy = $98 ; fixed6.26: z_x * z_y -dist = $9c ; fixed6.26: z_x^2 + z_y^2 +zx_2 = $90 ; fixed8.24: z_x^2 +zy_2 = $94 ; fixed8.24: z_y^2 +zx_zy = $98 ; fixed8.24: z_x * z_y +dist = $9c ; fixed8.24: z_x^2 + z_y^2 iter = $a0 ; u8: iteration count zoom = $a1 ; u8: zoom shift level @@ -42,8 +42,6 @@ half_height = height >> 1 width = 160 half_width = width >> 1 stride = width >> 2 -width_ratio_3_13 = (5 << 11) ; 5/4 -height_ratio_3_13 = (3 << 11) ; 5/4 DMACTL = $D400 DLISTL = $D402 @@ -101,18 +99,12 @@ aspect: ; 184h is the equiv of 220.8h at square pixels ; 320 / 220.8 = 1.45 display aspect ratio aspect_x: - .word 5 << (13 - 2) + .word 5 << (12 - 2) aspect_y: - .word 3 << (13 - 2) + .word 3 << (12 - 2) -bit_masks: - .byte 3 - .byte 3 << 2 - .byte 3 << 4 - .byte 3 << 6 - display_list_start: ; 24 lines overscan .repeat 3 @@ -244,21 +236,6 @@ color_map: neg 4, arg .endmacro -.macro extend_8_16 dest, src - ; clobbers A, X - ; 13-15 cycles - .local positive - .local negative - ldx #0 ; 2 cyc - lda src ; 3 cyc - sta dest ; 3 cyc - bpl positive ; 2 cyc -negative: - dex ; 2 cyc -positive: - stx dest + 1 ; 3 cyc -.endmacro - ; inner loop for imul16 ; bitnum < 8: 25 or 41 cycles ; bitnum >= 8: 30 or 46 cycles @@ -307,7 +284,6 @@ next: ror result ; 5 cyc .endif - .endmacro ; 5 to 25 cycles @@ -330,11 +306,18 @@ positive: copy32 dest, FR2 ; 24 cyc .endmacro -.macro imul16_round dest, arg1, arg2 +.macro shift_round_16 arg, shift + .repeat shift + shl32 arg + .endrepeat + round16 arg +.endmacro + +.macro imul16_round dest, arg1, arg2, shift copy16 FR0, arg1 ; 12 cyc copy16 FR1, arg2 ; 12 cyc jsr imul16_func ; 470-780 cyc - round16 FR2 ; 5-28 cyc + shift_round_16 
FR2, shift copy16 dest, FR2 + 2 ; 12 cyc .endmacro @@ -422,13 +405,6 @@ next: .endmacro -.macro shift_round_16 arg, shift - .repeat shift - shl32 arg - .endrepeat - round16 arg -.endmacro - .proc mandelbrot ; input: ; cx: position scaled to 4.12 fixed point - -8..+7.9 @@ -514,7 +490,7 @@ enough: ; cy = cy * (3 / 4) ; cx = cx * (5 / 4) - imul16_round dest, dest, aspect + imul16_round dest, dest, aspect, 4 .endmacro .proc pset From 57975b7158c8dae6ee2d10519a4bca36f3b6dac1 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sun, 22 Jan 2023 12:02:15 -0800 Subject: [PATCH 5/5] not sure what's wrong have to check over --- mandel.s | 49 +++++++++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/mandel.s b/mandel.s index 10782b8..f4862c6 100644 --- a/mandel.s +++ b/mandel.s @@ -254,10 +254,10 @@ color_map: ; 5 cycles either way .if bitnum < 8 lda arg1 ; 3 cyc - and #(1 << bitnum) ; 2 cyc + and #(1 << (bitnum)) ; 2 cyc .else lda arg1 + 1 ; 3 cyc - and #(1 << (bitnum - 8)) ; 2 cyc + and #(1 << ((bitnum) - 8)) ; 2 cyc .endif bne one ; 2 cyc @@ -434,43 +434,48 @@ loop: rts keep_going: + .macro quick_exit arg + .local keep_going + lda arg + 1 + cmp #(4 << 4) + bmi keep_going + rts + keep_going: + .endmacro + ; 4.12: (-8 .. +7.9) ; zx = zx_2 - zy_2 + cx - sub16 zx, zx_2 + 2, zy_2 + 2 + sub16 zx, zx_2, zy_2 add16 zx, zx, cx + quick_exit zx ; zy = zx_zy + zx_zy + cy - add16 zy, zx_zy + 2, zx_zy + 2 + add16 zy, zx_zy, zx_zy add16 zy, zy, cy - ; 8.24: (-128 ..
+127.9) ; zx_2 = zx * zx - imul16 zx_2, zx, zx + imul16_round zx_2, zx, zx, 4 + quick_exit dist ; zy_2 = zy * zy - imul16 zy_2, zy, zy + imul16_round zy_2, zy, zy, 4 + quick_exit dist ; zx_zy = zx * zy - imul16 zx_zy, zx, zy + imul16_round zx_zy, zx, zy, 4 + quick_exit dist ; dist = zx_2 + zy_2 - add32 dist, zx_2, zy_2 - - ; if dist >= 4 break, else continue iterating - lda dist + 3 - cmp #4 - bmi still_in - rts -still_in: - - shift_round_16 zx_2, 4 - shift_round_16 zy_2, 4 - shift_round_16 zx_zy, 4 + add16 dist, zx_2, zy_2 + quick_exit dist ; if may be in the lake, look for looping output with a small buffer ; as an optimization vs running to max iters jmp loop +peace_out: + rts + .endproc .macro zoom_factor dest, src, zoom, aspect @@ -607,9 +612,13 @@ done: sta ox + 1 sta oy sta oy + 1 + + ; zoom = 2x + lda #1 sta zoom ; Disable display DMA + lda #0 sta DMACTL ; zero the range from framebuffer_top to framebuffer_end