Compare commits
2 commits
3d94a9b5d4
...
b36ab77e97
Author | SHA1 | Date | |
---|---|---|---|
b36ab77e97 | |||
32bd5a540c |
2 changed files with 186 additions and 34 deletions
207
mandel.s
207
mandel.s
|
@ -1,14 +1,99 @@
|
||||||
|
; Our zero-page vars
|
||||||
|
sx = $80 ; 8 bits: screen pixel x
|
||||||
|
sy = $81 ; 8 bits: screen pixel y
|
||||||
|
cx = $82 ; 16 bits fixed point
|
||||||
|
cy = $84 ; 16 bits fixed point
|
||||||
|
zx = $86 ; 16 bits fixed point
|
||||||
|
zy = $88 ; 16 bits fixed point
|
||||||
|
zx_2 = $8a ; 32 bits fixed point
|
||||||
|
zy_2 = $8e ; 32 bits fixed point
|
||||||
|
zx_zy = $92 ; 32 bits fixed point
|
||||||
|
dist = $96 ; 32 bits fixed point
|
||||||
|
iter = $9a ; 8 bits iteration count
|
||||||
|
|
||||||
|
temp = $a0 ; debug temp area
|
||||||
|
|
||||||
; FP registers in zero page
|
; FP registers in zero page
|
||||||
FR0 = $d4
|
FR0 = $d4
|
||||||
FRE = $da
|
FRE = $da
|
||||||
FR1 = $e0
|
FR1 = $e0
|
||||||
FR2 = $e6
|
FR2 = $e6
|
||||||
FRX = $ec
|
|
||||||
|
|
||||||
.code
|
.code
|
||||||
|
|
||||||
.export start
|
.export start
|
||||||
|
|
||||||
|
; Multi-byte add: dest = arg1 + arg2 over `bytes` consecutive bytes.
; Byte 0 is added first and the carry ripples into each following byte.
; Cost per the original tally: 2 + 9 * bytes cycles. Uses A and the flags.
.macro add bytes, dest, arg1, arg2
    clc                         ; no carry into the lowest byte (2 cyc)
    .repeat bytes, i            ; 9 cyc for each byte
        lda arg1 + i
        adc arg2 + i            ; C carries over to the next byte up
        sta dest + i
    .endrepeat
.endmacro
|
||||||
|
|
||||||
|
; 16-bit add: dest = arg1 + arg2 (bytes at offsets 0 and 1).
; Written out long-hand; expands to the same seven instructions as
; the generic two-byte add.
.macro add16 dest, arg1, arg2
    clc
    lda arg1 + 0                ; low byte
    adc arg2 + 0
    sta dest + 0
    lda arg1 + 1                ; high byte, plus carry from the low byte
    adc arg2 + 1
    sta dest + 1
.endmacro
|
||||||
|
|
||||||
|
; 32-bit add: dest = arg1 + arg2 (bytes at offsets 0..3).
; Uses A and the flags; carry out of byte 3 is left in C.
.macro add32 dest, arg1, arg2
    add 4, dest, arg1, arg2     ; all four bytes, and both source operands
.endmacro
|
||||||
|
|
||||||
|
; Multi-byte subtract: dest = arg1 - arg2 over `bytes` consecutive bytes.
; Byte 0 is subtracted first and the borrow ripples into each following byte.
; Cost per the original tally: 2 + 9 * bytes cycles. Uses A and the flags.
.macro sub bytes, dest, arg1, arg2
    sec                         ; C set = no borrow into the lowest byte (2 cyc)
    .repeat bytes, i            ; 9 cyc for each byte
        lda arg1 + i
        sbc arg2 + i            ; borrow carries over to the next byte up
        sta dest + i
    .endrepeat
.endmacro
|
||||||
|
|
||||||
|
; 16-bit subtract: dest = arg1 - arg2 (bytes at offsets 0 and 1).
; Written out long-hand; expands to the same seven instructions as
; the generic two-byte subtract.
.macro sub16 dest, arg1, arg2
    sec
    lda arg1 + 0                ; low byte
    sbc arg2 + 0
    sta dest + 0
    lda arg1 + 1                ; high byte, minus borrow from the low byte
    sbc arg2 + 1
    sta dest + 1
.endmacro
|
||||||
|
|
||||||
|
; 32-bit subtract: dest = arg1 - arg2 (bytes at offsets 0..3).
; Same expansion as the generic four-byte subtract, spelled out here.
.macro sub32 dest, arg1, arg2
    sec                         ; C set = no borrow in
    .repeat 4, i
        lda arg1 + i
        sbc arg2 + i
        sta dest + i
    .endrepeat
.endmacro
|
||||||
|
|
||||||
|
; Multi-byte shift left by one bit, in place.
;   bytes: width of the value in bytes
;   arg:   address of the value's first (lowest) byte
; The bit shifted out of each byte is carried into the byte above it;
; the bit shifted out of the last byte is left in C.
.macro shl bytes, arg
    asl arg                     ; byte 0: 0 shifted in, top bit -> C
    .repeat bytes - 1, i
        rol arg + 1 + i         ; next byte up: C shifted in, top bit -> C
    .endrepeat
.endmacro

; 16-bit shift left by one bit.
.macro shl16 arg
    shl 2, arg
.endmacro

; 24-bit shift left by one bit.
.macro shl24 arg
    shl 3, arg
.endmacro

; 32-bit shift left by one bit.
.macro shl32 arg
    shl 4, arg
.endmacro
|
||||||
|
|
||||||
|
; Multi-byte copy: dest = arg over `bytes` consecutive bytes, lowest first.
; Cost per the original tally: 6 * bytes cycles. Uses A and the N/Z flags.
.macro copy bytes, dest, arg
    .repeat bytes, i            ; 6 cyc for each byte
        lda arg + i             ; 3 cyc
        sta dest + i            ; 3 cyc
    .endrepeat
.endmacro
|
||||||
|
|
||||||
|
; 16-bit copy: dest = arg (bytes at offsets 0 and 1).
; Written out long-hand; same expansion as the generic two-byte copy.
.macro copy16 dest, arg
    lda arg + 0                 ; low byte
    sta dest + 0
    lda arg + 1                 ; high byte
    sta dest + 1
.endmacro
|
||||||
|
|
||||||
|
; 32-bit copy: dest = arg (bytes at offsets 0..3).
; Same expansion as the generic four-byte copy, spelled out here.
.macro copy32 dest, arg
    .repeat 4, i
        lda arg + i
        sta dest + i
    .endrepeat
.endmacro
|
||||||
|
|
||||||
; 2 + 8 * byte cycles
|
; 2 + 8 * byte cycles
|
||||||
.macro neg bytes, arg
|
.macro neg bytes, arg
|
||||||
sec ; 2 cyc
|
sec ; 2 cyc
|
||||||
|
@ -92,9 +177,17 @@ next:
|
||||||
positive:
|
positive:
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
|
; Signed 16 x 16 -> 32 bit multiply wrapper: dest = arg1 * arg2.
; Stages the operands in FR0 / FR1, calls imul16_func, then copies the
; 32-bit product out of FR2. arg1 and arg2 themselves are only read.
; Total: 518 - 828 cyc.
.macro imul16 dest, arg1, arg2
    copy 2, FR0, arg1           ; 12 cyc - first operand  -> FR0
    copy 2, FR1, arg2           ; 12 cyc - second operand -> FR1
    jsr imul16_func             ; 470-780 cyc
    copy 4, dest, FR2           ; 24 cyc - 32-bit product -> dest
.endmacro
|
||||||
|
|
||||||
; min 470 cycles
|
; min 470 cycles
|
||||||
; max 780 cycles
|
; max 780 cycles
|
||||||
.proc imul16
|
.proc imul16_func
|
||||||
arg1 = FR0 ; 16-bit arg (clobbered)
|
arg1 = FR0 ; 16-bit arg (clobbered)
|
||||||
arg2 = FR1 ; 16-bit arg (clobbered)
|
arg2 = FR1 ; 16-bit arg (clobbered)
|
||||||
result = FR2 ; 32-bit result
|
result = FR2 ; 32-bit result
|
||||||
|
@ -128,7 +221,7 @@ positive_result:
|
||||||
rts ; 6 cyc
|
rts ; 6 cyc
|
||||||
.endproc
|
.endproc
|
||||||
|
|
||||||
.macro round16_incdec arg
|
.macro round16 arg
|
||||||
; Round top 16 bits of 32-bit fixed-point number in-place
|
; Round top 16 bits of 32-bit fixed-point number in-place
|
||||||
.local zero
|
.local zero
|
||||||
.local one
|
.local one
|
||||||
|
@ -178,61 +271,113 @@ next:
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
; Iterate z = z^2 + c for a single point until it escapes or the
; iteration counter wraps.
;
; input:
;   cx: position scaled to 4.12 fixed point - -8..+7.9
;   cy: position scaled to 4.12
;
; output:
;   iter: iteration count at escape, or 0 if the 8-bit counter wrapped
;         around without the point escaping
;
; clobbers: A, X, flags, the zero-page working set zx .. dist, and the
;           FR0 / FR1 / FR2 workspaces used by imul16
.proc mandelbrot
    ; Clear the whole working set in one pass. This relies on
    ; zx, zy, zx_2, zy_2, zx_zy, dist and iter being laid out
    ; contiguously in that order in zero page.
    ;
    ; X runs from (iter - zx + 1) down to 1 and the store address is
    ; biased by -1, so offset 0 (the low byte of zx) is cleared as well
    ; as the last byte (iter).
    lda #0
    ldx #(iter - zx + 1)
initloop:
    sta zx - 1,x
    dex
    bne initloop

loop:
    ; approx. 1950 - 2950 cyc per pass

    ; iter++ & max-iters break = 8 cyc
    inc iter                    ; 5 cyc
    bne keep_going              ; 3 cyc when taken
    rts                         ; counter wrapped to 0: never escaped
keep_going:

    ; 4.12: (-8 .. +7.9)
    ; After the shift-and-round step at the bottom of the loop, the 4.12
    ; form of each square/product sits in the TOP 16 bits of its 32-bit
    ; slot (bytes 2..3), hence the "+ 2" on the operands below.
    ; On the first pass everything is zero, so z starts out as c.

    ; zx = zx_2 - zy_2 + cx = 2 * 20 = 40 cyc
    sub16 zx, zx_2 + 2, zy_2 + 2
    add16 zx, zx, cx

    ; zy = zx_zy + zx_zy + cy = 2 * 20 = 40 cyc
    add16 zy, zx_zy + 2, zx_zy + 2
    add16 zy, zy, cy

    ; 8.24: (-128 .. +127.9)
    ; zx_2 = zx * zx = 518 - 828 cyc
    imul16 zx_2, zx, zx

    ; zy_2 = zy * zy = 518 - 828 cyc
    imul16 zy_2, zy, zy

    ; zx_zy = zx * zy = 518 - 828 cyc
    imul16 zx_zy, zx, zy

    ; dist = zx_2 + zy_2 = 38 cyc
    ; (computed on the full 8.24 values, before they are shifted below)
    add32 dist, zx_2, zy_2

    ; if dist >= 4 break, else continue iterating = 8 cyc
    ; The top byte of an 8.24 value is its integer part. dist is a sum of
    ; squares, so compare it as an unsigned magnitude: after cmp, C is
    ; clear exactly when A < 4.
    lda dist + 3                ; 3 cyc
    cmp #4                      ; 2 cyc
    bcc still_in                ; 3 cyc when taken
    rts                         ; escaped: iter holds the count
still_in:

    ; Convert each 8.24 result to 4.12 in its top 16 bits: shift the whole
    ; 32-bit value left by 4 bits, then round the top half in place.

    ; shift and round zx_2 to 4.12 = (80 + 5) - (80 + 28) = 85 - 108 cyc
    .repeat 4                   ; 80 cyc
        shl32 zx_2              ; 20 cyc
    .endrepeat
    round16 zx_2                ; 5-28 cycles

    ; shift and round zy_2 to 4.12 = (80 + 5) - (80 + 28) = 85 - 108 cyc
    .repeat 4                   ; 80 cyc
        shl32 zy_2              ; 20 cyc
    .endrepeat
    round16 zy_2                ; 5-28 cycles

    ; shift and round zx_zy to 4.12 = (80 + 5) - (80 + 28) = 85 - 108 cyc
    .repeat 4                   ; 80 cyc
        shl32 zx_zy             ; 20 cyc
    .endrepeat
    round16 zx_zy               ; 5-28 cycles

    ; if may be in the lake, look for looping output with a small buffer
    ; as an optimization vs running to max iters
    jmp loop                    ; 3 cycles

.endproc
|
||||||
|
|
||||||
; Program entry point / debug harness.
; Runs the mandelbrot routine on one fixed test point, stores the
; resulting iteration count in temp, and repeats forever so the state
; can be inspected in a debugger.
.proc start
    ; Test point in 4.12 fixed point (value * 4096).
    ; 16-bit values are stored low byte first.
    test_cx = $f800             ; -0.5 * 4096 = -2048
    test_cy = $1000             ;  1.0 * 4096 =  4096

looplong:
    ; cx = -0.5
    lda #<test_cx
    sta cx
    lda #>test_cx
    sta cx + 1

    ; cy = 1
    lda #<test_cy
    sta cy
    lda #>test_cy
    sta cy + 1

    jsr mandelbrot

    ; save the completed iter count for debugging
    lda iter
    sta temp

loop:
    ; keep looping over so we can work in the debugger
    jmp looplong
.endproc
|
||||||
|
|
13
readme.md
13
readme.md
|
@ -3,18 +3,19 @@
|
||||||
Work-in-progress Mandelbrot fractal viewer for Atari 8-bit home computers. Mostly an excuse to write an integer multiplication routine for the 6502 for practice.
|
Work-in-progress Mandelbrot fractal viewer for Atari 8-bit home computers. Mostly an excuse to write an integer multiplication routine for the 6502 for practice.
|
||||||
|
|
||||||
Goals:
|
Goals:
|
||||||
|
|
||||||
* have fun learning 6502 assembly
|
* have fun learning 6502 assembly
|
||||||
* make an old machine do something inefficient as efficiently as possible.
|
* make an old machine do something inefficient as efficiently as possible.
|
||||||
* post cool screenshots of low-res fractals
|
* post cool screenshots of low-res fractals
|
||||||
|
|
||||||
Non-goals:
|
Non-goals:
|
||||||
|
|
||||||
* maintain anything long-term (but feel free to copy/fork if you want to make major changes!)
|
* maintain anything long-term (but feel free to copy/fork if you want to make major changes!)
|
||||||
|
|
||||||
Enjoy! I'll probably work on this off and on for the next few weeks until I've got it producing fractals.
|
Enjoy! I'll probably work on this off and on for the next few weeks until I've got it producing fractals.
|
||||||
|
|
||||||
-- brion, january 2023
|
-- brion, january 2023
|
||||||
|
|
||||||
|
|
||||||
## Current state
|
## Current state
|
||||||
|
|
||||||
The 16-bit signed integer multiplication seems to be working, though I need to double-check it some more. It takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
|
The 16-bit signed integer multiplication seems to be working, though I need to double-check it some more. It takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
|
||||||
|
@ -23,9 +24,15 @@ The main loop is a basic add-and-shift, using 16-bit adds which requires flippin
|
||||||
|
|
||||||
The loop is unrolled which saves 148 cycles, but at the cost of making the routine quite large. This is an acceptable tradeoff for the Mandelbrot, where imul16 is the dominant performance cost and the rest of the program will be small.
|
The loop is unrolled which saves 148 cycles, but at the cost of making the routine quite large. This is an acceptable tradeoff for the Mandelbrot, where imul16 is the dominant performance cost and the rest of the program will be small.
|
||||||
|
|
||||||
The mandelbrot loop is partly sketched out but I have future updates to make on that.
|
The mandelbrot loop is now written out, but untested and probably buggy. With three multiplications, several additions/subtractions, and three sets of annoying bit shifts and rounds, it weighs in at 1939 - 3007 cycles per iteration.
|
||||||
|
|
||||||
I've also sketched out a 16-bit rounding macro, which is not yet committed.
|
## Next steps
|
||||||
|
|
||||||
|
After a quick once-over to make sure it looks right, it's probably time to slap a display list together and draw some pixels to the screen and see what happens.
|
||||||
|
|
||||||
|
Reaching max iterations (256 runs through the loop) will take a half second or so per pixel -- this can be optimized by keeping a buffer of a few past zx/zy values and checking for duplicates which would signal a loop that will never escape. (Another technique I learned from Fractint!)
|
||||||
|
|
||||||
|
160x192 is luckily only 30,720 pixels, so even if every pixel runs to max iterations the worst-case rendering time is about 4.5 hours. :D
|
||||||
|
|
||||||
## Deps and build instructions
|
## Deps and build instructions
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue