Compare commits
No commits in common. "main" and "status" have entirely different histories.
9 changed files with 299 additions and 1357 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1,4 +1,3 @@
|
||||||
*.o
|
*.o
|
||||||
*.xex
|
*.xex
|
||||||
tables.s
|
|
||||||
.DS_Store
|
.DS_Store
|
||||||
|
|
2
.mailmap
2
.mailmap
|
@ -1,2 +0,0 @@
|
||||||
Brooke Vibber <bvibber@pobox.com>
|
|
||||||
Brooke Vibber <bvibber@pobox.com> <brion@pobox.com>
|
|
8
Makefile
8
Makefile
|
@ -2,17 +2,13 @@
|
||||||
|
|
||||||
all : mandel.xex
|
all : mandel.xex
|
||||||
|
|
||||||
mandel.xex : mandel.o tables.o atari-asm-xex.cfg
|
%.xex : %.o
|
||||||
ld65 -C ./atari-asm-xex.cfg -o $@ mandel.o tables.o
|
ld65 -C atari-asm-xex.cfg -o $@ $<
|
||||||
|
|
||||||
%.o : %.s
|
%.o : %.s
|
||||||
ca65 -o $@ $<
|
ca65 -o $@ $<
|
||||||
|
|
||||||
tables.s : tables.js
|
|
||||||
node tables.js > tables.s
|
|
||||||
|
|
||||||
clean :
|
clean :
|
||||||
rm -f tables.s
|
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
rm -f *.xex
|
rm -f *.xex
|
||||||
|
|
||||||
|
|
|
@ -1,28 +0,0 @@
|
||||||
FEATURES {
|
|
||||||
STARTADDRESS: default = $2E00;
|
|
||||||
}
|
|
||||||
SYMBOLS {
|
|
||||||
__STARTADDRESS__: type = export, value = %S;
|
|
||||||
}
|
|
||||||
MEMORY {
|
|
||||||
ZP: file = "", define = yes, start = $0082, size = $007E;
|
|
||||||
MAIN: file = %O, define = yes, start = %S, size = $4000 - %S;
|
|
||||||
# Keep $4000-7fff clear for expanded RAM access window
|
|
||||||
TABLES: file = %O, define = yes, start = $8000, size = $a000 - $8000;
|
|
||||||
# Keep $a000-$bfff clear for BASIC cartridge
|
|
||||||
}
|
|
||||||
FILES {
|
|
||||||
%O: format = atari;
|
|
||||||
}
|
|
||||||
FORMATS {
|
|
||||||
atari: runad = start;
|
|
||||||
}
|
|
||||||
SEGMENTS {
|
|
||||||
ZEROPAGE: load = ZP, type = zp, optional = yes;
|
|
||||||
EXTZP: load = ZP, type = zp, optional = yes; # to enable modules to be able to link to C and assembler programs
|
|
||||||
CODE: load = MAIN, type = rw, define = yes;
|
|
||||||
RODATA: load = MAIN, type = ro optional = yes;
|
|
||||||
DATA: load = MAIN, type = rw optional = yes;
|
|
||||||
BSS: load = MAIN, type = bss, optional = yes, define = yes;
|
|
||||||
TABLES: load = TABLES, type = ro, optional = yes, align = 256;
|
|
||||||
}
|
|
25
readme.md
25
readme.md
|
@ -14,37 +14,30 @@ Non-goals:
|
||||||
|
|
||||||
Enjoy! I'll probably work on this off and on for the next few weeks until I've got it producing fractals.
|
Enjoy! I'll probably work on this off and on for the next few weeks until I've got it producing fractals.
|
||||||
|
|
||||||
-- brooke, january 2023 - december 2024
|
-- brion, january 2023
|
||||||
|
|
||||||
## Current state
|
## Current state
|
||||||
|
|
||||||
Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 6 preset viewports via the number keys.
|
Basic rendering is functional, but no interactive behavior (zoom/pan) or benchmarking is done yet.
|
||||||
|
|
||||||
The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
|
The 16-bit signed integer multiplication works; it takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
|
||||||
|
|
||||||
* 16-bit multiplies are decomposed into 4 8-bit unsigned multiplies and some addition
|
The main loop is a basic add-and-shift, using 16-bit adds which requires flipping the sign of negative inputs (otherwise you'd have to add all those sign-extension bits). Runs in 470-780 cycles depending on input.
|
||||||
* an optimized case for squares uses a table of 8-bit squares to reduce the number of 8-bit multiplication sub-ops
|
|
||||||
* when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications
|
|
||||||
* without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication
|
|
||||||
|
|
||||||
The mandelbrot calculations are done using 3.13-precision fixed point numbers with 6.26-precision intermediates.
|
The mandelbrot calculations are done using 4.12-precision fixed point numbers. It may be possible to squish this down to 3.13.
|
||||||
|
|
||||||
Iterations are capped at 255.
|
Iterations are capped at 255.
|
||||||
|
|
||||||
The pixels are run in a progressive layout to get the basic shape on screen faster.
|
## Next steps
|
||||||
|
|
||||||
There is a running counter of ms/px using the vertical blank interrupts as a timer, used to track our progress. :D
|
Add a running counter of ms/px using the vertical blank interrupts as a timer. This'll show how further work improves it!
|
||||||
|
|
||||||
There's a check for cycles in (zx,zy) output when in the 'lake'; if values repeat, they cannot escape. This is a big time saver in fractint.
|
Check for cycles in (zx,zy) output when in the 'lake'; if values repeat, they cannot escape. This is a big time saver in fractint.
|
||||||
|
|
||||||
There's some cute color cycling.
|
I may be able to do a faster multiply using tables of squares for 8-bit component multiplication.
|
||||||
|
|
||||||
## Deps and build instructions
|
## Deps and build instructions
|
||||||
|
|
||||||
I'm using `ca65` as a macro assembler, and have a Unix-style `Makefile` for building. Should work fairly easily on Linux and Mac. Might work on "raw" Windows but I use WSL for that.
|
I'm using `ca65` as a macro assembler, and have a Unix-style `Makefile` for building. Should work fairly easily on Linux and Mac. Might work on "raw" Windows but I use WSL for that.
|
||||||
|
|
||||||
Currently produces a `.xex` executable, which can be booted up in common Atari emulators and some i/o devices.
|
Currently produces a `.xex` executable, which can be booted up in common Atari emulators and some i/o devices.
|
||||||
|
|
||||||
## Todo
|
|
||||||
|
|
||||||
See ideas in `todo.md`.
|
|
||||||
|
|
50
tables.js
50
tables.js
|
@ -1,50 +0,0 @@
|
||||||
/**
 * Render a 256-entry table as assembler `.byte` lines.
 *
 * `func` is called once for every index from 0 to 255, in ascending order.
 * The returned values are written sixteen per line, giving sixteen lines.
 *
 * @param {(index: number) => number} func - Produces the value for one index.
 * @returns {string} The `.byte` lines joined with '\n' (no trailing newline).
 */
function db(func) {
    const lines = [];
    for (let base = 0; base < 256; base += 16) {
        const items = [];
        for (let offset = 0; offset < 16; offset++) {
            items.push(func(base + offset));
        }
        lines.push(` .byte ${items.join(', ')}`);
    }
    return lines.join('\n');
}
|
|
||||||
|
|
||||||
// Half-square table: squares[i] = trunc((i * i + 1) / 2) for i = 0..511.
// The "+ 1" makes odd squares round up instead of down when halved.
const squares = Array.from({ length: 512 }, (_, i) => Math.trunc((i * i + 1) / 2));
|
|
||||||
|
|
||||||
// Write the generated assembler source for the lookup tables to stdout.
// Five 256-entry tables are emitted into the "TABLES" segment, each preceded
// by `.align 256`:
//   mul_lobyte256 / mul_hibyte256 - low / high byte of squares[0..255]
//   mul_hibyte512                 - high byte of squares[256..511]
//   sqr_lobyte / sqr_hibyte       - low / high byte of plain i * i
// The template text is emitted verbatim, so its line layout is significant.
console.log(
`.segment "TABLES"

.export mul_lobyte256
.export mul_hibyte256
.export mul_hibyte512
.export sqr_lobyte
.export sqr_hibyte

; (i * i + 1) / 2 for the multiplier
.align 256
mul_lobyte256:
${db((i) => squares[i] & 0xff)}

.align 256
mul_hibyte256:
${db((i) => (squares[i] >> 8) & 0xff)}

.align 256
mul_hibyte512:
${db((i) => (squares[i + 256] >> 8) & 0xff)}

; (i * i) for the plain squares
.align 256
sqr_lobyte:
${db((i) => (i * i) & 0xff)}

.align 256
sqr_hibyte:
${db((i) => ((i * i) >> 8) & 0xff)}

`);
|
|
41
testme.js
41
testme.js
|
@ -1,41 +0,0 @@
|
||||||
// ax = (a + x)^2/2 - a^2/2 - x^2/2
|
|
||||||
|
|
||||||
/**
 * Half of x squared, rounded with Math.round and truncated to 16 bits.
 *
 * For odd x the exact value ends in .5, which Math.round rounds up, so the
 * result is (x * x + 1) / 2 before truncation.
 *
 * @param {number} x - Value to square.
 * @returns {number} Low 16 bits of round(x * x / 2), as a non-negative integer.
 */
function half_square(x) {
    // Parenthesised so the unsigned shift applies to the masked result;
    // unparenthesised, `>>>` binds tighter than `&` and only touches the mask.
    return (Math.round((x * x) / 2) & 0xffff) >>> 0;
}
|
|
||||||
|
|
||||||
/**
 * Multiply two values using only half-square lookups, additions and
 * subtractions, keeping every intermediate result to 16 bits.
 *
 * Uses a*b = (a+b)^2/2 - a^2/2 - b^2/2. Because half_square() rounds odd
 * squares up by one half, the two subtracted terms together remove one too
 * many when a and b are both odd; `a & b & 1` adds that one back. When only
 * one operand is odd, the rounding in (a+b)^2/2 cancels it instead.
 *
 * @param {number} a - First operand (mul16 passes single bytes).
 * @param {number} b - Second operand (mul16 passes single bytes).
 * @returns {number} Low 16 bits of the product, as a non-negative integer.
 */
function mul8(a, b) {
    let result = half_square(a + b) & 0xffff;
    result = (result - half_square(a)) & 0xffff;
    result = (result - half_square(b)) & 0xffff;
    // Rounding correction: only needed when both operands are odd.
    result = (result + (b & a & 1)) & 0xffff;
    return result >>> 0;
}
|
|
||||||
|
|
||||||
/**
 * Multiply two 16-bit values by combining four byte-by-byte mul8() products.
 *
 * With a = ah:al and b = bh:bl, the product is
 *   al*bl + ((ah*bl + al*bh) << 8) + ((ah*bh) << 16).
 * Partial sums are accumulated in that order and masked after each step.
 *
 * @param {number} a - First operand; only the low 16 bits are used.
 * @param {number} b - Second operand; only the low 16 bits are used.
 * @returns {number} The product modulo 2^32, as an unsigned 32-bit integer.
 */
function mul16(a, b) {
    const ah = (a & 0xff00) >>> 8;
    const al = a & 0x00ff;
    const bh = (b & 0xff00) >>> 8;
    const bl = b & 0x00ff;

    let result = (mul8(al, bl) & 0xffff) >>> 0;
    result = ((result + (mul8(ah, bl) << 8)) & 0x00ffffff) >>> 0;
    result = ((result + (mul8(al, bh) << 8)) & 0x01ffffff) >>> 0;
    // `<< 16` can set the sign bit; the final `>>> 0` reinterprets the sum
    // as unsigned.
    result = ((result + (mul8(ah, bh) << 16)) & 0xffffffff) >>> 0;
    return result;
}
|
|
||||||
|
|
||||||
// Exhaustive self-check: compare mul16() against Math.imul() for every pair
// of operands below `max` and log each mismatch.
// The full 16-bit range is 2^32 pairs; for a quicker run lower `max`
// (e.g. 256, 128 or 8). To check the 8-bit helper on its own, use a `max`
// of at most 256 and call mul8(a, b) instead of mul16(a, b).
const max = 65536;

for (let a = 0; a < max; a++) {
    for (let b = 0; b < max; b++) {
        const expected = Math.imul(a, b) >>> 0;
        const actual = mul16(a, b);
        if (expected !== actual) {
            console.log(`wrong! ${a} * ${b} expected ${expected} got ${actual}`);
        }
    }
}
|
|
17
todo.md
17
todo.md
|
@ -1,17 +0,0 @@
|
||||||
things to try:
|
|
||||||
|
|
||||||
* fix status bar to show elapsed time, per-iter time, per-pixel iter count
|
|
||||||
|
|
||||||
* 'turbo' mode disabling graphics in full or part
|
|
||||||
|
|
||||||
* patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D
|
|
||||||
|
|
||||||
* maybe clean up the load/layout of the big mul table
|
|
||||||
|
|
||||||
* consider alternate lookup tables in the top 16KB under ROM
|
|
||||||
|
|
||||||
* y-axis mirror optimization
|
|
||||||
|
|
||||||
* extract viewport for display & re-input via keyboard
|
|
||||||
|
|
||||||
* fujinet screenshot/viewport uploader
|
|
Loading…
Reference in a new issue