Compare commits
No commits in common. "main" and "xe" have entirely different histories.
11 changed files with 1651 additions and 2348 deletions
10
Makefile
10
Makefile
|
|
@ -2,11 +2,8 @@
|
||||||
|
|
||||||
all : mandel.xex
|
all : mandel.xex
|
||||||
|
|
||||||
mandel.xex : mandel.o mandel-core.o tables.o atari-xex.cfg
|
mandel.xex : mandel.o tables.o
|
||||||
ld65 -C ./atari-xex.cfg --mapfile mandel.map -o $@ mandel.o mandel-core.o tables.o atari.lib
|
ld65 -C ./atari-asm-xex.cfg -o $@ $+
|
||||||
|
|
||||||
mandel.s : mandel.c mandel.h
|
|
||||||
cc65 -o $@ mandel.c
|
|
||||||
|
|
||||||
%.o : %.s
|
%.o : %.s
|
||||||
ca65 -o $@ $<
|
ca65 -o $@ $<
|
||||||
|
|
@ -16,7 +13,6 @@ tables.s : tables.js
|
||||||
|
|
||||||
clean :
|
clean :
|
||||||
rm -f tables.s
|
rm -f tables.s
|
||||||
rm -f mandel.s
|
|
||||||
rm -f *.o
|
rm -f *.o
|
||||||
rm -f *.xex
|
rm -f *.xex
|
||||||
rm -f mandel.map
|
|
||||||
|
|
|
||||||
|
|
@ -1,28 +0,0 @@
|
||||||
FEATURES {
|
|
||||||
STARTADDRESS: default = $2E00;
|
|
||||||
}
|
|
||||||
SYMBOLS {
|
|
||||||
__STARTADDRESS__: type = export, value = %S;
|
|
||||||
}
|
|
||||||
MEMORY {
|
|
||||||
ZP: file = "", define = yes, start = $0082, size = $007E;
|
|
||||||
MAIN: file = %O, define = yes, start = %S, size = $4000 - %S;
|
|
||||||
# Keep $4000-7fff clear for expanded RAM access window
|
|
||||||
TABLES: file = %O, define = yes, start = $8000, size = $a000 - $8000;
|
|
||||||
# Keep $a000-$bfff clear for BASIC cartridge
|
|
||||||
}
|
|
||||||
FILES {
|
|
||||||
%O: format = atari;
|
|
||||||
}
|
|
||||||
FORMATS {
|
|
||||||
atari: runad = start;
|
|
||||||
}
|
|
||||||
SEGMENTS {
|
|
||||||
ZEROPAGE: load = ZP, type = zp, optional = yes;
|
|
||||||
EXTZP: load = ZP, type = zp, optional = yes; # to enable modules to be able to link to C and assembler programs
|
|
||||||
CODE: load = MAIN, type = rw, define = yes;
|
|
||||||
RODATA: load = MAIN, type = ro optional = yes;
|
|
||||||
DATA: load = MAIN, type = rw optional = yes;
|
|
||||||
BSS: load = MAIN, type = bss, optional = yes, define = yes;
|
|
||||||
TABLES: load = TABLES, type = ro, optional = yes, align = 256;
|
|
||||||
}
|
|
||||||
|
|
@ -1,69 +0,0 @@
|
||||||
# Sample linker configuration for C programs using the Atari binary file support.
|
|
||||||
# Use with: cl65 -tatari -Catari-xex.cfg prog.c -o prog.xex
|
|
||||||
FEATURES {
|
|
||||||
STARTADDRESS: default = $8000;
|
|
||||||
}
|
|
||||||
SYMBOLS {
|
|
||||||
__SYSTEM_CHECK__: type = import; # force inclusion of "system check" load chunk
|
|
||||||
__STACKSIZE__: type = weak, value = $0800; # 2k stack
|
|
||||||
__STARTADDRESS__: type = export, value = %S;
|
|
||||||
__RESERVED_MEMORY__: type = weak, value = $0000;
|
|
||||||
__SYSCHKHDR__: type = export, value = 0; # Disable system check header
|
|
||||||
__SYSCHKTRL__: type = export, value = 0; # Disable system check trailer
|
|
||||||
__TABLESEG_START__: type = weak, value = $2E00 + $0300;
|
|
||||||
__TABLESEG_SIZE__: type = weak, value = 6 * $100;
|
|
||||||
__BANKSY_START__: type = weak, value = $4000;
|
|
||||||
__BANKSY_SIZE__: type = weak, value = $4000;
|
|
||||||
__FRAMEBUFFER_START__: type = weak, value = $A000;
|
|
||||||
}
|
|
||||||
MEMORY {
|
|
||||||
# Note -- $80 and $81 (LOMEM) appear to be reserved in ZP.
|
|
||||||
ZP: file = "", define = yes, start = $0082, size = $007E;
|
|
||||||
# "system check" load chunk
|
|
||||||
SYSCHKCHNK: file = %O, start = $2E00, size = $0300;
|
|
||||||
# Note $a000-$bfff is against the BASIC cartridge, may require booting with OPTION.
|
|
||||||
TABLES: file = %O, define = yes, start = __TABLESEG_START__, size = __TABLESEG_SIZE__;
|
|
||||||
# We reserve $4000-7fff for the bank-switch window.
|
|
||||||
# In theory we could keep data and code here that we only use on 48k/64k systems.
|
|
||||||
BANKSWITCH: file = "", define = yes, start = __BANKSY_START__, size = __BANKSY_SIZE__;
|
|
||||||
# "main program" load chunk
|
|
||||||
MAIN: file = %O, define = yes, start = %S, size = __FRAMEBUFFER_START__ - __STACKSIZE__ - __RESERVED_MEMORY__ - %S;
|
|
||||||
}
|
|
||||||
FILES {
|
|
||||||
%O: format = atari;
|
|
||||||
}
|
|
||||||
FORMATS {
|
|
||||||
atari: runad = start,
|
|
||||||
initad = SYSCHKCHNK: __SYSTEM_CHECK__;
|
|
||||||
}
|
|
||||||
SEGMENTS {
|
|
||||||
ZEROPAGE: load = ZP, type = zp;
|
|
||||||
EXTZP: load = ZP, type = zp, optional = yes;
|
|
||||||
SYSCHK: load = SYSCHKCHNK, type = rw, define = yes, optional = yes;
|
|
||||||
TABLES: load = TABLES, type = ro, optional = yes, align = 256;
|
|
||||||
BANKSWICH: load = BANKSWITCH, type = ro, optional = yes;
|
|
||||||
STARTUP: load = MAIN, type = ro, define = yes;
|
|
||||||
LOWBSS: load = MAIN, type = rw, optional = yes; # not zero initialized
|
|
||||||
LOWCODE: load = MAIN, type = ro, define = yes, optional = yes;
|
|
||||||
ONCE: load = MAIN, type = ro, optional = yes;
|
|
||||||
CODE: load = MAIN, type = ro, define = yes;
|
|
||||||
RODATA: load = MAIN, type = ro;
|
|
||||||
DATA: load = MAIN, type = rw;
|
|
||||||
INIT: load = MAIN, type = rw, optional = yes;
|
|
||||||
BSS: load = MAIN, type = bss, define = yes;
|
|
||||||
}
|
|
||||||
FEATURES {
|
|
||||||
CONDES: type = constructor,
|
|
||||||
label = __CONSTRUCTOR_TABLE__,
|
|
||||||
count = __CONSTRUCTOR_COUNT__,
|
|
||||||
segment = ONCE;
|
|
||||||
CONDES: type = destructor,
|
|
||||||
label = __DESTRUCTOR_TABLE__,
|
|
||||||
count = __DESTRUCTOR_COUNT__,
|
|
||||||
segment = RODATA;
|
|
||||||
CONDES: type = interruptor,
|
|
||||||
label = __INTERRUPTOR_TABLE__,
|
|
||||||
count = __INTERRUPTOR_COUNT__,
|
|
||||||
segment = RODATA,
|
|
||||||
import = __CALLIRQ__;
|
|
||||||
}
|
|
||||||
175
imul8xe.s
Normal file
175
imul8xe.s
Normal file
|
|
@ -0,0 +1,175 @@
|
||||||
|
FR0 = $d4 ; float48
PORTB = $d301 ; memory-map control register written by the bank-switch code

EXTENDED_RAM = $4000 ; 16KiB bank on the XE

; Lookup table: multiplier byte -> PORTB value for bank-switch.
; The top two bits of the index select one of four 16 KiB banks.
; On the 130XE the bank number occupies PORTB bits 2-3 (bit 1 is the
; BASIC ROM enable), so index bits 7-6 are shifted right by 4.
; This encoding must stay identical to the bank_switch macro below.
; NOTE(review): the base value $c1 leaves bit 1 clear, which maps BASIC
; in at $a000-$bfff -- confirm that is intended.
.align 256
bankswitch:
.repeat 256, i
    .byte ((i & $c0) >> 4) | $c1
.endrepeat

; Unsigned 8-bit x 8-bit -> 16-bit multiply via the extended-RAM table.
;
; dest: zero-page address; dest..dest+1 receive the product (low, high),
;       dest+2..dest+3 are used as a scratch pointer
; arg1: address of the first factor (its LSB is handled by a final add)
; arg2: address of the second factor (selects bank and page)
;
; 58-77 cycles
; clobbers a, x, y, dest to dest + 3
; Leaves the selected extended bank mapped at EXTENDED_RAM on exit.
.macro imul8xe dest, arg1, arg2
    .local done
    .local output
    .local ptr

    output = dest
    ptr = dest + 2 ; scratch space assumed

    ; Table address inside the bank:
    ;   low byte  = arg1 with its LSB cleared (entries are 2 bytes wide)
    ;   high byte = low 6 bits of arg2, offset by the window's base page
    lda arg1                ; 3 cyc
    and #$fe                ; 2 cyc
    sta ptr                 ; 3 cyc
    lda arg2                ; 3 cyc
    and #$3f                ; 2 cyc
    clc                     ; 2 cyc
    adc #>EXTENDED_RAM      ; 2 cyc
    sta ptr + 1             ; 3 cyc

    ; top 2 bits of arg2 are the table bank selector
    ldx arg2                ; 3 cyc
    lda bankswitch,x        ; 4 cyc - the table label, not the bank_switch macro
    sta PORTB               ; 4 cyc

    ; copy the 16-bit entry into output
    ldy #0                  ; 2 cyc
    lda (ptr),y             ; 5 cyc
    sta output              ; 3 cyc
    iny                     ; 2 cyc
    lda (ptr),y             ; 5 cyc
    sta output+1            ; 3 cyc

    ; note: we are not restoring memory to save 6 cycles!
    ; this means those 16kb have to be switched back to base RAM
    ; if we need to use them anywhere else
    ;;; restore memory
    ;;lda #$81 ; 2 cyc - disabled
    ;;sta PORTB ; 4 cyc - disabled

    ; the table only holds even values of arg1; check the bit we skipped
    lda arg1                ; 3 cyc
    and #1                  ; 2 cyc
    beq done                ; 2 cyc

    ; odd arg1: add arg2 one last time for the skipped bit
    clc                     ; 2 cyc
    lda arg2                ; 3 cyc
    adc output              ; 3 cyc
    sta output              ; 3 cyc
    lda #0                  ; 2 cyc
    adc output+1            ; 3 cyc
    sta output+1            ; 3 cyc

done:
.endmacro

; Map extended-RAM bank `bank` (0-3) into the EXTENDED_RAM window.
; Produces the same PORTB values as the bankswitch table:
; bank number in bits 2-3 on top of the $c1 base.
; clobbers a
.macro bank_switch bank
    lda #((bank << 2) | $c1)
    sta PORTB
.endmacro
|
||||||
|
|
||||||
|
; Build the full multiplication table used by the imul8xe macro.
; Walks the table in four 16 KiB chunks: for each chunk it maps the
; matching extended-RAM bank with bank_switch and calls
; imul8xe_init_section to fill it. arg2 is not reset between chunks,
; so it carries over from one bank to the next.
;
; FR1 and FR2 are not defined in this file; they are assumed to be
; zero-page locations provided by the including source -- TODO confirm.
;
; clobbers: a, plus everything imul8xe_init_section clobbers
; Leaves bank 3 mapped at EXTENDED_RAM on return.
.proc imul8xe_init

    arg1 = FR1
    arg2 = FR2

    ; start both multipliers at zero
    lda #$00
    sta arg1
    sta arg2

    ; arg2 = $00..$3f
    bank_switch 0
    jsr imul8xe_init_section

    ; arg2 = $40..$7f
    bank_switch 1
    jsr imul8xe_init_section

    ; arg2 = $80..$bf
    bank_switch 2
    jsr imul8xe_init_section

    ; arg2 = $c0..$ff
    bank_switch 3
    jsr imul8xe_init_section

    rts
.endproc
|
||||||
|
|
||||||
|
; Initialize one 16 KiB chunk of the multiplication table.
; The destination bank must already be mapped at EXTENDED_RAM.
;
; Layout written (and read back by the imul8xe macro):
;   page  = (arg2 & $3f) + >EXTENDED_RAM
;   entry = 2 bytes at offset (arg1 & $fe): low byte, then high byte,
;           holding (arg1 & $fe) * arg2
;
; input:    arg2 = first multiplier value for this bank
; output:   arg2 advanced by $40, ready for the next bank
; clobbers: a, y, arg1, result..result+1, ptr..ptr+1
;
; FR1, FR2 and temp2 are not defined in this file; they are assumed to be
; zero-page locations provided by the including source -- TODO confirm.
.proc imul8xe_init_section
    arg1 = FR1
    arg2 = FR2
    result = FR0
    ptr = temp2

    ; ptr = start of the bank window
    lda #<EXTENDED_RAM
    sta ptr
    lda #>EXTENDED_RAM
    sta ptr + 1

    ; outer loop: one 256-byte page per arg2 value, $40 pages per bank
outer_loop:

    ; reset result to 0 (arg1 = 0 term)
    lda #0
    sta result
    sta result + 1

    ; inner loop: even arg1 values $00 -> $fe
inner_loop:

    ; copy result to the table, low byte first then high byte
    ldy #0
    lda result
    sta (ptr),y
    iny
    lda result + 1
    sta (ptr),y

    ; result += 2 * arg2 (two 16-bit adds; arg1 advances by 2 per entry)
    clc
    lda arg2
    adc result
    sta result
    lda #0
    adc result + 1
    sta result + 1

    clc
    lda arg2
    adc result
    sta result
    lda #0
    adc result + 1
    sta result + 1

    ; inner loop check: step two bytes; low byte wrapping to 0 ends the page
    inc arg1
    inc arg1
    inc ptr
    inc ptr
    bne inner_loop

    ; outer loop check: stop once the page pointer leaves the 16 KiB window
    inc arg2
    inc ptr + 1
    lda ptr + 1
    cmp #>(EXTENDED_RAM + $4000)
    bne outer_loop

    rts

.endproc
|
||||||
2181
mandel-core.s
2181
mandel-core.s
File diff suppressed because it is too large
Load diff
15
mandel.c
15
mandel.c
|
|
@ -1,15 +0,0 @@
|
||||||
/**
|
|
||||||
* The UI and I/O wrapper for the Mandelbrot runner, in C.
|
|
||||||
*
|
|
||||||
* For the moment *all* logic is in mandel-core.s, I'm just
|
|
||||||
* trying to get this to run within a cc65 environment.
|
|
||||||
* Eventually just the inner loop fun will live in there.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include "mandel.h"
|
|
||||||
|
|
||||||
void main(void) {
|
|
||||||
mandel_start();
|
|
||||||
}
|
|
||||||
4
mandel.h
4
mandel.h
|
|
@ -1,4 +0,0 @@
|
||||||
#include <inttypes.h>
|
|
||||||
|
|
||||||
// From mandel-core.s:
|
|
||||||
extern void mandel_start(void);
|
|
||||||
26
readme.md
26
readme.md
|
|
@ -14,37 +14,33 @@ Non-goals:
|
||||||
|
|
||||||
Enjoy! I'll probably work on this off and on for the next few weeks until I've got it producing fractals.
|
Enjoy! I'll probably work on this off and on for the next few weeks until I've got it producing fractals.
|
||||||
|
|
||||||
-- brooke, january 2023 - december 2024
|
-- brooke, january 2023 - february 2024
|
||||||
|
|
||||||
## Current state
|
## Current state
|
||||||
|
|
||||||
Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 6 preset viewports via the number keys.
|
Basic rendering is functional, but no interactive behavior (zoom/pan) or benchmarking is done yet.
|
||||||
|
|
||||||
The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
|
The 16-bit signed integer multiplication works; it takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
|
||||||
|
|
||||||
* 16-bit multiplies are decomposed into 4 8-bit unsigned multiplies and some addition
|
The main loop is a basic add-and-shift using 16-bit adds, which requires flipping the sign of negative inputs (otherwise you'd have to add all those sign-extension bits). It runs in 470-780 cycles depending on input.
|
||||||
* an optimized case for squares uses a table of 8-bit squares to reduce the number of 8-bit multiplication sub-ops
|
|
||||||
* when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications
|
|
||||||
* without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication
|
|
||||||
|
|
||||||
The mandelbrot calculations are done using 3.13-precision fixed point numbers with 6.26-precision intermediates.
|
The mandelbrot calculations are done using 4.12-precision fixed point numbers. It may be possible to squish this down to 3.13.
|
||||||
|
|
||||||
Iterations are capped at 255.
|
Iterations are capped at 255.
|
||||||
|
|
||||||
The pixels are run in a progressive layout to get the basic shape on screen faster.
|
The pixels are run in a progressive layout to get the basic shape on screen faster.
|
||||||
|
|
||||||
There is a running counter of ms/px using the vertical blank interrupts as a timer, used to track our progress. :D
|
## Next steps
|
||||||
|
|
||||||
There's a check for cycles in (zx,zy) output when in the 'lake'; if values repeat, they cannot escape. This is a big time saver in fractint.
|
Add a running counter of ms/px using the vertical blank interrupts as a timer. This'll show how further work improves it!
|
||||||
|
|
||||||
There's some cute color cycling.
|
Check for cycles in (zx,zy) output when in the 'lake'; if values repeat, they cannot escape. This is a big time saver in fractint.
|
||||||
|
|
||||||
|
I may be able to do a faster multiply using tables of squares for 8-bit component multiplication.
|
||||||
|
(done)
|
||||||
|
|
||||||
## Deps and build instructions
|
## Deps and build instructions
|
||||||
|
|
||||||
I'm using `ca65` as a macro assembler, and have a Unix-style `Makefile` for building. Should work fairly easily on Linux and Mac. Might work on "raw" Windows but I use WSL for that.
|
I'm using `ca65` as a macro assembler, and have a Unix-style `Makefile` for building. Should work fairly easily on Linux and Mac. Might work on "raw" Windows but I use WSL for that.
|
||||||
|
|
||||||
Currently produces a `.xex` executable, which can be booted up in common Atari emulators and some i/o devices.
|
Currently produces a `.xex` executable, which can be booted up in common Atari emulators and some i/o devices.
|
||||||
|
|
||||||
## Todo
|
|
||||||
|
|
||||||
See ideas in `todo.md`.
|
|
||||||
|
|
|
||||||
12
tables.js
12
tables.js
|
|
@ -22,10 +22,7 @@ console.log(
|
||||||
.export mul_lobyte256
|
.export mul_lobyte256
|
||||||
.export mul_hibyte256
|
.export mul_hibyte256
|
||||||
.export mul_hibyte512
|
.export mul_hibyte512
|
||||||
.export sqr_lobyte
|
|
||||||
.export sqr_hibyte
|
|
||||||
|
|
||||||
; (i * i + 1) / 2 for the multiplier
|
|
||||||
.align 256
|
.align 256
|
||||||
mul_lobyte256:
|
mul_lobyte256:
|
||||||
${db((i) => squares[i] & 0xff)}
|
${db((i) => squares[i] & 0xff)}
|
||||||
|
|
@ -38,13 +35,4 @@ ${db((i) => (squares[i] >> 8) & 0xff)}
|
||||||
mul_hibyte512:
|
mul_hibyte512:
|
||||||
${db((i) => (squares[i + 256] >> 8) & 0xff)}
|
${db((i) => (squares[i + 256] >> 8) & 0xff)}
|
||||||
|
|
||||||
; (i * i) for the plain squares
|
|
||||||
.align 256
|
|
||||||
sqr_lobyte:
|
|
||||||
${db((i) => (i * i) & 0xff)}
|
|
||||||
|
|
||||||
.align 256
|
|
||||||
sqr_hibyte:
|
|
||||||
${db((i) => ((i * i) >> 8) & 0xff)}
|
|
||||||
|
|
||||||
`);
|
`);
|
||||||
|
|
|
||||||
17
todo.md
17
todo.md
|
|
@ -1,17 +0,0 @@
|
||||||
things to try:
|
|
||||||
|
|
||||||
* fix status bar to show elapsed time, per-iter time, per-pixel iter count
|
|
||||||
|
|
||||||
* 'turbo' mode disabling graphics in full or part
|
|
||||||
|
|
||||||
* patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D
|
|
||||||
|
|
||||||
* maybe clean up the load/layout of the big mul table
|
|
||||||
|
|
||||||
* consider alternate lookup tables in the top 16KB under ROM
|
|
||||||
|
|
||||||
* y-axis mirror optimization
|
|
||||||
|
|
||||||
* extract viewport for display & re-input via keyboard
|
|
||||||
|
|
||||||
* fujinet screenshot/viewport uploader
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue