7 changed files with 298 additions and 824 deletions
--- a/10
+++ b/10
@ -2,11 +2,8 @@

 all : mandel.xex

-mandel.xex : mandel.o mandel-core.o tables.o atari-xex.cfg
-	ld65 -C ./atari-xex.cfg --mapfile mandel.map -o $@ mandel.o mandel-core.o tables.o atari.lib
-
-mandel.s : mandel.c mandel.h
-	cc65 -o $@ mandel.c
+mandel.xex : mandel.o tables.o atari-asm-xex.cfg
+	ld65 -C ./atari-asm-xex.cfg -o $@ mandel.o tables.o

 %.o : %.s
 	ca65 -o $@ $<
@ -16,7 +13,6 @@ tables.s : tables.js

 clean :
 	rm -f tables.s
-	rm -f mandel.s
 	rm -f *.o
 	rm -f *.xex
-	rm -f mandel.map
+
--- a/atari-xex.cfg
+++ b/atari-xex.cfg
@ -1,69 +0,0 @@
-# Sample linker configuration for C programs using the Atari binary file support.
-# Use with: cl65 -tatari -Catari-xex.cfg prog.c -o prog.xex
-FEATURES {
-    STARTADDRESS: default = $8000;
-}
-SYMBOLS {
-    __SYSTEM_CHECK__:    type = import;  # force inclusion of "system check" load chunk
-    __STACKSIZE__:       type = weak, value = $0800; # 2k stack
-    __STARTADDRESS__:    type = export, value = %S;
-    __RESERVED_MEMORY__: type = weak, value = $0000;
-    __SYSCHKHDR__:       type = export, value = 0; # Disable system check header
-    __SYSCHKTRL__:       type = export, value = 0; # Disable system check trailer
-    __TABLESEG_START__:    type = weak, value = $2E00 + $0300;
-    __TABLESEG_SIZE__:     type = weak, value = 6 * $100;
-    __BANKSY_START__:  type = weak, value = $4000;
-    __BANKSY_SIZE__:   type = weak, value = $4000;
-    __FRAMEBUFFER_START__: type = weak, value = $A000;
-}
-MEMORY {
-# Note -- $80 and $81 (LOMEM) appear to be reserved in ZP.
-    ZP:         file = "", define = yes, start = $0082, size = $007E;
-# "system check" load chunk
-    SYSCHKCHNK: file = %O,               start = $2E00, size = $0300;
-# Note $a000-$bfff is against the BASIC cartridge, may require booting with OPTION.
-    TABLES:     file = %O, define = yes, start = __TABLESEG_START__, size = __TABLESEG_SIZE__;
-# We reserve $4000-7fff for the bank-switch window.
-# In theory we could keep data and code here that we only use on 48k/64k systems.
-    BANKSWITCH: file = "", define = yes, start = __BANKSY_START__, size = __BANKSY_SIZE__;
-# "main program" load chunk
-    MAIN:       file = %O, define = yes, start = %S, size = __FRAMEBUFFER_START__ - __STACKSIZE__ - __RESERVED_MEMORY__ - %S;
-}
-FILES {
-    %O: format = atari;
-}
-FORMATS {
-    atari: runad = start,
-           initad = SYSCHKCHNK: __SYSTEM_CHECK__;
-}
-SEGMENTS {
-    ZEROPAGE:  load = ZP,         type = zp;
-    EXTZP:     load = ZP,         type = zp,                optional = yes;
-    SYSCHK:    load = SYSCHKCHNK, type = rw,  define = yes, optional = yes;
-    TABLES:    load = TABLES,     type = ro,  optional = yes, align = 256;
-    BANKSWICH: load = BANKSWITCH, type = ro,  optional = yes;
-    STARTUP:   load = MAIN,       type = ro,  define = yes;
-    LOWBSS:    load = MAIN,       type = rw,                optional = yes;  # not zero initialized
-    LOWCODE:   load = MAIN,       type = ro,  define = yes, optional = yes;
-    ONCE:      load = MAIN,       type = ro,                optional = yes;
-    CODE:      load = MAIN,       type = ro,  define = yes;
-    RODATA:    load = MAIN,       type = ro;
-    DATA:      load = MAIN,       type = rw;
-    INIT:      load = MAIN,       type = rw,                optional = yes;
-    BSS:       load = MAIN,       type = bss, define = yes;
-}
-FEATURES {
-    CONDES: type    = constructor,
-            label   = __CONSTRUCTOR_TABLE__,
-            count   = __CONSTRUCTOR_COUNT__,
-            segment = ONCE;
-    CONDES: type    = destructor,
-            label   = __DESTRUCTOR_TABLE__,
-            count   = __DESTRUCTOR_COUNT__,
-            segment = RODATA;
-    CONDES: type    = interruptor,
-            label   = __INTERRUPTOR_TABLE__,
-            count   = __INTERRUPTOR_COUNT__,
-            segment = RODATA,
-            import  = __CALLIRQ__;
-}
--- a/mandel.c
+++ b/mandel.c
@ -1,15 +0,0 @@
-/**
- * The UI and I/O wrapper for the Mandelbrot runner, in C.
- *
- * For the moment *all* logic is in mandel-core.s, I'm just
- * trying to get this to run within a cc65 environment.
- * Eventually just the inner loop fun will live in there.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include "mandel.h"
-
-void main(void) {
-    mandel_start();
-}
--- a/mandel.h
+++ b/mandel.h
@ -1,4 +0,0 @@
-#include <inttypes.h>
-
-// From mandel-core.s:
-extern void mandel_start(void);
--- a/mandel-core.s
+++ b/mandel-core.s
--- a/readme.md
+++ b/readme.md
@ -18,7 +18,7 @@ Enjoy! I'll probably work on this off and on for the next few weeks until I've g

 ## Current state

-Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 6 preset viewports via the number keys.
+Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 4 preset viewports via the number keys.

 The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.

@ -27,7 +27,7 @@ The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 3
 * when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications
 * without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication

-The mandelbrot calculations are done using 3.13-precision fixed point numbers with 6.26-precision intermediates.
+The Mandelbrot calculations are done using 4.12-precision fixed-point numbers with 8.24-precision intermediates. It may be possible to switch to 3.13 with 6.26 intermediates, trading one integer bit for one more fractional bit.

 Iterations are capped at 255.

--- a/todo.md
+++ b/todo.md
@ -1,17 +1,19 @@
 things to try:

-* fix status bar to show elapsed time, per-iter time, per-pixel iter count
-
-* 'turbo' mode disabling graphics in full or part
+* skip add on the top-byte multiply in sqr8/mul8
+  * should save a few cycles (suggested by jamey)

 * patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D

-* maybe clean up the load/layout of the big mul table
-
-* consider alternate lookup tables in the top 16KB under ROM
+* try 3.13 fixed point instead of 4.12 for more fractional precision
+  * can we get away without the extra integer bit?
+  * the exit comparison would happen in 6.26 space, so I think so

 * y-axis mirror optimization

+* 'wide pixels' 2x and 4x for a fuller initial image in the tiered rendering
+  * maybe redo tiering to just 4x4, 2x2, 1x1?
+
 * extract viewport for display & re-input via keyboard

 * fujinet screenshot/viewport uploader