7 changed files with 298 additions and 824 deletions
--- a/10
+++ b/10
@ -2,11 +2,8 @@
 all : mandel.xex
-mandel.xex : mandel.o mandel-core.o tables.o atari-xex.cfg
+mandel.xex : mandel.o tables.o atari-asm-xex.cfg
-	ld65 -C ./atari-xex.cfg --mapfile mandel.map -o $@ mandel.o mandel-core.o tables.o atari.lib
+	ld65 -C ./atari-asm-xex.cfg -o $@ mandel.o tables.o
 mandel.s : mandel.c mandel.h
 	cc65 -o $@ mandel.c
 %.o : %.s
 	ca65 -o $@ $<
@ -16,7 +13,6 @@ tables.s : tables.js
 clean :
 	rm -f tables.s
 	rm -f mandel.s
 	rm -f *.o
 	rm -f *.xex
-	rm -f mandel.map
+
--- a/atari-xex.cfg
+++ b/atari-xex.cfg
@ -1,69 +0,0 @@
 # Sample linker configuration for C programs using the Atari binary file support.
 # Use with: cl65 -tatari -Catari-xex.cfg prog.c -o prog.xex
 FEATURES {
    STARTADDRESS: default = $8000;
 }
 SYMBOLS {
    __SYSTEM_CHECK__:    type = import;  # force inclusion of "system check" load chunk
    __STACKSIZE__:       type = weak, value = $0800; # 2k stack
    __STARTADDRESS__:    type = export, value = %S;
    __RESERVED_MEMORY__: type = weak, value = $0000;
    __SYSCHKHDR__:       type = export, value = 0; # Disable system check header
    __SYSCHKTRL__:       type = export, value = 0; # Disable system check trailer
    __TABLESEG_START__:    type = weak, value = $2E00 + $0300;
    __TABLESEG_SIZE__:     type = weak, value = 6 * $100;
    __BANKSY_START__:  type = weak, value = $4000;
    __BANKSY_SIZE__:   type = weak, value = $4000;
    __FRAMEBUFFER_START__: type = weak, value = $A000;
 }
 MEMORY {
 # Note -- $80 and $81 (LOMEM) appear to be reserved in ZP.
    ZP:         file = "", define = yes, start = $0082, size = $007E;
 # "system check" load chunk
    SYSCHKCHNK: file = %O,               start = $2E00, size = $0300;
 # Note $a000-$bfff is against the BASIC cartridge, may require booting with OPTION.
    TABLES:     file = %O, define = yes, start = __TABLESEG_START__, size = __TABLESEG_SIZE__;
 # We reserve $4000-7fff for the bank-switch window.
 # In theory we could keep data and code here that we only use on 48k/64k systems.
    BANKSWITCH: file = "", define = yes, start = __BANKSY_START__, size = __BANKSY_SIZE__;
 # "main program" load chunk
    MAIN:       file = %O, define = yes, start = %S, size = __FRAMEBUFFER_START__ - __STACKSIZE__ - __RESERVED_MEMORY__ - %S;
 }
 FILES {
    %O: format = atari;
 }
 FORMATS {
    atari: runad = start,
           initad = SYSCHKCHNK: __SYSTEM_CHECK__;
 }
 SEGMENTS {
    ZEROPAGE:  load = ZP,         type = zp;
    EXTZP:     load = ZP,         type = zp,                optional = yes;
    SYSCHK:    load = SYSCHKCHNK, type = rw,  define = yes, optional = yes;
    TABLES:    load = TABLES,     type = ro,  optional = yes, align = 256;
    BANKSWICH: load = BANKSWITCH, type = ro,  optional = yes;
    STARTUP:   load = MAIN,       type = ro,  define = yes;
    LOWBSS:    load = MAIN,       type = rw,                optional = yes;  # not zero initialized
    LOWCODE:   load = MAIN,       type = ro,  define = yes, optional = yes;
    ONCE:      load = MAIN,       type = ro,                optional = yes;
    CODE:      load = MAIN,       type = ro,  define = yes;
    RODATA:    load = MAIN,       type = ro;
    DATA:      load = MAIN,       type = rw;
    INIT:      load = MAIN,       type = rw,                optional = yes;
    BSS:       load = MAIN,       type = bss, define = yes;
 }
 FEATURES {
    CONDES: type    = constructor,
            label   = __CONSTRUCTOR_TABLE__,
            count   = __CONSTRUCTOR_COUNT__,
            segment = ONCE;
    CONDES: type    = destructor,
            label   = __DESTRUCTOR_TABLE__,
            count   = __DESTRUCTOR_COUNT__,
            segment = RODATA;
    CONDES: type    = interruptor,
            label   = __INTERRUPTOR_TABLE__,
            count   = __INTERRUPTOR_COUNT__,
            segment = RODATA,
            import  = __CALLIRQ__;
 }
--- a/mandel.c
+++ b/mandel.c
@ -1,15 +0,0 @@
 /**
 * The UI and I/O wrapper for the Mandelbrot runner, in C.
 *
 * For the moment *all* logic is in mandel-core.s, I'm just
 * trying to get this to run within a cc65 environment.
 * Eventually just the inner loop fun will live in there.
 */
 #include <stdlib.h>
 #include <stdio.h>
 #include "mandel.h"
 void main(void) {
    mandel_start();
 }
--- a/mandel.h
+++ b/mandel.h
@ -1,4 +0,0 @@
 #include <inttypes.h>
 // From mandel-core.s:
 extern void mandel_start(void);
--- a/mandel-core.s
+++ b/mandel-core.s
--- a/readme.md
+++ b/readme.md
@ -18,7 +18,7 @@ Enjoy! I'll probably work on this off and on for the next few weeks until I've g
 ## Current state
-Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 6 preset viewports via the number keys.
+Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 4 preset viewports via the number keys.
 The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
@ -27,7 +27,7 @@ The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 3
 * when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications
 * without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication
-The mandelbrot calculations are done using 3.13-precision fixed point numbers with 6.26-precision intermediates.
+The Mandelbrot calculations are done using 4.12-precision fixed-point numbers with 8.24-precision intermediates. It may be possible to squish this down to 3.13/6.26.
 Iterations are capped at 255.
--- a/todo.md
+++ b/todo.md
@ -1,17 +1,19 @@
 things to try:
-* fix status bar to show elapsed time, per-iter time, per-pixel iter count
+* skip add on the top-byte multiply in sqr8/mul8
-
+  * should save a few cycles, suggestion by jamey
 * 'turbo' mode disabling graphics in full or part
 * patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D
-* maybe clean up the load/layout of the big mul table
+* try 3.13 fixed point instead of 4.12 for more precision
-
+  * can we get away without the extra bit?
-* consider alternate lookup tables in the top 16KB under ROM
+  * since the exit comparison would then be done in 6.26 space, I think so
 * y-axis mirror optimization
 * 'wide pixels' 2x and 4x for a fuller initial image in the tiered rendering
  * maybe redo tiering to just 4x4, 2x2, 1x1?
 * extract viewport for display & re-input via keyboard
 * fujinet screenshot/viewport uploader