split memory, wip

Appears to work on the Atari 800, but on XL/XE machines the new TABLES area ($A000-$BFFF) overlaps BASIC.
2024-12-29 21:06:48 -08:00 · 2024-12-29 21:06:48 -08:00 · 883f926e57
commit 883f926e57
parent 0c63430dd9
2 changed files with 34 additions and 33 deletions
--- a/atari-asm-xex.cfg
+++ b/atari-asm-xex.cfg
@ -8,6 +8,7 @@ MEMORY {
    ZP:      file = "", define = yes, start = $0082, size = $007E;
    #MAIN:    file = %O, define = yes, start = %S,    size = $BC20 - %S;
    MAIN:    file = %O, define = yes, start = %S,    size = $4000 - %S;
+    TABLES:  file = %O, define = yes, start = $a000, size = $c000 - $a000;
 }
 FILES {
    %O: format = atari;
@ -22,5 +23,5 @@ SEGMENTS {
    RODATA:   load = MAIN,    type = ro   optional = yes;
    DATA:     load = MAIN,    type = rw   optional = yes;
    BSS:      load = MAIN,    type = bss, optional = yes, define = yes;
-    TABLES:   load = MAIN,    type = ro,  optional = yes, align = 256;
+    TABLES:   load = TABLES,  type = ro,  optional = yes, align = 256;
 }
--- a/mandel.s
+++ b/mandel.s
@ -113,6 +113,8 @@ KEY_RIGHT = $87
 .import mul_lobyte256
 .import mul_hibyte256
 .import mul_hibyte512
+.import sqr_lobyte
+.import sqr_hibyte

 .data

@ -701,42 +703,40 @@ arg2_pos:
 .endmacro

 .macro sqr16_impl xe
-    .local arg
-    .local result
-    .local inter
-    .local arg_pos
-    arg = FR0    ; 16-bit arg (clobbered)
-    result = FR2 ; 32-bit result
-    inter = temp2
+    .scope
+        arg = FR0    ; 16-bit arg (clobbered)
+        result = FR2 ; 32-bit result
+        ;inter = temp2
+        inter = FR1

-    ; hl * hl
-    ; (h*256 + l) * (h*256 + l)
-    ; h*256*(h*256 + l) + l*(h*256 + l)
-    ; h*h*256*256 + h*l*256 + h*l*256 + l*l
+        lda arg + 1
+        bpl arg_pos
+        neg16 arg
+    arg_pos:

-    sqr8 result, arg
-    lda #0
-    sta result + 2
-    sta result + 3
+        ; hl * hl
+        ; (h*256 + l) * (h*256 + l)
+        ; h*256*(h*256 + l) + l*(h*256 + l)
+        ; h*h*256*256 + h*l*256 + h*l*256 + l*l

-    imul8 inter, arg + 1, arg, xe
-    add16 result + 1, result + 1, inter
-    add_carry result + 3
-    add16 result + 1, result + 1, inter
-    add_carry result + 3
+        sqr8 result, arg
+        ;imul8 inter, arg, arg, xe
+        lda #0
+        sta result + 2
+        sta result + 3

-    sqr8 inter, arg + 1, arg + 1, xe
-    add16 result + 2, result + 2, inter
+        imul8 inter, arg + 1, arg, xe
+        add16 result + 1, result + 1, inter
+        add_carry result + 3
+        add16 result + 1, result + 1, inter
+        add_carry result + 3

-    ; In case of negative inputs, adjust high word
-    ; https://stackoverflow.com/a/28827013
-    lda arg + 1
-    bpl arg_pos
-    sub16 result + 2, result + 2, arg
-    sub16 result + 2, result + 2, arg
-arg_pos:
+        sqr8 inter, arg + 1
+        ;imul8 inter, arg + 1, arg + 1, xe
+        add16 result + 2, result + 2, inter

-    rts ; 6 cyc
+        rts ; 6 cyc
+    .endscope
 .endmacro

 .proc imul16_func
@ -748,11 +748,11 @@ arg_pos:
 .endproc

 .proc sqr16_func
-    imul16_impl 0
+    sqr16_impl 0
 .endproc

 .proc sqr16xe_func
-    imul16_impl 1
+    sqr16_impl 1
 .endproc

 .macro round16 arg