split memory, wip

Appears to work on the Atari 800, but on XL/XE machines the new memory layout overlaps BASIC.
This commit is contained in:
Brooke Vibber 2024-12-29 21:06:48 -08:00
parent 0c63430dd9
commit 883f926e57
2 changed files with 34 additions and 33 deletions

View file

@ -8,6 +8,7 @@ MEMORY {
ZP: file = "", define = yes, start = $0082, size = $007E; ZP: file = "", define = yes, start = $0082, size = $007E;
#MAIN: file = %O, define = yes, start = %S, size = $BC20 - %S; #MAIN: file = %O, define = yes, start = %S, size = $BC20 - %S;
MAIN: file = %O, define = yes, start = %S, size = $4000 - %S; MAIN: file = %O, define = yes, start = %S, size = $4000 - %S;
TABLES: file = %O, define = yes, start = $a000, size = $c000 - $a000;
} }
FILES { FILES {
%O: format = atari; %O: format = atari;
@ -22,5 +23,5 @@ SEGMENTS {
RODATA: load = MAIN, type = ro optional = yes; RODATA: load = MAIN, type = ro optional = yes;
DATA: load = MAIN, type = rw optional = yes; DATA: load = MAIN, type = rw optional = yes;
BSS: load = MAIN, type = bss, optional = yes, define = yes; BSS: load = MAIN, type = bss, optional = yes, define = yes;
TABLES: load = MAIN, type = ro, optional = yes, align = 256; TABLES: load = TABLES, type = ro, optional = yes, align = 256;
} }

View file

@ -113,6 +113,8 @@ KEY_RIGHT = $87
.import mul_lobyte256 .import mul_lobyte256
.import mul_hibyte256 .import mul_hibyte256
.import mul_hibyte512 .import mul_hibyte512
.import sqr_lobyte
.import sqr_hibyte
.data .data
@ -701,42 +703,40 @@ arg2_pos:
.endmacro .endmacro
.macro sqr16_impl xe .macro sqr16_impl xe
.local arg .scope
.local result arg = FR0 ; 16-bit arg (clobbered)
.local inter result = FR2 ; 32-bit result
.local arg_pos ;inter = temp2
arg = FR0 ; 16-bit arg (clobbered) inter = FR1
result = FR2 ; 32-bit result
inter = temp2
; hl * hl lda arg + 1
; (h*256 + l) * (h*256 + l) bpl arg_pos
; h*256*(h*256 + l) + l*(h*256 + l) neg16 arg
; h*h*256*256 + h*l*256 + h*l*256 + l*l arg_pos:
sqr8 result, arg ; hl * hl
lda #0 ; (h*256 + l) * (h*256 + l)
sta result + 2 ; h*256*(h*256 + l) + l*(h*256 + l)
sta result + 3 ; h*h*256*256 + h*l*256 + h*l*256 + l*l
imul8 inter, arg + 1, arg, xe sqr8 result, arg
add16 result + 1, result + 1, inter ;imul8 inter, arg, arg, xe
add_carry result + 3 lda #0
add16 result + 1, result + 1, inter sta result + 2
add_carry result + 3 sta result + 3
sqr8 inter, arg + 1, arg + 1, xe imul8 inter, arg + 1, arg, xe
add16 result + 2, result + 2, inter add16 result + 1, result + 1, inter
add_carry result + 3
add16 result + 1, result + 1, inter
add_carry result + 3
; In case of negative inputs, adjust high word sqr8 inter, arg + 1
; https://stackoverflow.com/a/28827013 ;imul8 inter, arg + 1, arg + 1, xe
lda arg + 1 add16 result + 2, result + 2, inter
bpl arg_pos
sub16 result + 2, result + 2, arg
sub16 result + 2, result + 2, arg
arg_pos:
rts ; 6 cyc rts ; 6 cyc
.endscope
.endmacro .endmacro
.proc imul16_func .proc imul16_func
@ -748,11 +748,11 @@ arg_pos:
.endproc .endproc
.proc sqr16_func .proc sqr16_func
imul16_impl 0 sqr16_impl 0
.endproc .endproc
.proc sqr16xe_func .proc sqr16xe_func
imul16_impl 1 sqr16_impl 1
.endproc .endproc
.macro round16 arg .macro round16 arg