split memory, wip

Appears to work on the 800, but on XL/XE the new memory layout overlaps BASIC.
This commit is contained in:
Brooke Vibber 2024-12-29 21:06:48 -08:00
parent 0c63430dd9
commit 883f926e57
2 changed files with 34 additions and 33 deletions

View file

@ -8,6 +8,7 @@ MEMORY {
ZP: file = "", define = yes, start = $0082, size = $007E;
#MAIN: file = %O, define = yes, start = %S, size = $BC20 - %S;
MAIN: file = %O, define = yes, start = %S, size = $4000 - %S;
TABLES: file = %O, define = yes, start = $a000, size = $c000 - $a000;
}
FILES {
%O: format = atari;
@ -22,5 +23,5 @@ SEGMENTS {
RODATA: load = MAIN, type = ro optional = yes;
DATA: load = MAIN, type = rw optional = yes;
BSS: load = MAIN, type = bss, optional = yes, define = yes;
TABLES: load = MAIN, type = ro, optional = yes, align = 256;
TABLES: load = TABLES, type = ro, optional = yes, align = 256;
}

View file

@ -113,6 +113,8 @@ KEY_RIGHT = $87
.import mul_lobyte256
.import mul_hibyte256
.import mul_hibyte512
.import sqr_lobyte
.import sqr_hibyte
.data
@ -701,42 +703,40 @@ arg2_pos:
.endmacro
.macro sqr16_impl xe
.local arg
.local result
.local inter
.local arg_pos
arg = FR0 ; 16-bit arg (clobbered)
result = FR2 ; 32-bit result
inter = temp2
.scope
arg = FR0 ; 16-bit arg (clobbered)
result = FR2 ; 32-bit result
;inter = temp2
inter = FR1
; hl * hl
; (h*256 + l) * (h*256 + l)
; h*256*(h*256 + l) + l*(h*256 + l)
; h*h*256*256 + h*l*256 + h*l*256 + l*l
lda arg + 1
bpl arg_pos
neg16 arg
arg_pos:
sqr8 result, arg
lda #0
sta result + 2
sta result + 3
; hl * hl
; (h*256 + l) * (h*256 + l)
; h*256*(h*256 + l) + l*(h*256 + l)
; h*h*256*256 + h*l*256 + h*l*256 + l*l
imul8 inter, arg + 1, arg, xe
add16 result + 1, result + 1, inter
add_carry result + 3
add16 result + 1, result + 1, inter
add_carry result + 3
sqr8 result, arg
;imul8 inter, arg, arg, xe
lda #0
sta result + 2
sta result + 3
sqr8 inter, arg + 1, arg + 1, xe
add16 result + 2, result + 2, inter
imul8 inter, arg + 1, arg, xe
add16 result + 1, result + 1, inter
add_carry result + 3
add16 result + 1, result + 1, inter
add_carry result + 3
; In case of negative inputs, adjust high word
; https://stackoverflow.com/a/28827013
lda arg + 1
bpl arg_pos
sub16 result + 2, result + 2, arg
sub16 result + 2, result + 2, arg
arg_pos:
sqr8 inter, arg + 1
;imul8 inter, arg + 1, arg + 1, xe
add16 result + 2, result + 2, inter
rts ; 6 cyc
rts ; 6 cyc
.endscope
.endmacro
.proc imul16_func
@ -748,11 +748,11 @@ arg_pos:
.endproc
.proc sqr16_func
imul16_impl 0
sqr16_impl 0
.endproc
.proc sqr16xe_func
imul16_impl 1
sqr16_impl 1
.endproc
.macro round16 arg