wip refactoring

This commit is contained in:
Brooke Vibber 2024-12-29 17:56:14 -08:00
parent f903272335
commit 3ab5006aa3

View file

@ -374,6 +374,14 @@ viewport_oy:
copy16 dest, FR2 + 2 ; 12 cyc
.endmacro
; sqr16_round dest, arg, shift
; Square `arg` and store the shifted, rounded result in `dest`.
; Currently just forwards to imul16_round with `arg` as both multiplicands,
; so its register/zero-page side effects are whatever imul16_round's are.
; The commented-out lines below are the dedicated path through sqr16_func
; (argument in FR0, product in FR2); they are disabled for now.
.macro sqr16_round dest, arg, shift
imul16_round dest, arg, arg, shift
;copy16 FR0, arg ; 12 cyc
;jsr sqr16_func ; ? cyc
;shift_round_16 FR2, shift
;copy16 dest, FR2 + 2 ; 12 cyc
.endmacro
; clobbers a, x
.macro sqr8 dest, arg
ldx arg
@ -537,6 +545,14 @@ init:
lda #.hibyte(imul16xe_func)
sta imul16_func + 2
; ditto for sqr16_func -> sqr16xe_func:
; overwrite the first three bytes of sqr16_func with `jmp sqr16xe_func`,
; so anything that enters sqr16_func afterwards continues in the xe variant.
; NOTE(review): the condition guarding this patch is outside this view.
lda #$4c ; 'jmp' opcode
sta sqr16_func
lda #.lobyte(sqr16xe_func) ; jmp operand, low byte
sta sqr16_func + 1
lda #.hibyte(sqr16xe_func) ; jmp operand, high byte
sta sqr16_func + 2
; create the lookup table
; go through the input set, in four 16KB chunks
@ -684,6 +700,45 @@ arg2_pos:
rts ; 6 cyc
.endmacro
; sqr16_impl xe
; Body of a routine that squares a signed 16-bit value.
;   in:      FR0 (16-bit argument)
;   out:     FR2 (32-bit result)
;   scratch: temp2 (16-bit partial product); a and x (sqr8 clobbers them)
;   xe:      forwarded unchanged to imul8
; The expansion ends with rts, so it is meant to be the whole body of a .proc.
.macro sqr16_impl xe
    .local arg
    .local result
    .local inter
    .local arg_pos

    ; NOTE(review): the original comment called arg "clobbered", but nothing
    ; visible in this macro stores to it -- confirm against imul8.
    arg = FR0       ; 16-bit arg
    result = FR2    ; 32-bit result
    inter = temp2   ; 16-bit partial product

    ; With h = high byte and l = low byte of arg:
    ;   hl * hl
    ;   = (h*256 + l) * (h*256 + l)
    ;   = h*256*(h*256 + l) + l*(h*256 + l)
    ;   = h*h*256*256 + h*l*256 + h*l*256 + l*l

    ; result = l*l, with the upper 16 bits cleared
    sqr8 result, arg
    lda #0
    sta result + 2
    sta result + 3

    ; result += (h*l) << 8, added twice; carry is propagated into the top byte
    imul8 inter, arg + 1, arg, xe
    add16 result + 1, result + 1, inter
    add_carry result + 3
    add16 result + 1, result + 1, inter
    add_carry result + 3

    ; result += (h*h) << 16
    ; sqr8 is declared as (dest, arg), so it gets exactly two arguments here,
    ; matching the l*l call above.
    sqr8 inter, arg + 1
    add16 result + 2, result + 2, inter

    ; In case of negative inputs, adjust high word
    ; https://stackoverflow.com/a/28827013
    ; Both multiplicands are the same value, so when arg is negative the
    ; correction (subtract arg from the high word) is applied twice.
    lda arg + 1
    bpl arg_pos
    sub16 result + 2, result + 2, arg
    sub16 result + 2, result + 2, arg
arg_pos:

    rts ; 6 cyc
.endmacro
; imul16_func: callable 16-bit multiply routine; its body is the
; imul16_impl macro expanded with xe = 0.
; init may overwrite the first three bytes of this routine with a jmp
; to imul16xe_func.
.proc imul16_func
imul16_impl 0
.endproc
@ -692,6 +747,14 @@ arg2_pos:
imul16_impl 1
.endproc
; sqr16_func: entry point intended for 16-bit squaring (see the disabled
; `jsr sqr16_func` path in sqr16_round). init may overwrite its first three
; bytes with a jmp to sqr16xe_func.
; NOTE(review): the body expands imul16_impl, not sqr16_impl, so as written
; this is a second copy of the general multiply -- confirm whether
; `sqr16_impl 0` was intended once that macro is ready.
.proc sqr16_func
imul16_impl 0
.endproc
; sqr16xe_func: xe = 1 counterpart of sqr16_func; init patches sqr16_func
; to jump here.
; NOTE(review): the body expands imul16_impl, not sqr16_impl -- confirm
; whether `sqr16_impl 1` was intended once that macro is ready.
.proc sqr16xe_func
imul16_impl 1
.endproc
.macro round16 arg
; Round top 16 bits of 32-bit fixed-point number in-place
.local increment
@ -803,10 +866,10 @@ keep_going:
quick_exit zy, 2
; zx_2 = zx * zx
imul16_round zx_2, zx, zx, 4
sqr16_round zx_2, zx, 4
; zy_2 = zy * zy
imul16_round zy_2, zy, zy, 4
sqr16_round zy_2, zy, 4
; zx_zy = zx * zy
imul16_round zx_zy, zx, zy, 4