wip refactoring
This commit is contained in:
parent
f903272335
commit
3ab5006aa3
1 changed files with 65 additions and 2 deletions
67
mandel.s
67
mandel.s
|
@ -374,6 +374,14 @@ viewport_oy:
|
|||
copy16 dest, FR2 + 2 ; 12 cyc
|
||||
.endmacro
|
||||
|
||||
; sqr16_round dest, arg, shift
; Squares a 16-bit value by expanding imul16_round with `arg` passed as both
; multiplicands; `dest` and `shift` are forwarded unchanged.
;   dest  - forwarded to imul16_round as its destination
;   arg   - value to square (used for both operands)
;   shift - forwarded to imul16_round
; The commented-out lines below are an alternative body that would load FR0
; and call sqr16_func instead; they are currently disabled, so this macro is
; functionally the same as `imul16_round dest, arg, arg, shift`.
.macro sqr16_round dest, arg, shift
|
||||
imul16_round dest, arg, arg, shift
|
||||
;copy16 FR0, arg ; 12 cyc
|
||||
;jsr sqr16_func ; ? cyc
|
||||
;shift_round_16 FR2, shift
|
||||
;copy16 dest, FR2 + 2 ; 12 cyc
|
||||
.endmacro
|
||||
|
||||
; clobbers a, x
|
||||
.macro sqr8 dest, arg
|
||||
ldx arg
|
||||
|
@ -537,6 +545,14 @@ init:
|
|||
lda #.hibyte(imul16xe_func)
|
||||
sta imul16_func + 2
|
||||
|
||||
; ditto for sqr16_func -> sqr16xe_func
|
||||
lda #$4c ; 'jmp' opcode
|
||||
sta sqr16_func
|
||||
lda #.lobyte(sqr16xe_func)
|
||||
sta sqr16_func + 1
|
||||
lda #.hibyte(sqr16xe_func)
|
||||
sta sqr16_func + 2
|
||||
|
||||
; create the lookup table
|
||||
; go through the input set, in four 16KB chunks
|
||||
|
||||
|
@ -684,6 +700,45 @@ arg2_pos:
|
|||
rts ; 6 cyc
|
||||
.endmacro
|
||||
|
||||
; sqr16_impl xe
; Emits the body of a 16-bit squaring routine, including the final rts.
;   In:   FR0 = 16-bit argument (high byte's sign bit selects the
;         negative-input correction below)
;   Out:  FR2 = 32-bit result
;   Uses: temp2 as a 16-bit intermediate; A is written directly here, and
;         sqr8 is documented as clobbering A and X.
;   xe:   passed through to imul8 unchanged.
.macro sqr16_impl xe
    .local arg
    .local result
    .local inter
    .local arg_pos
    arg = FR0           ; 16-bit arg (original note says "clobbered"; nothing
                        ; in this macro writes it directly)
    result = FR2        ; 32-bit result
    inter = temp2       ; 16-bit partial product

    ; hl * hl
    ; (h*256 + l) * (h*256 + l)
    ; h*256*(h*256 + l) + l*(h*256 + l)
    ; h*h*256*256 + h*l*256 + h*l*256 + l*l

    ; l*l goes into the low word; clear the high word before accumulating.
    sqr8 result, arg
    lda #0
    sta result + 2
    sta result + 3

    ; h*l is needed twice, each time added one byte up (the *256 term),
    ; with the carry propagated into the top byte.
    imul8 inter, arg + 1, arg, xe
    add16 result + 1, result + 1, inter
    add_carry result + 3
    add16 result + 1, result + 1, inter
    add_carry result + 3

    ; h*h is added two bytes up (the *256*256 term).
    ; sqr8 is declared as `sqr8 dest, arg`, so it takes exactly two arguments.
    sqr8 inter, arg + 1
    add16 result + 2, result + 2, inter

    ; In case of negative inputs, adjust high word
    ; https://stackoverflow.com/a/28827013
    ; Both multiplicands are the same value, so the correction is applied
    ; twice when the argument is negative.
    lda arg + 1
    bpl arg_pos
    sub16 result + 2, result + 2, arg
    sub16 result + 2, result + 2, arg
arg_pos:

    rts                 ; 6 cyc
.endmacro
|
||||
|
||||
; imul16_func: routine whose body is the expansion of `imul16_impl 0`.
; The init code writes the high byte of imul16xe_func's address to
; imul16_func + 2, so the first bytes of this routine can be overwritten at
; run time; keep that in mind before reordering anything at its entry.
.proc imul16_func
|
||||
imul16_impl 0
|
||||
.endproc
|
||||
|
@ -692,6 +747,14 @@ arg2_pos:
|
|||
imul16_impl 1
|
||||
.endproc
|
||||
|
||||
; sqr16_func: squaring entry point, referenced by the (currently
; commented-out) jsr in sqr16_round.
; The init code stores a jmp opcode ($4c) followed by the address of
; sqr16xe_func into the first three bytes of this routine, so its entry can be
; overwritten at run time.
; NOTE(review): the body expands imul16_impl, not the sqr16_impl macro defined
; in this file - presumably a placeholder while this refactoring is in
; progress; confirm which one is intended before enabling the jsr.
.proc sqr16_func
|
||||
imul16_impl 0
|
||||
.endproc
|
||||
|
||||
; sqr16xe_func: variant of sqr16_func expanded with the macro argument 1
; instead of 0. The init code patches a jmp to this routine into the start of
; sqr16_func.
; NOTE(review): the body expands imul16_impl, not the sqr16_impl macro defined
; in this file - presumably a placeholder while this refactoring is in
; progress; confirm which one is intended.
.proc sqr16xe_func
|
||||
imul16_impl 1
|
||||
.endproc
|
||||
|
||||
.macro round16 arg
|
||||
; Round top 16 bits of 32-bit fixed-point number in-place
|
||||
.local increment
|
||||
|
@ -803,10 +866,10 @@ keep_going:
|
|||
quick_exit zy, 2
|
||||
|
||||
; zx_2 = zx * zx
|
||||
imul16_round zx_2, zx, zx, 4
|
||||
sqr16_round zx_2, zx, 4
|
||||
|
||||
; zy_2 = zy * zy
|
||||
imul16_round zy_2, zy, zy, 4
|
||||
sqr16_round zy_2, zy, 4
|
||||
|
||||
; zx_zy = zx * zy
|
||||
imul16_round zx_zy, zx, zy, 4
|
||||
|
|
Loading…
Reference in a new issue