forked from brooke/mandel-6502
set results from both 8-bit squares first
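Because the squares of the low and high bytes land in separate bytes of the result (l*l in result and result + 1, h*h in result + 2 and result + 3) and an 8-bit square always fits in 16 bits, they can be written directly to the final output location without doing any addition. Only the middle term (h*l, added twice starting at result + 1) still needs adds.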
This commit is contained in:
parent
aee587388d
commit
f06aed0c00
1 changed file with 1 addition and 17 deletions
18
mandel.s
18
mandel.s
|
@ -450,18 +450,6 @@ viewport_oy:
|
|||
sta dest + 1
|
||||
.endmacro
|
||||
|
||||
; sqr8_add16 dest, arg
; Adds the 16-bit table entry selected by the byte at `arg` to the 16-bit
; value stored at dest (low byte) and dest + 1 (high byte), in place.
; The entry is read from sqr_lobyte / sqr_hibyte; presumably these hold the
; low and high bytes of arg squared -- the tables are not visible here.
; A carry out of the high byte is left in the carry flag and not stored.
; clobbers a, x
|
||||
.macro sqr8_add16 dest, arg
|
||||
ldx arg ; x = table index read from arg
|
||||
clc ; no carry into the low-byte add
|
||||
lda sqr_lobyte,x ; a = low byte of the table entry
|
||||
adc dest ; add the current low byte of dest
|
||||
sta dest ; store the new low byte; carry holds any overflow
|
||||
lda sqr_hibyte,x ; a = high byte of the table entry
|
||||
adc dest + 1 ; add the high byte of dest plus the carry from the low byte
|
||||
sta dest + 1 ; store the new high byte
|
||||
.endmacro
|
||||
|
||||
.segment "TABLES"
|
||||
; lookup table for top byte -> PORTB value for bank-switch
|
||||
.align 256
|
||||
|
@ -794,9 +782,7 @@ arg2_pos:
|
|||
; h*h*256*256 + h*l*256 + h*l*256 + l*l
|
||||
|
||||
sqr8 result, arg
|
||||
lda #0
|
||||
sta result + 2
|
||||
sta result + 3
|
||||
sqr8 result + 2, arg + 1
|
||||
|
||||
imul8 inter, arg + 1, arg, xe
|
||||
add16 result + 1, result + 1, inter
|
||||
|
@ -804,8 +790,6 @@ arg2_pos:
|
|||
add16 result + 1, result + 1, inter
|
||||
add_carry result + 3
|
||||
|
||||
sqr8_add16 result + 2, arg + 1
|
||||
|
||||
rts ; 6 cyc
|
||||
.endscope
|
||||
.endmacro
|
||||
|
|
Loading…
Reference in a new issue