From f06aed0c0080b45fdd92544afddcbebea6d74efa Mon Sep 17 00:00:00 2001 From: Jamey Sharp Date: Tue, 31 Dec 2024 02:22:31 -0800 Subject: [PATCH] set results from both 8-bit squares first Since the results from the lo and hi squares don't overlap or overflow, they can be written directly to the final output location without doing any addition. Then only the multiplication that goes in the middle needs any adds. --- mandel.s | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/mandel.s b/mandel.s index ec1b086..a63d96f 100644 --- a/mandel.s +++ b/mandel.s @@ -450,18 +450,6 @@ viewport_oy: sta dest + 1 .endmacro -; clobbers a, x -.macro sqr8_add16 dest, arg - ldx arg - clc - lda sqr_lobyte,x - adc dest - sta dest - lda sqr_hibyte,x - adc dest + 1 - sta dest + 1 -.endmacro - .segment "TABLES" ; lookup table for top byte -> PORTB value for bank-switch .align 256 @@ -794,9 +782,7 @@ arg2_pos: ; h*h*256*256 + h*l*256 + h*l*256 + l*l sqr8 result, arg - lda #0 - sta result + 2 - sta result + 3 + sqr8 result + 2, arg + 1 imul8 inter, arg + 1, arg, xe add16 result + 1, result + 1, inter @@ -804,8 +790,6 @@ arg2_pos: add16 result + 1, result + 1, inter add_carry result + 3 - sqr8_add16 result + 2, arg + 1 - rts ; 6 cyc .endscope .endmacro