From 05133aabdd59739805bbe7bb2eb32e9815120718 Mon Sep 17 00:00:00 2001 From: Brooke Vibber Date: Sun, 15 Dec 2024 20:17:45 -0800 Subject: [PATCH] slightly faster handling of signed mul previously we were flipping the inputs if negative, and then the output if exactly one input was negative. turns out you can just treat the whole thing as an unsigned mul and then subtract each term from the high word if the other term is negative. https://stackoverflow.com/a/28827013 this saves a handful of cycles, reducing our runtime to a mere 14.211 ms/px \o/ --- mandel.s | 32 ++++++++++---------------------- 1 file changed, 10 insertions(+), 22 deletions(-) diff --git a/mandel.s b/mandel.s index 1244a02..3622995 100644 --- a/mandel.s +++ b/mandel.s @@ -344,18 +344,6 @@ fill_masks: neg 4, arg .endmacro -; 5 to 25 cycles -.macro check_sign arg - ; Check sign bit and flip argument to postive, - ; keeping a count of sign bits in the Y register. - .local positive - lda arg + 1 ; 3 cyc - bpl positive ; 2 cyc - neg16 arg ; 18 cyc - iny ; 2 cyc -positive: -.endmacro - ; 518 - 828 cyc .macro imul16 dest, arg1, arg2 copy16 FR0, arg1 ; 12 cyc @@ -438,11 +426,6 @@ positive: result = FR2 ; 32-bit result inter = temp2 - ldy #0 ; 2 cyc - ; counts the number of sign bits in Y - check_sign arg1 ; 5 to 25 cyc - check_sign arg2 ; 5 to 25 cyc - ; h1l1 * h2l2 ; (h1*256 + l1) * (h2*256 + l2) ; h1*256*(h2*256 + l2) + l1*(h2*256 + l2) @@ -464,11 +447,16 @@ positive: imul8 inter, arg1 + 1, arg2 + 1 add16 result + 2, result + 2, inter - ; In case of mixed input signs, return a negative result. - cpy #1 ; 2 cyc - bne positive_result ; 2 cyc - neg32 result ; 34 cyc -positive_result: + ; In case of negative inputs, adjust high word + ; https://stackoverflow.com/a/28827013 + lda arg1 + 1 + bpl arg1_pos + sub16 result + 2, result + 2, arg2 +arg1_pos: + lda arg2 + 1 + bpl arg2_pos + sub16 result + 2, result + 2, arg1 +arg2_pos: rts ; 6 cyc .endproc