shave some cycles off 16-bit squaring: double the cross product term with one 16-bit shift instead of adding it to the result twice

also fix the comments stating how many cycles the shift macros take
This commit is contained in:
Jamey Sharp 2024-12-31 02:55:22 -08:00
parent 0f49760aa5
commit 3553ce986f

View file

@@ -348,7 +348,7 @@ viewport_oy:
sub 4, dest, arg1, arg2 sub 4, dest, arg1, arg2
.endmacro .endmacro
; 3 + 5 * bytes cycles ; 3 + 5 * (bytes - 1) cycles
.macro shl bytes, arg .macro shl bytes, arg
asl arg ; 3 cyc asl arg ; 3 cyc
.repeat bytes-1, i .repeat bytes-1, i
@@ -356,17 +356,17 @@ viewport_oy:
.endrepeat .endrepeat
.endmacro .endmacro
; 13 cycles ; 8 cycles
.macro shl16 arg .macro shl16 arg
shl 2, arg shl 2, arg
.endmacro .endmacro
; 18 cycles ; 13 cycles
.macro shl24 arg .macro shl24 arg
shl 3, arg shl 3, arg
.endmacro .endmacro
; 23 cycles ; 18 cycles
.macro shl32 arg .macro shl32 arg
shl 4, arg shl 4, arg
.endmacro .endmacro
@@ -787,7 +787,7 @@ arg2_pos:
sqr8 result + 2, arg + 1 sqr8 result + 2, arg + 1
imul8 inter, arg + 1, arg, xe imul8 inter, arg + 1, arg, xe
add16 result + 1, result + 1, inter shl16 inter
add_carry result + 3 add_carry result + 3
add16 result + 1, result + 1, inter add16 result + 1, result + 1, inter
add_carry result + 3 add_carry result + 3