Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
fa0de6dc77 WIP: save half a cycle per imul8_xe
Uses X to cache arg1, which is read on every call, instead of
arg2, which is only needed when arg1 is odd.

Should save half a cycle per imul8_xe on average (one cycle when arg1 is even, none when it is odd); untested.
2025-09-16 21:29:40 -07:00

View file

@ -461,7 +461,7 @@ input_max:
sub 4, dest, arg1, arg2 sub 4, dest, arg1, arg2
.endmacro .endmacro
; 3 + 5 * bytes cycles ; 3 + 5 * (bytes - 1) cycles
.macro shl bytes, arg .macro shl bytes, arg
asl arg ; 3 cyc asl arg ; 3 cyc
.repeat bytes-1, i .repeat bytes-1, i
@ -469,17 +469,17 @@ input_max:
.endrepeat .endrepeat
.endmacro .endmacro
; 13 cycles ; 8 cycles
.macro shl16 arg .macro shl16 arg
shl 2, arg shl 2, arg
.endmacro .endmacro
; 18 cycles ; 13 cycles
.macro shl24 arg .macro shl24 arg
shl 3, arg shl 3, arg
.endmacro .endmacro
; 23 cycles ; 18 cycles
.macro shl32 arg .macro shl32 arg
shl 4, arg shl 4, arg
.endmacro .endmacro
@ -529,11 +529,11 @@ input_max:
neg 4, arg neg 4, arg
.endmacro .endmacro
; 11-27 + 23 * shift cycles ; 11-27 + 18 * shift cycles
; 103-119 cycles for shift=4 ; 65-81 cycles for shift=3
.macro shift_round_16 arg, shift .macro shift_round_16 arg, shift
.repeat shift .repeat shift
shl32 arg ; 23 cycles shl32 arg ; 18 cycles
.endrepeat .endrepeat
round16 arg ; 11-27 cycles round16 arg ; 11-27 cycles
.endmacro .endmacro
@ -588,7 +588,7 @@ bank_switch_table:
.macro imul8 dest, arg1, arg2, xe .macro imul8 dest, arg1, arg2, xe
.if xe .if xe
; using 64KB lookup table ; using 64KB lookup table
; 51-70 cycles ; 50-70 cycles
; clobbers x, y, dest, ptr ; clobbers x, y, dest, ptr
.scope .scope
output = dest output = dest
@ -600,13 +600,13 @@ bank_switch_table:
; bottom 14 bits except the LSB are the per-bank table index ; bottom 14 bits except the LSB are the per-bank table index
; add $4000 for the bank pointer ; add $4000 for the bank pointer
txa ; 2 cyc
and #$3f ; 2 cyc and #$3f ; 2 cyc
ora #$40 ; 2 cyc ora #$40 ; 2 cyc
sta ptr + 1 ; 3 cyc sta ptr + 1 ; 3 cyc
; copy the entry into output ; copy the entry into output
lda arg1 ; 3 cyc lda arg1 ; 3 cyc
tax ; 2 cyc
and #$fe ; 2 cyc and #$fe ; 2 cyc
tay ; 2 cyc tay ; 2 cyc
lda (ptr),y ; 5 cyc lda (ptr),y ; 5 cyc
@ -623,13 +623,13 @@ bank_switch_table:
;;sta PORTB ; 4 cyc - disabled ;;sta PORTB ; 4 cyc - disabled
; check that 1 bit we skipped to fit into space ; check that 1 bit we skipped to fit into space
lda arg1 ; 3 cyc txa ; 2 cyc
and #1 ; 2 cyc and #1 ; 2 cyc
beq done ; 2 cyc beq done ; 2 cyc
; add arg2 one last time for the skipped bit ; add arg2 one last time for the skipped bit
clc ; 2 cyc clc ; 2 cyc
txa ; 2 cyc lda arg1 ; 3 cyc
adc output ; 3 cyc adc output ; 3 cyc
sta output ; 3 cyc sta output ; 3 cyc
lda #0 ; 2 cyc lda #0 ; 2 cyc