Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
fa0de6dc77 WIP savings of half a cycle per imul8_xe
Uses X to cache arg1, which is always used, instead of
arg2, which is only used on odds.

Should save half a cycle per imul8_xe on average (one cycle when arg1 is even, none when it is odd; range 51-70 -> 50-70), untested
2025-09-16 21:29:40 -07:00

View file

@ -461,7 +461,7 @@ input_max:
sub 4, dest, arg1, arg2
.endmacro
; 3 + 5 * bytes cycles
; 3 + 5 * (bytes - 1) cycles
.macro shl bytes, arg
asl arg ; 3 cyc
.repeat bytes-1, i
@ -469,17 +469,17 @@ input_max:
.endrepeat
.endmacro
; 13 cycles
; 8 cycles
.macro shl16 arg
shl 2, arg
.endmacro
; 18 cycles
; 13 cycles
.macro shl24 arg
shl 3, arg
.endmacro
; 23 cycles
; 18 cycles
.macro shl32 arg
shl 4, arg
.endmacro
@ -529,11 +529,11 @@ input_max:
neg 4, arg
.endmacro
; 11-27 + 23 * shift cycles
; 103-119 cycles for shift=4
; 11-27 + 18 * shift cycles
; 65-81 cycles for shift=3
.macro shift_round_16 arg, shift
.repeat shift
shl32 arg ; 23 cycles
shl32 arg ; 18 cycles
.endrepeat
round16 arg ; 11-27 cycles
.endmacro
@ -588,7 +588,7 @@ bank_switch_table:
.macro imul8 dest, arg1, arg2, xe
.if xe
; using 64KB lookup table
; 51-70 cycles
; 50-70 cycles
; clobbers x, y, dest, ptr
.scope
output = dest
@ -600,13 +600,13 @@ bank_switch_table:
; bottom 14 bits except the LSB are the per-bank table index
; add $4000 for the bank pointer
txa ; 2 cyc
and #$3f ; 2 cyc
ora #$40 ; 2 cyc
sta ptr + 1 ; 3 cyc
; copy the entry into output
lda arg1 ; 3 cyc
tax ; 2 cyc
and #$fe ; 2 cyc
tay ; 2 cyc
lda (ptr),y ; 5 cyc
@ -623,13 +623,13 @@ bank_switch_table:
;;sta PORTB ; 4 cyc - disabled
; check that 1 bit we skipped to fit into space
lda arg1 ; 3 cyc
txa ; 2 cyc
and #1 ; 2 cyc
beq done ; 2 cyc
; add arg2 one last time for the skipped bit
clc ; 2 cyc
txa ; 2 cyc
lda arg2 ; 3 cyc
adc output ; 3 cyc
sta output ; 3 cyc
lda #0 ; 2 cyc