WIP: save half a cycle per imul8_xe
Uses X to cache arg1, which is always used, instead of arg2, which is only needed when arg1 is odd. Should save half a cycle per imul8_xe; untested.
This commit is contained in:
parent
29cd3d968f
commit
fa0de6dc77
1 changed files with 11 additions and 11 deletions
22
mandel.s
22
mandel.s
|
|
@ -461,7 +461,7 @@ input_max:
|
|||
sub 4, dest, arg1, arg2
|
||||
.endmacro
|
||||
|
||||
; 3 + 5 * bytes cycles
|
||||
; 3 + 5 * (bytes - 1) cycles
|
||||
.macro shl bytes, arg
|
||||
asl arg ; 3 cyc
|
||||
.repeat bytes-1, i
|
||||
|
|
@ -469,17 +469,17 @@ input_max:
|
|||
.endrepeat
|
||||
.endmacro
|
||||
|
||||
; 13 cycles
|
||||
; 8 cycles
|
||||
.macro shl16 arg
|
||||
shl 2, arg
|
||||
.endmacro
|
||||
|
||||
; 18 cycles
|
||||
; 13 cycles
|
||||
.macro shl24 arg
|
||||
shl 3, arg
|
||||
.endmacro
|
||||
|
||||
; 23 cycles
|
||||
; 18 cycles
|
||||
.macro shl32 arg
|
||||
shl 4, arg
|
||||
.endmacro
|
||||
|
|
@ -529,11 +529,11 @@ input_max:
|
|||
neg 4, arg
|
||||
.endmacro
|
||||
|
||||
; 11-27 + 23 * shift cycles
|
||||
; 103-119 cycles for shift=4
|
||||
; 11-27 + 18 * shift cycles
|
||||
; 65-81 cycles for shift=3
|
||||
.macro shift_round_16 arg, shift
|
||||
.repeat shift
|
||||
shl32 arg ; 23 cycles
|
||||
shl32 arg ; 18 cycles
|
||||
.endrepeat
|
||||
round16 arg ; 11-27 cycles
|
||||
.endmacro
|
||||
|
|
@ -588,7 +588,7 @@ bank_switch_table:
|
|||
.macro imul8 dest, arg1, arg2, xe
|
||||
.if xe
|
||||
; using 64KB lookup table
|
||||
; 51-70 cycles
|
||||
; 50-70 cycles
|
||||
; clobbers x, y, dest, ptr
|
||||
.scope
|
||||
output = dest
|
||||
|
|
@ -600,13 +600,13 @@ bank_switch_table:
|
|||
|
||||
; bottom 14 bits except the LSB are the per-bank table index
|
||||
; add $4000 for the bank pointer
|
||||
txa ; 2 cyc
|
||||
and #$3f ; 2 cyc
|
||||
ora #$40 ; 2 cyc
|
||||
sta ptr + 1 ; 3 cyc
|
||||
|
||||
; copy the entry into output
|
||||
lda arg1 ; 3 cyc
|
||||
tax ; 2 cyc
|
||||
and #$fe ; 2 cyc
|
||||
tay ; 2 cyc
|
||||
lda (ptr),y ; 5 cyc
|
||||
|
|
@ -623,13 +623,13 @@ bank_switch_table:
|
|||
;;sta PORTB ; 4 cyc - disabled
|
||||
|
||||
; check that 1 bit we skipped to fit into space
|
||||
lda arg1 ; 3 cyc
|
||||
txa ; 2 cyc
|
||||
and #1 ; 2 cyc
|
||||
beq done ; 2 cyc
|
||||
|
||||
; add arg2 one last time for the skipped bit
|
||||
clc ; 2 cyc
|
||||
txa ; 2 cyc
|
||||
lda arg1 ; 3 cyc
|
||||
adc output ; 3 cyc
|
||||
sta output ; 3 cyc
|
||||
lda #0 ; 2 cyc
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue