micro-optimizations in imul8xe

imul8xe now takes 53-72 cycles (previously 58-77)
overview in 10.896 ms/px
This commit is contained in:
Brooke Vibber 2024-12-30 14:09:02 -08:00
parent 63e74d5152
commit 3bd9b1ac31

View file

@ -37,6 +37,7 @@ palette_offset = $bf ; u8
; Symbol assignments. The $c0-$c5 entries are fixed zero-page addresses; the
; trailing comment on each gives the variable's size.
palette_ticks = $c0 ; u8
chroma_ticks = $c1 ; u8
chroma_offset = $c2 ; u8
; ptr is two bytes ($c4 low, $c5 high). The imul8 lookup-table path lists it
; among its clobbers and reads through it with `lda (ptr),y`.
ptr = $c4 ; u16
; NOTE(review): the two values below are plain numbers, not $-prefixed
; addresses; their units are not visible in this view -- confirm at the use site.
palette_delay = 23
chroma_delay = 137
@ -452,21 +453,19 @@ bank_switch_table:
.macro imul8 dest, arg1, arg2, xe
.if xe
; using 64KB lookup table
; 58-77 cycles
; clobbers x, y, dest to dest + 3
; 53-72 cycles
; clobbers x, y, dest, ptr
.scope
output = dest
ptr = dest + 2 ; scratch space assumed
; bottom 14 bits except the LSB are the per-bank table index
; add $4000 for the bank pointer
lda arg1 ; 3 cyc
and #$fe ; 2 cyc
sta ptr ; 3 cyc
tay ; 2 cyc
lda arg2 ; 3 cyc
and #$3f ; 2 cyc
clc ; 2 cyc
adc #$40 ; 2 cyc
ora #$40 ; 2 cyc
sta ptr + 1 ; 3 cyc
; top 2 bits are the table bank selector
@ -476,7 +475,6 @@ bank_switch_table:
; copy the entry into output
ldy #0 ; 2 cyc
lda (ptr),y ; 5 cyc
sta output ; 3 cyc
iny ; 2 cyc
@ -609,6 +607,9 @@ init:
lda #$00
sta arg1
sta arg2
sta ptr
lda #$40
sta ptr + 1
; $00 * $00 -> $3f * $ff
bank_switch 0