micro-optimizations in imul8xe

imul8 xe path: 53-72 cycles (was 58-77)
overview in 10.896 ms/px
This commit is contained in:
Brooke Vibber 2024-12-30 14:09:02 -08:00
parent 63e74d5152
commit 3bd9b1ac31

View file

@ -37,6 +37,7 @@ palette_offset = $bf ; u8
palette_ticks = $c0 ; u8 palette_ticks = $c0 ; u8
chroma_ticks = $c1 ; u8 chroma_ticks = $c1 ; u8
chroma_offset = $c2 ; u8 chroma_offset = $c2 ; u8
ptr = $c4 ; u16
palette_delay = 23 palette_delay = 23
chroma_delay = 137 chroma_delay = 137
@ -452,21 +453,19 @@ bank_switch_table:
.macro imul8 dest, arg1, arg2, xe .macro imul8 dest, arg1, arg2, xe
.if xe .if xe
; using 64KB lookup table ; using 64KB lookup table
; 58-77 cycles ; 53-72 cycles
; clobbers x, y, dest to dest + 3 ; clobbers x, y, dest, ptr
.scope .scope
output = dest output = dest
ptr = dest + 2 ; scratch space assumed
; bottom 14 bits except the LSB are the per-bank table index ; bottom 14 bits except the LSB are the per-bank table index
; add $4000 for the bank pointer ; add $4000 for the bank pointer
lda arg1 ; 3 cyc lda arg1 ; 3 cyc
and #$fe ; 2 cyc and #$fe ; 2 cyc
sta ptr ; 3 cyc tay ; 2 cyc
lda arg2 ; 3 cyc lda arg2 ; 3 cyc
and #$3f ; 2 cyc and #$3f ; 2 cyc
clc ; 2 cyc ora #$40 ; 2 cyc
adc #$40 ; 2 cyc
sta ptr + 1 ; 3 cyc sta ptr + 1 ; 3 cyc
; top 2 bits are the table bank selector ; top 2 bits are the table bank selector
@ -476,7 +475,6 @@ bank_switch_table:
; copy the entry into output ; copy the entry into output
ldy #0 ; 2 cyc
lda (ptr),y ; 5 cyc lda (ptr),y ; 5 cyc
sta output ; 3 cyc sta output ; 3 cyc
iny ; 2 cyc iny ; 2 cyc
@ -609,6 +607,9 @@ init:
lda #$00 lda #$00
sta arg1 sta arg1
sta arg2 sta arg2
sta ptr
lda #$40
sta ptr + 1
; $00 * $00 -> $3f * $ff ; $00 * $00 -> $3f * $ff
bank_switch 0 bank_switch 0