FR0 = $d4 ; float48
PORTB = $d301
EXTENDED_RAM = $4000 ; 16KiB bank on the XE

; lookup table for top byte -> PORTB value for bank-switch
; Indexed by a full byte; only the top two bits of the index (the bank
; number) affect the entry, so entry i equals what the bank_switch macro
; loads for bank (i >> 6).
; Page-aligned so the indexed load in imul8xe never crosses a page.
; NOTE(review): the bank number lands in PORTB bits 1-2 with a base value
; of $c1. 130XE references describe bank select as bits 2-3 (bit 1 being
; the BASIC ROM enable) -- TODO confirm against the target hardware.
.align 256
bankswitch:
.repeat 256, i
    .byte ((i & $c0) >> 5) | $c1
.endrepeat

; 8-bit x 8-bit -> 16-bit multiply using the lookup tables that
; imul8xe_init builds in the extended RAM banks.
;
; dest: receives the product (low byte in dest, high byte in dest + 1).
;       dest + 2 / dest + 3 are used as a pointer with (zp),y addressing,
;       so dest must be in zero page.
; arg1: location of the first factor; its LSB is handled separately
; arg2: location of the second factor; selects the bank and the table page
;
; 58-77 cycles
; clobbers a, x, y, dest to dest + 3
; Leaves the selected table bank mapped in at EXTENDED_RAM (see note below).
.macro imul8xe dest, arg1, arg2
    .local done
    .local output
    .local ptr
    output = dest
    ptr = dest + 2 ; scratch space assumed

    ; bottom 14 bits except the LSB are the per-bank table index
    ; add $4000 for the bank pointer
    lda arg1                ; 3 cyc
    and #$fe                ; 2 cyc
    sta ptr                 ; 3 cyc
    lda arg2                ; 3 cyc
    and #$3f                ; 2 cyc
    clc                     ; 2 cyc
    adc #>EXTENDED_RAM      ; 2 cyc
    sta ptr + 1             ; 3 cyc

    ; top 2 bits are the table bank selector
    ; (the table label is bankswitch; bank_switch is the macro below)
    ldx arg2                ; 3 cyc
    lda bankswitch,x        ; 4 cyc
    sta PORTB               ; 4 cyc

    ; copy the entry into output
    ldy #0                  ; 2 cyc
    lda (ptr),y             ; 5 cyc
    sta output              ; 3 cyc
    iny                     ; 2 cyc
    lda (ptr),y             ; 5 cyc
    sta output+1            ; 3 cyc

    ; note: we are not restoring memory to save 6 cycles!
    ; this means those 16kb have to be switched back to base RAM
    ; if we need to use them anywhere else
    ;;; restore memory
    ;;lda #$81              ; 2 cyc - disabled
    ;;sta PORTB             ; 4 cyc - disabled

    ; check that 1 bit we skipped to fit into space
    lda arg1                ; 3 cyc
    and #1                  ; 2 cyc
    beq done                ; 2 cyc

    ; add the second param one last time for the skipped bit
    clc                     ; 2 cyc
    lda arg2                ; 3 cyc
    adc output              ; 3 cyc
    sta output              ; 3 cyc
    lda #0                  ; 2 cyc
    adc output+1            ; 3 cyc
    sta output+1            ; 3 cyc
done:
.endmacro

; Map extended-RAM bank `bank` (assemble-time constant, 0-3) by writing
; PORTB. Produces the same values as the bankswitch table.
; clobbers a
.macro bank_switch bank
    lda #((bank << 1) | $c1)
    sta PORTB
.endmacro

; Build the multiplication tables in all four extended-RAM banks.
; Each bank holds the products for 64 consecutive values of arg2 (the
; factor that imul8xe uses to pick the bank and page).
; clobbers: a, y, FR0, FR0 + 1, FR1, FR2, temp2, temp2 + 1
; Leaves bank 3 selected on return.
.proc imul8xe_init
    ; go through the input set, in four 16KB chunks
    arg1 = FR1
    arg2 = FR2

    lda #$00
    sta arg1
    sta arg2

    ; $00 * $00 -> $3f * $ff
    bank_switch 0
    jsr imul8xe_init_section

    ; $40 * $00 -> $7f * $ff
    bank_switch 1
    jsr imul8xe_init_section

    ; $80 * $00 -> $bf * $ff
    bank_switch 2
    jsr imul8xe_init_section

    ; $c0 * $00 -> $ff * $ff
    bank_switch 3
    jsr imul8xe_init_section

    rts
.endproc

; Initialize a 16 KB chunk of the table (the currently mapped bank).
;
; Layout: page (>EXTENDED_RAM + n) holds the entries for one value of arg2;
; within a page, the 16-bit little-endian entry at even offset k is
; k * arg2. imul8xe adds arg2 once more for odd factors.
;
; input:  arg2 (FR2) = first multiplier for this bank; bank already selected
; output: arg2 advanced by $40 (first multiplier of the next bank);
;         arg1 (FR1) is stepped by 2 per entry and wraps back to its
;         starting value
; clobbers: a, y, FR0, FR0 + 1, temp2, temp2 + 1 (temp2 must be zero page)
.proc imul8xe_init_section
    arg1 = FR1
    arg2 = FR2
    result = FR0
    ptr = temp2

    lda #<EXTENDED_RAM
    sta ptr
    lda #>EXTENDED_RAM
    sta ptr + 1
    ldy #0

    ; outer loop: $00 -> $3f
outer_loop:
    ; reset result to 0
    lda #0
    sta result
    sta result + 1

    ; inner loop: $00 -> $ff
inner_loop:
    ; copy result to data set: low byte at offset 0, high byte at offset 1
    lda result
    sta (ptr),y
    iny
    lda result + 1
    sta (ptr),y
    dey                     ; back to offset 0 for the next entry

    ; result += 2 * arg2 (two 16-bit additions of arg2)
    clc
    lda arg2
    adc result
    sta result
    lda #0
    adc result + 1
    sta result + 1

    clc
    lda arg2
    adc result
    sta result
    lda #0
    adc result + 1
    sta result + 1

    ; inner loop check
    ; ptr's low byte stays even, so only the second inc can wrap to 0
    inc arg1
    inc arg1
    inc ptr
    inc ptr
    bne inner_loop

    ; outer loop check: stop at the end of the 16 KB window
    inc arg2
    inc ptr + 1
    lda ptr + 1
    cmp #>(EXTENDED_RAM + $4000)
    bne outer_loop

    rts
.endproc