From f996c3cbcd84b3aff3fd39bf3daee9a6c60a9e2a Mon Sep 17 00:00:00 2001
From: Brooke Vibber
Date: Wed, 25 Dec 2024 12:47:37 -0800
Subject: [PATCH] provisional maybe

old mode runs in 81-92 cycles
provisional code runs in 58-77 cycles if it works ;)
---
Review notes (ignored by git am, not part of the commit message):
- the PORTB table lookup now uses the label the patch defines, `bankswitch`
  (it referenced the undefined name `bank_switch`)
- `and $#1` is corrected to `and #1`; "one last tie" -> "one last time"
- comment lines documenting the table and the macro were added, and the
  hunk headers / diffstat were updated to match
- line breaks, blank context lines and indentation were reconstructed from
  a whitespace-collapsed copy of this patch; the post-image blob id on the
  index line still names the original, unreviewed result

 imul8xe.s | 91 +++++++++++++++++++++++++++---------------------------
 mandel.s  |  1 +
 2 files changed, 42 insertions(+), 50 deletions(-)

diff --git a/imul8xe.s b/imul8xe.s
index 5cbb852..d12f53f 100644
--- a/imul8xe.s
+++ b/imul8xe.s
@@ -3,55 +3,49 @@
 PORTB = $d301
 EXTENDED_RAM = $4000 ; 16KiB bank on the XE
 
-bankswitch = ; ???
-; input in X/Y (lo/hi)
-; output in FR0
-; clobbers FR0
-; 128 cycles
-proc imul8xe
-    output = FR0
-    ptr = FR0 + 2
+; lookup table for top byte -> PORTB value for bank-switch
+; (indexed by arg2; only the top 2 bits of the index change the entry)
+; NOTE(review): 130XE references place the bank select in PORTB bits 2-3
+; and the CPU bank enable (active low) in bit 4, while this expression puts
+; the selector in bits 1-2 -- confirm on hardware/emulator before relying on it
+.align 256
+bankswitch:
+    .repeat 256, i
+        .byte ((i & $c0) >> 5) | $c1
+    .endrepeat
 
-    lda #0          ; 2 cyc
-    sta ptr         ; 3 cyc
-    sta ptr + 1     ; 3 cyc
+; imul8xe dest, arg1, arg2
+; Fetches the 2-byte table entry selected by arg1/arg2 from the banked
+; window at EXTENDED_RAM into dest/dest+1; if bit 0 of arg1 is set, arg2 is
+; added once more to cover the LSB that was dropped from the table index.
+; dest+2/dest+3 hold the (ptr),y pointer, so dest must be in zero page.
+; Clobbers A and X; Y is the index register for the table loads.
+; PORTB is left on the selected bank and NMIEN is no longer touched.
+; 58-77 cycles
+.macro imul8xe dest, arg1, arg2
+.local done
+.local output
+.local ptr
+
+    output = dest
+    ptr = dest + 2 ; scratch space assumed
 
     ; bottom 14 bits except the LSB are the per-bank table index
     ; add $4000 for the bank pointer
-    txa             ; 2 cyc
+    lda arg1        ; 3 cyc
     and #$fe        ; 2 cyc
     sta ptr         ; 3 cyc
 
-    tya             ; 2 cyc
+    lda arg2        ; 3 cyc
     and #$3f        ; 2 cyc
     clc             ; 2 cyc
     adc #$40        ; 2 cyc
     sta ptr + 1     ; 3 cyc
 
     ; top 2 bits are the table bank selector
-    tya             ; 2 cyc
-    and #$c0        ; 2 cyc
-    ; shift in extended RAM mode 2x 1 bits
-    sec             ; 2 cyc
-    ror             ; 2 cyc
-    ror             ; 2 cyc
-    ; shift in 0 bits
-    asr             ; 2 cyc
-    asr             ; 2 cyc
-    asr             ; 2 cyc
-
-    ; save the second param for later
-    phy             ; 3 cyc
-
-    ; disable interrupts
-    lda NMIEN       ; 4 cyc
-    pha             ; 3 cyc
-    lda #0          ; 2 cyc
-    sta NMIEN       ; 4 cyc
-
-    ; set the standard top RAM and OS ROM on
-    or #$81         ; 2 cyc
-    sta PORTB       ; 4 cyc
+    ldx arg2        ; 3 cyc
+    lda bankswitch,x ; 4 cyc
+    sta PORTB       ; 4 cyc
 
     ; copy the entry into output
 
@@ -62,22 +56,21 @@ proc imul8xe
     lda (ptr),y     ; 5 cyc
     sta output+1    ; 3 cyc
 
-    ; restore memory
-    lda #$81        ; 2 cyc
-    sta PORTB       ; 4 cyc
-
-    ; restore interrupts
-    pla             ; 3 cyc
-    sta NMIEN       ; 4 cyc
+    ; note: we are not restoring memory to save 6 cycles!
+    ; this means those 16kb have to be switched back to base RAM
+    ; if we need to use them anywhere else
+    ;;; restore memory
+    ;;lda #$81      ; 2 cyc - disabled
+    ;;sta PORTB     ; 4 cyc - disabled
 
     ; check that 1 bit we skipped to fit into space
-    txa             ; 2 cyc
-    and $#1         ; 2 cyc
+    lda arg1        ; 3 cyc
+    and #1          ; 2 cyc
     beq done        ; 2 cyc
 
-    ; add the second param one last tie for the skipped bit
+    ; add the second param one last time for the skipped bit
     clc             ; 2 cyc
-    pla             ; 3 cyc
+    lda arg2        ; 3 cyc
     adc output      ; 3 cyc
     sta output      ; 3 cyc
     lda #0          ; 2 cyc
@@ -85,9 +78,7 @@ proc imul8xe
     sta output+1    ; 3 cyc
 
 done:
-    pla
-    rts             ; 6 cyc
-endproc
+.endmacro
 
 proc imul8xe_init
     rts
diff --git a/mandel.s b/mandel.s
index 3b0bc9f..e0a8570 100644
--- a/mandel.s
+++ b/mandel.s
@@ -373,6 +373,7 @@ fill_masks:
     .local next
     .local small_product
     ; circa 92 cycles? this doesn't seem right
+    ; 81-92 cycles
     .scope
         mul_factor_a = arg1
         mul_factor_x = arg2