diff --git a/imul8xe.s b/imul8xe.s index 15adf64..855e044 100644 --- a/imul8xe.s +++ b/imul8xe.s @@ -70,6 +70,106 @@ bankswitch: done: .endmacro +.macro bank_switch bank + lda #((bank << 1) | $c1) + sta PORTB +.endmacro + proc imul8xe_init + + ; go through the input set, in four 16KB chunks + + arg1 = FR1 + arg2 = FR2 + result = FR0 + + lda #$00 + sta arg1 + sta arg2 + + ; $00 * $00 -> $3f * $ff + bank_switch 0 + jsr imul8xe_init_section + + ; $40 * $00 -> $7f * $ff + bank_switch 1 + jsr imul8xe_init_section + + ; $80 * $00 -> $bf * $ff + bank_switch 2 + jsr imul8xe_init_section + + ; $c0 * $00 -> $ff * $ff + bank_switch 3 + jsr imul8xe_init_section + rts endproc + +; Initialize a 16 KB chunk of the table +; input: multipliers in temp +; output: new multipliers in temp +; clobbers: temp, temp2 +proc imul8xe_init_section + arg1 = FR1 + arg2 = FR2 + result = FR0 + ptr = temp2 + + lda #$00 + sta ptr + lda #$40 + sta ptr + 1 + + ldx #0 + ldy #0 + + ; outer loop: $00 -> $3f +outer_loop: + + ; reset result to 0 + lda #0 + sta result + sta result + 1 + + ; inner loop: $00 -> $ff +inner_loop: + + ; copy result to data set + lda result + sta (ptr),y + lda result + 1 + sta (ptr),y + + ; result += 2 * arg2 + clc + lda arg2 + adc result + sta result + lda #0 + adc result + 1 + sta result + lda arg2 + adc result + sta result + lda #0 + adc result + 1 + sta result + + ; inner loop check + inc arg1 + inc arg1 + inc ptr + inc ptr + bne inner_loop + + ; outer loop check + inc arg2 + inc ptr + 1 + lda ptr + 1 + cmp #$40 + bne outer_loop + + rts + +endproc