diff --git a/mandel.s b/mandel.s
index 60ffb76..386094f 100644
--- a/mandel.s
+++ b/mandel.s
@@ -141,46 +141,43 @@ minus:
 .endif
 .endmacro
 
-.macro bitmul arg1, arg2, res, bits
+.macro bitmul16 arg1, arg2, res, bits
 .local next
+    checkbit arg2, bits
+    clc
     beq next
-    add32 res, arg1
+
+    ; 16-bit add on the top bits
+    lda res + 2
+    adc arg1
+    sta res + 2
+    lda res + 3
+    adc arg1 + 1
+
 next:
-    shl32 arg1
+    ; shift result right one bit
+    ; (shifts in the carry bit)
+    ror a
+    ror res
+    sta res + 1
 .endmacro
 
 .proc imul16
     ; 16-bit arg in FR0
     ; 16-bit arg in FR1
-    ; 16-bit result in FR0
-
-    ; sign-extend the argument
-    sext16to32 FR0
+    ; 32-bit result in FR2
+    ; clobbers FR1 and FR2
 
     ; zero out the 32-bit temp
     lda #0
-    sta FRX
-    sta FRX+1
-    sta FRX+2
-    sta FRX+3
+    sta FR2 + 2
+    sta FR2 + 3
+    ; the bottom two bytes will get cleared by the shifts
 
-    ; shift and add :D
     .repeat 16, bitnum
-        bitmul FR0, FR1, FRX, bitnum
+        bitmul16 FR0, FR1, FR2, bitnum
     .endrepeat
-
-    ; Re-normalize the ones place
-    shr24 FRX
-    shr24 FRX
-    shr24 FRX
-
-    ; @fixme round the last bit
-
-    ; And copy out our result
-    copy16 FRX+2, FR0
-    ; @fixme could save a few cycles by combining the last two ops
-
 .endproc
 
 .proc iter