Compare commits

...

3 commits

Author SHA1 Message Date
38091e535f hmm 2023-01-04 20:31:39 -08:00
58519381bf hmm 2023-01-04 20:12:34 -08:00
fa599be775 wip fix 2023-01-04 19:52:56 -08:00

View file

@ -30,15 +30,18 @@ FRX = $ec
.endmacro .endmacro
; inner loop for imul16 ; inner loop for imul16
; 24 to 44 cycles ; bitnum < 8: 25 or 40 cycles (+1 if the taken bne crosses a page)
; bitnum >= 8: 30 or 45 cycles (+1 if the taken bne crosses a page)
.macro bitmul16 arg1, arg2, result, bitnum .macro bitmul16 arg1, arg2, result, bitnum
.local zero
.local one
.local next .local next
; does 16-bit adds ; does 16-bit adds
; arg1 must be 0 or positive ; arg1 must be 0 or positive
; arg2 must be 0 or positive ; arg2 must be 0 or positive
clc ; 2 cyc ; 7 cycles up to the branch
; check if arg1 has 0 or 1 bit in this place ; check if arg1 has 0 or 1 bit in this place
; 5 cycles either way ; 5 cycles either way
@ -49,21 +52,10 @@ FRX = $ec
lda arg1 + 1 ; 3 cyc lda arg1 + 1 ; 3 cyc
and #(1 << (bitnum - 8)) ; 2 cyc and #(1 << (bitnum - 8)) ; 2 cyc
.endif .endif
beq next ; 2 cyc bne one ; 2 cyc
; 16-bit add on the top bits zero: ; 18 cyc, 23 cyc
lda result + 2 ; 3 cyc lsr result + 3 ; 5 cyc
adc arg2 ; 3 cyc
sta result + 2 ; 3 cyc
lda result + 3 ; 3 cyc
adc arg2 + 1 ; 3 cyc
ror a ; 2 cyc - get a jump on the shift
sta result + 3 ; 3 cyc
; Shift the 32-bit result down by one bit,
; saving the previous carry.
ror result + 3 ; 5 cyc
next:
ror result + 2 ; 5 cyc ror result + 2 ; 5 cyc
ror result + 1 ; 5 cyc ror result + 1 ; 5 cyc
.if bitnum >= 8 .if bitnum >= 8
@ -71,6 +63,28 @@ next:
; when it's all uninitialized data ; when it's all uninitialized data
ror result ; 5 cyc ror result ; 5 cyc
.endif .endif
jmp next ; 3 cyc
one: ; 32 cyc, 37 cyc
; 16-bit add on the top bits
clc ; 2 cyc
lda result + 2 ; 3 cyc
adc arg2 ; 3 cyc
sta result + 2 ; 3 cyc
lda result + 3 ; 3 cyc
adc arg2 + 1 ; 3 cyc
ror a ; 2 cyc
sta result + 3 ; 3 cyc
ror result + 2 ; 5 cyc
ror result + 1 ; 5 cyc
.if bitnum >= 8
; we can save 5 cycles * 8 bits = 40 cycles total by skipping this byte
; when it's all uninitialized data
ror result ; 5 cyc
.endif
next:
.endmacro .endmacro
; 5 to 25 cycles ; 5 to 25 cycles
@ -107,8 +121,8 @@ positive:
; of a larger routine ; of a larger routine
; 424 to 672 cycles ; 424 to 672 cycles
.repeat 16, bitnum .repeat 16, bitnum
; first half: 24 to 40 cycles ; first half: 25 to 40 cycles
; second half: 29 to 44 cycles ; second half: 30 to 45 cycles
bitmul16 arg1, arg2, result, bitnum bitmul16 arg1, arg2, result, bitnum
.endrepeat .endrepeat
@ -153,7 +167,7 @@ loop:
.proc start .proc start
loop: looplong:
; FR0 = 5 ; FR0 = 5
; FR1 = -3 ; FR1 = -3
lda #5 lda #5
@ -168,5 +182,6 @@ loop:
jsr imul16 jsr imul16
; should have 32-bit -15 in FR2 ; should have 32-bit -15 in FR2
loop:
jmp loop jmp loop
.endproc .endproc