forked from brooke/mandel-6502
Added two versions of 16-bit rounding
round16_incdec uses inc and dec; round16_addsub uses adc and sbc. The incdec version costs the same when no rounding is needed but saves about 8 cycles in the rounding cases, for an average savings of 4.5 cycles on randomly distributed inputs. Untested so far.
This commit is contained in:
parent
6edd2f71d9
commit
7ac0df0343
1 changed files with 98 additions and 0 deletions
98
mandel.s
98
mandel.s
|
@ -128,6 +128,104 @@ positive_result:
|
||||||
rts ; 6 cyc
|
rts ; 6 cyc
|
||||||
.endproc
|
.endproc
|
||||||
|
|
||||||
|
.macro round16_incdec arg
    ; Round the top 16 bits (arg+2 = low byte, arg+3 = high byte) of the
    ; 32-bit fixed-point number at arg, in place, using inc/dec.
    ;
    ; Rounding is applied only when bit 7 of arg+1 (the top bit of the
    ; discarded low 16 bits) is set. The sign is read from bit 7 of arg+3:
    ; non-negative values get the top word incremented, negative values get
    ; it decremented.
    ;
    ; NOTE(review): if arg is plain two's complement, a set bit 15 means the
    ; value lies above the truncated top word for either sign, so the
    ; negative path may need to increment as well -- confirm against the
    ; code that produces arg before relying on negative results.
    ;
    ; Clobbers: A, N and Z flags. Carry is not touched.
    .local zero
    .local one
    .local positive
    .local negative
    .local neg2
    .local next

    ; Cycle estimates (zero-page arg; every branch counted as 2 cycles,
    ; so paths with a taken branch cost 1 more per taken branch):
    ; no round - 5 cycles
    ; round pos, no carry - 17
    ; round pos, carry - 22
    ; round neg, no carry - 23
    ; round neg, carry - 28
    ; average = 5 / 2 + (17 + 22 + 23 + 28) / 8
    ;         = 5 / 2 + 90 / 8
    ;         = 2.5 + 11.25 = 13.75 cycles average on evenly distributed input

    lda arg + 1         ; 3 cyc
    bpl zero            ; 2 cyc - round bit clear: nothing to do

one:
    ; check sign bit
    lda arg + 3         ; 3 cyc
    bpl positive        ; 2 cyc

negative:
    lda arg + 2         ; 3 cyc
    beq neg2            ; 2 cyc - low byte is $00: dec will borrow

    dec arg + 2         ; 5 cyc
    jmp next            ; 3 cyc

neg2:
    dec arg + 2         ; 5 cyc - $00 -> $FF
    dec arg + 3         ; 5 cyc - propagate the borrow
    jmp next            ; 3 cyc

positive:
    inc arg + 2         ; 5 cyc
    bne next            ; 2 cyc - result non-zero: no wrap, no carry
    inc arg + 3         ; 5 cyc - low byte wrapped $FF -> $00: carry

zero:
next:

.endmacro
|
||||||
|
|
||||||
|
.macro round16_addsub arg
    ; Round the top 16 bits (arg+2 = low byte, arg+3 = high byte) of the
    ; 32-bit fixed-point number at arg, in place, using adc/sbc.
    ;
    ; Rounding is applied only when bit 7 of arg+1 (the top bit of the
    ; discarded low 16 bits) is set. The sign is read from bit 7 of arg+3:
    ; non-negative values get 1 added to the top word, negative values get
    ; 1 subtracted from it.
    ;
    ; NOTE(review): if arg is plain two's complement, a set bit 15 means the
    ; value lies above the truncated top word for either sign, so the
    ; negative path may need to add as well -- confirm against the code
    ; that produces arg before relying on negative results.
    ;
    ; Clobbers: A and the N, V, Z, C flags.
    ; Assumes decimal mode is clear (adc/sbc honor the D flag) -- TODO confirm.
    .local zero
    .local one
    .local positive
    .local negative
    .local next

    ; Cycle estimates (zero-page arg; every branch counted as 2 cycles,
    ; so paths with a taken branch cost 1 more per taken branch):
    ; no round - 5 cycles
    ; one, pos - 28 cycles
    ; one, neg - 31 cycles
    ; average = 5 / 2 + (28 + 31) / 4
    ;         = 5 / 2 + 59 / 4
    ;         = 2.5 + 14.75
    ;         = 17.25 cycles average on evenly distributed data

    lda arg + 1         ; 3 cyc
    bpl zero            ; 2 cyc - round bit clear: nothing to do

one:
    ; check sign bit
    lda arg + 3         ; 3 cyc
    bpl positive        ; 2 cyc

negative:
    sec                 ; 2 cyc - no borrow going in
    lda arg + 2         ; 3 cyc
    sbc #1              ; 2 cyc
    sta arg + 2         ; 3 cyc
    lda arg + 3         ; 3 cyc
    sbc #0              ; 2 cyc - propagate the borrow
    sta arg + 3         ; 3 cyc - store the high byte (was a stray lda)
    jmp next            ; 3 cyc

positive:
    clc                 ; 2 cyc - no carry going in
    lda arg + 2         ; 3 cyc
    adc #1              ; 2 cyc
    sta arg + 2         ; 3 cyc
    lda arg + 3         ; 3 cyc
    adc #0              ; 2 cyc - propagate the carry
    sta arg + 3         ; 3 cyc

zero:
next:

.endmacro
|
||||||
|
|
||||||
|
|
||||||
.proc iter
|
.proc iter
|
||||||
; (cx and cy should be pre-scaled to 6.26 fixed point)
|
; (cx and cy should be pre-scaled to 6.26 fixed point)
|
||||||
; zx = 0
|
; zx = 0
|
||||||
|
|
Loading…
Reference in a new issue