2022-12-29 05:08:16 +00:00
|
|
|
; ---------------------------------------------------------------------
; Zero-page locations used by the Atari OS floating-point package.
; The integer routines in this file borrow FR0/FR1/FR2 as plain
; multi-byte scratch registers.
; ---------------------------------------------------------------------
FR0     = $d4           ; FP register 0
FRE     = $da           ; FP extra register
FR1     = $e0           ; FP register 1
FR2     = $e6           ; FP register 2
FRX     = $ec           ; FP spare byte
EEXP    = $ed           ; exponent value
NSIGN   = $ee           ; sign of the number
ESIGN   = $ef           ; sign of the exponent
FLPTR   = $fc           ; pointer to an FP number
FPTR2   = $fe           ; second FP pointer
|
|
|
|
|
|
|
|
; ---------------------------------------------------------------------
; Entry points of the Atari OS floating-point ROM package.
;
; Fixed: FLD0P and FLD1R were both set to $DD89, which is the address
; of FLD0R. The documented entry points are $DD8D and $DD98.
;
; NOTE: three names differ from the official OS equates but are kept
; as-is so existing references keep assembling:
;   FIP    (official: FPI)
;   ZFI    (official: ZF1)
;   PLYVEL (official: PLYEVL)
; ---------------------------------------------------------------------
AFP     = $D800         ; ASCII -> FP
FASC    = $D8E6         ; FP -> ASCII
IFP     = $D9AA         ; 16-bit integer -> FP
FIP     = $D9D2         ; FP -> 16-bit integer
ZFR0    = $DA44         ; clear FR0
ZFI     = $DA46         ; clear the FP register addressed by X
FSUB    = $DA60         ; FR0 = FR0 - FR1
FADD    = $DA66         ; FR0 = FR0 + FR1
FMUL    = $DADB         ; FR0 = FR0 * FR1
FDIV    = $DB28         ; FR0 = FR0 / FR1
PLYVEL  = $DD40         ; polynomial evaluation
FLD0R   = $DD89         ; load FR0 from pointer in X/Y
FLD0P   = $DD8D         ; load FR0 from pointer in FLPTR
FLD1R   = $DD98         ; load FR1 from pointer in X/Y
FLD1P   = $DD9C         ; load FR1 from pointer in FLPTR
FST0R   = $DDA7         ; store FR0 to pointer in X/Y
FST0P   = $DDAB         ; store FR0 to pointer in FLPTR
FMOVE   = $DDB6         ; FR0 -> FR1
EXP     = $DDC0         ; e ^ FR0
EXP10   = $DDCC         ; 10 ^ FR0
LOG     = $DECD         ; natural logarithm of FR0
LOG10   = $DED1         ; base-10 logarithm of FR0
|
|
|
|
|
|
|
|
|
|
|
|
.code
|
|
|
|
|
|
|
|
.export start
|
|
|
|
|
|
|
|
; sext16to32 arg
; Sign-extend the little-endian 16-bit value at arg..arg+1 to 32 bits
; by filling arg+2 and arg+3 with copies of the sign bit.
;
; Fixed: the fill values were swapped, so non-negative inputs were
; extended with $ff and negative inputs with $00.
;
; Clobbers: A, flags (carry holds the sign bit afterwards).
.macro sext16to32 arg
    .local fill
    lda arg + 1
    asl                 ; sign bit -> carry
    lda #$00            ; assume non-negative: extend with zeros
    bcc fill
    lda #$ff            ; negative: extend with ones
fill:
    sta arg + 2
    sta arg + 3
.endmacro
|
|
|
|
|
|
|
|
; copy bytes, arg1, arg2
; Copy `bytes` consecutive bytes from arg1 to arg2, lowest address first.
;
; Fixed: the repeat count was hard-coded to 2, so the `bytes` parameter
; was ignored and a 4-byte copy moved only the low two bytes.
;
; Clobbers: A, N/Z flags.
.macro copy bytes, arg1, arg2
    .repeat bytes, offs
        lda arg1 + offs
        sta arg2 + offs
    .endrepeat
.endmacro
|
|
|
|
|
|
|
|
; copy16 arg1, arg2
; Two-byte form of `copy`: arg2 <- arg1.
.macro copy16 arg1, arg2
    copy 2, arg1, arg2
.endmacro
|
|
|
|
|
|
|
|
; copy32 arg1, arg2
; Invokes `copy` with a byte count of 4 (source arg1, destination arg2).
.macro copy32 arg1, arg2
    copy 4, arg1, arg2
.endmacro
|
|
|
|
|
2022-12-30 08:43:44 +00:00
|
|
|
; neg bytes, arg
; Two's-complement negate the little-endian `bytes`-byte value at arg,
; in place, by computing 0 - arg with the borrow carried upward.
;
; Cost with zero-page operands: 2 + 8 * bytes cycles.
; Clobbers: A, flags.
.macro neg bytes, arg
    sec                         ; 2 cycles: no borrow going in
    .repeat bytes, offs         ; 8 cycles per byte
        lda #0                  ; 2 cycles
        sbc arg + offs          ; 3 cycles
        sta arg + offs          ; 3 cycles
    .endrepeat
.endmacro
|
|
|
|
|
|
|
|
; neg16 arg
; Negate the 16-bit value at arg in place (see `neg`).
; 18 cycles with a zero-page operand.
.macro neg16 arg
    neg 2, arg
.endmacro
|
|
|
|
|
|
|
|
; neg32 arg
; Negate the 32-bit value at arg in place (see `neg`).
; 34 cycles with a zero-page operand.
.macro neg32 arg
    neg 4, arg
.endmacro
|
|
|
|
|
2022-12-29 05:08:16 +00:00
|
|
|
; add bytes, arg1, arg2
; Multi-byte little-endian addition: arg1 <- arg1 + arg2 over `bytes`
; bytes. The carry out of the top byte is left in the carry flag.
;
; Clobbers: A, flags.
.macro add bytes, arg1, arg2
    clc                         ; no carry into the lowest byte
    .repeat bytes, offs
        lda arg1 + offs
        adc arg2 + offs
        sta arg1 + offs
    .endrepeat
.endmacro
|
|
|
|
|
|
|
|
; add16 arg1, arg2
; 16-bit form of `add`: arg1 <- arg1 + arg2.
.macro add16 arg1, arg2
    add 2, arg1, arg2
.endmacro
|
|
|
|
|
|
|
|
; add32 arg1, arg2
; 32-bit form of `add`: arg1 <- arg1 + arg2.
.macro add32 arg1, arg2
    add 4, arg1, arg2
.endmacro
|
|
|
|
|
|
|
|
; shl bytes, arg
; Shift the little-endian `bytes`-byte value at arg left by one bit,
; in place. A zero enters bit 0; the bit shifted out of the top byte
; is left in the carry flag.
;
; Clobbers: flags (A is untouched).
.macro shl bytes, arg
    asl arg                     ; lowest byte first, top bit -> carry
    .repeat bytes - 1, idx
        rol arg + 1 + idx       ; pull the carry up through the higher bytes
    .endrepeat
.endmacro
|
|
|
|
|
|
|
|
; shl16 arg
; 16-bit form of `shl`.
.macro shl16 arg
    shl 2, arg
.endmacro
|
|
|
|
|
|
|
|
; shl24 arg
; 24-bit form of `shl`.
.macro shl24 arg
    shl 3, arg
.endmacro
|
|
|
|
|
|
|
|
; shl32 arg
; 32-bit form of `shl`.
.macro shl32 arg
    shl 4, arg
.endmacro
|
|
|
|
|
|
|
|
; shr bytes, arg
; Logical shift right by one bit of the little-endian `bytes`-byte
; value at arg, in place. A zero enters the top bit; the bit shifted
; out of the lowest byte is left in the carry flag.
;
; Fixed: the shift used to start with `lsr` on the lowest byte and
; rotate upward, which moved bit 0 of each byte into bit 7 of the next
; higher byte. A right shift has to start at the most significant byte
; and carry downward.
;
; Clobbers: flags (A is untouched).
.macro shr bytes, arg
    lsr arg + bytes - 1             ; top byte first, bit 0 -> carry
    .repeat bytes - 1, idx
        ror arg + bytes - 2 - idx   ; pull the carry down through the lower bytes
    .endrepeat
.endmacro
|
|
|
|
|
|
|
|
; shr16 arg
; Invokes `shr` with a byte count of 2.
.macro shr16 arg
    shr 2, arg
.endmacro
|
|
|
|
|
|
|
|
; shr24 arg
; Invokes `shr` with a byte count of 3.
.macro shr24 arg
    shr 3, arg
.endmacro
|
|
|
|
|
|
|
|
; shr32 arg
; Invokes `shr` with a byte count of 4.
.macro shr32 arg
    shr 4, arg
.endmacro
|
|
|
|
|
2022-12-30 04:18:21 +00:00
|
|
|
; bitmul16 arg1, arg2, result, bitnum
; One step of a shift-and-add 16x16 -> 32 bit multiply.
;
; If bit `bitnum` of arg1 is set, arg2 is added (16-bit add) into the
; top half of result (result+2..result+3). The result is then rotated
; right by one bit, with the carry of that add going into the top bit.
;
;   arg1, arg2  must be zero or positive
;   result      32-bit little-endian accumulator
;   bitnum      constant 0..15, the bit of arg1 handled by this step
;
; Clobbers: A, flags.
.macro bitmul16 arg1, arg2, result, bitnum
    .local rotate

    clc                             ; carry-in for the add, and the bit
                                    ; rotated in when nothing is added

    ; Test the selected bit of arg1: byte bitnum/8, bit bitnum mod 8.
    lda arg1 + (bitnum / 8)
    and #(1 << (bitnum & 7))
    beq rotate

    ; Bit is set: 16-bit add of arg2 into the upper half of result.
    lda result + 2
    adc arg2
    sta result + 2
    lda result + 3
    adc arg2 + 1
    sta result + 3

rotate:
    ; Rotate the result right one bit, pulling in the carry from above.
    ror result + 3
    ror result + 2
    ror result + 1
    .if bitnum > 7
        ; During the first eight steps the lowest byte would only
        ; receive uninitialized bits, so rotating it is skipped
        ; (5 cycles * 8 steps = 40 cycles saved overall).
        ror result
    .endif
.endmacro
|
|
|
|
|
2022-12-30 08:43:44 +00:00
|
|
|
; check_sign arg
; If the 16-bit value at arg is negative (bit 7 of arg+1 set), negate
; it in place and increment X. A caller that zeroes X beforehand ends
; up with the number of negative arguments in X.
;
; Clobbers: A, flags; X is incremented for a negative argument.
.macro check_sign arg
    .local done
    lda arg + 1                 ; high byte carries the sign bit
    bpl done
    neg16 arg                   ; flip to positive
    inx                         ; count one more negative input
done:
.endmacro
|
2022-12-29 05:08:16 +00:00
|
|
|
|
2022-12-30 08:43:44 +00:00
|
|
|
; imul16
; Signed 16 x 16 -> 32 bit multiply.
;
;   In:   FR0 (arg1) = 16-bit signed multiplicand, little-endian
;         FR1 (arg2) = 16-bit signed multiplier, little-endian
;   Out:  FR2 (result) = 32-bit signed product, little-endian
;   Clobbers: A, X, flags; arg1 and arg2 are left as absolute values.
;
; Both inputs are made positive, multiplied with sixteen unrolled
; shift-and-add steps, and the product is negated when exactly one
; input was negative.
.proc imul16
    arg1 = FR0                  ; 16-bit arg (clobbered)
    arg2 = FR1                  ; 16-bit arg (clobbered)
    result = FR2                ; 32-bit result

    ; Clear the upper 16 bits of the accumulator. The lower two bytes
    ; need no initialization: the shifts below fill them.
    lda #0
    sta result + 2
    sta result + 3

    ; X counts how many of the inputs were negative.
    ldx #0
    check_sign arg1
    check_sign arg2

    ; Fully unrolled for speed, at the cost of a larger routine.
    .repeat 16, bitnum
        bitmul16 arg1, arg2, result, bitnum
    .endrepeat

    ; Mixed input signs (exactly one negative) give a negative product.
    cpx #1
    bne done
    neg32 result
done:
    rts
.endproc
|
|
|
|
|
|
|
|
; iter
; Placeholder for the escape-time iteration z = z^2 + c. Only the
; planned algorithm is written down so far.
;
; Fixed:
;   - the stub had no return, so calling it fell through into the
;     code that follows; it now returns immediately.
;   - the outline computed zx as zx_2 + zy_2 + cx; the real part of
;     z^2 + c is zx_2 - zy_2 + cx.
.proc iter
    ; (cx and cy should be pre-scaled to 6.26 fixed point)
    ; zx = 0
    ; zy = 0
    ; zx_2 = 0
    ; zy_2 = 0
    ; zx_zy = 0

loop:
    ; iters++

    ; 6.26:
    ; zx = zx_2 - zy_2 + cx
    ; zy = zx_zy + zx_zy + cy
    ; round to 6.10.

    ; 12.20:
    ; zx_2 = zx * zx
    ; zy_2 = zy * zy
    ; dist = zx_2 + zy_2
    ; if dist >= 4 break, else continue iterating

    ; round zx_2, zy_2, dist to 6.26

    ; If the point may be in the lake, look for repeating output with
    ; a small buffer, as an optimization over running to max iters.

    rts                         ; not implemented yet
.endproc
|
2022-12-30 04:32:58 +00:00
|
|
|
|
|
|
|
; start
; Program entry point: a never-ending smoke test that multiplies
; 5 by -3 with imul16 on every pass.
.proc start
    factor_a = 5
    factor_b = -3

again:
    ; FR0 = 5
    lda #<factor_a
    sta FR0
    lda #>factor_a
    sta FR0 + 1

    ; FR1 = -3 ($fffd)
    lda #<factor_b
    sta FR1
    lda #>factor_b
    sta FR1 + 1

    jsr imul16
    ; FR2 should now hold the 32-bit value -15

    jmp again
.endproc
|