; Forked from brooke/mandel-6502.
; FP registers in zero page
|
|
FR0 = $d4
|
|
FRE = $da
|
|
FR1 = $e0
|
|
FR2 = $e6
|
|
FRX = $ec
|
|
EEXP = $ed
|
|
NSIGN = $ee
|
|
ESIGN = $ef
|
|
FLPTR = $fc
|
|
FPTR2 = $fe
|
|
|
|
; FP routines
|
|
AFP = $D800
|
|
FASC = $D8E6
|
|
IFP = $D9AA
|
|
FIP = $D9D2
|
|
ZFR0 = $DA44
|
|
ZFI = $DA46
|
|
FSUB = $DA60
|
|
FADD = $DA66
|
|
FMUL = $DADB
|
|
FDIV = $DB28
|
|
PLYVEL = $DD40
|
|
FLD0R = $DD49 ; from pointer in X/Y
|
|
FLD0P = $DD89 ; from pointer in FLPTR
|
|
FLD1R = $DD89
|
|
FLD1P = $DD9c
|
|
FST0R = $DDA7
|
|
FST0P = $DDAB
|
|
FMOVE = $DDB6 ; FR0 -> FR1
|
|
EXP = $DDC0
|
|
EXP10 = $DDCC
|
|
LOG = $decd
|
|
LOG10 = $ded1
|
|
|
|
|
|
.code
|
|
|
|
.export start
|
|
|
|
.proc start
|
|
loop:
|
|
jmp loop
|
|
.endproc
|
|
|
|
.proc mandelfloat
|
|
.endproc
|
|
|
|
.macro sext16to32 arg
|
|
.local plus
|
|
.local minus
|
|
lda arg+1
|
|
bpl plus
|
|
lda #$ff
|
|
jmp minus
|
|
plus:
|
|
lda #$00
|
|
minus:
|
|
sta arg+2
|
|
sta arg+3
|
|
.endmacro
|
|
|
|
.macro copy bytes, arg1, arg2
|
|
.repeat 2, byte
|
|
lda arg1+byte
|
|
sta arg2+byte
|
|
.endrepeat
|
|
.endmacro
|
|
|
|
.macro copy16 arg1, arg2
|
|
copy 2, arg1, arg2
|
|
.endmacro
|
|
|
|
.macro copy32 arg1, arg2
|
|
copy 4, arg1, arg2
|
|
.endmacro
|
|
|
|
.macro add bytes, arg1, arg2
|
|
clc
|
|
.repeat bytes, byte
|
|
lda arg1+byte
|
|
adc arg2+byte
|
|
sta arg1+byte
|
|
.endrepeat
|
|
.endmacro
|
|
|
|
.macro add16 arg1, arg2
|
|
add 2, arg1, arg2
|
|
.endmacro
|
|
|
|
.macro add32 arg1, arg2
|
|
add 4, arg1, arg2
|
|
.endmacro
|
|
|
|
.macro shl bytes, arg
|
|
asl arg
|
|
.repeat bytes-1, byte
|
|
rol arg+byte+1
|
|
.endrepeat
|
|
.endmacro
|
|
|
|
.macro shl16 arg
|
|
shl 2, arg
|
|
.endmacro
|
|
|
|
.macro shl24 arg
|
|
shl 3, arg
|
|
.endmacro
|
|
|
|
.macro shl32 arg
|
|
shl 4, arg
|
|
.endmacro
|
|
|
|
.macro shr bytes, arg
|
|
lsr arg
|
|
.repeat bytes-1, byte
|
|
ror arg+byte+1
|
|
.endrepeat
|
|
.endmacro
|
|
|
|
.macro shr16 arg
|
|
shr 2, arg
|
|
.endmacro
|
|
|
|
.macro shr24 arg
|
|
shr 3, arg
|
|
.endmacro
|
|
|
|
.macro shr32 arg
|
|
shr 4, arg
|
|
.endmacro
|
|
|
|
.macro checkbit arg, bits
|
|
.if bits < 8
|
|
lda arg
|
|
and #(1 << bits)
|
|
.else
|
|
lda arg + 1
|
|
and #(1 << (bits - 8))
|
|
.endif
|
|
.endmacro
|
|
|
|
.macro bitmul arg1, arg2, res, bits
|
|
.local next
|
|
checkbit arg2, bits
|
|
beq next
|
|
add32 res, arg1
|
|
next:
|
|
shl32 arg1
|
|
.endmacro
|
|
|
|
.proc imul16
|
|
; 16-bit arg in FR0
|
|
; 16-bit arg in FR1
|
|
; 16-bit result in FR0
|
|
|
|
; sign-extend the argument
|
|
sext16to32 FR0
|
|
|
|
; zero out the 32-bit temp
|
|
lda #0
|
|
sta FRX
|
|
sta FRX+1
|
|
sta FRX+2
|
|
sta FRX+3
|
|
|
|
; shift and add :D
|
|
.repeat 16, bitnum
|
|
bitmul FR0, FR1, FRX, bitnum
|
|
.endrepeat
|
|
|
|
; Re-normalize the ones place
|
|
shr24 FRX
|
|
shr24 FRX
|
|
shr24 FRX
|
|
|
|
; @fixme round the last bit
|
|
|
|
; And copy out our result
|
|
copy16 FRX+2, FR0
|
|
; @fixme could save a few cycles by combining the last two ops
|
|
|
|
.endproc
|
|
|
|
.proc iter
|
|
; (cx and cy should be pre-scaled to 6.26 fixed point)
|
|
|
|
; zx = 0
|
|
; zx_2 = 0
|
|
; zy = 0
|
|
; zx_2 = 0
|
|
|
|
loop:
|
|
; iters++
|
|
|
|
; zx_next = zx_2 + zy_2 + cx
|
|
; zy_next = 2 * zx * zy + cy
|
|
; (detect overflows to -4 or +4 and break if necessary)
|
|
; (re-downshift into zx and zy as 3.13 fixed point; round.)
|
|
|
|
; zx_2 = zx * zx
|
|
; zy_2 = zy * zy
|
|
; dist = zx_2 + zy_2
|
|
|
|
; if dist >= 4 break, else continue iterating
|
|
|
|
.endproc
|