mandel-6502/mandel.s
2022-12-29 20:18:21 -08:00

211 lines
3.2 KiB
ArmAsm

; ---------------------------------------------------------------------
; Atari OS floating-point package equates.
; Symbol names are kept exactly as the rest of this file spells them;
; where the Atari documentation uses a different spelling it is noted.
; ---------------------------------------------------------------------

; FP registers in zero page (the four FRn registers are 6 bytes apart)
FR0     = $d4           ; FP register 0
FRE     = $da           ; FP extra register
FR1     = $e0           ; FP register 1
FR2     = $e6           ; FP register 2
FRX     = $ec           ; spare byte
EEXP    = $ed           ; exponent value
NSIGN   = $ee           ; sign of the number
ESIGN   = $ef           ; sign of the exponent
FLPTR   = $fc           ; pointer to an FP number (used by the ...P routines)
FPTR2   = $fe           ; second FP pointer

; FP routines (entry points in the OS floating-point ROM)
AFP     = $D800         ; ASCII -> FP
FASC    = $D8E6         ; FP -> ASCII
IFP     = $D9AA         ; 16-bit integer -> FP
FIP     = $D9D2         ; FP -> 16-bit integer (documented as FPI)
ZFR0    = $DA44         ; zero FR0
ZFI     = $DA46         ; zero the register addressed by X (documented as ZF1)
FSUB    = $DA60         ; FR0 = FR0 - FR1
FADD    = $DA66         ; FR0 = FR0 + FR1
FMUL    = $DADB         ; FR0 = FR0 * FR1
FDIV    = $DB28         ; FR0 = FR0 / FR1
PLYVEL  = $DD40         ; polynomial evaluation (documented as PLYEVL)
; The next three addresses were corrected: the previous values
; ($DD49, $DD89, $DD89) did not match the documented entry points and
; made FLD0P and FLD1R aliases of the same address.
FLD0R   = $DD89         ; load FR0 from pointer in X/Y
FLD0P   = $DD8D         ; load FR0 from pointer in FLPTR
FLD1R   = $DD98         ; load FR1 from pointer in X/Y
FLD1P   = $DD9C         ; load FR1 from pointer in FLPTR
FST0R   = $DDA7         ; store FR0 to pointer in X/Y
FST0P   = $DDAB         ; store FR0 to pointer in FLPTR
FMOVE   = $DDB6         ; FR0 -> FR1
EXP     = $DDC0         ; e ^ FR0
EXP10   = $DDCC         ; 10 ^ FR0
LOG     = $DECD         ; natural logarithm of FR0
LOG10   = $DED1         ; base-10 logarithm of FR0
.code
.export start

; start: exported entry point.
; Currently just parks the CPU in an endless jump-to-self; nothing else
; in this file is called from here.
.proc start
spin:
    jmp spin
.endproc
; mandelfloat: empty placeholder -- no instructions are assembled for it.
; NOTE(review): presumably reserved for a Mandelbrot routine built on the
; OS floating-point equates declared in this file; confirm intent.
; NOTE(review): there is no rts, so a call to this label would continue
; into whatever code is assembled next.
.proc mandelfloat
.endproc
; sext16to32 arg
;   Sign-extend the 16-bit little-endian value at arg/arg+1 to 32 bits by
;   filling arg+2 and arg+3 with $ff when bit 15 is set, $00 otherwise.
;   Clobbers: A (holds the fill byte afterwards), N and Z flags.
.macro sext16to32 arg
    .local negative
    .local fill
    lda arg + 1             ; high byte carries the sign in bit 7
    bmi negative
    lda #$00                ; non-negative: extend with zeros
    jmp fill
negative:
    lda #$ff                ; negative: extend with ones
fill:
    sta arg + 2
    sta arg + 3
.endmacro
; copy bytes, arg1, arg2
;   Copy `bytes` consecutive bytes from arg1 to arg2, lowest address first.
;   bytes must be an assembly-time constant.
;   Clobbers: A, N and Z flags.
;
;   The repeat count now honours `bytes`; it was previously hard-coded to
;   2, so a 4-byte copy silently moved only the low 16 bits.
.macro copy bytes, arg1, arg2
    .repeat bytes, byte
        lda arg1 + byte
        sta arg2 + byte
    .endrepeat
.endmacro
; copy16 src, dst -- shorthand for `copy` with a byte count of 2.
.macro copy16 src, dst
    copy 2, src, dst
.endmacro

; copy32 src, dst -- shorthand for `copy` with a byte count of 4.
.macro copy32 src, dst
    copy 4, src, dst
.endmacro
; add bytes, dst, src
;   Multi-byte addition: dst = dst + src over `bytes` bytes, starting at
;   the lowest address and propagating carry upward (little-endian).
;   Clobbers: A and the flags; C holds the carry out of the top byte.
.macro add bytes, dst, src
    clc                         ; no carry into the lowest byte
    .repeat bytes, idx
        lda dst + idx
        adc src + idx
        sta dst + idx
    .endrepeat
.endmacro
; add16 dst, src -- shorthand for `add` with a byte count of 2.
.macro add16 dst, src
    add 2, dst, src
.endmacro

; add32 dst, src -- shorthand for `add` with a byte count of 4.
.macro add32 dst, src
    add 4, dst, src
.endmacro
; shl bytes, arg
;   Shift the `bytes`-byte little-endian value at arg left by one bit,
;   in place. The lowest byte is shifted with asl (zero enters bit 0);
;   every higher byte is rotated with rol so the carry ripples upward.
;   Clobbers: flags only; C holds the bit shifted out of the top byte.
.macro shl bytes, arg
    .repeat bytes, idx
        .if idx = 0
            asl arg
        .else
            rol arg + idx
        .endif
    .endrepeat
.endmacro
; shl16 target -- shorthand for `shl` with a byte count of 2.
.macro shl16 target
    shl 2, target
.endmacro

; shl24 target -- shorthand for `shl` with a byte count of 3.
.macro shl24 target
    shl 3, target
.endmacro

; shl32 target -- shorthand for `shl` with a byte count of 4.
.macro shl32 target
    shl 4, target
.endmacro
; shr bytes, arg
;   Logical (unsigned) shift right by one bit of the `bytes`-byte
;   little-endian value at arg, in place.
;   bytes must be an assembly-time constant >= 1.
;   Clobbers: flags only; C holds the bit shifted out of the lowest byte.
;
;   A right shift has to start at the MOST significant byte (lsr, zero
;   enters bit 7) and rotate toward the lowest byte so each byte's bit 0
;   becomes bit 7 of the byte below it. The previous version started at
;   the lowest byte and rotated upward, which moved bits in the wrong
;   direction between bytes.
.macro shr bytes, arg
    lsr arg + bytes - 1
    .repeat bytes - 1, byte
        ror arg + bytes - 2 - byte
    .endrepeat
.endmacro
; shr16 target -- shorthand for `shr` with a byte count of 2.
.macro shr16 target
    shr 2, target
.endmacro

; shr24 target -- shorthand for `shr` with a byte count of 3.
.macro shr24 target
    shr 3, target
.endmacro

; shr32 target -- shorthand for `shr` with a byte count of 4.
.macro shr32 target
    shr 4, target
.endmacro
; bitmul16 arg1, arg2, result, bitnum
;   One step of a shift-and-add multiply. Intended to be expanded for
;   bitnum = 0..15 in ascending order.
;     arg1   - 16-bit multiplier (little-endian); only bit `bitnum` is read
;     arg2   - 16-bit multiplicand (little-endian)
;     result - 32-bit accumulator (little-endian)
;     bitnum - assembly-time constant selecting the multiplier bit
;   If the selected bit is set, arg2 is added into the upper 16 bits of
;   result; the accumulator is then rotated right one bit, with the carry
;   out of that addition entering bit 31.
;   Clobbers: A and the flags.
.macro bitmul16 arg1, arg2, result, bitnum
    .local skip_add
    ; Pick the byte (bitnum / 8) and the bit within it (bitnum mod 8).
    lda arg1 + ((bitnum) >> 3)
    and #(1 << ((bitnum) & 7))
    ; Carry must be clear on both paths: it is the carry-in of the add,
    ; and on the skip path it is the zero rotated into the top bit.
    clc
    beq skip_add
    ; Multiplier bit is 1: 16-bit add into the top half of the result.
    lda result + 2
    adc arg2
    sta result + 2
    lda result + 3
    adc arg2 + 1
    sta result + 3
skip_add:
    ; Rotate the accumulator down one bit, pulling in the saved carry.
    ror result + 3
    ror result + 2
    ror result + 1
    .if bitnum >= 8
        ; During the first eight steps nothing meaningful has reached the
        ; lowest byte yet, so its rotate is omitted (5 cycles saved per
        ; step, 40 in total). The eight rotates in steps 8-15 replace
        ; every bit of this byte.
        ror result
    .endif
.endmacro
; imul16: fully unrolled 16 x 16 -> 32 bit multiply.
;   In:  FR0, FR0+1  16-bit operand (little-endian)
;        FR1, FR1+1  16-bit operand (little-endian)
;   Out: FR2..FR2+3  32-bit product (little-endian)
;   No sign handling is performed here.
;   Clobbers: A and the flags.
.proc imul16
    ; Only the upper half of the accumulator needs clearing; the lower
    ; two bytes are completely replaced by the sixteen rotate steps.
    lda #$00
    sta FR2 + 3
    sta FR2 + 2

    ; One shift-and-add step per multiplier bit, lowest bit first.
    .repeat 16, step
        bitmul16 FR0, FR1, FR2, step
    .endrepeat

    rts
.endproc
.proc iter
; Outline of one fixed-point Mandelbrot iteration loop. Only the
; pseudo-code below exists so far; this procedure assembles to no
; instructions.
; NOTE(review): there is no rts yet, so a call to this label would
; continue into whatever code is assembled after this procedure.
;
; (cx and cy should be pre-scaled to 6.26 fixed point)
; zx = 0
; zx_2 = 0
; zy = 0
; zy_2 = 0
loop:
; iters++
; zx_next = zx_2 - zy_2 + cx    (real part of z^2 + c)
; zy_next = 2 * zx * zy + cy    (imaginary part of z^2 + c)
; (detect overflows to -4 or +4 and break if necessary)
; (re-downshift into zx and zy as 3.13 fixed point; round.)
; zx_2 = zx * zx
; zy_2 = zy * zy
; dist = zx_2 + zy_2
; if dist >= 4 break, else continue iterating
.endproc