; mandel-6502/mandel.s
; 6502 assembly source (ca65 syntax).

; FP registers in zero page
; Five zero-page locations spaced 6 bytes apart. In the code visible in this
; file, FR0 and FR1 hold the two 16-bit operands of imul16 and FR2 receives
; its 32-bit result; FRE and FRX are defined but not referenced here.
; NOTE(review): the names and addresses look like the Atari 8-bit OS
; floating-point package registers -- confirm against the target platform.
FR0 = $d4
FRE = $da
FR1 = $e0
FR2 = $e6
FRX = $ec
; Everything below is emitted into the code segment; `start` is exported so
; it is visible outside this module.
.code
.export start
; sext16to32 arg
; Sign-extend the 16-bit little-endian value at arg..arg+1 to 32 bits:
; arg+2 and arg+3 are filled with $ff when bit 15 of the value is set,
; and with $00 otherwise. arg and arg+1 are not modified.
; Clobbers: A and the C/N/Z flags.
.macro sext16to32 arg
    .local store
    lda arg + 1         ; high byte carries the sign in bit 7
    asl                 ; sign -> carry
    lda #$00            ; fill byte for a non-negative value
    bcc store           ; carry clear: value is non-negative, keep $00
    lda #$ff            ; carry set: value is negative, extend with ones
store:
    sta arg + 2
    sta arg + 3
.endmacro
; copy bytes, arg1, arg2
; Copy `bytes` consecutive bytes from arg1 (source) to arg2 (destination),
; lowest address first. The code is fully unrolled: one lda/sta pair per byte.
; Clobbers: A and the N/Z flags.
.macro copy bytes, arg1, arg2
    .repeat bytes, i    ; honour the requested width (was fixed at 2)
        lda arg1 + i
        sta arg2 + i
    .endrepeat
.endmacro

; copy16 arg1, arg2 -- copy a 2-byte value from arg1 to arg2.
.macro copy16 arg1, arg2
    copy 2, arg1, arg2
.endmacro

; copy32 arg1, arg2 -- copy a 4-byte value from arg1 to arg2.
.macro copy32 arg1, arg2
    copy 4, arg1, arg2
.endmacro
; neg bytes, arg
; Two's-complement negate, in place, the little-endian integer of `bytes`
; bytes stored at arg. Each byte is replaced by 0 - byte, with the borrow
; rippling from the lowest byte to the highest.
; Clobbers: A and the flags.
; Cost with zero-page operands: 2 + 8 * bytes cycles.
.macro neg bytes, arg
    sec                         ; 2 cyc - no borrow into the lowest byte
    .repeat bytes, idx          ; 8 cyc per byte
        lda #0                  ; 2 cyc
        sbc arg+idx             ; 3 cyc
        sta arg+idx             ; 3 cyc
    .endrepeat
.endmacro

; neg16 arg -- negate the 16-bit value at arg (18 cycles).
.macro neg16 arg
    neg 2, arg
.endmacro

; neg32 arg -- negate the 32-bit value at arg (34 cycles).
.macro neg32 arg
    neg 4, arg
.endmacro
; add bytes, arg1, arg2
; Multi-byte addition: arg1 = arg1 + arg2, both little-endian integers of
; `bytes` bytes. The carry ripples from the lowest byte to the highest and
; the carry out of the top byte is left in C for the caller.
; Clobbers: A and the flags.
; Cost with zero-page operands: 2 + 9 * bytes cycles.
.macro add bytes, arg1, arg2
    clc                         ; 2 cyc - no carry into the lowest byte
    .repeat bytes, i            ; 9 cyc per byte
        lda arg1 + i            ; 3 cyc
        adc arg2 + i            ; 3 cyc
        sta arg1 + i            ; 3 cyc
    .endrepeat
.endmacro

; add16 arg1, arg2 -- 16-bit arg1 += arg2 (20 cycles).
.macro add16 arg1, arg2
    add 2, arg1, arg2
.endmacro

; add32 arg1, arg2 -- 32-bit arg1 += arg2 (38 cycles).
.macro add32 arg1, arg2
    add 4, arg1, arg2
.endmacro
; shl bytes, arg
; Shift the little-endian integer of `bytes` bytes at arg left by one bit,
; in place. The lowest byte is shifted first and its top bit is rotated
; through carry into each higher byte in turn; the bit shifted out of the
; highest byte ends up in C.
; Clobbers: the flags.
.macro shl bytes, arg
    asl arg                     ; lowest byte: 0 in, bit 7 out to carry
    .repeat bytes - 1, i
        rol arg + 1 + i         ; next byte up: carry in, bit 7 out
    .endrepeat
.endmacro

; shl16 arg -- shift the 16-bit value at arg left by one bit.
.macro shl16 arg
    shl 2, arg
.endmacro

; shl24 arg -- shift the 24-bit value at arg left by one bit.
.macro shl24 arg
    shl 3, arg
.endmacro

; shl32 arg -- shift the 32-bit value at arg left by one bit.
.macro shl32 arg
    shl 4, arg
.endmacro
; shr bytes, arg
; Logical shift right by one bit, in place, of the little-endian integer of
; `bytes` bytes at arg. A right shift has to start at the most significant
; byte (arg + bytes - 1) so that each byte's bit 0 is carried into bit 7 of
; the next byte down; the bit shifted out of the lowest byte ends up in C.
; Clobbers: the flags.
.macro shr bytes, arg
    lsr arg + bytes - 1             ; highest byte: 0 in, bit 0 out to carry
    .repeat bytes - 1, i
        ror arg + bytes - 2 - i     ; next byte down: carry in, bit 0 out
    .endrepeat
.endmacro

; shr16 arg -- shift the 16-bit value at arg right by one bit.
.macro shr16 arg
    shr 2, arg
.endmacro

; shr24 arg -- shift the 24-bit value at arg right by one bit.
.macro shr24 arg
    shr 3, arg
.endmacro

; shr32 arg -- shift the 32-bit value at arg right by one bit.
.macro shr32 arg
    shr 4, arg
.endmacro
; bitmul16 arg1, arg2, result, bitnum
; One step of an unrolled shift-and-add multiply. If bit `bitnum` (0..15) of
; the 16-bit value at arg1 is set, the 16-bit value at arg2 is added into the
; top 16 bits of the 32-bit accumulator at result (result+2, result+3). The
; accumulator is then shifted right one bit, with the carry out of the add
; (or 0 when the add is skipped) entering at the top.
;
; Requirements:
;   arg1 must be 0 or positive
;   arg2 must be 0 or positive
; Clobbers: A and the flags.
; Cost with zero-page operands, branch not crossing a page:
;   bitnum < 8:  25 cycles (bit clear) to 44 cycles (bit set)
;   bitnum >= 8: 30 cycles (bit clear) to 49 cycles (bit set)
.macro bitmul16 arg1, arg2, result, bitnum
    .local next

    ; Carry must be 0 at the shift below when the add is skipped; the bit
    ; test that follows does not touch it.
    clc                             ; 2 cyc

    ; check if arg1 has 0 or 1 bit in this place
    ; 5 cycles either way
    .if bitnum < 8
        lda arg1                    ; 3 cyc
        and #(1 << bitnum)          ; 2 cyc
    .else
        lda arg1 + 1                ; 3 cyc
        and #(1 << (bitnum - 8))    ; 2 cyc
    .endif
    beq next                        ; 2 cyc (3 if taken)

    ; 16-bit add on the top bits; leaves the carry out of bit 15 in C
    add16 result + 2, arg2          ; 20 cyc

next:
    ; Shift the 32-bit result down by one bit,
    ; saving the previous carry.
    ror result + 3                  ; 5 cyc
    ror result + 2                  ; 5 cyc
    ror result + 1                  ; 5 cyc
    ; For bitnum < 8 the lowest byte is not shifted: the bits leaving
    ; result+1 during those steps are still its uninitialized contents, so
    ; skipping saves 5 cycles * 8 bits = 40 cycles in a full multiply.
    .if bitnum >= 8
        ror result                  ; 5 cyc
    .endif
.endmacro
; check_sign arg
; Check the sign bit of the 16-bit value at arg and flip a negative argument
; to positive in place, keeping a count of negative arguments in the X
; register: X is incremented once for each argument that had to be negated.
; The caller is responsible for initializing X.
; Clobbers: A and the flags; X as described.
; Cost: 6 cycles (non-negative, branch taken) to 25 cycles (negated).
.macro check_sign arg
    .local positive
    lda arg + 1         ; 3 cyc - high byte, sign in bit 7
    bpl positive        ; 2 cyc (3 if taken)
    neg16 arg           ; 18 cyc
    inx                 ; 2 cyc
positive:
.endmacro
; imul16
; Signed 16 x 16 -> 32-bit multiply.
;   In:  FR0 = 16-bit signed multiplier, FR1 = 16-bit signed multiplicand
;   Out: FR2 = 32-bit signed product (little-endian)
; Both inputs are clobbered: each negative input is replaced by its negation.
; Clobbers: A, X and the flags.
; Author's estimate: 579 to 725 cycles (not re-derived here).
.proc imul16
    arg1   = FR0        ; 16-bit arg (clobbered)
    arg2   = FR1        ; 16-bit arg (clobbered)
    result = FR2        ; 32-bit result

    ; Make both operands non-negative, counting in X how many were negative.
    ldx #0                          ; 2 cyc
    check_sign arg1
    check_sign arg2

    ; zero out the 32-bit temp's top 16 bits
    lda #0                          ; 2 cyc
    sta result + 2                  ; 3 cyc
    sta result + 3                  ; 3 cyc
    ; the bottom two bytes will get cleared by the shifts

    ; unrolled loop for maximum speed, at the cost
    ; of a larger routine: one add-and-shift step per bit of arg1
    .repeat 16, bitnum
        bitmul16 arg1, arg2, result, bitnum
    .endrepeat

    ; In case of mixed input signs (exactly one negative input),
    ; return a negative result.
    cpx #1                          ; 2 cyc
    bne positive_result             ; 2 cyc (3 if taken)
    neg32 result                    ; 34 cyc
positive_result:
    rts                             ; 6 cyc
.endproc
; iter
; Placeholder for the per-point Mandelbrot iteration. It contains only the
; planned algorithm as pseudo-code and emits no instructions yet, so it must
; not be called: execution would run straight into whatever code follows.
.proc iter
    ; (cx and cy should be pre-scaled to 6.26 fixed point)
    ; zx = 0
    ; zy = 0
    ; zx_2 = 0
    ; zy_2 = 0
    ; zx_zy = 0

    ; still working on the fixed-point
loop:
    ; iters++

    ; 6.26:
    ; zx = zx_2 - zy_2 + cx     (real part of z^2 + c)
    ; zy = zx_zy + zx_zy + cy   (imaginary part: 2 * zx * zy + cy)
    ; round to 6.10.

    ; 12.20:
    ; zx_2 = zx * zx
    ; zy_2 = zy * zy
    ; zx_zy = zx * zy
    ; dist = zx_2 + zy_2
    ; if dist >= 4 break, else continue iterating
    ; round zx_2, zy_2, zx_zy, dist to 6.26

    ; if may be in the lake, look for looping output with a small buffer
    ; as an optimization vs running to max iters
.endproc
; start
; Exported entry point. Test harness for imul16: loads 5 and -3 into the
; operand registers, calls imul16, and repeats forever. It never returns.
.proc start
loop:
    ; FR0 = 5
    lda #5
    sta FR0
    lda #0
    sta FR0 + 1

    ; FR1 = -3 ($fffd)
    lda #$fd
    sta FR1
    lda #$ff
    sta FR1 + 1

    jsr imul16
    ; should have 32-bit -15 ($fffffff1) in FR2

    jmp loop
.endproc