Compare commits
4 commits
main
...
shrink-tab
Author | SHA1 | Date | |
---|---|---|---|
3553ce986f | |||
0f49760aa5 | |||
f06aed0c00 | |||
aee587388d |
4 changed files with 223 additions and 403 deletions
569
mandel.s
569
mandel.s
|
@ -1,42 +1,43 @@
|
||||||
; Our zero-page vars
|
; Our zero-page vars
|
||||||
ox = $80 ; fixed6.26: center point x
|
sx = $80 ; i16: screen pixel x
|
||||||
oy = $84 ; fixed6.26: center point y
|
sy = $82 ; i16: screen pixel y
|
||||||
cx = $88 ; fixed6.26: c_x
|
ox = $84 ; fixed4.12: center point x
|
||||||
cy = $8c ; fixed6.26: c_y
|
oy = $86 ; fixed4.12: center point y
|
||||||
|
cx = $88 ; fixed4.12: c_x
|
||||||
|
cy = $8a ; fixed4.12: c_y
|
||||||
|
zx = $8c ; fixed4.12: z_x
|
||||||
|
zy = $8e ; fixed4.12: z_y
|
||||||
|
|
||||||
zx = $90 ; fixed6.26: z_x
|
zx_2 = $90 ; fixed4.12: z_x^2
|
||||||
zy = $94 ; fixed6.26: z_y
|
zy_2 = $92 ; fixed4.12: z_y^2
|
||||||
zx_2 = $98 ; fixed6.26: z_x^2
|
zx_zy = $94 ; fixed4.12: z_x * z_y
|
||||||
zy_2 = $9c ; fixed6.26: z_y^2
|
dist = $96 ; fixed4.12: z_x^2 + z_y^2
|
||||||
|
|
||||||
zx_zy = $a0 ; fixed6.26: z_x * z_y
|
iter = $a0 ; u8: iteration count
|
||||||
dist = $a4 ; fixed6.26: z_x^2 + z_y^2
|
|
||||||
sx = $a8 ; i16: screen pixel x
|
|
||||||
sy = $aa ; i16: screen pixel y
|
|
||||||
z_buffer_active = $ac ; boolean: 1 if we triggered the lake, 0 if not
|
|
||||||
z_buffer_start = $ad ; u8: index into z_buffer
|
|
||||||
z_buffer_end = $ae ; u8: index into z_buffer
|
|
||||||
iter = $af ; u8: iteration count
|
|
||||||
|
|
||||||
ptr = $b0 ; u16
|
zoom = $a1 ; u8: zoom shift level
|
||||||
pixel_ptr = $b2 ; u16
|
count_frames = $a2 ; u8
|
||||||
zoom = $b4 ; u8: zoom shift level
|
count_pixels = $a3 ; u8
|
||||||
fill_level = $b5 ; u8
|
total_ms = $a4 ; float48
|
||||||
pixel_color = $b6 ; u8
|
total_pixels = $aa ; float48
|
||||||
pixel_mask = $b7 ; u8
|
|
||||||
pixel_shift = $b8 ; u8
|
|
||||||
pixel_offset = $b9 ; u8
|
|
||||||
palette_offset = $ba ; u8
|
|
||||||
chroma_offset = $bb ; u8
|
|
||||||
palette_ticks = $bc ; u8
|
|
||||||
chroma_ticks = $bd ; u8
|
|
||||||
count_frames = $be ; u8
|
|
||||||
; free space $bf
|
|
||||||
|
|
||||||
count_iters = $c0 ; u16
|
z_buffer_active = $b0 ; boolean: 1 if we triggered the lake, 0 if not
|
||||||
; free space c2-cb
|
z_buffer_start = $b1 ; u8: index into z_buffer
|
||||||
temp = $cc ; u16
|
z_buffer_end = $b2 ; u8: index into z_buffer
|
||||||
temp2 = $ce ; u16
|
temp = $b4 ; u16
|
||||||
|
temp2 = $b6 ; u16
|
||||||
|
pixel_ptr = $b8 ; u16
|
||||||
|
pixel_color = $ba ; u8
|
||||||
|
pixel_mask = $bb ; u8
|
||||||
|
pixel_shift = $bc ; u8
|
||||||
|
pixel_offset = $bd ; u8
|
||||||
|
fill_level = $be ; u8
|
||||||
|
palette_offset = $bf ; u8
|
||||||
|
|
||||||
|
palette_ticks = $c0 ; u8
|
||||||
|
chroma_ticks = $c1 ; u8
|
||||||
|
chroma_offset = $c2 ; u8
|
||||||
|
ptr = $c4 ; u16
|
||||||
|
|
||||||
palette_delay = 23
|
palette_delay = 23
|
||||||
chroma_delay = 137
|
chroma_delay = 137
|
||||||
|
@ -59,12 +60,10 @@ LBUFF = $0580 ; result buffer for FASC routine
|
||||||
; FP ROM routine vectors
|
; FP ROM routine vectors
|
||||||
FASC = $D8E6 ; FLOATING POINT TO ASCII (output in INBUFF, last char has high bit set)
|
FASC = $D8E6 ; FLOATING POINT TO ASCII (output in INBUFF, last char has high bit set)
|
||||||
IFP = $D9AA ; INTEGER TO FLOATING POINT CONVERSION (FR0:u16 -> FR0:float48)
|
IFP = $D9AA ; INTEGER TO FLOATING POINT CONVERSION (FR0:u16 -> FR0:float48)
|
||||||
FPI = $D9D2 ; floating point to integer
|
|
||||||
FADD = $DA66 ; ADDITION (FR0 += FR1)
|
FADD = $DA66 ; ADDITION (FR0 += FR1)
|
||||||
FSUB = $DA60 ; SUBTRACTION (FR0 -= FR1)
|
FSUB = $DA60 ; SUBTRACTION (FR0 -= FR1)
|
||||||
FMUL = $DADB ; MULTIPLICATION (FR0 *= FR1)
|
FMUL = $DADB ; MULTIPLICATION (FR0 *= FR1)
|
||||||
FDIV = $DB28 ; DIVISION (FR0 /= FR1)
|
FDIV = $DB28 ; DIVISION (FR0 /= FR1)
|
||||||
ZFR0 = $DA44 ; clear FR0
|
|
||||||
ZF1 = $DA46 ; CLEAR ZERO PAGE FLOATING POINT NUMBER (XX)
|
ZF1 = $DA46 ; CLEAR ZERO PAGE FLOATING POINT NUMBER (XX)
|
||||||
FLD0R = $DD89 ; LOAD FR0 WITH FLOATING POINT NUMBER (YYXX)
|
FLD0R = $DD89 ; LOAD FR0 WITH FLOATING POINT NUMBER (YYXX)
|
||||||
FLD1R = $DD98 ; LOAD FR1 WITH FLOATING POINT NUMBER (YYXX)
|
FLD1R = $DD98 ; LOAD FR1 WITH FLOATING POINT NUMBER (YYXX)
|
||||||
|
@ -130,11 +129,8 @@ KEY_0 = 50
|
||||||
mantissa .byte 5
|
mantissa .byte 5
|
||||||
.endstruct
|
.endstruct
|
||||||
|
|
||||||
.import mul_lobyte256
|
.import mul_lobyte
|
||||||
.import mul_hibyte256
|
.import mul_hibyte
|
||||||
.import mul_hibyte512
|
|
||||||
.import sqr_lobyte
|
|
||||||
.import sqr_hibyte
|
|
||||||
|
|
||||||
.data
|
.data
|
||||||
|
|
||||||
|
@ -143,7 +139,7 @@ str_self:
|
||||||
.byte "MANDEL-6502"
|
.byte "MANDEL-6502"
|
||||||
str_self_end:
|
str_self_end:
|
||||||
str_speed:
|
str_speed:
|
||||||
.byte "us/iter: "
|
.byte " ms/px"
|
||||||
str_speed_end:
|
str_speed_end:
|
||||||
str_run:
|
str_run:
|
||||||
.byte " RUN"
|
.byte " RUN"
|
||||||
|
@ -191,38 +187,20 @@ aspect:
|
||||||
;
|
;
|
||||||
; 184h is the equiv of 220.8h at square pixels
|
; 184h is the equiv of 220.8h at square pixels
|
||||||
; 320 / 220.8 = 1.45 display aspect ratio
|
; 320 / 220.8 = 1.45 display aspect ratio
|
||||||
aspect_x: ; fixed3.13 5/4
|
aspect_x: ; fixed4.12 5/4
|
||||||
.word 5 << (13 - 2)
|
.word 5 << (12 - 2)
|
||||||
|
|
||||||
aspect_y: ; fixed3.13 3/4
|
aspect_y: ; fixed4.12 3/4
|
||||||
.word 3 << (13 - 2)
|
.word 3 << (12 - 2)
|
||||||
|
|
||||||
sec_per_frame: ; float48 00 . 01 66 66 66 67
|
ms_per_frame: ; float48 16.66666667
|
||||||
.byte 63 ; exponent/sign - -1 bytes
|
.byte 64 ; exponent/sign
|
||||||
.byte $01 ; BCD digits
|
.byte $16 ; BCD digits
|
||||||
.byte $66
|
.byte $66
|
||||||
.byte $66
|
.byte $66
|
||||||
.byte $66
|
.byte $66
|
||||||
.byte $67
|
.byte $67
|
||||||
|
|
||||||
us_per_sec: ; float48 1e9 01 00 0,0 00 . 00
|
|
||||||
.byte 67 ; exponent/sign +3 bytes
|
|
||||||
.byte $01 ; BCD digits
|
|
||||||
.byte $00
|
|
||||||
.byte $00
|
|
||||||
.byte $00
|
|
||||||
.byte $00
|
|
||||||
|
|
||||||
total_iters: ; float48
|
|
||||||
.repeat 6
|
|
||||||
.byte 0
|
|
||||||
.endrepeat
|
|
||||||
|
|
||||||
total_sec: ; float48
|
|
||||||
.repeat 6
|
|
||||||
.byte 0
|
|
||||||
.endrepeat
|
|
||||||
|
|
||||||
display_list_start:
|
display_list_start:
|
||||||
; 24 lines overscan
|
; 24 lines overscan
|
||||||
.repeat 3
|
.repeat 3
|
||||||
|
@ -254,9 +232,9 @@ display_list_len = display_list_end - display_list_start
|
||||||
color_map:
|
color_map:
|
||||||
.byte 0
|
.byte 0
|
||||||
.repeat 85
|
.repeat 85
|
||||||
.byte %01010101
|
.byte 1
|
||||||
.byte %10101010
|
.byte 2
|
||||||
.byte %11111111
|
.byte 3
|
||||||
.endrepeat
|
.endrepeat
|
||||||
|
|
||||||
|
|
||||||
|
@ -305,34 +283,23 @@ fill_masks:
|
||||||
.byte %00000001
|
.byte %00000001
|
||||||
.byte %00000000
|
.byte %00000000
|
||||||
|
|
||||||
pixel_masks:
|
|
||||||
.byte %11111111
|
|
||||||
.byte %11110000
|
|
||||||
.byte %11000000
|
|
||||||
|
|
||||||
viewport_zoom:
|
viewport_zoom:
|
||||||
.byte 0
|
.byte 1
|
||||||
.byte 5
|
.byte 6
|
||||||
.byte 7
|
.byte 8
|
||||||
.byte 5
|
.byte 6
|
||||||
.byte 7
|
|
||||||
.byte 7
|
|
||||||
|
|
||||||
viewport_ox:
|
viewport_ox:
|
||||||
.dword ($00000000 & $3fffffff) << 2
|
.word $0000
|
||||||
.dword ($ff110000 & $3fffffff) << 2
|
.word $f110
|
||||||
.dword ($ff110000 & $3fffffff) << 2
|
.word $f110
|
||||||
.dword ($fe400000 & $3fffffff) << 2
|
.word $e400
|
||||||
.dword ($fe3b0000 & $3fffffff) << 2
|
|
||||||
.dword $fd220000
|
|
||||||
|
|
||||||
viewport_oy:
|
viewport_oy:
|
||||||
.dword ($00000000 & $3fffffff) << 2
|
.word $0000
|
||||||
.dword ($ffb60000 & $3fffffff) << 2
|
.word $fb60
|
||||||
.dword ($ffbe0000 & $3fffffff) << 2
|
.word $fbe0
|
||||||
.dword ($00000000 & $3fffffff) << 2
|
.word $0000
|
||||||
.dword ($fffe0000 & $3fffffff) << 2
|
|
||||||
.dword $ff000000
|
|
||||||
|
|
||||||
; 2 + 9 * byte cycles
|
; 2 + 9 * byte cycles
|
||||||
.macro add bytes, dest, arg1, arg2
|
.macro add bytes, dest, arg1, arg2
|
||||||
|
@ -351,7 +318,7 @@ viewport_oy:
|
||||||
|
|
||||||
; 38 cycles
|
; 38 cycles
|
||||||
.macro add32 dest, arg1, arg2
|
.macro add32 dest, arg1, arg2
|
||||||
add 4, dest, arg1, arg2
|
add 4, dest, arg2, dest
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; 8 cycles
|
; 8 cycles
|
||||||
|
@ -381,7 +348,7 @@ viewport_oy:
|
||||||
sub 4, dest, arg1, arg2
|
sub 4, dest, arg1, arg2
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; 3 + 5 * bytes cycles
|
; 3 + 5 * (bytes - 1) cycles
|
||||||
.macro shl bytes, arg
|
.macro shl bytes, arg
|
||||||
asl arg ; 3 cyc
|
asl arg ; 3 cyc
|
||||||
.repeat bytes-1, i
|
.repeat bytes-1, i
|
||||||
|
@ -389,17 +356,17 @@ viewport_oy:
|
||||||
.endrepeat
|
.endrepeat
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; 13 cycles
|
; 8 cycles
|
||||||
.macro shl16 arg
|
.macro shl16 arg
|
||||||
shl 2, arg
|
shl 2, arg
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; 18 cycles
|
; 13 cycles
|
||||||
.macro shl24 arg
|
.macro shl24 arg
|
||||||
shl 3, arg
|
shl 3, arg
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; 23 cycles
|
; 18 cycles
|
||||||
.macro shl32 arg
|
.macro shl32 arg
|
||||||
shl 4, arg
|
shl 4, arg
|
||||||
.endmacro
|
.endmacro
|
||||||
|
@ -456,31 +423,32 @@ viewport_oy:
|
||||||
round16 arg ; 11-27 cycles
|
round16 arg ; 11-27 cycles
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; input: arg1, arg2 as fixed4.12
|
.macro imul16_round dest, arg1, arg2, shift
|
||||||
; output: dest as fixed8.24
|
|
||||||
.macro imul16 dest, arg1, arg2
|
|
||||||
copy16 FR0, arg1 ; 12 cyc
|
copy16 FR0, arg1 ; 12 cyc
|
||||||
copy16 FR1, arg2 ; 12 cyc
|
copy16 FR1, arg2 ; 12 cyc
|
||||||
jsr imul16_func ; ? cyc
|
jsr imul16_func ; ? cyc
|
||||||
copy32 dest, FR2 ; 24 cyc
|
shift_round_16 FR2, shift ; 103-119 cycles for shift=4
|
||||||
|
copy16 dest, FR2 + 2 ; 12 cyc
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; input: arg as fixed4.12
|
.macro sqr16_round dest, arg, shift
|
||||||
; output: dest as fixed8.24
|
;imul16_round dest, arg, arg, shift
|
||||||
.macro sqr16 dest, arg
|
|
||||||
copy16 FR0, arg ; 12 cyc
|
copy16 FR0, arg ; 12 cyc
|
||||||
jsr sqr16_func ; ? cyc
|
jsr sqr16_func ; ? cyc
|
||||||
copy32 dest, FR2 ; 24 cyc
|
shift_round_16 FR2, shift ; 103-119 cycles for shift=4
|
||||||
|
copy16 dest, FR2 + 2 ; 12 cyc
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; input: arg as u8
|
|
||||||
; output: dest as u16
|
|
||||||
; clobbers a, x
|
; clobbers a, x
|
||||||
.macro sqr8 dest, arg
|
.macro sqr8 dest, arg
|
||||||
ldx arg
|
ldx arg
|
||||||
lda sqr_lobyte,x
|
txa
|
||||||
|
lsr
|
||||||
|
lda mul_lobyte,x
|
||||||
|
rol
|
||||||
sta dest
|
sta dest
|
||||||
lda sqr_hibyte,x
|
lda mul_hibyte,x
|
||||||
|
rol
|
||||||
sta dest + 1
|
sta dest + 1
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
|
@ -569,22 +537,25 @@ bank_switch_table:
|
||||||
clc ; 2 cyc
|
clc ; 2 cyc
|
||||||
adc mul_factor_x ; 3 cyc
|
adc mul_factor_x ; 3 cyc
|
||||||
tax ; 2 cyc
|
tax ; 2 cyc
|
||||||
bcc under256 ; 2 cyc
|
lda mul_hibyte,x ; 4 cyc
|
||||||
lda mul_hibyte512,x ; 4 cyc
|
bcc next ; 2 cyc
|
||||||
bcs next ; 2 cyc
|
; carry is set here (bcc fell through), so adc #$7f adds $7f + 1 = $80
|
||||||
under256:
|
adc #$7f ; 2 cyc
|
||||||
lda mul_hibyte256,x ; 4 cyc
|
clc ; 2 cyc
|
||||||
sec ; 2 cyc
|
; stash the sum temporarily so we can use it as an operand to add
|
||||||
|
stx mul_product_lo ; 3 cyc
|
||||||
|
adc mul_product_lo ; 3 cyc
|
||||||
next:
|
next:
|
||||||
|
sec ; 2 cyc
|
||||||
sta mul_product_hi ; 3 cyc
|
sta mul_product_hi ; 3 cyc
|
||||||
lda mul_lobyte256,x ; 4 cyc
|
lda mul_lobyte,x ; 4 cyc
|
||||||
|
|
||||||
; - a^2/2
|
; - a^2/2
|
||||||
ldx mul_factor_a ; 3 cyc
|
ldx mul_factor_a ; 3 cyc
|
||||||
sbc mul_lobyte256,x ; 4 cyc
|
sbc mul_lobyte,x ; 4 cyc
|
||||||
sta mul_product_lo ; 3 cyc
|
sta mul_product_lo ; 3 cyc
|
||||||
lda mul_product_hi ; 3 cyc
|
lda mul_product_hi ; 3 cyc
|
||||||
sbc mul_hibyte256,x ; 4 cyc
|
sbc mul_hibyte,x ; 4 cyc
|
||||||
sta mul_product_hi ; 3 cyc
|
sta mul_product_hi ; 3 cyc
|
||||||
|
|
||||||
; + x & a & 1:
|
; + x & a & 1:
|
||||||
|
@ -603,10 +574,10 @@ bank_switch_table:
|
||||||
; - x^2/2
|
; - x^2/2
|
||||||
small_product:
|
small_product:
|
||||||
sec ; 2 cyc
|
sec ; 2 cyc
|
||||||
sbc mul_lobyte256,x ; 4 cyc
|
sbc mul_lobyte,x ; 4 cyc
|
||||||
sta mul_product_lo ; 3 cyc
|
sta mul_product_lo ; 3 cyc
|
||||||
lda mul_product_hi ; 3 cyc
|
lda mul_product_hi ; 3 cyc
|
||||||
sbc mul_hibyte256,x ; 4 cyc
|
sbc mul_hibyte,x ; 4 cyc
|
||||||
sta mul_product_hi ; 3 cyc
|
sta mul_product_hi ; 3 cyc
|
||||||
.endscope
|
.endscope
|
||||||
.endif
|
.endif
|
||||||
|
@ -766,8 +737,9 @@ inner_loop:
|
||||||
; h1*h2*256*256 + h1*l2*256 + h2*l1*256 + l1*l2
|
; h1*h2*256*256 + h1*l2*256 + h2*l1*256 + l1*l2
|
||||||
|
|
||||||
imul8 result, arg1, arg2, xe
|
imul8 result, arg1, arg2, xe
|
||||||
|
lda #0
|
||||||
imul8 result + 2, arg1 + 1, arg2 + 1, xe
|
sta result + 2
|
||||||
|
sta result + 3
|
||||||
|
|
||||||
imul8 inter, arg1 + 1, arg2, xe
|
imul8 inter, arg1 + 1, arg2, xe
|
||||||
add16 result + 1, result + 1, inter
|
add16 result + 1, result + 1, inter
|
||||||
|
@ -777,6 +749,9 @@ inner_loop:
|
||||||
add16 result + 1, result + 1, inter
|
add16 result + 1, result + 1, inter
|
||||||
add_carry result + 3
|
add_carry result + 3
|
||||||
|
|
||||||
|
imul8 inter, arg1 + 1, arg2 + 1, xe
|
||||||
|
add16 result + 2, result + 2, inter
|
||||||
|
|
||||||
; In case of negative inputs, adjust high word
|
; In case of negative inputs, adjust high word
|
||||||
; https://stackoverflow.com/a/28827013
|
; https://stackoverflow.com/a/28827013
|
||||||
lda arg1 + 1
|
lda arg1 + 1
|
||||||
|
@ -809,11 +784,10 @@ arg2_pos:
|
||||||
; h*h*256*256 + h*l*256 + h*l*256 + l*l
|
; h*h*256*256 + h*l*256 + h*l*256 + l*l
|
||||||
|
|
||||||
sqr8 result, arg
|
sqr8 result, arg
|
||||||
|
|
||||||
sqr8 result + 2, arg + 1
|
sqr8 result + 2, arg + 1
|
||||||
|
|
||||||
imul8 inter, arg + 1, arg, xe
|
imul8 inter, arg + 1, arg, xe
|
||||||
add16 result + 1, result + 1, inter
|
shl16 inter
|
||||||
add_carry result + 3
|
add_carry result + 3
|
||||||
add16 result + 1, result + 1, inter
|
add16 result + 1, result + 1, inter
|
||||||
add_carry result + 3
|
add_carry result + 3
|
||||||
|
@ -885,8 +859,8 @@ next:
|
||||||
|
|
||||||
.proc mandelbrot
|
.proc mandelbrot
|
||||||
; input:
|
; input:
|
||||||
; cx: position scaled to 6.26 fixed point - -32..+31.9
|
; cx: position scaled to 4.12 fixed point, range -8..+7.9
|
||||||
; cy: position scaled to 6.26
|
; cy: position scaled to 4.12
|
||||||
;
|
;
|
||||||
; output:
|
; output:
|
||||||
; iter: iteration count at escape or 0
|
; iter: iteration count at escape or 0
|
||||||
|
@ -898,50 +872,16 @@ next:
|
||||||
; zx_zy = 0
|
; zx_zy = 0
|
||||||
; dist = 0
|
; dist = 0
|
||||||
; iter = 0
|
; iter = 0
|
||||||
; lda #00
|
|
||||||
; ldx #(iter - zx + 1)
|
|
||||||
;initloop:
|
|
||||||
; sta zx - 1,x
|
|
||||||
; dex
|
|
||||||
; bne initloop
|
|
||||||
; sta z_buffer_start
|
|
||||||
; sta z_buffer_end
|
|
||||||
|
|
||||||
lda #00
|
lda #00
|
||||||
sta zx
|
ldx #(iter - zx + 1)
|
||||||
sta zx + 1
|
initloop:
|
||||||
sta zx + 2
|
sta zx - 1,x
|
||||||
sta zx + 3
|
dex
|
||||||
sta zy
|
bne initloop
|
||||||
sta zy + 1
|
|
||||||
sta zy + 2
|
|
||||||
sta zy + 3
|
|
||||||
sta zx_2
|
|
||||||
sta zx_2 + 1
|
|
||||||
sta zx_2 + 2
|
|
||||||
sta zx_2 + 3
|
|
||||||
sta zy_2
|
|
||||||
sta zy_2 + 1
|
|
||||||
sta zy_2 + 2
|
|
||||||
sta zy_2 + 3
|
|
||||||
sta zx_zy
|
|
||||||
sta zx_zy + 1
|
|
||||||
sta zx_zy + 2
|
|
||||||
sta zx_zy + 3
|
|
||||||
sta dist
|
|
||||||
sta dist + 1
|
|
||||||
sta dist + 2
|
|
||||||
sta dist + 3
|
|
||||||
sta iter
|
|
||||||
sta z_buffer_start
|
sta z_buffer_start
|
||||||
sta z_buffer_end
|
sta z_buffer_end
|
||||||
|
|
||||||
loop:
|
loop:
|
||||||
inc count_iters
|
|
||||||
bne low_iters
|
|
||||||
inc count_iters + 1
|
|
||||||
low_iters:
|
|
||||||
|
|
||||||
; iter++ & max-iters break
|
; iter++ & max-iters break
|
||||||
inc iter
|
inc iter
|
||||||
bne keep_going
|
bne keep_going
|
||||||
|
@ -949,8 +889,6 @@ low_iters:
|
||||||
keep_going:
|
keep_going:
|
||||||
|
|
||||||
.macro quick_exit arg, max
|
.macro quick_exit arg, max
|
||||||
; arg: fixed6.26
|
|
||||||
; max: integer
|
|
||||||
.local positive
|
.local positive
|
||||||
.local negative
|
.local negative
|
||||||
.local nope_out
|
.local nope_out
|
||||||
|
@ -958,16 +896,16 @@ keep_going:
|
||||||
.local all_done
|
.local all_done
|
||||||
|
|
||||||
; check sign bit
|
; check sign bit
|
||||||
lda arg + 3
|
lda arg + 1
|
||||||
bmi negative
|
bmi negative
|
||||||
|
|
||||||
positive:
|
positive:
|
||||||
cmp #(max << 2)
|
cmp #((max) << 4)
|
||||||
bmi all_done ; 'less than'
|
bmi all_done ; 'less than'
|
||||||
jmp exit_path
|
jmp exit_path
|
||||||
|
|
||||||
negative:
|
negative:
|
||||||
cmp #(256 - (max << 2))
|
cmp #(256 - ((max) << 4))
|
||||||
beq first_equal ; 'equal' on first byte
|
beq first_equal ; 'equal' on first byte
|
||||||
bpl all_done ; 'greater than'
|
bpl all_done ; 'greater than'
|
||||||
|
|
||||||
|
@ -975,44 +913,34 @@ keep_going:
|
||||||
jmp exit_path
|
jmp exit_path
|
||||||
|
|
||||||
first_equal:
|
first_equal:
|
||||||
; following bytes all 0 shows it's really 'equal'
|
|
||||||
lda arg + 2
|
|
||||||
bne all_done
|
|
||||||
lda arg + 1
|
|
||||||
bne all_done
|
|
||||||
lda arg
|
lda arg
|
||||||
bne all_done
|
beq nope_out ; 2nd byte 0 shows it's really 'equal'
|
||||||
jmp exit_path
|
|
||||||
|
|
||||||
all_done:
|
all_done:
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
; 6.26: (-32 .. 31.9)
|
; 4.12: (-8 .. +7.9)
|
||||||
; zx = zx_2 - zy_2 + cx
|
; zx = zx_2 - zy_2 + cx
|
||||||
sub32 zx, zx_2, zy_2
|
sub16 zx, zx_2, zy_2
|
||||||
add32 zx, zx, cx
|
add16 zx, zx, cx
|
||||||
quick_exit zx, 2
|
quick_exit zx, 2
|
||||||
|
|
||||||
; zy = zx_zy + zx_zy + cy
|
; zy = zx_zy + zx_zy + cy
|
||||||
add32 zy, zx_zy, zx_zy
|
add16 zy, zx_zy, zx_zy
|
||||||
add32 zy, zy, cy
|
add16 zy, zy, cy
|
||||||
quick_exit zy, 2
|
quick_exit zy, 2
|
||||||
|
|
||||||
; convert 6.26 -> 3.13: (-4 .. +3.9)
|
|
||||||
shift_round_16 zx, 3
|
|
||||||
shift_round_16 zy, 3
|
|
||||||
|
|
||||||
; zx_2 = zx * zx
|
; zx_2 = zx * zx
|
||||||
sqr16 zx_2, zx + 2
|
sqr16_round zx_2, zx, 4
|
||||||
|
|
||||||
; zy_2 = zy * zy
|
; zy_2 = zy * zy
|
||||||
sqr16 zy_2, zy + 2
|
sqr16_round zy_2, zy, 4
|
||||||
|
|
||||||
; zx_zy = zx * zy
|
; zx_zy = zx * zy
|
||||||
imul16 zx_zy, zx + 2, zy + 2
|
imul16_round zx_zy, zx, zy, 4
|
||||||
|
|
||||||
; dist = zx_2 + zy_2
|
; dist = zx_2 + zy_2
|
||||||
add32 dist, zx_2, zy_2
|
add16 dist, zx_2, zy_2
|
||||||
quick_exit dist, 4
|
quick_exit dist, 4
|
||||||
|
|
||||||
; if may be in the lake, look for looping output with a small buffer
|
; if may be in the lake, look for looping output with a small buffer
|
||||||
|
@ -1049,10 +977,10 @@ z_buffer_loop:
|
||||||
|
|
||||||
; Compare the previously stored z values
|
; Compare the previously stored z values
|
||||||
ldy #0
|
ldy #0
|
||||||
z_compare zx + 2
|
z_compare zx
|
||||||
z_compare zx + 3
|
z_compare zx + 1
|
||||||
z_compare zy + 2
|
z_compare zy
|
||||||
z_compare zy + 3
|
z_compare zy + 1
|
||||||
|
|
||||||
cpy #4
|
cpy #4
|
||||||
bne z_no_matches
|
bne z_no_matches
|
||||||
|
@ -1067,10 +995,10 @@ z_no_matches:
|
||||||
z_nothing_to_read:
|
z_nothing_to_read:
|
||||||
|
|
||||||
; Store and expand
|
; Store and expand
|
||||||
z_store zx + 2
|
z_store zx
|
||||||
z_store zx + 3
|
z_store zx + 1
|
||||||
z_store zy + 2
|
z_store zy
|
||||||
z_store zy + 3
|
z_store zy + 1
|
||||||
z_advance
|
z_advance
|
||||||
stx z_buffer_end
|
stx z_buffer_end
|
||||||
|
|
||||||
|
@ -1121,17 +1049,14 @@ cont:
|
||||||
enough:
|
enough:
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
.macro zoom_factor dest, src, aspect
|
.macro zoom_factor dest, src, zoom, aspect
|
||||||
; output: dest: fixed6.26
|
|
||||||
; input: src: fixed3.13
|
|
||||||
; aspect: fixed3.13
|
|
||||||
; clobbers A, X, flags, etc
|
; clobbers A, X, flags, etc
|
||||||
copy16 dest, src
|
copy16 dest, src
|
||||||
scale_zoom dest
|
scale_zoom dest
|
||||||
|
|
||||||
; cy = cy * (3 / 4)
|
; cy = cy * (3 / 4)
|
||||||
; cx = cx * (5 / 4)
|
; cx = cx * (5 / 4)
|
||||||
imul16 dest, dest, aspect
|
imul16_round dest, dest, aspect, 4
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
.proc pset
|
.proc pset
|
||||||
|
@ -1142,11 +1067,8 @@ enough:
|
||||||
; iter -> color
|
; iter -> color
|
||||||
ldx iter
|
ldx iter
|
||||||
lda color_map,x
|
lda color_map,x
|
||||||
ldx fill_level
|
|
||||||
and pixel_masks,x
|
|
||||||
sta pixel_color
|
sta pixel_color
|
||||||
lda pixel_masks,x
|
lda #(255 - 3)
|
||||||
eor #$ff
|
|
||||||
sta pixel_mask
|
sta pixel_mask
|
||||||
|
|
||||||
; sy -> line base address in temp
|
; sy -> line base address in temp
|
||||||
|
@ -1195,23 +1117,22 @@ point:
|
||||||
; pixel_mask <<= pixel_shift (shifting in ones)
|
; pixel_mask <<= pixel_shift (shifting in ones)
|
||||||
and #3
|
and #3
|
||||||
sta pixel_shift
|
sta pixel_shift
|
||||||
|
lda #3
|
||||||
|
sec
|
||||||
|
sbc pixel_shift
|
||||||
tax
|
tax
|
||||||
shift_loop:
|
shift_loop:
|
||||||
beq shift_done
|
beq shift_done
|
||||||
lsr pixel_color
|
asl pixel_color
|
||||||
lsr pixel_color
|
asl pixel_color
|
||||||
sec
|
sec
|
||||||
ror pixel_mask
|
rol pixel_mask
|
||||||
sec
|
sec
|
||||||
ror pixel_mask
|
rol pixel_mask
|
||||||
dex
|
dex
|
||||||
jmp shift_loop
|
jmp shift_loop
|
||||||
shift_done:
|
shift_done:
|
||||||
|
|
||||||
ldy fill_level
|
|
||||||
ldx fill_masks,y
|
|
||||||
inx
|
|
||||||
|
|
||||||
; pixel_offset = temp >> 2
|
; pixel_offset = temp >> 2
|
||||||
lda temp
|
lda temp
|
||||||
lsr a
|
lsr a
|
||||||
|
@ -1219,25 +1140,12 @@ shift_done:
|
||||||
sta pixel_offset
|
sta pixel_offset
|
||||||
tay
|
tay
|
||||||
|
|
||||||
draw_pixel:
|
|
||||||
; read, mask, or, write
|
; read, mask, or, write
|
||||||
lda (pixel_ptr),y
|
lda (pixel_ptr),y
|
||||||
and pixel_mask
|
and pixel_mask
|
||||||
ora pixel_color
|
ora pixel_color
|
||||||
sta (pixel_ptr),y
|
sta (pixel_ptr),y
|
||||||
|
|
||||||
dex
|
|
||||||
beq done
|
|
||||||
clc
|
|
||||||
lda #40
|
|
||||||
adc pixel_ptr
|
|
||||||
sta pixel_ptr
|
|
||||||
lda #0
|
|
||||||
adc pixel_ptr + 1
|
|
||||||
sta pixel_ptr + 1
|
|
||||||
jmp draw_pixel
|
|
||||||
|
|
||||||
done:
|
|
||||||
rts
|
rts
|
||||||
.endproc
|
.endproc
|
||||||
|
|
||||||
|
@ -1245,7 +1153,6 @@ done:
|
||||||
; clobbers A, X
|
; clobbers A, X
|
||||||
.local loop
|
.local loop
|
||||||
.local done
|
.local done
|
||||||
.local padding
|
|
||||||
ldx #0
|
ldx #0
|
||||||
loop:
|
loop:
|
||||||
cpx #len
|
cpx #len
|
||||||
|
@ -1253,27 +1160,11 @@ loop:
|
||||||
txa
|
txa
|
||||||
tay
|
tay
|
||||||
lda (strptr),y
|
lda (strptr),y
|
||||||
pha ; save the char for terminator check
|
|
||||||
and #$7f ; strip the high bit (terminator)
|
|
||||||
tay
|
tay
|
||||||
lda char_map,y
|
lda char_map,y
|
||||||
sta textbuffer + col,x
|
sta textbuffer + col,x
|
||||||
inx
|
inx
|
||||||
|
|
||||||
pla
|
|
||||||
bmi padding
|
|
||||||
jmp loop
|
jmp loop
|
||||||
|
|
||||||
padding:
|
|
||||||
ldy #32 ; space
|
|
||||||
lda char_map,y
|
|
||||||
|
|
||||||
cpx #len
|
|
||||||
beq done
|
|
||||||
sta textbuffer + col,x
|
|
||||||
inx
|
|
||||||
jmp padding
|
|
||||||
|
|
||||||
done:
|
done:
|
||||||
.endmacro
|
.endmacro
|
||||||
|
|
||||||
|
@ -1390,15 +1281,12 @@ skip_luma:
|
||||||
cpy #KEY_MINUS
|
cpy #KEY_MINUS
|
||||||
beq minus
|
beq minus
|
||||||
|
|
||||||
; temp+temp2 = $00010000 << (8 - zoom)
|
; temp = $0010 << (8 - zoom)
|
||||||
lda #$00
|
lda #$10
|
||||||
sta temp
|
sta temp
|
||||||
sta temp + 1
|
|
||||||
lda #$01
|
|
||||||
sta temp + 2
|
|
||||||
lda #$00
|
lda #$00
|
||||||
sta temp + 3
|
sta temp + 1
|
||||||
scale_zoom temp + 2
|
scale_zoom temp
|
||||||
|
|
||||||
cpy #KEY_UP
|
cpy #KEY_UP
|
||||||
beq up
|
beq up
|
||||||
|
@ -1408,7 +1296,14 @@ skip_luma:
|
||||||
beq left
|
beq left
|
||||||
cpy #KEY_RIGHT
|
cpy #KEY_RIGHT
|
||||||
beq right
|
beq right
|
||||||
jmp number_keys
|
cpy #KEY_1
|
||||||
|
beq one
|
||||||
|
cpy #KEY_2
|
||||||
|
beq two
|
||||||
|
cpy #KEY_3
|
||||||
|
beq three
|
||||||
|
cpy #KEY_4
|
||||||
|
beq four
|
||||||
|
|
||||||
skip_char:
|
skip_char:
|
||||||
lda #0
|
lda #0
|
||||||
|
@ -1416,7 +1311,7 @@ skip_char:
|
||||||
|
|
||||||
plus:
|
plus:
|
||||||
lda zoom
|
lda zoom
|
||||||
cmp #7
|
cmp #8
|
||||||
bpl skip_char
|
bpl skip_char
|
||||||
inc zoom
|
inc zoom
|
||||||
jmp done
|
jmp done
|
||||||
|
@ -1427,33 +1322,17 @@ minus:
|
||||||
dec zoom
|
dec zoom
|
||||||
jmp done
|
jmp done
|
||||||
up:
|
up:
|
||||||
sub32 oy, oy, temp
|
sub16 oy, oy, temp
|
||||||
jmp done
|
jmp done
|
||||||
down:
|
down:
|
||||||
add32 oy, oy, temp
|
add16 oy, oy, temp
|
||||||
jmp done
|
jmp done
|
||||||
left:
|
left:
|
||||||
sub32 ox, ox, temp
|
sub16 ox, ox, temp
|
||||||
jmp done
|
jmp done
|
||||||
right:
|
right:
|
||||||
add32 ox, ox, temp
|
add16 ox, ox, temp
|
||||||
jmp done
|
jmp done
|
||||||
|
|
||||||
number_keys:
|
|
||||||
cpy #KEY_1
|
|
||||||
beq one
|
|
||||||
cpy #KEY_2
|
|
||||||
beq two
|
|
||||||
cpy #KEY_3
|
|
||||||
beq three
|
|
||||||
cpy #KEY_4
|
|
||||||
beq four
|
|
||||||
cpy #KEY_5
|
|
||||||
beq five
|
|
||||||
cpy #KEY_6
|
|
||||||
beq six
|
|
||||||
jmp skip_char
|
|
||||||
|
|
||||||
one:
|
one:
|
||||||
ldx #0
|
ldx #0
|
||||||
jmp load_key_viewport
|
jmp load_key_viewport
|
||||||
|
@ -1465,12 +1344,6 @@ three:
|
||||||
jmp load_key_viewport
|
jmp load_key_viewport
|
||||||
four:
|
four:
|
||||||
ldx #3
|
ldx #3
|
||||||
jmp load_key_viewport
|
|
||||||
five:
|
|
||||||
ldx #4
|
|
||||||
jmp load_key_viewport
|
|
||||||
six:
|
|
||||||
ldx #5
|
|
||||||
; fall through
|
; fall through
|
||||||
load_key_viewport:
|
load_key_viewport:
|
||||||
jsr load_viewport
|
jsr load_viewport
|
||||||
|
@ -1521,32 +1394,17 @@ zero_byte_loop:
|
||||||
|
|
||||||
txa
|
txa
|
||||||
asl a
|
asl a
|
||||||
asl a
|
|
||||||
|
|
||||||
tax
|
tax
|
||||||
lda viewport_ox,x
|
lda viewport_ox,x
|
||||||
sta ox
|
sta ox
|
||||||
lda viewport_oy,x
|
lda viewport_oy,x
|
||||||
sta oy
|
sta oy
|
||||||
|
|
||||||
inx
|
inx
|
||||||
lda viewport_ox,x
|
lda viewport_ox,x
|
||||||
sta ox + 1
|
sta ox + 1
|
||||||
lda viewport_oy,x
|
lda viewport_oy,x
|
||||||
sta oy + 1
|
sta oy + 1
|
||||||
|
|
||||||
inx
|
|
||||||
lda viewport_ox,x
|
|
||||||
sta ox + 2
|
|
||||||
lda viewport_oy,x
|
|
||||||
sta oy + 2
|
|
||||||
|
|
||||||
inx
|
|
||||||
lda viewport_ox,x
|
|
||||||
sta ox + 3
|
|
||||||
lda viewport_oy,x
|
|
||||||
sta oy + 3
|
|
||||||
|
|
||||||
rts
|
rts
|
||||||
.endproc
|
.endproc
|
||||||
|
|
||||||
|
@ -1601,20 +1459,16 @@ copy_byte_loop:
|
||||||
jsr SETVBV
|
jsr SETVBV
|
||||||
|
|
||||||
main_loop:
|
main_loop:
|
||||||
; count_frames = 0; count_iters = 0
|
; count_frames = 0; count_pixels = 0
|
||||||
lda #0
|
lda #0
|
||||||
sta count_frames
|
sta count_frames
|
||||||
sta count_iters
|
sta count_pixels
|
||||||
sta count_iters + 1
|
|
||||||
|
|
||||||
; total_sec = 0.0; total_iters = 0.0
|
; total_ms = 0.0; total_pixels = 0.0
|
||||||
jsr ZFR0
|
ldx #total_ms
|
||||||
ldx #.lobyte(total_sec)
|
jsr ZF1
|
||||||
ldy #.hibyte(total_sec)
|
ldx #total_pixels
|
||||||
jsr FST0R
|
jsr ZF1
|
||||||
ldx #.lobyte(total_iters)
|
|
||||||
ldy #.hibyte(total_iters)
|
|
||||||
jsr FST0R
|
|
||||||
|
|
||||||
jsr clear_screen
|
jsr clear_screen
|
||||||
jsr status_bar
|
jsr status_bar
|
||||||
|
@ -1672,10 +1526,10 @@ skipped_mask:
|
||||||
not_skipped_mask:
|
not_skipped_mask:
|
||||||
|
|
||||||
; run the fractal!
|
; run the fractal!
|
||||||
zoom_factor cx, sx, aspect_x
|
zoom_factor cx, sx, zoom, aspect_x
|
||||||
add32 cx, cx, ox
|
add16 cx, cx, ox
|
||||||
zoom_factor cy, sy, aspect_y
|
zoom_factor cy, sy, zoom, aspect_y
|
||||||
add32 cy, cy, oy
|
add16 cy, cy, oy
|
||||||
jsr mandelbrot
|
jsr mandelbrot
|
||||||
jsr pset
|
jsr pset
|
||||||
|
|
||||||
|
@ -1686,32 +1540,38 @@ not_skipped_mask:
|
||||||
|
|
||||||
no_key:
|
no_key:
|
||||||
; check if we should update the counters
|
; check if we should update the counters
|
||||||
|
;
|
||||||
|
; count_pixels >= width? update!
|
||||||
|
inc count_pixels
|
||||||
|
lda count_pixels
|
||||||
|
cmp #width
|
||||||
|
bmi update_status
|
||||||
|
|
||||||
; count_frames >= 120? update!
|
; count_frames >= 120? update!
|
||||||
lda count_frames
|
lda count_frames
|
||||||
cmp #120 ; >= 2 seconds
|
cmp #120 ; >= 2 seconds
|
||||||
bpl update_status
|
bmi skipped
|
||||||
jmp skipped
|
|
||||||
|
|
||||||
update_status:
|
update_status:
|
||||||
; FR0 = (float)count_iters & clear count_iters
|
; FR0 = (float)count_pixels & clear count_pixels
|
||||||
copy16 FR0, count_iters
|
lda count_pixels
|
||||||
jsr IFP
|
sta FR0
|
||||||
lda #0
|
lda #0
|
||||||
sta count_iters
|
sta FR0 + 1
|
||||||
sta count_iters + 1
|
sta count_pixels
|
||||||
|
jsr IFP
|
||||||
|
|
||||||
; FR1 = total_iters
|
; FR1 = total_pixels
|
||||||
ldx #.lobyte(total_iters)
|
ldx #.lobyte(total_pixels)
|
||||||
ldy #.hibyte(total_iters)
|
ldy #.hibyte(total_pixels)
|
||||||
jsr FLD1R
|
jsr FLD1R
|
||||||
|
|
||||||
; FR0 += FR1
|
; FR0 += FR1
|
||||||
jsr FADD
|
jsr FADD
|
||||||
|
|
||||||
; total_iters = FR0
|
; total_pixels = FR0
|
||||||
ldx #.lobyte(total_iters)
|
ldx #.lobyte(total_pixels)
|
||||||
ldy #.hibyte(total_iters)
|
ldy #.hibyte(total_pixels)
|
||||||
jsr FST0R
|
jsr FST0R
|
||||||
|
|
||||||
|
|
||||||
|
@ -1724,58 +1584,44 @@ update_status:
|
||||||
sta count_frames
|
sta count_frames
|
||||||
jsr IFP
|
jsr IFP
|
||||||
|
|
||||||
; FR0 *= sec_per_frame
|
; FR0 *= ms_per_frame
|
||||||
ldx #.lobyte(sec_per_frame)
|
ldx #.lobyte(ms_per_frame)
|
||||||
ldy #.hibyte(sec_per_frame)
|
ldy #.hibyte(ms_per_frame)
|
||||||
jsr FLD1R
|
jsr FLD1R
|
||||||
jsr FMUL
|
jsr FMUL
|
||||||
|
|
||||||
; FR0 += total_sec
|
; FR0 += total_ms
|
||||||
ldx #.lobyte(total_sec)
|
ldx #total_ms
|
||||||
ldy #.hibyte(total_sec)
|
ldy #0
|
||||||
jsr FLD1R
|
jsr FLD1R
|
||||||
jsr FADD
|
jsr FADD
|
||||||
|
|
||||||
; total_sec = FR0
|
; total_ms = FR0
|
||||||
ldx #.lobyte(total_sec)
|
ldx #total_ms
|
||||||
ldy #.hibyte(total_sec)
|
ldy #0
|
||||||
jsr FST0R
|
jsr FST0R
|
||||||
|
|
||||||
; FR0 /= total_iters
|
; FR0 /= total_pixels
|
||||||
ldx #.lobyte(total_iters)
|
ldx #total_pixels
|
||||||
ldy #.hibyte(total_iters)
|
ldy #0
|
||||||
jsr FLD1R
|
jsr FLD1R
|
||||||
jsr FDIV
|
jsr FDIV
|
||||||
|
|
||||||
; FR0 *= us_per_sec
|
|
||||||
ldx #.lobyte(us_per_sec)
|
|
||||||
ldy #.hibyte(us_per_sec)
|
|
||||||
jsr FLD1R
|
|
||||||
jsr FMUL
|
|
||||||
|
|
||||||
; round (down) to integer
|
|
||||||
jsr FPI
|
|
||||||
clc
|
|
||||||
jsr IFP
|
|
||||||
|
|
||||||
; convert to ASCII in INBUFF
|
; convert to ASCII in INBUFF
|
||||||
jsr FASC
|
jsr FASC
|
||||||
|
|
||||||
; print the first 6 digits
|
; print the first 6 digits
|
||||||
draw_text speed_start, str_speed_len, str_speed
|
draw_text_indirect speed_start, speed_precision, INBUFF
|
||||||
draw_text_indirect speed_start + str_speed_len, speed_precision, INBUFF
|
draw_text speed_start + speed_precision, str_speed_len, str_speed
|
||||||
|
|
||||||
skipped:
|
skipped:
|
||||||
|
|
||||||
; sx += fill_level[fill_masks] + 1
|
|
||||||
ldx fill_level
|
|
||||||
lda fill_masks,x
|
|
||||||
clc
|
clc
|
||||||
adc #1 ; will never carry
|
lda sx
|
||||||
adc sx
|
adc #1
|
||||||
sta sx
|
sta sx
|
||||||
lda #0
|
lda sx + 1
|
||||||
adc sx + 1
|
adc #0
|
||||||
sta sx + 1
|
sta sx + 1
|
||||||
|
|
||||||
lda sx
|
lda sx
|
||||||
|
@ -1785,15 +1631,12 @@ skipped:
|
||||||
|
|
||||||
loop_sx_done:
|
loop_sx_done:
|
||||||
|
|
||||||
; sy += fill_level[fill_masks] + 1
|
|
||||||
ldx fill_level
|
|
||||||
lda fill_masks,x
|
|
||||||
clc
|
clc
|
||||||
adc #1 ; will never carry
|
lda sy
|
||||||
adc sy
|
adc #1
|
||||||
sta sy
|
sta sy
|
||||||
lda #0
|
lda sy + 1
|
||||||
adc sy + 1
|
adc #0
|
||||||
sta sy + 1
|
sta sy + 1
|
||||||
|
|
||||||
lda sy
|
lda sy
|
||||||
|
|
|
@ -18,7 +18,7 @@ Enjoy! I'll probably work on this off and on for the next few weeks until I've g
|
||||||
|
|
||||||
## Current state
|
## Current state
|
||||||
|
|
||||||
Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 6 preset viewports via the number keys.
|
Basic rendering is functional, with interactive zoom/pan (+/-/arrows) and 4 preset viewports via the number keys.
|
||||||
|
|
||||||
The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
|
The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 32-bit output in the zero page, using the Atari OS ROM's floating point registers as workspaces. Inputs are clobbered.
|
||||||
|
|
||||||
|
@ -27,7 +27,7 @@ The 16-bit signed integer multiplication takes two 16-bit inputs and emits one 3
|
||||||
* when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications
|
* when expanded RAM is available as on 130XE, a 64KB 8-bit multiplication table accelerates the remaining multiplications
|
||||||
* without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication
|
* without expanded RAM, a table of half-squares is used to implement the algorithm from https://everything2.com/title/Fast+6502+multiplication
|
||||||
|
|
||||||
The Mandelbrot calculations are done using 3.13-precision fixed-point numbers with 6.26-precision intermediates.
|
The Mandelbrot calculations are done using 4.12-precision fixed-point numbers. It may be possible to switch to 3.13 instead, trading one integer bit for an extra bit of fractional precision.
|
||||||
|
|
||||||
Iterations are capped at 255.
|
Iterations are capped at 255.
|
||||||
|
|
||||||
|
|
35
tables.js
35
tables.js
|
@ -11,40 +11,19 @@ function db(func) {
|
||||||
return lines.join('\n');
|
return lines.join('\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
let squares = [];
|
|
||||||
for (let i = 0; i < 512; i++) {
|
|
||||||
squares.push(Math.trunc((i * i + 1) / 2));
|
|
||||||
}
|
|
||||||
|
|
||||||
console.log(
|
console.log(
|
||||||
`.segment "TABLES"
|
`.segment "TABLES"
|
||||||
|
|
||||||
.export mul_lobyte256
|
.export mul_lobyte
|
||||||
.export mul_hibyte256
|
.export mul_hibyte
|
||||||
.export mul_hibyte512
|
|
||||||
.export sqr_lobyte
|
|
||||||
.export sqr_hibyte
|
|
||||||
|
|
||||||
; (i * i + 1) / 2 for the multiplier
|
; (i * i) / 2 for the multiplier
|
||||||
.align 256
|
.align 256
|
||||||
mul_lobyte256:
|
mul_lobyte:
|
||||||
${db((i) => squares[i] & 0xff)}
|
${db((i) => ((i * i) >> 1) & 0xff)}
|
||||||
|
|
||||||
.align 256
|
.align 256
|
||||||
mul_hibyte256:
|
mul_hibyte:
|
||||||
${db((i) => (squares[i] >> 8) & 0xff)}
|
${db((i) => ((i * i) >> 9) & 0xff)}
|
||||||
|
|
||||||
.align 256
|
|
||||||
mul_hibyte512:
|
|
||||||
${db((i) => (squares[i + 256] >> 8) & 0xff)}
|
|
||||||
|
|
||||||
; (i * i) for the plain squares
|
|
||||||
.align 256
|
|
||||||
sqr_lobyte:
|
|
||||||
${db((i) => (i * i) & 0xff)}
|
|
||||||
|
|
||||||
.align 256
|
|
||||||
sqr_hibyte:
|
|
||||||
${db((i) => ((i * i) >> 8) & 0xff)}
|
|
||||||
|
|
||||||
`);
|
`);
|
||||||
|
|
12
todo.md
12
todo.md
|
@ -1,17 +1,15 @@
|
||||||
things to try:
|
things to try:
|
||||||
|
|
||||||
* fix status bar to show elapsed time, per-iter time, per-pixel iter count
|
|
||||||
|
|
||||||
* 'turbo' mode disabling graphics in full or part
|
|
||||||
|
|
||||||
* patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D
|
* patch the entire expanded-ram imul8xe on top of imul8 to avoid the 3-cycle thunk penalty :D
|
||||||
|
|
||||||
* maybe clean up the load/layout of the big mul table
|
* try 3.13 fixed point instead of 4.12 for more precision
|
||||||
|
* can we get away without the extra bit?
|
||||||
* consider alternate lookup tables in the top 16KB under ROM
|
|
||||||
|
|
||||||
* y-axis mirror optimization
|
* y-axis mirror optimization
|
||||||
|
|
||||||
|
* 'wide pixels' 2x and 4x for a fuller initial image in the tiered rendering
|
||||||
|
* maybe redo tiering to just 4x4, 2x2, 1x1?
|
||||||
|
|
||||||
* extract viewport for display & re-input via keyboard
|
* extract viewport for display & re-input via keyboard
|
||||||
|
|
||||||
* fujinet screenshot/viewport uploader
|
* fujinet screenshot/viewport uploader
|
||||||
|
|
Loading…
Reference in a new issue