Brooke Vibber
7e5ca79d9a
this frees up 12 bytes of zero page space and costs no measurable time, as these variables are not in the hot path and there was only a tiny difference.
1782 lines
33 KiB
ArmAsm
1782 lines
33 KiB
ArmAsm
; Zero-page variables ($80-$cf). Multi-byte values are little-endian
; (the add/sub/copy macros walk them from byte 0 upward).

; --- viewport and per-pixel constant c ---
ox              = $80   ; fixed6.26: center point x
oy              = $84   ; fixed6.26: center point y
cx              = $88   ; fixed6.26: c_x
cy              = $8c   ; fixed6.26: c_y

; --- iteration state ---
zx              = $90   ; fixed6.26: z_x
zy              = $94   ; fixed6.26: z_y
zx_2            = $98   ; fixed6.26: z_x^2
zy_2            = $9c   ; fixed6.26: z_y^2

zx_zy           = $a0   ; fixed6.26: z_x * z_y
dist            = $a4   ; fixed6.26: z_x^2 + z_y^2
sx              = $a8   ; i16: screen pixel x
sy              = $aa   ; i16: screen pixel y
z_buffer_active = $ac   ; boolean: 1 if we triggered the lake, 0 if not
z_buffer_start  = $ad   ; u8: index into z_buffer
z_buffer_end    = $ae   ; u8: index into z_buffer
iter            = $af   ; u8: iteration count

; --- drawing and animation state ---
ptr             = $b0   ; u16
pixel_ptr       = $b2   ; u16
zoom            = $b4   ; u8: zoom shift level
fill_level      = $b5   ; u8
pixel_color     = $b6   ; u8
pixel_mask      = $b7   ; u8
pixel_shift     = $b8   ; u8
pixel_offset    = $b9   ; u8
palette_offset  = $ba   ; u8
chroma_offset   = $bb   ; u8
palette_ticks   = $bc   ; u8
chroma_ticks    = $bd   ; u8
count_frames    = $be   ; u8
count_pixels    = $bf   ; u8

; free space c0-cb

; --- scratch ---
temp            = $cc   ; u16
temp2           = $ce   ; u16

; Tick thresholds compared against palette_ticks / chroma_ticks in the
; vblank handler. These are counts, NOT zero-page addresses.
palette_delay   = 23
chroma_delay    = 137
|
|
|
|
|
|
; Floating-point package registers in zero page
FR0     = $d4       ; float48
FRE     = $da
FR1     = $e0       ; float48
FR2     = $e6       ; float48
CIX     = $f2       ; u8  - index into INBUFF
INBUFF  = $f3       ; u16 - pointer to ascii
FLPTR   = $fc       ; u16 - pointer to user buffer float48

; Keyboard
CH1     = $02f2     ; previous character read from keyboard
CH      = $02fc     ; current character read from keyboard

LBUFF   = $0580     ; result buffer for FASC routine

; Floating-point ROM routine vectors
FASC    = $D8E6     ; floating point to ASCII (output in INBUFF, last char has high bit set)
IFP     = $D9AA     ; integer to floating point (FR0:u16 -> FR0:float48)
FADD    = $DA66     ; FR0 += FR1
FSUB    = $DA60     ; FR0 -= FR1
FMUL    = $DADB     ; FR0 *= FR1
FDIV    = $DB28     ; FR0 /= FR1
ZFR0    = $DA44     ; clear FR0
ZF1     = $DA46     ; clear zero page floating point number (XX)
FLD0R   = $DD89     ; load FR0 with floating point number (YYXX)
FLD1R   = $DD98     ; load FR1 with floating point number (YYXX)
FST0R   = $DDA7     ; store FR0 in user buffer (YYXX)
FMOVE   = $DDB6     ; move FR0 to FR1
|
|
|
|
; High memory layout
framebuffer_top     = $a000
textbuffer          = $af00
framebuffer_bottom  = $b000
display_list        = $bf00
framebuffer_end     = $c000

; Screen geometry (pixels); one framebuffer byte holds 4 pixels (2bpp)
height      = 184
half_height = height / 2
width       = 160
half_width  = width / 2
stride      = width / 4     ; bytes per scan line

EXTENDED_RAM = $4000        ; 16KiB bank on the XE
PORTB        = $D301        ; memory & bank-switch for XL/XE

; ANTIC registers
DMACTL  = $D400
DLISTL  = $D402
DLISTH  = $D403
WSYNC   = $D40A

; OS shadow registers
SDLSTL  = $230
SDLSTH  = $231

; Vertical-blank interrupt entry points
SYSVBV  = $E45F
XITVBV  = $E462
SETVBV  = $E45C

; Color shadow registers
COLOR0  = $2C4
COLOR1  = $2C5
COLOR2  = $2C6
COLOR3  = $2C7
COLOR4  = $2C8

; Keycodes as read from CH
KEY_PLUS    = $06
KEY_MINUS   = $0e
KEY_UP      = $8e
KEY_DOWN    = $8f
KEY_LEFT    = $86
KEY_RIGHT   = $87
KEY_1       = $1f
KEY_2       = $1e
KEY_3       = $1a
KEY_4       = $18
KEY_5       = $1d
KEY_6       = $1b
KEY_7       = $33
KEY_8       = $35
KEY_9       = $30
KEY_0       = $32
|
|
|
|
; Layout of a 6-byte floating-point value as used by the FP ROM routines.
.struct float48
    exponent    .byte       ; exponent/sign byte
    mantissa    .res 5      ; five bytes of BCD digits
.endstruct
|
|
|
|
.import mul_lobyte256
|
|
.import mul_hibyte256
|
|
.import mul_hibyte512
|
|
.import sqr_lobyte
|
|
.import sqr_hibyte
|
|
|
|
.data

; Status-bar strings. Each has an *_end label so its length can be
; computed at assembly time.
strings:
str_self:
    .byte "MANDEL-6502"
str_self_end:
str_speed:
    .byte " ms/px"
str_speed_end:
str_run:
    .byte " RUN"
str_run_end:
str_done:
    .byte "DONE"
str_done_end:

str_self_len    = str_self_end - str_self
str_speed_len   = str_speed_end - str_speed
str_run_len     = str_run_end - str_run
str_done_len    = str_done_end - str_done

; Number of characters of the FASC result shown in the speed readout
speed_precision = 6

; Text column where the speed readout starts; it sits to the left of the
; right-aligned RUN/DONE indicator on the 40-column status line.
speed_start = 40 - str_done_len - str_speed_len - speed_precision - 1
speed_len   = 14 + str_speed_len
|
|
|
|
|
|
char_map:
    ; Map ATASCII string values to framebuffer font entries:
    ;   $00-$1f -> $40-$5f
    ;   $20-$5f -> $00-$3f
    ;   $60-$7f -> $60-$7f
    ; Only 128 entries exist; callers must index with values below $80.
    .repeat 128, code
        .if code < 32
            .byte code + 64
        .elseif code < 96
            .byte code - 32
        .else
            .byte code
        .endif
    .endrepeat

hex_chars:
    .byte "0123456789abcdef"
|
|
|
|
aspect:
    ; Aspect-ratio correction factors.
    ;   pixels at 320w are 5:6 (narrow)
    ;   pixels at 160w are 5:3 (wide)
    ;
    ;   cy = (sy << (8 - zoom)) * (96 / 128 = 3 / 4)
    ;   cx = (sx << (8 - zoom)) * ((3 / 4) * (5 / 3) = 5 / 4)
    ;
    ; so vertical range   -92 .. 91.9 is -2.15625 .. 2.15624
    ;    horizontal range -80 .. 79.9 is -3.125 .. 3.124
    ;
    ; 184h is the equiv of 220.8h at square pixels
    ; 320 / 220.8 = 1.45 display aspect ratio

aspect_x:   ; fixed3.13 5/4
    .word 5 << (13 - 2)

aspect_y:   ; fixed3.13 3/4
    .word 3 << (13 - 2)
|
|
|
|
ms_per_frame:   ; float48 16.66666667
    .byte 64                        ; exponent/sign
    .byte $16, $66, $66, $66, $67   ; BCD digits

total_pixels:   ; float48, running total, zeroed at the start of each render
    .res 6, 0

total_ms:       ; float48, running total, zeroed at the start of each render
    .res 6, 0
|
|
|
|
; Display list template. `start` copies display_list_len bytes of it to
; display_list before pointing ANTIC at that copy.
display_list_start:
    ; 24 lines overscan: 3 x 8 blank lines
    .byte $70, $70, $70

    ; 8 scan lines, 1 row of 40-column text
    .byte $42
    .addr textbuffer

    ; 184 lines graphics
    ; ANTIC mode e (160px 2bpp, 1 scan line per line), in two halves that
    ; each load their own screen address
    .byte $4e
    .addr framebuffer_top
    .res half_height - 1, $0e

    .byte $4e
    .addr framebuffer_bottom
    .res half_height - 1, $0e

    .byte $41   ; jump and blank
    .addr display_list
display_list_end:
display_list_len = display_list_end - display_list_start
|
|
|
|
; iter -> pixel pattern with the 2-bit color replicated across the byte.
; Entry 0 is color 0; iterations 1..255 cycle through colors 1, 2, 3.
color_map:
    .byte 0
    .repeat 85
        .byte %01010101, %10101010, %11111111
    .endrepeat

; Luminance values. update_palette reads three consecutive entries
; starting at palette_offset (0 .. palette_entries - 1), so the first two
; are repeated after the end.
palette_start:
    .byte $0e, $08, $04
palette_repeat:
    .byte $0e, $08

palette_entries = 3

; Chroma (high nibble) values $10..$f0, followed by the first two again.
palette_chroma:
    .repeat 17, n
        .byte ((n .mod 15) + 1) << 4
    .endrepeat
palette_chroma_entries = 15
|
|
|
|
.code

; Number of recent (zx, zy) pairs remembered for loop detection.
; Timings measured by the original author:
;z_buffer_len = 16 ; 10.863 ms/px
;z_buffer_len = 12 ; 10.619 ms/px
z_buffer_len = 8 ; 10.612 ms/px
;z_buffer_len = 4 ; 12.395 ms/px
z_buffer_mask = z_buffer_len - 1

z_buffer:
    ; the last N zx/zy values, 4 bytes per entry (16-bit zx, 16-bit zy)
    .repeat z_buffer_len
        .word 0, 0
    .endrepeat

.export start
|
|
|
|
; Progressive fill: each level renders a finer grid than the one before.
;max_fill_level = 6
max_fill_level = 3

; Per level: coordinate bits that must be zero for a pixel to lie on that
; level's grid. mask + 1 is also the step between grid points.
fill_masks:
;    .byte %00011111
;    .byte %00001111
;    .byte %00000111
    .byte %00000011
    .byte %00000001
    .byte %00000000

; Per level: which bits of the framebuffer byte one plotted point covers
; before it is shifted to its column position.
pixel_masks:
    .byte %11111111
    .byte %11110000
    .byte %11000000

; Preset viewports, indexed by selector 0..5 (see load_viewport).
viewport_zoom:
    .byte 0, 5, 7, 5, 7, 7

viewport_ox:
    .dword ($00000000 & $3fffffff) << 2
    .dword ($ff110000 & $3fffffff) << 2
    .dword ($ff110000 & $3fffffff) << 2
    .dword ($fe400000 & $3fffffff) << 2
    .dword ($fe3b0000 & $3fffffff) << 2
    .dword $fd220000

viewport_oy:
    .dword ($00000000 & $3fffffff) << 2
    .dword ($ffb60000 & $3fffffff) << 2
    .dword ($ffbe0000 & $3fffffff) << 2
    .dword ($00000000 & $3fffffff) << 2
    .dword ($fffe0000 & $3fffffff) << 2
    .dword $ff000000
|
|
|
|
; add bytes, dest, arg1, arg2
; dest = arg1 + arg2 over `bytes` little-endian bytes. Clobbers A, flags.
; 2 + 9 * bytes cycles
.macro add bytes, dest, arg1, arg2
    clc                     ; 2 cyc
    .repeat bytes, n        ; 9 cyc per byte
        lda arg1 + n
        adc arg2 + n
        sta dest + n
    .endrepeat
.endmacro

; 16-bit add, 20 cycles
.macro add16 dest, arg1, arg2
    add 2, dest, arg1, arg2
.endmacro

; 32-bit add, 38 cycles
.macro add32 dest, arg1, arg2
    add 4, dest, arg1, arg2
.endmacro

; Fold the incoming carry flag into the single byte at dest.
; 8 cycles
.macro add_carry dest
    lda dest                ; 3 cyc
    adc #0                  ; 2 cyc
    sta dest                ; 3 cyc
.endmacro

; sub bytes, dest, arg1, arg2
; dest = arg1 - arg2 over `bytes` little-endian bytes. Clobbers A, flags.
; 2 + 9 * bytes cycles
.macro sub bytes, dest, arg1, arg2
    sec                     ; 2 cyc
    .repeat bytes, n        ; 9 cyc per byte
        lda arg1 + n
        sbc arg2 + n
        sta dest + n
    .endrepeat
.endmacro

; 16-bit subtract, 20 cycles
.macro sub16 dest, arg1, arg2
    sub 2, dest, arg1, arg2
.endmacro

; 32-bit subtract, 38 cycles
.macro sub32 dest, arg1, arg2
    sub 4, dest, arg1, arg2
.endmacro
|
|
|
|
; shl bytes, arg
; Shift the `bytes`-byte little-endian value at arg left by one bit, in
; place. The bit shifted out of the top byte is left in the carry flag.
; asl/rol on a zero-page operand are read-modify-write instructions of
; 5 cycles each, so this costs 5 * bytes cycles for zero-page arguments.
.macro shl bytes, arg
    .repeat bytes, n
        .if n = 0
            asl arg         ; lowest byte: shift in a zero
        .else
            rol arg + n     ; higher bytes: shift in the previous carry
        .endif
    .endrepeat
.endmacro

; 16-bit shift left
.macro shl16 arg
    shl 2, arg
.endmacro

; 24-bit shift left
.macro shl24 arg
    shl 3, arg
.endmacro

; 32-bit shift left
.macro shl32 arg
    shl 4, arg
.endmacro
|
|
|
|
; copy bytes, dest, arg
; Copy `bytes` bytes from arg to dest, lowest address first. Clobbers A.
; 6 * bytes cycles
.macro copy bytes, dest, arg
    .repeat bytes, n
        lda arg + n         ; 3 cyc
        sta dest + n        ; 3 cyc
    .endrepeat
.endmacro

; 12 cycles
.macro copy16 dest, arg
    copy 2, dest, arg
.endmacro

; 24 cycles
.macro copy32 dest, arg
    copy 4, dest, arg
.endmacro

; 36 cycles
.macro copyfloat dest, arg
    copy 6, dest, arg
.endmacro

; neg bytes, arg
; Two's-complement negate the `bytes`-byte little-endian value at arg in
; place (computed as 0 - arg with borrow). Clobbers A, flags.
; 2 + 8 * bytes cycles
.macro neg bytes, arg
    sec                     ; 2 cyc
    .repeat bytes, n        ; 8 cyc per byte
        lda #00             ; 2 cyc
        sbc arg + n         ; 3 cyc
        sta arg + n         ; 3 cyc
    .endrepeat
.endmacro

; 18 cycles
.macro neg16 arg
    neg 2, arg
.endmacro

; 34 cycles
.macro neg32 arg
    neg 4, arg
.endmacro
|
|
|
|
; shift_round_16 arg, shift
; Shift the 32-bit value at arg left by `shift` bits, then round its top
; 16 bits (arg + 2, arg + 3) according to the bottom 16 with round16.
; Cost: `shift` 32-bit shifts plus one round16.
.macro shift_round_16 arg, shift
    .repeat shift
        shl 4, arg
    .endrepeat
    round16 arg
.endmacro
|
|
|
|
; imul16 dest, arg1, arg2
; Signed 16 x 16 -> 32-bit multiply via imul16_func.
; input:  arg1, arg2 as 16-bit fixed point (original note: fixed4.12)
; output: dest as 32-bit fixed point with twice the fractional bits
;         (original note: fixed8.24)
; Loads FR0 and FR1 with the operands and copies the result out of FR2.
.macro imul16 dest, arg1, arg2
    copy 2, FR0, arg1       ; 12 cyc
    copy 2, FR1, arg2       ; 12 cyc
    jsr imul16_func         ; ? cyc
    copy 4, dest, FR2       ; 24 cyc
.endmacro

; sqr16 dest, arg
; Signed 16-bit square via sqr16_func.
; input:  arg as 16-bit fixed point
; output: dest as 32-bit fixed point with twice the fractional bits
.macro sqr16 dest, arg
    copy 2, FR0, arg        ; 12 cyc
    jsr sqr16_func          ; ? cyc
    copy 4, dest, FR2       ; 24 cyc
.endmacro

; sqr8 dest, arg
; input:  arg as u8
; output: dest as u16, looked up in the sqr_lobyte / sqr_hibyte tables
; clobbers a, x
.macro sqr8 dest, arg
    ldx arg
    lda sqr_lobyte,x        ; low byte of the table entry
    sta dest
    lda sqr_hibyte,x        ; high byte of the table entry
    sta dest + 1
.endmacro
|
|
|
|
.segment "TABLES"
; Lookup table: byte value -> PORTB value for bank-switch.
; The top two bits of the index select one of four banks; they land in
; PORTB bits 2-3 on top of the base value $e3.
.align 256
bank_switch_table:
    .repeat 256, i
        .byte ((i >> 6) << 2) | $e3
    .endrepeat

.code

; bank_switch bank
; Select extended-RAM bank `bank` (an assembly-time constant 0..3) by
; writing PORTB. Clobbers A.
.macro bank_switch bank
    lda #((bank << 2) | $e3)
    sta PORTB
.endmacro
|
|
|
|
; imul8 dest, arg1, arg2, xe
; 8 x 8 -> 16-bit multiply: the 16-bit value at dest receives arg1 * arg2.
;   xe != 0: look the product up in the table that imul8xe_init builds in
;            the bank-switched window at $4000
;   xe  = 0: compute it from the imported half-square tables
.macro imul8 dest, arg1, arg2, xe
.if xe
    ; using 64KB lookup table
    ; 51-70 cycles
    ; clobbers a, x, y, dest, ptr + 1
    ; relies on the low byte of ptr being 0 (imul8xe_init stores it)
    .scope
        product = dest

        ; top 2 bits of arg2 are the table bank selector
        ldx arg2                ; 3 cyc
        lda bank_switch_table,x ; 4 cyc
        sta PORTB               ; 4 cyc

        ; low 6 bits of arg2 pick the page inside the bank;
        ; add $4000 for the bank pointer
        txa                     ; 2 cyc
        and #$3f                ; 2 cyc
        ora #$40                ; 2 cyc
        sta ptr + 1             ; 3 cyc

        ; arg1 with its LSB dropped is the byte offset of the 2-byte entry
        lda arg1                ; 3 cyc
        and #$fe                ; 2 cyc
        tay                     ; 2 cyc
        lda (ptr),y             ; 5 cyc
        sta product             ; 3 cyc
        iny                     ; 2 cyc
        lda (ptr),y             ; 5 cyc
        sta product + 1         ; 3 cyc

        ; note: we are not restoring memory to save 6 cycles!
        ; this means those 16kb have to be switched back to base RAM
        ; if we need to use them anywhere else
        ;;; restore memory
        ;;lda #$81 ; 2 cyc - disabled
        ;;sta PORTB ; 4 cyc - disabled

        ; check the 1 bit of arg1 we skipped to fit into space
        lda arg1                ; 3 cyc
        and #1                  ; 2 cyc
        beq finished            ; 2 cyc

        ; odd arg1: add arg2 (still in X) one last time
        clc                     ; 2 cyc
        txa                     ; 2 cyc
        adc product             ; 3 cyc
        sta product             ; 3 cyc
        lda #0                  ; 2 cyc
        adc product + 1         ; 3 cyc
        sta product + 1         ; 3 cyc

finished:
    .endscope
.else
    ; Using base 48k RAM compatibility mode
    ; Small table of half squares
    ; Adapted from https://everything2.com/title/Fast+6502+multiplication
    ; 81-92 cycles
    ; clobbers a, x, dest
    .scope
        factor_a   = arg1
        factor_x   = arg2
        product_lo = dest
        product_hi = dest + 1

        lda factor_a            ; 3 cyc

        ; (a + x)^2/2
        clc                     ; 2 cyc
        adc factor_x            ; 3 cyc
        tax                     ; 2 cyc
        bcc sum_small           ; 2 cyc
        lda mul_hibyte512,x     ; 4 cyc - sum overflowed 8 bits
        bcs have_hi             ; 2 cyc - always taken, carry is still set
sum_small:
        lda mul_hibyte256,x     ; 4 cyc
        sec                     ; 2 cyc - both paths reach have_hi with carry set
have_hi:
        sta product_hi          ; 3 cyc
        lda mul_lobyte256,x     ; 4 cyc

        ; - a^2/2
        ldx factor_a            ; 3 cyc
        sbc mul_lobyte256,x     ; 4 cyc
        sta product_lo          ; 3 cyc
        lda product_hi          ; 3 cyc
        sbc mul_hibyte256,x     ; 4 cyc
        sta product_hi          ; 3 cyc

        ; + x & a & 1:
        ; (this is a kludge to correct a
        ; roundoff error that makes odd * odd too low)
        ldx factor_x            ; 3 cyc
        txa                     ; 2 cyc
        and factor_a            ; 3 cyc
        and #1                  ; 2 cyc

        clc                     ; 2 cyc
        adc product_lo          ; 3 cyc
        bcc no_carry            ; 2 cyc
        inc product_hi          ; 5 cyc

        ; - x^2/2
no_carry:
        sec                     ; 2 cyc
        sbc mul_lobyte256,x     ; 4 cyc
        sta product_lo          ; 3 cyc
        lda product_hi          ; 3 cyc
        sbc mul_hibyte256,x     ; 4 cyc
        sta product_hi          ; 3 cyc
    .endscope
.endif
.endmacro
|
|
|
|
; Detect bank-switched extended RAM. If present, redirect imul16_func and
; sqr16_func to their table-driven variants and build the multiplication
; table in the four banks.
; clobbers: a, y, PORTB, FR0, FR1, FR2, ptr, temp2 and the byte at
;           EXTENDED_RAM
; On return PORTB is left at the last value written (it is not restored).
.proc imul8xe_init
    multiplicand = FR1
    multiplier   = FR2

    ; Probe: write different values to the same address in banks 0 and 1.
    ; If bank 0 still reads back 0, the banks are distinct.
    bank_switch 0
    lda #0
    sta EXTENDED_RAM
    bank_switch 1
    lda #1
    sta EXTENDED_RAM
    bank_switch 0
    lda EXTENDED_RAM
    beq have_banks

    ; no bank switching available, we just overwrote the value in base ram
    rts

have_banks:
    ; patch imul16_func and sqr16_func into forwarding thunks:
    ; overwrite their first three bytes with `jmp <xe variant>`
    lda #$4c ; 'jmp' opcode
    sta imul16_func
    sta sqr16_func

    lda #.lobyte(imul16xe_func)
    sta imul16_func + 1
    lda #.hibyte(imul16xe_func)
    sta imul16_func + 2

    lda #.lobyte(sqr16xe_func)
    sta sqr16_func + 1
    lda #.hibyte(sqr16xe_func)
    sta sqr16_func + 2

    ; create the lookup table
    ; start both multipliers at 0; also set ptr to $4000, whose low byte
    ; the table-driven imul8 expects to stay 0
    lda #$00
    sta multiplicand
    sta multiplier
    sta ptr
    lda #$40
    sta ptr + 1

    ; go through the input set in four 16KB chunks, one per bank:
    ;   bank 0: $00 * $00 -> $3f * $ff
    ;   bank 1: $40 * $00 -> $7f * $ff
    ;   bank 2: $80 * $00 -> $bf * $ff
    ;   bank 3: $c0 * $00 -> $ff * $ff
    .repeat 4, n
        bank_switch n
        jsr imul8xe_init_section
    .endrepeat

    rts
.endproc
|
|
|
|
; Initialize a 16 KB chunk of the table in the currently selected bank.
; Each of the 64 pages $40..$7f holds one value of the row multiplier
; (FR2); within a page, byte offset 2k holds the 16-bit product of the row
; multiplier and the even number 2k.
; input:  FR2 = row multiplier for the first page
; output: FR2 advanced by 64, ready for the next bank
; clobbers: a, FR0, FR1, FR2, temp2 (y is set to 0 and left at 0)
.proc imul8xe_init_section
    even_factor = FR1       ; stepped by 2 per entry; not read by this routine
    row_factor  = FR2
    product     = FR0
    dest_ptr    = temp2

    ; dest_ptr = $4000, start of the bank window
    lda #$00
    sta dest_ptr
    lda #$40
    sta dest_ptr + 1

    ldy #0

    ; one page per row_factor value: 64 pages
next_page:
    ; the first entry of every page is row_factor * 0
    lda #0
    sta product
    sta product + 1

    ; 128 two-byte entries per page
next_entry:
    ; store the running product, low byte first
    lda product
    sta (dest_ptr),y
    lda product + 1
    iny
    sta (dest_ptr),y
    dey

    ; product += 2 * row_factor, done as two 16-bit additions
    .repeat 2
        clc
        lda row_factor
        adc product
        sta product
        lda #0
        adc product + 1
        sta product + 1
    .endrepeat

    ; step to the next even factor / next entry; the low pointer byte
    ; wraps to 0 after 128 entries, which ends the page
    inc even_factor
    inc even_factor
    inc dest_ptr
    inc dest_ptr
    bne next_entry

    ; next row; stop once the pointer leaves the window at $8000
    inc row_factor
    inc dest_ptr + 1
    lda dest_ptr + 1
    cmp #$80
    bne next_page

    rts

.endproc
|
|
|
|
; imul16_impl xe
; Body of a signed 16 x 16 -> 32-bit multiply subroutine, ending in rts.
; input:  FR0, FR1 = 16-bit signed operands
; output: FR2 = 32-bit product
; scratch: temp2; `xe` selects the imul8 variant
.macro imul16_impl xe
    .local lhs
    .local rhs
    .local product
    .local partial
    .local lhs_not_negative
    .local rhs_not_negative
    lhs     = FR0           ; 16-bit arg
    rhs     = FR1           ; 16-bit arg
    product = FR2           ; 32-bit result
    partial = temp2

    ; h1l1 * h2l2
    ; (h1*256 + l1) * (h2*256 + l2)
    ; h1*h2*256*256 + h1*l2*256 + h2*l1*256 + l1*l2

    ; l1*l2 -> bytes 0-1
    imul8 product, lhs, rhs, xe

    ; h1*h2 -> bytes 2-3
    imul8 product + 2, lhs + 1, rhs + 1, xe

    ; h1*l2 -> added at byte 1, carry folded into byte 3
    imul8 partial, lhs + 1, rhs, xe
    add 2, product + 1, product + 1, partial
    add_carry product + 3

    ; l1*h2 -> added at byte 1, carry folded into byte 3
    imul8 partial, lhs, rhs + 1, xe
    add 2, product + 1, product + 1, partial
    add_carry product + 3

    ; In case of negative inputs, adjust high word:
    ; subtract the other operand for each negative operand
    ; https://stackoverflow.com/a/28827013
    lda lhs + 1
    bpl lhs_not_negative
    sub 2, product + 2, product + 2, rhs
lhs_not_negative:
    lda rhs + 1
    bpl rhs_not_negative
    sub 2, product + 2, product + 2, lhs
rhs_not_negative:

    rts ; 6 cyc
.endmacro
|
|
|
|
; sqr16_impl xe
; Body of a signed 16-bit square subroutine, ending in rts.
; input:  FR0 = 16-bit signed value (replaced by its absolute value)
; output: FR2 = 32-bit square
; scratch: FR1; `xe` selects the imul8 variant
.macro sqr16_impl xe
    .scope
        value  = FR0        ; 16-bit arg (clobbered)
        square = FR2        ; 32-bit result
        ;cross = temp2
        cross  = FR1

        ; work on |value|; the square is the same
        lda value + 1
        bpl not_negative
        neg 2, value
not_negative:

        ; hl * hl
        ; (h*256 + l) * (h*256 + l)
        ; h*h*256*256 + h*l*256 + h*l*256 + l*l

        ; l*l -> bytes 0-1
        sqr8 square, value

        ; h*h -> bytes 2-3
        sqr8 square + 2, value + 1

        ; h*l, added twice at byte 1 with the carry folded into byte 3
        imul8 cross, value + 1, value, xe
        add 2, square + 1, square + 1, cross
        add_carry square + 3
        add 2, square + 1, square + 1, cross
        add_carry square + 3

        rts ; 6 cyc
    .endscope
.endmacro
|
|
|
|
; Default multiply entry point (half-square tables).
; imul8xe_init may overwrite the first three bytes with a jmp to
; imul16xe_func, so this routine must stay at least three bytes long.
.proc imul16_func
    imul16_impl 0
.endproc

; Multiply using the extended-RAM lookup table.
.proc imul16xe_func
    imul16_impl 1
.endproc

; Default square entry point (half-square tables).
; imul8xe_init may overwrite the first three bytes with a jmp to
; sqr16xe_func.
.proc sqr16_func
    sqr16_impl 0
.endproc

; Square using the extended-RAM lookup table.
.proc sqr16xe_func
    sqr16_impl 1
.endproc
|
|
|
|
; round16 arg
; Round top 16 bits of 32-bit fixed-point number in-place, based on the
; low word (arg, arg + 1):
;   low word > $8000: round up
;            = $8000: round up if positive
;                     round down if negative
;            < $8000: round down
; "Round up" increments the 16-bit value at arg + 2; "round down" leaves
; it unchanged. The low word itself is not modified.
; Clobbers A, flags.
;
; The magnitude test uses the carry flag from cmp (unsigned compare)
; rather than the sign of the subtraction, and the most common exits come
; first: 8 cycles on the round-down path with a zero-page operand, up to
; 31 in the worst case (tie, positive, carry into the top byte).
.macro round16 arg
    .local increment
    .local next

    lda arg + 1             ; 3 cyc
    cmp #$80                ; 2 cyc
    bcc next                ; high byte < $80: low word < $8000, round down
    bne increment           ; high byte > $80: low word > $8000, round up

    ; high byte == $80: the low byte decides
    lda arg                 ; 3 cyc
    bne increment           ; low word > $8000: round up

    ; exactly $8000: a tie, decided by the sign of the whole value
    lda arg + 3             ; 3 cyc
    bmi next                ; negative: round down

increment:
    inc arg + 2             ; 5 cyc
    bne next                ; no carry into the top byte
    inc arg + 3             ; 5 cyc

next:

.endmacro
|
|
|
|
; Iterate z = z^2 + c for one point.
; input:
;   cx: position scaled to 6.26 fixed point - -32..+31.9
;   cy: position scaled to 6.26
;   z_buffer_active: non-zero enables the repeated-value search below
;
; output:
;   iter: iteration count at escape, or 0 if the point did not escape
;         (iteration counter wrapped) or a repeated z value was found
;   z_buffer_active: 1 if iter is 0, otherwise 0
;
; clobbers: a, x, y, zx..dist, z_buffer, z_buffer_start, z_buffer_end,
;           FR0, FR1, FR2, temp2
.proc mandelbrot
    ; zx = zy = zx_2 = zy_2 = zx_zy = dist = 0; iter = 0
    lda #00
    .repeat 4, i
        sta zx + i
    .endrepeat
    .repeat 4, i
        sta zy + i
    .endrepeat
    .repeat 4, i
        sta zx_2 + i
    .endrepeat
    .repeat 4, i
        sta zy_2 + i
    .endrepeat
    .repeat 4, i
        sta zx_zy + i
    .endrepeat
    .repeat 4, i
        sta dist + i
    .endrepeat
    sta iter
    sta z_buffer_start
    sta z_buffer_end

loop:
    ; iter++ & max-iters break: iter wrapping to 0 means 255 iterations
    ; ran without escaping
    inc iter
    bne keep_going
    jmp exit_path
keep_going:

    ; quick_exit arg, max
    ; Jump to exit_path unless -max < arg < max, looking mainly at the top
    ; byte of arg.
    ;   arg: fixed6.26
    ;   max: integer
    ; The sign-flag branches after cmp are safe here only because each
    ; branch of the sign test limits A to a 7-bit range.
    .macro quick_exit arg, max
        .local positive
        .local negative
        .local nope_out
        .local first_equal
        .local all_done

        ; check sign bit
        lda arg + 3
        bmi negative

positive:
        cmp #(max << 2)
        bmi all_done ; 'less than'
        jmp exit_path

negative:
        cmp #(256 - (max << 2))
        beq first_equal ; 'equal' on first byte
        bpl all_done ; 'greater than'

nope_out:
        jmp exit_path

first_equal:
        ; following bytes all 0 shows it's really 'equal'
        lda arg + 2
        bne all_done
        lda arg + 1
        bne all_done
        lda arg
        bne all_done
        jmp exit_path

all_done:
    .endmacro

    ; 6.26: (-32 .. 31.9)
    ; zx = zx_2 - zy_2 + cx
    sub32 zx, zx_2, zy_2
    add32 zx, zx, cx
    quick_exit zx, 2

    ; zy = zx_zy + zx_zy + cy
    add32 zy, zx_zy, zx_zy
    add32 zy, zy, cy
    quick_exit zy, 2

    ; convert 6.26 -> 3.13: (-4 .. +3.9)
    ; after this the 16-bit values live in zx + 2 and zy + 2
    shift_round_16 zx, 3
    shift_round_16 zy, 3

    ; zx_2 = zx * zx
    sqr16 zx_2, zx + 2

    ; zy_2 = zy * zy
    sqr16 zy_2, zy + 2

    ; zx_zy = zx * zy
    imul16 zx_zy, zx + 2, zy + 2

    ; dist = zx_2 + zy_2
    add32 dist, zx_2, zy_2
    quick_exit dist, 4

    ; if may be in the lake, look for looping output with a small buffer
    ; as an optimization vs running to max iters
    lda z_buffer_active
    beq skip_z_buffer

    ldx z_buffer_start
    cpx z_buffer_end
    beq z_nothing_to_read

z_buffer_loop:
    ; z_compare arg: compare the buffer byte at X with arg, advance X,
    ; and count a match in Y
    .macro z_compare arg
        .local compare_no_match
        lda z_buffer,x
        inx
        cmp arg
        bne compare_no_match
        iny
compare_no_match:
    .endmacro

    ; z_advance: wrap the byte index in X back to 0 at the end of the
    ; buffer (unsigned compare; X never exceeds the buffer size here)
    .macro z_advance
        .local skip_reset_x
        cpx #(z_buffer_len * 4)
        bcc skip_reset_x
        ldx #0
skip_reset_x:
    .endmacro

    ; z_store arg: append the byte at arg to the buffer at X, advance X
    .macro z_store arg
        lda arg
        sta z_buffer,x
        inx
    .endmacro

    ; Compare the previously stored z values: all four bytes must match
    ldy #0
    z_compare zx + 2
    z_compare zx + 3
    z_compare zy + 2
    z_compare zy + 3

    cpy #4
    bne z_no_matches
    jmp z_exit

z_no_matches:
    z_advance

    cpx z_buffer_end
    bne z_buffer_loop

z_nothing_to_read:
    ; X equals z_buffer_end on both paths into here

    ; Store and expand
    z_store zx + 2
    z_store zx + 3
    z_store zy + 2
    z_store zy + 3
    z_advance
    stx z_buffer_end

    ; Increment the start roller if necessary (limit size).
    ; iter is unsigned (1..255), so test the carry: the previous
    ; sign-flag test stopped advancing the start once iter reached 160.
    ; NOTE(review): the threshold is the buffer size in bytes while iter
    ; counts entries - confirm that is intended.
    lda iter
    cmp #(z_buffer_len * 4)
    bcc skip_inc_start
    lda z_buffer_start
    clc
    adc #4
    tax
    z_advance
    stx z_buffer_start
skip_inc_start:

skip_z_buffer:

    jmp loop

z_exit:
    ; a repeated z value: report the point as not escaping
    lda #0
    sta iter

exit_path:
    ; z_buffer_active = (iter == 0) ? 1 : 0
    ldx #0
    lda iter
    bne next
    inx
next:
    stx z_buffer_active
    rts

.endproc
|
|
|
|
; scale_zoom dest
; Shift the 16-bit value at dest left by (8 - zoom) bits:
;   dest = dest << (8 - zoom)
; Counts X up from zoom to 8, so zoom must not exceed 8.
; clobbers X, flags
.macro scale_zoom dest
    .local shift_more
    .local shifted

    ldx zoom
shift_more:
    cpx #8
    beq shifted
    shl 2, dest
    inx
    jmp shift_more
shifted:
.endmacro

; zoom_factor dest, src, aspect
; Turn a screen coordinate into a scaled, aspect-corrected offset.
;   output: dest: fixed6.26 (32-bit)
;   input:  src: 16-bit screen coordinate, treated as fixed3.13 once shifted
;           aspect: fixed3.13
; clobbers A, X, flags, and whatever imul16 uses
.macro zoom_factor dest, src, aspect
    ; dest(16) = src << (8 - zoom)
    copy 2, dest, src
    scale_zoom dest

    ; cy = cy * (3 / 4)
    ; cx = cx * (5 / 4)
    imul16 dest, dest, aspect
.endmacro
|
|
|
|
; Plot the current point into the framebuffer.
; input:  sx, sy     signed screen coordinates
;         iter       selects the color through color_map
;         fill_level selects the width and height of the plotted block
; clobbers: a, x, y, temp, pixel_ptr, pixel_color, pixel_mask,
;           pixel_shift, pixel_offset
.proc pset
    ; iter -> color pattern, cut down to the pixels this level covers
    ldx iter
    lda color_map,x
    ldx fill_level
    and pixel_masks,x
    sta pixel_color

    ; pixel_mask keeps the bits of the byte we do NOT draw
    lda pixel_masks,x
    eor #$ff
    sta pixel_mask

    ; sy -> base address in pixel_ptr, chosen by the sign of sy
    lda sy
    bpl lower_half

    ; negative sy: start at the end of the top half; adding the negative
    ; row offset below steps back up into it
    lda #.lobyte(framebuffer_top + stride * half_height)
    sta pixel_ptr
    lda #.hibyte(framebuffer_top + stride * half_height)
    sta pixel_ptr + 1
    jmp add_rows

lower_half:
    lda #.lobyte(framebuffer_bottom)
    sta pixel_ptr
    lda #.hibyte(framebuffer_bottom)
    sta pixel_ptr + 1

add_rows:
    ; pixel_ptr += sy * stride
    ; sy * 40 = (sy << 3) + (sy << 5)
    copy 2, temp, sy
    .repeat 3
        shl 2, temp
    .endrepeat
    add 2, pixel_ptr, pixel_ptr, temp
    .repeat 2
        shl 2, temp
    .endrepeat
    add 2, pixel_ptr, pixel_ptr, temp

    ; pixel_ptr now points to the start of the line, which is 40 bytes.
    ; temp = sx + half_width = column counted from the left edge
    lda sx
    clc
    adc #half_width
    sta temp

    ; pixel_shift = temp & 3
    ; pixel_color >>= 2 * pixel_shift (shifting in zeros)
    ; pixel_mask  >>= 2 * pixel_shift (shifting in ones)
    and #3
    sta pixel_shift
    tax
    beq column_aligned
shift_column:
    lsr pixel_color
    lsr pixel_color
    sec
    ror pixel_mask
    sec
    ror pixel_mask
    dex
    bne shift_column
column_aligned:

    ; X = number of rows to draw = fill mask for this level + 1
    ldy fill_level
    ldx fill_masks,y
    inx

    ; pixel_offset = temp >> 2 = byte within the line
    lda temp
    lsr a
    lsr a
    sta pixel_offset
    tay

draw_row:
    ; read, mask, or, write
    lda (pixel_ptr),y
    and pixel_mask
    ora pixel_color
    sta (pixel_ptr),y

    dex
    beq done

    ; move down one line: pixel_ptr += 40
    clc
    lda #40
    adc pixel_ptr
    sta pixel_ptr
    lda #0
    adc pixel_ptr + 1
    sta pixel_ptr + 1
    jmp draw_row

done:
    rts
.endproc
|
|
|
|
; draw_text_indirect col, len, strptr
; Write `len` characters from the string that the zero-page pointer
; strptr points at into the status line, starting at text column `col`.
; Each character goes through char_map.
;
; char_map has only 128 entries, and FASC marks the last character of its
; output by setting the high bit, so bit 7 is stripped before the lookup.
; Without that, the final digit indexed past the end of the table.
;
; clobbers A, X, Y
.macro draw_text_indirect col, len, strptr
    .local loop
    .local done
    ldx #0
loop:
    cpx #len
    beq done
    txa
    tay                     ; (zp),y addressing needs the index in Y
    lda (strptr),y
    and #$7f                ; drop the end-of-string marker bit
    tay
    lda char_map,y
    sta textbuffer + col,x
    inx
    jmp loop
done:
.endmacro
|
|
|
|
; draw_text col, len, cstr
; Write the `len` characters at address cstr into the status line,
; starting at text column `col`. Each character goes through char_map.
; clobbers A, X, Y
.macro draw_text col, len, cstr
    .local next_char
    .local finished
    ldx #0
next_char:
    cpx #len
    beq finished
    ldy cstr,x              ; character value becomes the table index
    lda char_map,y
    sta textbuffer + col,x
    inx
    jmp next_char
finished:
.endmacro
|
|
|
|
; Vertical-blank handler (installed by `start` through SETVBV).
; Counts frames for the speed readout and steps the palette animation:
; chroma_offset advances every chroma_delay frames, palette_offset every
; palette_delay frames, each wrapping at its table size.
; Exits through XITVBV.
.proc vblank_handler
    inc count_frames

    ; --- chroma: tick, and step the offset when the delay is reached ---
    inc chroma_ticks
    lda chroma_ticks
    cmp #(chroma_delay)
    bne chroma_done

    lda #0
    sta chroma_ticks

    inc chroma_offset
    lda chroma_offset
    cmp #(palette_chroma_entries)
    bne chroma_done

    lda #0                  ; wrap around
    sta chroma_offset
chroma_done:

    ; --- luma: same scheme with its own delay and table size ---
    inc palette_ticks
    lda palette_ticks
    cmp #(palette_delay)
    bne luma_done

    lda #0
    sta palette_ticks

    inc palette_offset
    lda palette_offset
    cmp #(palette_entries)
    bne luma_done

    lda #0                  ; wrap around
    sta palette_offset

luma_done:
    jsr update_palette
    jmp XITVBV
.endproc
|
|
|
|
; Write the current animated colors to the OS color shadow registers.
; COLOR4 is set to 0. COLOR2, COLOR1 and COLOR0 (in that order) get the
; chroma at chroma_offset combined with three consecutive luminance
; values starting at palette_offset.
; clobbers: a, x, y
.proc update_palette
    lda #0
    sta COLOR4

    ldx chroma_offset
    ldy palette_offset

    ; n = 0, 1, 2 -> COLOR2, COLOR1, COLOR0 (adjacent addresses, descending)
    .repeat 3, n
        .if n > 0
            iny             ; next luminance entry
        .endif
        lda palette_chroma,x
        ora palette_start,y
        sta COLOR2 - n
    .endrepeat

    rts
.endproc
|
|
|
|
; Placeholder for the speed readout update; the steps below are still
; performed inline in `start`.
; The proc previously contained no instructions at all, so a `jsr` to it
; would have fallen straight through into keycheck. It now returns
; immediately.
.proc update_speed
    ; convert frames (u16) to fp
    ; add to frames_total
    ; convert pixels (u16) to fp
    ; add to pixels_total
    ; (frames_total * 16.66666667) / pixels_total
    ; convert to ATASCII
    ; draw text
    rts
.endproc
|
|
|
|
; Poll the keyboard and apply zoom / pan / preset-viewport keys.
; clobbers all (a, x, y, temp, temp2)
; returns 255 in A if state change or 0 if no change; the Z flag matches
; A on return, which is what callers branch on.
;
; Changes from the previous version: a dead `lda zoom` is gone, the zoom
; limits use unsigned compares, and the number keys are matched against a
; table instead of a chain of compares.
.proc keycheck
    viewport_key_count = 6

    ; check keyboard buffer; $ff means no key waiting
    lda CH
    cmp #$ff
    beq no_change

    ; mark the key as consumed
    ldx #$ff
    stx CH

    ; keep the keycode in Y (scale_zoom below clobbers X)
    tay

    cpy #KEY_PLUS
    beq zoom_in
    cpy #KEY_MINUS
    beq zoom_out

    ; pan step: temp+temp2 = $00010000 << (8 - zoom)
    lda #$00
    sta temp
    sta temp + 1
    sta temp + 3
    lda #$01
    sta temp + 2
    scale_zoom temp + 2

    cpy #KEY_UP
    beq pan_up
    cpy #KEY_DOWN
    beq pan_down
    cpy #KEY_LEFT
    beq pan_left
    cpy #KEY_RIGHT
    beq pan_right
    jmp number_keys

no_change:
    lda #0
    rts

zoom_in:
    lda zoom
    cmp #7
    bcs no_change           ; already at the maximum zoom
    inc zoom
    jmp changed
zoom_out:
    lda zoom
    cmp #1
    bcc no_change           ; already at zoom 0
    dec zoom
    jmp changed
pan_up:
    sub32 oy, oy, temp
    jmp changed
pan_down:
    add32 oy, oy, temp
    jmp changed
pan_left:
    sub32 ox, ox, temp
    jmp changed
pan_right:
    add32 ox, ox, temp
    jmp changed

number_keys:
    ; find the keycode in viewport_keys; its index is the viewport selector
    ldx #(viewport_key_count - 1)
scan_keys:
    tya
    cmp viewport_keys,x
    beq load_key_viewport
    dex
    bpl scan_keys
    jmp no_change

load_key_viewport:
    jsr load_viewport
    ; fall through
changed:
    lda #255
    rts

    ; keycodes for preset viewports 0..5
viewport_keys:
    .byte KEY_1, KEY_2, KEY_3, KEY_4, KEY_5, KEY_6
    .assert * - viewport_keys = viewport_key_count, error, "viewport_keys size mismatch"

.endproc
|
|
|
|
; Zero the range from framebuffer_top up to (not including) display_list,
; one 256-byte page at a time. This covers both framebuffer halves and
; the text buffer.
; clobbers: a, y, temp
.proc clear_screen
    ; temp = framebuffer_top
    lda #.lobyte(framebuffer_top)
    sta temp
    lda #.hibyte(framebuffer_top)
    sta temp + 1

    ldy #0                  ; Y is back at 0 after every full page
next_page:
    lda #0
fill_page:
    sta (temp),y
    iny
    bne fill_page

    ; advance to the next page; stop at the display list's page
    inc temp + 1
    lda temp + 1
    cmp #.hibyte(display_list)
    bne next_page

    rts
.endproc
|
|
|
|
; Draw the static parts of the status line: the program name at the left
; edge and the RUN indicator right-aligned on the 40-column row.
; clobbers whatever draw_text clobbers
.proc status_bar
    draw_text 0, str_self_len, str_self
    draw_text 40 - str_run_len, str_run_len, str_run

    rts
.endproc
|
|
|
|
; Load one of the preset viewports into zoom, ox and oy.
; input: viewport selector in x (index into the viewport_* tables)
; clobbers: a, x
.proc load_viewport

    lda viewport_zoom,x
    sta zoom

    ; x = selector * 4: byte offset of the 32-bit table entries
    txa
    asl a
    asl a
    tax

    ; copy the four bytes of ox and oy, stepping x through the entry
    .repeat 4, n
        .if n > 0
            inx
        .endif
        lda viewport_ox,x
        sta ox + n
        lda viewport_oy,x
        sta oy + n
    .endrepeat

    rts
.endproc
|
|
|
|
; Program entry point: set up the multiplier, display and palette
; animation, then render the current viewport progressively, restarting
; whenever keycheck reports a change. Never returns.
;
; Fixes relative to the previous version:
;  * Palette setup stored 0 to `palette_delay` / `chroma_delay`. Those are
;    the constants 23 and 137, so the stores hit zero-page $17 and $89
;    (= cx + 1). The tick counters palette_ticks / chroma_ticks are
;    cleared instead.
;  * The status-update test used sign-flag branches on unsigned counters,
;    which inverted the documented ">= threshold" logic (it fired once
;    count_pixels reached 32). It now uses carry-based compares.
;  * z_buffer_active is cleared before mandelbrot reads it the first time.
.proc start

    jsr imul8xe_init

    ; initialize viewport
    ldx #0 ; overview
    jsr load_viewport

    ; Disable display DMA
    lda #0
    sta DMACTL

    jsr clear_screen

    ; Copy the display list into properly aligned memory
    ; Can't cross 1024-byte boundaries :D
    ldx #0
copy_byte_loop:
    lda display_list_start,x
    sta display_list,x
    inx
    cpx #display_list_len
    bne copy_byte_loop

    ; Set up the display list
    lda #.lobyte(display_list)
    sta DLISTL ; actual register
    sta SDLSTL ; shadow register the OS will copy in
    lda #.hibyte(display_list)
    sta DLISTH ; actual register
    sta SDLSTH ; shadow register the OS will copy in

    ; Re-enable display DMA
    lda #$22
    sta DMACTL

    ; Initialize the palette animation state and the lake flag
    lda #0
    sta palette_offset
    sta palette_ticks
    sta chroma_offset
    sta chroma_ticks
    sta z_buffer_active
    jsr update_palette

    ; install the vblank handler
    lda #7 ; deferred
    ldx #.hibyte(vblank_handler)
    ldy #.lobyte(vblank_handler)
    jsr SETVBV

main_loop:
    ; count_frames = 0; count_pixels = 0
    lda #0
    sta count_frames
    sta count_pixels

    ; total_ms = 0.0; total_pixels = 0.0
    jsr ZFR0
    ldx #.lobyte(total_ms)
    ldy #.hibyte(total_ms)
    jsr FST0R
    ldx #.lobyte(total_pixels)
    ldy #.hibyte(total_pixels)
    jsr FST0R

    jsr clear_screen
    jsr status_bar

    lda #0
    sta fill_level

fill_loop:

    ; sy = -92 .. 91
    lda #(256-half_height)
    sta sy
    lda #(256-1)
    sta sy + 1

loop_sy:
    ; sx = -80 .. 79
    lda #(256-half_width)
    sta sx
    lda #(256-1)
    sta sx + 1

loop_sx:
    ; check the fill mask: skip points already drawn at a coarser level
    ldy #0

loop_skip_level:
    cpy fill_level
    beq current_level

    ; on level y's grid (both coordinates masked to 0)? then it was
    ; already drawn
    lda fill_masks,y
    and sx
    bne not_skipped_mask1

    lda fill_masks,y
    and sy
    beq skipped_mask

not_skipped_mask1:
    iny
    jmp loop_skip_level

current_level:
    ; draw only points that lie on the current level's grid
    lda fill_masks,y
    and sx
    bne skipped_mask

    lda fill_masks,y
    and sy
    beq not_skipped_mask

skipped_mask:
    jmp skipped

not_skipped_mask:

    ; run the fractal!
    zoom_factor cx, sx, aspect_x
    add32 cx, cx, ox
    zoom_factor cy, sy, aspect_y
    add32 cy, cy, oy
    jsr mandelbrot
    jsr pset

    jsr keycheck
    beq no_key
    ; @fixme clear the pixel stats
    jmp main_loop

no_key:
    ; check if we should update the counters
    ;
    ; count_pixels >= width? update!
    inc count_pixels
    lda count_pixels
    cmp #width
    bcs update_status

    ; count_frames >= 120? update!
    lda count_frames
    cmp #120 ; >= 2 seconds
    bcs update_status
    jmp skipped

update_status:
    ; FR0 = (float)count_pixels & clear count_pixels
    lda count_pixels
    sta FR0
    lda #0
    sta FR0 + 1
    sta count_pixels
    jsr IFP

    ; FR1 = total_pixels
    ldx #.lobyte(total_pixels)
    ldy #.hibyte(total_pixels)
    jsr FLD1R

    ; FR0 += FR1
    jsr FADD

    ; total_pixels = FR0
    ldx #.lobyte(total_pixels)
    ldy #.hibyte(total_pixels)
    jsr FST0R

    ; FR0 = (float)count_frames & clear count_frames
    ; warning: this should really disable interrupts @TODO
    lda count_frames
    sta FR0
    lda #0
    sta FR0 + 1
    sta count_frames
    jsr IFP

    ; FR0 *= ms_per_frame
    ldx #.lobyte(ms_per_frame)
    ldy #.hibyte(ms_per_frame)
    jsr FLD1R
    jsr FMUL

    ; FR0 += total_ms
    ldx #.lobyte(total_ms)
    ldy #.hibyte(total_ms)
    jsr FLD1R
    jsr FADD

    ; total_ms = FR0
    ldx #.lobyte(total_ms)
    ldy #.hibyte(total_ms)
    jsr FST0R

    ; FR0 /= total_pixels
    ldx #.lobyte(total_pixels)
    ldy #.hibyte(total_pixels)
    jsr FLD1R
    jsr FDIV

    ; convert to ASCII in INBUFF
    jsr FASC

    ; print the first 6 digits
    draw_text_indirect speed_start, speed_precision, INBUFF
    draw_text speed_start + speed_precision, str_speed_len, str_speed

skipped:

    ; sx += fill_masks[fill_level] + 1
    ldx fill_level
    lda fill_masks,x
    clc
    adc #1 ; will never carry
    adc sx
    sta sx
    lda #0
    adc sx + 1
    sta sx + 1

    lda sx
    cmp #half_width
    beq loop_sx_done
    jmp loop_sx

loop_sx_done:

    ; sy += fill_masks[fill_level] + 1
    ldx fill_level
    lda fill_masks,x
    clc
    adc #1 ; will never carry
    adc sy
    sta sy
    lda #0
    adc sy + 1
    sta sy + 1

    lda sy
    cmp #half_height
    beq loop_sy_done
    jmp loop_sy

loop_sy_done:

fill_loop_done:
    inc fill_level
    lda fill_level
    cmp #max_fill_level
    beq loop
    jmp fill_loop

loop:
    ; finished: show DONE and wait for a key that changes the view
    draw_text 40 - str_done_len, str_done_len, str_done
    jsr keycheck
    beq loop
    jmp main_loop

.endproc
|