forked from brooke/mandel-6502
Jamey Sharp
f06aed0c00
Since the results from the lo and hi squares don't overlap or overflow, they can be written directly to the final output location without doing any addition. Then only the multiplication that goes in the middle needs any adds.
1661 lines
31 KiB
ArmAsm
1661 lines
31 KiB
ArmAsm
; Our zero-page vars
|
|
sx = $80 ; i16: screen pixel x
|
|
sy = $82 ; i16: screen pixel y
|
|
ox = $84 ; fixed4.12: center point x
|
|
oy = $86 ; fixed4.12: center point y
|
|
cx = $88 ; fixed4.12: c_x
|
|
cy = $8a ; fixed4.12: c_y
|
|
zx = $8c ; fixed4.12: z_x
|
|
zy = $8e ; fixed4.12: z_y
|
|
|
|
zx_2 = $90 ; fixed4.12: z_x^2
|
|
zy_2 = $92 ; fixed4.12: z_y^2
|
|
zx_zy = $94 ; fixed4.12: z_x * z_y
|
|
dist = $96 ; fixed4.12: z_x^2 + z_y^2
|
|
|
|
iter = $a0 ; u8: iteration count
|
|
|
|
zoom = $a1 ; u8: zoom shift level
|
|
count_frames = $a2 ; u8
|
|
count_pixels = $a3 ; u8
|
|
total_ms = $a4 ; float48
|
|
total_pixels = $aa ; float48
|
|
|
|
z_buffer_active = $b0 ; boolean: 1 if we triggered the lake, 0 if not
|
|
z_buffer_start = $b1 ; u8: index into z_buffer
|
|
z_buffer_end = $b2 ; u8: index into z_buffer
|
|
temp = $b4 ; u16
|
|
temp2 = $b6 ; u16
|
|
pixel_ptr = $b8 ; u16
|
|
pixel_color = $ba ; u8
|
|
pixel_mask = $bb ; u8
|
|
pixel_shift = $bc ; u8
|
|
pixel_offset = $bd ; u8
|
|
fill_level = $be ; u8
|
|
palette_offset = $bf ; u8
|
|
|
|
palette_ticks = $c0 ; u8
|
|
chroma_ticks = $c1 ; u8
|
|
chroma_offset = $c2 ; u8
|
|
ptr = $c4 ; u16
|
|
|
|
palette_delay = 23
|
|
chroma_delay = 137
|
|
|
|
|
|
; FP registers in zero page
|
|
FR0 = $d4 ; float48
|
|
FRE = $da
|
|
FR1 = $e0 ; float48
|
|
FR2 = $e6 ; float48
|
|
CIX = $f2 ; u8 - index into INBUFF
|
|
INBUFF = $f3 ; u16 - pointer to ascii
|
|
FLPTR = $fc ; u16 - pointer to user buffer float48
|
|
|
|
CH1 = $02f2 ; previous character read from keyboard
|
|
CH = $02fc ; current character read from keyboard
|
|
|
|
LBUFF = $0580 ; result buffer for FASC routine
|
|
|
|
; FP ROM routine vectors
|
|
FASC = $D8E6 ; FLOATING POINT TO ASCII (output in INBUFF, last char has high bit set)
|
|
IFP = $D9AA ; INTEGER TO FLOATING POINT CONVERSION (FR0:u16 -> FR0:float48)
|
|
FADD = $DA66 ; ADDITION (FR0 += FR1)
|
|
FSUB = $DA60 ; SUBTRACTION (FR0 -= FR1)
|
|
FMUL = $DADB ; MULTIPLICATION (FR0 *= FR1)
|
|
FDIV = $DB28 ; DIVISION (FR0 /= FR1)
|
|
ZF1 = $DA46 ; CLEAR ZERO PAGE FLOATING POINT NUMBER (XX)
|
|
FLD0R = $DD89 ; LOAD FR0 WITH FLOATING POINT NUMBER (YYXX)
|
|
FLD1R = $DD98 ; LOAD FR1 WITH FLOATING POINT NUMBER (YYXX)
|
|
FST0R = $DDA7 ; STORE FR0 IN USER BUFFER (YYXX)
|
|
FMOVE = $DDB6 ; MOVE FR0 TO FR1
|
|
|
|
; High data
|
|
framebuffer_top = $a000
|
|
textbuffer = $af00
|
|
framebuffer_bottom = $b000
|
|
display_list = $bf00
|
|
framebuffer_end = $c000
|
|
|
|
height = 184
|
|
half_height = height >> 1
|
|
width = 160
|
|
half_width = width >> 1
|
|
stride = width >> 2
|
|
|
|
EXTENDED_RAM = $4000 ; 16KiB bank on the XE
|
|
PORTB = $D301 ; memory & bank-switch for XL/XE
|
|
|
|
DMACTL = $D400
|
|
DLISTL = $D402
|
|
DLISTH = $D403
|
|
WSYNC = $D40A
|
|
|
|
; OS shadow registers
|
|
SDLSTL = $230
|
|
SDLSTH = $231
|
|
|
|
; interrupt stuff
|
|
SYSVBV = $E45F
|
|
XITVBV = $E462
|
|
SETVBV = $E45C
|
|
|
|
COLOR0 = $2C4
|
|
COLOR1 = $2C5
|
|
COLOR2 = $2C6
|
|
COLOR3 = $2C7
|
|
COLOR4 = $2C8
|
|
|
|
; Keycodes!
|
|
KEY_PLUS = $06
|
|
KEY_MINUS = $0e
|
|
KEY_UP = $8e
|
|
KEY_DOWN = $8f
|
|
KEY_LEFT = $86
|
|
KEY_RIGHT = $87
|
|
KEY_1 = $1f
|
|
KEY_2 = $1e
|
|
KEY_3 = $1a
|
|
KEY_4 = 24
|
|
KEY_5 = 29
|
|
KEY_6 = 27
|
|
KEY_7 = 51
|
|
KEY_8 = 53
|
|
KEY_9 = 48
|
|
KEY_0 = 50
|
|
|
|
.struct float48
|
|
exponent .byte
|
|
mantissa .byte 5
|
|
.endstruct
|
|
|
|
.import mul_lobyte
|
|
.import mul_hibyte
|
|
.import sqr_lobyte
|
|
.import sqr_hibyte
|
|
|
|
.data
|
|
|
|
strings:
|
|
str_self:
|
|
.byte "MANDEL-6502"
|
|
str_self_end:
|
|
str_speed:
|
|
.byte " ms/px"
|
|
str_speed_end:
|
|
str_run:
|
|
.byte " RUN"
|
|
str_run_end:
|
|
str_done:
|
|
.byte "DONE"
|
|
str_done_end:
|
|
|
|
str_self_len = str_self_end - str_self
|
|
str_speed_len = str_speed_end - str_speed
|
|
str_run_len = str_run_end - str_run
|
|
str_done_len = str_done_end - str_done
|
|
speed_precision = 6
|
|
|
|
speed_start = 40 - str_done_len - str_speed_len - speed_precision - 1
|
|
speed_len = 14 + str_speed_len
|
|
|
|
|
|
char_map:
|
|
; Map ATASCII string values to framebuffer font entries
|
|
; Sighhhhh
|
|
.repeat 32, i
|
|
.byte i + 64
|
|
.endrepeat
|
|
.repeat 64, i
|
|
.byte i
|
|
.endrepeat
|
|
.repeat 32, i
|
|
.byte 96 + i
|
|
.endrepeat
|
|
|
|
hex_chars:
|
|
.byte "0123456789abcdef"
|
|
|
|
aspect:
|
|
; aspect ratio!
|
|
; pixels at 320w are 5:6 (narrow)
|
|
; pixels at 160w are 5:3 (wide)
|
|
;
|
|
; cy = (sy << (8 - zoom)) * (96 / 128 = 3 / 4)
|
|
; cx = (sx << (8 - zoom)) * ((3 / 4) * (5 / 3) = 5 / 4)
|
|
;
|
|
; so vertical range -92 .. 91.9 is -2.15625 .. 2.15624
|
|
; &horizontal range -80 .. 79.9 is -3.125 .. 3.124
|
|
;
|
|
; 184h is the equiv of 220.8h at square pixels
|
|
; 320 / 220.8 = 1.45 display aspect ratio
|
|
aspect_x: ; fixed4.16 5/4
|
|
.word 5 << (12 - 2)
|
|
|
|
aspect_y: ; fixed4.16 3/4
|
|
.word 3 << (12 - 2)
|
|
|
|
ms_per_frame: ; float48 16.66666667
|
|
.byte 64 ; exponent/sign
|
|
.byte $16 ; BCD digits
|
|
.byte $66
|
|
.byte $66
|
|
.byte $66
|
|
.byte $67
|
|
|
|
display_list_start:
|
|
; 24 lines overscan
|
|
.repeat 3
|
|
.byte $70 ; 8 blank lines
|
|
.endrep
|
|
|
|
; 8 scan lines, 1 row of 40-column text
|
|
.byte $42
|
|
.addr textbuffer
|
|
|
|
; 184 lines graphics
|
|
; ANTIC mode e (160px 2bpp, 1 scan line per line)
|
|
.byte $4e
|
|
.addr framebuffer_top
|
|
.repeat half_height - 1
|
|
.byte $0e
|
|
.endrep
|
|
.byte $4e
|
|
.addr framebuffer_bottom
|
|
.repeat half_height - 1
|
|
.byte $0e
|
|
.endrep
|
|
|
|
.byte $41 ; jump and blank
|
|
.addr display_list
|
|
display_list_end:
|
|
display_list_len = display_list_end - display_list_start
|
|
|
|
color_map:
|
|
.byte 0
|
|
.repeat 85
|
|
.byte 1
|
|
.byte 2
|
|
.byte 3
|
|
.endrepeat
|
|
|
|
|
|
palette_start:
|
|
.byte $0e
|
|
.byte $08
|
|
.byte $04
|
|
palette_repeat:
|
|
.byte $0e
|
|
.byte $08
|
|
|
|
palette_entries = 3
|
|
|
|
palette_chroma:
|
|
.repeat 15, i
|
|
.byte (i + 1) << 4
|
|
.endrepeat
|
|
.repeat 2, i
|
|
.byte (i + 1) << 4
|
|
.endrepeat
|
|
palette_chroma_entries = 15
|
|
|
|
.code
|
|
|
|
;z_buffer_len = 16 ; 10.863 ms/px
|
|
;z_buffer_len = 12 ; 10.619 ms/px
|
|
z_buffer_len = 8 ; 10.612 ms/px
|
|
;z_buffer_len = 4 ; 12.395 ms/px
|
|
z_buffer_mask = z_buffer_len - 1
|
|
z_buffer:
|
|
; the last N zx/zy values
|
|
.repeat z_buffer_len
|
|
.word 0
|
|
.word 0
|
|
.endrepeat
|
|
|
|
.export start
|
|
|
|
;max_fill_level = 6
|
|
max_fill_level = 3
|
|
fill_masks:
|
|
; .byte %00011111
|
|
; .byte %00001111
|
|
; .byte %00000111
|
|
.byte %00000011
|
|
.byte %00000001
|
|
.byte %00000000
|
|
|
|
viewport_zoom:
|
|
.byte 1
|
|
.byte 6
|
|
.byte 8
|
|
.byte 6
|
|
|
|
viewport_ox:
|
|
.word $0000
|
|
.word $f110
|
|
.word $f110
|
|
.word $e400
|
|
|
|
viewport_oy:
|
|
.word $0000
|
|
.word $fb60
|
|
.word $fbe0
|
|
.word $0000
|
|
|
|
; 2 + 9 * byte cycles
|
|
.macro add bytes, dest, arg1, arg2
|
|
clc ; 2 cyc
|
|
.repeat bytes, byte ; 9 * byte cycles
|
|
lda arg1 + byte
|
|
adc arg2 + byte
|
|
sta dest + byte
|
|
.endrepeat
|
|
.endmacro
|
|
|
|
; 20 cycles
|
|
.macro add16 dest, arg1, arg2
|
|
add 2, dest, arg1, arg2
|
|
.endmacro
|
|
|
|
; 38 cycles
|
|
.macro add32 dest, arg1, arg2
|
|
add 4, dest, arg2, dest
|
|
.endmacro
|
|
|
|
; 8 cycles
|
|
.macro add_carry dest
|
|
lda dest ; 3 cyc
|
|
adc #0 ; 2 cyc
|
|
sta dest ; 3 cyc
|
|
.endmacro
|
|
|
|
; 2 + 9 * byte cycles
|
|
.macro sub bytes, dest, arg1, arg2
|
|
sec ; 2 cyc
|
|
.repeat bytes, byte ; 9 * byte cycles
|
|
lda arg1 + byte
|
|
sbc arg2 + byte
|
|
sta dest + byte
|
|
.endrepeat
|
|
.endmacro
|
|
|
|
; 20 cycles
|
|
.macro sub16 dest, arg1, arg2
|
|
sub 2, dest, arg1, arg2
|
|
.endmacro
|
|
|
|
; 38 cycles
|
|
.macro sub32 dest, arg1, arg2
|
|
sub 4, dest, arg1, arg2
|
|
.endmacro
|
|
|
|
; 3 + 5 * bytes cycles
|
|
.macro shl bytes, arg
|
|
asl arg ; 3 cyc
|
|
.repeat bytes-1, i
|
|
rol arg + 1 + i ; 5 cyc
|
|
.endrepeat
|
|
.endmacro
|
|
|
|
; 13 cycles
|
|
.macro shl16 arg
|
|
shl 2, arg
|
|
.endmacro
|
|
|
|
; 18 cycles
|
|
.macro shl24 arg
|
|
shl 3, arg
|
|
.endmacro
|
|
|
|
; 23 cycles
|
|
.macro shl32 arg
|
|
shl 4, arg
|
|
.endmacro
|
|
|
|
; 6 * bytes cycles
|
|
.macro copy bytes, dest, arg
|
|
.repeat bytes, byte ; 6 * bytes cycles
|
|
lda arg + byte ; 3 cyc
|
|
sta dest + byte ; 3 cyc
|
|
.endrepeat
|
|
.endmacro
|
|
|
|
; 12 cycles
|
|
.macro copy16 dest, arg
|
|
copy 2, dest, arg
|
|
.endmacro
|
|
|
|
; 24 cycles
|
|
.macro copy32 dest, arg
|
|
copy 4, dest, arg
|
|
.endmacro
|
|
|
|
; 36 cycles
|
|
.macro copyfloat dest, arg
|
|
copy 6, dest, arg
|
|
.endmacro
|
|
|
|
; 2 + 8 * byte cycles
|
|
.macro neg bytes, arg
|
|
sec ; 2 cyc
|
|
.repeat bytes, byte ; 8 * byte cycles
|
|
lda #00 ; 2 cyc
|
|
sbc arg + byte ; 3 cyc
|
|
sta arg + byte ; 3 cyc
|
|
.endrepeat
|
|
.endmacro
|
|
|
|
; 18 cycles
|
|
.macro neg16 arg
|
|
neg 2, arg
|
|
.endmacro
|
|
|
|
; 34 cycles
|
|
.macro neg32 arg
|
|
neg 4, arg
|
|
.endmacro
|
|
|
|
; 11-27 + 23 * shift cycles
|
|
; 103-119 cycles for shift=4
|
|
.macro shift_round_16 arg, shift
|
|
.repeat shift
|
|
shl32 arg ; 23 cycles
|
|
.endrepeat
|
|
round16 arg ; 11-27 cycles
|
|
.endmacro
|
|
|
|
.macro imul16_round dest, arg1, arg2, shift
|
|
copy16 FR0, arg1 ; 12 cyc
|
|
copy16 FR1, arg2 ; 12 cyc
|
|
jsr imul16_func ; ? cyc
|
|
shift_round_16 FR2, shift ; 103-119 cycles for shift=4
|
|
copy16 dest, FR2 + 2 ; 12 cyc
|
|
.endmacro
|
|
|
|
.macro sqr16_round dest, arg, shift
|
|
;imul16_round dest, arg, arg, shift
|
|
copy16 FR0, arg ; 12 cyc
|
|
jsr sqr16_func ; ? cyc
|
|
shift_round_16 FR2, shift ; 103-119 cycles for shift=4
|
|
copy16 dest, FR2 + 2 ; 12 cyc
|
|
.endmacro
|
|
|
|
; clobbers a, x
|
|
.macro sqr8 dest, arg
|
|
ldx arg
|
|
lda sqr_lobyte,x
|
|
sta dest
|
|
lda sqr_hibyte,x
|
|
sta dest + 1
|
|
.endmacro
|
|
|
|
.segment "TABLES"
|
|
; lookup table for top byte -> PORTB value for bank-switch
|
|
.align 256
|
|
bank_switch_table:
|
|
.repeat 256, i
|
|
.byte ((i & $c0) >> 4) | $e3
|
|
.endrepeat
|
|
|
|
.code
|
|
|
|
.macro bank_switch bank
|
|
lda #((bank << 2) | $e3)
|
|
sta PORTB
|
|
.endmacro
|
|
|
|
.macro imul8 dest, arg1, arg2, xe
|
|
.if xe
|
|
; using 64KB lookup table
|
|
; 51-70 cycles
|
|
; clobbers x, y, dest, ptr
|
|
.scope
|
|
output = dest
|
|
|
|
; top 2 bits are the table bank selector
|
|
ldx arg2 ; 3 cyc
|
|
lda bank_switch_table,x ; 4 cyc
|
|
sta PORTB ; 4 cyc
|
|
|
|
; bottom 14 bits except the LSB are the per-bank table index
|
|
; add $4000 for the bank pointer
|
|
txa ; 2 cyc
|
|
and #$3f ; 2 cyc
|
|
ora #$40 ; 2 cyc
|
|
sta ptr + 1 ; 3 cyc
|
|
|
|
; copy the entry into output
|
|
lda arg1 ; 3 cyc
|
|
and #$fe ; 2 cyc
|
|
tay ; 2 cyc
|
|
lda (ptr),y ; 5 cyc
|
|
sta output ; 3 cyc
|
|
iny ; 2 cyc
|
|
lda (ptr),y ; 5 cyc
|
|
sta output+1 ; 3 cyc
|
|
|
|
; note: we are not restoring memory to save 6 cycles!
|
|
; this means those 16kb have to be switched back to base RAM
|
|
; if we need to use them anywhere else
|
|
;;; restore memory
|
|
;;lda #$81 ; 2 cyc - disabled
|
|
;;sta PORTB ; 4 cyc - disabled
|
|
|
|
; check that 1 bit we skipped to fit into space
|
|
lda arg1 ; 3 cyc
|
|
and #1 ; 2 cyc
|
|
beq done ; 2 cyc
|
|
|
|
; add arg2 one last time for the skipped bit
|
|
clc ; 2 cyc
|
|
txa ; 2 cyc
|
|
adc output ; 3 cyc
|
|
sta output ; 3 cyc
|
|
lda #0 ; 2 cyc
|
|
adc output+1 ; 3 cyc
|
|
sta output+1 ; 3 cyc
|
|
|
|
done:
|
|
.endscope
|
|
.else
|
|
; Using base 48k RAM compatibility mode
|
|
; Small table of half squares
|
|
; Adapted from https://everything2.com/title/Fast+6502+multiplication
|
|
; 81-92 cycles
|
|
.scope
|
|
mul_factor_a = arg1
|
|
mul_factor_x = arg2
|
|
mul_product_lo = dest
|
|
mul_product_hi = dest + 1
|
|
|
|
lda mul_factor_a ; 3 cyc
|
|
|
|
; (a + x)^2/2
|
|
clc ; 2 cyc
|
|
adc mul_factor_x ; 3 cyc
|
|
tax ; 2 cyc
|
|
lda mul_hibyte,x ; 4 cyc
|
|
bcc next ; 2 cyc
|
|
; carry is set so we get to add 1 for free, but need to add 0x80
|
|
adc #$7f ; 2 cyc
|
|
clc ; 2 cyc
|
|
; stash the sum temporarily so we can use it as an operand to add
|
|
stx mul_product_lo ; 3 cyc
|
|
adc mul_product_lo ; 3 cyc
|
|
next:
|
|
sec ; 2 cyc
|
|
sta mul_product_hi ; 3 cyc
|
|
lda mul_lobyte,x ; 4 cyc
|
|
|
|
; - a^2/2
|
|
ldx mul_factor_a ; 3 cyc
|
|
sbc mul_lobyte,x ; 4 cyc
|
|
sta mul_product_lo ; 3 cyc
|
|
lda mul_product_hi ; 3 cyc
|
|
sbc mul_hibyte,x ; 4 cyc
|
|
sta mul_product_hi ; 3 cyc
|
|
|
|
; + x & a & 1:
|
|
; (this is a kludge to correct a
|
|
; roundoff error that makes odd * odd too low)
|
|
ldx mul_factor_x ; 3 cyc
|
|
txa ; 2 cyc
|
|
and mul_factor_a ; 3 cyc
|
|
and #1 ; 2 cyc
|
|
|
|
clc ; 2 cyc
|
|
adc mul_product_lo ; 3 cyc
|
|
bcc small_product ; 2 cyc
|
|
inc mul_product_hi ; 5 cyc
|
|
|
|
; - x^2/2
|
|
small_product:
|
|
sec ; 2 cyc
|
|
sbc mul_lobyte,x ; 4 cyc
|
|
sta mul_product_lo ; 3 cyc
|
|
lda mul_product_hi ; 3 cyc
|
|
sbc mul_hibyte,x ; 4 cyc
|
|
sta mul_product_hi ; 3 cyc
|
|
.endscope
|
|
.endif
|
|
.endmacro
|
|
|
|
.proc imul8xe_init
|
|
|
|
bank_switch 0
|
|
lda #0
|
|
sta EXTENDED_RAM
|
|
bank_switch 1
|
|
lda #1
|
|
sta EXTENDED_RAM
|
|
bank_switch 0
|
|
lda EXTENDED_RAM
|
|
beq init
|
|
|
|
; no bank switching available, we just overwrite the value in base ram
|
|
rts
|
|
|
|
init:
|
|
|
|
; patch imul16_func into a forwarding thunk to imul16xe_func
|
|
lda #$4c ; 'jmp' opcode
|
|
sta imul16_func
|
|
lda #.lobyte(imul16xe_func)
|
|
sta imul16_func + 1
|
|
lda #.hibyte(imul16xe_func)
|
|
sta imul16_func + 2
|
|
|
|
; ditto for sqr16_func -> sqr16xe_func
|
|
lda #$4c ; 'jmp' opcode
|
|
sta sqr16_func
|
|
lda #.lobyte(sqr16xe_func)
|
|
sta sqr16_func + 1
|
|
lda #.hibyte(sqr16xe_func)
|
|
sta sqr16_func + 2
|
|
|
|
; create the lookup table
|
|
; go through the input set, in four 16KB chunks
|
|
|
|
arg1 = FR1
|
|
arg2 = FR2
|
|
result = FR0
|
|
|
|
lda #$00
|
|
sta arg1
|
|
sta arg2
|
|
sta ptr
|
|
lda #$40
|
|
sta ptr + 1
|
|
|
|
; $00 * $00 -> $3f * $ff
|
|
bank_switch 0
|
|
jsr imul8xe_init_section
|
|
|
|
; $40 * $00 -> $7f * $ff
|
|
bank_switch 1
|
|
jsr imul8xe_init_section
|
|
|
|
; $80 * $00 -> $bf * $ff
|
|
bank_switch 2
|
|
jsr imul8xe_init_section
|
|
|
|
; $c0 * $00 -> $ff * $ff
|
|
bank_switch 3
|
|
jsr imul8xe_init_section
|
|
|
|
rts
|
|
.endproc
|
|
|
|
; Initialize a 16 KB chunk of the table
|
|
; input: multipliers in temp
|
|
; output: new multipliers in temp
|
|
; clobbers: temp, temp2
|
|
.proc imul8xe_init_section
|
|
arg1 = FR1
|
|
arg2 = FR2
|
|
result = FR0
|
|
ptr = temp2
|
|
|
|
lda #$00
|
|
sta ptr
|
|
lda #$40
|
|
sta ptr + 1
|
|
|
|
ldy #0
|
|
|
|
; outer loop: $00 -> $3f
|
|
outer_loop:
|
|
|
|
; reset result to 0
|
|
lda #0
|
|
sta result
|
|
sta result + 1
|
|
|
|
; inner loop: $00 -> $ff
|
|
inner_loop:
|
|
|
|
; copy result to data set
|
|
lda result
|
|
sta (ptr),y
|
|
lda result + 1
|
|
iny
|
|
sta (ptr),y
|
|
dey
|
|
|
|
; result += 2 * arg2
|
|
clc
|
|
lda arg2
|
|
adc result
|
|
sta result
|
|
lda #0
|
|
adc result + 1
|
|
sta result + 1
|
|
clc
|
|
lda arg2
|
|
adc result
|
|
sta result
|
|
lda #0
|
|
adc result + 1
|
|
sta result + 1
|
|
|
|
; inner loop check
|
|
inc arg1
|
|
inc arg1
|
|
inc ptr
|
|
inc ptr
|
|
bne inner_loop
|
|
|
|
; outer loop check
|
|
inc arg2
|
|
inc ptr + 1
|
|
lda ptr + 1
|
|
cmp #$80
|
|
bne outer_loop
|
|
|
|
rts
|
|
|
|
.endproc
|
|
|
|
.macro imul16_impl xe
|
|
.local arg1
|
|
.local arg2
|
|
.local result
|
|
.local inter
|
|
.local arg1_pos
|
|
.local arg2_pos
|
|
arg1 = FR0 ; 16-bit arg (clobbered)
|
|
arg2 = FR1 ; 16-bit arg (clobbered)
|
|
result = FR2 ; 32-bit result
|
|
inter = temp2
|
|
|
|
; h1l1 * h2l2
|
|
; (h1*256 + l1) * (h2*256 + l2)
|
|
; h1*256*(h2*256 + l2) + l1*(h2*256 + l2)
|
|
; h1*h2*256*256 + h1*l2*256 + h2*l1*256 + l1*l2
|
|
|
|
imul8 result, arg1, arg2, xe
|
|
lda #0
|
|
sta result + 2
|
|
sta result + 3
|
|
|
|
imul8 inter, arg1 + 1, arg2, xe
|
|
add16 result + 1, result + 1, inter
|
|
add_carry result + 3
|
|
|
|
imul8 inter, arg1, arg2 + 1, xe
|
|
add16 result + 1, result + 1, inter
|
|
add_carry result + 3
|
|
|
|
imul8 inter, arg1 + 1, arg2 + 1, xe
|
|
add16 result + 2, result + 2, inter
|
|
|
|
; In case of negative inputs, adjust high word
|
|
; https://stackoverflow.com/a/28827013
|
|
lda arg1 + 1
|
|
bpl arg1_pos
|
|
sub16 result + 2, result + 2, arg2
|
|
arg1_pos:
|
|
lda arg2 + 1
|
|
bpl arg2_pos
|
|
sub16 result + 2, result + 2, arg1
|
|
arg2_pos:
|
|
|
|
rts ; 6 cyc
|
|
.endmacro
|
|
|
|
.macro sqr16_impl xe
|
|
.scope
|
|
arg = FR0 ; 16-bit arg (clobbered)
|
|
result = FR2 ; 32-bit result
|
|
;inter = temp2
|
|
inter = FR1
|
|
|
|
lda arg + 1
|
|
bpl arg_pos
|
|
neg16 arg
|
|
arg_pos:
|
|
|
|
; hl * hl
|
|
; (h*256 + l) * (h*256 + l)
|
|
; h*256*(h*256 + l) + l*(h*256 + l)
|
|
; h*h*256*256 + h*l*256 + h*l*256 + l*l
|
|
|
|
sqr8 result, arg
|
|
sqr8 result + 2, arg + 1
|
|
|
|
imul8 inter, arg + 1, arg, xe
|
|
add16 result + 1, result + 1, inter
|
|
add_carry result + 3
|
|
add16 result + 1, result + 1, inter
|
|
add_carry result + 3
|
|
|
|
rts ; 6 cyc
|
|
.endscope
|
|
.endmacro
|
|
|
|
.proc imul16_func
|
|
imul16_impl 0
|
|
.endproc
|
|
|
|
.proc imul16xe_func
|
|
imul16_impl 1
|
|
.endproc
|
|
|
|
.proc sqr16_func
|
|
sqr16_impl 0
|
|
.endproc
|
|
|
|
.proc sqr16xe_func
|
|
sqr16_impl 1
|
|
.endproc
|
|
|
|
; 11-27 cycles
|
|
.macro round16 arg
|
|
; Round top 16 bits of 32-bit fixed-point number in-place
|
|
.local increment
|
|
.local high_half
|
|
.local check_sign
|
|
.local next
|
|
|
|
; low word > $8000: round up
|
|
; = $8000: round up if positive
|
|
; round down if negative
|
|
; < $8000: round down
|
|
|
|
; $8000 17
|
|
; $8001 27
|
|
; $8100 21
|
|
; $7fff 11
|
|
|
|
lda arg + 1 ; 3 cyc
|
|
cmp #$80 ; 2 cyc
|
|
beq high_half ; 2 cyc
|
|
|
|
bpl increment ; 2 cyc
|
|
|
|
bmi next ; 2 cyc
|
|
|
|
high_half:
|
|
lda arg ; 3 cyc
|
|
beq check_sign ; 2 cyc
|
|
|
|
jmp increment ; 3 cyc
|
|
|
|
check_sign:
|
|
lda arg + 3 ; 3 cyc
|
|
bmi next ; 2 cyc
|
|
|
|
increment: ; 5-10 cyc
|
|
inc arg + 2 ; 5 cyc
|
|
bne next ; 2 cyc
|
|
inc arg + 3 ; 5 cyc
|
|
|
|
next:
|
|
|
|
.endmacro
|
|
|
|
.proc mandelbrot
|
|
; input:
|
|
; cx: position scaled to 4.12 fixed point - -8..+7.9
|
|
; cy: position scaled to 4.12
|
|
;
|
|
; output:
|
|
; iter: iteration count at escape or 0
|
|
|
|
; zx = 0
|
|
; zy = 0
|
|
; zx_2 = 0
|
|
; zy_2 = 0
|
|
; zx_zy = 0
|
|
; dist = 0
|
|
; iter = 0
|
|
lda #00
|
|
ldx #(iter - zx + 1)
|
|
initloop:
|
|
sta zx - 1,x
|
|
dex
|
|
bne initloop
|
|
sta z_buffer_start
|
|
sta z_buffer_end
|
|
|
|
loop:
|
|
; iter++ & max-iters break
|
|
inc iter
|
|
bne keep_going
|
|
jmp exit_path
|
|
keep_going:
|
|
|
|
.macro quick_exit arg, max
|
|
.local positive
|
|
.local negative
|
|
.local nope_out
|
|
.local first_equal
|
|
.local all_done
|
|
|
|
; check sign bit
|
|
lda arg + 1
|
|
bmi negative
|
|
|
|
positive:
|
|
cmp #((max) << 4)
|
|
bmi all_done ; 'less than'
|
|
jmp exit_path
|
|
|
|
negative:
|
|
cmp #(256 - ((max) << 4))
|
|
beq first_equal ; 'equal' on first byte
|
|
bpl all_done ; 'greater than'
|
|
|
|
nope_out:
|
|
jmp exit_path
|
|
|
|
first_equal:
|
|
lda arg
|
|
beq nope_out ; 2nd byte 0 shows it's really 'equal'
|
|
|
|
all_done:
|
|
.endmacro
|
|
|
|
; 4.12: (-8 .. +7.9)
|
|
; zx = zx_2 - zy_2 + cx
|
|
sub16 zx, zx_2, zy_2
|
|
add16 zx, zx, cx
|
|
quick_exit zx, 2
|
|
|
|
; zy = zx_zy + zx_zy + cy
|
|
add16 zy, zx_zy, zx_zy
|
|
add16 zy, zy, cy
|
|
quick_exit zy, 2
|
|
|
|
; zx_2 = zx * zx
|
|
sqr16_round zx_2, zx, 4
|
|
|
|
; zy_2 = zy * zy
|
|
sqr16_round zy_2, zy, 4
|
|
|
|
; zx_zy = zx * zy
|
|
imul16_round zx_zy, zx, zy, 4
|
|
|
|
; dist = zx_2 + zy_2
|
|
add16 dist, zx_2, zy_2
|
|
quick_exit dist, 4
|
|
|
|
; if may be in the lake, look for looping output with a small buffer
|
|
; as an optimization vs running to max iters
|
|
lda z_buffer_active
|
|
beq skip_z_buffer
|
|
|
|
ldx z_buffer_start
|
|
cpx z_buffer_end
|
|
beq z_nothing_to_read
|
|
|
|
z_buffer_loop:
|
|
.macro z_compare arg
|
|
.local compare_no_match
|
|
lda z_buffer,x
|
|
inx
|
|
cmp arg
|
|
bne compare_no_match
|
|
iny
|
|
compare_no_match:
|
|
.endmacro
|
|
.macro z_advance
|
|
.local skip_reset_x
|
|
cpx #(z_buffer_len * 4)
|
|
bmi skip_reset_x
|
|
ldx #0
|
|
skip_reset_x:
|
|
.endmacro
|
|
.macro z_store arg
|
|
lda arg
|
|
sta z_buffer,x
|
|
inx
|
|
.endmacro
|
|
|
|
; Compare the previously stored z values
|
|
ldy #0
|
|
z_compare zx
|
|
z_compare zx + 1
|
|
z_compare zy
|
|
z_compare zy + 1
|
|
|
|
cpy #4
|
|
bne z_no_matches
|
|
jmp z_exit
|
|
|
|
z_no_matches:
|
|
z_advance
|
|
|
|
cpx z_buffer_end
|
|
bne z_buffer_loop
|
|
|
|
z_nothing_to_read:
|
|
|
|
; Store and expand
|
|
z_store zx
|
|
z_store zx + 1
|
|
z_store zy
|
|
z_store zy + 1
|
|
z_advance
|
|
stx z_buffer_end
|
|
|
|
; Increment the start roller if necessary (limit size)
|
|
lda iter
|
|
cmp #(z_buffer_len * 4)
|
|
bmi skip_inc_start
|
|
lda z_buffer_start
|
|
clc
|
|
adc #4
|
|
tax
|
|
z_advance
|
|
stx z_buffer_start
|
|
skip_inc_start:
|
|
|
|
skip_z_buffer:
|
|
|
|
jmp loop
|
|
|
|
z_exit:
|
|
lda #0
|
|
sta iter
|
|
|
|
exit_path:
|
|
ldx #0
|
|
lda iter
|
|
bne next
|
|
inx
|
|
next:
|
|
stx z_buffer_active
|
|
rts
|
|
|
|
.endproc
|
|
|
|
.macro scale_zoom dest
|
|
; clobbers X, flags
|
|
.local cont
|
|
.local enough
|
|
|
|
; cx = (sx << (8 - zoom))
|
|
ldx zoom
|
|
cont:
|
|
cpx #8
|
|
beq enough
|
|
shl16 dest
|
|
inx
|
|
jmp cont
|
|
enough:
|
|
.endmacro
|
|
|
|
.macro zoom_factor dest, src, zoom, aspect
|
|
; clobbers A, X, flags, etc
|
|
copy16 dest, src
|
|
scale_zoom dest
|
|
|
|
; cy = cy * (3 / 4)
|
|
; cx = cx * (5 / 4)
|
|
imul16_round dest, dest, aspect, 4
|
|
.endmacro
|
|
|
|
.proc pset
|
|
; screen coords in signed sx,sy
|
|
; iter holds the target to use
|
|
; @todo implement
|
|
|
|
; iter -> color
|
|
ldx iter
|
|
lda color_map,x
|
|
sta pixel_color
|
|
lda #(255 - 3)
|
|
sta pixel_mask
|
|
|
|
; sy -> line base address in temp
|
|
lda sy
|
|
bpl positive
|
|
|
|
negative:
|
|
; temp1 = top half
|
|
lda #.lobyte(framebuffer_top + stride * half_height)
|
|
sta pixel_ptr
|
|
lda #.hibyte(framebuffer_top + stride * half_height)
|
|
sta pixel_ptr + 1
|
|
jmp point
|
|
|
|
positive:
|
|
|
|
lda #.lobyte(framebuffer_bottom)
|
|
sta pixel_ptr
|
|
lda #.hibyte(framebuffer_bottom)
|
|
sta pixel_ptr + 1
|
|
|
|
point:
|
|
|
|
; pixel_ptr += sy * stride
|
|
; temp * 40
|
|
; = temp * 32 + temp * 8
|
|
; = (temp << 5) + (temp << 3)
|
|
copy16 temp, sy
|
|
shl16 temp
|
|
shl16 temp
|
|
shl16 temp
|
|
add16 pixel_ptr, pixel_ptr, temp
|
|
shl16 temp
|
|
shl16 temp
|
|
add16 pixel_ptr, pixel_ptr, temp
|
|
|
|
; Ok so temp1 points to the start of the line, which is 40 bytes.
|
|
; Get the byte and bit offsets
|
|
lda sx
|
|
clc
|
|
adc #half_width
|
|
sta temp
|
|
|
|
; pixel_shift = temp & 3
|
|
; pixel_color <<= pixel_shift (shifting in zeros)
|
|
; pixel_mask <<= pixel_shift (shifting in ones)
|
|
and #3
|
|
sta pixel_shift
|
|
lda #3
|
|
sec
|
|
sbc pixel_shift
|
|
tax
|
|
shift_loop:
|
|
beq shift_done
|
|
asl pixel_color
|
|
asl pixel_color
|
|
sec
|
|
rol pixel_mask
|
|
sec
|
|
rol pixel_mask
|
|
dex
|
|
jmp shift_loop
|
|
shift_done:
|
|
|
|
; pixel_offset = temp >> 2
|
|
lda temp
|
|
lsr a
|
|
lsr a
|
|
sta pixel_offset
|
|
tay
|
|
|
|
; read, mask, or, write
|
|
lda (pixel_ptr),y
|
|
and pixel_mask
|
|
ora pixel_color
|
|
sta (pixel_ptr),y
|
|
|
|
rts
|
|
.endproc
|
|
|
|
.macro draw_text_indirect col, len, strptr
|
|
; clobbers A, X
|
|
.local loop
|
|
.local done
|
|
ldx #0
|
|
loop:
|
|
cpx #len
|
|
beq done
|
|
txa
|
|
tay
|
|
lda (strptr),y
|
|
tay
|
|
lda char_map,y
|
|
sta textbuffer + col,x
|
|
inx
|
|
jmp loop
|
|
done:
|
|
.endmacro
|
|
|
|
.macro draw_text col, len, cstr
|
|
; clobbers A, X
|
|
.local loop
|
|
.local done
|
|
ldx #0
|
|
loop:
|
|
cpx #len
|
|
beq done
|
|
ldy cstr,x
|
|
lda char_map,y
|
|
sta textbuffer + col,x
|
|
inx
|
|
jmp loop
|
|
done:
|
|
.endmacro
|
|
|
|
.proc vblank_handler
|
|
inc count_frames
|
|
|
|
inc chroma_ticks
|
|
lda chroma_ticks
|
|
cmp #(chroma_delay)
|
|
bne skip_chroma
|
|
|
|
lda #0
|
|
sta chroma_ticks
|
|
|
|
inc chroma_offset
|
|
lda chroma_offset
|
|
cmp #(palette_chroma_entries)
|
|
bne skip_chroma
|
|
|
|
lda #0
|
|
sta chroma_offset
|
|
skip_chroma:
|
|
|
|
inc palette_ticks
|
|
lda palette_ticks
|
|
cmp #(palette_delay)
|
|
bne skip_luma
|
|
|
|
lda #0
|
|
sta palette_ticks
|
|
|
|
inc palette_offset
|
|
lda palette_offset
|
|
cmp #(palette_entries)
|
|
bne skip_luma
|
|
|
|
lda #0
|
|
sta palette_offset
|
|
|
|
skip_luma:
|
|
jsr update_palette
|
|
jmp XITVBV
|
|
.endproc
|
|
|
|
.proc update_palette
|
|
lda #0
|
|
sta COLOR4
|
|
|
|
ldx chroma_offset
|
|
ldy palette_offset
|
|
lda palette_chroma,x
|
|
ora palette_start,y
|
|
sta COLOR2
|
|
|
|
;inx
|
|
iny
|
|
lda palette_chroma,x
|
|
ora palette_start,y
|
|
sta COLOR1
|
|
|
|
;inx
|
|
iny
|
|
lda palette_chroma,x
|
|
ora palette_start,y
|
|
sta COLOR0
|
|
|
|
rts
|
|
.endproc
|
|
|
|
.proc update_speed
|
|
; convert frames (u16) to fp
|
|
; add to frames_total
|
|
; convert pixels (u16) to fp
|
|
; add to pixels_total
|
|
; (frames_total * 16.66666667) / pixels_total
|
|
; convert to ATASCII
|
|
; draw text
|
|
.endproc
|
|
|
|
.proc keycheck
|
|
; clobbers all
|
|
; returns 255 in A if state change or 0 if no change
|
|
|
|
; check keyboard buffer
|
|
lda CH
|
|
cmp #$ff
|
|
beq skip_char
|
|
|
|
; Clear the keyboard buffer and re-enable interrupts
|
|
ldx #$ff
|
|
stx CH
|
|
|
|
tay
|
|
|
|
lda zoom
|
|
cpy #KEY_PLUS
|
|
beq plus
|
|
cpy #KEY_MINUS
|
|
beq minus
|
|
|
|
; temp = $0010 << (8 - zoom)
|
|
lda #$10
|
|
sta temp
|
|
lda #$00
|
|
sta temp + 1
|
|
scale_zoom temp
|
|
|
|
cpy #KEY_UP
|
|
beq up
|
|
cpy #KEY_DOWN
|
|
beq down
|
|
cpy #KEY_LEFT
|
|
beq left
|
|
cpy #KEY_RIGHT
|
|
beq right
|
|
cpy #KEY_1
|
|
beq one
|
|
cpy #KEY_2
|
|
beq two
|
|
cpy #KEY_3
|
|
beq three
|
|
cpy #KEY_4
|
|
beq four
|
|
|
|
skip_char:
|
|
lda #0
|
|
rts
|
|
|
|
plus:
|
|
lda zoom
|
|
cmp #8
|
|
bpl skip_char
|
|
inc zoom
|
|
jmp done
|
|
minus:
|
|
lda zoom
|
|
cmp #1
|
|
bmi skip_char
|
|
dec zoom
|
|
jmp done
|
|
up:
|
|
sub16 oy, oy, temp
|
|
jmp done
|
|
down:
|
|
add16 oy, oy, temp
|
|
jmp done
|
|
left:
|
|
sub16 ox, ox, temp
|
|
jmp done
|
|
right:
|
|
add16 ox, ox, temp
|
|
jmp done
|
|
one:
|
|
ldx #0
|
|
jmp load_key_viewport
|
|
two:
|
|
ldx #1
|
|
jmp load_key_viewport
|
|
three:
|
|
ldx #2
|
|
jmp load_key_viewport
|
|
four:
|
|
ldx #3
|
|
; fall through
|
|
load_key_viewport:
|
|
jsr load_viewport
|
|
; fall through
|
|
done:
|
|
lda #255
|
|
rts
|
|
|
|
.endproc
|
|
|
|
.proc clear_screen
|
|
; zero the range from framebuffer_top to display_list
|
|
lda #.lobyte(framebuffer_top)
|
|
sta temp
|
|
lda #.hibyte(framebuffer_top)
|
|
sta temp + 1
|
|
|
|
zero_page_loop:
|
|
lda #0
|
|
ldy #0
|
|
zero_byte_loop:
|
|
sta (temp),y
|
|
iny
|
|
bne zero_byte_loop
|
|
|
|
inc temp + 1
|
|
lda temp + 1
|
|
cmp #.hibyte(display_list)
|
|
bne zero_page_loop
|
|
|
|
rts
|
|
.endproc
|
|
|
|
.proc status_bar
|
|
; Status bar
|
|
draw_text 0, str_self_len, str_self
|
|
draw_text 40 - str_run_len, str_run_len, str_run
|
|
|
|
rts
|
|
.endproc
|
|
|
|
; input: viewport selector in x
|
|
; clobbers: a, x
|
|
.proc load_viewport
|
|
|
|
lda viewport_zoom,x
|
|
sta zoom
|
|
|
|
txa
|
|
asl a
|
|
tax
|
|
lda viewport_ox,x
|
|
sta ox
|
|
lda viewport_oy,x
|
|
sta oy
|
|
inx
|
|
lda viewport_ox,x
|
|
sta ox + 1
|
|
lda viewport_oy,x
|
|
sta oy + 1
|
|
|
|
rts
|
|
.endproc
|
|
|
|
.proc start
|
|
|
|
jsr imul8xe_init
|
|
|
|
; initialize viewport
|
|
ldx #0 ; overview
|
|
jsr load_viewport
|
|
|
|
; Disable display DMA
|
|
lda #0
|
|
sta DMACTL
|
|
|
|
jsr clear_screen
|
|
|
|
; Copy the display list into properly aligned memory
|
|
; Can't cross 1024-byte boundaries :D
|
|
ldx #0
|
|
copy_byte_loop:
|
|
lda display_list_start,x
|
|
sta display_list,x
|
|
inx
|
|
cpx #display_list_len
|
|
bne copy_byte_loop
|
|
|
|
; Set up the display list
|
|
lda #.lobyte(display_list)
|
|
sta DLISTL ; actual register
|
|
sta SDLSTL ; shadow register the OS will copy in
|
|
lda #.hibyte(display_list)
|
|
sta DLISTH ; actual register
|
|
sta SDLSTH ; shadow register the OS will copy in
|
|
|
|
; Re-enable display DMA
|
|
lda #$22
|
|
sta DMACTL
|
|
|
|
; Initialize the palette
|
|
lda #0
|
|
sta palette_offset
|
|
sta palette_delay
|
|
sta chroma_offset
|
|
sta chroma_delay
|
|
jsr update_palette
|
|
|
|
; install the vblank handler
|
|
lda #7 ; deferred
|
|
ldx #.hibyte(vblank_handler)
|
|
ldy #.lobyte(vblank_handler)
|
|
jsr SETVBV
|
|
|
|
main_loop:
|
|
; count_frames = 0; count_pixels = 0
|
|
lda #0
|
|
sta count_frames
|
|
sta count_pixels
|
|
|
|
; total_ms = 0.0; total_pixels = 0.0
|
|
ldx #total_ms
|
|
jsr ZF1
|
|
ldx #total_pixels
|
|
jsr ZF1
|
|
|
|
jsr clear_screen
|
|
jsr status_bar
|
|
|
|
lda #0
|
|
sta fill_level
|
|
|
|
fill_loop:
|
|
|
|
; sy = -92 .. 91
|
|
lda #(256-half_height)
|
|
sta sy
|
|
lda #(256-1)
|
|
sta sy + 1
|
|
|
|
loop_sy:
|
|
; sx = -80 .. 79
|
|
lda #(256-half_width)
|
|
sta sx
|
|
lda #(256-1)
|
|
sta sx + 1
|
|
|
|
loop_sx:
|
|
; check the fill mask
|
|
ldy #0
|
|
|
|
loop_skip_level:
|
|
cpy fill_level
|
|
beq current_level
|
|
|
|
lda fill_masks,y
|
|
and sx
|
|
bne not_skipped_mask1
|
|
|
|
lda fill_masks,y
|
|
and sy
|
|
beq skipped_mask
|
|
|
|
not_skipped_mask1:
|
|
iny
|
|
jmp loop_skip_level
|
|
|
|
current_level:
|
|
lda fill_masks,y
|
|
and sx
|
|
bne skipped_mask
|
|
|
|
lda fill_masks,y
|
|
and sy
|
|
beq not_skipped_mask
|
|
|
|
skipped_mask:
|
|
jmp skipped
|
|
|
|
not_skipped_mask:
|
|
|
|
; run the fractal!
|
|
zoom_factor cx, sx, zoom, aspect_x
|
|
add16 cx, cx, ox
|
|
zoom_factor cy, sy, zoom, aspect_y
|
|
add16 cy, cy, oy
|
|
jsr mandelbrot
|
|
jsr pset
|
|
|
|
jsr keycheck
|
|
beq no_key
|
|
; @fixme clear the pixel stats
|
|
jmp main_loop
|
|
|
|
no_key:
|
|
; check if we should update the counters
|
|
;
|
|
; count_pixels >= width? update!
|
|
inc count_pixels
|
|
lda count_pixels
|
|
cmp #width
|
|
bmi update_status
|
|
|
|
; count_frames >= 120? update!
|
|
lda count_frames
|
|
cmp #120 ; >= 2 seconds
|
|
bmi skipped
|
|
|
|
update_status:
|
|
; FR0 = (float)count_pixels & clear count_pixels
|
|
lda count_pixels
|
|
sta FR0
|
|
lda #0
|
|
sta FR0 + 1
|
|
sta count_pixels
|
|
jsr IFP
|
|
|
|
; FR1 = total_pixels
|
|
ldx #.lobyte(total_pixels)
|
|
ldy #.hibyte(total_pixels)
|
|
jsr FLD1R
|
|
|
|
; FR0 += FR1
|
|
jsr FADD
|
|
|
|
; total_pixels = FR0
|
|
ldx #.lobyte(total_pixels)
|
|
ldy #.hibyte(total_pixels)
|
|
jsr FST0R
|
|
|
|
|
|
; FR0 = (float)count_frames & clear count_frames
|
|
; warning: this should really disable interrupts @TODO
|
|
lda count_frames
|
|
sta FR0
|
|
lda #0
|
|
sta FR0 + 1
|
|
sta count_frames
|
|
jsr IFP
|
|
|
|
; FR0 *= ms_per_frame
|
|
ldx #.lobyte(ms_per_frame)
|
|
ldy #.hibyte(ms_per_frame)
|
|
jsr FLD1R
|
|
jsr FMUL
|
|
|
|
; FR0 += total_ms
|
|
ldx #total_ms
|
|
ldy #0
|
|
jsr FLD1R
|
|
jsr FADD
|
|
|
|
; total_ms = FR0
|
|
ldx #total_ms
|
|
ldy #0
|
|
jsr FST0R
|
|
|
|
; FR0 /= total_pixels
|
|
ldx #total_pixels
|
|
ldy #0
|
|
jsr FLD1R
|
|
jsr FDIV
|
|
|
|
; convert to ASCII in INBUFF
|
|
jsr FASC
|
|
|
|
; print the first 6 digits
|
|
draw_text_indirect speed_start, speed_precision, INBUFF
|
|
draw_text speed_start + speed_precision, str_speed_len, str_speed
|
|
|
|
skipped:
|
|
|
|
clc
|
|
lda sx
|
|
adc #1
|
|
sta sx
|
|
lda sx + 1
|
|
adc #0
|
|
sta sx + 1
|
|
|
|
lda sx
|
|
cmp #half_width
|
|
beq loop_sx_done
|
|
jmp loop_sx
|
|
|
|
loop_sx_done:
|
|
|
|
clc
|
|
lda sy
|
|
adc #1
|
|
sta sy
|
|
lda sy + 1
|
|
adc #0
|
|
sta sy + 1
|
|
|
|
lda sy
|
|
cmp #half_height
|
|
beq loop_sy_done
|
|
jmp loop_sy
|
|
|
|
loop_sy_done:
|
|
|
|
fill_loop_done:
|
|
inc fill_level
|
|
lda fill_level
|
|
cmp #max_fill_level
|
|
beq loop
|
|
jmp fill_loop
|
|
|
|
loop:
|
|
; finished
|
|
draw_text 40 - str_done_len, str_done_len, str_done
|
|
jsr keycheck
|
|
beq loop
|
|
jmp main_loop
|
|
|
|
.endproc
|