mandel-6502/mandel.s

1671 lines
30 KiB
ArmAsm
Raw Normal View History

; ---------------------------------------------------------------------------
; Zero-page variables
; ---------------------------------------------------------------------------

; Pixel being rendered and the view centre
sx              = $80           ; i16: screen pixel x
sy              = $82           ; i16: screen pixel y
ox              = $84           ; fixed4.12: center point x
oy              = $86           ; fixed4.12: center point y

; Mandelbrot iteration state (zx .. iter is cleared as one run by mandelbrot)
cx              = $88           ; fixed4.12: c_x
cy              = $8a           ; fixed4.12: c_y
zx              = $8c           ; fixed4.12: z_x
zy              = $8e           ; fixed4.12: z_y
zx_2            = $90           ; fixed4.12: z_x^2
zy_2            = $92           ; fixed4.12: z_y^2
zx_zy           = $94           ; fixed4.12: z_x * z_y
dist            = $96           ; fixed4.12: z_x^2 + z_y^2
iter            = $a0           ; u8: iteration count

; View and statistics
zoom            = $a1           ; u8: zoom shift level
count_frames    = $a2           ; u8
count_pixels    = $a3           ; u8
total_ms        = $a4           ; float48
total_pixels    = $aa           ; float48

; Cycle-detection ring buffer bookkeeping
z_buffer_active = $b0           ; boolean: 1 if we triggered the lake, 0 if not
z_buffer_start  = $b1           ; u8: index into z_buffer
z_buffer_end    = $b2           ; u8: index into z_buffer

; Scratch and pixel plotting
temp            = $b4           ; u16
temp2           = $b6           ; u16
pixel_ptr       = $b8           ; u16
pixel_color     = $ba           ; u8
pixel_mask      = $bb           ; u8
pixel_shift     = $bc           ; u8
pixel_offset    = $bd           ; u8
fill_level      = $be           ; u8

; Palette animation state (advanced by vblank_handler)
palette_offset  = $bf           ; u8
palette_ticks   = $c0           ; u8
chroma_ticks    = $c1           ; u8
chroma_offset   = $c2           ; u8
ptr             = $c4           ; u16

; Palette animation periods. These are tick counts compared against
; palette_ticks / chroma_ticks, NOT zero-page addresses.
palette_delay   = 23
chroma_delay    = 137
2024-12-30 18:21:52 +00:00
2022-12-29 05:08:16 +00:00
; ---------------------------------------------------------------------------
; Floating-point package: zero-page registers
; ---------------------------------------------------------------------------
FR0     = $d4           ; float48
FRE     = $da
FR1     = $e0           ; float48
FR2     = $e6           ; float48
CIX     = $f2           ; u8 - index into INBUFF
INBUFF  = $f3           ; u16 - pointer to ascii
FLPTR   = $fc           ; u16 - pointer to user buffer float48

; Keyboard state
CH1     = $02f2         ; previous character read from keyboard
CH      = $02fc         ; current character read from keyboard

LBUFF   = $0580         ; result buffer for FASC routine

; ---------------------------------------------------------------------------
; Floating-point package: ROM routine vectors
; ---------------------------------------------------------------------------
FASC    = $D8E6         ; float -> ASCII (output in INBUFF, last char has high bit set)
IFP     = $D9AA         ; integer -> float (FR0:u16 -> FR0:float48)
FADD    = $DA66         ; FR0 += FR1
FSUB    = $DA60         ; FR0 -= FR1
FMUL    = $DADB         ; FR0 *= FR1
FDIV    = $DB28         ; FR0 /= FR1
ZF1     = $DA46         ; clear zero-page floating point number (XX)
FLD0R   = $DD89         ; load FR0 with floating point number (YYXX)
FLD1R   = $DD98         ; load FR1 with floating point number (YYXX)
FST0R   = $DDA7         ; store FR0 in user buffer (YYXX)
FMOVE   = $DDB6         ; move FR0 to FR1
2022-12-29 05:08:16 +00:00
2023-01-22 03:17:30 +00:00
; ---------------------------------------------------------------------------
; High memory layout
; ---------------------------------------------------------------------------
framebuffer_top     = $a000     ; first half of the bitmap
textbuffer          = $af00     ; status line text
framebuffer_bottom  = $b000     ; second half of the bitmap
display_list        = $bf00     ; run-time copy of display_list_start
framebuffer_end     = $c000

; Bitmap geometry: 160 x 184 pixels, 4 pixels per byte
height      = 184
half_height = height >> 1
width       = 160
half_width  = width >> 1
stride      = width >> 2        ; bytes per scan line
2023-01-22 03:17:30 +00:00
2024-12-27 02:17:01 +00:00
; ---------------------------------------------------------------------------
; Hardware and OS addresses
; ---------------------------------------------------------------------------
EXTENDED_RAM = $4000    ; 16KiB bank on the XE
PORTB        = $D301    ; memory & bank-switch for XL/XE

DMACTL       = $D400
DLISTL       = $D402
DLISTH       = $D403
WSYNC        = $D40A

; OS shadow registers
SDLSTL       = $230
SDLSTH       = $231

; interrupt stuff
SYSVBV       = $E45F
XITVBV       = $E462
SETVBV       = $E45C

; colour registers written by update_palette
COLOR0       = $2C4
COLOR1       = $2C5
COLOR2       = $2C6
COLOR3       = $2C7
COLOR4       = $2C8
2023-03-12 04:45:32 +00:00
; ---------------------------------------------------------------------------
; Keycodes, as read from CH (all written in hex for consistency)
; ---------------------------------------------------------------------------
KEY_PLUS    = $06
KEY_MINUS   = $0e
KEY_UP      = $8e
KEY_DOWN    = $8f
KEY_LEFT    = $86
KEY_RIGHT   = $87

KEY_1       = $1f
KEY_2       = $1e
KEY_3       = $1a
KEY_4       = $18       ; 24
KEY_5       = $1d       ; 29
KEY_6       = $1b       ; 27
KEY_7       = $33       ; 51
KEY_8       = $35       ; 53
KEY_9       = $30       ; 48
KEY_0       = $32       ; 50
2023-02-05 22:26:58 +00:00
; Layout of a 6-byte floating point value as used by the FP ROM routines:
; one exponent byte followed by five mantissa bytes (see ms_per_frame for
; a literal example: exponent/sign byte, then BCD digits).
.struct float48
    exponent    .byte
    mantissa    .byte 5
.endstruct
.import mul_lobyte256
.import mul_hibyte256
.import mul_hibyte512
.import sqr_lobyte
.import sqr_hibyte
2023-01-22 03:17:30 +00:00
.data

; ---------------------------------------------------------------------------
; Status-bar strings. Each string is bracketed by a start and an end label,
; and its length is derived from the two.
; ---------------------------------------------------------------------------
strings:

str_self:
    .byte "MANDEL-6502"
str_self_end:
str_self_len  = str_self_end - str_self

str_speed:
    .byte " ms/px"
str_speed_end:
str_speed_len = str_speed_end - str_speed

str_run:
    .byte " RUN"
str_run_end:
str_run_len   = str_run_end - str_run

str_done:
    .byte "DONE"
str_done_end:
str_done_len  = str_done_end - str_done

; Status-line layout (40 columns): the speed read-out is placed to the left
; of the DONE marker, with one spare column.
speed_precision = 6             ; characters of the number that are shown
speed_start     = 40 - str_done_len - str_speed_len - speed_precision - 1
speed_len       = 14 + str_speed_len
; Map ATASCII string values to framebuffer font entries (128 entries):
;   $00-$1f -> $40-$5f
;   $20-$5f -> $00-$3f
;   $60-$7f -> $60-$7f
char_map:
    .repeat 32, code
        .byte code + 64
    .endrepeat
    .repeat 64, code
        .byte code
    .endrepeat
    .repeat 32, code
        .byte 96 + code
    .endrepeat
2023-01-22 14:12:40 +00:00
; Lower-case hexadecimal digits, indexed by nibble value (0-15).
; Not referenced by any code in this file.
hex_chars:
.byte "0123456789abcdef"
2023-01-22 03:17:30 +00:00
aspect:
; Aspect-ratio correction factors.
;
; pixels at 320w are 5:6 (narrow)
; pixels at 160w are 5:3 (wide)
;
; cy = (sy << (8 - zoom)) * (96 / 128 = 3 / 4)
; cx = (sx << (8 - zoom)) * ((3 / 4) * (5 / 3) = 5 / 4)
;
; so vertical range   -92 .. 91.9 is -2.15625 .. 2.15624
; & horizontal range  -80 .. 79.9 is -3.125   .. 3.124
;
; 184h is the equiv of 220.8h at square pixels
; 320 / 220.8 = 1.45 display aspect ratio
;
; Both factors are 16-bit fixed4.12 values (1.0 = 1 << 12), matching the
; format of the coordinates they are multiplied with in zoom_factor.

aspect_x:                       ; fixed4.12 5/4
    .word (5 << 12) / 4

aspect_y:                       ; fixed4.12 3/4
    .word (3 << 12) / 4
2023-01-22 03:17:30 +00:00
2023-02-05 22:26:58 +00:00
; float48 constant 16.66666667 - the milliseconds-per-frame figure that
; start multiplies count_frames by when accumulating total_ms.
ms_per_frame:
    .byte $40                       ; exponent/sign (64)
    .byte $16, $66, $66, $66, $67   ; BCD digits
2023-01-22 03:17:30 +00:00
; Display list template. start copies display_list_len bytes of this to
; display_list before pointing the display hardware at it.
display_list_start:
    ; 24 lines overscan
    .repeat 3
        .byte $70               ; 8 blank lines
    .endrepeat

    ; 8 scan lines, 1 row of 40-column text
    .byte $42
    .addr textbuffer

    ; 184 lines graphics
    ; ANTIC mode e (160px 2bpp, 1 scan line per line)
    ; first half: one line with an explicit address, then half_height - 1 more
    .byte $4e
    .addr framebuffer_top
    .repeat half_height - 1
        .byte $0e
    .endrepeat

    ; second half, restarting from its own address
    .byte $4e
    .addr framebuffer_bottom
    .repeat half_height - 1
        .byte $0e
    .endrepeat

    .byte $41                   ; jump and blank
    .addr display_list
display_list_end:

display_list_len = display_list_end - display_list_start
2023-01-22 03:17:30 +00:00
2023-01-22 16:20:59 +00:00
; iter -> 2-bit pixel colour (256 entries, indexed by pset).
; Entry 0 maps to colour 0; entries 1..255 cycle through colours 1, 2, 3.
color_map:
    .byte 0
    .repeat 85
        .byte 1, 2, 3
    .endrepeat
2024-12-30 18:21:52 +00:00
; Luminance values. update_palette reads three consecutive entries starting
; at palette_offset (0 .. palette_entries - 1), so the first two entries are
; repeated after the table to let the window wrap without an index check.
palette_start:
    .byte $0e, $08, $04
palette_repeat:
    .byte $0e, $08
palette_entries = 3

; Chroma values ($10, $20 .. $f0) OR-ed with the luminance above.
; Two extra wrap-around entries follow the 15 real ones.
palette_chroma:
    .repeat 15, hue
        .byte (hue + 1) << 4
    .endrepeat
    .repeat 2, hue
        .byte (hue + 1) << 4
    .endrepeat
palette_chroma_entries = 15
2022-12-29 05:08:16 +00:00
.code

; Ring buffer holding the last N zx/zy values (4 bytes per entry: zx lo/hi,
; zy lo/hi). mandelbrot indexes it by byte offset through z_buffer_start and
; z_buffer_end.
z_buffer_len  = 16
z_buffer_mask = z_buffer_len - 1
z_buffer:
    .res z_buffer_len * 4, 0
2022-12-29 05:08:16 +00:00
.export start
2024-02-04 22:25:15 +00:00
; Progressive-fill masks, one per fill level. At a given level, start only
; renders pixels whose sx and sy are both zero under that level's mask, and
; skips pixels that already matched the mask of an earlier level.
max_fill_level = 6
fill_masks:
    .byte %00011111             ; level 0: every 32nd pixel
    .byte %00001111             ; level 1: every 16th
    .byte %00000111             ; level 2: every 8th
    .byte %00000011             ; level 3: every 4th
    .byte %00000001             ; level 4: every 2nd
    .byte %00000000             ; level 5: everything that is left
; Preset viewports, selected by index through load_viewport.
; Three parallel tables: zoom level (u8), centre x and centre y (16-bit words).
viewport_zoom:
    .byte 1, 6, 8, 6

viewport_ox:
    .word $0000, $f110, $f110, $e400

viewport_oy:
    .word $0000, $fb60, $fbe0, $0000
; add bytes, dest, arg1, arg2
;   dest = arg1 + arg2, little-endian, `bytes` bytes wide.
;   Clobbers A and flags; the carry out of the top byte is left in C.
;   2 + 9 * bytes cycles (zero-page operands)
.macro add bytes, dest, arg1, arg2
    clc                         ; 2 cyc
    .repeat bytes, offset       ; 9 * bytes cycles
        lda arg1 + offset
        adc arg2 + offset
        sta dest + offset
    .endrepeat
.endmacro

; dest = arg1 + arg2 (16-bit), 20 cycles
.macro add16 dest, arg1, arg2
    add 2, dest, arg1, arg2
.endmacro

; dest = arg1 + arg2 (32-bit), 38 cycles
; Fix: this used to expand to `add 4, dest, arg2, dest`, which ignored arg1
; and computed dest += arg2. It now matches add16 and sub32; callers that
; pass dest as arg1 get the same result as before.
.macro add32 dest, arg1, arg2
    add 4, dest, arg1, arg2
.endmacro
2024-12-31 03:17:02 +00:00
; add_carry dest
;   dest += C - folds the carry left by a previous add into one more byte.
;   Clobbers A and flags. 8 cycles (zero-page operand)
.macro add_carry dest
    lda dest                    ; 3 cyc
    adc #0                      ; 2 cyc
    sta dest                    ; 3 cyc
.endmacro
; sub bytes, dest, arg1, arg2
;   dest = arg1 - arg2, little-endian, `bytes` bytes wide.
;   Clobbers A and flags; the borrow state is left in C.
;   2 + 9 * bytes cycles (zero-page operands)
.macro sub bytes, dest, arg1, arg2
    sec                         ; 2 cyc
    .repeat bytes, offset       ; 9 * bytes cycles
        lda arg1 + offset
        sbc arg2 + offset
        sta dest + offset
    .endrepeat
.endmacro

; dest = arg1 - arg2 (16-bit), 20 cycles
.macro sub16 dest, arg1, arg2
    sub 2, dest, arg1, arg2
.endmacro

; dest = arg1 - arg2 (32-bit), 38 cycles
.macro sub32 dest, arg1, arg2
    sub 4, dest, arg1, arg2
.endmacro
2024-12-31 03:17:02 +00:00
; shl bytes, arg
;   Shift the `bytes`-byte little-endian value at arg left by one bit, in
;   place. The bit shifted out of the top byte ends up in C.
;   5 * bytes cycles with a zero-page operand (asl/rol zp are 5 cycles each)
.macro shl bytes, arg
    asl arg                     ; 5 cyc
    .repeat bytes - 1, upper
        rol arg + 1 + upper     ; 5 cyc
    .endrepeat
.endmacro

; 16-bit shift left by one, 10 cycles
.macro shl16 arg
    shl 2, arg
.endmacro

; 24-bit shift left by one, 15 cycles
.macro shl24 arg
    shl 3, arg
.endmacro

; 32-bit shift left by one, 20 cycles
.macro shl32 arg
    shl 4, arg
.endmacro
; copy bytes, dest, arg
;   Copy `bytes` bytes from arg to dest, lowest address first. Clobbers A.
;   6 * bytes cycles (zero-page operands)
.macro copy bytes, dest, arg
    .repeat bytes, offset
        lda arg + offset        ; 3 cyc
        sta dest + offset       ; 3 cyc
    .endrepeat
.endmacro

; 16-bit copy, 12 cycles
.macro copy16 dest, arg
    copy 2, dest, arg
.endmacro

; 32-bit copy, 24 cycles
.macro copy32 dest, arg
    copy 4, dest, arg
.endmacro

; 6-byte (float48) copy, 36 cycles
.macro copyfloat dest, arg
    copy 6, dest, arg
.endmacro
2022-12-30 08:43:44 +00:00
; neg bytes, arg
;   Two's-complement negate the `bytes`-byte little-endian value at arg in
;   place (arg = 0 - arg). Clobbers A and flags.
;   2 + 8 * bytes cycles (zero-page operand)
.macro neg bytes, arg
    sec                         ; 2 cyc
    .repeat bytes, offset
        lda #0                  ; 2 cyc
        sbc arg + offset        ; 3 cyc
        sta arg + offset        ; 3 cyc
    .endrepeat
.endmacro

; 16-bit negate, 18 cycles
.macro neg16 arg
    neg 2, arg
.endmacro

; 32-bit negate, 34 cycles
.macro neg32 arg
    neg 4, arg
.endmacro
2024-12-31 03:17:02 +00:00
; shift_round_16 arg, shift
;   Shift the 32-bit value at arg left by `shift` bits, then round its top
;   16 bits in place with round16. The result is in arg + 2 / arg + 3.
;   Cost: `shift` expansions of shl32 plus one round16.
.macro shift_round_16 arg, shift
    .repeat shift
        shl32 arg
    .endrepeat
    round16 arg
.endmacro
; imul16_round dest, arg1, arg2, shift
;   dest = top 16 bits of ((arg1 * arg2) << shift), rounded.
;   Goes through imul16_func, so FR0, FR1 and FR2 are overwritten, along
;   with whatever that routine clobbers.
.macro imul16_round dest, arg1, arg2, shift
    copy16 FR0, arg1            ; 12 cyc - multiplicand
    copy16 FR1, arg2            ; 12 cyc - multiplier
    jsr imul16_func             ; 32-bit product in FR2
    shift_round_16 FR2, shift
    copy16 dest, FR2 + 2        ; 12 cyc - keep the high word
.endmacro
2024-12-30 01:56:14 +00:00
; sqr16_round dest, arg, shift
;   dest = top 16 bits of ((arg * arg) << shift), rounded.
;   Same result as `imul16_round dest, arg, arg, shift`, but goes through the
;   dedicated squaring routine. Overwrites FR0 and FR2, plus whatever
;   sqr16_func clobbers.
.macro sqr16_round dest, arg, shift
    copy16 FR0, arg             ; 12 cyc
    jsr sqr16_func              ; 32-bit square in FR2
    shift_round_16 FR2, shift
    copy16 dest, FR2 + 2        ; 12 cyc - keep the high word
.endmacro
2024-12-30 01:37:06 +00:00
; sqr8 dest, arg
;   dest (16-bit) = arg * arg for an 8-bit arg, looked up in the
;   sqr_lobyte / sqr_hibyte tables.
;   clobbers a, x
.macro sqr8 dest, arg
    ldx arg
    lda sqr_lobyte,x
    sta dest
    lda sqr_hibyte,x
    sta dest + 1
.endmacro
; sqr8_add16 dest, arg
;   dest (16-bit) += arg * arg for an 8-bit arg, using the
;   sqr_lobyte / sqr_hibyte tables. The carry out of the high byte is left
;   in C and not propagated any further.
;   clobbers a, x
.macro sqr8_add16 dest, arg
    ldx arg
    clc
    lda sqr_lobyte,x            ; low byte of the square
    adc dest
    sta dest
    lda sqr_hibyte,x            ; high byte, plus carry from the low byte
    adc dest + 1
    sta dest + 1
.endmacro
2024-12-30 21:44:31 +00:00
.segment "TABLES"

; Lookup table: multiplier byte -> PORTB value for bank-switch.
; The top two bits of the index are moved into bits 2-3 of the value; every
; other bit comes from the constant $e3. Page-aligned.
.align 256
bank_switch_table:
    .repeat 256, value
        .byte ((value & $c0) >> 4) | $e3
    .endrepeat

.code
2024-12-27 05:41:03 +00:00
; bank_switch bank
;   Write the PORTB value for the constant bank number `bank` (0-3), using
;   the same encoding as bank_switch_table: bank in bits 2-3, rest from $e3.
;   Clobbers A.
.macro bank_switch bank
    lda #((bank << 2) | $e3)
    sta PORTB
.endmacro
2024-12-30 01:37:06 +00:00
; imul8 dest, arg1, arg2, xe
;   dest (16-bit) = arg1 * arg2, unsigned 8-bit operands.
;   xe is an assembly-time switch:
;     xe != 0  use the 64KB product table built by imul8xe_init in banked RAM
;     xe == 0  use the half-square tables in base RAM
.macro imul8 dest, arg1, arg2, xe
.if xe
    ; using 64KB lookup table
    ; 51-70 cycles
    ; clobbers a, x, y, dest, ptr (the low byte of ptr is assumed to be 0)
    .scope
        out16 = dest

        ; top 2 bits of arg2 are the table bank selector
        ldx arg2                    ; 3 cyc
        lda bank_switch_table,x     ; 4 cyc
        sta PORTB                   ; 4 cyc

        ; the low 6 bits of arg2 pick the page inside the bank window;
        ; add $4000 for the bank pointer
        txa                         ; 2 cyc
        and #$3f                    ; 2 cyc
        ora #$40                    ; 2 cyc
        sta ptr + 1                 ; 3 cyc

        ; arg1 with its LSB dropped is the offset of the 2-byte entry
        lda arg1                    ; 3 cyc
        and #$fe                    ; 2 cyc
        tay                         ; 2 cyc
        lda (ptr),y                 ; 5 cyc
        sta out16                   ; 3 cyc
        iny                         ; 2 cyc
        lda (ptr),y                 ; 5 cyc
        sta out16 + 1               ; 3 cyc

        ; note: we are not restoring memory to save 6 cycles!
        ; this means those 16kb have to be switched back to base RAM
        ; if we need to use them anywhere else
        ;;; restore memory
        ;;lda #$81                  ; 2 cyc - disabled
        ;;sta PORTB                 ; 4 cyc - disabled

        ; the table only holds even arg1 values; if the bit we skipped is
        ; set, add arg2 (still in X) one last time
        lda arg1                    ; 3 cyc
        and #1                      ; 2 cyc
        beq finished                ; 2 cyc
        clc                         ; 2 cyc
        txa                         ; 2 cyc
        adc out16                   ; 3 cyc
        sta out16                   ; 3 cyc
        lda #0                      ; 2 cyc
        adc out16 + 1               ; 3 cyc
        sta out16 + 1               ; 3 cyc
    finished:
    .endscope
.else
    ; Using base 48k RAM compatibility mode
    ; Small table of half squares
    ; Adapted from https://everything2.com/title/Fast+6502+multiplication
    ; 81-92 cycles
    ; clobbers a, x, dest
    .scope
        fac_a   = arg1
        fac_b   = arg2
        prod_lo = dest
        prod_hi = dest + 1

        ; (a + b)^2/2 - the sum may need 9 bits, hence two high-byte tables
        lda fac_a                   ; 3 cyc
        clc                         ; 2 cyc
        adc fac_b                   ; 3 cyc
        tax                         ; 2 cyc
        bcc no_carry                ; 2 cyc
        lda mul_hibyte512,x         ; 4 cyc
        bcs have_hi                 ; 2 cyc - always taken, C is still set
    no_carry:
        lda mul_hibyte256,x         ; 4 cyc
        sec                         ; 2 cyc - either way C = 1 for the sbc below
    have_hi:
        sta prod_hi                 ; 3 cyc
        lda mul_lobyte256,x         ; 4 cyc

        ; - a^2/2
        ldx fac_a                   ; 3 cyc
        sbc mul_lobyte256,x         ; 4 cyc
        sta prod_lo                 ; 3 cyc
        lda prod_hi                 ; 3 cyc
        sbc mul_hibyte256,x         ; 4 cyc
        sta prod_hi                 ; 3 cyc

        ; + b & a & 1:
        ; (this is a kludge to correct a
        ; roundoff error that makes odd * odd too low)
        ldx fac_b                   ; 3 cyc
        txa                         ; 2 cyc
        and fac_a                   ; 3 cyc
        and #1                      ; 2 cyc
        clc                         ; 2 cyc
        adc prod_lo                 ; 3 cyc
        bcc no_ripple               ; 2 cyc
        inc prod_hi                 ; 5 cyc

        ; - b^2/2
    no_ripple:
        sec                         ; 2 cyc
        sbc mul_lobyte256,x         ; 4 cyc
        sta prod_lo                 ; 3 cyc
        lda prod_hi                 ; 3 cyc
        sbc mul_hibyte256,x         ; 4 cyc
        sta prod_hi                 ; 3 cyc
    .endscope
.endif
.endmacro
; imul8xe_init
;   Probe for bank-switched extended RAM and, when it is present, redirect
;   the multiply/square routines to their table-driven variants and build
;   the 64KB product table.
;
;   Probe: write 0 to EXTENDED_RAM with bank 0 selected and 1 with bank 1
;   selected, then read it back through bank 0. Reading 0 means the two
;   banks are distinct memory.
;
;   Clobbers A, Y, FR0, FR1, FR2, ptr, temp2 and the first byte of
;   EXTENDED_RAM. Leaves bank 3 selected when the table was built.
.proc imul8xe_init
    bank_switch 0
    lda #0
    sta EXTENDED_RAM
    bank_switch 1
    lda #1
    sta EXTENDED_RAM
    bank_switch 0
    lda EXTENDED_RAM
    beq install

    ; no bank switching available, we just overwrite the value in base ram
    rts

install:
    ; patch imul16_func into a forwarding thunk to imul16xe_func
    lda #$4c                        ; 'jmp' opcode
    sta imul16_func
    lda #.lobyte(imul16xe_func)
    sta imul16_func + 1
    lda #.hibyte(imul16xe_func)
    sta imul16_func + 2

    ; ditto for sqr16_func -> sqr16xe_func
    lda #$4c                        ; 'jmp' opcode
    sta sqr16_func
    lda #.lobyte(sqr16xe_func)
    sta sqr16_func + 1
    lda #.hibyte(sqr16xe_func)
    sta sqr16_func + 2

    ; create the lookup table
    ; go through the input set, in four 16KB chunks
    arg1 = FR1
    arg2 = FR2
    result = FR0

    ; both multipliers start at 0; imul8xe_init_section carries arg2 on from
    ; one chunk to the next. ptr low byte stays 0 for imul8's table lookups.
    lda #$00
    sta arg1
    sta arg2
    sta ptr
    lda #$40
    sta ptr + 1

    bank_switch 0                   ; $00 * $00 -> $3f * $ff
    jsr imul8xe_init_section
    bank_switch 1                   ; $40 * $00 -> $7f * $ff
    jsr imul8xe_init_section
    bank_switch 2                   ; $80 * $00 -> $bf * $ff
    jsr imul8xe_init_section
    bank_switch 3                   ; $c0 * $00 -> $ff * $ff
    jsr imul8xe_init_section
    rts
.endproc
; Initialize a 16 KB chunk of the table ($4000-$7fff of the selected bank)
;
; Layout: one 256-byte page per value of arg2 (64 pages), each holding 128
; two-byte products arg1 * arg2 for the even values of arg1.
;
; input:    arg2 (FR2) = first multiplier of this chunk
;           arg1 (FR1) = 0
; output:   arg2 advanced by 64, arg1 back at 0
; clobbers: a, y, FR0 (running product), temp2 (write pointer)
.proc imul8xe_init_section
    arg1 = FR1
    arg2 = FR2
    result = FR0
    ptr = temp2                     ; local write pointer, shadows the global ptr

    ; ptr = $4000, y = 0 for the whole routine
    lda #$00
    sta ptr
    lda #$40
    sta ptr + 1
    ldy #0

    ; one page per arg2 value: pages $40 -> $7f
next_page:
    ; the product for arg1 = 0 is 0
    lda #0
    sta result
    sta result + 1

    ; one entry per even arg1 value: $00 -> $fe
next_entry:
    ; store the current product, low byte first
    lda result
    sta (ptr),y
    lda result + 1
    iny
    sta (ptr),y
    dey

    ; result += 2 * arg2 (arg1 advances in steps of two)
    .repeat 2
        clc
        lda arg2
        adc result
        sta result
        lda #0
        adc result + 1
        sta result + 1
    .endrepeat

    ; advance arg1 and the write pointer by two; the pointer's low byte
    ; wraps to 0 after 128 entries
    inc arg1
    inc arg1
    inc ptr
    inc ptr
    bne next_entry

    ; next multiplier, next page; stop at the end of the bank window
    inc arg2
    inc ptr + 1
    lda ptr + 1
    cmp #$80
    bne next_page
    rts
.endproc
2024-12-30 01:37:06 +00:00
; imul16_impl xe
;   Body of a signed 16 x 16 -> 32 bit multiply routine, ending in rts.
;   xe selects the imul8 variant used for the partial products.
;
;   input:    FR0, FR1 = 16-bit signed factors
;   output:   FR2 = 32-bit signed product
;   clobbers: temp2, plus whatever imul8 clobbers
.macro imul16_impl xe
    .local lhs, rhs, product, partial
    .local lhs_nonneg, rhs_nonneg

    lhs     = FR0                   ; 16-bit arg
    rhs     = FR1                   ; 16-bit arg
    product = FR2                   ; 32-bit result
    partial = temp2

    ; h1l1 * h2l2
    ; (h1*256 + l1) * (h2*256 + l2)
    ; h1*256*(h2*256 + l2) + l1*(h2*256 + l2)
    ; h1*h2*256*256 + h1*l2*256 + h2*l1*256 + l1*l2

    ; l1 * l2 -> bytes 0-1; clear bytes 2-3
    imul8 product, lhs, rhs, xe
    lda #0
    sta product + 2
    sta product + 3

    ; h1 * l2 -> added at byte 1
    imul8 partial, lhs + 1, rhs, xe
    add16 product + 1, product + 1, partial
    add_carry product + 3

    ; l1 * h2 -> added at byte 1
    imul8 partial, lhs, rhs + 1, xe
    add16 product + 1, product + 1, partial
    add_carry product + 3

    ; h1 * h2 -> added at byte 2
    imul8 partial, lhs + 1, rhs + 1, xe
    add16 product + 2, product + 2, partial

    ; In case of negative inputs, adjust high word
    ; https://stackoverflow.com/a/28827013
    lda lhs + 1
    bpl lhs_nonneg
    sub16 product + 2, product + 2, rhs
lhs_nonneg:
    lda rhs + 1
    bpl rhs_nonneg
    sub16 product + 2, product + 2, lhs
rhs_nonneg:
    rts                             ; 6 cyc
.endmacro
2024-12-30 01:56:14 +00:00
; sqr16_impl xe
;   Body of a signed 16-bit squaring routine, ending in rts.
;   xe selects the imul8 variant used for the cross term.
;
;   input:    FR0 = 16-bit signed value (replaced by its absolute value)
;   output:   FR2 = 32-bit square
;   clobbers: FR1, plus whatever imul8 / sqr8 clobber
.macro sqr16_impl xe
    .scope
        value  = FR0                ; 16-bit arg (clobbered)
        square = FR2                ; 32-bit result
        cross  = FR1                ; h * l partial product

        ; square the magnitude: (-v)^2 = v^2
        lda value + 1
        bpl nonneg
        neg16 value
    nonneg:

        ; hl * hl
        ; (h*256 + l) * (h*256 + l)
        ; h*256*(h*256 + l) + l*(h*256 + l)
        ; h*h*256*256 + h*l*256 + h*l*256 + l*l

        ; l * l -> bytes 0-1; clear bytes 2-3
        sqr8 square, value
        lda #0
        sta square + 2
        sta square + 3

        ; h * l, added twice at byte 1
        imul8 cross, value + 1, value, xe
        add16 square + 1, square + 1, cross
        add_carry square + 3
        add16 square + 1, square + 1, cross
        add_carry square + 3

        ; h * h -> added at byte 2
        sqr8_add16 square + 2, value + 1
        rts                         ; 6 cyc
    .endscope
.endmacro
2024-12-30 01:37:06 +00:00
; The four arithmetic entry points. The base-RAM variants are the ones
; callers jsr to; imul8xe_init overwrites their first three bytes with a
; jmp to the matching *xe variant when extended RAM is available.

; FR2 = FR0 * FR1 (signed 16 x 16 -> 32), half-square tables
.proc imul16_func
    imul16_impl 0
.endproc

; FR2 = FR0 * FR1 (signed 16 x 16 -> 32), banked 64KB table
.proc imul16xe_func
    imul16_impl 1
.endproc

; FR2 = FR0 * FR0 (signed 16 -> 32), half-square tables
.proc sqr16_func
    sqr16_impl 0
.endproc

; FR2 = FR0 * FR0 (signed 16 -> 32), banked 64KB table
.proc sqr16xe_func
    sqr16_impl 1
.endproc
2024-12-31 03:17:02 +00:00
; round16 arg
;   Round top 16 bits of 32-bit fixed-point number in-place: the high word
;   (arg + 2, arg + 3) is incremented or left alone depending on the low
;   word (arg, arg + 1).
;
;   low word > $8000: round up
;            = $8000: round up if positive
;                     round down if negative
;            < $8000: round down
;
;   Clobbers A and flags. 11-27 cycles:
;     $8000 17
;     $8001 27
;     $8100 21
;     $7fff 11
.macro round16 arg
    .local bump, exactly_half_hi, tie, finished

    ; compare the upper byte of the low word against $80
    lda arg + 1                 ; 3 cyc
    cmp #$80                    ; 2 cyc
    beq exactly_half_hi         ; 2 cyc
    bpl bump                    ; 2 cyc - above $80: round up
    bmi finished                ; 2 cyc - below $80: round down

exactly_half_hi:
    ; upper byte is exactly $80: the lowest byte decides
    lda arg                     ; 3 cyc
    beq tie                     ; 2 cyc
    jmp bump                    ; 3 cyc

tie:
    ; low word is exactly $8000: the sign of the value decides
    lda arg + 3                 ; 3 cyc
    bmi finished                ; 2 cyc

bump:                           ; 5-10 cyc
    inc arg + 2                 ; 5 cyc
    bne finished                ; 2 cyc
    inc arg + 3                 ; 5 cyc
finished:
.endmacro
; ---------------------------------------------------------------------------
; Helper macros used only by mandelbrot. They reference labels of the proc
; they are expanded in (exit_path).
; ---------------------------------------------------------------------------

; quick_exit arg, max
;   Leave the iteration (jmp exit_path) unless the fixed4.12 value at arg
;   lies strictly between -max and +max. Only the high byte is inspected,
;   except at the exact value -max. Clobbers A and flags.
.macro quick_exit arg, max
    .local positive, negative, nope_out, first_equal, all_done
    ; check sign bit
    lda arg + 1
    bmi negative
positive:
    cmp #((max) << 4)
    bmi all_done                    ; 'less than'
    jmp exit_path
negative:
    cmp #(256 - ((max) << 4))
    beq first_equal                 ; 'equal' on first byte
    bpl all_done                    ; 'greater than'
nope_out:
    jmp exit_path
first_equal:
    lda arg
    beq nope_out                    ; 2nd byte 0 shows it's really 'equal'
all_done:
.endmacro

; z_compare arg
;   Compare the byte at z_buffer,x with arg, advance X, and count a match
;   in Y. Clobbers A and flags.
.macro z_compare arg
    .local compare_no_match
    lda z_buffer,x
    inx
    cmp arg
    bne compare_no_match
    iny
compare_no_match:
.endmacro

; z_advance
;   Wrap the byte index in X back to 0 when it reaches the end of z_buffer.
;   X is an unsigned index, so the test uses the carry flag.
.macro z_advance
    .local skip_reset_x
    cpx #(z_buffer_len * 4)
    bcc skip_reset_x
    ldx #0
skip_reset_x:
.endmacro

; z_store arg
;   Store the byte at arg to z_buffer,x and advance X. Clobbers A.
.macro z_store arg
    lda arg
    sta z_buffer,x
    inx
.endmacro

; ---------------------------------------------------------------------------
; mandelbrot
;   Iterate z = z^2 + c for one point.
;
; input:
;   cx: position scaled to 4.12 fixed point - -8..+7.9
;   cy: position scaled to 4.12
;   z_buffer_active: non-zero enables cycle detection; it is set from the
;     result of the previous call
;
; output:
;   iter: iteration count at escape or 0 (0 = iteration counter wrapped, or
;     a repeated z value was found)
;   z_buffer_active: 1 if iter is 0, else 0
;
; clobbers: A, X, Y, zx .. iter, z_buffer, z_buffer_start/end, and whatever
;   the multiply/square routines clobber
;
; Fix: the "advance the start roller" test compared the unsigned iter with
; `bmi`, which misfires once iter reaches 192 and stops the ring buffer's
; start from advancing. It now uses `bcc`. This only affects how soon a
; cycle is detected, not the resulting iter value.
; ---------------------------------------------------------------------------
.proc mandelbrot
    ; zx = zy = zx_2 = zy_2 = zx_zy = dist = 0, iter = 0
    ; (clears every byte from zx up to and including iter)
    lda #0
    ldx #(iter - zx + 1)
clear_state:
    sta zx - 1,x
    dex
    bne clear_state
    sta z_buffer_start
    sta z_buffer_end

iterate:
    ; iter++ & max-iters break (the u8 counter wrapping to 0)
    inc iter
    bne keep_going
    jmp exit_path
keep_going:

    ; 4.12: (-8 .. +7.9)
    ; zx = zx_2 - zy_2 + cx
    sub16 zx, zx_2, zy_2
    add16 zx, zx, cx
    quick_exit zx, 2

    ; zy = zx_zy + zx_zy + cy
    add16 zy, zx_zy, zx_zy
    add16 zy, zy, cy
    quick_exit zy, 2

    ; zx_2 = zx * zx
    sqr16_round zx_2, zx, 4

    ; zy_2 = zy * zy
    sqr16_round zy_2, zy, 4

    ; zx_zy = zx * zy
    imul16_round zx_zy, zx, zy, 4

    ; dist = zx_2 + zy_2
    add16 dist, zx_2, zy_2
    quick_exit dist, 4

    ; if may be in the lake, look for looping output with a small buffer
    ; as an optimization vs running to max iters
    lda z_buffer_active
    beq skip_z_buffer

    ldx z_buffer_start
    cpx z_buffer_end
    beq z_nothing_to_read

z_buffer_loop:
    ; Compare the previously stored z values; Y counts matching bytes
    ldy #0
    z_compare zx
    z_compare zx + 1
    z_compare zy
    z_compare zy + 1
    cpy #4
    bne z_no_matches
    jmp z_exit                      ; same z seen before: it will loop forever

z_no_matches:
    z_advance
    cpx z_buffer_end
    bne z_buffer_loop

z_nothing_to_read:
    ; Store and expand
    z_store zx
    z_store zx + 1
    z_store zy
    z_store zy + 1
    z_advance
    stx z_buffer_end

    ; Increment the start roller if necessary (limit size)
    ; NOTE(review): the threshold is z_buffer_len * 4 iterations while the
    ; buffer holds z_buffer_len entries - confirm this is intended.
    lda iter
    cmp #(z_buffer_len * 4)
    bcc skip_inc_start              ; unsigned iter < threshold
    lda z_buffer_start
    clc
    adc #4
    tax
    z_advance
    stx z_buffer_start
skip_inc_start:
skip_z_buffer:
    jmp iterate

z_exit:
    ; a cycle was found: report it like a point that never escapes
    lda #0
    sta iter

exit_path:
    ; z_buffer_active = (iter == 0) ? 1 : 0, for the next call
    ldx #0
    lda iter
    bne escaped
    inx
escaped:
    stx z_buffer_active
    rts
.endproc
2022-12-30 04:32:58 +00:00
2023-03-12 04:45:32 +00:00
; scale_zoom dest
;   dest <<= (8 - zoom), 16-bit, in place; e.g. cx = (sx << (8 - zoom)).
;   Counts X up from zoom until it equals 8, so zoom is expected to be 0..8.
;   clobbers A (not touched), X, flags
.macro scale_zoom dest
    .local again, finished
    ldx zoom
again:
    cpx #8
    beq finished
    shl16 dest
    inx
    jmp again
finished:
.endmacro
; zoom_factor dest, src, zoom, aspect
;   dest = ((src << (8 - zoom)) * aspect), rounded, in fixed4.12.
;     cy = cy * (3 / 4)
;     cx = cx * (5 / 4)
;   The `zoom` parameter is not used in the expansion: scale_zoom always
;   reads the global zoom variable.
;   clobbers A, X, flags, FR0, FR1, FR2, etc
.macro zoom_factor dest, src, zoom, aspect
    copy16 dest, src
    scale_zoom dest
    imul16_round dest, dest, aspect, 4
.endmacro
; pset
;   Plot one 2-bit pixel.
;
; input:    sx, sy = signed screen coords (sx -80..79, sy -92..91)
;           iter   = value mapped to a colour through color_map
; clobbers: A, X, Y, temp, pixel_ptr, pixel_color, pixel_mask,
;           pixel_shift, pixel_offset
.proc pset
    ; iter -> color; mask starts out covering the lowest pixel slot
    ldx iter
    lda color_map,x
    sta pixel_color
    lda #(255 - 3)
    sta pixel_mask

    ; sy -> base address in pixel_ptr. Negative rows are addressed backwards
    ; from the end of the top half, the rest forwards from the bottom half.
    lda sy
    bpl lower_half
    lda #.lobyte(framebuffer_top + stride * half_height)
    sta pixel_ptr
    lda #.hibyte(framebuffer_top + stride * half_height)
    sta pixel_ptr + 1
    jmp add_row_offset
lower_half:
    lda #.lobyte(framebuffer_bottom)
    sta pixel_ptr
    lda #.hibyte(framebuffer_bottom)
    sta pixel_ptr + 1

add_row_offset:
    ; pixel_ptr += sy * stride
    ; sy * 40 = sy * 8 + sy * 32 = (sy << 3) + (sy << 5)
    copy16 temp, sy
    shl16 temp
    shl16 temp
    shl16 temp                      ; temp = sy * 8
    add16 pixel_ptr, pixel_ptr, temp
    shl16 temp
    shl16 temp                      ; temp = sy * 32
    add16 pixel_ptr, pixel_ptr, temp

    ; pixel_ptr now points at the start of the line, which is 40 bytes.
    ; Get the byte and bit offsets from the column 0..159.
    lda sx
    clc
    adc #half_width
    sta temp

    ; pixel_shift = column & 3
    ; move colour and mask left by (3 - pixel_shift) pixel slots:
    ;   pixel_color shifts in zeros, pixel_mask shifts in ones
    and #3
    sta pixel_shift
    lda #3
    sec
    sbc pixel_shift
    tax                             ; sets Z for the first loop test
slot_loop:
    beq slot_done
    asl pixel_color
    asl pixel_color
    sec
    rol pixel_mask
    sec
    rol pixel_mask
    dex                             ; sets Z for the next loop test
    jmp slot_loop
slot_done:

    ; pixel_offset = column >> 2
    lda temp
    lsr a
    lsr a
    sta pixel_offset
    tay

    ; read, mask, or, write
    lda (pixel_ptr),y
    and pixel_mask
    ora pixel_color
    sta (pixel_ptr),y
    rts
.endproc
; draw_text_indirect col, len, strptr
;   Copy `len` characters from the string that the zero-page pointer strptr
;   points to into the status line at column `col`, translating each one
;   through char_map.
;
;   clobbers A, X, Y
;
;   Fix: each character is masked to 7 bits before the lookup. FASC marks
;   the last character of its output by setting the high bit, and char_map
;   only has 128 entries, so an unmasked terminator indexed past the table.
.macro draw_text_indirect col, len, strptr
    .local loop
    .local done
    ldx #0
loop:
    cpx #len
    beq done
    txa
    tay
    lda (strptr),y
    and #$7f                    ; strip the end-of-string marker bit
    tay
    lda char_map,y
    sta textbuffer + col,x
    inx
    jmp loop
done:
.endmacro
; draw_text col, len, cstr
;   Copy `len` characters from the string at address cstr into the status
;   line at column `col`, translating each one through char_map.
;   clobbers A, X, Y
.macro draw_text col, len, cstr
    .local next_char, finished
    ldx #0
next_char:
    cpx #len
    beq finished
    ldy cstr,x                  ; ATASCII code
    lda char_map,y              ; -> font entry
    sta textbuffer + col,x
    inx
    jmp next_char
finished:
.endmacro
2023-02-05 22:26:58 +00:00
; vblank_handler
;   Vertical-blank routine installed by start through SETVBV. Counts frames
;   for the speed display and animates the palette:
;     every chroma_delay frames  chroma_offset steps (mod palette_chroma_entries)
;     every palette_delay frames palette_offset steps (mod palette_entries)
;   Exits through XITVBV.
.proc vblank_handler
    inc count_frames

    ; chroma timer
    inc chroma_ticks
    lda chroma_ticks
    cmp #(chroma_delay)
    bne chroma_done
    lda #0
    sta chroma_ticks
    inc chroma_offset
    lda chroma_offset
    cmp #(palette_chroma_entries)
    bne chroma_done
    lda #0
    sta chroma_offset
chroma_done:

    ; luminance timer
    inc palette_ticks
    lda palette_ticks
    cmp #(palette_delay)
    bne luma_done
    lda #0
    sta palette_ticks
    inc palette_offset
    lda palette_offset
    cmp #(palette_entries)
    bne luma_done
    lda #0
    sta palette_offset
luma_done:

    jsr update_palette
    jmp XITVBV
.endproc
2024-08-19 04:06:30 +00:00
; update_palette
;   Write the colour registers from the current animation state:
;     COLOR4 = 0
;     COLOR2, COLOR1, COLOR0 = palette_chroma[chroma_offset] OR-ed with
;       three consecutive palette_start entries from palette_offset onwards
;   All three registers share one chroma entry (the per-register `inx` is
;   commented out in the original).
;   clobbers A, X, Y
.proc update_palette
    lda #0
    sta COLOR4

    ldx chroma_offset
    ldy palette_offset

    lda palette_chroma,x
    ora palette_start,y
    sta COLOR2

    ;inx
    iny
    lda palette_chroma,x
    ora palette_start,y
    sta COLOR1

    ;inx
    iny
    lda palette_chroma,x
    ora palette_start,y
    sta COLOR0
    rts
.endproc
; Unimplemented placeholder: this proc contains only the outline below and
; emits no instructions (not even an rts), so it must not be called as-is.
; Nothing in the visible source references it; the steps it lists are
; carried out inline under update_status in start.
.proc update_speed
; convert frames (u16) to fp
; add to frames_total
; convert pixels (u16) to fp
; add to pixels_total
; (frames_total * 16.66666667) / pixels_total
; convert to ATASCII
; draw text
.endproc
2023-03-12 04:45:32 +00:00
; keycheck
;   Poll the keyboard and apply the key to the view state.
;     +/-     zoom in / out (zoom limited to 0..8)
;     arrows  move ox/oy by $0010 << (8 - zoom)
;     1-4     load a preset viewport
;
;   clobbers all (A, X, Y, temp)
;   returns 255 in A if state change or 0 if no change; the Z flag matches
;   A on return
.proc keycheck
    ; check keyboard buffer; $ff means no key is waiting
    lda CH
    cmp #$ff
    beq no_change

    ; Clear the keyboard buffer
    ldx #$ff
    stx CH

    tay                             ; Y = keycode for the tests below
    lda zoom                        ; (reloaded by both zoom handlers)
    cpy #KEY_PLUS
    beq zoom_in
    cpy #KEY_MINUS
    beq zoom_out

    ; temp = $0010 << (8 - zoom): pan step for the current zoom level
    lda #$10
    sta temp
    lda #$00
    sta temp + 1
    scale_zoom temp

    cpy #KEY_UP
    beq pan_up
    cpy #KEY_DOWN
    beq pan_down
    cpy #KEY_LEFT
    beq pan_left
    cpy #KEY_RIGHT
    beq pan_right

    cpy #KEY_1
    beq preset_1
    cpy #KEY_2
    beq preset_2
    cpy #KEY_3
    beq preset_3
    cpy #KEY_4
    beq preset_4

    ; unhandled key, or a zoom request at its limit
no_change:
    lda #0
    rts

zoom_in:
    lda zoom
    cmp #8
    bpl no_change                   ; already at 8
    inc zoom
    jmp changed

zoom_out:
    lda zoom
    cmp #1
    bmi no_change                   ; already at 0
    dec zoom
    jmp changed

pan_up:
    sub16 oy, oy, temp
    jmp changed
pan_down:
    add16 oy, oy, temp
    jmp changed
pan_left:
    sub16 ox, ox, temp
    jmp changed
pan_right:
    add16 ox, ox, temp
    jmp changed

preset_1:
    ldx #0
    jmp apply_preset
preset_2:
    ldx #1
    jmp apply_preset
preset_3:
    ldx #2
    jmp apply_preset
preset_4:
    ldx #3
    ; fall through
apply_preset:
    jsr load_viewport
    ; fall through
changed:
    lda #255
    rts
.endproc
2024-02-25 23:15:23 +00:00
; clear_screen
;   Zero the range from framebuffer_top up to (not including) display_list,
;   one 256-byte page at a time. This covers both bitmap halves and the
;   text buffer.
;   clobbers A, Y, temp
.proc clear_screen
    ; temp = framebuffer_top
    lda #.lobyte(framebuffer_top)
    sta temp
    lda #.hibyte(framebuffer_top)
    sta temp + 1

next_page:
    lda #0                          ; A is reused for the page test below
    ldy #0
next_byte:
    sta (temp),y
    iny
    bne next_byte                   ; Y wraps to 0 after 256 bytes

    inc temp + 1
    lda temp + 1
    cmp #.hibyte(display_list)
    bne next_page
    rts
.endproc
; status_bar
;   Draw the fixed parts of the status line: the program name on the left
;   and the RUN marker right-aligned in the 40-column row.
;   clobbers A, X, Y
.proc status_bar
    draw_text 0, str_self_len, str_self
    draw_text 40 - str_run_len, str_run_len, str_run
    rts
.endproc
2024-12-30 17:16:08 +00:00
; load_viewport
;   Load zoom, ox and oy from the preset viewport tables.
; input: viewport selector in x (index into viewport_zoom / _ox / _oy)
; clobbers: a, x
.proc load_viewport
    lda viewport_zoom,x
    sta zoom

    ; the ox/oy tables hold words: X = selector * 2
    txa
    asl a
    tax

    ; low bytes
    lda viewport_oy,x
    sta oy
    lda viewport_ox,x
    sta ox

    ; high bytes
    inx
    lda viewport_oy,x
    sta oy + 1
    lda viewport_ox,x
    sta ox + 1
    rts
.endproc
2023-02-05 22:26:58 +00:00
2024-12-30 17:16:08 +00:00
; start
;   Program entry point. Sets up the multiply tables, the display and the
;   vblank handler, then renders the current viewport in progressive passes
;   forever, restarting whenever keycheck reports a view change.
;
;   Fixes relative to the previous version:
;   * Palette init stored 0 to `palette_delay` and `chroma_delay`. Those are
;     the constants 23 and 137, so the stores hit zero-page addresses $17
;     and $89 (the high byte of cx) and left the tick counters
;     uninitialized. It now clears palette_ticks and chroma_ticks.
;   * The "count_pixels >= width" and "count_frames >= 120" tests used
;     `bmi` after `cmp`, a signed test that does not implement >= for
;     values up to 255. With width = 160 the status line was refreshed
;     once count_pixels reached 32. They now use the carry flag.
.proc start
    jsr imul8xe_init

    ; initialize viewport
    ldx #0                          ; overview
    jsr load_viewport

    ; Disable display DMA
    lda #0
    sta DMACTL

    jsr clear_screen

    ; Copy the display list into properly aligned memory
    ; Can't cross 1024-byte boundaries :D
    ldx #0
copy_display_list:
    lda display_list_start,x
    sta display_list,x
    inx
    cpx #display_list_len
    bne copy_display_list

    ; Set up the display list
    lda #.lobyte(display_list)
    sta DLISTL                      ; actual register
    sta SDLSTL                      ; shadow register the OS will copy in
    lda #.hibyte(display_list)
    sta DLISTH                      ; actual register
    sta SDLSTH                      ; shadow register the OS will copy in

    ; Re-enable display DMA
    lda #$22
    sta DMACTL

    ; Initialize the palette animation state
    lda #0
    sta palette_offset
    sta palette_ticks
    sta chroma_offset
    sta chroma_ticks
    jsr update_palette

    ; install the vblank handler
    lda #7                          ; deferred
    ldx #.hibyte(vblank_handler)
    ldy #.lobyte(vblank_handler)
    jsr SETVBV

main_loop:
    ; count_frames = 0; count_pixels = 0
    lda #0
    sta count_frames
    sta count_pixels

    ; total_ms = 0.0; total_pixels = 0.0
    ldx #total_ms
    jsr ZF1
    ldx #total_pixels
    jsr ZF1

    jsr clear_screen
    jsr status_bar

    lda #0
    sta fill_level

fill_loop:
    ; sy = -92 .. 91
    lda #(256-half_height)
    sta sy
    lda #(256-1)
    sta sy + 1

loop_sy:
    ; sx = -80 .. 79
    lda #(256-half_width)
    sta sx
    lda #(256-1)
    sta sx + 1

loop_sx:
    ; check the fill mask
    ; Levels before the current one: a pixel that is zero under that
    ; level's mask in both sx and sy was already drawn in that pass.
    ldy #0
loop_skip_level:
    cpy fill_level
    beq current_level
    lda fill_masks,y
    and sx
    bne not_skipped_mask1
    lda fill_masks,y
    and sy
    beq skipped_mask
not_skipped_mask1:
    iny
    jmp loop_skip_level

current_level:
    ; Current level: draw only pixels that are zero under this level's
    ; mask in both sx and sy.
    lda fill_masks,y
    and sx
    bne skipped_mask
    lda fill_masks,y
    and sy
    beq not_skipped_mask
skipped_mask:
    jmp skipped

not_skipped_mask:
    ; run the fractal!
    zoom_factor cx, sx, zoom, aspect_x
    add16 cx, cx, ox
    zoom_factor cy, sy, zoom, aspect_y
    add16 cy, cy, oy
    jsr mandelbrot
    jsr pset

    jsr keycheck
    beq no_key
    ; view changed: restart (main_loop also resets the statistics)
    jmp main_loop

no_key:
    ; check if we should update the counters
    ;
    ; count_pixels >= width? update!
    inc count_pixels
    lda count_pixels
    cmp #width
    bcs update_status               ; unsigned >=

    ; count_frames >= 120? update!
    lda count_frames
    cmp #120                        ; >= 2 seconds
    bcc skipped                     ; unsigned <

update_status:
    ; FR0 = (float)count_pixels & clear count_pixels
    lda count_pixels
    sta FR0
    lda #0
    sta FR0 + 1
    sta count_pixels
    jsr IFP

    ; FR1 = total_pixels
    ldx #.lobyte(total_pixels)
    ldy #.hibyte(total_pixels)
    jsr FLD1R
    ; FR0 += FR1
    jsr FADD
    ; total_pixels = FR0
    ldx #.lobyte(total_pixels)
    ldy #.hibyte(total_pixels)
    jsr FST0R

    ; FR0 = (float)count_frames & clear count_frames
    ; warning: this should really disable interrupts @TODO
    lda count_frames
    sta FR0
    lda #0
    sta FR0 + 1
    sta count_frames
    jsr IFP

    ; FR0 *= ms_per_frame
    ldx #.lobyte(ms_per_frame)
    ldy #.hibyte(ms_per_frame)
    jsr FLD1R
    jsr FMUL

    ; FR0 += total_ms
    ldx #.lobyte(total_ms)
    ldy #.hibyte(total_ms)
    jsr FLD1R
    jsr FADD
    ; total_ms = FR0
    ldx #.lobyte(total_ms)
    ldy #.hibyte(total_ms)
    jsr FST0R

    ; FR0 /= total_pixels
    ldx #.lobyte(total_pixels)
    ldy #.hibyte(total_pixels)
    jsr FLD1R
    jsr FDIV

    ; convert to ASCII in INBUFF
    jsr FASC

    ; print the first 6 digits
    draw_text_indirect speed_start, speed_precision, INBUFF
    draw_text speed_start + speed_precision, str_speed_len, str_speed

skipped:
    ; sx++ (16-bit); the row ends when the low byte reaches half_width
    clc
    lda sx
    adc #1
    sta sx
    lda sx + 1
    adc #0
    sta sx + 1
    lda sx
    cmp #half_width
    beq loop_sx_done
    jmp loop_sx

loop_sx_done:
    ; sy++ (16-bit); the pass ends when the low byte reaches half_height
    clc
    lda sy
    adc #1
    sta sy
    lda sy + 1
    adc #0
    sta sy + 1
    lda sy
    cmp #half_height
    beq loop_sy_done
    jmp loop_sy

loop_sy_done:
fill_loop_done:
    ; next, finer pass
    inc fill_level
    lda fill_level
    cmp #max_fill_level
    beq finished
    jmp fill_loop

finished:
    ; finished: show DONE and wait for a key that changes the view
    draw_text 40 - str_done_len, str_done_len, str_done
    jsr keycheck
    beq finished
    jmp main_loop
.endproc