Compare commits

..

No commits in common. "main" and "elapsed" have entirely different histories.

2 changed files with 123 additions and 207 deletions

View file

@ -3,7 +3,7 @@
all : mandel.xex
mandel.xex : mandel.o tables.o atari-asm-xex.cfg
ld65 -C ./atari-asm-xex.cfg --mapfile mandel.map -o $@ mandel.o tables.o
ld65 -C ./atari-asm-xex.cfg -o $@ mandel.o tables.o
%.o : %.s
ca65 -o $@ $<
@ -15,6 +15,4 @@ clean :
rm -f tables.s
rm -f *.o
rm -f *.xex
rm -f mandel.map

320
mandel.s
View file

@ -126,10 +126,6 @@ KEY_7 = 51
KEY_8 = 53
KEY_9 = 48
KEY_0 = 50
KEY_PERIOD = 34
KEY_E = 42
KEY_X = 22
KEY_Y = 43
.struct float48
exponent .byte
@ -407,13 +403,6 @@ elapsed_work:
elapsed_digit:
.byte 0
input_col:
.byte 0
input_row:
.byte 0
input_max:
.byte 0
; 2 + 9 * byte cycles
.macro add bytes, dest, arg1, arg2
clc ; 2 cyc
@ -461,7 +450,7 @@ input_max:
sub 4, dest, arg1, arg2
.endmacro
; 3 + 5 * (bytes - 1) cycles
; 3 + 5 * bytes cycles
.macro shl bytes, arg
asl arg ; 3 cyc
.repeat bytes-1, i
@ -469,23 +458,22 @@ input_max:
.endrepeat
.endmacro
; 8 cycles
; 13 cycles
; Left shift for a 2-byte operand: expands the generic shl macro with bytes = 2.
.macro shl16 arg
shl 2, arg
.endmacro
; 13 cycles
; 18 cycles
; Left shift for a 3-byte operand: expands the generic shl macro with bytes = 3.
.macro shl24 arg
shl 3, arg
.endmacro
; 18 cycles
; 23 cycles
; Left shift for a 4-byte operand: expands the generic shl macro with bytes = 4.
.macro shl32 arg
shl 4, arg
.endmacro
; 6 * bytes cycles
; 4 * bytes bytes
.macro copy bytes, dest, arg
.repeat bytes, byte ; 6 * bytes cycles
lda arg + byte ; 3 cyc
@ -494,7 +482,6 @@ input_max:
.endmacro
; 12 cycles
; 8 bytes
; 2-byte copy: expands the generic copy macro with bytes = 2 (arg is the source, dest the target).
.macro copy16 dest, arg
copy 2, dest, arg
.endmacro
@ -529,19 +516,17 @@ input_max:
neg 4, arg
.endmacro
; 11-27 + 18 * shift cycles
; 65-81 cycles for shift=3
; 11-27 + 23 * shift cycles
; 103-119 cycles for shift=4
.macro shift_round_16 arg, shift
.repeat shift
shl32 arg ; 18 cycles
shl32 arg ; 23 cycles
.endrepeat
round16 arg ; 11-27 cycles
.endmacro
; input: arg1, arg2 as fixed4.12
; output: dest as fixed8.24
; patch point jsr at 16 bytes in
imul16_patch_offset = 16
.macro imul16 dest, arg1, arg2
copy16 FR0, arg1 ; 12 cyc
copy16 FR1, arg2 ; 12 cyc
@ -551,8 +536,6 @@ imul16_patch_offset = 16
; input: arg as fixed4.12
; output: dest as fixed8.24
; patch point jsr at 8 bytes in
sqr16_patch_offset = 8
.macro sqr16 dest, arg
copy16 FR0, arg ; 12 cyc
jsr sqr16_func ; ? cyc
@ -698,6 +681,71 @@ bank_switch_table:
.endif
.endmacro
; Probe for bank-switched extended RAM. When it is present, turn
; imul16_func / sqr16_func into jumps to their *xe counterparts and
; build the lookup table by calling imul8xe_init_section once for each
; of banks 0..3. When it is absent, return without patching anything.
.proc imul8xe_init
    OPCODE_JMP_ABS = $4c        ; 6502 'jmp absolute' opcode

    ; --- detection -------------------------------------------------
    ; Store 0 with bank 0 selected and 1 with bank 1 selected. If the
    ; two stores landed in different memory, bank 0 still reads back 0.
    bank_switch 0
    lda #0
    sta EXTENDED_RAM
    bank_switch 1
    lda #1
    sta EXTENDED_RAM
    bank_switch 0
    lda EXTENDED_RAM
    beq have_banks

    ; the second store overwrote the first: no bank switching here
    rts

have_banks:
    ; --- forwarding thunks -----------------------------------------
    ; First byte of each routine becomes a jmp opcode ...
    lda #OPCODE_JMP_ABS
    sta imul16_func
    sta sqr16_func

    ; ... followed by the little-endian target address.
    lda #<imul16xe_func
    sta imul16_func + 1
    lda #>imul16xe_func
    sta imul16_func + 2

    lda #<sqr16xe_func
    sta sqr16_func + 1
    lda #>sqr16xe_func
    sta sqr16_func + 2

    ; --- lookup table ----------------------------------------------
    ; The input set is covered in four 16KB chunks, one per bank.
    arg1 = FR1
    arg2 = FR2
    result = FR0

    ; both multipliers start at zero; ptr starts at $4000
    lda #$00
    sta arg1
    sta arg2
    sta ptr
    lda #$40
    sta ptr + 1

    bank_switch 0               ; $00 * $00 -> $3f * $ff
    jsr imul8xe_init_section

    bank_switch 1               ; $40 * $00 -> $7f * $ff
    jsr imul8xe_init_section

    bank_switch 2               ; $80 * $00 -> $bf * $ff
    jsr imul8xe_init_section

    bank_switch 3               ; $c0 * $00 -> $ff * $ff
    jsr imul8xe_init_section

    rts
.endproc
; Initialize a 16 KB chunk of the table
; input: multipliers in temp
@ -935,17 +983,6 @@ common:
.endproc
; Convert a 32-bit signed 6.26 fixed-point value to an Atari float.
; The value is rounded down to 16 bits of precision before conversion.
; input:    FR0 = 32 bits signed 6.26 fixed
; output:   FR0 = Atari float
; clobbers: a, x, y, FR0, FR1
.proc fixed6_26_to_float
    shift_round_16 FR0, 3       ; shift left 3 times, then round16
    copy16 FR0, FR0 + 2         ; bring the two bytes at FR0+2 down to FR0
    jmp fixed3_13_to_float      ; tail call: its rts returns to our caller
.endproc
; input in FR0, Atari float
; output in FR0, 16 bits signed 3.13 fixed
; clobbers a, x, y, FR0, FR1
@ -1097,15 +1134,12 @@ keep_going:
shift_round_16 zy, 3
; zx_2 = zx * zx
fixup_sqr16_1:
sqr16 zx_2, zx + 2
; zy_2 = zy * zy
fixup_sqr16_2:
sqr16 zy_2, zy + 2
; zx_zy = zx * zy
fixup_imul16_1:
imul16 zx_zy, zx + 2, zy + 2
; dist = zx_2 + zy_2
@ -1569,7 +1603,7 @@ number_keys:
beq five
cpy #KEY_6
beq six
jmp letter_keys
jmp skip_char
one:
ldx #0
@ -1588,21 +1622,7 @@ five:
jmp load_key_viewport
six:
ldx #5
jmp load_key_viewport
letter_keys:
cpy #KEY_X
bne not_x
jsr input_x
jmp done
not_x:
cpy #KEY_Y
bne not_y
jsr input_y
jmp done
not_y:
jmp skip_char
; fall through
load_key_viewport:
jsr load_viewport
; fall through
@ -1612,23 +1632,6 @@ done:
.endproc
; Handler invoked by the keyboard dispatch when KEY_X is pressed.
; Passes x = col_x and y = 1 to input_number.
.proc input_x
    ldx #col_x
    ldy #1
    jmp input_number            ; tail call: input_number's rts returns to our caller
.endproc
; Handler invoked by the keyboard dispatch when KEY_Y is pressed.
; Currently an empty stub: it returns immediately without touching any state.
.proc input_y
rts
.endproc
; Called by input_x with x = col_x and y = 1.
; Currently an empty stub: the arguments are ignored and it returns immediately.
.proc input_number
rts
.endproc
.proc clear_screen
; zero the range from framebuffer_top to display_list
lda #.lobyte(framebuffer_top)
@ -1676,7 +1679,9 @@ zero_byte_loop:
draw_string_const str_x
copy32 FR0, ox
jsr fixed6_26_to_float
shift_round_16 FR0, 3
copy16 FR0, FR0 + 2
jsr fixed3_13_to_float
jsr FASC
jsr draw_string
@ -1685,7 +1690,9 @@ zero_byte_loop:
draw_string_const str_y
copy32 FR0, oy
jsr fixed6_26_to_float
shift_round_16 FR0, 3
copy16 FR0, FR0 + 2
jsr fixed3_13_to_float
jsr FASC
jsr draw_string
@ -1984,25 +1991,51 @@ update_status:
lda FR0 + 1
sta elapsed_work + 1
draw_string_const str_space
;jsr IFP
;jsr FASC
;jsr draw_string
.macro do_countdown divisor, digits
ldx #.lobyte(divisor)
ldy #.hibyte(divisor)
lda #.lobyte(digits)
.macro countdown divisor, digits
.scope
    ; Emit one digit of the elapsed-time display.
    ;   divisor: 16-bit constant; it is subtracted from elapsed_work for
    ;            as long as it still fits, leaving the remainder behind
    ;   digits:  table indexed by the number of subtractions performed
    ; clobbers a, x; calls draw_string
    ldx #0                      ; x = how many times divisor has been removed
countdown_loop:
    ; 16-bit unsigned test: elapsed_work >= divisor ?
    ; The high bytes decide unless they are equal. Only then may the low
    ; bytes be compared. Comparing the low bytes unconditionally stops
    ; too early (e.g. $9000 vs $8ca0) and leaves a remainder that is
    ; still larger than the divisor for the following digits.
    lda elapsed_work + 1
    cmp #.hibyte(divisor)
    bcc countdown_done          ; high byte smaller: divisor no longer fits
    bne countdown_subtract      ; high byte larger: divisor fits
    lda elapsed_work
    cmp #.lobyte(divisor)
    bcc countdown_done          ; high bytes equal, low byte smaller
countdown_subtract:
    sec
    lda elapsed_work
    sbc #.lobyte(divisor)
    sta elapsed_work
    lda elapsed_work + 1
    sbc #.hibyte(divisor)
    sta elapsed_work + 1
    inx
    jmp countdown_loop
countdown_done:
    ; fetch the character for this count, flip its top bit, and stash it
    ; in elapsed_digit so that INBUFF can point at it for draw_string
    lda digits,x
    eor #$80
    sta elapsed_digit
    lda #.lobyte(elapsed_digit)
    sta INBUFF
    ; NOTE(review): the next line and the `jsr countdown` further down look
    ; like leftovers of the do_countdown variant of this code -- confirm
    ; which of the two versions is intended before assembling.
    lda #.hibyte(digits)
    lda #.hibyte(elapsed_digit)
    sta INBUFF + 1
    jsr countdown
    jsr draw_string
.endscope
.endmacro
do_countdown 36000, digits_space
do_countdown 3600, digits_zero
draw_string_const str_space
countdown 36000, digits_space
countdown 3600, digits_zero
draw_string_const str_h
do_countdown 600, digits_zero
do_countdown 60, digits_zero
countdown 600, digits_zero
countdown 60, digits_zero
draw_string_const str_m
do_countdown 10, digits_zero
do_countdown 1, digits_zero
countdown 10, digits_zero
countdown 1, digits_zero
draw_string_const str_s
skipped:
@ -2064,118 +2097,3 @@ loop:
jmp main_loop
.endproc
; Draw one digit of the elapsed-time display.
; input:    X = divisor low byte, Y = divisor high byte
;           INBUFF = address of the digit string to index
; effect:   subtracts the divisor from elapsed_work as many times as it
;           fits (the remainder stays in elapsed_work), then picks the
;           character at that index from the digit string
; clobbers: temp, a, y; calls draw_string
.proc countdown
    step = temp                 ; 16-bit divisor lives in temp / temp+1

    stx step
    sty step + 1

    ldy #0                      ; y = number of whole steps removed so far
next_round:
    ; 16-bit unsigned test: elapsed_work >= step ?
    lda elapsed_work + 1
    cmp step + 1
    bcc emit_digit              ; high byte smaller: step no longer fits
    bne take_step               ; high byte larger: step fits
    lda elapsed_work            ; high bytes equal: low bytes decide
    cmp step
    bcc emit_digit

take_step:
    ; elapsed_work -= step
    sec
    lda elapsed_work
    sbc step
    sta elapsed_work
    lda elapsed_work + 1
    sbc step + 1
    sta elapsed_work + 1
    iny
    jmp next_round

emit_digit:
    ; take the y-th character, flip its top bit, park it in elapsed_digit
    lda (INBUFF),y
    eor #$80
    sta elapsed_digit

    ; retarget INBUFF at elapsed_digit and hand it to draw_string
    lda #.lobyte(elapsed_digit)
    sta INBUFF
    lda #.hibyte(elapsed_digit)
    sta INBUFF + 1
    jsr draw_string
    rts
.endproc
; Probe for bank-switched extended RAM. If it is present, redirect the
; 16-bit multiply and square routines to their *xe variants (both through
; a jmp thunk and by patching the call sites inside mandelbrot), then
; build the lookup table by calling imul8xe_init_section for banks 0..3.
; If it is absent, return without patching anything.
.proc imul8xe_init
; probe: store 0 with bank 0 selected, then 1 with bank 1 selected
bank_switch 0
lda #0
sta EXTENDED_RAM
bank_switch 1
lda #1
sta EXTENDED_RAM
; back on bank 0: the 0 is still there only if the two stores hit
; different memory
bank_switch 0
lda EXTENDED_RAM
beq init
; no bank switching available, we just overwrite the value in base ram
rts
init:
; patch imul16_func into a forwarding thunk to imul16xe_func
lda #$4c ; 'jmp' opcode
sta imul16_func
; the same address bytes are also written over the operand (+1 / +2) of the
; instruction at fixup_imul16_1 + imul16_patch_offset inside mandelbrot,
; i.e. the jsr that imul16_patch_offset is documented to point at
lda #.lobyte(imul16xe_func)
sta imul16_func + 1
sta mandelbrot::fixup_imul16_1 + imul16_patch_offset + 1
lda #.hibyte(imul16xe_func)
sta imul16_func + 2
sta mandelbrot::fixup_imul16_1 + imul16_patch_offset + 2
; ditto for sqr16_func -> sqr16xe_func
lda #$4c ; 'jmp' opcode
sta sqr16_func
; both sqr16 call sites in mandelbrot (fixup_sqr16_1 and fixup_sqr16_2)
; get their operand bytes at sqr16_patch_offset rewritten as well
lda #.lobyte(sqr16xe_func)
sta sqr16_func + 1
sta mandelbrot::fixup_sqr16_1 + sqr16_patch_offset + 1
sta mandelbrot::fixup_sqr16_2 + sqr16_patch_offset + 1
lda #.hibyte(sqr16xe_func)
sta sqr16_func + 2
sta mandelbrot::fixup_sqr16_1 + sqr16_patch_offset + 2
sta mandelbrot::fixup_sqr16_2 + sqr16_patch_offset + 2
; create the lookup table
; go through the input set, in four 16KB chunks
arg1 = FR1
arg2 = FR2
result = FR0
; start with both multipliers at zero and ptr = $4000
; NOTE(review): $4000 is presumably the start of the banked window -- confirm
lda #$00
sta arg1
sta arg2
sta ptr
lda #$40
sta ptr + 1
; $00 * $00 -> $3f * $ff
bank_switch 0
jsr imul8xe_init_section
; $40 * $00 -> $7f * $ff
bank_switch 1
jsr imul8xe_init_section
; $80 * $00 -> $bf * $ff
bank_switch 2
jsr imul8xe_init_section
; $c0 * $00 -> $ff * $ff
bank_switch 3
jsr imul8xe_init_section
rts
.endproc