diff --git a/Makefile b/Makefile index c94074b..bd14c7d 100644 --- a/Makefile +++ b/Makefile @@ -2,11 +2,8 @@ all : mandel.xex -mandel.xex : mandel.o mandel-core.o tables.o atari-xex.cfg - ld65 -C ./atari-xex.cfg --mapfile mandel.map -o $@ mandel.o mandel-core.o tables.o atari.lib - -mandel.s : mandel.c mandel.h - cc65 -o $@ mandel.c +mandel.xex : mandel.o tables.o atari-asm-xex.cfg + ld65 -C ./atari-asm-xex.cfg -o $@ mandel.o tables.o %.o : %.s ca65 -o $@ $< @@ -16,7 +13,6 @@ tables.s : tables.js clean : rm -f tables.s - rm -f mandel.s rm -f *.o rm -f *.xex - rm -f mandel.map + diff --git a/atari-xex.cfg b/atari-xex.cfg deleted file mode 100644 index 467d9d4..0000000 --- a/atari-xex.cfg +++ /dev/null @@ -1,69 +0,0 @@ -# Sample linker configuration for C programs using the Atari binary file support. -# Use with: cl65 -tatari -Catari-xex.cfg prog.c -o prog.xex -FEATURES { - STARTADDRESS: default = $8000; -} -SYMBOLS { - __SYSTEM_CHECK__: type = import; # force inclusion of "system check" load chunk - __STACKSIZE__: type = weak, value = $0800; # 2k stack - __STARTADDRESS__: type = export, value = %S; - __RESERVED_MEMORY__: type = weak, value = $0000; - __SYSCHKHDR__: type = export, value = 0; # Disable system check header - __SYSCHKTRL__: type = export, value = 0; # Disable system check trailer - __TABLESEG_START__: type = weak, value = $2E00 + $0300; - __TABLESEG_SIZE__: type = weak, value = 6 * $100; - __BANKSY_START__: type = weak, value = $4000; - __BANKSY_SIZE__: type = weak, value = $4000; - __FRAMEBUFFER_START__: type = weak, value = $A000; -} -MEMORY { -# Note -- $80 and $81 (LOMEM) appear to be reserved in ZP. - ZP: file = "", define = yes, start = $0082, size = $007E; -# "system check" load chunk - SYSCHKCHNK: file = %O, start = $2E00, size = $0300; -# Note $a000-$bfff is against the BASIC cartridge, may require booting with OPTION. 
- TABLES: file = %O, define = yes, start = __TABLESEG_START__, size = __TABLESEG_SIZE__; -# We reserve $4000-7fff for the bank-switch window. -# In theory we could keep data and code here that we only use on 48k/64k systems. - BANKSWITCH: file = "", define = yes, start = __BANKSY_START__, size = __BANKSY_SIZE__; -# "main program" load chunk - MAIN: file = %O, define = yes, start = %S, size = __FRAMEBUFFER_START__ - __STACKSIZE__ - __RESERVED_MEMORY__ - %S; -} -FILES { - %O: format = atari; -} -FORMATS { - atari: runad = start, - initad = SYSCHKCHNK: __SYSTEM_CHECK__; -} -SEGMENTS { - ZEROPAGE: load = ZP, type = zp; - EXTZP: load = ZP, type = zp, optional = yes; - SYSCHK: load = SYSCHKCHNK, type = rw, define = yes, optional = yes; - TABLES: load = TABLES, type = ro, optional = yes, align = 256; - BANKSWICH: load = BANKSWITCH, type = ro, optional = yes; - STARTUP: load = MAIN, type = ro, define = yes; - LOWBSS: load = MAIN, type = rw, optional = yes; # not zero initialized - LOWCODE: load = MAIN, type = ro, define = yes, optional = yes; - ONCE: load = MAIN, type = ro, optional = yes; - CODE: load = MAIN, type = ro, define = yes; - RODATA: load = MAIN, type = ro; - DATA: load = MAIN, type = rw; - INIT: load = MAIN, type = rw, optional = yes; - BSS: load = MAIN, type = bss, define = yes; -} -FEATURES { - CONDES: type = constructor, - label = __CONSTRUCTOR_TABLE__, - count = __CONSTRUCTOR_COUNT__, - segment = ONCE; - CONDES: type = destructor, - label = __DESTRUCTOR_TABLE__, - count = __DESTRUCTOR_COUNT__, - segment = RODATA; - CONDES: type = interruptor, - label = __INTERRUPTOR_TABLE__, - count = __INTERRUPTOR_COUNT__, - segment = RODATA, - import = __CALLIRQ__; -} diff --git a/mandel.c b/mandel.c deleted file mode 100644 index f287fa3..0000000 --- a/mandel.c +++ /dev/null @@ -1,15 +0,0 @@ -/** - * The UI and I/O wrapper for the Mandelbrot runner, in C. 
- * - * For the moment *all* logic is in mandel-core.s, I'm just - * trying to get this to run within a cc65 environment. - * Eventually just the inner loop fun will live in there. - */ - -#include -#include -#include "mandel.h" - -void main(void) { - mandel_start(); -} \ No newline at end of file diff --git a/mandel.h b/mandel.h deleted file mode 100644 index e43fad7..0000000 --- a/mandel.h +++ /dev/null @@ -1,4 +0,0 @@ -#include - -// From mandel-core.s: -extern void mandel_start(void); diff --git a/mandel-core.s b/mandel.s similarity index 89% rename from mandel-core.s rename to mandel.s index 34dff8b..6837e00 100644 --- a/mandel-core.s +++ b/mandel.s @@ -1,44 +1,44 @@ -.zeropage +; Our zero-page vars +ox = $80 ; fixed6.26: center point x +oy = $84 ; fixed6.26: center point y +cx = $88 ; fixed6.26: c_x +cy = $8c ; fixed6.26: c_y -ox: .res 4 ; fixed6.26: center point x -oy: .res 4 ; fixed6.26: center point y -cx: .res 4 ; fixed6.26: c_x -cy: .res 4 ; fixed6.26: c_y +zx = $90 ; fixed6.26: z_x +zy = $94 ; fixed6.26: z_y +zx_2 = $98 ; fixed6.26: z_x^2 +zy_2 = $9c ; fixed6.26: z_y^2 -zx: .res 4 ; fixed6.26: z_x -zy: .res 4 ; fixed6.26: z_y -zx_2: .res 4 ; fixed6.26: z_x^2 -zy_2: .res 4 ; fixed6.26: z_y^2 +zx_zy = $a0 ; fixed6.26: z_x * z_y +dist = $a4 ; fixed6.26: z_x^2 + z_y^2 +sx = $a8 ; i16: screen pixel x +sy = $aa ; i16: screen pixel y +z_buffer_active = $ac ; boolean: 1 if we triggered the lake, 0 if not +z_buffer_start = $ad ; u8: index into z_buffer +z_buffer_end = $ae ; u8: index into z_buffer +iter = $af ; u8: iteration count -zx_zy: .res 4 ; fixed6.26: z_x * z_y -dist: .res 4 ; fixed6.26: z_x^2 + z_y^2 +ptr = $b0 ; u16 +pixel_ptr = $b2 ; u16 +zoom = $b4 ; u8: zoom shift level +fill_level = $b5 ; u8 +pixel_color = $b6 ; u8 +pixel_mask = $b7 ; u8 +pixel_shift = $b8 ; u8 +pixel_offset = $b9 ; u8 +palette_offset = $ba ; u8 +chroma_offset = $bb ; u8 +palette_ticks = $bc ; u8 +chroma_ticks = $bd ; u8 +count_frames = $be ; u8 +; free space $bf -z_buffer_active: 
.res 1 ; boolean: 1 if we triggered the lake, 0 if not -z_buffer_start: .res 1 ; u8: index into z_buffer -z_buffer_end: .res 1 ; u8: index into z_buffer -iter: .res 1 ; u8: iteration count -ptr: .res 2 ; u16 -temp: .res 2 ; u16 -temp2: .res 2 ; u16 - -.data -; can move to .data -sx: .res 2 ; i16: screen pixel x -sy: .res 2 ; i16: screen pixel y -zoom: .res 1 ; u8: zoom shift level -fill_level: .res 1 ; u8 -pixel_color: .res 1 ; u8 -pixel_mask: .res 1 ; u8 -pixel_shift: .res 1 ; u8 -pixel_offset: .res 1 ; u8 -palette_offset: .res 1 ; u8 -chroma_offset: .res 1 ; u8 -palette_ticks: .res 1 ; u8 -chroma_ticks: .res 1 ; u8 -count_frames: .res 1 ; u8 -count_iters: .res 2 ; u16 -text_col: .res 1 ; u8 -text_row: .res 1 ; u8 +count_iters = $c0 ; u16 +text_col = $c2 ; u8 +text_row = $c3 ; u8 +; free space c4-cb +temp = $cc ; u16 +temp2 = $ce ; u16 palette_delay = 23 chroma_delay = 137 @@ -126,12 +126,6 @@ KEY_7 = 51 KEY_8 = 53 KEY_9 = 48 KEY_0 = 50 -KEY_PERIOD = 34 -KEY_E = 42 -KEY_X = 22 -KEY_Y = 43 - -.data .struct float48 exponent .byte @@ -144,6 +138,7 @@ KEY_Y = 43 .import sqr_lobyte .import sqr_hibyte +.data strings: str_self: @@ -362,7 +357,7 @@ z_buffer: .word 0 .endrepeat -.export _mandel_start +.export start ;max_fill_level = 6 max_fill_level = 3 @@ -408,13 +403,6 @@ elapsed_work: elapsed_digit: .byte 0 -input_col: - .byte 0 -input_row: - .byte 0 -input_max: - .byte 0 - ; 2 + 9 * byte cycles .macro add bytes, dest, arg1, arg2 clc ; 2 cyc @@ -462,7 +450,7 @@ input_max: sub 4, dest, arg1, arg2 .endmacro -; 3 + 5 * (bytes - 1) cycles +; 3 + 5 * bytes cycles .macro shl bytes, arg asl arg ; 3 cyc .repeat bytes-1, i @@ -470,23 +458,22 @@ input_max: .endrepeat .endmacro -; 8 cycles +; 13 cycles .macro shl16 arg shl 2, arg .endmacro -; 13 cycles +; 18 cycles .macro shl24 arg shl 3, arg .endmacro -; 18 cycles +; 23 cycles .macro shl32 arg shl 4, arg .endmacro ; 6 * bytes cycles -; 4 * bytes bytes .macro copy bytes, dest, arg .repeat bytes, byte ; 6 * bytes cycles lda arg + 
byte ; 3 cyc @@ -495,7 +482,6 @@ input_max: .endmacro ; 12 cycles -; 8 bytes .macro copy16 dest, arg copy 2, dest, arg .endmacro @@ -530,19 +516,17 @@ input_max: neg 4, arg .endmacro -; 11-27 + 18 * shift cycles -; 65-81 cycles for shift=3 +; 11-27 + 23 * shift cycles +; 103-119 cycles for shift=4 .macro shift_round_16 arg, shift .repeat shift - shl32 arg ; 18 cycles + shl32 arg ; 23 cycles .endrepeat round16 arg ; 11-27 cycles .endmacro ; input: arg1, arg2 as fixed4.12 ; output: dest as fixed8.24 -; patch point jsr at 16 bytes in -imul16_patch_offset = 16 .macro imul16 dest, arg1, arg2 copy16 FR0, arg1 ; 12 cyc copy16 FR1, arg2 ; 12 cyc @@ -552,8 +536,6 @@ imul16_patch_offset = 16 ; input: arg as fixed4.12 ; output: dest as fixed8.24 -; patch point jsr at 8 bytes in -sqr16_patch_offset = 8 .macro sqr16 dest, arg copy16 FR0, arg ; 12 cyc jsr sqr16_func ; ? cyc @@ -699,6 +681,71 @@ bank_switch_table: .endif .endmacro +.proc imul8xe_init + + bank_switch 0 + lda #0 + sta EXTENDED_RAM + bank_switch 1 + lda #1 + sta EXTENDED_RAM + bank_switch 0 + lda EXTENDED_RAM + beq init + + ; no bank switching available, we just overwrite the value in base ram + rts + +init: + + ; patch imul16_func into a forwarding thunk to imul16xe_func + lda #$4c ; 'jmp' opcode + sta imul16_func + lda #.lobyte(imul16xe_func) + sta imul16_func + 1 + lda #.hibyte(imul16xe_func) + sta imul16_func + 2 + + ; ditto for sqr16_func -> sqr16xe_func + lda #$4c ; 'jmp' opcode + sta sqr16_func + lda #.lobyte(sqr16xe_func) + sta sqr16_func + 1 + lda #.hibyte(sqr16xe_func) + sta sqr16_func + 2 + + ; create the lookup table + ; go through the input set, in four 16KB chunks + + arg1 = FR1 + arg2 = FR2 + result = FR0 + + lda #$00 + sta arg1 + sta arg2 + sta ptr + lda #$40 + sta ptr + 1 + + ; $00 * $00 -> $3f * $ff + bank_switch 0 + jsr imul8xe_init_section + + ; $40 * $00 -> $7f * $ff + bank_switch 1 + jsr imul8xe_init_section + + ; $80 * $00 -> $bf * $ff + bank_switch 2 + jsr imul8xe_init_section + + ; $c0 * $00 
-> $ff * $ff + bank_switch 3 + jsr imul8xe_init_section + + rts +.endproc ; Initialize a 16 KB chunk of the table ; input: multipliers in temp @@ -936,17 +983,6 @@ common: .endproc -; rounds to 16-bit first! -; input in FR0, 32 bits signed 6.26 fixed -; output in FR0, Atari float -; clobbers a, x, y, FR0, FR1 -.proc fixed6_26_to_float - shift_round_16 FR0, 3 - copy16 FR0, FR0 + 2 - jsr fixed3_13_to_float - rts -.endproc - ; input in FR0, Atari float ; output in FR0, 16 bits signed 3.13 fixed ; clobbers a, x, y, FR0, FR1 @@ -1098,15 +1134,12 @@ keep_going: shift_round_16 zy, 3 ; zx_2 = zx * zx -fixup_sqr16_1: sqr16 zx_2, zx + 2 ; zy_2 = zy * zy -fixup_sqr16_2: sqr16 zy_2, zy + 2 ; zx_zy = zx * zy -fixup_imul16_1: imul16 zx_zy, zx + 2, zy + 2 ; dist = zx_2 + zy_2 @@ -1254,21 +1287,21 @@ enough: negative: ; temp1 = top half lda #.lobyte(framebuffer_top + stride * half_height) - sta ptr + sta pixel_ptr lda #.hibyte(framebuffer_top + stride * half_height) - sta ptr + 1 + sta pixel_ptr + 1 jmp point positive: lda #.lobyte(framebuffer_bottom) - sta ptr + sta pixel_ptr lda #.hibyte(framebuffer_bottom) - sta ptr + 1 + sta pixel_ptr + 1 point: - ; ptr += sy * stride + ; pixel_ptr += sy * stride ; temp * 40 ; = temp * 32 + temp * 8 ; = (temp << 5) + (temp << 3) @@ -1276,10 +1309,10 @@ point: shl16 temp shl16 temp shl16 temp - add16 ptr, ptr, temp + add16 pixel_ptr, pixel_ptr, temp shl16 temp shl16 temp - add16 ptr, ptr, temp + add16 pixel_ptr, pixel_ptr, temp ; Ok so temp1 points to the start of the line, which is 40 bytes. 
; Get the byte and bit offsets @@ -1319,20 +1352,20 @@ shift_done: draw_pixel: ; read, mask, or, write - lda (ptr),y + lda (pixel_ptr),y and pixel_mask ora pixel_color - sta (ptr),y + sta (pixel_ptr),y dex beq done clc lda #40 - adc ptr - sta ptr + adc pixel_ptr + sta pixel_ptr lda #0 - adc ptr + 1 - sta ptr + 1 + adc pixel_ptr + 1 + sta pixel_ptr + 1 jmp draw_pixel done: @@ -1570,7 +1603,7 @@ number_keys: beq five cpy #KEY_6 beq six - jmp letter_keys + jmp skip_char one: ldx #0 @@ -1589,21 +1622,7 @@ five: jmp load_key_viewport six: ldx #5 - jmp load_key_viewport - -letter_keys: - cpy #KEY_X - bne not_x - jsr input_x - jmp done -not_x: - cpy #KEY_Y - bne not_y - jsr input_y - jmp done -not_y: - jmp skip_char - + ; fall through load_key_viewport: jsr load_viewport ; fall through @@ -1613,23 +1632,6 @@ done: .endproc -.proc input_x - ldx #col_x - ldy #1 - jsr input_number - - - rts -.endproc - -.proc input_y - rts -.endproc - -.proc input_number - rts -.endproc - .proc clear_screen ; zero the range from framebuffer_top to display_list lda #.lobyte(framebuffer_top) @@ -1677,7 +1679,9 @@ zero_byte_loop: draw_string_const str_x copy32 FR0, ox - jsr fixed6_26_to_float + shift_round_16 FR0, 3 + copy16 FR0, FR0 + 2 + jsr fixed3_13_to_float jsr FASC jsr draw_string @@ -1686,7 +1690,9 @@ zero_byte_loop: draw_string_const str_y copy32 FR0, oy - jsr fixed6_26_to_float + shift_round_16 FR0, 3 + copy16 FR0, FR0 + 2 + jsr fixed3_13_to_float jsr FASC jsr draw_string @@ -1746,7 +1752,7 @@ zero_byte_loop: rts .endproc -.proc _mandel_start +.proc start jsr imul8xe_init @@ -1985,25 +1991,51 @@ update_status: lda FR0 + 1 sta elapsed_work + 1 - draw_string_const str_space - - .macro do_countdown divisor, digits - ldx #.lobyte(divisor) - ldy #.hibyte(divisor) - lda #.lobyte(digits) - sta INBUFF - lda #.hibyte(digits) - sta INBUFF + 1 - jsr countdown + ;jsr IFP + ;jsr FASC + ;jsr draw_string + + .macro countdown divisor, digits + .scope + ; count the hours + ldx #0 + countdown_loop: + 
; 16-bit compare: carry clear when elapsed_work is below divisor + lda elapsed_work + cmp #.lobyte(divisor) + lda elapsed_work + 1 + sbc #.hibyte(divisor) + bcc countdown_done + sec + lda elapsed_work + sbc #.lobyte(divisor) + sta elapsed_work + lda elapsed_work + 1 + sbc #.hibyte(divisor) + sta elapsed_work + 1 + inx + jmp countdown_loop + countdown_done: + lda digits,x + eor #$80 + sta elapsed_digit + lda #.lobyte(elapsed_digit) + sta INBUFF + lda #.hibyte(elapsed_digit) + sta INBUFF + 1 + jsr draw_string + .endscope .endmacro - do_countdown 36000, digits_space - do_countdown 3600, digits_zero + + draw_string_const str_space + countdown 36000, digits_space + countdown 3600, digits_zero draw_string_const str_h - do_countdown 600, digits_zero - do_countdown 60, digits_zero + countdown 600, digits_zero + countdown 60, digits_zero draw_string_const str_m - do_countdown 10, digits_zero - do_countdown 1, digits_zero + countdown 10, digits_zero + countdown 1, digits_zero draw_string_const str_s skipped: @@ -2065,118 +2097,3 @@ loop: jmp main_loop .endproc - -; digit string in INBUFF -; divisor X/Y -; clobbers temp, calls draw_string -.proc countdown - divisor = temp - stx divisor - sty divisor + 1 - - ; count the hours - ldy #0 -countdown_loop: - lda elapsed_work + 1 - cmp divisor + 1 - beq countdown_lobyte - bcc countdown_done - bcs countdown_inc -countdown_lobyte: - lda elapsed_work - cmp divisor - bcc countdown_done -countdown_inc: - sec - lda elapsed_work - sbc divisor - sta elapsed_work - lda elapsed_work + 1 - sbc divisor + 1 - sta elapsed_work + 1 - iny - jmp countdown_loop -countdown_done: - lda (INBUFF),y - eor #$80 - sta elapsed_digit - lda #.lobyte(elapsed_digit) - sta INBUFF - lda #.hibyte(elapsed_digit) - sta INBUFF + 1 - jsr draw_string - rts -.endproc - -.proc imul8xe_init - - bank_switch 0 - lda #0 - sta EXTENDED_RAM - bank_switch 1 - lda #1 - sta EXTENDED_RAM - bank_switch 0 - lda EXTENDED_RAM - beq init - - ; no bank switching available, we just overwrite the value in base ram - rts - -init: - - ;
patch imul16_func into a forwarding thunk to imul16xe_func - lda #$4c ; 'jmp' opcode - sta imul16_func - lda #.lobyte(imul16xe_func) - sta imul16_func + 1 - sta mandelbrot::fixup_imul16_1 + imul16_patch_offset + 1 - lda #.hibyte(imul16xe_func) - sta imul16_func + 2 - sta mandelbrot::fixup_imul16_1 + imul16_patch_offset + 2 - - ; ditto for sqr16_func -> sqr16xe_func - lda #$4c ; 'jmp' opcode - sta sqr16_func - lda #.lobyte(sqr16xe_func) - sta sqr16_func + 1 - sta mandelbrot::fixup_sqr16_1 + sqr16_patch_offset + 1 - sta mandelbrot::fixup_sqr16_2 + sqr16_patch_offset + 1 - lda #.hibyte(sqr16xe_func) - sta sqr16_func + 2 - sta mandelbrot::fixup_sqr16_1 + sqr16_patch_offset + 2 - sta mandelbrot::fixup_sqr16_2 + sqr16_patch_offset + 2 - - - ; create the lookup table - ; go through the input set, in four 16KB chunks - - arg1 = FR1 - arg2 = FR2 - result = FR0 - - lda #$00 - sta arg1 - sta arg2 - sta ptr - lda #$40 - sta ptr + 1 - - ; $00 * $00 -> $3f * $ff - bank_switch 0 - jsr imul8xe_init_section - - ; $40 * $00 -> $7f * $ff - bank_switch 1 - jsr imul8xe_init_section - - ; $80 * $00 -> $bf * $ff - bank_switch 2 - jsr imul8xe_init_section - - ; $c0 * $00 -> $ff * $ff - bank_switch 3 - jsr imul8xe_init_section - - rts -.endproc