From 7e5ca79d9a4bd419a3a004f7c96a612c9e41cee7 Mon Sep 17 00:00:00 2001 From: Brooke Vibber Date: Sat, 4 Jan 2025 14:25:25 -0800 Subject: [PATCH 1/4] move total_ms, total_pixels out of zero page this frees up 12 bytes of zero page space and costs no measurable time as these variables are not in the hot path and there was only a tiny bit different. --- mandel.s | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/mandel.s b/mandel.s index 526953a..317e3b1 100644 --- a/mandel.s +++ b/mandel.s @@ -33,8 +33,7 @@ chroma_ticks = $bd ; u8 count_frames = $be ; u8 count_pixels = $bf ; u8 -total_pixels = $c0 ; float48 -total_ms = $c6 ; float48 +; free space c0-cb temp = $cc ; u16 temp2 = $ce ; u16 @@ -63,6 +62,7 @@ FADD = $DA66 ; ADDITION (FR0 += FR1) FSUB = $DA60 ; SUBTRACTION (FR0 -= FR1) FMUL = $DADB ; MULTIPLICATION (FR0 *= FR1) FDIV = $DB28 ; DIVISION (FR0 /= FR1) +ZFR0 = $DA44 ; clear FR0 ZF1 = $DA46 ; CLEAR ZERO PAGE FLOATING POINT NUMBER (XX) FLD0R = $DD89 ; LOAD FR0 WITH FLOATING POINT NUMBER (YYXX) FLD1R = $DD98 ; LOAD FR1 WITH FLOATING POINT NUMBER (YYXX) @@ -203,6 +203,16 @@ ms_per_frame: ; float48 16.66666667 .byte $66 .byte $67 +total_pixels: ; float48 + .repeat 6 + .byte 0 + .endrepeat + +total_ms: ; float48 + .repeat 6 + .byte 0 + .endrepeat + display_list_start: ; 24 lines overscan .repeat 3 @@ -1565,10 +1575,13 @@ main_loop: sta count_pixels ; total_ms = 0.0; total_pixels = 0.0 - ldx #total_ms - jsr ZF1 - ldx #total_pixels - jsr ZF1 + jsr ZFR0 + ldx #.lobyte(total_ms) + ldy #.hibyte(total_ms) + jsr FST0R + ldx #.lobyte(total_pixels) + ldy #.hibyte(total_pixels) + jsr FST0R jsr clear_screen jsr status_bar @@ -1691,19 +1704,19 @@ update_status: jsr FMUL ; FR0 += total_ms - ldx #total_ms - ldy #0 + ldx #.lobyte(total_ms) + ldy #.hibyte(total_ms) jsr FLD1R jsr FADD ; total_ms = FR0 - ldx #total_ms - ldy #0 + ldx #.lobyte(total_ms) + ldy #.hibyte(total_ms) jsr FST0R ; FR0 /= total_pixels - ldx #total_pixels - 
ldy #0 + ldx #.lobyte(total_pixels) + ldy #.hibyte(total_pixels) jsr FLD1R jsr FDIV From eaa00a055ac6ff39291a42b458b3e41806025035 Mon Sep 17 00:00:00 2001 From: Brooke Vibber Date: Sat, 4 Jan 2025 18:46:51 -0800 Subject: [PATCH 2/4] wip changing time units it does this weird thing where sometimes it's reading out wrong digits and then switches to expected unit of sec/px work in progress no clue what's going on --- mandel.s | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/mandel.s b/mandel.s index 317e3b1..f053748 100644 --- a/mandel.s +++ b/mandel.s @@ -141,7 +141,7 @@ str_self: .byte "MANDEL-6502" str_self_end: str_speed: - .byte " ms/px" + .byte " sec/px" str_speed_end: str_run: .byte " RUN" @@ -195,9 +195,9 @@ aspect_x: ; fixed3.13 5/4 aspect_y: ; fixed3.13 3/4 .word 3 << (13 - 2) -ms_per_frame: ; float48 16.66666667 - .byte 64 ; exponent/sign - .byte $16 ; BCD digits +sec_per_frame: ; float48 0.016666667 + .byte 63 ; exponent/sign + .byte $01 ; BCD digits .byte $66 .byte $66 .byte $66 @@ -208,7 +208,7 @@ total_pixels: ; float48 .byte 0 .endrepeat -total_ms: ; float48 +total_sec: ; float48 .repeat 6 .byte 0 .endrepeat @@ -1574,10 +1574,10 @@ main_loop: sta count_frames sta count_pixels - ; total_ms = 0.0; total_pixels = 0.0 + ; total_sec = 0.0; total_pixels = 0.0 jsr ZFR0 - ldx #.lobyte(total_ms) - ldy #.hibyte(total_ms) + ldx #.lobyte(total_sec) + ldy #.hibyte(total_sec) jsr FST0R ldx #.lobyte(total_pixels) ldy #.hibyte(total_pixels) @@ -1697,21 +1697,21 @@ update_status: sta count_frames jsr IFP - ; FR0 *= ms_per_frame - ldx #.lobyte(ms_per_frame) - ldy #.hibyte(ms_per_frame) + ; FR0 *= sec_per_frame + ldx #.lobyte(sec_per_frame) + ldy #.hibyte(sec_per_frame) jsr FLD1R jsr FMUL - ; FR0 += total_ms - ldx #.lobyte(total_ms) - ldy #.hibyte(total_ms) + ; FR0 += total_sec + ldx #.lobyte(total_sec) + ldy #.hibyte(total_sec) jsr FLD1R jsr FADD - ; total_ms = FR0 - ldx #.lobyte(total_ms) - ldy #.hibyte(total_ms) + 
; total_sec = FR0 + ldx #.lobyte(total_sec) + ldy #.hibyte(total_sec) jsr FST0R ; FR0 /= total_pixels From 918d15e8139d21c15f05776bfdb6780000a687f9 Mon Sep 17 00:00:00 2001 From: Brooke Vibber Date: Sun, 5 Jan 2025 14:05:24 -0800 Subject: [PATCH 3/4] wip us/iter counter seems wrong, gives 32 all the time and that seems too small --- mandel.s | 103 ++++++++++++++++++++++++++++++++++++------------------- 1 file changed, 68 insertions(+), 35 deletions(-) diff --git a/mandel.s b/mandel.s index f053748..09af6a5 100644 --- a/mandel.s +++ b/mandel.s @@ -31,9 +31,10 @@ chroma_offset = $bb ; u8 palette_ticks = $bc ; u8 chroma_ticks = $bd ; u8 count_frames = $be ; u8 -count_pixels = $bf ; u8 +; free space $bf -; free space c0-cb +count_iters = $c0 ; u16 +; free space c2-cb temp = $cc ; u16 temp2 = $ce ; u16 @@ -58,6 +59,7 @@ LBUFF = $0580 ; result buffer for FASC routine ; FP ROM routine vectors FASC = $D8E6 ; FLOATING POINT TO ASCII (output in INBUFF, last char has high bit set) IFP = $D9AA ; INTEGER TO FLOATING POINT CONVERSION (FR0:u16 -> FR0:float48) +FPI = $D9D2 ; floating point to integer FADD = $DA66 ; ADDITION (FR0 += FR1) FSUB = $DA60 ; SUBTRACTION (FR0 -= FR1) FMUL = $DADB ; MULTIPLICATION (FR0 *= FR1) @@ -141,7 +143,7 @@ str_self: .byte "MANDEL-6502" str_self_end: str_speed: - .byte " sec/px" + .byte "us/iter: " str_speed_end: str_run: .byte " RUN" @@ -154,7 +156,7 @@ str_self_len = str_self_end - str_self str_speed_len = str_speed_end - str_speed str_run_len = str_run_end - str_run str_done_len = str_done_end - str_done -speed_precision = 6 +speed_precision = 5 speed_start = 40 - str_done_len - str_speed_len - speed_precision - 1 speed_len = 14 + str_speed_len @@ -196,14 +198,22 @@ aspect_y: ; fixed3.13 3/4 .word 3 << (13 - 2) sec_per_frame: ; float48 0.016666667 - .byte 63 ; exponent/sign + .byte 63 ; exponent/sign - -2 .byte $01 ; BCD digits .byte $66 .byte $66 .byte $66 .byte $67 -total_pixels: ; float48 +us_per_sec: ; float48 1e9 + .byte 68 ; exponent/sign +8 
+ .byte $10 ; BCD digits + .byte $00 + .byte $00 + .byte $00 + .byte $00 + +total_iters: ; float48 .repeat 6 .byte 0 .endrepeat @@ -927,6 +937,11 @@ next: sta z_buffer_end loop: + inc count_iters + bne low_iters + inc count_iters + 1 +low_iters: + ; iter++ & max-iters break inc iter bne keep_going @@ -1230,6 +1245,7 @@ done: ; clobbers A, X .local loop .local done + .local padding ldx #0 loop: cpx #len @@ -1237,11 +1253,23 @@ loop: txa tay lda (strptr),y + bmi padding tay lda char_map,y sta textbuffer + col,x inx jmp loop + +padding: + ldy #32 ; space + lda char_map,y + + cpx #len + beq done + sta textbuffer + col,x + inx + jmp padding + done: .endmacro @@ -1569,18 +1597,19 @@ copy_byte_loop: jsr SETVBV main_loop: - ; count_frames = 0; count_pixels = 0 + ; count_frames = 0; count_iters = 0 lda #0 sta count_frames - sta count_pixels + sta count_iters + sta count_iters + 1 - ; total_sec = 0.0; total_pixels = 0.0 + ; total_sec = 0.0; total_iters = 0.0 jsr ZFR0 ldx #.lobyte(total_sec) ldy #.hibyte(total_sec) jsr FST0R - ldx #.lobyte(total_pixels) - ldy #.hibyte(total_pixels) + ldx #.lobyte(total_iters) + ldy #.hibyte(total_iters) jsr FST0R jsr clear_screen @@ -1653,38 +1682,32 @@ not_skipped_mask: no_key: ; check if we should update the counters - ; - ; count_pixels >= width? update! - inc count_pixels - lda count_pixels - cmp #width - bmi update_status ; count_frames >= 120? update! 
lda count_frames cmp #120 ; >= 2 seconds - bmi skipped + bpl update_status + jmp skipped update_status: - ; FR0 = (float)count_pixels & clear count_pixels - lda count_pixels - sta FR0 - lda #0 - sta FR0 + 1 - sta count_pixels + ; FR0 = (float)count_iters & clear count_iters + copy16 FR0, count_iters jsr IFP + lda #0 + sta count_iters + sta count_iters + 1 - ; FR1 = total_pixels - ldx #.lobyte(total_pixels) - ldy #.hibyte(total_pixels) + ; FR1 = total_iters + ldx #.lobyte(total_iters) + ldy #.hibyte(total_iters) jsr FLD1R ; FR0 += FR1 jsr FADD - ; total_pixels = FR0 - ldx #.lobyte(total_pixels) - ldy #.hibyte(total_pixels) + ; total_iters = FR0 + ldx #.lobyte(total_iters) + ldy #.hibyte(total_iters) jsr FST0R @@ -1714,18 +1737,28 @@ update_status: ldy #.hibyte(total_sec) jsr FST0R - ; FR0 /= total_pixels - ldx #.lobyte(total_pixels) - ldy #.hibyte(total_pixels) + ; FR0 /= total_iters + ldx #.lobyte(total_iters) + ldy #.hibyte(total_iters) jsr FLD1R jsr FDIV + ; FR0 *= us_per_sec + ldx #.lobyte(us_per_sec) + ldy #.hibyte(us_per_sec) + jsr FLD1R + jsr FMUL + + ; round to integer + jsr FPI + jsr IFP + ; convert to ASCII in INBUFF jsr FASC ; print the first 6 digits - draw_text_indirect speed_start, speed_precision, INBUFF - draw_text speed_start + speed_precision, str_speed_len, str_speed + draw_text speed_start, str_speed_len, str_speed + draw_text_indirect speed_start + str_speed_len, speed_precision, INBUFF skipped: From 7c04862d70b16a8e35392255371cdbaca0340396 Mon Sep 17 00:00:00 2001 From: Brooke Vibber Date: Sun, 5 Jan 2025 14:29:27 -0800 Subject: [PATCH 4/4] workaround for rounding us/iter for some reason rounding is giving me wrong results not sure what i'm doing wrong :D just show 6 digits :P ok this gets the us/iter working, and it is more stable but the elapsed time still needs to be added --- mandel.s | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/mandel.s b/mandel.s index 09af6a5..7466cd9 100644 --- a/mandel.s +++ 
b/mandel.s @@ -156,7 +156,7 @@ str_self_len = str_self_end - str_self str_speed_len = str_speed_end - str_speed str_run_len = str_run_end - str_run str_done_len = str_done_end - str_done -speed_precision = 5 +speed_precision = 6 speed_start = 40 - str_done_len - str_speed_len - speed_precision - 1 speed_len = 14 + str_speed_len @@ -197,17 +197,17 @@ aspect_x: ; fixed3.13 5/4 aspect_y: ; fixed3.13 3/4 .word 3 << (13 - 2) -sec_per_frame: ; float48 0.016666667 - .byte 63 ; exponent/sign - -2 +sec_per_frame: ; float48 00 . 01 66 66 66 67 + .byte 63 ; exponent/sign - -1 bytes .byte $01 ; BCD digits .byte $66 .byte $66 .byte $66 .byte $67 -us_per_sec: ; float48 1e9 - .byte 68 ; exponent/sign +8 - .byte $10 ; BCD digits +us_per_sec: ; float48 1e6 01 00 0,0 00 . 00 + .byte 67 ; exponent/sign +3 bytes + .byte $01 ; BCD digits .byte $00 .byte $00 .byte $00 @@ -1749,9 +1749,12 @@ update_status: jsr FLD1R jsr FMUL + ; @fixme ; round to integer - jsr FPI - jsr IFP + ; for some reason this gives bad results? + ;clc + ;jsr FPI + ;jsr IFP ; convert to ASCII in INBUFF jsr FASC