diff --git a/mandel.s b/mandel.s index 526953a..7466cd9 100644 --- a/mandel.s +++ b/mandel.s @@ -31,10 +31,10 @@ chroma_offset = $bb ; u8 palette_ticks = $bc ; u8 chroma_ticks = $bd ; u8 count_frames = $be ; u8 -count_pixels = $bf ; u8 +; free space $bf -total_pixels = $c0 ; float48 -total_ms = $c6 ; float48 +count_iters = $c0 ; u16 +; free space c2-cb temp = $cc ; u16 temp2 = $ce ; u16 @@ -59,10 +59,12 @@ LBUFF = $0580 ; result buffer for FASC routine ; FP ROM routine vectors FASC = $D8E6 ; FLOATING POINT TO ASCII (output in INBUFF, last char has high bit set) IFP = $D9AA ; INTEGER TO FLOATING POINT CONVERSION (FR0:u16 -> FR0:float48) +FPI = $D9D2 ; floating point to integer FADD = $DA66 ; ADDITION (FR0 += FR1) FSUB = $DA60 ; SUBTRACTION (FR0 -= FR1) FMUL = $DADB ; MULTIPLICATION (FR0 *= FR1) FDIV = $DB28 ; DIVISION (FR0 /= FR1) +ZFR0 = $DA44 ; clear FR0 ZF1 = $DA46 ; CLEAR ZERO PAGE FLOATING POINT NUMBER (XX) FLD0R = $DD89 ; LOAD FR0 WITH FLOATING POINT NUMBER (YYXX) FLD1R = $DD98 ; LOAD FR1 WITH FLOATING POINT NUMBER (YYXX) @@ -141,7 +143,7 @@ str_self: .byte "MANDEL-6502" str_self_end: str_speed: - .byte " ms/px" + .byte "us/iter: " str_speed_end: str_run: .byte " RUN" @@ -195,14 +197,32 @@ aspect_x: ; fixed3.13 5/4 aspect_y: ; fixed3.13 3/4 .word 3 << (13 - 2) -ms_per_frame: ; float48 16.66666667 - .byte 64 ; exponent/sign - .byte $16 ; BCD digits +sec_per_frame: ; float48 00 . 01 66 66 66 67 + .byte 63 ; exponent/sign - -1 bytes + .byte $01 ; BCD digits .byte $66 .byte $66 .byte $66 .byte $67 +us_per_sec: ; float48 1e6 01 00 0,0 00 . 
00 + .byte 67 ; exponent/sign +3 bytes + .byte $01 ; BCD digits + .byte $00 + .byte $00 + .byte $00 + .byte $00 + +total_iters: ; float48 + .repeat 6 + .byte 0 + .endrepeat + +total_sec: ; float48 + .repeat 6 + .byte 0 + .endrepeat + display_list_start: ; 24 lines overscan .repeat 3 @@ -917,6 +937,11 @@ next: sta z_buffer_end loop: + inc count_iters + bne low_iters + inc count_iters + 1 +low_iters: + ; iter++ & max-iters break inc iter bne keep_going @@ -1220,6 +1245,7 @@ done: ; clobbers A, X .local loop .local done + .local padding ldx #0 loop: cpx #len @@ -1227,11 +1253,23 @@ loop: txa tay lda (strptr),y + bmi padding tay lda char_map,y sta textbuffer + col,x inx jmp loop + +padding: + ldy #32 ; space + lda char_map,y + + cpx #len + beq done + sta textbuffer + col,x + inx + jmp padding + done: .endmacro @@ -1559,16 +1597,20 @@ copy_byte_loop: jsr SETVBV main_loop: - ; count_frames = 0; count_pixels = 0 + ; count_frames = 0; count_iters = 0 lda #0 sta count_frames - sta count_pixels + sta count_iters + sta count_iters + 1 - ; total_ms = 0.0; total_pixels = 0.0 - ldx #total_ms - jsr ZF1 - ldx #total_pixels - jsr ZF1 + ; total_sec = 0.0; total_iters = 0.0 + jsr ZFR0 + ldx #.lobyte(total_sec) + ldy #.hibyte(total_sec) + jsr FST0R + ldx #.lobyte(total_iters) + ldy #.hibyte(total_iters) + jsr FST0R jsr clear_screen jsr status_bar @@ -1640,38 +1682,32 @@ not_skipped_mask: no_key: ; check if we should update the counters - ; - ; count_pixels >= width? update! - inc count_pixels - lda count_pixels - cmp #width - bmi update_status ; count_frames >= 120? update! 
lda count_frames cmp #120 ; >= 2 seconds - bmi skipped + bpl update_status + jmp skipped update_status: - ; FR0 = (float)count_pixels & clear count_pixels - lda count_pixels - sta FR0 - lda #0 - sta FR0 + 1 - sta count_pixels + ; FR0 = (float)count_iters & clear count_iters + copy16 FR0, count_iters jsr IFP + lda #0 + sta count_iters + sta count_iters + 1 - ; FR1 = total_pixels - ldx #.lobyte(total_pixels) - ldy #.hibyte(total_pixels) + ; FR1 = total_iters + ldx #.lobyte(total_iters) + ldy #.hibyte(total_iters) jsr FLD1R ; FR0 += FR1 jsr FADD - ; total_pixels = FR0 - ldx #.lobyte(total_pixels) - ldy #.hibyte(total_pixels) + ; total_iters = FR0 + ldx #.lobyte(total_iters) + ldy #.hibyte(total_iters) jsr FST0R @@ -1684,35 +1720,48 @@ update_status: sta count_frames jsr IFP - ; FR0 *= ms_per_frame - ldx #.lobyte(ms_per_frame) - ldy #.hibyte(ms_per_frame) + ; FR0 *= sec_per_frame + ldx #.lobyte(sec_per_frame) + ldy #.hibyte(sec_per_frame) jsr FLD1R jsr FMUL - ; FR0 += total_ms - ldx #total_ms - ldy #0 + ; FR0 += total_sec + ldx #.lobyte(total_sec) + ldy #.hibyte(total_sec) jsr FLD1R jsr FADD - ; total_ms = FR0 - ldx #total_ms - ldy #0 + ; total_sec = FR0 + ldx #.lobyte(total_sec) + ldy #.hibyte(total_sec) jsr FST0R - ; FR0 /= total_pixels - ldx #total_pixels - ldy #0 + ; FR0 /= total_iters + ldx #.lobyte(total_iters) + ldy #.hibyte(total_iters) jsr FLD1R jsr FDIV + ; FR0 *= us_per_sec + ldx #.lobyte(us_per_sec) + ldy #.hibyte(us_per_sec) + jsr FLD1R + jsr FMUL + + ; @fixme + ; round to integer + ; for some reason this gives bad results? + ;clc + ;jsr FPI + ;jsr IFP + ; convert to ASCII in INBUFF jsr FASC ; print the first 6 digits - draw_text_indirect speed_start, speed_precision, INBUFF - draw_text speed_start + speed_precision, str_speed_len, str_speed + draw_text speed_start, str_speed_len, str_speed + draw_text_indirect speed_start + str_speed_len, speed_precision, INBUFF skipped: