Compare commits

...

4 commits

Author SHA1 Message Date
7c04862d70 workaround for rounding us/iter
for some reason rounding is giving me wrong results
not sure what i'm doing wrong :D

just show 6 digits :P

ok this gets the us/iter working, and it is more stable
but the elapsed time still needs to be added
2025-01-05 14:29:27 -08:00
918d15e813 wip us/iter counter
seems wrong, gives 32 all the time and that seems too small
2025-01-05 14:05:24 -08:00
eaa00a055a wip changing time units
it does this weird thing where sometimes it's reading out wrong digits
and then switches to expected unit of sec/px

work in progress no clue what's going on
2025-01-04 18:46:51 -08:00
7e5ca79d9a move total_ms, total_pixels out of zero page
this frees up 12 bytes of zero page space and costs no measurable
time, as these variables are not in the hot path and the timing was
only a tiny bit different.
2025-01-04 14:25:25 -08:00

143
mandel.s
View file

@ -31,10 +31,10 @@ chroma_offset = $bb ; u8
palette_ticks = $bc ; u8 palette_ticks = $bc ; u8
chroma_ticks = $bd ; u8 chroma_ticks = $bd ; u8
count_frames = $be ; u8 count_frames = $be ; u8
count_pixels = $bf ; u8 ; free space $bf
total_pixels = $c0 ; float48 count_iters = $c0 ; u16
total_ms = $c6 ; float48 ; free space c2-cb
temp = $cc ; u16 temp = $cc ; u16
temp2 = $ce ; u16 temp2 = $ce ; u16
@ -59,10 +59,12 @@ LBUFF = $0580 ; result buffer for FASC routine
; FP ROM routine vectors ; FP ROM routine vectors
FASC = $D8E6 ; FLOATING POINT TO ASCII (output in INBUFF, last char has high bit set) FASC = $D8E6 ; FLOATING POINT TO ASCII (output in INBUFF, last char has high bit set)
IFP = $D9AA ; INTEGER TO FLOATING POINT CONVERSION (FR0:u16 -> FR0:float48) IFP = $D9AA ; INTEGER TO FLOATING POINT CONVERSION (FR0:u16 -> FR0:float48)
FPI = $D9D2 ; floating point to integer
FADD = $DA66 ; ADDITION (FR0 += FR1) FADD = $DA66 ; ADDITION (FR0 += FR1)
FSUB = $DA60 ; SUBTRACTION (FR0 -= FR1) FSUB = $DA60 ; SUBTRACTION (FR0 -= FR1)
FMUL = $DADB ; MULTIPLICATION (FR0 *= FR1) FMUL = $DADB ; MULTIPLICATION (FR0 *= FR1)
FDIV = $DB28 ; DIVISION (FR0 /= FR1) FDIV = $DB28 ; DIVISION (FR0 /= FR1)
ZFR0 = $DA44 ; clear FR0
ZF1 = $DA46 ; CLEAR ZERO PAGE FLOATING POINT NUMBER (XX) ZF1 = $DA46 ; CLEAR ZERO PAGE FLOATING POINT NUMBER (XX)
FLD0R = $DD89 ; LOAD FR0 WITH FLOATING POINT NUMBER (YYXX) FLD0R = $DD89 ; LOAD FR0 WITH FLOATING POINT NUMBER (YYXX)
FLD1R = $DD98 ; LOAD FR1 WITH FLOATING POINT NUMBER (YYXX) FLD1R = $DD98 ; LOAD FR1 WITH FLOATING POINT NUMBER (YYXX)
@ -141,7 +143,7 @@ str_self:
.byte "MANDEL-6502" .byte "MANDEL-6502"
str_self_end: str_self_end:
str_speed: str_speed:
.byte " ms/px" .byte "us/iter: "
str_speed_end: str_speed_end:
str_run: str_run:
.byte " RUN" .byte " RUN"
@ -195,14 +197,32 @@ aspect_x: ; fixed3.13 5/4
aspect_y: ; fixed3.13 3/4 aspect_y: ; fixed3.13 3/4
.word 3 << (13 - 2) .word 3 << (13 - 2)
ms_per_frame: ; float48 16.66666667 sec_per_frame: ; float48 00 . 01 66 66 66 67
.byte 64 ; exponent/sign .byte 63 ; exponent/sign - -1 bytes
.byte $16 ; BCD digits .byte $01 ; BCD digits
.byte $66 .byte $66
.byte $66 .byte $66
.byte $66 .byte $66
.byte $67 .byte $67
us_per_sec: ; float48 1e6 (microseconds per second) 01 00 00 00 . 00
.byte 67 ; exponent/sign +3 bytes (64 + 3, so the mantissa is scaled by 100^3)
.byte $01 ; BCD digits
.byte $00
.byte $00
.byte $00
.byte $00
total_iters: ; float48 accumulator, 6 bytes, assembled as all zeroes
.res 6, 0
total_sec: ; float48 accumulator, 6 bytes, assembled as all zeroes
.res 6, 0
display_list_start: display_list_start:
; 24 lines overscan ; 24 lines overscan
.repeat 3 .repeat 3
@ -917,6 +937,11 @@ next:
sta z_buffer_end sta z_buffer_end
loop: loop:
inc count_iters
bne low_iters
inc count_iters + 1
low_iters:
; iter++ & max-iters break ; iter++ & max-iters break
inc iter inc iter
bne keep_going bne keep_going
@ -1220,6 +1245,7 @@ done:
; clobbers A, X ; clobbers A, X
.local loop .local loop
.local done .local done
.local padding
ldx #0 ldx #0
loop: loop:
cpx #len cpx #len
@ -1227,11 +1253,23 @@ loop:
txa txa
tay tay
lda (strptr),y lda (strptr),y
bmi padding
tay tay
lda char_map,y lda char_map,y
sta textbuffer + col,x sta textbuffer + col,x
inx inx
jmp loop jmp loop
padding:
ldy #32 ; space
lda char_map,y
cpx #len
beq done
sta textbuffer + col,x
inx
jmp padding
done: done:
.endmacro .endmacro
@ -1559,16 +1597,20 @@ copy_byte_loop:
jsr SETVBV jsr SETVBV
main_loop: main_loop:
; count_frames = 0; count_pixels = 0 ; count_frames = 0; count_iters = 0
lda #0 lda #0
sta count_frames sta count_frames
sta count_pixels sta count_iters
sta count_iters + 1
; total_ms = 0.0; total_pixels = 0.0 ; total_sec = 0.0; total_iters = 0.0
ldx #total_ms jsr ZFR0
jsr ZF1 ldx #.lobyte(total_sec)
ldx #total_pixels ldy #.hibyte(total_sec)
jsr ZF1 jsr FST0R
ldx #.lobyte(total_iters)
ldy #.hibyte(total_iters)
jsr FST0R
jsr clear_screen jsr clear_screen
jsr status_bar jsr status_bar
@ -1640,38 +1682,32 @@ not_skipped_mask:
no_key: no_key:
; check if we should update the counters ; check if we should update the counters
;
; count_pixels >= width? update!
inc count_pixels
lda count_pixels
cmp #width
bmi update_status
; count_frames >= 120? update! ; count_frames >= 120? update!
lda count_frames lda count_frames
cmp #120 ; >= 2 seconds cmp #120 ; >= 2 seconds
bmi skipped bpl update_status
jmp skipped
update_status: update_status:
; FR0 = (float)count_pixels & clear count_pixels ; FR0 = (float)count_iters & clear count_iters
lda count_pixels copy16 FR0, count_iters
sta FR0
lda #0
sta FR0 + 1
sta count_pixels
jsr IFP jsr IFP
lda #0
sta count_iters
sta count_iters + 1
; FR1 = total_pixels ; FR1 = total_iters
ldx #.lobyte(total_pixels) ldx #.lobyte(total_iters)
ldy #.hibyte(total_pixels) ldy #.hibyte(total_iters)
jsr FLD1R jsr FLD1R
; FR0 += FR1 ; FR0 += FR1
jsr FADD jsr FADD
; total_pixels = FR0 ; total_iters = FR0
ldx #.lobyte(total_pixels) ldx #.lobyte(total_iters)
ldy #.hibyte(total_pixels) ldy #.hibyte(total_iters)
jsr FST0R jsr FST0R
@ -1684,35 +1720,48 @@ update_status:
sta count_frames sta count_frames
jsr IFP jsr IFP
; FR0 *= ms_per_frame ; FR0 *= sec_per_frame
ldx #.lobyte(ms_per_frame) ldx #.lobyte(sec_per_frame)
ldy #.hibyte(ms_per_frame) ldy #.hibyte(sec_per_frame)
jsr FLD1R jsr FLD1R
jsr FMUL jsr FMUL
; FR0 += total_ms ; FR0 += total_sec
ldx #total_ms ldx #.lobyte(total_sec)
ldy #0 ldy #.hibyte(total_sec)
jsr FLD1R jsr FLD1R
jsr FADD jsr FADD
; total_ms = FR0 ; total_sec = FR0
ldx #total_ms ldx #.lobyte(total_sec)
ldy #0 ldy #.hibyte(total_sec)
jsr FST0R jsr FST0R
; FR0 /= total_pixels ; FR0 /= total_iters
ldx #total_pixels ldx #.lobyte(total_iters)
ldy #0 ldy #.hibyte(total_iters)
jsr FLD1R jsr FLD1R
jsr FDIV jsr FDIV
; FR0 *= us_per_sec
ldx #.lobyte(us_per_sec)
ldy #.hibyte(us_per_sec)
jsr FLD1R
jsr FMUL
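; @fixme
; round to integer
; for some reason this gives bad results?
; NOTE(review): possible cause -- FASC sets the high bit on the last
; character it writes, and the indirect text macro branches to its
; padding path (bmi padding) on that character without drawing it.
; A rounded result shorter than speed_precision characters would
; therefore lose its final digit (e.g. "32" shown as "3 "). Confirm.
;clc
;jsr FPI
;jsr IFP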
; convert to ASCII in INBUFF ; convert to ASCII in INBUFF
jsr FASC jsr FASC
; print the first 6 digits ; print the first 6 digits
draw_text_indirect speed_start, speed_precision, INBUFF draw_text speed_start, str_speed_len, str_speed
draw_text speed_start + speed_precision, str_speed_len, str_speed draw_text_indirect speed_start + str_speed_len, speed_precision, INBUFF
skipped: skipped: