Compare commits

...

4 commits

Author SHA1 Message Date
b56dc1e98b notes 2024-12-30 20:38:33 -08:00
0a7293d8bc do 4x4 2x2 1x1 only
in prep for bigger pixels
2024-12-30 19:52:35 -08:00
ec42f672d4 use an 8-item z buffer for slightly fasterness 2024-12-30 19:48:28 -08:00
67649d4743 annotations, tweak 2024-12-30 19:17:02 -08:00

View file

@ -262,7 +262,10 @@ palette_chroma_entries = 15
.code
; Number of zx/zy values kept in z_buffer, with the render cost measured for
; each candidate size. Exactly one definition may be live: a symbol assigned
; with `=` cannot be assigned a second time.
;z_buffer_len = 16 ; 10.863 ms/px
;z_buffer_len = 12 ; 10.619 ms/px
z_buffer_len = 8 ; 10.612 ms/px
;z_buffer_len = 4 ; 12.395 ms/px
; len - 1; only a contiguous bit mask when z_buffer_len is a power of two.
; NOTE(review): presumably ANDed with an index to wrap it - confirm at the use site.
z_buffer_mask = z_buffer_len - 1
z_buffer:
; the last N zx/zy values
@ -273,11 +276,12 @@ z_buffer:
.export start
; Number of fill levels. Exactly one definition may be live: a symbol assigned
; with `=` cannot be assigned a second time.
;max_fill_level = 6
max_fill_level = 3
; One mask byte per fill level, widest mask first.
; NOTE(review): presumably indexed by fill level and ANDed with a pixel
; coordinate - confirm against the code that reads this table.
fill_masks:
; The three widest masks are disabled along with the levels above 3.
; .byte %00011111
; .byte %00001111
; .byte %00000111
    .byte %00000011
    .byte %00000001
    .byte %00000000
@ -310,18 +314,21 @@ viewport_oy:
.endrepeat
.endmacro
; add16 dest, arg1, arg2
; 2-byte addition: expands the generic `add` macro with a width of 2.
; NOTE(review): `add` is defined outside this view; presumably
; dest = arg1 + arg2 - confirm against its definition.
; 20 cycles
.macro add16 dest, arg1, arg2
add 2, dest, arg1, arg2
.endmacro
; add32 dest, arg1, arg2
; 4-byte addition: expands the generic `add` macro with a width of 4,
; passing the operands in the same order as add16 and sub32 do.
; The previous body passed `arg2, dest` and silently ignored arg1, which
; only gave the expected sum when the caller used arg1 == dest.
; NOTE(review): `add` is defined outside this view; presumably
; dest = arg1 + arg2 - confirm against its definition.
; 38 cycles
.macro add32 dest, arg1, arg2
    add 4, dest, arg1, arg2
.endmacro
; add_carry dest
; Adds the current carry flag into the byte at `dest` (dest = dest + C).
; Uses A as scratch; ADC leaves the carry-out of this byte in C.
; The sequence must be emitted exactly once: a second lda/adc/sta would
; add the carry-out of the first addition back into the same byte.
; 8 cycles (per-instruction figures assume `dest` is in zero page)
.macro add_carry dest
    lda dest ; 3 cyc
    adc #0 ; 2 cyc
    sta dest ; 3 cyc
.endmacro
; 2 + 9 * byte cycles
@ -334,29 +341,35 @@ viewport_oy:
.endrepeat
.endmacro
; sub16 dest, arg1, arg2
; 2-byte subtraction: expands the generic `sub` macro with a width of 2.
; NOTE(review): `sub` is defined outside this view; presumably
; dest = arg1 - arg2 - confirm against its definition.
; 20 cycles
.macro sub16 dest, arg1, arg2
sub 2, dest, arg1, arg2
.endmacro
; sub32 dest, arg1, arg2
; 4-byte subtraction: expands the generic `sub` macro with a width of 4.
; NOTE(review): `sub` is defined outside this view; presumably
; dest = arg1 - arg2 - confirm against its definition.
; 38 cycles
.macro sub32 dest, arg1, arg2
sub 4, dest, arg1, arg2
.endmacro
; shl bytes, arg
; Shifts the `bytes`-byte value stored low byte first at `arg` left by one bit.
; ASL shifts the lowest byte and leaves its top bit in C; each ROL then pulls
; that carry into the next higher byte. Every instruction must be emitted
; exactly once per byte, otherwise the value is shifted twice and the carry
; chain between bytes is broken.
; 5 * bytes cycles when `arg` is in zero page (ASL and ROL are 5 cycles each
; there; 6 each with absolute addressing). The earlier note of
; "3 + 5 * bytes" counted ASL as 3 cycles.
.macro shl bytes, arg
    asl arg ; 5 cyc (zero page)
    .repeat bytes-1, i
        rol arg + 1 + i ; 5 cyc (zero page)
    .endrepeat
.endmacro
; shl16 arg
; Shifts the 2-byte value at `arg` left by one bit via the generic `shl` macro.
; NOTE(review): the figure below follows the "3 + 5 * bytes" estimate; ASL and
; ROL on a zero-page operand are 5 cycles each on a 6502, which would give 10 -
; confirm.
; 13 cycles
.macro shl16 arg
shl 2, arg
.endmacro
; shl24 arg
; Shifts the 3-byte value at `arg` left by one bit via the generic `shl` macro.
; NOTE(review): the figure below follows the "3 + 5 * bytes" estimate; ASL and
; ROL on a zero-page operand are 5 cycles each on a 6502, which would give 15 -
; confirm.
; 18 cycles
.macro shl24 arg
shl 3, arg
.endmacro
; shl32 arg
; Shifts the 4-byte value at `arg` left by one bit via the generic `shl` macro.
; NOTE(review): the figure below follows the "3 + 5 * bytes" estimate; ASL and
; ROL on a zero-page operand are 5 cycles each on a 6502, which would give 20 -
; confirm.
; 23 cycles
.macro shl32 arg
shl 4, arg
.endmacro
@ -369,14 +382,17 @@ viewport_oy:
.endrepeat
.endmacro
; copy16 dest, arg
; 2-byte copy: expands the generic `copy` macro with a width of 2.
; NOTE(review): `copy` is defined outside this view; presumably it copies
; the bytes at `arg` to `dest` - confirm against its definition.
; 12 cycles
.macro copy16 dest, arg
copy 2, dest, arg
.endmacro
; copy32 dest, arg
; 4-byte copy: expands the generic `copy` macro with a width of 4.
; NOTE(review): `copy` is defined outside this view; presumably it copies
; the bytes at `arg` to `dest` - confirm against its definition.
; 24 cycles
.macro copy32 dest, arg
copy 4, dest, arg
.endmacro
; copyfloat dest, arg
; 6-byte copy: expands the generic `copy` macro with a width of 6.
; NOTE(review): the name suggests a 6-byte floating-point value, and `copy`
; is defined outside this view - confirm both against their definitions.
; 36 cycles
.macro copyfloat dest, arg
copy 6, dest, arg
.endmacro
@ -401,18 +417,20 @@ viewport_oy:
neg 4, arg
.endmacro
; shift_round_16 arg, shift
; Shifts the 4-byte value at `arg` left by `shift` bits (one shl32 per bit),
; then rounds its top 16 bits in place with round16.
; Each step must be emitted once: a second shl32 per iteration would shift by
; 2 * shift bits, and a second round16 would round an already rounded value.
; 11-27 + 23 * shift cycles
; 103-119 cycles for shift=4
.macro shift_round_16 arg, shift
    .repeat shift
        shl32 arg ; 23 cycles
    .endrepeat
    round16 arg ; 11-27 cycles
.endmacro
; imul16_round dest, arg1, arg2, shift
; Multiplies two 16-bit values and stores a rounded 16-bit result in `dest`:
;   1. copy arg1 into FR0 and arg2 into FR1
;   2. call imul16_func
;   3. shift the 4-byte value at FR2 left by `shift` bits and round its top
;      16 bits (shift_round_16)
;   4. copy the upper two bytes (FR2 + 2) into `dest`
; NOTE(review): imul16_func is defined outside this view; presumably it
; leaves the 32-bit product of FR0 and FR1 in FR2 - confirm.
; shift_round_16 must be applied exactly once; applying it twice would scale
; the product by 2^(2 * shift) and round it a second time.
.macro imul16_round dest, arg1, arg2, shift
    copy16 FR0, arg1 ; 12 cyc
    copy16 FR1, arg2 ; 12 cyc
    jsr imul16_func ; ? cyc
    shift_round_16 FR2, shift ; 103-119 cycles for shift=4
    copy16 dest, FR2 + 2 ; 12 cyc
.endmacro
@ -420,7 +438,7 @@ viewport_oy:
;imul16_round dest, arg, arg, shift
copy16 FR0, arg ; 12 cyc
jsr sqr16_func ; ? cyc
shift_round_16 FR2, shift
shift_round_16 FR2, shift ; 103-119 cycles for shift=4
copy16 dest, FR2 + 2 ; 12 cyc
.endmacro
@ -806,6 +824,7 @@ arg2_pos:
sqr16_impl 1
.endproc
; 11-27 cycles
.macro round16 arg
; Round top 16 bits of 32-bit fixed-point number in-place
.local increment
@ -818,21 +837,28 @@ arg2_pos:
; round down if negative
; < $8000: round down
lda arg + 1
cmp #$80
beq high_half
bpl increment
bmi next
; $8000 17
; $8001 27
; $8100 21
; $7fff 11
lda arg + 1 ; 3 cyc
cmp #$80 ; 2 cyc
beq high_half ; 2 cyc
bpl increment ; 2 cyc
bmi next ; 2 cyc
high_half:
lda arg
beq check_sign
bpl increment
bmi next
lda arg ; 3 cyc
beq check_sign ; 2 cyc
jmp increment ; 3 cyc
check_sign:
lda arg + 3
bmi next
lda arg + 3 ; 3 cyc
bmi next ; 2 cyc
increment: ; 5-10 cyc
inc arg + 2 ; 5 cyc