dither4/dither4.s
2022-12-11 16:37:47 -08:00

253 lines
5.7 KiB
ArmAsm

; Hardware and OS addresses used by this program. The names follow the
; standard Atari 8-bit memory map; the notes describe how this file uses them.
SAVMSC = $58 ; not referenced in this file
VDSLST = $200 ; display list interrupt vector (two bytes)
VDSLSTL = $200 ; low byte of the vector, written by start
VDSLSTH = $201 ; high byte of the vector, written by start
COLPF0 = $D016 ; color registers rewritten right after each WSYNC in inner_scanline
COLPF1 = $D017
COLPF2 = $D018
COLPF3 = $D019 ; not referenced in this file
COLBK = $D01A ; not referenced in this file
AUDC1 = $D201 ; audio register that receives the sample bytes
DMACTL = $D400 ; display DMA control: start writes $00 to disable, $22 to re-enable
DLISTL = $D402 ; display list address, low byte
DLISTH = $D403 ; display list address, high byte
WSYNC = $D40A ; written to stall until horizontal blank
VCOUNT = $D40B ; line counter; polled for 0 to find the top of the frame
NMIEN = $D40E ; NMI enable mask: start writes $20 (Reset only), then $a0 (Reset + DLI)
; Zero-page variables. Each variable gets its own address; two-byte
; pointers are declared as <name>l / <name>h with <name> aliasing the low byte.
temp1l = $80            ; scratch pointer 1 (not referenced in this file)
temp1h = $81
temp1 = temp1l
temp2l = $82            ; scratch pointer 2 (not referenced in this file)
temp2h = $83
temp2 = temp2l
sample_ptrl = $84       ; pointer to the current 256-byte page of audio samples
sample_ptrh = $85
sample_ptr = sample_ptrl
scanline = $86          ; VCOUNT value saved across the first line of a pair
audiotemp = $87         ; sample byte stashed by audio_prep
; sample_index used to be $80, which aliased temp1l/temp1. $88 is the
; unused slot between audiotemp and frame_counter.
sample_index = $88      ; offset of the next sample within the current page
frame_counter = $89     ; 0 selects run_frame1, anything else run_frame2
; Picture geometry and frame timing constants.
height = 160 ; not referenced in this file (presumably picture height in scanlines)
bytes_per_line = 40 ; not referenced in this file
pages_per_frame = 32 ; 32 pages * 256 bytes = 8192, the frame_offset start passes for the second frame
lines_per_frame = 262 ; scanlines per video frame assumed when computing scanline_max
; Earlier formula for scanline_offset, kept for reference (it evaluates to 39):
;scanline_offset = 31 + (40 - 24) / 2
scanline_offset = 46 ; inner_scanline subtracts scanline_offset / 2 from its table addresses before indexing by Y
scanline_max = (lines_per_frame - scanline_offset) / 2 ; = 108; not referenced in this file
.data
.import audio_samples
.import audio_samples_end
.import frame1_top
.import frame1_bottom
.import frame1_palette1_even
.import frame1_palette1_odd
.import frame1_palette2_even
.import frame1_palette2_odd
.import frame1_palette3_even
.import frame1_palette3_odd
.import frame2_top
.import frame2_bottom
.import frame2_palette1_even
.import frame2_palette1_odd
.import frame2_palette2_even
.import frame2_palette2_odd
.import frame2_palette3_even
.import frame2_palette3_odd
.import displaylist
; audio_high_byte: 256-entry lookup table indexed by a full sample byte.
; Entry b holds (b >> 4) | $10, i.e. the sample's high nibble with bit 4 set,
; ready to be stored to AUDC1 (see audio_play_hi).
;
; The table is built as 16 runs of 16 identical bytes, one run per high
; nibble $0..$f. The run for $6 was missing, which shifted every entry from
; index $60 upward and left the table 16 bytes short.
audio_high_byte:
.scope
.macro byteseq val
    ; Emit 16 copies of (val | $10): one run of the table.
    .repeat 16
    .byte val | $10
    .endrep
.endmacro
    byteseq $0
    byteseq $1
    byteseq $2
    byteseq $3
    byteseq $4
    byteseq $5
    byteseq $6
    byteseq $7
    byteseq $8
    byteseq $9
    byteseq $a
    byteseq $b
    byteseq $c
    byteseq $d
    byteseq $e
    byteseq $f
.endscope
; Guard against a run being dropped or duplicated again.
.assert * - audio_high_byte = 256, error, "audio_high_byte must have exactly 256 entries"
.code
.export start

; start: program entry point. Never returns.
;
; Initialises the playback state, points the hardware at the display list
; and the DLI handler, then waits for the top of the frame and falls
; through (past the macro definitions, which emit no code) into the
; per-frame dispatch at the end of this proc.
;
; Clobbers: A, X, Y, flags.
.proc start
    ; Set up the audio sample buffer: sample_ptr -> first page, index 0
    lda #.lobyte(audio_samples)
    sta sample_ptrl
    lda #.hibyte(audio_samples)
    sta sample_ptrh
    lda #0
    sta sample_index
    ; frame_counter is read by the frame dispatch before anything else
    ; writes it, so give it a defined starting value (0 = first frame).
    ; A leftover value other than 0/1 would never toggle back to 0.
    sta frame_counter

    ; Disable display DMA
    lda #$00
    sta DMACTL

    ; Disable VBI and DLI but allow Reset
    lda #$20
    sta NMIEN

    ; Set up the display list
    lda #.lobyte(displaylist)
    sta DLISTL
    lda #.hibyte(displaylist)
    sta DLISTH

    ; Set up the DLI handler (vector must be valid before DLIs are enabled)
    lda #.lobyte(dli_handler)
    sta VDSLSTL
    lda #.hibyte(dli_handler)
    sta VDSLSTH

    ; Disable VBI but allow Reset and DLI
    lda #$a0
    sta NMIEN

    ; Manually wait for first scan line: step one line at a time
    ; until VCOUNT reads 0
wait_vblank:
    sta WSYNC
    lda VCOUNT
    bne wait_vblank

    ; Re-enable display DMA
    lda #$22
    sta DMACTL

    ; Re-entry point used by run_frame at the end of every frame.
wait_start:
    ; Wait for the vblank
    ; Resynchronize the scanline counter: spin until VCOUNT reads 0.
    ; Y holds that VCOUNT value (0) on exit, as run_frame expects.
wait_loop:
    ldy VCOUNT          ; 4 cycles
    bne wait_loop       ; 2 cycles (3 when taken)
; audio_prep: load the sample byte at (sample_ptr)+Y and stash it in
; audiotemp, where audio_play_lo / audio_play_hi read it back.
; In:  Y = index into the current sample page
; Clobbers: A, flags
; Not invoked anywhere in the visible part of this file.
.macro audio_prep
; Y is VCOUNT at entry
lda (sample_ptr),y ; 5/6 cyc
sta audiotemp ; 3 cyc
.endmacro
; inner_scanline frame_offset, line_offset
; Fetch three color bytes for the line indexed by Y, wait for horizontal
; blank, then store them to COLPF0..COLPF2 back to back.
;
; frame_offset, line_offset: constants added to each table's base address.
; The table addresses are also biased by -(scanline_offset / 2) so that the
; raw VCOUNT value in Y can be used as the index.
;
; In:  Y = VCOUNT-based line index
; Clobbers: A, X, Y, flags; uses one byte of stack (pha/pla).
; Only the frame1_*_even tables are referenced; the odd tables are not
; used yet (see the @FIXME below).
.macro inner_scanline frame_offset, line_offset
; Y should be VCOUNT at entry
; it'll fire on unused lines, but harmlessly
;ldy scanline ; 3 cyc
;inc scanline ; 5 cyc
; 21-24 cycles of loads before the WSYNC store (25-28 including it)
; Leisurely memory fetches
lda frame1_palette1_even + frame_offset + line_offset - scanline_offset / 2,y ; 4/5 @FIXME alternate
pha ; 3 -- park the first color; A is needed for the third load
ldx frame1_palette2_even + frame_offset + line_offset - scanline_offset / 2,y ; 4/5
lda frame1_palette3_even + frame_offset + line_offset - scanline_offset / 2,y ; 4/5
tay ; 2 -- third color goes to Y; the line index is no longer needed
pla ; 4 -- first color back into A
; Wait for horizontal blank
sta WSYNC ; 4
; 12 cycles after break
; Update color registers as fast as possible
sta COLPF0 ; 4
stx COLPF1 ; 4
sty COLPF2 ; 4
.endmacro
; audio_play_raw: write the current sample byte, unmodified, to AUDC1.
; Reads the byte at (sample_ptr)+sample_index.
; Clobbers: A, Y, flags. 12-13 cycles in total (3 + 5/6 + 4).
.macro audio_play_raw
;ldy VCOUNT ; set on entry
ldy sample_index ; 3 cycles
lda (sample_ptr),y ; 5/6 cyc
sta AUDC1 ; 4 cyc
.endmacro
; audio_play_lo: write the low nibble of audiotemp, with bit 4 set, to AUDC1.
; Expects audiotemp to have been filled beforehand (audio_prep does this).
; Clobbers: A, flags. 11 cycles in total.
; Not invoked anywhere in the visible part of this file.
.macro audio_play_lo
lda audiotemp ; 3 cyc
and #$0f ; 2 cyc -- keep the low nibble
ora #$10 ; 2 cyc -- set bit 4, as the audio_high_byte table entries do
sta AUDC1 ; 4 cyc
.endmacro
; audio_play_hi: look up audiotemp in the audio_high_byte table and write
; the resulting byte to AUDC1.
; Expects audiotemp to have been filled beforehand (audio_prep does this).
; Clobbers: A, Y, flags.
; Not invoked anywhere in the visible part of this file.
.macro audio_play_hi ; 11-12 cycles
ldy audiotemp ; 3 cyc
lda audio_high_byte,y ; 4/5 cyc (extra cycle when the indexed read crosses a page)
sta AUDC1 ; 4 cyc
.endmacro
; audio_inc: advance the playback position by one sample.
;
; Increments sample_index; when it wraps to 0, steps sample_ptrh to the
; next page, and when that reaches the page of audio_samples_end, rewinds
; to the first page of audio_samples.
;
; Clobbers: A (only when the index wraps), flags.
; Cycles: 8 on the common path, 20 on a page step, 24 on a rewind.
.macro audio_inc
    ; Local label so the macro can be expanded more than once per scope.
    .local audio_cont
    inc sample_index                    ; 5
    bne audio_cont                      ; 2 (3 when taken)
    ; Index wrapped: move on to the next page (optional path)
    inc sample_ptrh                     ; 5
    lda sample_ptrh                     ; 3
    cmp #.hibyte(audio_samples_end)     ; 2
    ; Page numbers are unsigned, so use carry for "ptr < end". The signed
    ; bmi test misfires when the two pages are 128 or more apart.
    bcc audio_cont                      ; 2 (3 when taken)
    ; Reached the end page: rewind to the first page (optional path)
    lda #.hibyte(audio_samples)         ; 2
    sta sample_ptrh                     ; 3
audio_cont:
.endmacro
; run_frame frame_offset
; Display kernel for one video frame. Expands to a loop that handles two
; scanlines per pass (VCOUNT is read once per pass), followed by the
; end-of-frame bookkeeping.
;
; frame_offset: constant passed through to inner_scanline and added to the
;               palette table addresses to select the frame's data.
;
; In:  Y = current VCOUNT value (0 when entered from wait_loop)
; Out: does not fall through; it ends with a jump back to wait_start.
; Clobbers: A, X, Y, flags, scanline.
;
; The body is wrapped in .scope so that each expansion gets its own copy
; of the labels it defines.
.macro run_frame frame_offset
.scope
; each scanline is 228 color clocks
; that's 114 CPU cycles
; minus 41-43 for DMA leaves 71-73 clock cycles per line
each_scanline_pair:
sty scanline ; 3 cycles -- inner_scanline destroys Y, keep the index for the second line
inner_scanline frame_offset, 0 ; color loads, WSYNC, then 12 cycles of color stores
audio_play_raw ; 12-13 cycles (3 + 5/6 + 4)
audio_inc ; 8 cycles usually, up to 24 when the sample page changes
ldy scanline ; 3 cycles
inner_scanline frame_offset, 128 ; same again with the table addresses moved up by 128
; pair cleanup: 6 cycles (7 when the branch is taken)
ldy VCOUNT ; 4 cycles
bne each_scanline_pair ; 2 cycles (3 when taken) -- loop until VCOUNT reads 0 again
; frame cleanup: 11 cycles
lda frame_counter ; 3 cycles
eor #1 ; 2 cycles -- flip bit 0 so the other frame is dispatched next
sta frame_counter ; 3 cycles
jmp wait_start ; 3 cycles
.endscope
.endmacro
lda frame_counter ; 3 cycles
beq run_frame1 ; 2 cycles
jmp run_frame2 ; 3 cycles
run_frame1:
run_frame 0
run_frame2:
run_frame 8192
.endproc
; dli_handler: display list interrupt handler, installed in VDSLST by start.
; Forces frame_counter back to 0.
;
; This runs as an interrupt in the middle of whatever the main loop is
; doing, so it must hand back every register unchanged. A is saved on the
; stack around the store; X and Y are not touched. The flags are restored
; by rti.
; NOTE(review): when this fires depends on the imported display list,
; which is not visible here.
.proc dli_handler
    pha                     ; preserve the interrupted code's accumulator
    lda #0
    sta frame_counter
    pla
    rti
.endproc