diff --git a/mandel.s b/mandel.s
index 6880e8b..c6d3cf7 100644
--- a/mandel.s
+++ b/mandel.s
@@ -16,6 +16,7 @@ dist = $9c ; fixed6.26: z_x^2 + z_y^2
 iter = $a0 ; u8: iteration count
 zoom = $a1 ; u8: zoom shift level
 temp = $a2 ; u16
+temp2 = $a4 ; u16
 
 ; FP registers in zero page
 FR0 = $d4
@@ -33,7 +34,8 @@ framebuffer_end = $a000
 height = 184
 half_height = height >> 1
 width = 160
-half_width = 160 >> 1
+half_width = width >> 1
+stride = width >> 2 ; bytes per screen line (4 pixels per byte)
 
 width_ratio_3_13 = (5 << 11) ; 5/4
 height_ratio_3_13 = (3 << 11) ; 5/4
@@ -133,6 +135,14 @@ display_list_start:
 display_list_end:
 display_list_len = display_list_end - display_list_start
 
+color_map:
+    .byte 0                 ; index 0 -> color 0
+    .repeat 85              ; 85 * 3 = 255 further entries cycling 1, 2, 3
+    .byte 1
+    .byte 2
+    .byte 3
+    .endrepeat
+
 .code
 
 .export start
@@ -228,6 +238,21 @@ display_list_len = display_list_end - display_list_start
     neg 4, arg
 .endmacro
 
+.macro extend_8_16 dest, src
+    ; sign-extends the byte at src into the 16-bit dest; clobbers A, X
+    ; 13-15 cycles
+    .local positive
+    .local negative
+    ldx #0 ; 2 cyc
+    lda src ; 3 cyc
+    sta dest ; 3 cyc
+    bpl positive ; 2 cyc
+negative:
+    dex ; 2 cyc
+positive:
+    stx dest + 1 ; 3 cyc
+.endmacro
+
 ; inner loop for imul16
 ; bitnum < 8: 25 or 41 cycles
 ; bitnum >= 8: 30 or 46 cycles
@@ -499,6 +524,97 @@ enough:
     ; screen coords in signed sx,sy
     ; iter holds the target to use
     ; @todo implement
+
+    ; Plots one pixel into the 2-bit-per-pixel framebuffer (4 pixels per
+    ; byte, stride bytes per line), using color_map[iter] as the color.
+    ; Clobbers A, X, Y, temp and the pixel_* scratch bytes below.
+
+    pixel_ptr = $b0     ; u16: address of the start of the pixel's line
+    pixel_color = $b2   ; u8: color bits, shifted into position
+    pixel_mask = $b3    ; u8: AND mask that clears the target pixel
+    pixel_shift = $b4   ; u8: pixel index within its byte (0-3)
+    pixel_offset = $b5  ; u8: byte offset of the pixel within its line
+
+    ; iter -> color
+    ldx iter
+    lda color_map,x
+    sta pixel_color
+    lda #(255 - 3)          ; %11111100: clears the lowest 2-bit pixel
+    sta pixel_mask
+
+    ; sy -> base address for the half of the screen that holds the line
+    lda sy
+    bpl positive
+
+negative:
+    ; sy < 0: the negative row offset added below steps back from here
+    lda #.lobyte(framebuffer_top + stride * half_height)
+    sta pixel_ptr
+    lda #.hibyte(framebuffer_top + stride * half_height)
+    sta pixel_ptr + 1
+    jmp point
+
+positive:
+    ; sy >= 0: rows are counted forward from framebuffer_bottom
+    lda #.lobyte(framebuffer_bottom)
+    sta pixel_ptr
+    lda #.hibyte(framebuffer_bottom)
+    sta pixel_ptr + 1
+
+point:
+    ; pixel_ptr += sy * stride, with stride = 40:
+    ;   sy * 40 = (sy << 3) + (sy << 5)
+    ; NOTE(review): assumes add16 takes (dest, arg1, arg2), dest first
+    ; like extend_8_16 -- confirm against its definition.
+    .assert stride = 40, error, "row offset math assumes stride = 40"
+    extend_8_16 temp, sy
+    shl16 temp
+    shl16 temp
+    shl16 temp
+    add16 pixel_ptr, pixel_ptr, temp    ; += sy * 8
+    shl16 temp
+    shl16 temp
+    add16 pixel_ptr, pixel_ptr, temp    ; += sy * 32
+
+    ; x = sx + half_width; temp's low byte is reused to hold it
+    lda sx
+    clc
+    adc #half_width
+    sta temp
+
+    ; pixel_shift = x & 3, the pixel's index within its byte.
+    ; ANTIC bitmap modes keep the leftmost pixel in the top two bits, so
+    ; color and mask move left by 2 * (3 - pixel_shift) bits, shifting
+    ; zeros into the color and ones into the mask.
+    and #3
+    sta pixel_shift
+    eor #3                  ; 3 - pixel_shift, valid because A is 0-3
+    tax                     ; also sets Z for the branch below
+    beq shift_done
+shift_loop:
+    asl pixel_color
+    asl pixel_color
+    sec
+    rol pixel_mask
+    sec
+    rol pixel_mask
+    dex
+    bne shift_loop
+shift_done:
+
+    ; pixel_offset = x >> 2, the byte within the line
+    lda temp
+    lsr a
+    lsr a
+    sta pixel_offset
+    tay
+
+    ; read, mask, or, write through the line pointer
+    lda (pixel_ptr),y
+    and pixel_mask
+    ora pixel_color
+    sta (pixel_ptr),y
+
     rts
 .endproc
 