diff --git a/mandel.s b/mandel.s index fc30532..b0c2b42 100644 --- a/mandel.s +++ b/mandel.s @@ -129,11 +129,8 @@ KEY_0 = 50 mantissa .byte 5 .endstruct -.import mul_lobyte256 -.import mul_hibyte256 -.import mul_hibyte512 -.import sqr_lobyte -.import sqr_hibyte +.import mul_lobyte +.import mul_hibyte .data @@ -351,7 +348,7 @@ viewport_oy: sub 4, dest, arg1, arg2 .endmacro -; 3 + 5 * bytes cycles +; 3 + 5 * (bytes - 1) cycles .macro shl bytes, arg asl arg ; 3 cyc .repeat bytes-1, i @@ -359,17 +356,17 @@ viewport_oy: .endrepeat .endmacro -; 13 cycles +; 8 cycles .macro shl16 arg shl 2, arg .endmacro -; 18 cycles +; 13 cycles .macro shl24 arg shl 3, arg .endmacro -; 23 cycles +; 18 cycles .macro shl32 arg shl 4, arg .endmacro @@ -445,21 +442,13 @@ viewport_oy: ; clobbers a, x .macro sqr8 dest, arg ldx arg - lda sqr_lobyte,x + txa + lsr + lda mul_lobyte,x + rol sta dest - lda sqr_hibyte,x - sta dest + 1 -.endmacro - -; clobbers a, x -.macro sqr8_add16 dest, arg - ldx arg - clc - lda sqr_lobyte,x - adc dest - sta dest - lda sqr_hibyte,x - adc dest + 1 + lda mul_hibyte,x + rol sta dest + 1 .endmacro @@ -548,22 +537,25 @@ bank_switch_table: clc ; 2 cyc adc mul_factor_x ; 3 cyc tax ; 2 cyc - bcc under256 ; 2 cyc - lda mul_hibyte512,x ; 4 cyc - bcs next ; 2 cyc - under256: - lda mul_hibyte256,x ; 4 cyc - sec ; 2 cyc + lda mul_hibyte,x ; 4 cyc + bcc next ; 2 cyc + ; carry is set so we get to add 1 for free, but need to add 0x80 + adc #$7f ; 2 cyc + clc ; 2 cyc + ; stash the sum temporarily so we can use it as an operand to add + stx mul_product_lo ; 3 cyc + adc mul_product_lo ; 3 cyc next: + sec ; 2 cyc sta mul_product_hi ; 3 cyc - lda mul_lobyte256,x ; 4 cyc + lda mul_lobyte,x ; 4 cyc ; - a^2/2 ldx mul_factor_a ; 3 cyc - sbc mul_lobyte256,x ; 4 cyc + sbc mul_lobyte,x ; 4 cyc sta mul_product_lo ; 3 cyc lda mul_product_hi ; 3 cyc - sbc mul_hibyte256,x ; 4 cyc + sbc mul_hibyte,x ; 4 cyc sta mul_product_hi ; 3 cyc ; + x & a & 1: @@ -582,10 +574,10 @@ bank_switch_table: ; - x^2/2 small_product: sec ; 2 cyc - sbc mul_lobyte256,x ; 4 cyc + sbc mul_lobyte,x ; 4 cyc sta mul_product_lo ; 3 cyc lda mul_product_hi ; 3 cyc - sbc mul_hibyte256,x ; 4 cyc + sbc mul_hibyte,x ; 4 cyc sta mul_product_hi ; 3 cyc .endscope .endif @@ -792,18 +784,14 @@ arg2_pos: ; h*h*256*256 + h*l*256 + h*l*256 + l*l sqr8 result, arg - lda #0 - sta result + 2 - sta result + 3 + sqr8 result + 2, arg + 1 imul8 inter, arg + 1, arg, xe - add16 result + 1, result + 1, inter + shl16 inter add_carry result + 3 add16 result + 1, result + 1, inter add_carry result + 3 - sqr8_add16 result + 2, arg + 1 - rts ; 6 cyc .endscope .endmacro diff --git a/tables.js b/tables.js index 50cbef9..176e4df 100644 --- a/tables.js +++ b/tables.js @@ -11,40 +11,19 @@ function db(func) { return lines.join('\n'); } -let squares = []; -for (let i = 0; i < 512; i++) { - squares.push(Math.trunc((i * i + 1) / 2)); -} - console.log( `.segment "TABLES" -.export mul_lobyte256 -.export mul_hibyte256 -.export mul_hibyte512 -.export sqr_lobyte -.export sqr_hibyte +.export mul_lobyte +.export mul_hibyte -; (i * i + 1) / 2 for the multiplier +; (i * i) / 2 for the multiplier .align 256 -mul_lobyte256: -${db((i) => squares[i] & 0xff)} +mul_lobyte: +${db((i) => ((i * i) >> 1) & 0xff)} .align 256 -mul_hibyte256: -${db((i) => (squares[i] >> 8) & 0xff)} - -.align 256 -mul_hibyte512: -${db((i) => (squares[i + 256] >> 8) & 0xff)} - -; (i * i) for the plain squares -.align 256 -sqr_lobyte: -${db((i) => (i * i) & 0xff)} - -.align 256 -sqr_hibyte: -${db((i) => ((i * i) >> 8) & 0xff)} +mul_hibyte: +${db((i) => ((i * i) >> 9) & 0xff)} `);