diff --git a/mandel.s b/mandel.s index b0c2b42..fc30532 100644 --- a/mandel.s +++ b/mandel.s @@ -129,8 +129,11 @@ KEY_0 = 50 mantissa .byte 5 .endstruct -.import mul_lobyte -.import mul_hibyte +.import mul_lobyte256 +.import mul_hibyte256 +.import mul_hibyte512 +.import sqr_lobyte +.import sqr_hibyte .data @@ -348,7 +351,7 @@ viewport_oy: sub 4, dest, arg1, arg2 .endmacro -; 3 + 5 * (bytes - 1) cycles +; 3 + 5 * bytes cycles .macro shl bytes, arg asl arg ; 3 cyc .repeat bytes-1, i @@ -356,17 +359,17 @@ viewport_oy: .endrepeat .endmacro -; 8 cycles +; 13 cycles .macro shl16 arg shl 2, arg .endmacro -; 13 cycles +; 18 cycles .macro shl24 arg shl 3, arg .endmacro -; 18 cycles +; 23 cycles .macro shl32 arg shl 4, arg .endmacro @@ -442,13 +445,21 @@ viewport_oy: ; clobbers a, x .macro sqr8 dest, arg ldx arg - txa - lsr - lda mul_lobyte,x - rol + lda sqr_lobyte,x sta dest - lda mul_hibyte,x - rol + lda sqr_hibyte,x + sta dest + 1 +.endmacro + +; clobbers a, x +.macro sqr8_add16 dest, arg + ldx arg + clc + lda sqr_lobyte,x + adc dest + sta dest + lda sqr_hibyte,x + adc dest + 1 sta dest + 1 .endmacro @@ -537,25 +548,22 @@ bank_switch_table: clc ; 2 cyc adc mul_factor_x ; 3 cyc tax ; 2 cyc - lda mul_hibyte,x ; 4 cyc - bcc next ; 2 cyc - ; carry is set so we get to add 1 for free, but need to add 0x80 - adc #$7f ; 2 cyc - clc ; 2 cyc - ; stash the sum temporarily so we can use it as an operand to add - stx mul_product_lo ; 3 cyc - adc mul_product_lo ; 3 cyc - next: + bcc under256 ; 2 cyc + lda mul_hibyte512,x ; 4 cyc + bcs next ; 2 cyc + under256: + lda mul_hibyte256,x ; 4 cyc sec ; 2 cyc + next: sta mul_product_hi ; 3 cyc - lda mul_lobyte,x ; 4 cyc + lda mul_lobyte256,x ; 4 cyc ; - a^2/2 ldx mul_factor_a ; 3 cyc - sbc mul_lobyte,x ; 4 cyc + sbc mul_lobyte256,x ; 4 cyc sta mul_product_lo ; 3 cyc lda mul_product_hi ; 3 cyc - sbc mul_hibyte,x ; 4 cyc + sbc mul_hibyte256,x ; 4 cyc sta mul_product_hi ; 3 cyc ; + x & a & 1: @@ -574,10 +582,10 @@ bank_switch_table: ; - x^2/2 small_product: sec ; 2 cyc - sbc mul_lobyte,x ; 4 cyc + sbc mul_lobyte256,x ; 4 cyc sta mul_product_lo ; 3 cyc lda mul_product_hi ; 3 cyc - sbc mul_hibyte,x ; 4 cyc + sbc mul_hibyte256,x ; 4 cyc sta mul_product_hi ; 3 cyc .endscope .endif @@ -784,14 +792,18 @@ arg2_pos: ; h*h*256*256 + h*l*256 + h*l*256 + l*l sqr8 result, arg - sqr8 result + 2, arg + 1 + lda #0 + sta result + 2 + sta result + 3 imul8 inter, arg + 1, arg, xe - shl16 inter + add16 result + 1, result + 1, inter add_carry result + 3 add16 result + 1, result + 1, inter add_carry result + 3 + sqr8_add16 result + 2, arg + 1 + rts ; 6 cyc .endscope .endmacro diff --git a/tables.js b/tables.js index 176e4df..50cbef9 100644 --- a/tables.js +++ b/tables.js @@ -11,19 +11,40 @@ function db(func) { return lines.join('\n'); } +let squares = []; +for (let i = 0; i < 512; i++) { + squares.push(Math.trunc((i * i + 1) / 2)); +} + console.log( `.segment "TABLES" -.export mul_lobyte -.export mul_hibyte +.export mul_lobyte256 +.export mul_hibyte256 +.export mul_hibyte512 +.export sqr_lobyte +.export sqr_hibyte -; (i * i) / 2 for the multiplier +; (i * i + 1) / 2 for the multiplier .align 256 -mul_lobyte: -${db((i) => ((i * i) >> 1) & 0xff)} +mul_lobyte256: +${db((i) => squares[i] & 0xff)} .align 256 -mul_hibyte: -${db((i) => ((i * i) >> 9) & 0xff)} +mul_hibyte256: +${db((i) => (squares[i] >> 8) & 0xff)} + +.align 256 +mul_hibyte512: +${db((i) => (squares[i + 256] >> 8) & 0xff)} + +; (i * i) for the plain squares +.align 256 +sqr_lobyte: +${db((i) => (i * i) & 0xff)} + +.align 256 +sqr_hibyte: +${db((i) => ((i * i) >> 8) & 0xff)} `);