From 99197bb7b2d3387ee45d1a357145cbdaea00b7f3 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sat, 11 Feb 2023 15:47:58 -0800 Subject: [PATCH] no diff using different temp --- mandel.s | 20 ++++++++++++-------- tables.js | 33 +++++++++++++++++++++++++++++++++ testme.js | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 8 deletions(-) create mode 100644 tables.js create mode 100644 testme.js diff --git a/mandel.s b/mandel.s index 023a1ea..71bc6c2 100644 --- a/mandel.s +++ b/mandel.s @@ -417,6 +417,9 @@ positive_result: ; Adapted from https://everything2.com/title/Fast+6502+multiplication .macro imul8 dest, arg1, arg2 + .local under256 + .local next + .local small_product .scope mul_factor_a = arg1 mul_factor_x = arg2 @@ -468,6 +471,7 @@ positive_result: arg1 = FR0 ; 16-bit arg (clobbered) arg2 = FR1 ; 16-bit arg (clobbered) result = FR2 ; 32-bit result + inter = temp2 ldy #0 ; 2 cyc ; counts the number of sign bits in Y @@ -480,18 +484,18 @@ positive_result: sta result + 2 sta result + 3 - imul8 temp, arg1, arg2 - add16 result, result, temp + imul8 inter, arg1, arg2 + add16 result, result, inter - imul8 temp, arg1 + 1, arg2 - add16 result + 1, result + 1, temp + imul8 inter, arg1 + 1, arg2 + add16 result + 1, result + 1, inter - imul8 temp, arg1, arg2 + 1 - add16 result + 1, result + 1, temp + imul8 inter, arg1, arg2 + 1 + add16 result + 1, result + 1, inter add_carry result + 3 - imul8 temp, arg1 + 1, arg2 + 1 - add16 result + 2, result + 2, temp + imul8 inter, arg1 + 1, arg2 + 1 + add16 result + 2, result + 2, inter ; In case of mixed input signs, return a negative result. 
cpy #1 ; 2 cyc diff --git a/tables.js b/tables.js new file mode 100644 index 0000000..5afc3c0 --- /dev/null +++ b/tables.js @@ -0,0 +1,33 @@ +function db(func) { + let lines = []; + for (let i = 0; i < 256; i += 16) { + let items = []; + for (let j = 0; j < 16; j++) { + let x = i + j; + items.push(func(x)); + } + lines.push(' .byte ' + items.join(', ')); + } + return lines.join('\n'); +} + +console.log( +`.segment "TABLES" + +.export mul_lobyte256 +.export mul_hibyte256 +.export mul_hibyte512 + +.align 256 +mul_lobyte256: +${db((x) => Math.round(x * x / 2) & 0xff)} + +.align 256 +mul_hibyte256: +${db((x) => (Math.round(x * x / 2) >> 8) & 0xff)} + +.align 256 +mul_hibyte512: +${db((x) => (Math.round((x + 256) * (x + 256) / 2) >> 8) & 0xff)} + +`); diff --git a/testme.js b/testme.js new file mode 100644 index 0000000..e12e706 --- /dev/null +++ b/testme.js @@ -0,0 +1,41 @@ +// ax = (a + x)^2/2 - a^2/2 - x^2/2 + +function half_square(x) { + return Math.round(x * x / 2) & 0xffff >>> 0; +} + +function mul8(a, b) { + let result = half_square(a + b) & 0xffff; + result = (result - half_square(a)) & 0xffff; + result = (result - half_square(b)) & 0xffff; + result = (result + (b & a & 1)) & 0xffff; + return result >>> 0; +} + +function mul16(a, b) { + let ah = (a & 0xff00) >>> 8; + let al = (a & 0x00ff) >>> 0; + let bh = (b & 0xff00) >>> 8; + let bl = (b & 0x00ff) >>> 0; + let result = (mul8(al, bl) & 0xffff) >>> 0; + result = ((result + (mul8(ah, bl) << 8)) & 0x00ffffff) >>> 0; + result = ((result + (mul8(al, bh) << 8)) & 0x01ffffff) >>> 0; + result = ((result + (mul8(ah, bh) << 16)) & 0xffffffff) >>> 0; + return result; +} + +let max = 65536; +//let max = 256; +//let max = 128; +//let max = 8; + +for (let a = 0; a < max; a++) { + for (let b = 0; b < max; b++) { + let expected = Math.imul(a, b) >>> 0; + //let actual = mul8(a, b); + let actual = mul16(a, b); + if (expected !== actual) { + console.log(`wrong! 
${a} * ${b} expected ${expected} got ${actual}`); + } + } +} \ No newline at end of file