no diff using different temp

2023-02-11 15:47:58 -08:00 · 2023-02-11 15:47:58 -08:00 · 99197bb7b2
commit 99197bb7b2
parent e3c80bff59
3 changed files with 86 additions and 8 deletions
--- a/mandel.s
+++ b/mandel.s
@ -417,6 +417,9 @@ positive_result:

 ; Adapted from https://everything2.com/title/Fast+6502+multiplication
 .macro imul8 dest, arg1, arg2
+    .local under256
+    .local next
+    .local small_product
    .scope
        mul_factor_a   = arg1
        mul_factor_x   = arg2
@ -468,6 +471,7 @@ positive_result:
    arg1 = FR0   ; 16-bit arg (clobbered)
    arg2 = FR1   ; 16-bit arg (clobbered)
    result = FR2 ; 32-bit result
+    inter = temp2

    ldy #0          ; 2 cyc
    ; counts the number of sign bits in Y
@ -480,18 +484,18 @@ positive_result:
    sta result + 2
    sta result + 3

-    imul8 temp, arg1, arg2
-    add16 result, result, temp
+    imul8 inter, arg1, arg2
+    add16 result, result, inter

-    imul8 temp, arg1 + 1, arg2
-    add16 result + 1, result + 1, temp
+    imul8 inter, arg1 + 1, arg2
+    add16 result + 1, result + 1, inter

-    imul8 temp, arg1, arg2 + 1
-    add16 result + 1, result + 1, temp
+    imul8 inter, arg1, arg2 + 1
+    add16 result + 1, result + 1, inter
    add_carry result + 3

-    imul8 temp, arg1 + 1, arg2 + 1
-    add16 result + 2, result + 2, temp
+    imul8 inter, arg1 + 1, arg2 + 1
+    add16 result + 2, result + 2, inter

    ; In case of mixed input signs, return a negative result.
    cpy #1              ; 2 cyc
--- a/tables.js
+++ b/tables.js
@ -0,0 +1,33 @@
+function db(func) { // Format func(0)..func(255) as 16 `.byte` rows of 16 values each.
+    let lines = [];
+    for (let i = 0; i < 256; i += 16) { // i is the first table index of each row
+        let items = [];
+        for (let j = 0; j < 16; j++) {
+            let x = i + j; // table index, 0..255
+            items.push(func(x));
+        }
+        lines.push('    .byte ' + items.join(', ')); // one indented `.byte` line per row
+    }
+    return lines.join('\n'); // rows separated by newlines, no trailing newline
+}
+
+console.log( // Print the "TABLES" segment text: three exported, `.align 256` tables, each filled by db().
+`.segment "TABLES"
+
+.export mul_lobyte256
+.export mul_hibyte256
+.export mul_hibyte512
+
+.align 256
+mul_lobyte256:
+${db((x) => Math.round(x * x / 2) & 0xff)}
+
+.align 256
+mul_hibyte256:
+${db((x) => (Math.round(x * x / 2) >> 8) & 0xff)}
+
+.align 256
+mul_hibyte512:
+${db((x) => (Math.round((x + 256) * (x + 256) / 2) >> 8) & 0xff)}
+
+`);
--- a/testme.js
+++ b/testme.js
@ -0,0 +1,41 @@
+// ax = (a + x)^2/2 - a^2/2 - x^2/2
+
+function half_square(x) { // x*x/2 rounded with Math.round, reduced to its low 16 bits
+    return Math.round(x * x / 2) & 0xffff >>> 0; // `>>>` binds tighter than `&`, so this parses as `... & (0xffff >>> 0)`, i.e. `& 0xffff`
+}
+
+function mul8(a, b) { // a*b modulo 2^16 from half-squares: ab = (a+b)^2/2 - a^2/2 - b^2/2
+    let result = half_square(a + b) & 0xffff;
+    result = (result - half_square(a)) & 0xffff; // every step is masked back to 16 bits
+    result = (result - half_square(b)) & 0xffff;
+    result = (result + (b & a & 1)) & 0xffff; // add 1 when both inputs are odd, offsetting the two rounded-up half-squares
+    return result >>> 0;
+}
+
+function mul16(a, b) { // Multiply the low 16 bits of a and b using four mul8 partial products
+    let ah = (a & 0xff00) >>> 8; // high byte of a
+    let al = (a & 0x00ff) >>> 0; // low byte of a
+    let bh = (b & 0xff00) >>> 8; // high byte of b
+    let bl = (b & 0x00ff) >>> 0; // low byte of b
+    let result = (mul8(al, bl) & 0xffff) >>> 0; // al * bl
+    result = ((result + (mul8(ah, bl) << 8)) & 0x00ffffff) >>> 0; // + (ah * bl) shifted up one byte
+    result = ((result + (mul8(al, bh) << 8)) & 0x01ffffff) >>> 0; // + (al * bh) shifted up one byte
+    result = ((result + (mul8(ah, bh) << 16)) & 0xffffffff) >>> 0; // + (ah * bh) shifted up two bytes; `>>> 0` makes the value unsigned
+    return result;
+}
+
+let max = 65536; // exclusive upper bound for both operands
+//let max = 256;
+//let max = 128;
+//let max = 8;
+
+for (let a = 0; a < max; a++) { // check every (a, b) pair below max
+    for (let b = 0; b < max; b++) {
+        let expected = Math.imul(a, b) >>> 0; // reference product as an unsigned 32-bit value
+        //let actual = mul8(a, b);
+        let actual = mul16(a, b);
+        if (expected !== actual) {
+            console.log(`wrong! ${a} * ${b} expected ${expected} got ${actual}`); // only mismatches are printed
+        }
+    }
+}