forked from brooke/mandel-6502
Faster imul16 routine
Improves runtime from 16.24 ms/px to 14.44 ms/px This uses a routine found on Everything2: https://everything2.com/title/Fast+6502+multiplication which uses a lookup table of squares to do 8-bit imuls, which are then composed into a 16-bit imul
This commit is contained in:
parent
29630c8887
commit
5637783529
5 changed files with 183 additions and 81 deletions
41
testme.js
Normal file
41
testme.js
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
// ax = (a + x)2/2 - a2/2 - x2/2
|
||||
|
||||
function half_square(x) {
|
||||
return Math.round(x * x / 2) & 0xffff >>> 0;
|
||||
}
|
||||
|
||||
function mul8(a, b) {
|
||||
let result = half_square(a + b) & 0xffff;
|
||||
result = (result - half_square(a)) & 0xffff;
|
||||
result = (result - half_square(b)) & 0xffff;
|
||||
result = (result + (b & a & 1)) & 0xffff;
|
||||
return result >>> 0;
|
||||
}
|
||||
|
||||
function mul16(a, b) {
|
||||
let ah = (a & 0xff00) >>> 8;
|
||||
let al = (a & 0x00ff) >>> 0;
|
||||
let bh = (b & 0xff00) >>> 8;
|
||||
let bl = (b & 0x00ff) >>> 0;
|
||||
let result = (mul8(al, bl) & 0xffff) >>> 0;
|
||||
result = ((result + (mul8(ah, bl) << 8)) & 0x00ffffff) >>> 0;
|
||||
result = ((result + (mul8(al, bh) << 8)) & 0x01ffffff) >>> 0;
|
||||
result = ((result + (mul8(ah, bh) << 16)) & 0xffffffff) >>> 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
let max = 65536;
|
||||
//let max = 256;
|
||||
//let max = 128;
|
||||
//let max = 8;
|
||||
|
||||
for (let a = 0; a < max; a++) {
|
||||
for (let b = 0; b < max; b++) {
|
||||
let expected = Math.imul(a, b) >>> 0;
|
||||
//let actual = mul8(a, b);
|
||||
let actual = mul16(a, b);
|
||||
if (expected !== actual) {
|
||||
console.log(`wrong! ${a} * ${b} expected ${expected} got ${actual}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue