no diff using different temp
This commit is contained in:
parent
e3c80bff59
commit
99197bb7b2
20
mandel.s
20
mandel.s
|
@ -417,6 +417,9 @@ positive_result:
|
|||
|
||||
; Adapted from https://everything2.com/title/Fast+6502+multiplication
|
||||
.macro imul8 dest, arg1, arg2
|
||||
.local under256
|
||||
.local next
|
||||
.local small_product
|
||||
.scope
|
||||
mul_factor_a = arg1
|
||||
mul_factor_x = arg2
|
||||
|
@ -468,6 +471,7 @@ positive_result:
|
|||
arg1 = FR0 ; 16-bit arg (clobbered)
|
||||
arg2 = FR1 ; 16-bit arg (clobbered)
|
||||
result = FR2 ; 32-bit result
|
||||
inter = temp2
|
||||
|
||||
ldy #0 ; 2 cyc
|
||||
; counts the number of sign bits in Y
|
||||
|
@ -480,18 +484,18 @@ positive_result:
|
|||
sta result + 2
|
||||
sta result + 3
|
||||
|
||||
imul8 temp, arg1, arg2
|
||||
add16 result, result, temp
|
||||
imul8 inter, arg1, arg2
|
||||
add16 result, result, inter
|
||||
|
||||
imul8 temp, arg1 + 1, arg2
|
||||
add16 result + 1, result + 1, temp
|
||||
imul8 inter, arg1 + 1, arg2
|
||||
add16 result + 1, result + 1, inter
|
||||
|
||||
imul8 temp, arg1, arg2 + 1
|
||||
add16 result + 1, result + 1, temp
|
||||
imul8 inter, arg1, arg2 + 1
|
||||
add16 result + 1, result + 1, inter
|
||||
add_carry result + 3
|
||||
|
||||
imul8 temp, arg1 + 1, arg2 + 1
|
||||
add16 result + 2, result + 2, temp
|
||||
imul8 inter, arg1 + 1, arg2 + 1
|
||||
add16 result + 2, result + 2, inter
|
||||
|
||||
; In case of mixed input signs, return a negative result.
|
||||
cpy #1 ; 2 cyc
|
||||
|
|
33
tables.js
Normal file
33
tables.js
Normal file
|
@ -0,0 +1,33 @@
|
|||
/**
 * Render a lookup table as assembler `.byte` directive lines.
 *
 * `func` is called once for every index from 0 to `count - 1`, in ascending
 * order, and whatever it returns is written out as that table entry.
 *
 * @param {(index: number) => (number|string)} func - Produces the entry for an index.
 * @param {number} [count=256] - Total number of entries to emit.
 * @param {number} [perLine=16] - Maximum number of entries per `.byte` line.
 * @returns {string} The directive lines joined with '\n' (no trailing newline).
 * @throws {RangeError} If `count` is not a non-negative integer or `perLine`
 *   is not a positive integer.
 */
function db(func, count = 256, perLine = 16) {
  if (!Number.isInteger(count) || count < 0) {
    throw new RangeError(`count must be a non-negative integer, got ${count}`);
  }
  if (!Number.isInteger(perLine) || perLine < 1) {
    // A zero or negative stride would make the outer loop spin forever.
    throw new RangeError(`perLine must be a positive integer, got ${perLine}`);
  }

  const lines = [];
  for (let start = 0; start < count; start += perLine) {
    // The last row may be short when count is not a multiple of perLine.
    const end = Math.min(start + perLine, count);
    const items = [];
    for (let x = start; x < end; x += 1) {
      items.push(func(x));
    }
    lines.push(` .byte ${items.join(', ')}`);
  }
  return lines.join('\n');
}
|
||||
|
||||
// Print the assembler source (presumably ca65 syntax) for three 256-entry,
// page-aligned lookup tables built from the rounded half-square round(x*x/2):
//   mul_lobyte256 - low byte of the half-square of x, for x in 0..255
//   mul_hibyte256 - bits 8..15 of the half-square of x, for x in 0..255
//   mul_hibyte512 - bits 8..15 of the half-square of x + 256, for x in 0..255
const halfSquare = (x) => Math.round(x * x / 2);
const lowByte = (x) => halfSquare(x) & 0xff;
const highByte = (x) => (halfSquare(x) >> 8) & 0xff;
const highByteUpper = (x) => (halfSquare(x + 256) >> 8) & 0xff;

const tableSource = `.segment "TABLES"

.export mul_lobyte256
.export mul_hibyte256
.export mul_hibyte512

.align 256
mul_lobyte256:
${db(lowByte)}

.align 256
mul_hibyte256:
${db(highByte)}

.align 256
mul_hibyte512:
${db(highByteUpper)}

`;

console.log(tableSource);
|
41
testme.js
Normal file
41
testme.js
Normal file
|
@ -0,0 +1,41 @@
|
|||
// a*x = (a + x)^2/2 - a^2/2 - x^2/2
|
||||
|
||||
/**
 * Rounded half-square of `x`, truncated to 16 bits.
 *
 * Computes round(x * x / 2) and keeps the low 16 bits. For odd `x` the exact
 * value ends in .5, which Math.round rounds up.
 *
 * @param {number} x - Value to square.
 * @returns {number} An unsigned integer in the range 0..0xffff.
 */
function half_square(x) {
  // Parenthesized explicitly: `>>>` binds tighter than `&`, so the unbracketed
  // form `v & 0xffff >>> 0` would group as `v & (0xffff >>> 0)`.
  return (Math.round(x * x / 2) & 0xffff) >>> 0;
}
|
||||
|
||||
/**
 * Multiply two values using half-square lookups, with every intermediate
 * step wrapped to 16 bits.
 *
 * Uses a*b = hs(a + b) - hs(a) - hs(b), where hs is half_square. Because
 * half_square rounds odd squares up, the difference comes out one short when
 * both operands are odd; the final step adds that 1 back.
 *
 * @param {number} a - First factor.
 * @param {number} b - Second factor.
 * @returns {number} The product as an unsigned 16-bit value.
 */
function mul8(a, b) {
  const sumTerm = half_square(a + b) & 0xffff;
  const lessA = (sumTerm - half_square(a)) & 0xffff;
  const lessBoth = (lessA - half_square(b)) & 0xffff;
  // Rounding correction: 1 only when both a and b are odd.
  const corrected = (lessBoth + (b & a & 1)) & 0xffff;
  return corrected >>> 0;
}
|
||||
|
||||
/**
 * Multiply two 16-bit values by combining four 8x8 partial products from
 * mul8, accumulating them at byte offsets 0, 1, 1 and 2.
 *
 * @param {number} a - First factor; only the low 16 bits are used.
 * @param {number} b - Second factor; only the low 16 bits are used.
 * @returns {number} The product as an unsigned 32-bit value.
 */
function mul16(a, b) {
  const aHigh = (a & 0xff00) >>> 8;
  const aLow = (a & 0x00ff) >>> 0;
  const bHigh = (b & 0xff00) >>> 8;
  const bLow = (b & 0x00ff) >>> 0;

  // low * low lands at byte offset 0.
  const step0 = (mul8(aLow, bLow) & 0xffff) >>> 0;
  // The two cross terms land at byte offset 1; the masks widen by one bit to
  // admit the carry out of bit 23.
  const step1 = ((step0 + (mul8(aHigh, bLow) << 8)) & 0x00ffffff) >>> 0;
  const step2 = ((step1 + (mul8(aLow, bHigh) << 8)) & 0x01ffffff) >>> 0;
  // high * high lands at byte offset 2. The shift may set the sign bit; the
  // final mask and `>>> 0` bring the sum back to an unsigned 32-bit value.
  const step3 = ((step2 + (mul8(aHigh, bHigh) << 16)) & 0xffffffff) >>> 0;

  return step3;
}
|
||||
|
||||
// Exhaustive check: compare mul16 against the native 32-bit product for every
// pair (a, b) with 0 <= a, b < max, printing one line per mismatch.
// For a quicker run lower max (256, 128 and 8 were used during development);
// to exercise the 8-bit primitive alone, call mul8(a, b) instead of mul16.
const max = 65536;

for (let a = 0; a < max; a += 1) {
  for (let b = 0; b < max; b += 1) {
    const expected = Math.imul(a, b) >>> 0;
    const actual = mul16(a, b);
    if (expected === actual) {
      continue;
    }
    console.log(`wrong! ${a} * ${b} expected ${expected} got ${actual}`);
  }
}
|
Loading…
Reference in a new issue