diff --git a/.gitignore b/.gitignore index 771e47a..8d2f7ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,3 @@ *.o *.xex -tables.s .DS_Store diff --git a/Makefile b/Makefile index 008bf8c..25148b4 100644 --- a/Makefile +++ b/Makefile @@ -2,17 +2,13 @@ all : mandel.xex -mandel.xex : mandel.o tables.o - ld65 -C ./atari-asm-xex.cfg -o $@ $+ +%.xex : %.o + ld65 -C atari-asm-xex.cfg -o $@ $< %.o : %.s ca65 -o $@ $< -tables.s : tables.js - node tables.js > tables.s - clean : - rm -f tables.s rm -f *.o rm -f *.xex diff --git a/mandel.s b/mandel.s index fcc7867..3db6a77 100644 --- a/mandel.s +++ b/mandel.s @@ -25,15 +25,14 @@ z_buffer_active = $b0 ; boolean: 1 if we triggered the lake, 0 if not z_buffer_start = $b1 ; u8: index into z_buffer z_buffer_end = $b2 ; u8: index into z_buffer temp = $b4 ; u16 -temp2 = $b6 ; u16 -pixel_ptr = $b8 ; u16 -pixel_color = $ba ; u8 -pixel_mask = $bb ; u8 -pixel_shift = $bc ; u8 -pixel_offset = $bd ; u8 -fill_level = $be ; u8 -palette_offset = $bf ; u8 +pixel_ptr = $b6 ; u16 +pixel_color = $b8 ; u8 +pixel_mask = $b9 ; u8 +pixel_shift = $ba ; u8 +pixel_offset = $bb ; u8 +fill_level = $bc ; u8 +palette_offset = $bd ; u8 ; FP registers in zero page FR0 = $d4 ; float48 @@ -108,10 +107,6 @@ KEY_RIGHT = $87 mantissa .byte 6 .endstruct -.import mul_lobyte256 -.import mul_hibyte256 -.import mul_hibyte512 - .data strings: @@ -262,12 +257,6 @@ fill_masks: add 4, dest, arg2, dest .endmacro -.macro add_carry dest - lda dest - adc #0 - sta dest -.endmacro - ; 2 + 9 * byte cycles .macro sub bytes, dest, arg1, arg2 sec ; 2 cyc @@ -398,12 +387,12 @@ next: ; 5 to 25 cycles .macro check_sign arg ; Check sign bit and flip argument to postive, - ; keeping a count of sign bits in the Y register. + ; keeping a count of sign bits in the X register. .local positive lda arg + 1 ; 3 cyc bpl positive ; 2 cyc neg16 arg ; 18 cyc - iny ; 2 cyc + inx ; 2 cyc positive: .endmacro @@ -432,13 +421,13 @@ positive: ; min 470 cycles ; max 780 cycles -.proc imul16_func_orig +.proc imul16_func arg1 = FR0 ; 16-bit arg (clobbered) arg2 = FR1 ; 16-bit arg (clobbered) result = FR2 ; 32-bit result - ldy #0 ; 2 cyc - ; counts the number of sign bits in Y + ldx #0 ; 2 cyc + ; counts the number of sign bits in X check_sign arg1 ; 5 to 25 cyc check_sign arg2 ; 5 to 25 cyc @@ -458,103 +447,7 @@ positive: .endrepeat ; In case of mixed input signs, return a negative result. - cpy #1 ; 2 cyc - bne positive_result ; 2 cyc - neg32 result ; 34 cyc -positive_result: - - rts ; 6 cyc -.endproc - -; Adapted from https://everything2.com/title/Fast+6502+multiplication -.macro imul8 dest, arg1, arg2 - .local under256 - .local next - .local small_product - .scope - mul_factor_a = arg1 - mul_factor_x = arg2 - mul_product_lo = dest - mul_product_hi = dest + 1 - - lda mul_factor_a ; setup: 6 cycles - ;ldx mul_factor_x - - clc ; (a + x)^2/2: 23 cycles - adc mul_factor_x - tax - bcc under256 - lda mul_hibyte512,x - bcs next - under256: - lda mul_hibyte256,x - sec - next: - sta mul_product_hi - lda mul_lobyte256,x - - ldx mul_factor_a ; - a^2/2: 20 cycles - sbc mul_lobyte256,x - sta mul_product_lo - lda mul_product_hi - sbc mul_hibyte256,x - sta mul_product_hi - - ldx mul_factor_x ; + x & a & 1: 22 cycles - txa ; (this is a kludge to correct a - and mul_factor_a ; roundoff error that makes odd * odd too low) - and #1 - - clc - adc mul_product_lo - bcc small_product - inc mul_product_hi - small_product: - sec ; - x^2/2: 25 cycles - sbc mul_lobyte256,x - lda mul_product_hi - sbc mul_hibyte256,x - sta mul_product_hi - .endscope -.endmacro - -.proc imul16_func - arg1 = FR0 ; 16-bit arg (clobbered) - arg2 = FR1 ; 16-bit arg (clobbered) - result = FR2 ; 32-bit result - inter = temp2 - - ldy #0 ; 2 cyc - ; counts the number of sign bits in Y - check_sign arg1 ; 5 to 25 cyc - check_sign arg2 ; 5 to 25 cyc - - ; h1l1 * h2l2 - ; (h1*256 + l1) * (h2*256 + l2) - ; h1*256*(h2*256 + l2) + l1*(h2*256 + l2) - ; h1*h2*256*256 + h1*l2*256 + h2*l1*256 + l1*l2 - - lda #0 - sta result + 0 - sta result + 1 - sta result + 2 - sta result + 3 - - imul8 inter, arg1, arg2 - add16 result, result, inter - - imul8 inter, arg1 + 1, arg2 - add16 result + 1, result + 1, inter - - imul8 inter, arg1, arg2 + 1 - add16 result + 1, result + 1, inter - add_carry result + 3 - - imul8 inter, arg1 + 1, arg2 + 1 - add16 result + 2, result + 2, inter - - ; In case of mixed input signs, return a negative result. - cpy #1 ; 2 cyc + cpx #1 ; 2 cyc bne positive_result ; 2 cyc neg32 result ; 34 cyc positive_result: diff --git a/tables.js b/tables.js deleted file mode 100644 index 5afc3c0..0000000 --- a/tables.js +++ /dev/null @@ -1,33 +0,0 @@ -function db(func) { - let lines = []; - for (let i = 0; i < 256; i += 16) { - let items = []; - for (let j = 0; j < 16; j++) { - let x = i + j; - items.push(func(x)); - } - lines.push(' .byte ' + items.join(', ')); - } - return lines.join('\n'); -} - -console.log( -`.segment "TABLES" - -.export mul_lobyte256 -.export mul_hibyte256 -.export mul_hibyte512 - -.align 256 -mul_lobyte256: -${db((x) => Math.round(x * x / 2) & 0xff)} - -.align 256 -mul_hibyte256: -${db((x) => (Math.round(x * x / 2) >> 8) & 0xff)} - -.align 256 -mul_hibyte512: -${db((x) => (Math.round((x + 256) * (x + 256) / 2) >> 8) & 0xff)} - -`); diff --git a/testme.js b/testme.js deleted file mode 100644 index e12e706..0000000 --- a/testme.js +++ /dev/null @@ -1,41 +0,0 @@ -// ax = (a + x)2/2 - a2/2 - x2/2 - -function half_square(x) { - return Math.round(x * x / 2) & 0xffff >>> 0; -} - -function mul8(a, b) { - let result = half_square(a + b) & 0xffff; - result = (result - half_square(a)) & 0xffff; - result = (result - half_square(b)) & 0xffff; - result = (result + (b & a & 1)) & 0xffff; - return result >>> 0; -} - -function mul16(a, b) { - let ah = (a & 0xff00) >>> 8; - let al = (a & 0x00ff) >>> 0; - let bh = (b & 0xff00) >>> 8; - let bl = (b & 0x00ff) >>> 0; - let result = (mul8(al, bl) & 0xffff) >>> 0; - result = ((result + (mul8(ah, bl) << 8)) & 0x00ffffff) >>> 0; - result = ((result + (mul8(al, bh) << 8)) & 0x01ffffff) >>> 0; - result = ((result + (mul8(ah, bh) << 16)) & 0xffffffff) >>> 0; - return result; -} - -let max = 65536; -//let max = 256; -//let max = 128; -//let max = 8; - -for (let a = 0; a < max; a++) { - for (let b = 0; b < max; b++) { - let expected = Math.imul(a, b) >>> 0; - //let actual = mul8(a, b); - let actual = mul16(a, b); - if (expected !== actual) { - console.log(`wrong! ${a} * ${b} expected ${expected} got ${actual}`); - } - } -} \ No newline at end of file