Compare commits

..

No commits in common. "3553ce986f6721f8c6d446368cb6c6f55186713b" and "b56dc1e98bfeb3c18c4f90df0e0d19fbe5362cde" have entirely different histories.

2 changed files with 68 additions and 35 deletions

View file

@ -129,8 +129,11 @@ KEY_0 = 50
mantissa .byte 5 mantissa .byte 5
.endstruct .endstruct
.import mul_lobyte .import mul_lobyte256
.import mul_hibyte .import mul_hibyte256
.import mul_hibyte512
.import sqr_lobyte
.import sqr_hibyte
.data .data
@ -348,7 +351,7 @@ viewport_oy:
sub 4, dest, arg1, arg2 sub 4, dest, arg1, arg2
.endmacro .endmacro
; 3 + 5 * (bytes - 1) cycles ; 3 + 5 * bytes cycles
.macro shl bytes, arg .macro shl bytes, arg
asl arg ; 3 cyc asl arg ; 3 cyc
.repeat bytes-1, i .repeat bytes-1, i
@ -356,17 +359,17 @@ viewport_oy:
.endrepeat .endrepeat
.endmacro .endmacro
; 8 cycles ; 13 cycles
.macro shl16 arg .macro shl16 arg
shl 2, arg shl 2, arg
.endmacro .endmacro
; 13 cycles ; 18 cycles
.macro shl24 arg .macro shl24 arg
shl 3, arg shl 3, arg
.endmacro .endmacro
; 18 cycles ; 23 cycles
.macro shl32 arg .macro shl32 arg
shl 4, arg shl 4, arg
.endmacro .endmacro
@ -442,13 +445,21 @@ viewport_oy:
; clobbers a, x ; clobbers a, x
.macro sqr8 dest, arg .macro sqr8 dest, arg
ldx arg ldx arg
txa lda sqr_lobyte,x
lsr
lda mul_lobyte,x
rol
sta dest sta dest
lda mul_hibyte,x lda sqr_hibyte,x
rol sta dest + 1
.endmacro
; clobbers a, x
.macro sqr8_add16 dest, arg
ldx arg
clc
lda sqr_lobyte,x
adc dest
sta dest
lda sqr_hibyte,x
adc dest + 1
sta dest + 1 sta dest + 1
.endmacro .endmacro
@ -537,25 +548,22 @@ bank_switch_table:
clc ; 2 cyc clc ; 2 cyc
adc mul_factor_x ; 3 cyc adc mul_factor_x ; 3 cyc
tax ; 2 cyc tax ; 2 cyc
lda mul_hibyte,x ; 4 cyc bcc under256 ; 2 cyc
bcc next ; 2 cyc lda mul_hibyte512,x ; 4 cyc
; carry is set so we get to add 1 for free, but need to add 0x80 bcs next ; 2 cyc
adc #$7f ; 2 cyc under256:
clc ; 2 cyc lda mul_hibyte256,x ; 4 cyc
; stash the sum temporarily so we can use it as an operand to add
stx mul_product_lo ; 3 cyc
adc mul_product_lo ; 3 cyc
next:
sec ; 2 cyc sec ; 2 cyc
next:
sta mul_product_hi ; 3 cyc sta mul_product_hi ; 3 cyc
lda mul_lobyte,x ; 4 cyc lda mul_lobyte256,x ; 4 cyc
; - a^2/2 ; - a^2/2
ldx mul_factor_a ; 3 cyc ldx mul_factor_a ; 3 cyc
sbc mul_lobyte,x ; 4 cyc sbc mul_lobyte256,x ; 4 cyc
sta mul_product_lo ; 3 cyc sta mul_product_lo ; 3 cyc
lda mul_product_hi ; 3 cyc lda mul_product_hi ; 3 cyc
sbc mul_hibyte,x ; 4 cyc sbc mul_hibyte256,x ; 4 cyc
sta mul_product_hi ; 3 cyc sta mul_product_hi ; 3 cyc
; + x & a & 1: ; + x & a & 1:
@ -574,10 +582,10 @@ bank_switch_table:
; - x^2/2 ; - x^2/2
small_product: small_product:
sec ; 2 cyc sec ; 2 cyc
sbc mul_lobyte,x ; 4 cyc sbc mul_lobyte256,x ; 4 cyc
sta mul_product_lo ; 3 cyc sta mul_product_lo ; 3 cyc
lda mul_product_hi ; 3 cyc lda mul_product_hi ; 3 cyc
sbc mul_hibyte,x ; 4 cyc sbc mul_hibyte256,x ; 4 cyc
sta mul_product_hi ; 3 cyc sta mul_product_hi ; 3 cyc
.endscope .endscope
.endif .endif
@ -784,14 +792,18 @@ arg2_pos:
; h*h*256*256 + h*l*256 + h*l*256 + l*l ; h*h*256*256 + h*l*256 + h*l*256 + l*l
sqr8 result, arg sqr8 result, arg
sqr8 result + 2, arg + 1 lda #0
sta result + 2
sta result + 3
imul8 inter, arg + 1, arg, xe imul8 inter, arg + 1, arg, xe
shl16 inter add16 result + 1, result + 1, inter
add_carry result + 3 add_carry result + 3
add16 result + 1, result + 1, inter add16 result + 1, result + 1, inter
add_carry result + 3 add_carry result + 3
sqr8_add16 result + 2, arg + 1
rts ; 6 cyc rts ; 6 cyc
.endscope .endscope
.endmacro .endmacro

View file

@ -11,19 +11,40 @@ function db(func) {
return lines.join('\n'); return lines.join('\n');
} }
let squares = [];
for (let i = 0; i < 512; i++) {
squares.push(Math.trunc((i * i + 1) / 2));
}
console.log( console.log(
`.segment "TABLES" `.segment "TABLES"
.export mul_lobyte .export mul_lobyte256
.export mul_hibyte .export mul_hibyte256
.export mul_hibyte512
.export sqr_lobyte
.export sqr_hibyte
; (i * i) / 2 for the multiplier ; (i * i + 1) / 2 for the multiplier
.align 256 .align 256
mul_lobyte: mul_lobyte256:
${db((i) => ((i * i) >> 1) & 0xff)} ${db((i) => squares[i] & 0xff)}
.align 256 .align 256
mul_hibyte: mul_hibyte256:
${db((i) => ((i * i) >> 9) & 0xff)} ${db((i) => (squares[i] >> 8) & 0xff)}
.align 256
mul_hibyte512:
${db((i) => (squares[i + 256] >> 8) & 0xff)}
; (i * i) for the plain squares
.align 256
sqr_lobyte:
${db((i) => (i * i) & 0xff)}
.align 256
sqr_hibyte:
${db((i) => ((i * i) >> 8) & 0xff)}
`); `);