From 3e3b44d087ab616089402129b2bc4c4831c6b33a Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Wed, 16 Mar 2016 14:30:29 +0100 Subject: Optimise zsqr, zmul, zstr, zdivmod, zpow, and zpowu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- src/zsqr.c | 49 +++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) (limited to 'src/zsqr.c') diff --git a/src/zsqr.c b/src/zsqr.c index e9418bf..8a616f0 100644 --- a/src/zsqr.c +++ b/src/zsqr.c @@ -11,16 +11,19 @@ zsqr_impl_single_char(z_t a, z_t b) SET_SIGNUM(a, 1); } -static void +extern void zmul_impl(z_t a, z_t b, z_t c); + +void zsqr_impl(z_t a, z_t b) { /* * Karatsuba algorithm, optimised for equal factors. */ - z_t z0, z1, z2, high, low; - size_t bits; +#define z2 a + z_t z0, z1, high, low; zahl_char_t auxchars[3]; + size_t bits; bits = zbits(b); @@ -31,39 +34,41 @@ zsqr_impl(z_t a, z_t b) bits >>= 1; - zinit(z0); - zinit(z1); - zinit(z2); - + /* Try to split only at a character level rather than a bit level. + * Such splits are faster, even if bit-level is required, and do + * not require auxiliary memory except for the bit-level split + * which require constant auxiliary memory. */ if (bits < BITS_PER_CHAR) { low->chars = auxchars; high->chars = auxchars + 1; - zsplit_fast_small_tainted(high, low, b, bits); + zsplit_unsigned_fast_small_tainted(high, low, b, bits); } else { bits &= ~(BITS_PER_CHAR - 1); - zsplit_fast_large_taint(high, low, b, bits); + zsplit_unsigned_fast_large_taint(high, low, b, bits); } - zsqr_impl(z2, high); if (unlikely(zzero(low))) { - SET_SIGNUM(z0, 0); - SET_SIGNUM(z1, 0); + zsqr_impl(z2, high); + zlsh(a, z2, bits << 1); } else { + zinit(z0); + zinit(z1); + zsqr_impl(z0, low); - zmul(z1, low, high); - } - zlsh(z1, z1, bits + 1); - bits <<= 1; - zlsh(a, z2, bits); - zadd_unsigned_assign(a, z1); - zadd_unsigned_assign(a, z0); + zmul_impl(z1, low, high); + zlsh(z1, z1, bits + 1); + zsqr_impl(z2, high); + zlsh(a, z2, bits << 1); - zfree(z0); - zfree(z1); - zfree(z2); + zadd_unsigned_assign(a, z1); + zadd_unsigned_assign(a, z0); + + zfree(z0); + zfree(z1); + } } void -- cgit v1.2.3-70-g09d2