diff options
| author | Mattias Andrée <maandree@kth.se> | 2016-03-15 22:38:08 +0100 |
|---|---|---|
| committer | Mattias Andrée <maandree@kth.se> | 2016-03-15 22:38:08 +0100 |
| commit | 0d409e30fd712507216d5b4bd32ab4b6cb5fae28 (patch) | |
| tree | 9f89ac5239298447db78b7f9ab3347353f04bbc8 /src/zsqr.c | |
| parent | Optimise zsetup, zgcd, zmul, and zsqr and add -flto (diff) | |
| download | libzahl-0d409e30fd712507216d5b4bd32ab4b6cb5fae28.tar.gz libzahl-0d409e30fd712507216d5b4bd32ab4b6cb5fae28.tar.bz2 libzahl-0d409e30fd712507216d5b4bd32ab4b6cb5fae28.tar.xz | |
Optimise zsqr, and optimise zmul a little
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to 'src/zsqr.c')
| -rw-r--r-- | src/zsqr.c | 79 |
1 files changed, 46 insertions, 33 deletions
```diff
@@ -2,54 +2,61 @@
 #include "internals.h"
-void
-zsqr(z_t a, z_t b)
+static inline void
+zsqr_impl_single_char(z_t a, z_t b)
+{
+	ENSURE_SIZE(a, 1);
+	a->used = 1;
+	a->chars[0] = b->chars[0] * b->chars[0];
+	SET_SIGNUM(a, 1);
+}
+
+static void
+zsqr_impl(z_t a, z_t b)
 {
 	/*
 	 * Karatsuba algorithm, optimised for equal factors.
 	 */
-	size_t m2;
 	z_t z0, z1, z2, high, low;
-	int sign;
+	size_t bits;
+	zahl_char_t auxchars[3];

-	if (unlikely(zzero(b))) {
-		SET_SIGNUM(a, 0);
-		return;
-	}
-
-	m2 = zbits(b);
+	bits = zbits(b);

-	if (m2 <= BITS_PER_CHAR / 2) {
-		/* zsetu(a, b->chars[0] * b->chars[0]); { */
-		ENSURE_SIZE(a, 1);
-		a->used = 1;
-		a->chars[0] = b->chars[0] * b->chars[0];
-		/* } */
-		SET_SIGNUM(a, 1);
+	if (bits <= BITS_PER_CHAR / 2) {
+		zsqr_impl_single_char(a, b);
 		return;
 	}
-	sign = zsignum(b);
-	SET_SIGNUM(b, 1);
-	m2 >>= 1;
+	bits >>= 1;
 	zinit(z0);
 	zinit(z1);
 	zinit(z2);
-	zinit(high);
-	zinit(low);
-	zsplit(high, low, b, m2);
+	if (bits < BITS_PER_CHAR) {
+		low->chars = auxchars;
+		high->chars = auxchars + 1;
+		zsplit_fast_small_tainted(high, low, b, bits);
+	} else {
+		bits &= ~(BITS_PER_CHAR - 1);
+		zsplit_fast_large_taint(high, low, b, bits);
+	}

-	zsqr(z0, low);
-	zsqr(z2, high);
-	zmul(z1, low, high);
+	zsqr_impl(z2, high);
+	if (unlikely(zzero(low))) {
+		SET_SIGNUM(z0, 0);
+		SET_SIGNUM(z1, 0);
+	} else {
+		zsqr_impl(z0, low);
+		zmul(z1, low, high);
+	}

-	zlsh(z1, z1, m2 + 1);
-	m2 <<= 1;
-	zlsh(a, z2, m2);
+	zlsh(z1, z1, bits + 1);
+	bits <<= 1;
+	zlsh(a, z2, bits);
 	zadd_unsigned_assign(a, z1);
 	zadd_unsigned_assign(a, z0);
@@ -57,9 +64,15 @@ zsqr(z_t a, z_t b)
 	zfree(z0);
 	zfree(z1);
 	zfree(z2);
-	zfree(high);
-	zfree(low);
+}

-	SET_SIGNUM(b, sign);
-	SET_SIGNUM(a, 1);
+void
+zsqr(z_t a, z_t b)
+{
+	if (unlikely(zzero(b))) {
+		SET_SIGNUM(a, 0);
+	} else {
+		zsqr_impl(a, b);
+		SET_SIGNUM(a, 1);
+	}
 }
```
