about summary refs log tree commit diff stats
path: root/src/zsqr.c
diff options
context:
space:
mode:
author Mattias Andrée <maandree@kth.se> 2016-03-15 22:38:08 +0100
committer Mattias Andrée <maandree@kth.se> 2016-03-15 22:38:08 +0100
commit 0d409e30fd712507216d5b4bd32ab4b6cb5fae28 (patch)
tree 9f89ac5239298447db78b7f9ab3347353f04bbc8 /src/zsqr.c
parent Optimise zsetup, zgcd, zmul, and zsqr and add -flto (diff)
download libzahl-0d409e30fd712507216d5b4bd32ab4b6cb5fae28.tar.gz
libzahl-0d409e30fd712507216d5b4bd32ab4b6cb5fae28.tar.bz2
libzahl-0d409e30fd712507216d5b4bd32ab4b6cb5fae28.tar.xz
Optimise zsqr, and optimise zmul a little
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to 'src/zsqr.c')
-rw-r--r-- src/zsqr.c 79
1 files changed, 46 insertions, 33 deletions
diff --git a/src/zsqr.c b/src/zsqr.c
index 68480ba..e9418bf 100644
--- a/src/zsqr.c
+++ b/src/zsqr.c
@@ -2,54 +2,61 @@
#include "internals.h"
-void
-zsqr(z_t a, z_t b)
+static inline void
+zsqr_impl_single_char(z_t a, z_t b)
+{
+ ENSURE_SIZE(a, 1);
+ a->used = 1;
+ a->chars[0] = b->chars[0] * b->chars[0];
+ SET_SIGNUM(a, 1);
+}
+
+static void
+zsqr_impl(z_t a, z_t b)
{
/*
* Karatsuba algorithm, optimised for equal factors.
*/
- size_t m2;
z_t z0, z1, z2, high, low;
- int sign;
+ size_t bits;
+ zahl_char_t auxchars[3];
- if (unlikely(zzero(b))) {
- SET_SIGNUM(a, 0);
- return;
- }
-
- m2 = zbits(b);
+ bits = zbits(b);
- if (m2 <= BITS_PER_CHAR / 2) {
- /* zsetu(a, b->chars[0] * b->chars[0]); { */
- ENSURE_SIZE(a, 1);
- a->used = 1;
- a->chars[0] = b->chars[0] * b->chars[0];
- /* } */
- SET_SIGNUM(a, 1);
+ if (bits <= BITS_PER_CHAR / 2) {
+ zsqr_impl_single_char(a, b);
return;
}
- sign = zsignum(b);
- SET_SIGNUM(b, 1);
- m2 >>= 1;
+ bits >>= 1;
zinit(z0);
zinit(z1);
zinit(z2);
- zinit(high);
- zinit(low);
- zsplit(high, low, b, m2);
+ if (bits < BITS_PER_CHAR) {
+ low->chars = auxchars;
+ high->chars = auxchars + 1;
+ zsplit_fast_small_tainted(high, low, b, bits);
+ } else {
+ bits &= ~(BITS_PER_CHAR - 1);
+ zsplit_fast_large_taint(high, low, b, bits);
+ }
- zsqr(z0, low);
- zsqr(z2, high);
- zmul(z1, low, high);
+ zsqr_impl(z2, high);
+ if (unlikely(zzero(low))) {
+ SET_SIGNUM(z0, 0);
+ SET_SIGNUM(z1, 0);
+ } else {
+ zsqr_impl(z0, low);
+ zmul(z1, low, high);
+ }
- zlsh(z1, z1, m2 + 1);
- m2 <<= 1;
- zlsh(a, z2, m2);
+ zlsh(z1, z1, bits + 1);
+ bits <<= 1;
+ zlsh(a, z2, bits);
zadd_unsigned_assign(a, z1);
zadd_unsigned_assign(a, z0);
@@ -57,9 +64,15 @@ zsqr(z_t a, z_t b)
zfree(z0);
zfree(z1);
zfree(z2);
- zfree(high);
- zfree(low);
+}
- SET_SIGNUM(b, sign);
- SET_SIGNUM(a, 1);
+void
+zsqr(z_t a, z_t b)
+{
+ if (unlikely(zzero(b))) {
+ SET_SIGNUM(a, 0);
+ } else {
+ zsqr_impl(a, b);
+ SET_SIGNUM(a, 1);
+ }
}