aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorMattias Andrée <maandree@kth.se>2016-03-03 17:47:48 +0100
committerMattias Andrée <maandree@kth.se>2016-03-03 17:47:48 +0100
commitae625b8dacba17e4a41912b5bb16da8f97089681 (patch)
tree1afef6dc887e3ddda1dfeeb8cf1f947dbfe556c5 /src
parentOptimised zdivmod (diff)
downloadlibzahl-ae625b8dacba17e4a41912b5bb16da8f97089681.tar.gz
libzahl-ae625b8dacba17e4a41912b5bb16da8f97089681.tar.bz2
libzahl-ae625b8dacba17e4a41912b5bb16da8f97089681.tar.xz
Add zmul and zsqr
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to 'src')
-rw-r--r--src/zmul.c95
-rw-r--r--src/zsqr.c76
2 files changed, 171 insertions, 0 deletions
diff --git a/src/zmul.c b/src/zmul.c
new file mode 100644
index 0000000..b6c4549
--- /dev/null
+++ b/src/zmul.c
@@ -0,0 +1,95 @@
+/* See LICENSE file for copyright and license details. */
+#include "internals"
+
+
+void
+zmul(z_t a, z_t b, z_t c)
+{
+ /*
+ * Karatsuba algorithm
+ */
+
+ size_t m, m2;
+ z_t z0, z1, z2, b_high, b_low, c_high, c_low;
+ int b_sign, c_sign;
+
+ if (zzero(b) || zzero(c)) {
+ SET_SIGNUM(a, 0);
+ return;
+ }
+
+ m = zbits(b);
+ m2 = b == c ? m : zbits(c);
+
+ b_sign = zsignum(b);
+ c_sign = zsignum(c);
+
+ if (m <= 16 && m2 <= 16) {
+ zsetu(a, b->chars[0] * c->chars[0]);
+ SET_SIGNUM(a, b_sign * c_sign);
+ return;
+ }
+
+ SET_SIGNUM(b, 1);
+ SET_SIGNUM(c, 1);
+
+ m = m > m2 ? m : m2;
+ m2 = m >> 1;
+
+ zinit(z0);
+ zinit(z1);
+ zinit(z2);
+ zinit(b_high);
+ zinit(b_low);
+ zinit(c_high);
+ zinit(c_low);
+
+ zsplit(b_high, b_low, b, m2);
+ zsplit(c_high, c_low, c, m2);
+
+#if 0
+ zmul(z0, b_low, c_low);
+ zmul(z2, b_high, c_high);
+ zadd(b_low, b_low, b_high);
+ zadd(c_low, c_low, c_high);
+ zmul(z1, b_low, c_low);
+
+ zsub(z1, z1, z0);
+ zsub(z1, z1, z2);
+
+ zlsh(z2, z2, m2);
+ m2 <<= 1;
+ zlsh(z1, z1, m2);
+
+ zadd(a, z2, z1);
+ zadd(a, a, z0);
+#else
+ zmul(z0, b_low, c_low);
+ zmul(z2, b_high, c_high);
+ zsub(b_low, b_high, b_low);
+ zsub(c_low, c_high, c_low);
+ zmul(z1, b_low, c_low);
+
+ zlsh(z0, z0, m2 + 1);
+ zlsh(z1, z1, m2);
+ zlsh(a, z2, m2);
+ m2 <<= 1;
+ zlsh(z2, z2, m2);
+ zadd(z2, z2, a);
+
+ zsub(a, z2, z1);
+ zadd(a, a, z0);
+#endif
+
+ zfree(z0);
+ zfree(z1);
+ zfree(z2);
+ zfree(b_high);
+ zfree(b_low);
+ zfree(c_high);
+ zfree(c_low);
+
+ SET_SIGNUM(b, b_sign);
+ SET_SIGNUM(c, c_sign);
+ SET_SIGNUM(a, b_sign * c_sign);
+}
diff --git a/src/zsqr.c b/src/zsqr.c
new file mode 100644
index 0000000..ea840b4
--- /dev/null
+++ b/src/zsqr.c
@@ -0,0 +1,76 @@
+/* See LICENSE file for copyright and license details. */
+#include "internals"
+
+
+void
+zsqr(z_t a, z_t b)
+{
+ /*
+ * Karatsuba algorithm, optimised for equal factors.
+ */
+
+ size_t m2;
+ z_t z0, z1, z2, high, low;
+ int sign;
+
+ if (zzero(b)) {
+ SET_SIGNUM(a, 0);
+ return;
+ }
+
+ m2 = zbits(b);
+
+ if (m2 <= 16) {
+ zsetu(a, b->chars[0] * b->chars[0]);
+ SET_SIGNUM(a, 1);
+ return;
+ }
+
+ sign = zsignum(b);
+ SET_SIGNUM(b, 1);
+ m2 >>= 1;
+
+ zinit(z0);
+ zinit(z1);
+ zinit(z2);
+ zinit(high);
+ zinit(low);
+
+ zsplit(high, low, b, m2);
+
+#if 0
+ zsqr(z0, low);
+ zsqr(z2, high);
+ zmul(z1, low, high);
+
+ zlsh(z2, z2, m2);
+ m2 = (m2 << 1) | 1;
+ zlsh(z1, z1, m2);
+
+ zadd(a, z2, z1);
+ zadd(a, a, z0);
+#else
+ zsqr(z0, low);
+ zsqr(z2, high);
+ zmul(z1, low, low);
+
+ zlsh(z0, z0, m2 + 1);
+ zlsh(z1, z1, m2 + 1);
+ zlsh(a, z2, m2);
+ m2 <<= 1;
+ zlsh(z2, z2, m2);
+ zadd(z2, z2, a);
+
+ zsub(a, z2, z1);
+ zadd(a, a, z0);
+#endif
+
+ zfree(z0);
+ zfree(z1);
+ zfree(z2);
+ zfree(high);
+ zfree(low);
+
+ SET_SIGNUM(b, sign);
+ SET_SIGNUM(a, 1);
+}