aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/internals.h95
-rw-r--r--src/zadd.c27
-rw-r--r--src/zgcd.c58
-rw-r--r--src/zmul.c31
-rw-r--r--src/zsetup.c6
-rw-r--r--src/zsqr.c21
-rw-r--r--src/zsub.c22
-rw-r--r--src/zunsetup.c2
8 files changed, 156 insertions, 106 deletions
diff --git a/src/internals.h b/src/internals.h
index c859ce1..456a2df 100644
--- a/src/internals.h
+++ b/src/internals.h
@@ -45,29 +45,29 @@
#endif
#define LIST_TEMPS\
- X(libzahl_tmp_cmp)\
- X(libzahl_tmp_str_num)\
- X(libzahl_tmp_str_mag)\
- X(libzahl_tmp_str_div)\
- X(libzahl_tmp_str_rem)\
- X(libzahl_tmp_gcd_u)\
- X(libzahl_tmp_gcd_v)\
- X(libzahl_tmp_sub)\
- X(libzahl_tmp_modmul)\
- X(libzahl_tmp_div)\
- X(libzahl_tmp_mod)\
- X(libzahl_tmp_pow_b)\
- X(libzahl_tmp_pow_c)\
- X(libzahl_tmp_pow_d)\
- X(libzahl_tmp_modsqr)\
- X(libzahl_tmp_divmod_a)\
- X(libzahl_tmp_divmod_b)\
- X(libzahl_tmp_divmod_d)\
- X(libzahl_tmp_ptest_x)\
- X(libzahl_tmp_ptest_a)\
- X(libzahl_tmp_ptest_d)\
- X(libzahl_tmp_ptest_n1)\
- X(libzahl_tmp_ptest_n4)
+ X(libzahl_tmp_cmp, 1)\
+ X(libzahl_tmp_str_num, 0)\
+ X(libzahl_tmp_str_mag, 0)\
+ X(libzahl_tmp_str_div, 0)\
+ X(libzahl_tmp_str_rem, 0)\
+ X(libzahl_tmp_gcd_u, 0)\
+ X(libzahl_tmp_gcd_v, 0)\
+ X(libzahl_tmp_sub, 0)\
+ X(libzahl_tmp_modmul, 0)\
+ X(libzahl_tmp_div, 0)\
+ X(libzahl_tmp_mod, 0)\
+ X(libzahl_tmp_pow_b, 0)\
+ X(libzahl_tmp_pow_c, 0)\
+ X(libzahl_tmp_pow_d, 0)\
+ X(libzahl_tmp_modsqr, 0)\
+ X(libzahl_tmp_divmod_a, 0)\
+ X(libzahl_tmp_divmod_b, 0)\
+ X(libzahl_tmp_divmod_d, 0)\
+ X(libzahl_tmp_ptest_x, 0)\
+ X(libzahl_tmp_ptest_a, 0)\
+ X(libzahl_tmp_ptest_d, 0)\
+ X(libzahl_tmp_ptest_n1, 0)\
+ X(libzahl_tmp_ptest_n4, 0)
#define LIST_CONSTS\
X(libzahl_const_1e19, zsetu, 10000000000000000000ULL) /* The largest power of 10 < 2⁶⁴. */\
@@ -75,7 +75,7 @@
X(libzahl_const_2, zsetu, 2)\
X(libzahl_const_4, zsetu, 4)
-#define X(x) extern z_t x;
+#define X(x, s) extern z_t x;
LIST_TEMPS
#undef X
#define X(x, f, v) extern z_t x;
@@ -185,3 +185,50 @@ libzahl_add_overflow(zahl_char_t *rp, zahl_char_t a, zahl_char_t b)
return carry;
}
#endif
+
+static inline void
+zrsh_taint(z_t a, size_t bits)
+{
+ size_t i, chars, cbits;
+
+ if (unlikely(!bits))
+ return;
+ if (unlikely(zzero(a)))
+ return;
+
+ chars = FLOOR_BITS_TO_CHARS(bits);
+
+ if (unlikely(chars >= a->used || zbits(a) <= bits)) {
+ SET_SIGNUM(a, 0);
+ return;
+ }
+
+ bits = BITS_IN_LAST_CHAR(bits);
+ cbits = BITS_PER_CHAR - bits;
+
+ if (likely(chars)) {
+ a->used -= chars;
+ a->chars += chars;
+ }
+
+ if (unlikely(bits)) { /* This if statement is very important in C. */
+ a->chars[0] >>= bits;
+ for (i = 1; i < a->used; i++) {
+ a->chars[i - 1] |= a->chars[i] << cbits;
+ a->chars[i] >>= bits;
+ }
+ TRIM_NONZERO(a);
+ }
+}
+
+static inline void
+zswap_tainted_unsigned(z_t a, z_t b)
+{
+ z_t t;
+ t->used = b->used;
+ b->used = a->used;
+ a->used = t->used;
+ t->chars = b->chars;
+ b->chars = a->chars;
+ a->chars = t->chars;
+}
diff --git a/src/zadd.c b/src/zadd.c
index 557ec6f..b730b81 100644
--- a/src/zadd.c
+++ b/src/zadd.c
@@ -72,6 +72,33 @@ zadd_unsigned(z_t a, z_t b, z_t c)
}
void
+zadd_unsigned_assign(z_t a, z_t b)
+{
+ size_t size, n;
+
+ if (unlikely(zzero(a))) {
+ zabs(a, b);
+ return;
+ } else if (unlikely(zzero(b))) {
+ return;
+ }
+
+ size = MAX(a->used, b->used);
+ n = a->used + b->used - size;
+
+ ENSURE_SIZE(a, size + 1);
+ a->chars[size] = 0;
+
+ if (a->used < b->used) {
+ n = b->used;
+ zmemset(a->chars + a->used, 0, n - a->used);
+ }
+ zadd_impl(a, b, n);
+
+ SET_SIGNUM(a, 1);
+}
+
+void
zadd(z_t a, z_t b, z_t c)
{
if (unlikely(zzero(b))) {
diff --git a/src/zgcd.c b/src/zgcd.c
index 7c6f2bc..502a779 100644
--- a/src/zgcd.c
+++ b/src/zgcd.c
@@ -12,14 +12,11 @@ zgcd(z_t a, z_t b, z_t c)
* Binary GCD algorithm.
*/
- size_t shifts = 0, i = 0, min;
- zahl_char_t uv, bit;
- int neg;
+ size_t shifts;
+ zahl_char_t *u_orig, *v_orig;
+ size_t u_lsb, v_lsb;
+ int neg, cmpmag;
- if (unlikely(!zcmp(b, c))) {
- SET(a, b);
- return;
- }
if (unlikely(zzero(b))) {
SET(a, c);
return;
@@ -29,37 +26,30 @@ zgcd(z_t a, z_t b, z_t c)
return;
}
- zabs(u, b);
- zabs(v, c);
neg = znegative2(b, c);
- min = MIN(u->used, v->used);
- for (; i < min; i++) {
- uv = u->chars[i] | v->chars[i];
- for (bit = 1; bit; bit <<= 1, shifts++)
- if (uv & bit)
- goto loop_done;
- }
- for (; i < u->used; i++)
- for (bit = 1; bit; bit <<= 1, shifts++)
- if (u->chars[i] & bit)
- goto loop_done;
- for (; i < v->used; i++)
- for (bit = 1; bit; bit <<= 1, shifts++)
- if (v->chars[i] & bit)
- goto loop_done;
-loop_done:
- zrsh(u, u, shifts);
- zrsh(v, v, shifts);
+ u_lsb = zlsb(b);
+ v_lsb = zlsb(c);
+ shifts = MIN(u_lsb, v_lsb);
+ zrsh(u, b, u_lsb);
+ zrsh(v, c, v_lsb);
- zrsh(u, u, zlsb(u));
- do {
- zrsh(v, v, zlsb(v));
- if (zcmpmag(u, v) > 0) /* Both are non-negative. */
- zswap(u, v);
- zsub_unsigned(v, v, u);
- } while (!zzero(v));
+ u_orig = u->chars;
+ v_orig = v->chars;
+
+ for (;;) {
+ if (likely((cmpmag = zcmpmag(u, v)) >= 0)) {
+ if (unlikely(cmpmag == 0))
+ break;
+ zswap_tainted_unsigned(u, v);
+ }
+ zsub_positive_assign(v, u);
+ zrsh_taint(v, zlsb(v));
+ }
zlsh(a, u, shifts);
SET_SIGNUM(a, neg ? -1 : 1);
+
+ u->chars = u_orig;
+ v->chars = v_orig;
}
diff --git a/src/zmul.c b/src/zmul.c
index ae02844..ab41213 100644
--- a/src/zmul.c
+++ b/src/zmul.c
@@ -57,39 +57,22 @@ zmul(z_t a, z_t b, z_t c)
zsplit(b_high, b_low, b, m2);
zsplit(c_high, c_low, c, m2);
-#if 1
+
zmul(z0, b_low, c_low);
zmul(z2, b_high, c_high);
- zadd(b_low, b_low, b_high);
- zadd(c_low, c_low, c_high);
+ zadd_unsigned_assign(b_low, b_high);
+ zadd_unsigned_assign(c_low, c_high);
zmul(z1, b_low, c_low);
- zsub(z1, z1, z0);
- zsub(z1, z1, z2);
+ zsub_nonnegative_assign(z1, z0);
+ zsub_nonnegative_assign(z1, z2);
zlsh(z1, z1, m2);
m2 <<= 1;
- zlsh(z2, z2, m2);
-
- zadd(a, z2, z1);
- zadd(a, a, z0);
-#else
- zmul(z0, b_low, c_low);
- zmul(z2, b_high, c_high);
- zsub(b_low, b_high, b_low);
- zsub(c_low, c_high, c_low);
- zmul(z1, b_low, c_low);
-
- zlsh(z0, z0, m2 + 1);
- zlsh(z1, z1, m2);
zlsh(a, z2, m2);
- m2 <<= 1;
- zlsh(z2, z2, m2);
- zadd(z2, z2, a);
+ zadd_unsigned_assign(a, z1);
+ zadd_unsigned_assign(a, z0);
- zsub(a, z2, z1);
- zadd(a, a, z0);
-#endif
zfree(z0);
zfree(z1);
diff --git a/src/zsetup.c b/src/zsetup.c
index 921e509..10fc5f5 100644
--- a/src/zsetup.c
+++ b/src/zsetup.c
@@ -1,7 +1,7 @@
/* See LICENSE file for copyright and license details. */
#include "internals.h"
-#define X(x) z_t x;
+#define X(x, s) z_t x;
LIST_TEMPS
#undef X
#define X(x, f, v) z_t x;
@@ -30,8 +30,8 @@ zsetup(jmp_buf env)
memset(libzahl_pool_n, 0, sizeof(libzahl_pool_n));
memset(libzahl_pool_alloc, 0, sizeof(libzahl_pool_alloc));
-#define X(x)\
- zinit(x), zsetu(x, 1);
+#define X(x, s)\
+ zinit(x); if (s) zsetu(x, 1);
LIST_TEMPS;
#undef X
#define X(x, f, v)\
diff --git a/src/zsqr.c b/src/zsqr.c
index bd03128..68480ba 100644
--- a/src/zsqr.c
+++ b/src/zsqr.c
@@ -42,32 +42,17 @@ zsqr(z_t a, z_t b)
zsplit(high, low, b, m2);
-#if 1
+
zsqr(z0, low);
zsqr(z2, high);
zmul(z1, low, high);
zlsh(z1, z1, m2 + 1);
m2 <<= 1;
- zlsh(z2, z2, m2);
-
- zadd(a, z2, z1);
- zadd(a, a, z0);
-#else
- zsqr(z0, low);
- zsqr(z2, high);
- zmul(z1, low, low);
-
- zlsh(z0, z0, m2 + 1);
- zlsh(z1, z1, m2 + 1);
zlsh(a, z2, m2);
- m2 <<= 1;
- zlsh(z2, z2, m2);
- zadd(z2, z2, a);
+ zadd_unsigned_assign(a, z1);
+ zadd_unsigned_assign(a, z0);
- zsub(a, z2, z1);
- zadd(a, a, z0);
-#endif
zfree(z0);
zfree(z1);
diff --git a/src/zsub.c b/src/zsub.c
index b3f12f2..259526f 100644
--- a/src/zsub.c
+++ b/src/zsub.c
@@ -46,7 +46,7 @@ libzahl_zsub_unsigned(z_t a, z_t b, z_t c)
SET_SIGNUM(a, 0);
return;
}
- n = MIN(b->used, c->used);
+ n = b->used;
if (a == b) {
zset(libzahl_tmp_sub, b);
SET(a, c);
@@ -56,7 +56,7 @@ libzahl_zsub_unsigned(z_t a, z_t b, z_t c)
zsub_impl(a, b, n);
}
} else {
- n = MIN(b->used, c->used);
+ n = c->used;
if (unlikely(a == c)) {
zset(libzahl_tmp_sub, c);
SET(a, b);
@@ -77,6 +77,24 @@ zsub_unsigned(z_t a, z_t b, z_t c)
}
void
+zsub_nonnegative_assign(z_t a, z_t b)
+{
+ if (unlikely(zzero(b))) {
+ zabs(a, a);
+ } else if (unlikely(!zcmpmag(a, b))) {
+ SET_SIGNUM(a, 0);
+ } else {
+ zsub_impl(a, b, b->used);
+ }
+}
+
+void
+zsub_positive_assign(z_t a, z_t b)
+{
+ zsub_impl(a, b, b->used);
+}
+
+void
zsub(z_t a, z_t b, z_t c)
{
if (unlikely(zzero(b))) {
diff --git a/src/zunsetup.c b/src/zunsetup.c
index 9926354..ba84dd7 100644
--- a/src/zunsetup.c
+++ b/src/zunsetup.c
@@ -8,7 +8,7 @@ zunsetup(void)
size_t i;
if (libzahl_set_up) {
libzahl_set_up = 0;
-#define X(x)\
+#define X(x, s)\
free(x->chars);
LIST_TEMPS;
#undef X