diff options
| author | Mattias Andrée <maandree@kth.se> | 2016-03-14 17:56:37 +0100 |
|---|---|---|
| committer | Mattias Andrée <maandree@kth.se> | 2016-03-14 17:56:37 +0100 |
| commit | b2c44d8c961090c2773f3a98d12fcafc7f5c5b2b (patch) | |
| tree | 8d7d19fd6ebd768023b3030d3e7b69d985fbf1c5 /src | |
| parent | Fix so that no workaround is required. (diff) | |
| download | libzahl-b2c44d8c961090c2773f3a98d12fcafc7f5c5b2b.tar.gz libzahl-b2c44d8c961090c2773f3a98d12fcafc7f5c5b2b.tar.bz2 libzahl-b2c44d8c961090c2773f3a98d12fcafc7f5c5b2b.tar.xz | |
Mostly optimisations
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to 'src')
| -rw-r--r-- | src/allocator.c | 2 | ||||
| -rw-r--r-- | src/internals.h | 51 | ||||
| -rw-r--r-- | src/zadd.c | 69 | ||||
| -rw-r--r-- | src/zand.c | 37 | ||||
| -rw-r--r-- | src/zcmpi.c | 2 | ||||
| -rw-r--r-- | src/zcmpu.c | 2 | ||||
| -rw-r--r-- | src/zor.c | 31 | ||||
| -rw-r--r-- | src/zrand.c | 8 | ||||
| -rw-r--r-- | src/zseti.c | 14 | ||||
| -rw-r--r-- | src/zsets.c | 6 | ||||
| -rw-r--r-- | src/zsetu.c | 6 | ||||
| -rw-r--r-- | src/zstr.c | 8 | ||||
| -rw-r--r-- | src/zsub.c | 78 | ||||
| -rw-r--r-- | src/ztrunc.c | 4 | ||||
| -rw-r--r-- | src/zxor.c | 35 |
15 files changed, 195 insertions, 158 deletions
diff --git a/src/allocator.c b/src/allocator.c index 44f7985..7737046 100644 --- a/src/allocator.c +++ b/src/allocator.c @@ -28,7 +28,7 @@ libzahl_realloc(z_t a, size_t need) a->chars = new; } else { a->chars = realloc(a->chars, need * sizeof(zahl_char_t)); - if (!a->chars) { + if (unlikely(!a->chars)) { if (!errno) /* sigh... */ errno = ENOMEM; libzahl_failure(errno); diff --git a/src/internals.h b/src/internals.h index c1153ea..c859ce1 100644 --- a/src/internals.h +++ b/src/internals.h @@ -4,7 +4,11 @@ #include <string.h> #include <stdlib.h> #include <errno.h> -#include <limits.h> + +/* clang pretends to be GCC... */ +#if defined(__GNUC__) && defined(__clang__) +# undef __GNUC__ +#endif #define BITS_PER_CHAR 64 #define LB_BITS_PER_CHAR 6 @@ -14,6 +18,32 @@ #define CEILING_BITS_TO_CHARS(bits) (((bits) + (BITS_PER_CHAR - 1)) >> LB_BITS_PER_CHAR) #define BITS_IN_LAST_CHAR(bits) ((bits) & (BITS_PER_CHAR - 1)) +#if defined(__GNUC__) +# define O0 __attribute__((optimize("O0"))) +# define O1 __attribute__((optimize("O1"))) +# define O2 __attribute__((optimize("O2"))) +# define O3 __attribute__((optimize("O3"))) +# define Ofast __attribute__((optimize("Ofast"))) +# define Os __attribute__((optimize("Os"))) +# define Oz __attribute__((optimize("Os"))) +#elif defined(__clang__) +# define O0 __attribute__((optnone)) +# define O1 /* Don't know how. */ +# define O2 /* Don't know how. */ +# define O3 /* Don't know how. */ +# define Ofast /* Don't know how. */ +# define Os /* Don't know how. */ +# define Oz /* Don't know how. */ +#else +# define O0 /* Don't know how. */ +# define O1 /* Don't know how. */ +# define O2 /* Don't know how. */ +# define O3 /* Don't know how. */ +# define Ofast /* Don't know how. */ +# define Os /* Don't know how. */ +# define Oz /* Don't know how. */ +#endif + #define LIST_TEMPS\ X(libzahl_tmp_cmp)\ X(libzahl_tmp_str_num)\ @@ -77,13 +107,14 @@ extern size_t libzahl_pool_alloc[sizeof(size_t) * 8]; #define TRIM(a) for (; (a)->used && !(a)->chars[(a)->used - 1]; (a)->used--) #define TRIM_NONZERO(a) for (; !(a)->chars[(a)->used - 1]; (a)->used--) #define TRIM_AND_ZERO(a) do { TRIM(a); if (!(a)->used) SET_SIGNUM(a, 0); } while (0) +#define TRIM_AND_SIGN(a, s) do { TRIM(a); SET_SIGNUM(a, (a)->used ? (s) : 0); } while (0) #define znegative(a) (zsignum(a) < 0) #define znegative1(a, b) ((zsignum(a) | zsignum(b)) < 0) #define znegative2(a, b) ((zsignum(a) & zsignum(b)) < 0) #define zpositive(a) (zsignum(a) > 0) #define zpositive1(a, b) (zpositive(a) + zpositive(b) > 0) #define zpositive2(a, b) (zsignum(a) + zsignum(b) == 2) -#define zzero1(a, b) (zzero(a) + zzero(b) > 0) +#define zzero1(a, b) (zzero(a) || zzero(b)) #define zzero2(a, b) (!(zsignum(a) | zsignum(b))) #define zmemmove(d, s, n) memmove((d), (s), (n) * sizeof(zahl_char_t)) @@ -138,3 +169,19 @@ libzahl_msb_nz_llu(unsigned long long int x) return r; } #endif + +#if defined(__GNUC__) || defined(__clang__) +# if INT64_MAX == LONG_MAX +# define libzahl_add_overflow(rp, a, b) __builtin_uaddl_overflow(a, b, rp) +# else +# define libzahl_add_overflow(rp, a, b) __builtin_uaddll_overflow(a, b, rp) +# endif +#else +static inline int +libzahl_add_overflow(zahl_char_t *rp, zahl_char_t a, zahl_char_t b) +{ + int carry = ZAHL_CHAR_MAX - a < b; + *rp = a + b; + return carry; +} +#endif @@ -2,12 +2,29 @@ #include "internals.h" -void +static inline void +zadd_impl(z_t a, z_t b, size_t n) +{ + zahl_char_t carry = 0, tcarry; + size_t i; + + for (i = 0; i < n; i++) { + tcarry = libzahl_add_overflow(a->chars + i, a->chars[i], b->chars[i]); + carry = tcarry | libzahl_add_overflow(a->chars + i, a->chars[i], carry); + } + while (carry) { + carry = libzahl_add_overflow(a->chars + i, a->chars[i], 1); + i++; + } + + if (a->used < i) + a->used = i; +} + +inline void zadd_unsigned(z_t a, z_t b, z_t c) { - size_t i, size, n; - uint32_t carry[] = {0, 0}; - zahl_char_t *addend; + size_t size, n; if (unlikely(zzero(b))) { zabs(a, c); @@ -28,42 +45,26 @@ zadd_unsigned(z_t a, z_t b, z_t c) n = c->used; zmemset(a->chars + a->used, 0, n - a->used); } - addend = c->chars; + zadd_impl(a, c, n); } else if (unlikely(a == c)) { if (a->used < b->used) { n = b->used; zmemset(a->chars + a->used, 0, n - a->used); } - addend = b->chars; - } else if (b->used > c->used) { + zadd_impl(a, b, n); + } else if (likely(b->used > c->used)) { zmemcpy(a->chars, b->chars, b->used); a->used = b->used; - addend = c->chars; + zadd_impl(a, c, n); } else { zmemcpy(a->chars, c->chars, c->used); a->used = c->used; - addend = b->chars; + zadd_impl(a, b, n); } - for (i = 0; i < n; i++) { - if (carry[i & 1]) - carry[~i & 1] = (ZAHL_CHAR_MAX - a->chars[i] <= addend[i]); - else - carry[~i & 1] = (ZAHL_CHAR_MAX - a->chars[i] < addend[i]); - a->chars[i] += addend[i] + carry[i & 1]; - } - - while (carry[i & 1]) { - carry[~i & 1] = a->chars[i] == ZAHL_CHAR_MAX; - a->chars[i++] += 1; - } - - if (a->used < i) - a->used = i; SET_SIGNUM(a, 1); } - void zadd(z_t a, z_t b, z_t c) { @@ -71,19 +72,15 @@ zadd(z_t a, z_t b, z_t c) SET(a, c); } else if (unlikely(zzero(c))) { SET(a, b); - } else if (unlikely(b == c)) { - zlsh(a, b, 1); - } else if (unlikely(znegative1(b, c))) { - if (znegative(b)) { - if (znegative(c)) { - zadd_unsigned(a, b, c); - SET_SIGNUM(a, -zsignum(a)); - } else { - zsub_unsigned(a, c, b); - } + } else if (unlikely(znegative(b))) { + if (znegative(c)) { + zadd_unsigned(a, b, c); + SET_SIGNUM(a, -zsignum(a)); } else { - zsub_unsigned(a, b, c); + zsub_unsigned(a, c, b); } + } else if (unlikely(znegative(c))) { + zsub_unsigned(a, b, c); } else { zadd_unsigned(a, b, c); } @@ -2,36 +2,37 @@ #include "internals.h" +static inline O2 void +zand_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n) +{ + size_t i; + for (i = 0; i < n; i++) + a[i] &= b[i]; +} + void zand(z_t a, z_t b, z_t c) { - size_t n; - - if (unlikely(zzero1(b, c))) { + /* Yes, you are reading this right. It's an optimisation. */ + if (unlikely(zzero(b))) { + SET_SIGNUM(a, 0); + return; + } else if (unlikely(zzero(c))) { SET_SIGNUM(a, 0); return; } - n = MIN(b->used, c->used); - while (n--) - if (b->chars[n] & c->chars[n]) - goto found_highest; - SET_SIGNUM(a, 0); - return; + a->used = MIN(b->used, c->used); -found_highest: - a->used = ++n; if (a == b) { - while (n--) - a->chars[n] &= c->chars[n]; + zand_impl(a->chars, c->chars, a->used); } else if (unlikely(a == c)) { - while (n--) - a->chars[n] &= b->chars[n]; + zand_impl(a->chars, b->chars, a->used); } else { ENSURE_SIZE(a, a->used); zmemcpy(a->chars, c->chars, a->used); - while (n--) - a->chars[n] &= b->chars[n]; + zand_impl(a->chars, b->chars, a->used); } - SET_SIGNUM(a, zpositive1(b, c) * 2 - 1); + + TRIM_AND_SIGN(a, zpositive1(b, c) * 2 - 1); } diff --git a/src/zcmpi.c b/src/zcmpi.c index 54abcd9..71391b1 100644 --- a/src/zcmpi.c +++ b/src/zcmpi.c @@ -3,7 +3,7 @@ int -zcmpi(z_t a, long long int b) +zcmpi(z_t a, int64_t b) { if (unlikely(!b)) return zsignum(a); diff --git a/src/zcmpu.c b/src/zcmpu.c index 40e33b6..8bba692 100644 --- a/src/zcmpu.c +++ b/src/zcmpu.c @@ -3,7 +3,7 @@ int -zcmpu(z_t a, unsigned long long int b) +zcmpu(z_t a, uint64_t b) { if (unlikely(!b)) return zsignum(a); @@ -2,16 +2,21 @@ #include "internals.h" +static inline O2 void +zor_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n) +{ + size_t i; + for (i = 0; i < n; i++) + a[i] |= b[i]; +} + void zor(z_t a, z_t b, z_t c) { size_t n, m; if (unlikely(zzero(b))) { - if (zzero(c)) - SET_SIGNUM(a, 0); - else - SET(a, c); + SET(a, c); return; } else if (unlikely(zzero(c))) { SET(a, b); @@ -24,23 +29,19 @@ zor(z_t a, z_t b, z_t c) ENSURE_SIZE(a, m); if (a == b) { - if (b->used < c->used) + if (a->used < c->used) zmemcpy(a->chars + n, c->chars + n, m - n); - while (n--) - a->chars[n] |= c->chars[n]; + zor_impl(a->chars, c->chars, n); } else if (unlikely(a == c)) { - if (c->used < b->used) + if (a->used < b->used) zmemcpy(a->chars + n, b->chars + n, m - n); - while (n--) - a->chars[n] |= b->chars[n]; - } else if (m == b->used) { + zor_impl(a->chars, b->chars, n); + } else if (m == b->used) { zmemcpy(a->chars, b->chars, m); - while (n--) - a->chars[n] |= c->chars[n]; + zor_impl(a->chars, c->chars, n); } else { zmemcpy(a->chars, c->chars, m); - while (n--) - a->chars[n] |= b->chars[n]; + zor_impl(a->chars, b->chars, n); } a->used = m; diff --git a/src/zrand.c b/src/zrand.c index 64fa7ed..1bb1a90 100644 --- a/src/zrand.c +++ b/src/zrand.c @@ -26,7 +26,7 @@ zrand_get_random_bits(z_t r, size_t bits, int fd) for (n = chars * sizeof(zahl_char_t); n;) { read_just = read(fd, buf + read_total, n); - if (read_just < 0) + if (unlikely(read_just < 0)) libzahl_failure(errno); read_total += (size_t)read_just; n -= (size_t)read_just; @@ -38,7 +38,7 @@ zrand_get_random_bits(z_t r, size_t bits, int fd) r->chars[chars - 1] &= mask; for (n = chars; n--;) { - if (r->chars[n]) { + if (likely(r->chars[n])) { r->used = n + 1; SET_SIGNUM(r, 1); return; @@ -71,7 +71,7 @@ zrand(z_t r, enum zranddev dev, enum zranddist dist, z_t n) } fd = open(pathname, O_RDONLY); - if (fd < 0) + if (unlikely(fd < 0)) libzahl_failure(errno); switch (dist) { @@ -91,7 +91,7 @@ zrand(z_t r, enum zranddev dev, enum zranddist dist, z_t n) bits = zbits(n); do zrand_get_random_bits(r, bits, fd); - while (zcmpmag(r, n) > 0); + while (unlikely(zcmpmag(r, n) > 0)); break; default: diff --git a/src/zseti.c b/src/zseti.c deleted file mode 100644 index e1116b6..0000000 --- a/src/zseti.c +++ /dev/null @@ -1,14 +0,0 @@ -/* See LICENSE file for copyright and license details. */ -#include "internals.h" - - -void -zseti(z_t a, long long int b) -{ - if (unlikely(b >= 0)) { - zsetu(a, (unsigned long long int)b); - } else { - zsetu(a, (unsigned long long int)-b); - SET_SIGNUM(a, -1); - } -} diff --git a/src/zsets.c b/src/zsets.c index bddc9ec..e1506f6 100644 --- a/src/zsets.c +++ b/src/zsets.c @@ -3,6 +3,10 @@ #include <ctype.h> +#ifdef __GNUC__ +# pragma GCC diagnostic ignored "-Wswitch-default" +#endif + int zsets(z_t a, const char *str) @@ -44,7 +48,7 @@ zsets(z_t a, const char *str) } } - if (neg) + if (unlikely(neg)) SET_SIGNUM(a, -zsignum(a)); return 0; } diff --git a/src/zsetu.c b/src/zsetu.c index 538ea37..42e8cec 100644 --- a/src/zsetu.c +++ b/src/zsetu.c @@ -1,17 +1,15 @@ /* See LICENSE file for copyright and license details. */ #include "internals.h" -#define SIZE_MULTIPLE(fit, in) ((sizeof(fit) + sizeof(in) - 1) / sizeof(in)) - void -zsetu(z_t a, unsigned long long int b) +zsetu(z_t a, uint64_t b) { if (!b) { SET_SIGNUM(a, 0); return; } - ENSURE_SIZE(a, SIZE_MULTIPLE(b, *(a->chars))); + ENSURE_SIZE(a, 1); SET_SIGNUM(a, 1); a->chars[0] = (zahl_char_t)b; a->used = 1; @@ -19,8 +19,8 @@ zstr(z_t a, char *b) size_t n, len, neg; char overridden = 0; - if (zzero(a)) { - if (!b && !(b = malloc(2))) + if (unlikely(zzero(a))) { + if (unlikely(!b) && unlikely(!(b = malloc(2)))) libzahl_failure(errno); b[0] = '0'; b[1] = 0; @@ -29,7 +29,7 @@ zstr(z_t a, char *b) n = zstr_length(a, 10); - if (!b && !(b = malloc(n + 1))) + if (unlikely(!b) && unlikely(!(b = malloc(n + 1)))) libzahl_failure(errno); neg = znegative(a); @@ -41,7 +41,7 @@ zstr(z_t a, char *b) for (;;) { zdivmod(num, rem, num, libzahl_const_1e19); - if (!zzero(num)) { + if (likely(!zzero(num))) { sprintf(b + n, "%019llu", zzero(rem) ? 0ULL : (unsigned long long)(rem->chars[0])); b[n + 19] = overridden; overridden = b[n]; @@ -2,13 +2,34 @@ #include "internals.h" -void +static inline void +zsub_impl(z_t a, z_t b, size_t n) +{ + zahl_char_t carry = 0, tcarry; + size_t i; + + for (i = 0; i < n; i++) { + tcarry = carry ? (a->chars[i] <= b->chars[i]) : (a->chars[i] < b->chars[i]); + a->chars[i] -= b->chars[i]; + a->chars[i] -= carry; + carry = tcarry; + } + + if (carry) { + while (!a->chars[i]) + a->chars[i++] = ZAHL_CHAR_MAX; + if (a->chars[i] == 1) + a->used--; + else + a->chars[i] -= 1; + } +} + +inline void zsub_unsigned(z_t a, z_t b, z_t c) { - zahl_char_t carry[] = {0, 0}; - zahl_char_t *s; - size_t i, n; int magcmp; + size_t n; if (unlikely(zzero(b))) { zabs(a, c); @@ -20,64 +41,51 @@ zsub_unsigned(z_t a, z_t b, z_t c) } magcmp = zcmpmag(b, c); - if (magcmp <= 0) { - if (magcmp == 0) { + if (unlikely(magcmp <= 0)) { + if (unlikely(magcmp == 0)) { SET_SIGNUM(a, 0); return; } n = MIN(b->used, c->used); if (a == b) { zset(libzahl_tmp_sub, b); - s = libzahl_tmp_sub->chars; + SET(a, c); + zsub_impl(a, libzahl_tmp_sub, n); } else { - s = b->chars; + SET(a, c); + zsub_impl(a, b, n); } - SET(a, c); } else { n = MIN(b->used, c->used); - if (a == c) { + if (unlikely(a == c)) { zset(libzahl_tmp_sub, c); - s = libzahl_tmp_sub->chars; + SET(a, b); + zsub_impl(a, libzahl_tmp_sub, n); } else { - s = c->chars; + SET(a, b); + zsub_impl(a, c, n); } - SET(a, b); - } - - for (i = 0; i < n; i++) { - carry[~i & 1] = carry[i & 1] ? (a->chars[i] <= s[i]) : (a->chars[i] < s[i]); - a->chars[i] -= s[i]; - a->chars[i] -= carry[i & 1]; } - if (carry[i & 1]) { - while (!a->chars[i]) - a->chars[i++] = ZAHL_CHAR_MAX; - a->chars[i] -= 1; - } SET_SIGNUM(a, magcmp); } void zsub(z_t a, z_t b, z_t c) { - if (unlikely(b == c)) { - SET_SIGNUM(a, 0); - } else if (unlikely(zzero(b))) { + if (unlikely(zzero(b))) { zneg(a, c); } else if (unlikely(zzero(c))) { SET(a, b); - } else if (unlikely(znegative1(b, c))) { - if (znegative(b)) { - if (znegative(c)) { - zsub_unsigned(a, c, b); - } else { - zadd_unsigned(a, b, c); - SET_SIGNUM(a, -zsignum(a)); - } + } else if (unlikely(znegative(b))) { + if (znegative(c)) { + zsub_unsigned(a, c, b); } else { zadd_unsigned(a, b, c); + SET_SIGNUM(a, -zsignum(a)); } + } else if (znegative(c)) { + zadd_unsigned(a, b, c); } else { zsub_unsigned(a, b, c); } diff --git a/src/ztrunc.c b/src/ztrunc.c index 91d2a92..e1a330c 100644 --- a/src/ztrunc.c +++ b/src/ztrunc.c @@ -29,7 +29,5 @@ ztrunc(z_t a, z_t b, size_t bits) a->chars[a->used - 1] &= mask; } - TRIM(a); - if (!a->used) - SET_SIGNUM(a, 0); + TRIM_AND_ZERO(a); } @@ -2,16 +2,21 @@ #include "internals.h" +static inline void +zxor_impl(zahl_char_t *restrict a, const zahl_char_t *restrict b, size_t n) +{ + size_t i; + for (i = 0; i < n; i++) + a[i] ^= b[i]; +} + void zxor(z_t a, z_t b, z_t c) { size_t n, m; if (unlikely(zzero(b))) { - if (zzero(c)) - SET_SIGNUM(a, 0); - else - SET(a, c); + SET(a, c); return; } else if (unlikely(zzero(c))) { SET(a, b); @@ -24,29 +29,21 @@ zxor(z_t a, z_t b, z_t c) ENSURE_SIZE(a, m); if (a == b) { - if (b->used < c->used) + if (a->used < c->used) zmemcpy(a->chars + n, c->chars + n, m - n); - while (n--) - a->chars[n] ^= c->chars[n]; + zxor_impl(a->chars, c->chars, n); } else if (unlikely(a == c)) { - if (c->used < b->used) + if (a->used < b->used) zmemcpy(a->chars + n, b->chars + n, m - n); - while (n--) - a->chars[n] ^= b->chars[n]; + zxor_impl(a->chars, b->chars, n); } else if (m == b->used) { zmemcpy(a->chars, b->chars, m); - while (n--) - a->chars[n] ^= c->chars[n]; + zxor_impl(a->chars, c->chars, n); } else { zmemcpy(a->chars, c->chars, m); - while (n--) - a->chars[n] ^= b->chars[n]; + zxor_impl(a->chars, b->chars, n); } a->used = m; - TRIM(a); - if (a->used) - SET_SIGNUM(a, 1 - 2 * ((zsignum(b) ^ zsignum(c)) < 0)); - else - SET_SIGNUM(a, 0); + TRIM_AND_SIGN(a, 1 - 2 * ((zsignum(b) ^ zsignum(c)) < 0)); } |
