diff options
| author | Mattias Andrée <maandree@kth.se> | 2016-05-04 21:04:52 +0200 |
|---|---|---|
| committer | Mattias Andrée <maandree@kth.se> | 2016-05-04 21:04:52 +0200 |
| commit | fbace74784b115d24441d2a67b932a96011d7012 (patch) | |
| tree | 9adaaccb875eccf53ca415777f626bae8d9a5942 | |
| parent | Optimise znot (diff) | |
| download | libzahl-fbace74784b115d24441d2a67b932a96011d7012.tar.gz libzahl-fbace74784b115d24441d2a67b932a96011d7012.tar.bz2 libzahl-fbace74784b115d24441d2a67b932a96011d7012.tar.xz | |
Optimise zlsh
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to '')
| -rw-r--r-- | src/internals.h | 47 | ||||
| -rw-r--r-- | src/zlsh.c | 7 | ||||
| -rw-r--r-- | zahl-internals.h | 10 |
3 files changed, 49 insertions, 15 deletions
diff --git a/src/internals.h b/src/internals.h
index a475ada..0690ad3 100644
--- a/src/internals.h
+++ b/src/internals.h
@@ -4,6 +4,7 @@
 #include <errno.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 
 /* clang pretends to be GCC... */
 #if defined(__GNUC__) && defined(__clang__)
@@ -103,7 +104,6 @@ extern void *libzahl_temp_allocation;
 #define zpositive1(a, b) (zpositive(a) + zpositive(b) > 0)
 #define zpositive2(a, b) (zsignum(a) + zsignum(b) == 2)
 #define zzero2(a, b) (!(zsignum(a) | zsignum(b)))
-#define zmemmove(d, s, n) memmove((d), (s), (n) * sizeof(zahl_char_t))
 #define zmemcpy(d, s, n) libzahl_memcpy(d, s, n)
 #define zmemset(a, v, n) libzahl_memset(a, v, n)
 
@@ -245,12 +245,8 @@ static inline void
 zswap_tainted_unsigned(z_t a, z_t b)
 {
 	z_t t;
-	t->used = b->used;
-	b->used = a->used;
-	a->used = t->used;
-	t->chars = b->chars;
-	b->chars = a->chars;
-	a->chars = t->chars;
+	SWAP(a, b, t, used);
+	SWAP(b, a, t, chars);
 }
 
 static inline void
@@ -398,3 +394,40 @@ zfree_temp(z_t a)
 			a__[i__] = OP(b__[i__]); \
 		} \
 	} while (0)
+
+static inline void
+zmemcpyb(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t n_)
+{
+	ssize_t i, n = (ssize_t)n_;
+	switch (n & 3) {
+	case 3:
+		d[n - 1] = s[n - 1];
+		d[n - 2] = s[n - 2];
+		d[n - 3] = s[n - 3];
+		break;
+	case 2:
+		d[n - 1] = s[n - 1];
+		d[n - 2] = s[n - 2];
+		break;
+	case 1:
+		d[n - 1] = s[n - 1];
+		break;
+	default:
+		break;
+	}
+	for (i = n & ~3; (i -= 4) >= 0;) {
+		d[i + 3] = s[i + 3];
+		d[i + 2] = s[i + 2];
+		d[i + 1] = s[i + 1];
+		d[i + 0] = s[i + 0];
+	}
+}
+
+static inline void
+zmemmove(register zahl_char_t *d, register const zahl_char_t *s, size_t n)
+{
+	if (d < s)
+		zmemcpy(d, s, n);
+	else
+		zmemcpyb(d, s, n);
+}
diff --git a/src/zlsh.c b/src/zlsh.c
--- a/src/zlsh.c
+++ b/src/zlsh.c
@@ -18,10 +18,11 @@ zlsh(z_t a, z_t b, size_t bits)
 	cbits = BITS_PER_CHAR - bits;
 
 	ENSURE_SIZE(a, b->used + chars + 1);
-	if (likely(a == b))
-		zmemmove(a->chars + chars, b->chars, b->used);
-	else
+	if (likely(a == b)) {
+		zmemcpyb(a->chars + chars, b->chars, b->used);
+	} else {
 		zmemcpy(a->chars + chars, b->chars, b->used);
+	}
 	zmemset(a->chars, 0, chars);
 	a->used = b->used + chars;
 
diff --git a/zahl-internals.h b/zahl-internals.h
index fc6768a..5c9cc5e 100644
--- a/zahl-internals.h
+++ b/zahl-internals.h
@@ -110,7 +110,7 @@ struct zahl {
 void libzahl_realloc(struct zahl *, size_t);
 
 ZAHL_INLINE void
-libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t n)
+libzahl_memcpy(register zahl_char_t *d, register const zahl_char_t *s, size_t n)
 {
 	size_t i;
 	if (n <= 4) {
@@ -124,10 +124,10 @@ libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *res
 		d[3] = s[3];
 	} else {
 		for (i = 0; (i += 4) <= n;) {
-			d[i - 1] = s[i - 1];
-			d[i - 2] = s[i - 2];
-			d[i - 3] = s[i - 3];
 			d[i - 4] = s[i - 4];
+			d[i - 3] = s[i - 3];
+			d[i - 2] = s[i - 2];
+			d[i - 1] = s[i - 1];
 		}
 		if (i > n) {
 			i -= 4;
@@ -138,7 +138,7 @@ libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *res
 			if (i < n)
 				d[i] = s[i], i++;
 			if (i < n)
-				d[i] = s[i], i++;
+				d[i] = s[i];
 		}
 	}
 }
