diff options
Diffstat (limited to '')
| -rw-r--r-- | zahl.h | 1 | ||||
| -rw-r--r-- | zahl/inlines.h | 10 | ||||
| -rw-r--r-- | zahl/internals.h | 61 | ||||
| -rw-r--r-- | zahl/memory.h | 133 |
4 files changed, 143 insertions, 62 deletions
@@ -11,6 +11,7 @@ #include <setjmp.h> #include <stddef.h> #include <stdint.h> +#include <unistd.h> #include "zahl/internals.h" diff --git a/zahl/inlines.h b/zahl/inlines.h index 9884812..c6ddc84 100644 --- a/zahl/inlines.h +++ b/zahl/inlines.h @@ -259,12 +259,16 @@ zsave(z_t a, void *buffer) { if (ZAHL_LIKELY(buffer)) { char *buf = buffer; - *((int *)buf) = a->sign, buf += sizeof(int); + *((long *)buf) = a->sign, buf += sizeof(long); /* Use `long` for alignment. */ *((size_t *)buf) = a->used, buf += sizeof(size_t); - if (ZAHL_LIKELY(!zzero(a))) + if (ZAHL_LIKELY(!zzero(a))) { + a->chars[a->used + 2] = 0; + a->chars[a->used + 1] = 0; + a->chars[a->used + 0] = 0; libzahl_memcpy((zahl_char_t *)buf, a->chars, a->used); + } } - return sizeof(int) + sizeof(size_t) + (zzero(a) ? 0 : a->used * sizeof(zahl_char_t)); + return sizeof(long) + sizeof(size_t) + (zzero(a) ? 0 : ((a->used + 3) & ~3) * sizeof(zahl_char_t)); } diff --git a/zahl/internals.h b/zahl/internals.h index 672dfcd..30e2765 100644 --- a/zahl/internals.h +++ b/zahl/internals.h @@ -51,6 +51,7 @@ #define ZAHL_BITS_PER_CHAR 64 #define ZAHL_LB_BITS_PER_CHAR 6 #define ZAHL_CHAR_MAX UINT64_MAX +#define ZAHL_FLUFF 4 /* Note: These cannot be changed willy-nilly, some code depends * on them, be cause being flexible would just be too painful. */ @@ -113,62 +114,4 @@ extern struct zahl libzahl_tmp_mod[1]; void libzahl_realloc(struct zahl *, size_t); -ZAHL_INLINE void -libzahl_memcpy(register zahl_char_t *d, register const zahl_char_t *s, size_t n) -{ - size_t i; - if (n <= 4) { - if (n >= 1) - d[0] = s[0]; - if (n >= 2) - d[1] = s[1]; - if (n >= 3) - d[2] = s[2]; - if (n >= 4) - d[3] = s[3]; - } else { - for (i = 0; (i += 4) <= n;) { - d[i - 4] = s[i - 4]; - d[i - 3] = s[i - 3]; - d[i - 2] = s[i - 2]; - d[i - 1] = s[i - 1]; - } - if (i > n) { - i -= 4; - if (i < n) - d[i] = s[i], i++; - if (i < n) - d[i] = s[i], i++; - if (i < n) - d[i] = s[i], i++; - if (i < n) - d[i] = s[i]; - } - } -} - -ZAHL_INLINE void -libzahl_memset(register zahl_char_t *a, register zahl_char_t v, size_t n) -{ - size_t i; - if (n <= 4) { - if (n >= 1) - a[0] = v; - if (n >= 2) - a[1] = v; - if (n >= 3) - a[2] = v; - if (n >= 4) - a[3] = v; - } else { - for (i = 0; (i += 4) <= n;) { - a[i - 1] = v; - a[i - 2] = v; - a[i - 3] = v; - a[i - 4] = v; - } - if (i > n) - for (i -= 4; i < n; i++) - a[i] = v; - } -} +#include "memory.h" diff --git a/zahl/memory.h b/zahl/memory.h new file mode 100644 index 0000000..2eb2b68 --- /dev/null +++ b/zahl/memory.h @@ -0,0 +1,133 @@ +/* See LICENSE file for copyright and license details. */ + +#define LIBZAHL_MEM_CASES \ + LIBZAHL_X(20); \ + LIBZAHL_X(19); \ + LIBZAHL_X(18); \ + LIBZAHL_X(17); \ + LIBZAHL_X(16); \ + LIBZAHL_X(15); \ + LIBZAHL_X(14); \ + LIBZAHL_X(13); \ + LIBZAHL_X(12); \ + LIBZAHL_X(11); \ + LIBZAHL_X(10); \ + LIBZAHL_X( 9); \ + LIBZAHL_X( 8); \ + LIBZAHL_X( 7); \ + LIBZAHL_X( 6); \ + LIBZAHL_X( 5); \ + LIBZAHL_X( 4); \ + LIBZAHL_X( 3); \ + LIBZAHL_X( 2); \ + LIBZAHL_X( 1); \ + case 0: break; + + +#if defined(LIBZAHL_ISA_MISSING_INDIRECT_JUMP) +# define LIBZAHL_SMALL_INPUT_BEGIN(n) +# define LIBZAHL_SMALL_INPUT_END +#else +# define LIBZAHL_SMALL_INPUT_BEGIN(n) switch (n) { LIBZAHL_MEM_CASES default: +# define LIBZAHL_SMALL_INPUT_END break; } +#endif + + +ZAHL_INLINE void +libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t n) +{ + size_t i; +#define LIBZAHL_X(I) case I: d[I - 1] = s[I - 1]; + LIBZAHL_SMALL_INPUT_BEGIN(n); + for (i = 0; i < n; i += 4) { + d[i + 0] = s[i + 0]; + d[i + 1] = s[i + 1]; + d[i + 2] = s[i + 2]; + d[i + 3] = s[i + 3]; + } + LIBZAHL_SMALL_INPUT_END; +#undef LIBZAHL_X +} + + +ZAHL_INLINE void +libzahl_memset(register zahl_char_t *a, register zahl_char_t v, size_t n) +{ + size_t i; + for (i = 0; i < n; i += 4) { + a[i + 0] = v; + a[i + 1] = v; + a[i + 2] = v; + a[i + 3] = v; + } +} + +ZAHL_INLINE void +libzahl_memset_precise(register zahl_char_t *a, register zahl_char_t v, size_t n) +{ + size_t i; + if (n <= 4) { + if (n >= 1) + a[0] = v; + if (n >= 2) + a[1] = v; + if (n >= 3) + a[2] = v; + if (n >= 4) + a[3] = v; + } else { + for (i = 0; (i += 4) <= n;) { + a[i - 1] = v; + a[i - 2] = v; + a[i - 3] = v; + a[i - 4] = v; + } + if (i > n) + for (i -= 4; i < n; i++) + a[i] = v; + } +} + + +ZAHL_INLINE void +libzahl_memmovef(register zahl_char_t *d, register const zahl_char_t *s, size_t n) +{ + if (n && n < 4) { + d[0] = s[0]; + d[1] = s[1]; + d[2] = s[2]; + } else { + size_t i; + for (i = 0; i < n; i += 4) { + d[i + 0] = s[i + 0]; + d[i + 1] = s[i + 1]; + d[i + 2] = s[i + 2]; + d[i + 3] = s[i + 3]; + } + } +} + +ZAHL_INLINE void +libzahl_memmoveb(register zahl_char_t *d, register const zahl_char_t *s, size_t n) +{ + ssize_t i; +#define LIBZAHL_X(I) case I: d[I - 1] = s[I - 1]; + LIBZAHL_SMALL_INPUT_BEGIN(n); + for (i = ((ssize_t)n + 3) & ~3; (i -= 4) >= 0;) { + d[i + 3] = s[i + 3]; + d[i + 2] = s[i + 2]; + d[i + 1] = s[i + 1]; + d[i + 0] = s[i + 0]; + } + LIBZAHL_SMALL_INPUT_END; +#undef LIBZAHL_X +} + +ZAHL_INLINE void +libzahl_memmove(register zahl_char_t *d, register const zahl_char_t *s, size_t n) +{ + if (d < s) + libzahl_memmovef(d, s, n); + else + libzahl_memmoveb(d, s, n); +} |
