diff options
| author | Mattias Andrée <maandree@kth.se> | 2016-05-07 18:15:59 +0200 |
|---|---|---|
| committer | Mattias Andrée <maandree@kth.se> | 2016-05-07 18:15:59 +0200 |
| commit | 84ad8821d456e8f7f40df43b7eb7245703004ce7 (patch) | |
| tree | 9c2101449d73bcc87c3f3ed54df2853dedfe35d8 /zahl | |
| parent | Some small improvements (diff) | |
| download | libzahl-84ad8821d456e8f7f40df43b7eb7245703004ce7.tar.gz libzahl-84ad8821d456e8f7f40df43b7eb7245703004ce7.tar.bz2 libzahl-84ad8821d456e8f7f40df43b7eb7245703004ce7.tar.xz | |
Optimise libzahl_memcpy for clang
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to 'zahl')
| -rw-r--r-- | zahl/memory.h | 45 |
1 files changed, 38 insertions, 7 deletions
```diff
diff --git a/zahl/memory.h b/zahl/memory.h
index b3d6a37..797beab 100644
--- a/zahl/memory.h
+++ b/zahl/memory.h
@@ -34,16 +34,47 @@
 ZAHL_INLINE void
-libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t n)
+libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, register size_t n)
 {
-	size_t i;
 #define LIBZAHL_X(I) case I: d[I - 1] = s[I - 1];
 	LIBZAHL_SMALL_INPUT_BEGIN(n);
-	for (i = 0; i < n; i += 4) {
-		d[i + 0] = s[i + 0];
-		d[i + 1] = s[i + 1];
-		d[i + 2] = s[i + 2];
-		d[i + 3] = s[i + 3];
+	{
+#if defined(__x86_64__) && !defined(ZAHL_NO_ASM)
+		/* This crap is needed for clang. */
+		register zahl_char_t t;
+		__asm__ __volatile__ (
+# if defined(ZAHL_ISA_MISSING_INDIRECT_JUMP)
+			"\n testq %[e], %[e]"
+			"\n jz 2f"
+# endif
+			"\n shlq $3, %[e]"
+			"\n addq %[d], %[e]"
+			"\n 1:"
+			"\n movq 0(%[s]), %[t]"
+			"\n movq %[t], 0(%[d])"
+			"\n movq 8(%[s]), %[t]"
+			"\n movq %[t], 8(%[d])"
+			"\n movq 16(%[s]), %[t]"
+			"\n movq %[t], 16(%[d])"
+			"\n movq 24(%[s]), %[t]"
+			"\n movq %[t], 24(%[d])"
+			"\n addq $32, %[s]"
+			"\n addq $32, %[d]"
+			"\n cmpq %[e], %[d]"
+			"\n jl 1b"
+# if defined(ZAHL_ISA_MISSING_INDIRECT_JUMP)
+			"\n 2:"
+# endif
+			: [t]"=r"(t), [d]"+r"(d), [s]"+r"(s), [e]"+r"(n));
+#else
+		size_t i;
+		for (i = 0; i < n; i += 4) {
+			d[i + 0] = s[i + 0];
+			d[i + 1] = s[i + 1];
+			d[i + 2] = s[i + 2];
+			d[i + 3] = s[i + 3];
+		}
+#endif
 	}
 	LIBZAHL_SMALL_INPUT_END;
 #undef LIBZAHL_X
```
