From 84ad8821d456e8f7f40df43b7eb7245703004ce7 Mon Sep 17 00:00:00 2001
From: Mattias Andrée
Date: Sat, 7 May 2016 18:15:59 +0200
Subject: Optimise libzahl_memcpy for clang
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Mattias Andrée
---
 zahl/memory.h | 45 ++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 38 insertions(+), 7 deletions(-)

(limited to 'zahl/memory.h')

diff --git a/zahl/memory.h b/zahl/memory.h
index b3d6a37..797beab 100644
--- a/zahl/memory.h
+++ b/zahl/memory.h
@@ -34,16 +34,47 @@
 
 
 ZAHL_INLINE void
-libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, size_t n)
+libzahl_memcpy(register zahl_char_t *restrict d, register const zahl_char_t *restrict s, register size_t n)
 {
-	size_t i;
 #define LIBZAHL_X(I) case I: d[I - 1] = s[I - 1];
 	LIBZAHL_SMALL_INPUT_BEGIN(n);
-	for (i = 0; i < n; i += 4) {
-		d[i + 0] = s[i + 0];
-		d[i + 1] = s[i + 1];
-		d[i + 2] = s[i + 2];
-		d[i + 3] = s[i + 3];
+	{
+#if defined(__x86_64__) && !defined(ZAHL_NO_ASM)
+		/* This crap is needed for clang. */
+		register zahl_char_t t;
+		__asm__ __volatile__ (
+# if defined(ZAHL_ISA_MISSING_INDIRECT_JUMP)
+			"\n testq %[e], %[e]"
+			"\n jz 2f"
+# endif
+			"\n shlq $3, %[e]"
+			"\n addq %[d], %[e]"
+			"\n 1:"
+			"\n movq 0(%[s]), %[t]"
+			"\n movq %[t], 0(%[d])"
+			"\n movq 8(%[s]), %[t]"
+			"\n movq %[t], 8(%[d])"
+			"\n movq 16(%[s]), %[t]"
+			"\n movq %[t], 16(%[d])"
+			"\n movq 24(%[s]), %[t]"
+			"\n movq %[t], 24(%[d])"
+			"\n addq $32, %[s]"
+			"\n addq $32, %[d]"
+			"\n cmpq %[e], %[d]"
+			"\n jl 1b"
+# if defined(ZAHL_ISA_MISSING_INDIRECT_JUMP)
+			"\n 2:"
+# endif
+			: [t]"=r"(t), [d]"+r"(d), [s]"+r"(s), [e]"+r"(n));
+#else
+		size_t i;
+		for (i = 0; i < n; i += 4) {
+			d[i + 0] = s[i + 0];
+			d[i + 1] = s[i + 1];
+			d[i + 2] = s[i + 2];
+			d[i + 3] = s[i + 3];
+		}
+#endif
 	}
 	LIBZAHL_SMALL_INPUT_END;
 #undef LIBZAHL_X
-- 
cgit v1.2.3-70-g09d2