about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/zadd.c | 132
1 files changed, 55 insertions, 77 deletions
diff --git a/src/zadd.c b/src/zadd.c
index 8efdf19..5eb7050 100644
--- a/src/zadd.c
+++ b/src/zadd.c
@@ -4,116 +4,94 @@
#if defined(__x86_64__)
# define ASM3(code) \
- __asm__ __volatile__ (code : "+d"(carry) : "a"(ac + i), "b"(bc + i), "c"(cc + i))
+ __asm__ __volatile__ (code : [x]"+r"(carry), [a]"+r"(ac), [b]"+r"(bc), [c]"+r"(cc))
# define ASM2(code) \
- __asm__ __volatile__ (code : "+d"(carry) : "a"(ac + i), "b"(bc + i))
+ __asm__ __volatile__ (code : [x]"+r"(carry), [a]"+r"(ac), [b]"+r"(bc))
-# define ADD2(off) \
- "\n movq "#off"(%%rbx), %%rdx" \
- "\n adcq %%rdx, "#off"(%%rax)"
+# define ADD2(off) \
+ "\n movq "#off"(%[b]), %[x]" \
+ "\n adcq %[x], "#off"(%[a])"
-# define ADD3(off) \
- "\n movq "#off"(%%rbx), %%rdx" \
- "\n adcq "#off"(%%rcx), %%rdx" \
- "\n movq %%rdx, "#off"(%%rax)"
+# define ADD3(off) \
+ "\n movq "#off"(%[b]), %[x]" \
+ "\n adcq "#off"(%[c]), %[x]" \
+ "\n movq %[x], "#off"(%[a])"
# define WRAP_CARRY(interior) \
- "\n clc" \
- "\n cmpq $0, %%rdx" \
- "\n je 1f" \
- "\n stc" \
- "\n 1:" \
+ "\n addq $-1, %[x]" \
interior \
- "\n movq $1, %%rdx" \
+ "\n movq $1, %[x]" \
"\n jc 1f" \
- "\n movq $0, %%rdx" \
+ "\n movq $0, %[x]" \
"\n 1:"
+
+# define ASM_ADD(N) \
+ do { \
+ register zahl_char_t carry = 0; \
+ size_t i; \
+ for (i = 0; (INC(4)), (i += 4) <= n;) \
+ ASM##N(WRAP_CARRY(ADD##N(-32) ADD##N(-24) ADD##N(-16) ADD##N(-8))); \
+ switch (n & 3) { \
+ case 3: \
+ ASM##N(WRAP_CARRY(ADD##N(-32) ADD##N(-24) ADD##N(-16))); \
+ break; \
+ case 2: \
+ ASM##N(WRAP_CARRY(ADD##N(-32) ADD##N(-24))); \
+ break; \
+ case 1: \
+ ASM##N(WRAP_CARRY(ADD##N(-32))); \
+ break; \
+ default: \
+ break; \
+ } \
+ i = n; \
+ while (carry) { \
+ carry = libzahl_add_overflow(a->chars + i, a->chars[i], 1); \
+ i++; \
+ } \
+ if (a->used < i) \
+ a->used = i; \
+ } while (0)
#endif
static inline void
zadd_impl_4(z_t a, z_t b, z_t c, size_t n)
{
- zahl_char_t carry = 0, *ac = a->chars, *bc = b->chars, *cc = c->chars;
- size_t i;
-
-#if defined(__x86_64__)
- for (i = 0; (i += 4) <= n;)
- ASM3(WRAP_CARRY(ADD3(-32) ADD3(-24) ADD3(-16) ADD3(-8)));
- if (i > n) {
- i -= 4;
- switch (n & 3) {
- case 3:
- ASM3(WRAP_CARRY(ADD3(0) ADD3(8) ADD3(16)));
- break;
- case 2:
- ASM3(WRAP_CARRY(ADD3(0) ADD3(8)));
- break;
- case 1:
- ASM3(WRAP_CARRY(ADD3(0)));
- break;
- default:
- break;
- }
- }
- i = n;
-
- while (carry) {
- carry = libzahl_add_overflow(ac + i, ac[i], 1);
- i++;
- }
+#ifdef ASM_ADD
+ register zahl_char_t *ac = a->chars, *bc = b->chars, *cc = c->chars;
+# define INC(P) (ac += (P), bc += (P), cc += (P))
+ ASM_ADD(3);
+# undef INC
#else
- zahl_char_t tcarry;
+ zahl_char_t carry = 0, tcarry;
+ zahl_char_t *ac = a->chars, *bc = b->chars, *cc = c->chars;
+ size_t i;
for (i = 0; i < n; i++) {
tcarry = libzahl_add_overflow(ac + i, bc[i], cc[i]);
carry = tcarry | (zahl_char_t)libzahl_add_overflow(ac + i, ac[i], carry);
}
+
while (carry) {
carry = libzahl_add_overflow(ac + i, ac[i], 1);
i++;
}
-#endif
if (a->used < i)
a->used = i;
+#endif
}
static inline void
zadd_impl_3(z_t a, z_t b, size_t n)
{
-#if defined(__x86_64__)
- zahl_char_t carry = 0, *ac = a->chars, *bc = b->chars;
- size_t i;
-
- for (i = 0; (i += 4) <= n;)
- ASM2(WRAP_CARRY(ADD2(-32) ADD2(-24) ADD2(-16) ADD2(-8)));
- if (i > n) {
- i -= 4;
- switch (n & 3) {
- case 3:
- ASM2(WRAP_CARRY(ADD2(0) ADD2(8) ADD2(16)));
- break;
- case 2:
- ASM2(WRAP_CARRY(ADD2(0) ADD2(8)));
- break;
- case 1:
- ASM2(WRAP_CARRY(ADD2(0)));
- break;
- default:
- break;
- }
- }
- i = n;
-
- while (carry) {
- carry = libzahl_add_overflow(ac + i, ac[i], 1);
- i++;
- }
-
- if (a->used < i)
- a->used = i;
+#ifdef ASM_ADD
+ register zahl_char_t *ac = a->chars, *bc = b->chars;
+# define INC(P) (ac += (P), bc += (P))
+ ASM_ADD(2);
+# undef INC
#else
zadd_impl_4(a, a, b, n);
#endif