From 5d77a0178349ecac6536e0374cf689500efa22bc Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Wed, 19 Jan 2022 20:28:55 +0100 Subject: Optimisation for amd64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increased major number as the ABI was broken by insertion of padding into the BLAKE2 parameter structures (except for BLAKE2Xs) Signed-off-by: Mattias Andrée --- libblake_blake2s_force_update.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'libblake_blake2s_force_update.c') diff --git a/libblake_blake2s_force_update.c b/libblake_blake2s_force_update.c index 925d381..5330ab2 100644 --- a/libblake_blake2s_force_update.c +++ b/libblake_blake2s_force_update.c @@ -8,6 +8,24 @@ libblake_blake2s_force_update(struct libblake_blake2s_state *state, const void * size_t off = 0; for (; len - off >= 64; off += 64) { + /* The following optimisations have been tested: + * + * 1) + * `*(uint64_t *)state->t += 64;` + * result: slower + * + * 2) + * using `__builtin_add_overflow` + * result: no difference + * + * These tests were performed on amd64 with a compile-time + * assumption that `UINT_LEAST32_C(0xFFFFffff) + 1 == 0`, + * which the compiler accepted and those included the attempted + * optimisations. + * + * UNLIKELY does not seem to make any difference, but it + * does change the output, theoretically for the better. + */ state->t[0] = (state->t[0] + 64) & UINT_LEAST32_C(0xFFFFffff); if (state->t[0] < 64) state->t[1] = (state->t[1] + 1) & UINT_LEAST32_C(0xFFFFffff); -- cgit v1.2.3-70-g09d2