 libblake_blake2s_force_update.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+), 0 deletions(-)
diff --git a/libblake_blake2s_force_update.c b/libblake_blake2s_force_update.c
index 925d381..5330ab2 100644
--- a/libblake_blake2s_force_update.c
+++ b/libblake_blake2s_force_update.c
@@ -8,6 +8,24 @@ libblake_blake2s_force_update(struct libblake_blake2s_state *state, const void *
 	size_t off = 0;
 	for (; len - off >= 64; off += 64) {
+		/* The following optimisations have been tested:
+		 *
+		 * 1)
+		 *    `*(uint64_t *)state->t += 64;`
+		 *    result: slower
+		 *
+		 * 2)
+		 *    using `__builtin_add_overflow`
+		 *    result: no difference
+		 *
+		 * These tests were performed on amd64 with a compile-time
+		 * assumption that `UINT_LEAST32_C(0xFFFFffff) + 1 == 0`,
+		 * which the compiler accepted and which thus covered the
+		 * attempted optimisations.
+		 *
+		 * UNLIKELY does not seem to make any difference, but it
+		 * does change the output, theoretically for the better.
+		 */
 		state->t[0] = (state->t[0] + 64) & UINT_LEAST32_C(0xFFFFffff);
 		if (state->t[0] < 64)
 			state->t[1] = (state->t[1] + 1) & UINT_LEAST32_C(0xFFFFffff);
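
For context, a minimal standalone sketch of what the `__builtin_add_overflow` variant
mentioned in the comment might look like; the helper name, the plain uint32_t counter
type, and the two-word layout mirroring state->t are assumptions made for illustration,
not libblake's actual code:

	#include <stdint.h>

	/* Hypothetical helper, not part of libblake: advance a two-word
	 * 32-bit block counter (mirroring state->t in the patch) by 64
	 * bytes.  __builtin_add_overflow (a GCC/Clang builtin) returns
	 * nonzero when the low word wraps, in which case the carry is
	 * propagated into the high word. */
	static void
	counter_add_64(uint32_t t[2])
	{
		if (__builtin_add_overflow(t[0], UINT32_C(64), &t[0]))
			t[1] += 1;
	}

As the comment records, this variant made no measurable difference on amd64, so the
portable masked add-and-compare form was kept.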