/* See LICENSE file for copyright and license details. */
#include "common.h"
size_t
libblake_blake2b_force_update(struct libblake_blake2b_state *state, const void *data_, size_t len)
{
	const unsigned char *data = data_;
	size_t off = 0;
	for (; len - off >= 128; off += 128) {
		/* The following optimisations have been tested:
		 *
		 * 1)
		 *     `*(__uint128_t *)state->t += 128;`
		 *     result: slower
		 *
		 * 2)
		 *     addq, adcq using `__asm__ __volatile__`
		 *     result: slower (as 1)
		 *
		 * 3)
		 *     using `__builtin_add_overflow`
		 *     result: no difference
		 *
		 * These tests were performed on amd64 with a compile-time
		 * assumption that `UINT_LEAST64_C(0xFFFFffffFFFFffff) + 1 == 0`,
		 * which the compiler accepted and which thus also covered the
		 * attempted optimisations.
		 *
		 * UNLIKELY does not seem to make any difference, but it
		 * does change the output, theoretically for the better.
		 */
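		/* For reference, a sketch of what variant 3 could look like
		 * (an illustration under the same exactly-64-bit assumption
		 * as above, not necessarily the exact code that was
		 * benchmarked):
		 *
		 *     if (__builtin_add_overflow(state->t[0], 128, &state->t[0]))
		 *         state->t[1] += 1;
		 *
		 * Under that assumption compilers tend to emit essentially the
		 * same carry-propagating add sequence for this and for the
		 * portable code below, which is consistent with the observed
		 * lack of difference.
		 */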
		state->t[0] = (state->t[0] + 128) & UINT_LEAST64_C(0xFFFFffffFFFFffff);
		if (UNLIKELY(state->t[0] < 128))
			state->t[1] = (state->t[1] + 1) & UINT_LEAST64_C(0xFFFFffffFFFFffff);
		libblake_internal_blake2b_compress(state, &data[off]);
	}
	return off;
}
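
/* Illustrative caller-side sketch (an assumed usage pattern, not part of the
 * documented API): the return value is the number of bytes consumed, i.e. a
 * multiple of 128, so a caller that keeps a trailing partial block buffered
 * might do something like
 *
 *     size_t done = libblake_blake2b_force_update(&state, buf, avail);
 *     memmove(buf, &buf[done], avail - done);
 *     avail -= done;
 */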