From 5d77a0178349ecac6536e0374cf689500efa22bc Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Wed, 19 Jan 2022 20:28:55 +0100 Subject: Optimisation for amd64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Increased major number as the ABI was broken by insertion of padding into the BLAKE2 parameter structures (except for BLAKE2Xs) Signed-off-by: Mattias Andrée --- libblake_blake2s_init.c | 78 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 25 deletions(-) (limited to 'libblake_blake2s_init.c') diff --git a/libblake_blake2s_init.c b/libblake_blake2s_init.c index c4b126c..fae7e0c 100644 --- a/libblake_blake2s_init.c +++ b/libblake_blake2s_init.c @@ -1,6 +1,21 @@ /* See LICENSE file for copyright and license details. */ #include "common.h" +#if defined(LITTLE_ENDIAN) +# define le32(X) X +#else +static uint_least32_t +le32(uint_least32_t h) +{ + unsigned char r[4]; + r[0] = (unsigned char)((h >> 0) & 255); + r[1] = (unsigned char)((h >> 8) & 255); + r[2] = (unsigned char)((h >> 16) & 255); + r[3] = (unsigned char)((h >> 24) & 255); + return *(uint_least32_t *)r; +} +#endif + void libblake_blake2s_init(struct libblake_blake2s_state *state, const struct libblake_blake2s_params *params, const unsigned char *key) { @@ -18,31 +33,44 @@ libblake_blake2s_init(struct libblake_blake2s_state *state, const struct libblak state->f[0] = 0; state->f[1] = 0; - state->h[0] ^= ((uint_least32_t)params->digest_len & 255) << 0; - state->h[0] ^= ((uint_least32_t)params->key_len & 255) << 8; - state->h[0] ^= ((uint_least32_t)params->fanout & 255) << 16; - state->h[0] ^= ((uint_least32_t)params->depth & 255) << 24; - state->h[1] ^= params->leaf_len & UINT_LEAST32_C(0xFFFFffff); - state->h[2] ^= (uint_least32_t)((params->node_offset >> 0) & UINT_LEAST64_C(0xFFFFffff)); - state->h[3] ^= (uint_least32_t)((params->node_offset >> 32) & UINT_LEAST64_C(0xFFFF)) << 0; - state->h[3] ^= 
((uint_least32_t)params->node_depth & 255) << 16; - state->h[3] ^= ((uint_least32_t)params->inner_len & 255) << 24; - state->h[4] ^= ((uint_least32_t)params->salt[0] & 255) << 0; - state->h[4] ^= ((uint_least32_t)params->salt[1] & 255) << 8; - state->h[4] ^= ((uint_least32_t)params->salt[2] & 255) << 16; - state->h[4] ^= ((uint_least32_t)params->salt[3] & 255) << 24; - state->h[5] ^= ((uint_least32_t)params->salt[4] & 255) << 0; - state->h[5] ^= ((uint_least32_t)params->salt[5] & 255) << 8; - state->h[5] ^= ((uint_least32_t)params->salt[6] & 255) << 16; - state->h[5] ^= ((uint_least32_t)params->salt[7] & 255) << 24; - state->h[6] ^= ((uint_least32_t)params->pepper[0] & 255) << 0; - state->h[6] ^= ((uint_least32_t)params->pepper[1] & 255) << 8; - state->h[6] ^= ((uint_least32_t)params->pepper[2] & 255) << 16; - state->h[6] ^= ((uint_least32_t)params->pepper[3] & 255) << 24; - state->h[7] ^= ((uint_least32_t)params->pepper[4] & 255) << 0; - state->h[7] ^= ((uint_least32_t)params->pepper[5] & 255) << 8; - state->h[7] ^= ((uint_least32_t)params->pepper[6] & 255) << 16; - state->h[7] ^= ((uint_least32_t)params->pepper[7] & 255) << 24; + if (offsetof(struct libblake_blake2s_params, inner_len) == 17) { + state->h[0] ^= le32(((uint_least32_t *)params)[0]); + state->h[1] ^= le32(((uint_least32_t *)params)[1]); + state->h[2] ^= le32((uint_least32_t)(params->node_offset >> 0)); + state->h[3] ^= le32(((uint_least32_t)(params->node_offset >> 32) & UINT_LEAST64_C(0xFFFF)) | + ((uint_least32_t)params->node_depth << 16) | + ((uint_least32_t)params->inner_len << 24)); + state->h[4] ^= le32(*(uint_least32_t *)&params->salt[0]); + state->h[5] ^= le32(*(uint_least32_t *)&params->salt[4]); + state->h[6] ^= le32(*(uint_least32_t *)&params->pepper[0]); + state->h[7] ^= le32(*(uint_least32_t *)&params->pepper[4]); + } else { + state->h[0] ^= ((uint_least32_t)params->digest_len & 255) << 0; + state->h[0] ^= ((uint_least32_t)params->key_len & 255) << 8; + state->h[0] ^= ((uint_least32_t)params->fanout &
255) << 16; + state->h[0] ^= ((uint_least32_t)params->depth & 255) << 24; + state->h[1] ^= params->leaf_len & UINT_LEAST32_C(0xFFFFffff); + state->h[2] ^= (uint_least32_t)((params->node_offset >> 0) & UINT_LEAST64_C(0xFFFFffff)); + state->h[3] ^= (uint_least32_t)((params->node_offset >> 32) & UINT_LEAST64_C(0xFFFF)) << 0; + state->h[3] ^= ((uint_least32_t)params->node_depth & 255) << 16; + state->h[3] ^= ((uint_least32_t)params->inner_len & 255) << 24; + state->h[4] ^= ((uint_least32_t)params->salt[0] & 255) << 0; + state->h[4] ^= ((uint_least32_t)params->salt[1] & 255) << 8; + state->h[4] ^= ((uint_least32_t)params->salt[2] & 255) << 16; + state->h[4] ^= ((uint_least32_t)params->salt[3] & 255) << 24; + state->h[5] ^= ((uint_least32_t)params->salt[4] & 255) << 0; + state->h[5] ^= ((uint_least32_t)params->salt[5] & 255) << 8; + state->h[5] ^= ((uint_least32_t)params->salt[6] & 255) << 16; + state->h[5] ^= ((uint_least32_t)params->salt[7] & 255) << 24; + state->h[6] ^= ((uint_least32_t)params->pepper[0] & 255) << 0; + state->h[6] ^= ((uint_least32_t)params->pepper[1] & 255) << 8; + state->h[6] ^= ((uint_least32_t)params->pepper[2] & 255) << 16; + state->h[6] ^= ((uint_least32_t)params->pepper[3] & 255) << 24; + state->h[7] ^= ((uint_least32_t)params->pepper[4] & 255) << 0; + state->h[7] ^= ((uint_least32_t)params->pepper[5] & 255) << 8; + state->h[7] ^= ((uint_least32_t)params->pepper[6] & 255) << 16; + state->h[7] ^= ((uint_least32_t)params->pepper[7] & 255) << 24; + } if (params->key_len) { state->t[0] = 32; -- cgit v1.2.3-70-g09d2