From 839a3d17d257e73be9bc99dfa90e56c0824050ba Mon Sep 17 00:00:00 2001
From: Mattias Andrée
Date: Fri, 21 Jan 2022 18:29:26 +0100
Subject: Initial work on optimising compression function; mm128 version is slower, mm256 version is barely faster
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Mattias Andrée
---
 libblake_internal_blake2b_compress.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'libblake_internal_blake2b_compress.c')

diff --git a/libblake_internal_blake2b_compress.c b/libblake_internal_blake2b_compress.c
index e844180..d04a469 100644
--- a/libblake_internal_blake2b_compress.c
+++ b/libblake_internal_blake2b_compress.c
@@ -1,9 +1,12 @@
 /* See LICENSE file for copyright and license details. */
 #include "common.h"
 
+/* This code performs suboptimally if compiled with -mavx2 */
+
 static uint_least64_t
 decode_uint64_le(const unsigned char *data)
 {
+	/* This is perfectly optimised by the compiler */
 	return (((uint_least64_t)(data[0] & 255)) << 0) |
 	       (((uint_least64_t)(data[1] & 255)) << 8) |
 	       (((uint_least64_t)(data[2] & 255)) << 16) |
@@ -17,6 +20,7 @@ decode_uint64_le(const unsigned char *data)
 static uint_least64_t
 rotate_right(uint_least64_t x, int n)
 {
+	/* This is perfectly optimised by the compiler */
 	return ((x >> n) | (x << (64 - n))) & UINT_LEAST64_C(0xFFFFffffFFFFffff);
 }
 
-- 
cgit v1.2.3-70-g09d2