From 6570686dc178b801b287e0ab155aaf691bbcb1eb Mon Sep 17 00:00:00 2001 From: Mattias Andrée Date: Thu, 17 Feb 2022 15:11:28 +0100 Subject: m + use optimisations '(adds auto-executed function: libar2_init)' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Mattias Andrée --- Makefile | 2 +- common.h | 14 +++++++++ libar2.7 | 1 + libar2.h | 8 +++++ libar2_erase.c | 10 ++---- libar2_hash.3 | 1 + libar2_hash.c | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--- libar2_init.3 | 44 ++++++++++++++++++++++++++ test.c | 4 +-- 9 files changed, 165 insertions(+), 17 deletions(-) create mode 100644 libar2_init.3 diff --git a/Makefile b/Makefile index ef329d3..2395e8b 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ HDR =\ common.h LOBJ = $(OBJ:.o=.lo) -MAN3 = $(OBJ:.o=.3) +MAN3 = $(OBJ:.o=.3) libar2_init.3 MAN7 = libar2.7 SRC = $(OBJ:.o=.c) $(HDR) test.c diff --git a/common.h b/common.h index edfaed7..d323f78 100644 --- a/common.h +++ b/common.h @@ -31,6 +31,11 @@ #if defined(__GNUC__) # define LIBAR2_WEAKLY_LINKED__ __attribute__((__weak__)) # define LIBAR2_TARGET__(TARGETS) __attribute__((__target__(TARGETS))) +# define LIBAR2_INITIALISER__ __attribute__((__constructor__)) +# define LIBAR2_HIDDEN__ __attribute__((__visibility__("hidden"))) +#else +# define LIBAR2_INITIALISER__ +# define LIBAR2_HIDDEN__ #endif @@ -112,6 +117,15 @@ struct block { }; +LIBAR2_HIDDEN__ void libar2_internal_erase__(volatile void *mem, size_t size); +#if defined(__x86_64__) && defined(LIBAR2_TARGET__) +LIBAR2_HIDDEN__ void libar2_internal_use_generic__(void); +LIBAR2_HIDDEN__ void libar2_internal_use_sse2__(void); +LIBAR2_HIDDEN__ void libar2_internal_use_avx2__(void); +LIBAR2_HIDDEN__ void libar2_internal_use_avx512f__(void); +#endif + + #if defined(__clang__) # pragma clang diagnostic ignored "-Wc++98-compat" #endif diff --git a/libar2.7 b/libar2.7 index c0d24c0..05f0887 100644 --- a/libar2.7 +++ b/libar2.7 @@ -21,6 +21,7 @@ hashing (one-way encrypting) passwords. .BR libar2_erase (3), .BR libar2_hash (3), .BR libar2_hash_buf_size (3), +.BR libar2_init (3), .BR libar2_latest_argon2_version (3), .BR libar2_string_to_type (3), .BR libar2_string_to_version (3), diff --git a/libar2.h b/libar2.h index 3b14b3a..8343a8f 100644 --- a/libar2.h +++ b/libar2.h @@ -619,6 +619,14 @@ enum libar2_parameter_error libar2_validate_params(const struct libar2_argon2_pa LIBAR2_PUBLIC__ void libar2_erase(volatile void *mem, size_t size); +/** + * Initialise the library + * + * Called automatically by `libar2_hash` + */ +LIBAR2_PUBLIC__ +void libar2_init(void); + /** * Hash a message * diff --git a/libar2_erase.c b/libar2_erase.c index 0e3174e..585d7d9 100644 --- a/libar2_erase.c +++ b/libar2_erase.c @@ -6,19 +6,13 @@ #elif defined(explicit_bzero) || defined(__OpenBSD__) #elif defined(explicit_memset) #else -# if defined(__GNUC__) -__attribute__((visibility("hidden"))) -# endif +LIBAR2_HIDDEN__ extern void *(*const volatile libar2_internal_explicit_memset__)(void *, int, size_t); void *(*const volatile libar2_internal_explicit_memset__)(void *, int, size_t) = &memset; #endif -/* libar2_internal_erase__ is intended for the test code to use, because it replaces `libar2_erase` */ -# if defined(__GNUC__) -__attribute__((visibility("hidden"))) -# endif -void libar2_internal_erase__(volatile void *mem, size_t size); +/* libar2_internal_erase__ is intended for the test code to use, because it replaces `libar2_erase` */ #if defined(__clang__) /* before __GNUC__ because that is also set in clang */ # if __has_attribute(optnone) __attribute__((optnone)) diff --git a/libar2_hash.3 b/libar2_hash.3 index f34bac7..1e7cd2c 100644 --- a/libar2_hash.3 +++ b/libar2_hash.3 @@ -478,6 +478,7 @@ fails, and will, in that case, not modify .SH SEE ALSO .BR libar2 (7), .BR libar2_hash_buf_size (3), +.BR libar2_init (3), .BR libar2_encode_base64 (3), .BR libar2_encode_params (3), .BR libar2_decode_params (3), diff --git a/libar2_hash.c b/libar2_hash.c index b52133b..80fb6d3 100644 --- a/libar2_hash.c +++ b/libar2_hash.c @@ -1,6 +1,7 @@ /* See LICENSE file for copyright and license details. */ #define WARN_UNKNOWN_ENDIAN #include "common.h" +#include #if defined(__x86_64__) # include @@ -32,7 +33,7 @@ static const struct libblake_blake2b_params b2params = { }; -static const struct block zerob; /* implicitly zeroed via `static` */ +static const SIMD_ALIGNED struct block zerob; /* implicitly zeroed via `static` */ #if defined(__x86_64__) && defined(LIBAR2_TARGET__) @@ -82,7 +83,7 @@ blockxor_vanilla(struct block *a, const struct block *b) a->w[i] ^= b->w[i]; } -static void (*blockxor)(struct block *a, const struct block *b) = &blockxor_vanilla; /* TODO set at init */ +static void (*blockxor)(struct block *a, const struct block *b) = &blockxor_vanilla; LIBAR2_TARGET__("avx2") static void @@ -118,7 +119,7 @@ blockxor3_vanilla(struct block *a, const struct block *b, const struct block *c) } #define DEFINED_BLOCKXOR3 -static void (*blockxor3)(struct block *a, const struct block *b, const struct block *c) = &blockxor3_vanilla; /* TODO set at init */ +static void (*blockxor3)(struct block *a, const struct block *b, const struct block *c) = &blockxor3_vanilla; LIBAR2_TARGET__("avx2") static void @@ -151,7 +152,7 @@ blockcpy_vanilla(struct block *a, const struct block *b) } #define DEFINED_BLOCKCPY -static void (*blockcpy)(struct block *a, const struct block *b) = &blockcpy_vanilla; /* TODO set at init */ +static void (*blockcpy)(struct block *a, const struct block *b) = &blockcpy_vanilla; #else @@ -400,7 +401,7 @@ fill_segment(struct block *memory, const uint_least64_t *sbox, struct libar2_arg uint_least32_t pass, uint_least32_t lane, uint_least32_t slice) { int data_independent; - struct block inputb, addrb; + SIMD_ALIGNED struct block inputb, addrb; uint_least32_t off, prevoff, rlane, rindex; uint_least32_t index = 0, i; uint_least64_t prand; @@ -620,6 +621,91 @@ argon2_blake2b_exthash(void *hash_, size_t hashlen, void *msg_, size_t msglen) } +#if defined(__x86_64__) && defined(LIBAR2_TARGET__) + +void +libar2_internal_use_generic__(void) +{ + blockxor = &blockxor_vanilla; + blockxor3 = &blockxor3_vanilla; + blockcpy = &blockcpy_vanilla; +} + +void +libar2_internal_use_sse2__(void) +{ + libar2_internal_use_generic__(); +} + +void +libar2_internal_use_avx2__(void) +{ + blockxor = &blockxor_avx2; + blockxor3 = &blockxor3_avx2; + blockcpy = &blockcpy_avx2; +} + +void +libar2_internal_use_avx512f__(void) +{ + blockxor = &blockxor_avx512f; + blockxor3 = &blockxor3_avx512f; + blockcpy = &blockcpy_avx512f; +} + +#endif + + +LIBAR2_INITIALISER__ /* ignored if statically linked, so this function shall + * by the application, we just use the constructor (init) + * attribute in case that is forgotten, as it will only + * improve performance, but the library with function + * perfectly fine even if it's not called */ +void +libar2_init(void) +{ +#if defined(__x86_64__) && defined(LIBAR2_TARGET__) + static volatile int initialised = 0; + static volatile atomic_flag spinlock = ATOMIC_FLAG_INIT; + + if (!initialised) { + while (atomic_flag_test_and_set(&spinlock)); + + if (!initialised) { +#if 0 + __builtin_cpu_init(); + if (__builtin_cpu_supports("avx512f")) + libar2_internal_use_avx512f__(); + else if (__builtin_cpu_supports("avx2")) + libar2_internal_use_avx2__(); + else if (__builtin_cpu_supports("sse2")) + libar2_internal_use_sse2__(); + else + libar2_internal_use_generic__(); +#else + uint32_t x; + __asm__ volatile("cpuid" : "=b"(x) : "a"(7), "c"(0) : "edx"); + if (x & ((uint32_t)1 << 16)) { + libar2_internal_use_avx512f__(); + } else if (x & ((uint32_t)1 << 5)) { + libar2_internal_use_avx2__(); + } else { + __asm__ volatile("cpuid" : "=d"(x) : "a"(1) : "ebx", "ecx"); + if (x & ((uint32_t)1 << 26)) + libar2_internal_use_sse2__(); + else + libar2_internal_use_generic__(); + } +#endif + initialised = 1; + } + + atomic_flag_clear(&spinlock); +#endif + } +} + + int libar2_hash(void *hash, void *msg, size_t msglen, struct libar2_argon2_parameters *params, struct libar2_context *ctx) { @@ -634,6 +720,8 @@ libar2_hash(void *hash, void *msg, size_t msglen, struct libar2_argon2_parameter uint_least64_t *sbox = NULL; /* This is 8K large (assuming support for uint64_t), so we allocate it dynamically */ size_t alignment; + libar2_init(); + if (libar2_validate_params(params, NULL) || msglen >> 31 > 1) { errno = EINVAL; return -1; diff --git a/libar2_init.3 b/libar2_init.3 new file mode 100644 index 0000000..d14b31f --- /dev/null +++ b/libar2_init.3 @@ -0,0 +1,44 @@ +.TH LIBAR2_INIT 3 LIBAR2 +.SH NAME +libar2_init - Initialise libar2 + +.SH SYNOPSIS +.nf +#include + +void libar2_init(void); +.fi +.PP +Link with +.IR -lar2 . + +.SH DESCRIPTION +The +.BR libar2_init () +function initialises the library, specifically +it selects which versions of different functions +the library shall use, based on the capabilities +of the machine that runs the function. +.PP +The +.BR libar2_init () +function is called automatically by the +.BR libar2_hash (3) +function, but it is desirable, but not necessary, +for multi-threaded applications to call it manually, +also depending on the capabilities of the compiler +and linker that was used to create the library, the +.BR libar2_init () +function may be called automatically at load-time. + +.SH RETURN VALUES +None. + +.SH ERRORS +The +.BR libar2_init () +function cannot fail. + +.SH SEE ALSO +.BR libar2 (7), +.BR libar2_hash (3) diff --git a/test.c b/test.c index af9538b..3aec24c 100644 --- a/test.c +++ b/test.c @@ -879,9 +879,6 @@ memis(char *mem, int ch, size_t n) /* Typo in version 1.0 */ extern void libar2_earse(volatile void *mem, size_t size); -/* libar2_erase has been replaced by this test, so we test this instead */ -extern void libar2_internal_erase__(volatile void *mem, size_t size); - static void check_libar2_erase(void) { @@ -892,6 +889,7 @@ check_libar2_erase(void) assert(memis(&buf[512], 1, 512)); assert(memis(&buf[0], 0, 512)); + /* libar2_erase has been replaced by this test, so we test this instead */ memset(buf, 1, sizeof(buf)); libar2_internal_erase__(&buf[0], 512); assert(memis(&buf[512], 1, 512)); -- cgit v1.2.3-70-g09d2