aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Andrée <maandree@kth.se>2022-02-17 15:11:28 +0100
committerMattias Andrée <maandree@kth.se>2022-02-17 15:11:33 +0100
commit6570686dc178b801b287e0ab155aaf691bbcb1eb (patch)
tree72ac2dcc50d58314b607ce523792ae6e7ec2a102
parentAdd optimisations (no yet used) (diff)
downloadlibar2-6570686dc178b801b287e0ab155aaf691bbcb1eb.tar.gz
libar2-6570686dc178b801b287e0ab155aaf691bbcb1eb.tar.bz2
libar2-6570686dc178b801b287e0ab155aaf691bbcb1eb.tar.xz
m + use optimisations '(adds auto-executed function: libar2_init)'
Signed-off-by: Mattias Andrée <maandree@kth.se>
Diffstat (limited to '')
-rw-r--r--Makefile2
-rw-r--r--common.h14
-rw-r--r--libar2.71
-rw-r--r--libar2.h8
-rw-r--r--libar2_erase.c10
-rw-r--r--libar2_hash.31
-rw-r--r--libar2_hash.c98
-rw-r--r--libar2_init.344
-rw-r--r--test.c4
9 files changed, 165 insertions, 17 deletions
diff --git a/Makefile b/Makefile
index ef329d3..2395e8b 100644
--- a/Makefile
+++ b/Makefile
@@ -37,7 +37,7 @@ HDR =\
common.h
LOBJ = $(OBJ:.o=.lo)
-MAN3 = $(OBJ:.o=.3)
+MAN3 = $(OBJ:.o=.3) libar2_init.3
MAN7 = libar2.7
SRC = $(OBJ:.o=.c) $(HDR) test.c
diff --git a/common.h b/common.h
index edfaed7..d323f78 100644
--- a/common.h
+++ b/common.h
@@ -31,6 +31,11 @@
#if defined(__GNUC__)
# define LIBAR2_WEAKLY_LINKED__ __attribute__((__weak__))
# define LIBAR2_TARGET__(TARGETS) __attribute__((__target__(TARGETS)))
+# define LIBAR2_INITIALISER__ __attribute__((__constructor__))
+# define LIBAR2_HIDDEN__ __attribute__((__visibility__("hidden")))
+#else
+# define LIBAR2_INITIALISER__
+# define LIBAR2_HIDDEN__
#endif
@@ -112,6 +117,15 @@ struct block {
};
+LIBAR2_HIDDEN__ void libar2_internal_erase__(volatile void *mem, size_t size);
+#if defined(__x86_64__) && defined(LIBAR2_TARGET__)
+LIBAR2_HIDDEN__ void libar2_internal_use_generic__(void);
+LIBAR2_HIDDEN__ void libar2_internal_use_sse2__(void);
+LIBAR2_HIDDEN__ void libar2_internal_use_avx2__(void);
+LIBAR2_HIDDEN__ void libar2_internal_use_avx512f__(void);
+#endif
+
+
#if defined(__clang__)
# pragma clang diagnostic ignored "-Wc++98-compat"
#endif
diff --git a/libar2.7 b/libar2.7
index c0d24c0..05f0887 100644
--- a/libar2.7
+++ b/libar2.7
@@ -21,6 +21,7 @@ hashing (one-way encrypting) passwords.
.BR libar2_erase (3),
.BR libar2_hash (3),
.BR libar2_hash_buf_size (3),
+.BR libar2_init (3),
.BR libar2_latest_argon2_version (3),
.BR libar2_string_to_type (3),
.BR libar2_string_to_version (3),
diff --git a/libar2.h b/libar2.h
index 3b14b3a..8343a8f 100644
--- a/libar2.h
+++ b/libar2.h
@@ -620,6 +620,14 @@ LIBAR2_PUBLIC__
void libar2_erase(volatile void *mem, size_t size);
/**
+ * Initialise the library
+ *
+ * Called automatically by `libar2_hash`
+ */
+LIBAR2_PUBLIC__
+void libar2_init(void);
+
+/**
* Hash a message
*
* The recommended why of verify a password is to hash the
diff --git a/libar2_erase.c b/libar2_erase.c
index 0e3174e..585d7d9 100644
--- a/libar2_erase.c
+++ b/libar2_erase.c
@@ -6,19 +6,13 @@
#elif defined(explicit_bzero) || defined(__OpenBSD__)
#elif defined(explicit_memset)
#else
-# if defined(__GNUC__)
-__attribute__((visibility("hidden")))
-# endif
+LIBAR2_HIDDEN__
extern void *(*const volatile libar2_internal_explicit_memset__)(void *, int, size_t);
void *(*const volatile libar2_internal_explicit_memset__)(void *, int, size_t) = &memset;
#endif
-/* libar2_internal_erase__ is intended for the test code to use, because it replaces `libar2_erase` */
-# if defined(__GNUC__)
-__attribute__((visibility("hidden")))
-# endif
-void libar2_internal_erase__(volatile void *mem, size_t size);
+/* libar2_internal_erase__ is intended for the test code to use, because it replaces `libar2_erase` */
#if defined(__clang__) /* before __GNUC__ because that is also set in clang */
# if __has_attribute(optnone)
__attribute__((optnone))
diff --git a/libar2_hash.3 b/libar2_hash.3
index f34bac7..1e7cd2c 100644
--- a/libar2_hash.3
+++ b/libar2_hash.3
@@ -478,6 +478,7 @@ fails, and will, in that case, not modify
.SH SEE ALSO
.BR libar2 (7),
.BR libar2_hash_buf_size (3),
+.BR libar2_init (3),
.BR libar2_encode_base64 (3),
.BR libar2_encode_params (3),
.BR libar2_decode_params (3),
diff --git a/libar2_hash.c b/libar2_hash.c
index b52133b..80fb6d3 100644
--- a/libar2_hash.c
+++ b/libar2_hash.c
@@ -1,6 +1,7 @@
/* See LICENSE file for copyright and license details. */
#define WARN_UNKNOWN_ENDIAN
#include "common.h"
+#include <stdatomic.h>
#if defined(__x86_64__)
# include <immintrin.h>
@@ -32,7 +33,7 @@ static const struct libblake_blake2b_params b2params = {
};
-static const struct block zerob; /* implicitly zeroed via `static` */
+static const SIMD_ALIGNED struct block zerob; /* implicitly zeroed via `static` */
#if defined(__x86_64__) && defined(LIBAR2_TARGET__)
@@ -82,7 +83,7 @@ blockxor_vanilla(struct block *a, const struct block *b)
a->w[i] ^= b->w[i];
}
-static void (*blockxor)(struct block *a, const struct block *b) = &blockxor_vanilla; /* TODO set at init */
+static void (*blockxor)(struct block *a, const struct block *b) = &blockxor_vanilla;
LIBAR2_TARGET__("avx2")
static void
@@ -118,7 +119,7 @@ blockxor3_vanilla(struct block *a, const struct block *b, const struct block *c)
}
#define DEFINED_BLOCKXOR3
-static void (*blockxor3)(struct block *a, const struct block *b, const struct block *c) = &blockxor3_vanilla; /* TODO set at init */
+static void (*blockxor3)(struct block *a, const struct block *b, const struct block *c) = &blockxor3_vanilla;
LIBAR2_TARGET__("avx2")
static void
@@ -151,7 +152,7 @@ blockcpy_vanilla(struct block *a, const struct block *b)
}
#define DEFINED_BLOCKCPY
-static void (*blockcpy)(struct block *a, const struct block *b) = &blockcpy_vanilla; /* TODO set at init */
+static void (*blockcpy)(struct block *a, const struct block *b) = &blockcpy_vanilla;
#else
@@ -400,7 +401,7 @@ fill_segment(struct block *memory, const uint_least64_t *sbox, struct libar2_arg
uint_least32_t pass, uint_least32_t lane, uint_least32_t slice)
{
int data_independent;
- struct block inputb, addrb;
+ SIMD_ALIGNED struct block inputb, addrb;
uint_least32_t off, prevoff, rlane, rindex;
uint_least32_t index = 0, i;
uint_least64_t prand;
@@ -620,6 +621,91 @@ argon2_blake2b_exthash(void *hash_, size_t hashlen, void *msg_, size_t msglen)
}
+#if defined(__x86_64__) && defined(LIBAR2_TARGET__)
+
+void
+libar2_internal_use_generic__(void)
+{
+ blockxor = &blockxor_vanilla;
+ blockxor3 = &blockxor3_vanilla;
+ blockcpy = &blockcpy_vanilla;
+}
+
+void
+libar2_internal_use_sse2__(void)
+{
+ libar2_internal_use_generic__();
+}
+
+void
+libar2_internal_use_avx2__(void)
+{
+ blockxor = &blockxor_avx2;
+ blockxor3 = &blockxor3_avx2;
+ blockcpy = &blockcpy_avx2;
+}
+
+void
+libar2_internal_use_avx512f__(void)
+{
+ blockxor = &blockxor_avx512f;
+ blockxor3 = &blockxor3_avx512f;
+ blockcpy = &blockcpy_avx512f;
+}
+
+#endif
+
+
+LIBAR2_INITIALISER__ /* ignored if statically linked, so this function shall
+ * by the application, we just use the constructor (init)
+ * attribute in case that is forgotten, as it will only
+ * improve performance, but the library with function
+ * perfectly fine even if it's not called */
+void
+libar2_init(void)
+{
+#if defined(__x86_64__) && defined(LIBAR2_TARGET__)
+ static volatile int initialised = 0;
+ static volatile atomic_flag spinlock = ATOMIC_FLAG_INIT;
+
+ if (!initialised) {
+ while (atomic_flag_test_and_set(&spinlock));
+
+ if (!initialised) {
+#if 0
+ __builtin_cpu_init();
+ if (__builtin_cpu_supports("avx512f"))
+ libar2_internal_use_avx512f__();
+ else if (__builtin_cpu_supports("avx2"))
+ libar2_internal_use_avx2__();
+ else if (__builtin_cpu_supports("sse2"))
+ libar2_internal_use_sse2__();
+ else
+ libar2_internal_use_generic__();
+#else
+ uint32_t x;
+ __asm__ volatile("cpuid" : "=b"(x) : "a"(7), "c"(0) : "edx");
+ if (x & ((uint32_t)1 << 16)) {
+ libar2_internal_use_avx512f__();
+ } else if (x & ((uint32_t)1 << 5)) {
+ libar2_internal_use_avx2__();
+ } else {
+ __asm__ volatile("cpuid" : "=d"(x) : "a"(1) : "ebx", "ecx");
+ if (x & ((uint32_t)1 << 26))
+ libar2_internal_use_sse2__();
+ else
+ libar2_internal_use_generic__();
+ }
+#endif
+ initialised = 1;
+ }
+
+ atomic_flag_clear(&spinlock);
+#endif
+ }
+}
+
+
int
libar2_hash(void *hash, void *msg, size_t msglen, struct libar2_argon2_parameters *params, struct libar2_context *ctx)
{
@@ -634,6 +720,8 @@ libar2_hash(void *hash, void *msg, size_t msglen, struct libar2_argon2_parameter
uint_least64_t *sbox = NULL; /* This is 8K large (assuming support for uint64_t), so we allocate it dynamically */
size_t alignment;
+ libar2_init();
+
if (libar2_validate_params(params, NULL) || msglen >> 31 > 1) {
errno = EINVAL;
return -1;
diff --git a/libar2_init.3 b/libar2_init.3
new file mode 100644
index 0000000..d14b31f
--- /dev/null
+++ b/libar2_init.3
@@ -0,0 +1,44 @@
+.TH LIBAR2_INIT 3 LIBAR2
+.SH NAME
+libar2_init - Initialise libar2
+
+.SH SYNOPSIS
+.nf
+#include <libar2.h>
+
+void libar2_init(void);
+.fi
+.PP
+Link with
+.IR -lar2 .
+
+.SH DESCRIPTION
+The
+.BR libar2_init ()
+function initialises the library, specifically
+it selects which versions of different functions
+the library shall use, based on the capabilities
+of the machine that runs the function.
+.PP
+The
+.BR libar2_init ()
+function is called automatically by the
+.BR libar2_hash (3)
+function, but it is desirable, but not necessary,
+for multi-threaded applications to call it manually,
+also depending on the capabilities of the compiler
+and linker that was used to create the library, the
+.BR libar2_init ()
+function may be called automatically at load-time.
+
+.SH RETURN VALUES
+None.
+
+.SH ERRORS
+The
+.BR libar2_init ()
+function cannot fail.
+
+.SH SEE ALSO
+.BR libar2 (7),
+.BR libar2_hash (3)
diff --git a/test.c b/test.c
index af9538b..3aec24c 100644
--- a/test.c
+++ b/test.c
@@ -879,9 +879,6 @@ memis(char *mem, int ch, size_t n)
/* Typo in version 1.0 */
extern void libar2_earse(volatile void *mem, size_t size);
-/* libar2_erase has been replaced by this test, so we test this instead */
-extern void libar2_internal_erase__(volatile void *mem, size_t size);
-
static void
check_libar2_erase(void)
{
@@ -892,6 +889,7 @@ check_libar2_erase(void)
assert(memis(&buf[512], 1, 512));
assert(memis(&buf[0], 0, 512));
+ /* libar2_erase has been replaced by this test, so we test this instead */
memset(buf, 1, sizeof(buf));
libar2_internal_erase__(&buf[0], 512);
assert(memis(&buf[512], 1, 512));