aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMattias Andrée <maandree@kth.se>2022-02-16 16:11:36 +0100
committerMattias Andrée <maandree@kth.se>2022-02-16 16:11:36 +0100
commitd6f303ab33bdb97fa50a7daba6d4fa489a9feca2 (patch)
treeb36af440b4d97d7b13dc49683168c4fc18dbe463
parentFix style + minimal optimisation (diff)
downloadlibar2-d6f303ab33bdb97fa50a7daba6d4fa489a9feca2.tar.gz
libar2-d6f303ab33bdb97fa50a7daba6d4fa489a9feca2.tar.bz2
libar2-d6f303ab33bdb97fa50a7daba6d4fa489a9feca2.tar.xz
Small optimisation for little-endian machines
Signed-off-by: Mattias Andrée <maandree@kth.se>
-rw-r--r--common.h22
-rw-r--r--libar2_hash.c42
-rw-r--r--test.c39
3 files changed, 97 insertions, 6 deletions
diff --git a/common.h b/common.h
index e02d2c6..8863661 100644
--- a/common.h
+++ b/common.h
@@ -54,6 +54,28 @@
#endif
+#define LITTLE_ENDIAN__ 1234
+#define BIG_ENDIAN__ 4321
+#ifndef HOST_ENDIAN
+# if defined(i386) || defined(__i386__) || defined(__x86_64__)
+# define HOST_ENDIAN LITTLE_ENDIAN__
+# endif
+#endif
+#ifdef HOST_ENDIAN
+# if HOST_ENDIAN == LITTLE_ENDIAN__
+# define USING_LITTLE_ENDIAN
+# elif HOST_ENDIAN == BIG_ENDIAN__
+# define USING_BIG_ENDIAN
+# endif
+#else
+# ifdef __GNUC__
+# ifdef WARN_UNKNOWN_ENDIAN
+# warning The host endian is unknown
+# endif
+# endif
+#endif
+
+
#define ELEMSOF(ARR) (sizeof(ARR) / sizeof(*(ARR)))
#define MAX(A, B) ((A) > (B) ? (A) : (B))
diff --git a/libar2_hash.c b/libar2_hash.c
index f1e6d9b..8afb02d 100644
--- a/libar2_hash.c
+++ b/libar2_hash.c
@@ -1,4 +1,5 @@
/* See LICENSE file for copyright and license details. */
+#define WARN_UNKNOWN_ENDIAN
#include "common.h"
@@ -31,7 +32,7 @@ static const struct block zerob; /* implicitly zeroed via `static` */
static void
-memxor(void *a_, const void *b_, size_t n)
+memxor(void *a_, const void *b_, size_t n) /* TODO using _mm_xor_si128 may improve performance */
{
unsigned char *a = a_;
const unsigned char *b = b_;
@@ -52,6 +53,8 @@ store32(unsigned char *out, uint_least32_t value)
}
+#ifndef USING_LITTLE_ENDIAN
+
static void
store64(unsigned char *out, uint_least64_t value)
{
@@ -97,6 +100,8 @@ load_block(struct block *block64, const unsigned char *block8)
load64(&block64->w[j], &block8[i]);
}
+#endif
+
static size_t
storemem(unsigned char *out, const void *mem, size_t len, size_t max)
@@ -175,6 +180,7 @@ fill_block(struct block *block, const struct block *prevblock, const struct bloc
ARR[OFF + W8], ARR[OFF + W9], ARR[OFF + WA], ARR[OFF + WB],\
ARR[OFF + WC], ARR[OFF + WD], ARR[OFF + WE], ARR[OFF + WF])
+ /* TODO does unrolling these loops help? */
for (i = 0; i < 8; i++) {
BLAMKA_ROUND_(tmpblock.w, i * 16,
0, 1, 2, 3,
@@ -482,7 +488,10 @@ argon2_blake2b_exthash(void *hash_, size_t hashlen, void *msg_, size_t msglen)
int
libar2_hash(void *hash, void *msg, size_t msglen, struct libar2_argon2_parameters *params, struct libar2_context *ctx)
{
- unsigned char block[1024 + 128], hash0[256];
+#ifndef USING_LITTLE_ENDIAN
+ unsigned char block[1024 + 128];
+#endif
+ unsigned char hash0[256];
uint_least32_t blocks, seglen, lanelen;
struct block *memory;
size_t i, p, s, nthreads, ts[16], ti, tn, bufsize;
@@ -494,12 +503,21 @@ libar2_hash(void *hash, void *msg, size_t msglen, struct libar2_argon2_parameter
return -1;
}
- blocks = MAX(params->m_cost, 8 * params->lanes);
+ blocks = MAX(params->m_cost, 8 * params->lanes); /* 8 * params->lanes <= 0x07FFfff8 */
seglen = blocks / (4 * params->lanes);
blocks -= blocks % (4 * params->lanes);
lanelen = seglen * 4;
- memory = ctx->allocate(blocks, sizeof(struct block), MAX(ALIGNOF(struct block), CACHE_LINE_SIZE), ctx);
+#ifdef USING_LITTLE_ENDIAN
+ /* We are allocating one extra block, this gives us 1024 extra bytes,
+ * but we only need 128, to ensure that `argon2_blake2b_exthash` does
+ * not write on unallocated memory. Preferably we would just request
+ * 128 bytes, but this would require an undesirable API/ABI
+ * change. */
+ memory = ctx->allocate(blocks + 1, sizeof(struct block), MAX(MAX(ALIGNOF(struct block), CACHE_LINE_SIZE), 16), ctx);
+#else
+ memory = ctx->allocate(blocks, sizeof(struct block), MAX(MAX(ALIGNOF(struct block), CACHE_LINE_SIZE), 16), ctx);
+#endif
if (!memory)
return -1;
@@ -512,15 +530,23 @@ libar2_hash(void *hash, void *msg, size_t msglen, struct libar2_argon2_parameter
}
initial_hash(hash0, msg, msglen, params, ctx);
- for (i = 0; i < params->lanes; i++) {
+ for (i = 0; i < params->lanes; i++) { /* direction is important for little-endian optimisation */
store32(&hash0[64], 0);
store32(&hash0[68], (uint_least32_t)i);
+#ifdef USING_LITTLE_ENDIAN
+ argon2_blake2b_exthash(&memory[i * lanelen + 0], 1024, hash0, 72);
+#else
argon2_blake2b_exthash(block, 1024, hash0, 72);
load_block(&memory[i * lanelen + 0], block);
+#endif
store32(&hash0[64], 1);
+#ifdef USING_LITTLE_ENDIAN
+ argon2_blake2b_exthash(&memory[i * lanelen + 1], 1024, hash0, 72);
+#else
argon2_blake2b_exthash(block, 1024, hash0, 72);
load_block(&memory[i * lanelen + 1], block);
+#endif
}
ERASE_ARRAY(hash0);
@@ -592,13 +618,19 @@ libar2_hash(void *hash, void *msg, size_t msglen, struct libar2_argon2_parameter
for (i = 1; i < params->lanes; i++)
memxor(&memory[lanelen - 1], &memory[i * lanelen + lanelen - 1], sizeof(*memory));
+#ifdef USING_LITTLE_ENDIAN
+ argon2_blake2b_exthash(hash, params->hashlen, &memory[lanelen - 1], 1024);
+#else
store_block(block, &memory[lanelen - 1]);
argon2_blake2b_exthash(hash, params->hashlen, block, 1024);
+#endif
bufsize = libar2_hash_buf_size(params);
if (bufsize) /* should never be 0 as that would indicate the user provided a too small buffer */
libar2_erase(&((char *)hash)[params->hashlen], bufsize - params->hashlen);
+#ifndef USING_LITTLE_ENDIAN
ERASE_ARRAY(block);
+#endif
if (sbox)
ctx->deallocate(sbox, ctx);
ctx->deallocate(memory, ctx);
diff --git a/test.c b/test.c
index 9a9b3f7..c9f7b5a 100644
--- a/test.c
+++ b/test.c
@@ -1,6 +1,15 @@
/* See LICENSE file for copyright and license details. */
#include "common.h"
+
+#ifndef MEASURE_TIME
+# define MEASURE_TIME 0
+#endif
+
#include <stdlib.h>
+#if MEASURE_TIME
+# include <stdio.h>
+# include <time.h>
+#endif
#define MEM(S) S, sizeof(S) - 1
@@ -845,6 +854,7 @@ check_libar2_hash_buf_size(void)
int
main(void)
{
+#if 1
check_libar2_type_to_string();
check_libar2_string_to_type();
check_libar2_version_to_string();
@@ -856,8 +866,35 @@ main(void)
check_libar2_validate_params();
check_libar2_hash();
-#ifdef LIBAR2_WEAKLY_LINKED__
+# ifdef LIBAR2_WEAKLY_LINKED__
check_libar2_hash_buf_size();
+# endif
+#endif
+
+#if MEASURE_TIME
+ {
+ struct libar2_argon2_parameters params;
+ char output[512];
+ clock_t dur;
+ double ddur;
+ int r;
+ memset(&params, 0, sizeof(params));
+ params.m_cost = (uint_least32_t)1 << 18;
+ params.t_cost = 1;
+ params.lanes = 1;
+ params.saltlen = 8;
+ params.salt = (unsigned char[]){"\0\0\0\0\0\0\0\0"};
+ params.hashlen = 32;
+ assert(!libar2_validate_params(&params, NULL));
+ dur = clock();
+ r = libar2_hash(output, NULL, 0, &params, &ctx_st);
+ dur = clock() - dur;
+ assert(!r);
+ ddur = (double)dur;
+ ddur /= CLOCKS_PER_SEC;
+ ddur *= 1000;
+ fprintf(stderr, "Time: %lg ms\n", ddur);
+ }
#endif
return 0;