diff options
Diffstat (limited to '')
| -rw-r--r-- | Makefile | 7 | ||||
| -rw-r--r-- | config-armv8.mk | 14 | ||||
| -rw-r--r-- | libsha2.7 | 4 | ||||
| -rw-r--r-- | libsha2_algorithm_output_size.3 | 2 | ||||
| -rw-r--r-- | libsha2_behex_lower.3 | 2 | ||||
| -rw-r--r-- | libsha2_behex_upper.3 | 2 | ||||
| -rw-r--r-- | libsha2_digest.3 | 2 | ||||
| -rw-r--r-- | libsha2_hmac_digest.3 | 2 | ||||
| -rw-r--r-- | libsha2_hmac_init.3 | 2 | ||||
| -rw-r--r-- | libsha2_hmac_marshal.3 | 2 | ||||
| -rw-r--r-- | libsha2_hmac_state_output_size.3 | 2 | ||||
| -rw-r--r-- | libsha2_hmac_unmarshal.3 | 2 | ||||
| -rw-r--r-- | libsha2_hmac_update.3 | 2 | ||||
| -rw-r--r-- | libsha2_init.3 | 2 | ||||
| -rw-r--r-- | libsha2_marshal.3 | 2 | ||||
| -rw-r--r-- | libsha2_state_output_size.3 | 2 | ||||
| -rw-r--r-- | libsha2_sum_fd.3 | 2 | ||||
| -rw-r--r-- | libsha2_unhex.3 | 2 | ||||
| -rw-r--r-- | libsha2_unmarshal.3 | 2 | ||||
| -rw-r--r-- | libsha2_update.3 | 2 | ||||
| -rw-r--r-- | process.c | 202 |
21 files changed, 226 insertions, 35 deletions
@@ -7,6 +7,9 @@ CONFIGFILE = config-x86.mk # enables optimisations on x86 CPU's that have the # required features. # +# Additionally config-arm.mk which uses optimisations +# for ARMv8 is available. +# # config-portable.mk is available for exotic CPU's # and compiler that do not support the features required # for the optimisations. @@ -20,7 +23,7 @@ include mk/$(OS).mk LIB_MAJOR = 1 -LIB_MINOR = 0 +LIB_MINOR = 1 LIB_VERSION = $(LIB_MAJOR).$(LIB_MINOR) @@ -99,7 +102,7 @@ libsha2.a: $(OBJ) $(AR) -s $@ check: test - ./test + $(CHECK_PREFIX) ./test $(CHECK_FLAGS) install: mkdir -p -- "$(DESTDIR)$(PREFIX)/lib" diff --git a/config-armv8.mk b/config-armv8.mk new file mode 100644 index 0000000..c98fd0e --- /dev/null +++ b/config-armv8.mk @@ -0,0 +1,14 @@ +PREFIX = /usr +MANPREFIX = $(PREFIX)/share/man + +CC = cc -std=c11 + +CPPFLAGS = -D_DEFAULT_SOURCE -D_BSD_SOURCE -D_XOPEN_SOURCE=700 +CFLAGS = -Wall -O3 -march=armv8-a+crypto +LDFLAGS = -s + +# You can add -DALLOCA_LIMIT=# to CPPFLAGS, where # is a size_t +# value, to put a limit on how large allocation the library is +# allowed to make with alloca(3). For buffers that can have any +# size this limit will be used if it wants to allocate a larger +# buffer. Choose 0 to use malloc(3) instead of alloca(3). @@ -1,8 +1,8 @@ -.TH LIBSHA2 7 2022-07-07 libsha1 +.TH LIBSHA2 7 LIBSHA2 .SH NAME libsha2 \- SHA-2 hashing library .SH DESCRIPTION -.B libsha1 +.B libsha2 is an implementation of the SHA-2 family hashing functions: SHA-224, SHA-256, SHA-384, SHA-512, SHA-512/224, and SHA-512/256; with support for state marshalling and HMAC. diff --git a/libsha2_algorithm_output_size.3 b/libsha2_algorithm_output_size.3 index d083a38..32fb95a 100644 --- a/libsha2_algorithm_output_size.3 +++ b/libsha2_algorithm_output_size.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_ALGORITHM_OUTPUT_SIZE 3 2019-02-09 libsha2 +.TH LIBSHA2_ALGORITHM_OUTPUT_SIZE 3 LIBSHA2 .SH NAME libsha2_algorithm_output_size \- Get the size of the output for a SHA-2 algorithm .SH SYNOPSIS diff --git a/libsha2_behex_lower.3 b/libsha2_behex_lower.3 index 1172397..e0ce17f 100644 --- a/libsha2_behex_lower.3 +++ b/libsha2_behex_lower.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_BEHEX_LOWER 3 2019-02-09 libsha2 +.TH LIBSHA2_BEHEX_LOWER 3 LIBSHA2 .SH NAME libsha2_behex_lower \- Convert binary to lower case hexadecimal .SH SYNOPSIS diff --git a/libsha2_behex_upper.3 b/libsha2_behex_upper.3 index c65202b..4d24b99 100644 --- a/libsha2_behex_upper.3 +++ b/libsha2_behex_upper.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_BEHEX_UPPER 3 2019-02-09 libsha2 +.TH LIBSHA2_BEHEX_UPPER 3 LIBSHA2 .SH NAME libsha2_behex_upper \- Convert binary to upper case hexadecimal .SH SYNOPSIS diff --git a/libsha2_digest.3 b/libsha2_digest.3 index 127171d..afd4d10 100644 --- a/libsha2_digest.3 +++ b/libsha2_digest.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_DIGEST 3 2019-02-09 libsha2 +.TH LIBSHA2_DIGEST 3 LIBSHA2 .SH NAME libsha2_digest \- Get the result of a SHA-2 hashing .SH SYNOPSIS diff --git a/libsha2_hmac_digest.3 b/libsha2_hmac_digest.3 index 1203049..faf6ab2 100644 --- a/libsha2_hmac_digest.3 +++ b/libsha2_hmac_digest.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_HMAC_DIGEST 3 2019-02-10 libsha2 +.TH LIBSHA2_HMAC_DIGEST 3 LIBSHA2 .SH NAME libsha2_hmac_digest \- Get the result of a HMAC-SHA-2 hashing .SH SYNOPSIS diff --git a/libsha2_hmac_init.3 b/libsha2_hmac_init.3 index 6f09680..bffe76c 100644 --- a/libsha2_hmac_init.3 +++ b/libsha2_hmac_init.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_HMAC_INIT 3 2019-02-10 libsha2 +.TH LIBSHA2_HMAC_INIT 3 LIBSHA2 .SH NAME libsha2_hmac_init \- Initialises hashing with an HMAC-SHA-2 algorithm .SH SYNOPSIS diff --git a/libsha2_hmac_marshal.3 b/libsha2_hmac_marshal.3 index eca380c..48fe001 100644 --- a/libsha2_hmac_marshal.3 +++ b/libsha2_hmac_marshal.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_HMAC_MARSHAL 3 2019-02-10 libsha2 +.TH LIBSHA2_HMAC_MARSHAL 3 LIBSHA2 .SH NAME libsha2_hmac_marshal \- Marshal an HMAC-SHA-2 hashing state .SH SYNOPSIS diff --git a/libsha2_hmac_state_output_size.3 b/libsha2_hmac_state_output_size.3 index aa96436..93c2daf 100644 --- a/libsha2_hmac_state_output_size.3 +++ b/libsha2_hmac_state_output_size.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_HMAC_STATE_OUTPUT_SIZE 3 2019-02-10 libsha2 +.TH LIBSHA2_HMAC_STATE_OUTPUT_SIZE 3 LIBSHA2 .SH NAME libsha2_hmac_state_output_size \- Get the size of the output for a HMAC-SHA-2 algorithm .SH SYNOPSIS diff --git a/libsha2_hmac_unmarshal.3 b/libsha2_hmac_unmarshal.3 index 6cc2967..467595c 100644 --- a/libsha2_hmac_unmarshal.3 +++ b/libsha2_hmac_unmarshal.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_HMAC_UNMARSHAL 3 2019-02-10 libsha2 +.TH LIBSHA2_HMAC_UNMARSHAL 3 LIBSHA2 .SH NAME libsha2_hmac_unmarshal \- Unmarshal an HMAC-SHA-2 hashing state .SH SYNOPSIS diff --git a/libsha2_hmac_update.3 b/libsha2_hmac_update.3 index bb02a18..7764c6e 100644 --- a/libsha2_hmac_update.3 +++ b/libsha2_hmac_update.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_HMAC_UPDATE 3 2019-02-10 libsha2 +.TH LIBSHA2_HMAC_UPDATE 3 LIBSHA2 .SH NAME libsha2_hmac_update \- Feed data into a HMAC-SHA-2 algorithm .SH SYNOPSIS diff --git a/libsha2_init.3 b/libsha2_init.3 index 5823464..aca97b0 100644 --- a/libsha2_init.3 +++ b/libsha2_init.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_INIT 3 2019-02-09 libsha2 +.TH LIBSHA2_INIT 3 LIBSHA2 .SH NAME libsha2_init \- Initialises hashing with a SHA-2 algorithm .SH SYNOPSIS diff --git a/libsha2_marshal.3 b/libsha2_marshal.3 index 4b44533..ecb5031 100644 --- a/libsha2_marshal.3 +++ b/libsha2_marshal.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_MARSHAL 3 2019-02-09 libsha2 +.TH LIBSHA2_MARSHAL 3 LIBSHA2 .SH NAME libsha2_marshal \- Marshal a SHA-2 hashing state .SH SYNOPSIS diff --git a/libsha2_state_output_size.3 b/libsha2_state_output_size.3 index 591eace..ef3130b 100644 --- a/libsha2_state_output_size.3 +++ b/libsha2_state_output_size.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_STATE_OUTPUT_SIZE 3 2019-02-09 libsha2 +.TH LIBSHA2_STATE_OUTPUT_SIZE 3 LIBSHA2 .SH NAME libsha2_state_output_size \- Get the size of the output for a SHA-2 algorithm .SH SYNOPSIS diff --git a/libsha2_sum_fd.3 b/libsha2_sum_fd.3 index 8850e01..07e1864 100644 --- a/libsha2_sum_fd.3 +++ b/libsha2_sum_fd.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_SUM_FD 3 2019-02-09 libsha2 +.TH LIBSHA2_SUM_FD 3 LIBSHA2 .SH NAME libsha2_sum_fd \- Hash a file with a SHA-2 algorithm .SH SYNOPSIS diff --git a/libsha2_unhex.3 b/libsha2_unhex.3 index 7e009a0..5737057 100644 --- a/libsha2_unhex.3 +++ b/libsha2_unhex.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_UNHEX 3 2019-02-09 libsha2 +.TH LIBSHA2_UNHEX 3 LIBSHA2 .SH NAME libsha2_unhex \- Covert hexadecimal to binary .SH SYNOPSIS diff --git a/libsha2_unmarshal.3 b/libsha2_unmarshal.3 index 5df4929..a33eec5 100644 --- a/libsha2_unmarshal.3 +++ b/libsha2_unmarshal.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_UNMARSHAL 3 2019-02-09 libsha2 +.TH LIBSHA2_UNMARSHAL 3 LIBSHA2 .SH NAME libsha2_unmarshal \- Unmarshal a SHA-2 hashing state .SH SYNOPSIS diff --git a/libsha2_update.3 b/libsha2_update.3 index b7baf39..e64c972 100644 --- a/libsha2_update.3 +++ b/libsha2_update.3 @@ -1,4 +1,4 @@ -.TH LIBSHA2_UPDATE 3 2019-02-09 libsha2 +.TH LIBSHA2_UPDATE 3 LIBSHA2 .SH NAME libsha2_update \- Feed data into a SHA-2 algorithm .SH SYNOPSIS @@ -3,14 +3,25 @@ #include <stdatomic.h> #if defined(__SSE4_1__) && defined(__SSSE3__) && defined(__SSE2__) && defined(__SHA__) -# define HAVE_X86_SHA_INTRINSICS +# define HAVE_X86_SHA256_INTRINSICS #endif +#if defined(__arm__) || defined(__aarch32__) || defined(__arm64__) || defined(__aarch64__) || defined(_M_ARM) +# if defined(__ARM_NEON) && (defined(__ARM_ACLE) || defined(__ARM_FEATURE_CRYPTO)) +# define HAVE_ARM_SHA256_INTRINSICS +# endif +#endif -#ifdef HAVE_X86_SHA_INTRINSICS + +#ifdef HAVE_X86_SHA256_INTRINSICS # include <immintrin.h> #endif +#ifdef HAVE_ARM_SHA256_INTRINSICS +# include <arm_neon.h> +# include <arm_acle.h> +#endif + /** * Unified implementation (what can unified without performance impact) @@ -68,7 +79,7 @@ h[i] = TRUNC(h[i] + work_h[i]); -#ifdef HAVE_X86_SHA_INTRINSICS +#ifdef HAVE_X86_SHA256_INTRINSICS static size_t process_x86_sha256(struct libsha2_state *restrict state, const unsigned char *restrict data, size_t len) @@ -290,6 +301,168 @@ out: #endif +#ifdef HAVE_ARM_SHA256_INTRINSICS + +static size_t +process_arm_sha256(struct libsha2_state *restrict state, const unsigned char *restrict data, size_t len) +{ + static const uint32_t rc[] = { + UINT32_C(0x428A2F98), UINT32_C(0x71374491), UINT32_C(0xB5C0FBCF), UINT32_C(0xE9B5DBA5), + UINT32_C(0x3956C25B), UINT32_C(0x59F111F1), UINT32_C(0x923F82A4), UINT32_C(0xAB1C5ED5), + UINT32_C(0xD807AA98), UINT32_C(0x12835B01), UINT32_C(0x243185BE), UINT32_C(0x550C7DC3), + UINT32_C(0x72BE5D74), UINT32_C(0x80DEB1FE), UINT32_C(0x9BDC06A7), UINT32_C(0xC19BF174), + UINT32_C(0xE49B69C1), UINT32_C(0xEFBE4786), UINT32_C(0x0FC19DC6), UINT32_C(0x240CA1CC), + UINT32_C(0x2DE92C6F), UINT32_C(0x4A7484AA), UINT32_C(0x5CB0A9DC), UINT32_C(0x76F988DA), + UINT32_C(0x983E5152), UINT32_C(0xA831C66D), UINT32_C(0xB00327C8), UINT32_C(0xBF597FC7), + UINT32_C(0xC6E00BF3), UINT32_C(0xD5A79147), UINT32_C(0x06CA6351), UINT32_C(0x14292967), + UINT32_C(0x27B70A85), UINT32_C(0x2E1B2138), UINT32_C(0x4D2C6DFC), UINT32_C(0x53380D13), + UINT32_C(0x650A7354), UINT32_C(0x766A0ABB), UINT32_C(0x81C2C92E), UINT32_C(0x92722C85), + UINT32_C(0xA2BFE8A1), UINT32_C(0xA81A664B), UINT32_C(0xC24B8B70), UINT32_C(0xC76C51A3), + UINT32_C(0xD192E819), UINT32_C(0xD6990624), UINT32_C(0xF40E3585), UINT32_C(0x106AA070), + UINT32_C(0x19A4C116), UINT32_C(0x1E376C08), UINT32_C(0x2748774C), UINT32_C(0x34B0BCB5), + UINT32_C(0x391C0CB3), UINT32_C(0x4ED8AA4A), UINT32_C(0x5B9CCA4F), UINT32_C(0x682E6FF3), + UINT32_C(0x748F82EE), UINT32_C(0x78A5636F), UINT32_C(0x84C87814), UINT32_C(0x8CC70208), + UINT32_C(0x90BEFFFA), UINT32_C(0xA4506CEB), UINT32_C(0xBEF9A3F7), UINT32_C(0xC67178F2) + }; + + uint32x4_t abcd, efgh, abcd_orig, efgh_orig; + uint32x4_t msg0, msg1, msg2, msg3, tmp0, tmp1, tmp2; + const unsigned char *restrict chunk; + size_t off = 0; + + abcd_orig = vld1q_u32(&state->h.b32[0]); + efgh_orig = vld1q_u32(&state->h.b32[4]); + + for (; len - off >= state->chunk_size; off += state->chunk_size) { + abcd = abcd_orig; + efgh = efgh_orig; + + chunk = &data[off]; + msg0 = vld1q_u32((const uint32_t *)&chunk[0]); + msg1 = vld1q_u32((const uint32_t *)&chunk[16]); + msg2 = vld1q_u32((const uint32_t *)&chunk[32]); + msg3 = vld1q_u32((const uint32_t *)&chunk[48]); + msg0 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg0))); + msg1 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg1))); + msg2 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg2))); + msg3 = vreinterpretq_u32_u8(vrev32q_u8(vreinterpretq_u8_u32(msg3))); + + tmp0 = vaddq_u32(msg0, vld1q_u32(&rc[0 * 4])); + + msg0 = vsha256su0q_u32(msg0, msg1); + tmp2 = abcd; + tmp1 = vaddq_u32(msg1, vld1q_u32(&rc[1 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp0); + efgh = vsha256h2q_u32(efgh, tmp2, tmp0); + msg0 = vsha256su1q_u32(msg0, msg2, msg3); + + msg1 = vsha256su0q_u32(msg1, msg2); + tmp2 = abcd; + tmp0 = vaddq_u32(msg2, vld1q_u32(&rc[2 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp1); + efgh = vsha256h2q_u32(efgh, tmp2, tmp1); + msg1 = vsha256su1q_u32(msg1, msg3, msg0); + + msg2 = vsha256su0q_u32(msg2, msg3); + tmp2 = abcd; + tmp1 = vaddq_u32(msg3, vld1q_u32(&rc[3 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp0); + efgh = vsha256h2q_u32(efgh, tmp2, tmp0); + msg2 = vsha256su1q_u32(msg2, msg0, msg1); + + msg3 = vsha256su0q_u32(msg3, msg0); + tmp2 = abcd; + tmp0 = vaddq_u32(msg0, vld1q_u32(&rc[4 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp1); + efgh = vsha256h2q_u32(efgh, tmp2, tmp1); + msg3 = vsha256su1q_u32(msg3, msg1, msg2); + + msg0 = vsha256su0q_u32(msg0, msg1); + tmp2 = abcd; + tmp1 = vaddq_u32(msg1, vld1q_u32(&rc[5 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp0); + efgh = vsha256h2q_u32(efgh, tmp2, tmp0); + msg0 = vsha256su1q_u32(msg0, msg2, msg3); + + msg1 = vsha256su0q_u32(msg1, msg2); + tmp2 = abcd; + tmp0 = vaddq_u32(msg2, vld1q_u32(&rc[6 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp1); + efgh = vsha256h2q_u32(efgh, tmp2, tmp1); + msg1 = vsha256su1q_u32(msg1, msg3, msg0); + + msg2 = vsha256su0q_u32(msg2, msg3); + tmp2 = abcd; + tmp1 = vaddq_u32(msg3, vld1q_u32(&rc[7 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp0); + efgh = vsha256h2q_u32(efgh, tmp2, tmp0); + msg2 = vsha256su1q_u32(msg2, msg0, msg1); + + msg3 = vsha256su0q_u32(msg3, msg0); + tmp2 = abcd; + tmp0 = vaddq_u32(msg0, vld1q_u32(&rc[8 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp1); + efgh = vsha256h2q_u32(efgh, tmp2, tmp1); + msg3 = vsha256su1q_u32(msg3, msg1, msg2); + + msg0 = vsha256su0q_u32(msg0, msg1); + tmp2 = abcd; + tmp1 = vaddq_u32(msg1, vld1q_u32(&rc[9 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp0); + efgh = vsha256h2q_u32(efgh, tmp2, tmp0); + msg0 = vsha256su1q_u32(msg0, msg2, msg3); + + msg1 = vsha256su0q_u32(msg1, msg2); + tmp2 = abcd; + tmp0 = vaddq_u32(msg2, vld1q_u32(&rc[10 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp1); + efgh = vsha256h2q_u32(efgh, tmp2, tmp1); + msg1 = vsha256su1q_u32(msg1, msg3, msg0); + + msg2 = vsha256su0q_u32(msg2, msg3); + tmp2 = abcd; + tmp1 = vaddq_u32(msg3, vld1q_u32(&rc[11 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp0); + efgh = vsha256h2q_u32(efgh, tmp2, tmp0); + msg2 = vsha256su1q_u32(msg2, msg0, msg1); + + msg3 = vsha256su0q_u32(msg3, msg0); + tmp2 = abcd; + tmp0 = vaddq_u32(msg0, vld1q_u32(&rc[12 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp1); + efgh = vsha256h2q_u32(efgh, tmp2, tmp1); + msg3 = vsha256su1q_u32(msg3, msg1, msg2); + + tmp2 = abcd; + tmp1 = vaddq_u32(msg1, vld1q_u32(&rc[13 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp0); + efgh = vsha256h2q_u32(efgh, tmp2, tmp0); + + tmp2 = abcd; + tmp0 = vaddq_u32(msg2, vld1q_u32(&rc[14 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp1); + efgh = vsha256h2q_u32(efgh, tmp2, tmp1); + + tmp2 = abcd; + tmp1 = vaddq_u32(msg3, vld1q_u32(&rc[15 * 4])); + abcd = vsha256hq_u32(abcd, efgh, tmp0); + efgh = vsha256h2q_u32(efgh, tmp2, tmp0); + + tmp2 = abcd; + abcd = vsha256hq_u32(abcd, efgh, tmp1); + efgh = vsha256h2q_u32(efgh, tmp2, tmp1); + + abcd_orig = vaddq_u32(abcd_orig, abcd); + efgh_orig = vaddq_u32(efgh_orig, efgh); + } + + vst1q_u32(&state->h.b32[0], abcd_orig); + vst1q_u32(&state->h.b32[4], efgh_orig); + + return off; +} + +#endif size_t libsha2_process(struct libsha2_state *restrict state, const unsigned char *restrict data, size_t len) @@ -298,6 +471,10 @@ libsha2_process(struct libsha2_state *restrict state, const unsigned char *restr size_t off = 0; if (state->algorithm <= LIBSHA2_256) { +#if defined(HAVE_ARM_SHA256_INTRINSICS) + return process_arm_sha256(state, data, len); +#else +# define ROTR(X, N) TRUNC32(((X) >> (N)) | ((X) << (32 - (N)))) uint_least32_t s0, s1; size_t i, j; @@ -305,36 +482,33 @@ libsha2_process(struct libsha2_state *restrict state, const unsigned char *restr # pragma GCC diagnostic push # pragma GCC diagnostic ignored "-Wmemset-elt-size" #endif -#define ROTR(X, N) TRUNC32(((X) >> (N)) | ((X) << (32 - (N)))) - -#ifdef HAVE_X86_SHA_INTRINSICS +# ifdef HAVE_X86_SHA256_INTRINSICS if (have_sha_intrinsics()) return process_x86_sha256(state, data, len); -#endif - +# endif for (; len - off >= state->chunk_size; off += state->chunk_size) { chunk = &data[off]; SHA2_IMPLEMENTATION(chunk, 7, 18, 3, 17, 19, 10, 6, 11, 25, 2, 13, 22, uint_least32_t, 4, TRUNC32, state->k.b32, state->w.b32, state->h.b32, state->work_h.b32); } - -#undef ROTR -#if defined(__GNUC__) -# pragma GCC diagnostic pop +# if defined(__GNUC__) +# pragma GCC diagnostic pop +# endif +# undef ROTR #endif } else { +#define ROTR(X, N) TRUNC64(((X) >> (N)) | ((X) << (64 - (N)))) uint_least64_t s0, s1; size_t i, j; -#define ROTR(X, N) TRUNC64(((X) >> (N)) | ((X) << (64 - (N)))) + /* TODO Add optimisation using ARMv8.2 SHA-512 intrinsics (when I've access to a machine supporting it) */ for (; len - off >= state->chunk_size; off += state->chunk_size) { chunk = &data[off]; SHA2_IMPLEMENTATION(chunk, 1, 8, 7, 19, 61, 6, 14, 18, 41, 28, 34, 39, uint_least64_t, 8, TRUNC64, state->k.b64, state->w.b64, state->h.b64, state->work_h.b64); } - #undef ROTR } |
