diff options
author | Mattias Andrée <maandree@kth.se> | 2024-09-15 13:21:09 +0200 |
---|---|---|
committer | Mattias Andrée <maandree@kth.se> | 2024-09-15 13:21:09 +0200 |
commit | 1f9e94880486a44ed95f8d84a6dc02aa9351bd51 (patch) | |
tree | 5e8c4bb300cfea9f213760974b219ccff17a439c | |
parent | Optimisation for w=8,16,32 (diff) | |
download | libkeccak-1f9e94880486a44ed95f8d84a6dc02aa9351bd51.tar.gz libkeccak-1f9e94880486a44ed95f8d84a6dc02aa9351bd51.tar.bz2 libkeccak-1f9e94880486a44ed95f8d84a6dc02aa9351bd51.tar.xz |
m + split out implementation for w=8,16,32,64
Signed-off-by: Mattias Andrée <maandree@kth.se>
-rw-r--r-- | 1600.c | 92 | ||||
-rw-r--r-- | 200.c | 92 | ||||
-rw-r--r-- | 400.c | 91 | ||||
-rw-r--r-- | 800.c | 92 | ||||
-rw-r--r-- | Makefile | 2 | ||||
-rw-r--r-- | benchmark.c | 2 | ||||
-rw-r--r-- | common.h | 9 | ||||
-rw-r--r-- | digest.c | 569 | ||||
-rw-r--r-- | libkeccak.h | 2 |
9 files changed, 437 insertions, 514 deletions
@@ -0,0 +1,92 @@ +/* See LICENSE file for copyright and license details. */ + + +/** + * 64-bit Keccak-f round constants + */ +static const uint64_t rc64[] = { + UINT64_C(0x0000000000000001), UINT64_C(0x0000000000008082), UINT64_C(0x800000000000808A), UINT64_C(0x8000000080008000), + UINT64_C(0x000000000000808B), UINT64_C(0x0000000080000001), UINT64_C(0x8000000080008081), UINT64_C(0x8000000000008009), + UINT64_C(0x000000000000008A), UINT64_C(0x0000000000000088), UINT64_C(0x0000000080008009), UINT64_C(0x000000008000000A), + UINT64_C(0x000000008000808B), UINT64_C(0x800000000000008B), UINT64_C(0x8000000000008089), UINT64_C(0x8000000000008003), + UINT64_C(0x8000000000008002), UINT64_C(0x8000000000000080), UINT64_C(0x000000000000800A), UINT64_C(0x800000008000000A), + UINT64_C(0x8000000080008081), UINT64_C(0x8000000000008080), UINT64_C(0x0000000080000001), UINT64_C(0x8000000080008008) +}; + + +/** + * Rotate a 64-bit word + * + * @param x:uint64_t The value to rotate + * @param n:long int Rotation steps, may not be zero + * @return :uint64_t The value rotated + */ +#define rotate64(x, n) ((uint64_t)(((uint64_t)(x) >> (64L - (n))) | ((uint64_t)(x) << (n)))) + + +/** + * 64-bit word version of `libkeccak_f_round` + * + * @param state The hashing state + * @param rc The round contant for this round + */ +LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __hot__))) +static void +libkeccak_f_round64(register struct libkeccak_state *state, register uint64_t rc) +{ + uint64_t *restrict A = state->S.w64; + uint64_t B[25], C[5], da, db, dc, dd, de; + + /* θ step (step 1 of 3). */ +#define X(N) C[N] = A[N * 5] ^ A[N * 5 + 1] ^ A[N * 5 + 2] ^ A[N * 5 + 3] ^ A[N * 5 + 4] + LIST_5(X, ;); +#undef X + + /* θ step (step 2 of 3). */ + da = C[4] ^ rotate64(C[1], 1); + dd = C[2] ^ rotate64(C[4], 1); + db = C[0] ^ rotate64(C[2], 1); + de = C[3] ^ rotate64(C[0], 1); + dc = C[1] ^ rotate64(C[3], 1); + + /* ρ and π steps, with last two part of θ. 
*/ +#define X(bi, ai, dv, r) B[bi] = rotate64(A[ai] ^ dv, r) + B[0] = A[0] ^ da; X( 1, 15, dd, 28); X( 2, 5, db, 1); X( 3, 20, de, 27); X( 4, 10, dc, 62); + X( 5, 6, db, 44); X( 6, 21, de, 20); X( 7, 11, dc, 6); X( 8, 1, da, 36); X( 9, 16, dd, 55); + X(10, 12, dc, 43); X(11, 2, da, 3); X(12, 17, dd, 25); X(13, 7, db, 10); X(14, 22, de, 39); + X(15, 18, dd, 21); X(16, 8, db, 45); X(17, 23, de, 8); X(18, 13, dc, 15); X(19, 3, da, 41); + X(20, 24, de, 14); X(21, 14, dc, 61); X(22, 4, da, 18); X(23, 19, dd, 56); X(24, 9, db, 2); +#undef X + + /* ξ step. */ +#define X(N) A[N] = B[N] ^ ((~(B[(N + 5) % 25])) & B[(N + 10) % 25]) + LIST_25(X, ;); +#undef X + + /* ι step. */ + A[0] ^= rc; +} + + +/** + * 64-bit lane version of `libkeccak_to_lane` + * + * @param message The message + * @param msglen The length of the message + * @param rr Bitrate in bytes + * @param off The offset in the message + * @return The lane + */ +LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __pure__, __hot__, __warn_unused_result__, __gnu_inline__))) +static inline uint64_t +libkeccak_to_lane64(register const unsigned char *message, register size_t msglen, register long int rr, size_t off) +{ + register long int n = (long)((msglen < (size_t)rr ? msglen : (size_t)rr) - off); + uint64_t rc = 0; + message += off; +#define X(N) if (__builtin_expect(N < n, 1)) rc |= (uint64_t)message[N] << (N * 8);\ + else return rc + LIST_8(X, ;); +#undef X + return rc; +} @@ -0,0 +1,92 @@ +/* See LICENSE file for copyright and license details. 
*/ + + +/** + * 8-bit Keccak-f round constants + */ +static const uint_fast8_t rc8[] = { + UINT8_C(0x01), UINT8_C(0x82), UINT8_C(0x8A), UINT8_C(0x00), + UINT8_C(0x8B), UINT8_C(0x01), UINT8_C(0x81), UINT8_C(0x09), + UINT8_C(0x8A), UINT8_C(0x88), UINT8_C(0x09), UINT8_C(0x0A), + UINT8_C(0x8B), UINT8_C(0x8B), UINT8_C(0x89), UINT8_C(0x03), + UINT8_C(0x02), UINT8_C(0x80) +}; + + +/** + * Rotate a 8-bit word + * + * @param x:uint8_t The value to rotate + * @param n:long int Rotation steps, may not be zero + * @return :uint8_t The value rotated + */ +#define rotate8(x, n) ((uint_fast8_t)(((uint_fast8_t)(x) >> (8L - (n))) | ((uint_fast8_t)(x) << (n)))) + + +/** + * 8-bit word version of `libkeccak_f` + * + * @param state The hashing state + */ +LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __hot__))) +static void +libkeccak_f8(register struct libkeccak_state *state) +{ +#define A state->S.w8 + + uint_fast8_t B[25], C[5], da, db, dc, dd, de; + int i; + + for (i = 0; i < 18; i++) { + /* θ step (step 1 of 3). */ +#define X(N) C[N] = A[N * 5] ^ A[N * 5 + 1] ^ A[N * 5 + 2] ^ A[N * 5 + 3] ^ A[N * 5 + 4] + LIST_5(X, ;); +#undef X + + /* θ step (step 2 of 3). */ + da = C[4] ^ rotate8(C[1], 1); + dd = C[2] ^ rotate8(C[4], 1); + db = C[0] ^ rotate8(C[2], 1); + de = C[3] ^ rotate8(C[0], 1); + dc = C[1] ^ rotate8(C[3], 1); + + /* ρ and π steps, with last two part of θ. */ +#define X(bi, ai, dv, r) B[bi] = rotate8(A[ai] ^ dv, r) + B[0] = A[0] ^ da; X( 1, 15, dd, 4); X( 2, 5, db, 1); X( 3, 20, de, 3); X( 4, 10, dc, 6); + X( 5, 6, db, 4); X( 6, 21, de, 4); X( 7, 11, dc, 6); X( 8, 1, da, 4); X( 9, 16, dd, 7); + X(10, 12, dc, 3); X(11, 2, da, 3); X(12, 17, dd, 1); X(13, 7, db, 2); X(14, 22, de, 7); + X(15, 18, dd, 5); X(16, 8, db, 5); B[17] = A[23]^de; X(18, 13, dc, 7); X(19, 3, da, 1); + X(20, 24, de, 6); X(21, 14, dc, 5); X(22, 4, da, 2); B[23] = A[19]^dd; X(24, 9, db, 2); +#undef X + + /* ξ step. 
*/ +#define X(N) A[N] = (uint8_t)(B[N] ^ ((~(B[(N + 5) % 25])) & B[(N + 10) % 25])) + LIST_25(X, ;); +#undef X + + /* ι step. */ + A[0] ^= rc8[i]; + } + +#undef A +} + + +/** + * 8-bit lane version of `libkeccak_to_lane` + * + * @param message The message + * @param msglen The length of the message + * @param rr Bitrate in bytes + * @param off The offset in the message + * @return The lane + */ +LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __pure__, __hot__, __warn_unused_result__, __gnu_inline__))) +static inline uint8_t +libkeccak_to_lane8(register const unsigned char *message, register size_t msglen, register long int rr, size_t off) +{ + register long int n = (long)((msglen < (size_t)rr ? msglen : (size_t)rr) - off); + if (__builtin_expect(0 < n, 1)) + return (uint8_t)message[off]; + return 0; +} @@ -0,0 +1,91 @@ +/* See LICENSE file for copyright and license details. */ + + +/** + * 16-bit Keccak-f round constants + */ +static const uint16_t rc16[] = { + UINT16_C(0x0001), UINT16_C(0x8082), UINT16_C(0x808A), UINT16_C(0x8000), + UINT16_C(0x808B), UINT16_C(0x0001), UINT16_C(0x8081), UINT16_C(0x8009), + UINT16_C(0x008A), UINT16_C(0x0088), UINT16_C(0x8009), UINT16_C(0x000A), + UINT16_C(0x808B), UINT16_C(0x008B), UINT16_C(0x8089), UINT16_C(0x8003), + UINT16_C(0x8002), UINT16_C(0x0080), UINT16_C(0x800A), UINT16_C(0x000A) +}; + + +/** + * Rotate a 16-bit word + * + * @param x:uint16_t The value to rotate + * @param n:long int Rotation steps, may not be zero + * @return :uint16_t The value rotated + */ +#define rotate16(x, n) ((uint16_t)(((uint16_t)(x) >> (16L - (n))) | ((uint16_t)(x) << (n)))) + + +/** + * 16-bit word version of `libkeccak_f_round` + * + * @param state The hashing state + * @param rc The round contant for this round + */ +LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __hot__))) +static void +libkeccak_f_round16(register struct libkeccak_state *state, register uint16_t rc) +{ + uint16_t *restrict A = state->S.w16; + 
uint16_t B[25], C[5], da, db, dc, dd, de; + + /* θ step (step 1 of 3). */ +#define X(N) C[N] = A[N * 5] ^ A[N * 5 + 1] ^ A[N * 5 + 2] ^ A[N * 5 + 3] ^ A[N * 5 + 4] + LIST_5(X, ;); +#undef X + + /* θ step (step 2 of 3). */ + da = C[4] ^ rotate16(C[1], 1); + dd = C[2] ^ rotate16(C[4], 1); + db = C[0] ^ rotate16(C[2], 1); + de = C[3] ^ rotate16(C[0], 1); + dc = C[1] ^ rotate16(C[3], 1); + + /* ρ and π steps, with last two part of θ. */ +#define X(bi, ai, dv, r) B[bi] = rotate16(A[ai] ^ dv, r) + B[0] = A[0] ^ da; X( 1, 15, dd, 12); X( 2, 5, db, 1); X( 3, 20, de, 11); X( 4, 10, dc, 14); + X( 5, 6, db, 12); X( 6, 21, de, 4); X( 7, 11, dc, 6); X( 8, 1, da, 4); X( 9, 16, dd, 7); + X(10, 12, dc, 11); X(11, 2, da, 3); X(12, 17, dd, 9); X(13, 7, db, 10); X(14, 22, de, 7); + X(15, 18, dd, 5); X(16, 8, db, 13); X(17, 23, de, 8); X(18, 13, dc, 15); X(19, 3, da, 9); + X(20, 24, de, 14); X(21, 14, dc, 13); X(22, 4, da, 2); X(23, 19, dd, 8); X(24, 9, db, 2); +#undef X + + /* ξ step. */ +#define X(N) A[N] = (uint16_t)(B[N] ^ ((~(B[(N + 5) % 25])) & B[(N + 10) % 25])) + LIST_25(X, ;); +#undef X + + /* ι step. */ + A[0] ^= rc; +} + + +/** + * 16-bit lane version of `libkeccak_to_lane` + * + * @param message The message + * @param msglen The length of the message + * @param rr Bitrate in bytes + * @param off The offset in the message + * @return The lane + */ +LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __pure__, __hot__, __warn_unused_result__, __gnu_inline__))) +static inline uint16_t +libkeccak_to_lane16(register const unsigned char *message, register size_t msglen, register long int rr, size_t off) +{ + register long int n = (long)((msglen < (size_t)rr ? msglen : (size_t)rr) - off); + uint16_t rc = 0; + message += off; +#define X(N) if (__builtin_expect(N < n, 1)) rc |= (uint16_t)message[N] << (N * 8);\ + else return rc + LIST_2(X, ;); +#undef X + return rc; +} @@ -0,0 +1,92 @@ +/* See LICENSE file for copyright and license details. 
*/ + + +/** + * 32-bit Keccak-f round constants + */ +static const uint32_t rc32[] = { + UINT32_C(0x00000001), UINT32_C(0x00008082), UINT32_C(0x0000808A), UINT32_C(0x80008000), + UINT32_C(0x0000808B), UINT32_C(0x80000001), UINT32_C(0x80008081), UINT32_C(0x00008009), + UINT32_C(0x0000008A), UINT32_C(0x00000088), UINT32_C(0x80008009), UINT32_C(0x8000000A), + UINT32_C(0x8000808B), UINT32_C(0x0000008B), UINT32_C(0x00008089), UINT32_C(0x00008003), + UINT32_C(0x00008002), UINT32_C(0x00000080), UINT32_C(0x0000800A), UINT32_C(0x8000000A), + UINT32_C(0x80008081), UINT32_C(0x00008080) +}; + + +/** + * Rotate a 32-bit word + * + * @param x:uint32_t The value to rotate + * @param n:long int Rotation steps, may not be zero + * @return :uint32_t The value rotated + */ +#define rotate32(x, n) ((uint32_t)(((uint32_t)(x) >> (32L - (n))) | ((uint32_t)(x) << (n)))) + + +/** + * 32-bit word version of `libkeccak_f_round` + * + * @param state The hashing state + * @param rc The round contant for this round + */ +LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __hot__))) +static void +libkeccak_f_round32(register struct libkeccak_state *state, register uint32_t rc) +{ + uint32_t *restrict A = state->S.w32; + uint32_t B[25], C[5], da, db, dc, dd, de; + + /* θ step (step 1 of 3). */ +#define X(N) C[N] = A[N * 5] ^ A[N * 5 + 1] ^ A[N * 5 + 2] ^ A[N * 5 + 3] ^ A[N * 5 + 4] + LIST_5(X, ;); +#undef X + + /* θ step (step 2 of 3). */ + da = C[4] ^ rotate32(C[1], 1); + dd = C[2] ^ rotate32(C[4], 1); + db = C[0] ^ rotate32(C[2], 1); + de = C[3] ^ rotate32(C[0], 1); + dc = C[1] ^ rotate32(C[3], 1); + + /* ρ and π steps, with last two part of θ. 
*/ +#define X(bi, ai, dv, r) B[bi] = rotate32(A[ai] ^ dv, r) + B[0] = A[0] ^ da; X( 1, 15, dd, 28); X( 2, 5, db, 1); X( 3, 20, de, 27); X( 4, 10, dc, 30); + X( 5, 6, db, 12); X( 6, 21, de, 20); X( 7, 11, dc, 6); X( 8, 1, da, 4); X( 9, 16, dd, 23); + X(10, 12, dc, 11); X(11, 2, da, 3); X(12, 17, dd, 25); X(13, 7, db, 10); X(14, 22, de, 7); + X(15, 18, dd, 21); X(16, 8, db, 13); X(17, 23, de, 8); X(18, 13, dc, 15); X(19, 3, da, 9); + X(20, 24, de, 14); X(21, 14, dc, 29); X(22, 4, da, 18); X(23, 19, dd, 24); X(24, 9, db, 2); +#undef X + + /* ξ step. */ +#define X(N) A[N] = (uint32_t)(B[N] ^ ((~(B[(N + 5) % 25])) & B[(N + 10) % 25])) + LIST_25(X, ;); +#undef X + + /* ι step. */ + A[0] ^= rc; +} + + +/** + * 32-bit lane version of `libkeccak_to_lane` + * + * @param message The message + * @param msglen The length of the message + * @param rr Bitrate in bytes + * @param off The offset in the message + * @return The lane + */ +LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __pure__, __hot__, __warn_unused_result__, __gnu_inline__))) +static inline uint32_t +libkeccak_to_lane32(register const unsigned char *message, register size_t msglen, register long int rr, size_t off) +{ + register long int n = (long)((msglen < (size_t)rr ? 
msglen : (size_t)rr) - off); + uint32_t rc = 0; + message += off; +#define X(N) if (__builtin_expect(N < n, 1)) rc |= (uint32_t)message[N] << (N * 8);\ + else return rc + LIST_4(X, ;); +#undef X + return rc; +} @@ -150,6 +150,8 @@ MAN7 =\ all: libkeccak.a libkeccak.$(LIBEXT) test benchmark $(OBJ): $(HDR) +digest.o: 1600.c 800.c 400.c 200.c + .c.o: $(CC) -fPIC -c -o $@ $< $(CFLAGS) $(CPPFLAGS) diff --git a/benchmark.c b/benchmark.c index eec02ba..ef1a0d9 100644 --- a/benchmark.c +++ b/benchmark.c @@ -17,7 +17,7 @@ #ifndef L -# define L 6 +# define L 3 #endif #ifndef STATE # define STATE (25 << (L)) @@ -38,15 +38,6 @@ #define COMMA , /** - * X-macro-enabled listing of all intergers in [0, 0] - * - * @param X(int) The macro to expand 4 times - * @param D Code to insert between each expansion of `X` - */ -#define LIST_1(X, D)\ - X(0) - -/** * X-macro-enabled listing of all intergers in [0, 1] * * @param X(int) The macro to expand 2 times @@ -11,330 +11,10 @@ static const long int LANE_TRANSPOSE_MAP[] = { LIST_25(X, COMMA) }; #undef X -/** - * 64-bit Keccak-f round constants - */ -static const uint64_t rc64[] = { - UINT64_C(0x0000000000000001), UINT64_C(0x0000000000008082), UINT64_C(0x800000000000808A), UINT64_C(0x8000000080008000), - UINT64_C(0x000000000000808B), UINT64_C(0x0000000080000001), UINT64_C(0x8000000080008081), UINT64_C(0x8000000000008009), - UINT64_C(0x000000000000008A), UINT64_C(0x0000000000000088), UINT64_C(0x0000000080008009), UINT64_C(0x000000008000000A), - UINT64_C(0x000000008000808B), UINT64_C(0x800000000000008B), UINT64_C(0x8000000000008089), UINT64_C(0x8000000000008003), - UINT64_C(0x8000000000008002), UINT64_C(0x8000000000000080), UINT64_C(0x000000000000800A), UINT64_C(0x800000008000000A), - UINT64_C(0x8000000080008081), UINT64_C(0x8000000000008080), UINT64_C(0x0000000080000001), UINT64_C(0x8000000080008008) -}; - - -/** - * 32-bit Keccak-f round constants - */ -static const uint32_t rc32[] = { - UINT32_C(0x00000001), UINT32_C(0x00008082), 
UINT32_C(0x0000808A), UINT32_C(0x80008000), - UINT32_C(0x0000808B), UINT32_C(0x80000001), UINT32_C(0x80008081), UINT32_C(0x00008009), - UINT32_C(0x0000008A), UINT32_C(0x00000088), UINT32_C(0x80008009), UINT32_C(0x8000000A), - UINT32_C(0x8000808B), UINT32_C(0x0000008B), UINT32_C(0x00008089), UINT32_C(0x00008003), - UINT32_C(0x00008002), UINT32_C(0x00000080), UINT32_C(0x0000800A), UINT32_C(0x8000000A), - UINT32_C(0x80008081), UINT32_C(0x00008080) -}; - - -/** - * 16-bit Keccak-f round constants - */ -static const uint16_t rc16[] = { - UINT16_C(0x0001), UINT16_C(0x8082), UINT16_C(0x808A), UINT16_C(0x8000), - UINT16_C(0x808B), UINT16_C(0x0001), UINT16_C(0x8081), UINT16_C(0x8009), - UINT16_C(0x008A), UINT16_C(0x0088), UINT16_C(0x8009), UINT16_C(0x000A), - UINT16_C(0x808B), UINT16_C(0x008B), UINT16_C(0x8089), UINT16_C(0x8003), - UINT16_C(0x8002), UINT16_C(0x0080), UINT16_C(0x800A), UINT16_C(0x000A) -}; - - -/** - * 8-bit Keccak-f round constants - */ -static const uint8_t rc8[] = { - UINT8_C(0x01), UINT8_C(0x82), UINT8_C(0x8A), UINT8_C(0x00), - UINT8_C(0x8B), UINT8_C(0x01), UINT8_C(0x81), UINT8_C(0x09), - UINT8_C(0x8A), UINT8_C(0x88), UINT8_C(0x09), UINT8_C(0x0A), - UINT8_C(0x8B), UINT8_C(0x8B), UINT8_C(0x89), UINT8_C(0x03), - UINT8_C(0x02), UINT8_C(0x80) -}; - - -/** - * Rotate a word - * - * @param x:uint_fast64_t The value to rotate - * @param n:long int Rotation steps, may be zero mod `w` - * @param w:long int `state->w` - * @param wmod:uint_fast64_t `state->wmod` - * @return :uint_fast64_t The value rotated - */ -#define rotate(x, n, w, wmod) ((((x) >> ((w) - ((n) % (w)))) | ((x) << ((n) % (w)))) & (wmod)) - - -/** - * Rotate a 64-bit word - * - * @param x:uint64_t The value to rotate - * @param n:long int Rotation steps, may not be zero - * @return :uint64_t The value rotated - */ -#define rotate64(x, n) ((uint64_t)(((uint64_t)(x) >> (64L - (n))) | ((uint64_t)(x) << (n)))) - - -/** - * Rotate a 32-bit word - * - * @param x:uint32_t The value to rotate - * @param 
n:long int Rotation steps, may not be zero - * @return :uint32_t The value rotated - */ -#define rotate32(x, n) ((uint32_t)(((uint32_t)(x) >> (32L - (n))) | ((uint32_t)(x) << (n)))) - - -/** - * Rotate a 16-bit word - * - * @param x:uint16_t The value to rotate - * @param n:long int Rotation steps, may not be zero - * @return :uint16_t The value rotated - */ -#define rotate16(x, n) ((uint16_t)(((uint16_t)(x) >> (16L - (n))) | ((uint16_t)(x) << (n)))) - - -/** - * Rotate a 8-bit word - * - * @param x:uint8_t The value to rotate - * @param n:long int Rotation steps, may not be zero - * @return :uint8_t The value rotated - */ -#define rotate8(x, n) ((uint8_t)(((uint8_t)(x) >> (8L - (n))) | ((uint8_t)(x) << (n)))) - - -/** - * Perform one round of computation - * - * @param state The hashing state - * @param rc The round contant for this round - */ -LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __hot__))) -static void -libkeccak_f_round(register struct libkeccak_state *state, register uint64_t rc) -{ - uint64_t *restrict A = state->S.w64; - uint_fast64_t B[25]; - uint_fast64_t C[5]; - uint_fast64_t da, db, dc, dd, de; - uint_fast64_t wmod = state->wmod; - long int w = state->w; - - /* θ step (step 1 of 3). */ -#define X(N) C[N] = A[N * 5] ^ A[N * 5 + 1] ^ A[N * 5 + 2] ^ A[N * 5 + 3] ^ A[N * 5 + 4] - LIST_5(X, ;); -#undef X - - /* θ step (step 2 of 3). */ - da = C[4] ^ rotate(C[1], 1, w, wmod); - dd = C[2] ^ rotate(C[4], 1, w, wmod); - db = C[0] ^ rotate(C[2], 1, w, wmod); - de = C[3] ^ rotate(C[0], 1, w, wmod); - dc = C[1] ^ rotate(C[3], 1, w, wmod); - - /* ρ and π steps, with last two part of θ. 
*/ -#define X(bi, ai, dv, r) B[bi] = rotate(A[ai] ^ dv, r, w, wmod) - B[0] = A[0] ^ da; X( 1, 15, dd, 28); X( 2, 5, db, 1); X( 3, 20, de, 27); X( 4, 10, dc, 62); - X( 5, 6, db, 44); X( 6, 21, de, 20); X( 7, 11, dc, 6); X( 8, 1, da, 36); X( 9, 16, dd, 55); - X(10, 12, dc, 43); X(11, 2, da, 3); X(12, 17, dd, 25); X(13, 7, db, 10); X(14, 22, de, 39); - X(15, 18, dd, 21); X(16, 8, db, 45); X(17, 23, de, 8); X(18, 13, dc, 15); X(19, 3, da, 41); - X(20, 24, de, 14); X(21, 14, dc, 61); X(22, 4, da, 18); X(23, 19, dd, 56); X(24, 9, db, 2); -#undef X - - /* ξ step. */ -#define X(N) A[N] = B[N] ^ ((~(B[(N + 5) % 25])) & B[(N + 10) % 25]) - LIST_25(X, ;); -#undef X - - /* ι step. */ - A[0] ^= rc; -} - - -/** - * 64-bit word version of `libkeccak_f_round` - * - * @param state The hashing state - * @param rc The round contant for this round - */ -LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __hot__))) -static void -libkeccak_f_round64(register struct libkeccak_state *state, register uint64_t rc) -{ - uint64_t *restrict A = state->S.w64; - uint64_t B[25], C[5], da, db, dc, dd, de; - - /* θ step (step 1 of 3). */ -#define X(N) C[N] = A[N * 5] ^ A[N * 5 + 1] ^ A[N * 5 + 2] ^ A[N * 5 + 3] ^ A[N * 5 + 4] - LIST_5(X, ;); -#undef X - - /* θ step (step 2 of 3). */ - da = C[4] ^ rotate64(C[1], 1); - dd = C[2] ^ rotate64(C[4], 1); - db = C[0] ^ rotate64(C[2], 1); - de = C[3] ^ rotate64(C[0], 1); - dc = C[1] ^ rotate64(C[3], 1); - - /* ρ and π steps, with last two part of θ. 
*/ -#define X(bi, ai, dv, r) B[bi] = rotate64(A[ai] ^ dv, r) - B[0] = A[0] ^ da; X( 1, 15, dd, 28); X( 2, 5, db, 1); X( 3, 20, de, 27); X( 4, 10, dc, 62); - X( 5, 6, db, 44); X( 6, 21, de, 20); X( 7, 11, dc, 6); X( 8, 1, da, 36); X( 9, 16, dd, 55); - X(10, 12, dc, 43); X(11, 2, da, 3); X(12, 17, dd, 25); X(13, 7, db, 10); X(14, 22, de, 39); - X(15, 18, dd, 21); X(16, 8, db, 45); X(17, 23, de, 8); X(18, 13, dc, 15); X(19, 3, da, 41); - X(20, 24, de, 14); X(21, 14, dc, 61); X(22, 4, da, 18); X(23, 19, dd, 56); X(24, 9, db, 2); -#undef X - - /* ξ step. */ -#define X(N) A[N] = B[N] ^ ((~(B[(N + 5) % 25])) & B[(N + 10) % 25]) - LIST_25(X, ;); -#undef X - - /* ι step. */ - A[0] ^= rc; -} - - -/** - * 32-bit word version of `libkeccak_f_round` - * - * @param state The hashing state - * @param rc The round contant for this round - */ -LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __hot__))) -static void -libkeccak_f_round32(register struct libkeccak_state *state, register uint32_t rc) -{ - uint32_t *restrict A = state->S.w32; - uint32_t B[25], C[5], da, db, dc, dd, de; - - /* θ step (step 1 of 3). */ -#define X(N) C[N] = A[N * 5] ^ A[N * 5 + 1] ^ A[N * 5 + 2] ^ A[N * 5 + 3] ^ A[N * 5 + 4] - LIST_5(X, ;); -#undef X - - /* θ step (step 2 of 3). */ - da = C[4] ^ rotate32(C[1], 1); - dd = C[2] ^ rotate32(C[4], 1); - db = C[0] ^ rotate32(C[2], 1); - de = C[3] ^ rotate32(C[0], 1); - dc = C[1] ^ rotate32(C[3], 1); - - /* ρ and π steps, with last two part of θ. 
*/ -#define X(bi, ai, dv, r) B[bi] = rotate32(A[ai] ^ dv, (r & 31)) - B[0] = A[0] ^ da; X( 1, 15, dd, 28); X( 2, 5, db, 1); X( 3, 20, de, 27); X( 4, 10, dc, 62); - X( 5, 6, db, 44); X( 6, 21, de, 20); X( 7, 11, dc, 6); X( 8, 1, da, 36); X( 9, 16, dd, 55); - X(10, 12, dc, 43); X(11, 2, da, 3); X(12, 17, dd, 25); X(13, 7, db, 10); X(14, 22, de, 39); - X(15, 18, dd, 21); X(16, 8, db, 45); X(17, 23, de, 8); X(18, 13, dc, 15); X(19, 3, da, 41); - X(20, 24, de, 14); X(21, 14, dc, 61); X(22, 4, da, 18); X(23, 19, dd, 56); X(24, 9, db, 2); -#undef X - - /* ξ step. */ -#define X(N) A[N] = (uint32_t)(B[N] ^ ((~(B[(N + 5) % 25])) & B[(N + 10) % 25])) - LIST_25(X, ;); -#undef X - - /* ι step. */ - A[0] ^= rc; -} - - -/** - * 16-bit word version of `libkeccak_f_round` - * - * @param state The hashing state - * @param rc The round contant for this round - */ -LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __hot__))) -static void -libkeccak_f_round16(register struct libkeccak_state *state, register uint16_t rc) -{ - uint16_t *restrict A = state->S.w16; - uint16_t B[25], C[5], da, db, dc, dd, de; - - /* θ step (step 1 of 3). */ -#define X(N) C[N] = A[N * 5] ^ A[N * 5 + 1] ^ A[N * 5 + 2] ^ A[N * 5 + 3] ^ A[N * 5 + 4] - LIST_5(X, ;); -#undef X - - /* θ step (step 2 of 3). */ - da = C[4] ^ rotate16(C[1], 1); - dd = C[2] ^ rotate16(C[4], 1); - db = C[0] ^ rotate16(C[2], 1); - de = C[3] ^ rotate16(C[0], 1); - dc = C[1] ^ rotate16(C[3], 1); - - /* ρ and π steps, with last two part of θ. 
*/ -#define X(bi, ai, dv, r) B[bi] = rotate16(A[ai] ^ dv, (r & 15)) - B[0] = A[0] ^ da; X( 1, 15, dd, 28); X( 2, 5, db, 1); X( 3, 20, de, 27); X( 4, 10, dc, 62); - X( 5, 6, db, 44); X( 6, 21, de, 20); X( 7, 11, dc, 6); X( 8, 1, da, 36); X( 9, 16, dd, 55); - X(10, 12, dc, 43); X(11, 2, da, 3); X(12, 17, dd, 25); X(13, 7, db, 10); X(14, 22, de, 39); - X(15, 18, dd, 21); X(16, 8, db, 45); X(17, 23, de, 8); X(18, 13, dc, 15); X(19, 3, da, 41); - X(20, 24, de, 14); X(21, 14, dc, 61); X(22, 4, da, 18); X(23, 19, dd, 56); X(24, 9, db, 2); -#undef X - - /* ξ step. */ -#define X(N) A[N] = (uint16_t)(B[N] ^ ((~(B[(N + 5) % 25])) & B[(N + 10) % 25])) - LIST_25(X, ;); -#undef X - - /* ι step. */ - A[0] ^= rc; -} - - -/** - * 8-bit word version of `libkeccak_f_round` - * - * @param state The hashing state - * @param rc The round contant for this round - */ -LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __hot__))) -static void -libkeccak_f_round8(register struct libkeccak_state *state, register uint8_t rc) -{ - uint8_t *restrict A = state->S.w8; - uint8_t B[25], C[5], da, db, dc, dd, de; - - /* θ step (step 1 of 3). */ -#define X(N) C[N] = A[N * 5] ^ A[N * 5 + 1] ^ A[N * 5 + 2] ^ A[N * 5 + 3] ^ A[N * 5 + 4] - LIST_5(X, ;); -#undef X - - /* θ step (step 2 of 3). */ - da = C[4] ^ rotate8(C[1], 1); - dd = C[2] ^ rotate8(C[4], 1); - db = C[0] ^ rotate8(C[2], 1); - de = C[3] ^ rotate8(C[0], 1); - dc = C[1] ^ rotate8(C[3], 1); - - /* ρ and π steps, with last two part of θ. 
*/ -#define X(bi, ai, dv, r) B[bi] = rotate8(A[ai] ^ dv, (r & 7)) - B[0] = A[0] ^ da; X( 1, 15, dd, 28); X( 2, 5, db, 1); X( 3, 20, de, 27); X( 4, 10, dc, 62); - X( 5, 6, db, 44); X( 6, 21, de, 20); X( 7, 11, dc, 6); X( 8, 1, da, 36); X( 9, 16, dd, 55); - X(10, 12, dc, 43); X(11, 2, da, 3); X(12, 17, dd, 25); X(13, 7, db, 10); X(14, 22, de, 39); - X(15, 18, dd, 21); X(16, 8, db, 45); X(17, 23, de, 8); X(18, 13, dc, 15); X(19, 3, da, 41); - X(20, 24, de, 14); X(21, 14, dc, 61); X(22, 4, da, 18); X(23, 19, dd, 56); X(24, 9, db, 2); -#undef X - - /* ξ step. */ -#define X(N) A[N] = (uint8_t)(B[N] ^ ((~(B[(N + 5) % 25])) & B[(N + 10) % 25])) - LIST_25(X, ;); -#undef X - - /* ι step. */ - A[0] ^= rc; -} +#include "1600.c" +#include "800.c" +#include "400.c" +#include "200.c" /** @@ -348,156 +28,19 @@ libkeccak_f(register struct libkeccak_state *state) { register long int i = 0; register long int nr = state->nr; - register uint_fast64_t wmod = state->wmod; if (nr == 24) { for (; i < nr; i++) libkeccak_f_round64(state, rc64[i]); - return; - } - - if (nr == 22) { + } else if (nr == 22) { for (; i < nr; i++) libkeccak_f_round32(state, rc32[i]); - return; - } - - if (nr == 20) { + } else if (nr == 20) { for (; i < nr; i++) libkeccak_f_round16(state, rc16[i]); - return; - } - - if (nr == 18) { - for (; i < nr; i++) - libkeccak_f_round8(state, rc8[i]); - return; - } - - for (; i < nr; i++) - libkeccak_f_round(state, rc64[i] & wmod); -} - - -/** - * Convert a chunk of bytes to a lane - * - * @param message The message - * @param msglen The length of the message - * @param rr Bitrate in bytes - * @param ww Word size in bytes - * @param off The offset in the message - * @return The lane - */ -LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __pure__, __warn_unused_result__, __gnu_inline__))) -static inline uint64_t -libkeccak_to_lane(register const unsigned char *restrict message, register size_t msglen, - register long int rr, register long int ww, size_t off) -{ - 
register long int n = (long)((msglen < (size_t)rr ? msglen : (size_t)rr) - off); - uint_fast64_t rc = 0; - message += off; - while (ww--) { - rc <<= 8; - rc |= __builtin_expect(ww < n, 1) ? (uint_fast64_t)(unsigned char)message[ww] : 0L; + } else if (nr == 18) { + libkeccak_f8(state); } - return (uint64_t)rc; -} - - -/** - * 64-bit lane version of `libkeccak_to_lane` - * - * @param message The message - * @param msglen The length of the message - * @param rr Bitrate in bytes - * @param off The offset in the message - * @return The lane - */ -LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __pure__, __hot__, __warn_unused_result__, __gnu_inline__))) -static inline uint64_t -libkeccak_to_lane64(register const unsigned char *message, register size_t msglen, register long int rr, size_t off) -{ - register long int n = (long)((msglen < (size_t)rr ? msglen : (size_t)rr) - off); - uint64_t rc = 0; - message += off; -#define X(N) if (__builtin_expect(N < n, 1)) rc |= (uint64_t)message[N] << (N * 8);\ - else return rc - LIST_8(X, ;); -#undef X - return rc; -} - - -/** - * 32-bit lane version of `libkeccak_to_lane` - * - * @param message The message - * @param msglen The length of the message - * @param rr Bitrate in bytes - * @param off The offset in the message - * @return The lane - */ -LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __pure__, __hot__, __warn_unused_result__, __gnu_inline__))) -static inline uint32_t -libkeccak_to_lane32(register const unsigned char *message, register size_t msglen, register long int rr, size_t off) -{ - register long int n = (long)((msglen < (size_t)rr ? 
msglen : (size_t)rr) - off); - uint32_t rc = 0; - message += off; -#define X(N) if (__builtin_expect(N < n, 1)) rc |= (uint32_t)message[N] << (N * 8);\ - else return rc - LIST_4(X, ;); -#undef X - return rc; -} - - -/** - * 16-bit lane version of `libkeccak_to_lane` - * - * @param message The message - * @param msglen The length of the message - * @param rr Bitrate in bytes - * @param off The offset in the message - * @return The lane - */ -LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __pure__, __hot__, __warn_unused_result__, __gnu_inline__))) -static inline uint16_t -libkeccak_to_lane16(register const unsigned char *message, register size_t msglen, register long int rr, size_t off) -{ - register long int n = (long)((msglen < (size_t)rr ? msglen : (size_t)rr) - off); - uint16_t rc = 0; - message += off; -#define X(N) if (__builtin_expect(N < n, 1)) rc |= (uint16_t)message[N] << (N * 8);\ - else return rc - LIST_2(X, ;); -#undef X - return rc; -} - - -/** - * 8-bit lane version of `libkeccak_to_lane` - * - * @param message The message - * @param msglen The length of the message - * @param rr Bitrate in bytes - * @param off The offset in the message - * @return The lane - */ -LIBKECCAK_GCC_ONLY(__attribute__((__nonnull__, __nothrow__, __pure__, __hot__, __warn_unused_result__, __gnu_inline__))) -static inline uint8_t -libkeccak_to_lane8(register const unsigned char *message, register size_t msglen, register long int rr, size_t off) -{ - register long int n = (long)((msglen < (size_t)rr ? 
msglen : (size_t)rr) - off); - uint8_t rc = 0; - message += off; -#define X(N) if (__builtin_expect(N < n, 1)) rc |= (uint8_t)(unsigned char)message[N] << (N * 8);\ - else return (uint8_t)rc - LIST_1(X, ;); -#undef X - return (uint8_t)rc; } @@ -562,10 +105,7 @@ libkeccak_absorption_phase(register struct libkeccak_state *restrict state, message += (size_t)rr; len -= (size_t)rr; } - return; - } - - if (__builtin_expect(ww == 4, 1)) { + } else if (__builtin_expect(ww == 4, 1)) { while (n--) { #define X(N) state->S.w32[N] ^= libkeccak_to_lane32(message, len, rr, (size_t)(LANE_TRANSPOSE_MAP[N] * 4)) LIST_25(X, ;); @@ -574,10 +114,7 @@ libkeccak_absorption_phase(register struct libkeccak_state *restrict state, message += (size_t)rr; len -= (size_t)rr; } - return; - } - - if (__builtin_expect(ww == 2, 1)) { + } else if (__builtin_expect(ww == 2, 1)) { while (n--) { #define X(N) state->S.w16[N] ^= libkeccak_to_lane16(message, len, rr, (size_t)(LANE_TRANSPOSE_MAP[N] * 2)) LIST_25(X, ;); @@ -586,10 +123,7 @@ libkeccak_absorption_phase(register struct libkeccak_state *restrict state, message += (size_t)rr; len -= (size_t)rr; } - return; - } - - if (__builtin_expect(ww == 1, 1)) { + } else if (__builtin_expect(ww == 1, 1)) { while (n--) { #define X(N) state->S.w8[N] ^= libkeccak_to_lane8(message, len, rr, (size_t)(LANE_TRANSPOSE_MAP[N] * 1)) LIST_25(X, ;); @@ -598,16 +132,6 @@ libkeccak_absorption_phase(register struct libkeccak_state *restrict state, message += (size_t)rr; len -= (size_t)rr; } - return; - } - - while (n--) { -#define X(N) state->S.w64[N] ^= libkeccak_to_lane(message, len, rr, ww, (size_t)(LANE_TRANSPOSE_MAP[N] * ww)) - LIST_25(X, ;); -#undef X - libkeccak_f(state); - message += (size_t)rr; - len -= (size_t)rr; } } @@ -626,27 +150,66 @@ static void libkeccak_squeezing_phase(register struct libkeccak_state *restrict state, long int rr, long int nn, long int ww, register unsigned char *restrict hashsum) { - register uint64_t v; - register long int ni = rr / ww + 
!!(rr % ww); + register long int ni = (rr - 1) / ww + 1; auto long int olen = state->n; auto long int i, j = 0; register long int k; - while (olen > 0) { - for (i = 0; i < ni && j < nn; i++) { - if (__builtin_expect(ww == 8, 1)) v = state->S.w64[LANE_TRANSPOSE_MAP[i]]; else - if (__builtin_expect(ww == 4, 1)) v = state->S.w32[LANE_TRANSPOSE_MAP[i]]; else - if (__builtin_expect(ww == 2, 1)) v = state->S.w16[LANE_TRANSPOSE_MAP[i]]; else - if (__builtin_expect(ww == 1, 1)) v = state->S.w8[LANE_TRANSPOSE_MAP[i]]; else - v = state->S.w64[LANE_TRANSPOSE_MAP[i]]; - for (k = 0; k++ < ww && j++ < nn; v >>= 8) - *hashsum++ = (unsigned char)(v & 0xFFU); + if (__builtin_expect(ww == 8, 1)) { + register uint64_t v; + while (olen > 0) { + for (i = 0; i < ni && j < nn; i++) { + v = state->S.w64[LANE_TRANSPOSE_MAP[i]]; + for (k = 0; k++ < ww && j++ < nn; v >>= 8) + *hashsum++ = (unsigned char)v; + } + olen -= state->r; + if (olen > 0) + libkeccak_f(state); } - olen -= state->r; - if (olen > 0) - libkeccak_f(state); + if (state->n & 7) + hashsum[-1] &= (unsigned char)((1 << (state->n & 7)) - 1); + } else if (__builtin_expect(ww == 4, 1)) { + register uint32_t v; + while (olen > 0) { + for (i = 0; i < ni && j < nn; i++) { + v = state->S.w32[LANE_TRANSPOSE_MAP[i]]; + for (k = 0; k++ < ww && j++ < nn; v >>= 8) + *hashsum++ = (unsigned char)v; + } + olen -= state->r; + if (olen > 0) + libkeccak_f(state); + } + if (state->n & 7) + hashsum[-1] &= (unsigned char)((1 << (state->n & 7)) - 1); + } else if (__builtin_expect(ww == 2, 1)) { + register uint16_t v; + while (olen > 0) { + for (i = 0; i < ni && j < nn; i++) { + v = state->S.w16[LANE_TRANSPOSE_MAP[i]]; + for (k = 0; k++ < ww && j++ < nn; v >>= 8) + *hashsum++ = (unsigned char)v; + } + olen -= state->r; + if (olen > 0) + libkeccak_f(state); + } + if (state->n & 7) + hashsum[-1] &= (unsigned char)((1 << (state->n & 7)) - 1); + } else if (__builtin_expect(ww == 1, 1)) { + register uint8_t v; + while (olen > 0) { + for (i = 0; i < ni && 
j < nn; i++, j++) { + v = state->S.w8[LANE_TRANSPOSE_MAP[i]]; + *hashsum++ = (unsigned char)v; + } + olen -= state->r; + if (olen > 0) + libkeccak_f(state); + } + if (state->n & 7) + hashsum[-1] &= (unsigned char)((1 << (state->n & 7)) - 1); } - if (state->n & 7) - hashsum[-1] &= (unsigned char)((1 << (state->n & 7)) - 1); } diff --git a/libkeccak.h b/libkeccak.h index 814883b..4d8507c 100644 --- a/libkeccak.h +++ b/libkeccak.h @@ -67,7 +67,7 @@ struct libkeccak_state { uint64_t w64[25]; uint32_t w32[25]; uint16_t w16[25]; - uint8_t w8[25]; + uint_fast8_t w8[25]; } S; /** |