From 6f2be3633f499f0a2f659d16eecf2a76116fb9cc Mon Sep 17 00:00:00 2001 From: Frank Denis Date: Thu, 17 Mar 2016 11:58:34 +0100 Subject: [PATCH] Argon2: avoid initial zeroing by calling fill_block() on the first pass --- .../crypto_pwhash/argon2/argon2-core.c | 4 +- .../argon2/argon2-fill-block-ref.c | 50 ++++++++++++++++--- .../argon2/argon2-fill-block-ssse3.c | 33 +++++++++++- 3 files changed, 75 insertions(+), 12 deletions(-) diff --git a/src/libsodium/crypto_pwhash/argon2/argon2-core.c b/src/libsodium/crypto_pwhash/argon2/argon2-core.c index 677e3ed5..11fa95c6 100644 --- a/src/libsodium/crypto_pwhash/argon2/argon2-core.c +++ b/src/libsodium/crypto_pwhash/argon2/argon2-core.c @@ -102,19 +102,17 @@ static int allocate_memory(block_region **region, uint32_t m_cost) { base = NULL; /* LCOV_EXCL_LINE */ } memcpy(&memory, &base, sizeof memory); - memset(memory, 0, memory_size); #elif defined(HAVE_POSIX_MEMALIGN) if ((errno = posix_memalign((void **) &base, 64, memory_size)) != 0) { base = NULL; } memcpy(&memory, &base, sizeof memory); - memset(memory, 0, memory_size); #else memory = NULL; if (memory_size + 63 < memory_size) { base = NULL; errno = ENOMEM; - } else if ((base = calloc(memory_size + 63, (size_t) 1U)) != NULL) { + } else if ((base = malloc(memory_size + 63)) != NULL) { uint8_t *aligned = ((uint8_t *) base) + 63; aligned -= (uintptr_t) aligned & 63; memcpy(&memory, &aligned, sizeof memory); diff --git a/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ref.c b/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ref.c index 88df3388..a7fb7c50 100644 --- a/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ref.c +++ b/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ref.c @@ -20,13 +20,43 @@ #include "argon2-impl.h" #include "blamka-round-ref.h" -/* - * Function fills a new memory block - * @param prev_block Pointer to the previous block - * @param ref_block Pointer to the reference block - * @param next_block Pointer to the block to be constructed - 
* @pre all block pointers must be valid - */ +static void fill_block(const block *prev_block, const block *ref_block, + block *next_block) { + block blockR, block_tmp; + unsigned i; + + copy_block(&blockR, ref_block); + xor_block(&blockR, prev_block); + copy_block(&block_tmp, &blockR); + /* Now blockR = ref_block + prev_block and block_tmp = ref_block + prev_block + Apply Blake2 on columns of 64-bit words: (0,1,...,15), then + (16,17,...,31), ... finally (112,113,...,127) */ + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND_NOMSG( + blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2], + blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5], + blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8], + blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11], + blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14], + blockR.v[16 * i + 15]); + } + + /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...,112,113), then + (2,3,18,19,...,114,115), ... finally (14,15,30,31,...,126,127) */ + for (i = 0; i < 8; i++) { + BLAKE2_ROUND_NOMSG( + blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16], + blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33], + blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64], + blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81], + blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112], + blockR.v[2 * i + 113]); + } + + copy_block(next_block, &block_tmp); + xor_block(next_block, &blockR); +} + static void fill_block_with_xor(const block *prev_block, const block *ref_block, block *next_block) { block blockR, block_tmp; @@ -185,7 +215,11 @@ int fill_segment_ref(const argon2_instance_t *instance, ref_block = instance->region->memory + instance->lane_length * ref_lane + ref_index; curr_block = instance->region->memory + curr_offset; - fill_block_with_xor(instance->region->memory + prev_offset, ref_block, curr_block); + if (0 != position.pass) { +
fill_block_with_xor(instance->region->memory + prev_offset, ref_block, curr_block); + } else { + fill_block(instance->region->memory + prev_offset, ref_block, curr_block); + } } free(pseudo_rands); diff --git a/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ssse3.c b/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ssse3.c index 8129c9ce..fdb6d710 100644 --- a/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ssse3.c +++ b/src/libsodium/crypto_pwhash/argon2/argon2-fill-block-ssse3.c @@ -32,6 +32,33 @@ #include "argon2-impl.h" #include "blamka-round-ssse3.h" +static void fill_block(__m128i *state, const uint8_t *ref_block, uint8_t *next_block) { + __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; + uint32_t i; + + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + block_XY[i] = state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((__m128i const *)(&ref_block[16 * i]))); + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], + state[8 * i + 3], state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + } + + for (i = 0; i < 8; ++i) { + BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], + state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } + + for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) { + state[i] = _mm_xor_si128(state[i], block_XY[i]); + _mm_storeu_si128((__m128i *)(&next_block[16 * i]), state[i]); + } +} + static void fill_block_with_xor(__m128i *state, const uint8_t *ref_block, uint8_t *next_block) { __m128i block_XY[ARGON2_OWORDS_IN_BLOCK]; uint32_t i; @@ -181,7 +208,11 @@ int fill_segment_ssse3(const argon2_instance_t *instance, ref_block = instance->region->memory + instance->lane_length * ref_lane + ref_index; curr_block = instance->region->memory + curr_offset; - fill_block_with_xor(state, (uint8_t *)ref_block->v, (uint8_t *)curr_block->v); + if (0 != position.pass) { + fill_block_with_xor(state, (uint8_t *)ref_block->v, 
(uint8_t *)curr_block->v); + } else { + fill_block(state, (uint8_t *)ref_block->v, (uint8_t *)curr_block->v); + } } free(pseudo_rands);