Argon2: avoid initial zeroing by calling fill_block() on the first pass

This commit is contained in:
Frank Denis 2016-03-17 11:58:34 +01:00
parent 7611ea6018
commit 6f2be3633f
3 changed files with 75 additions and 12 deletions

View File

@ -102,19 +102,17 @@ static int allocate_memory(block_region **region, uint32_t m_cost) {
base = NULL; /* LCOV_EXCL_LINE */
}
memcpy(&memory, &base, sizeof memory);
memset(memory, 0, memory_size);
#elif defined(HAVE_POSIX_MEMALIGN)
if ((errno = posix_memalign((void **) &base, 64, memory_size)) != 0) {
base = NULL;
}
memcpy(&memory, &base, sizeof memory);
memset(memory, 0, memory_size);
#else
memory = NULL;
if (memory_size + 63 < memory_size) {
base = NULL;
errno = ENOMEM;
} else if ((base = calloc(memory_size + 63, (size_t) 1U)) != NULL) {
} else if ((base = malloc(memory_size + 63)) != NULL) {
uint8_t *aligned = ((uint8_t *) base) + 63;
aligned -= (uintptr_t) aligned & 63;
memcpy(&memory, &aligned, sizeof memory);

View File

@ -20,13 +20,43 @@
#include "argon2-impl.h"
#include "blamka-round-ref.h"
/*
 * Builds a brand-new memory block from two existing ones.
 *
 * With Z = prev_block ^ ref_block, the block written to next_block is
 * Z ^ P(Z), where P is the two-stage BLAKE2_ROUND_NOMSG permutation below.
 * The former contents of next_block are never read, so the destination does
 * not have to be initialised beforehand.
 *
 * @param  prev_block Pointer to the previous block
 * @param  ref_block  Pointer to the reference block
 * @param  next_block Pointer to the block to be constructed
 * @pre    all block pointers must be valid
 */
static void fill_block(const block *prev_block, const block *ref_block,
                       block *next_block) {
    block mixed; /* permuted in place */
    block saved; /* untouched copy of Z, folded back in at the end */
    unsigned base;

    /* saved = mixed = ref_block ^ prev_block */
    copy_block(&saved, ref_block);
    xor_block(&saved, prev_block);
    copy_block(&mixed, &saved);

    /* Apply Blake2 on columns of 64-bit words: (0,1,...,15), then
       (16,17,...,31), ... finally (112,113,...,127) */
    for (base = 0; base < 128; base += 16) {
        BLAKE2_ROUND_NOMSG(
            mixed.v[base], mixed.v[base + 1], mixed.v[base + 2],
            mixed.v[base + 3], mixed.v[base + 4], mixed.v[base + 5],
            mixed.v[base + 6], mixed.v[base + 7], mixed.v[base + 8],
            mixed.v[base + 9], mixed.v[base + 10], mixed.v[base + 11],
            mixed.v[base + 12], mixed.v[base + 13], mixed.v[base + 14],
            mixed.v[base + 15]);
    }

    /* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...,112,113), then
       (2,3,18,19,...,114,115), ... finally (14,15,30,31,...,126,127) */
    for (base = 0; base < 16; base += 2) {
        BLAKE2_ROUND_NOMSG(
            mixed.v[base], mixed.v[base + 1], mixed.v[base + 16],
            mixed.v[base + 17], mixed.v[base + 32], mixed.v[base + 33],
            mixed.v[base + 48], mixed.v[base + 49], mixed.v[base + 64],
            mixed.v[base + 65], mixed.v[base + 80], mixed.v[base + 81],
            mixed.v[base + 96], mixed.v[base + 97], mixed.v[base + 112],
            mixed.v[base + 113]);
    }

    /* next_block = Z ^ P(Z); the destination is only written at this point,
       after both inputs have been fully consumed. */
    xor_block(&mixed, &saved);
    copy_block(next_block, &mixed);
}
static void fill_block_with_xor(const block *prev_block, const block *ref_block,
block *next_block) {
block blockR, block_tmp;
@ -185,7 +215,11 @@ int fill_segment_ref(const argon2_instance_t *instance,
ref_block =
instance->region->memory + instance->lane_length * ref_lane + ref_index;
curr_block = instance->region->memory + curr_offset;
fill_block_with_xor(instance->region->memory + prev_offset, ref_block, curr_block);
if (0 != position.pass) {
fill_block_with_xor(instance->region->memory + prev_offset, ref_block, curr_block);
} else {
fill_block(instance->region->memory + prev_offset, ref_block, curr_block);
}
}
free(pseudo_rands);

View File

@ -32,6 +32,33 @@
#include "argon2-impl.h"
#include "blamka-round-ssse3.h"
/*
 * SSSE3 block construction that overwrites the destination.
 *
 * state      - ARGON2_OWORDS_IN_BLOCK 128-bit words, updated in place; on
 *              return it holds the same data that was stored to next_block.
 *              NOTE(review): presumably holds the previous block on entry;
 *              confirm against the caller.
 * ref_block  - reference block bytes, read with unaligned loads.
 * next_block - destination bytes, written with unaligned stores; its prior
 *              contents are never read.
 */
static void fill_block(__m128i *state, const uint8_t *ref_block, uint8_t *next_block) {
    __m128i  feed_forward[ARGON2_OWORDS_IN_BLOCK];
    uint32_t k;

    /* state ^= ref_block, keeping a copy of the result for the final XOR. */
    for (k = 0; k < ARGON2_OWORDS_IN_BLOCK; k++) {
        const __m128i ref_word =
            _mm_loadu_si128((__m128i const *) (ref_block + 16 * k));
        const __m128i z = _mm_xor_si128(state[k], ref_word);

        state[k]        = z;
        feed_forward[k] = z;
    }

    /* First pass: eight rounds over consecutive groups of eight words. */
    for (k = 0; k < 8; k++) {
        __m128i *grp = state + 8 * k;

        BLAKE2_ROUND(grp[0], grp[1], grp[2], grp[3],
                     grp[4], grp[5], grp[6], grp[7]);
    }

    /* Second pass: eight rounds over words taken with a stride of eight. */
    for (k = 0; k < 8; k++) {
        __m128i *lane = state + k;

        BLAKE2_ROUND(lane[0], lane[8], lane[16], lane[24],
                     lane[32], lane[40], lane[48], lane[56]);
    }

    /* Fold the saved input back in and emit the finished block. */
    for (k = 0; k < ARGON2_OWORDS_IN_BLOCK; k++) {
        const __m128i out = _mm_xor_si128(state[k], feed_forward[k]);

        state[k] = out;
        _mm_storeu_si128((__m128i *) (next_block + 16 * k), out);
    }
}
static void fill_block_with_xor(__m128i *state, const uint8_t *ref_block, uint8_t *next_block) {
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
uint32_t i;
@ -181,7 +208,11 @@ int fill_segment_ssse3(const argon2_instance_t *instance,
ref_block =
instance->region->memory + instance->lane_length * ref_lane + ref_index;
curr_block = instance->region->memory + curr_offset;
fill_block_with_xor(state, (uint8_t *)ref_block->v, (uint8_t *)curr_block->v);
if (0 != position.pass) {
fill_block_with_xor(state, (uint8_t *)ref_block->v, (uint8_t *)curr_block->v);
} else {
fill_block(state, (uint8_t *)ref_block->v, (uint8_t *)curr_block->v);
}
}
free(pseudo_rands);