Argon2: avoid initial zeroing by calling fill_block() on the first pass
This commit is contained in:
parent
7611ea6018
commit
6f2be3633f
@ -102,19 +102,17 @@ static int allocate_memory(block_region **region, uint32_t m_cost) {
|
||||
base = NULL; /* LCOV_EXCL_LINE */
|
||||
}
|
||||
memcpy(&memory, &base, sizeof memory);
|
||||
memset(memory, 0, memory_size);
|
||||
#elif defined(HAVE_POSIX_MEMALIGN)
|
||||
if ((errno = posix_memalign((void **) &base, 64, memory_size)) != 0) {
|
||||
base = NULL;
|
||||
}
|
||||
memcpy(&memory, &base, sizeof memory);
|
||||
memset(memory, 0, memory_size);
|
||||
#else
|
||||
memory = NULL;
|
||||
if (memory_size + 63 < memory_size) {
|
||||
base = NULL;
|
||||
errno = ENOMEM;
|
||||
} else if ((base = calloc(memory_size + 63, (size_t) 1U)) != NULL) {
|
||||
} else if ((base = malloc(memory_size + 63)) != NULL) {
|
||||
uint8_t *aligned = ((uint8_t *) base) + 63;
|
||||
aligned -= (uintptr_t) aligned & 63;
|
||||
memcpy(&memory, &aligned, sizeof memory);
|
||||
|
@ -20,13 +20,43 @@
|
||||
#include "argon2-impl.h"
|
||||
#include "blamka-round-ref.h"
|
||||
|
||||
/*
|
||||
* Function fills a new memory block
|
||||
* @param prev_block Pointer to the previous block
|
||||
* @param ref_block Pointer to the reference block
|
||||
* @param next_block Pointer to the block to be constructed
|
||||
* @pre all block pointers must be valid
|
||||
*/
|
||||
static void fill_block(const block *prev_block, const block *ref_block,
|
||||
block *next_block) {
|
||||
block blockR, block_tmp;
|
||||
unsigned i;
|
||||
|
||||
copy_block(&blockR, ref_block);
|
||||
xor_block(&blockR, prev_block);
|
||||
copy_block(&block_tmp, &blockR);
|
||||
/* Now blockR = ref_block + prev_block and bloc_tmp = ref_block + prev_block
|
||||
Apply Blake2 on columns of 64-bit words: (0,1,...,15), then
|
||||
(16,17,..31)... finally (112,113,...127) */
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND_NOMSG(
|
||||
blockR.v[16 * i], blockR.v[16 * i + 1], blockR.v[16 * i + 2],
|
||||
blockR.v[16 * i + 3], blockR.v[16 * i + 4], blockR.v[16 * i + 5],
|
||||
blockR.v[16 * i + 6], blockR.v[16 * i + 7], blockR.v[16 * i + 8],
|
||||
blockR.v[16 * i + 9], blockR.v[16 * i + 10], blockR.v[16 * i + 11],
|
||||
blockR.v[16 * i + 12], blockR.v[16 * i + 13], blockR.v[16 * i + 14],
|
||||
blockR.v[16 * i + 15]);
|
||||
}
|
||||
|
||||
/* Apply Blake2 on rows of 64-bit words: (0,1,16,17,...112,113), then
|
||||
(2,3,18,19,...,114,115).. finally (14,15,30,31,...,126,127) */
|
||||
for (i = 0; i < 8; i++) {
|
||||
BLAKE2_ROUND_NOMSG(
|
||||
blockR.v[2 * i], blockR.v[2 * i + 1], blockR.v[2 * i + 16],
|
||||
blockR.v[2 * i + 17], blockR.v[2 * i + 32], blockR.v[2 * i + 33],
|
||||
blockR.v[2 * i + 48], blockR.v[2 * i + 49], blockR.v[2 * i + 64],
|
||||
blockR.v[2 * i + 65], blockR.v[2 * i + 80], blockR.v[2 * i + 81],
|
||||
blockR.v[2 * i + 96], blockR.v[2 * i + 97], blockR.v[2 * i + 112],
|
||||
blockR.v[2 * i + 113]);
|
||||
}
|
||||
|
||||
copy_block(next_block, &block_tmp);
|
||||
xor_block(next_block, &blockR);
|
||||
}
|
||||
|
||||
static void fill_block_with_xor(const block *prev_block, const block *ref_block,
|
||||
block *next_block) {
|
||||
block blockR, block_tmp;
|
||||
@ -185,7 +215,11 @@ int fill_segment_ref(const argon2_instance_t *instance,
|
||||
ref_block =
|
||||
instance->region->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->region->memory + curr_offset;
|
||||
if (0 != position.pass) {
|
||||
fill_block_with_xor(instance->region->memory + prev_offset, ref_block, curr_block);
|
||||
} else {
|
||||
fill_block(instance->region->memory + prev_offset, ref_block, curr_block);
|
||||
}
|
||||
}
|
||||
|
||||
free(pseudo_rands);
|
||||
|
@ -32,6 +32,33 @@
|
||||
#include "argon2-impl.h"
|
||||
#include "blamka-round-ssse3.h"
|
||||
|
||||
static void fill_block(__m128i *state, const uint8_t *ref_block, uint8_t *next_block) {
|
||||
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
|
||||
uint32_t i;
|
||||
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
block_XY[i] = state[i] = _mm_xor_si128(
|
||||
state[i], _mm_loadu_si128((__m128i const *)(&ref_block[16 * i])));
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * i + 0], state[8 * i + 1], state[8 * i + 2],
|
||||
state[8 * i + 3], state[8 * i + 4], state[8 * i + 5],
|
||||
state[8 * i + 6], state[8 * i + 7]);
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
BLAKE2_ROUND(state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i],
|
||||
state[8 * 3 + i], state[8 * 4 + i], state[8 * 5 + i],
|
||||
state[8 * 6 + i], state[8 * 7 + i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < ARGON2_OWORDS_IN_BLOCK; i++) {
|
||||
state[i] = _mm_xor_si128(state[i], block_XY[i]);
|
||||
_mm_storeu_si128((__m128i *)(&next_block[16 * i]), state[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static void fill_block_with_xor(__m128i *state, const uint8_t *ref_block, uint8_t *next_block) {
|
||||
__m128i block_XY[ARGON2_OWORDS_IN_BLOCK];
|
||||
uint32_t i;
|
||||
@ -181,7 +208,11 @@ int fill_segment_ssse3(const argon2_instance_t *instance,
|
||||
ref_block =
|
||||
instance->region->memory + instance->lane_length * ref_lane + ref_index;
|
||||
curr_block = instance->region->memory + curr_offset;
|
||||
if (0 != position.pass) {
|
||||
fill_block_with_xor(state, (uint8_t *)ref_block->v, (uint8_t *)curr_block->v);
|
||||
} else {
|
||||
fill_block(state, (uint8_t *)ref_block->v, (uint8_t *)curr_block->v);
|
||||
}
|
||||
}
|
||||
|
||||
free(pseudo_rands);
|
||||
|
Loading…
Reference in New Issue
Block a user