wallet/src/slash6.cpp

382 lines
16 KiB
C++

//#include <cstddef>
//#include <cstdint>
#include <array>
#include <assert.h>
#include <string>
//#include <initializer_list> // for initializer_list
//#include <type_traits>
//#include <memory> // for shared_ptr, unique_ptr
#include <span>
#include "ILog.h"
#include "localization.h"
#include "slash6.h"
// This base64 uses the following characters as the numerals 0 to 63:
// Table to convert six bit to ascii
// index2base64[n] is the ASCII character used for the six bit value n.
// The letters 'I', 'O', 'l' and 'o' are deliberately absent from the table; the charindex
// constructor maps them onto the values of '1' and '0' when decoding.
// Being initialized from a string literal, the array carries a terminating NUL after the
// sixty four numerals, and the charindex constructor uses that NUL to find the end of the table.
static constexpr uint8_t index2base64[]{
"0123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnpqrstuvwxyz!$*+-_"
};
/*
control characters are stops
! $ * + - permitted The characters "#%&'(),. are stops
0-9 permitted The characters :;<=>? are stops
A-Z and _ permitted, @ and [\]^ are stops
a-z permitted. {|} ~` are stops, as is the mysterious control character 0x7F (del)
*/
// Unlike regular base64, and incompatible with it. Intended to encode stuff small enough
// that it might be human typed and human transmitted, therefore maps 'o' and 'O' to '0', 'I' and
// 'l' to '1'
// uses six url safe additional characters !$*+-_ to bring it up to six bits
//
// But on reflection, useless, since human typed stuff like this should use Bitcoin's base 58 encoding
// So going to switch to regular base64, despite the unreasonably immense amount of work I put into it.
// Unfortunately, Wireguard, with which I am going to need to interoperate, uses RFC4648, whose
// algorithm is fundamentally different - no special treatment for I, O, o, and l, and uses =
// to handle the case where you have boundary problems between eight and six bit groups.
// They force everything to four character groups, using an = sign to indicate that the
// bytes being represented stop before a multiple of three. https://www.base64encode.org
static_assert(index2base64[63] == '_', "surprise numeral at 63");
// Being intended for small bits of data, assumes no whitespace within an entity
// Encode and decode are called with a bit buffer, consisting of an unsigned byte pointer, a
// starting bit position relative to the pointer, and a bit count. We assume that there is room
// enough in the object pointed to to accommodate the bytes referenced by
// Bit Position+BitCount. We don't change bits outside the range.
// The bit buffer does not need to be aligned, nor does it need to be a multiple of six bits,
// eight bits, or twenty four bits.
// If the input bit buffer to be encoded to base sixty four is not a multiple of six bits, the
// last base sixty four numeral output will represent the bit buffer padded with trailing zeroes.
// If there is no room in the output span for all the base sixty four digits, the encode routine
// will return a number of bits less than the size of the input bit buffer, less than the bitcount
// it was given.
// Compile time execution in C++ is a pain, because expressions are apt to unpredictably lose
// their constexpr character for reasons that are far from clear.
//
// You can declare anything reasonable to be constexpr, and the compiler will not issue an
// error message until the code that attempts to use what you have declared constexpr is
// invoked from somewhere else "expression does not evaluate to constant"
//
// an assert in an expression evaluated at compile time does not trigger a run time error,
// Instead the compiler reports that the expression did not evaluate to a constant,
//
// The error is confusing, because the error points to the declaration where the initialization
// was invoked,instead of pointing to the assert.
// To debug code intended to be run at compile time, exercise it at run time with
// auto ptr(std::make_unique<Class>());
// at run time;
// Reverse lookup table: maps every possible byte value to the position of that character
// in a NUL terminated numeral table, or to 0xFF when the character is not in the table.
class charindex {
public:
    // Indexed by character code. Not const, yet it becomes a compile time constant
    // whenever the enclosing charindex object is created in a constexpr expression.
    std::array< uint8_t, 0x100> index{ 0, };
    charindex() = delete;
    // p points to a NUL terminated table of at most 255 distinct numerals.
    constexpr charindex(const uint8_t* p) {
        // Start with every character marked as "not a numeral".
        for (uint8_t& entry : index) {
            entry = 0xFF;
        }
        // Record the position of each numeral until the terminating NUL is reached.
        uint8_t position{ 0 };
        while (p[position] != 0) {
            index[p[position]] = position;
            ++position;
            // Guards against a table with no terminator wrapping the eight bit counter.
            // During constant evaluation a failure shows up as an "expression does not
            // evaluate to constant" compile error rather than as a run time break.
            assert(position != 0);
        }
        // Characters easily confused by a human typist decode as the digit they resemble.
        index['O'] = 0;
        index['o'] = 0;
        index['I'] = 1;
        index['l'] = 1;
    }
};
// The ascii to six bit lookup table, built from index2base64 by the constexpr charindex
// constructor. Owns the storage that ascii2six points into.
static constexpr charindex ascii2six_ar(index2base64);
//
//
// You really have to write compile time code in templates as a language, which is a totally
// obscure and hard to use language, apt to generate remarkably voluminous error messages
// with little obvious connection to the actual problem, and surprising results that are very
// difficult to predict in advance or understand at all.
// Table to convert ascii to six bit as a good old fashioned non owning naked pointer to const,
// whose storage is owned by a const static which exists until the program terminates.
// ascii2six[c] is the six bit value of the character code c (0 to 0xFF); characters that are
// not numerals map to 0xFF.
const uint8_t* const ascii2six{ &ascii2six_ar.index[0] };
// Self test of the two lookup tables: for every character code that ascii2six accepts as a
// numeral, converting the six bit value back through index2base64 must give the same
// character, except for the aliases 'I', 'l', 'o' and 'O', which decode to the values of
// '1' and '0'.
// Checks with assert only, so it does nothing in a build with NDEBUG defined.
void ascii2test() {
    for (unsigned int i{ 0 }; i < 0x100; i++) {
        const unsigned int j{ ascii2six[i] };
        // Only look the value up in index2base64 once we know it is a real six bit value:
        // non numerals map to 0xFF, which lies beyond the end of the 65 byte numeral table.
        if (j < 64) {
            [[maybe_unused]] const char v{ static_cast<char>(i) };
            [[maybe_unused]] const char w{ static_cast<char>(index2base64[j]) };
            assert(v == w || v == 'I' || v == 'l' || v == 'o' || v == 'O');
        }
    }
}
// Decode does not have an input span of encoded characters, but a char *, because it assumes
// the string is always terminated by an invalid character, such as the trailing null at the end of
// string or a space, or any character that is not one of our base sixty four numerals
// If there are not enough input base sixty four numerals, it returns a size less than
// requested.
// If the requested bit buffer is not a multiple of six bits, and the last base 64 numeral had
// trailing ones that would not fit in the buffer, rather than the expected trailing zeroes, then
// it truncates the excess bits and throws BadDataException. It does not actually change bits
// outside the buffer. Trailing ones are likely an error, because obviously we want to decode
// something from base sixty four that was originally encoded using the same sized buffer.
// Upper bound for valid six bit values coming out of the ascii2six table: the decoder accepts
// a character as a numeral only when its table entry is less than INV.
static const uint8_t INV{ UCHAR_MAX };
// highBitMask[n] has the n most significant bits of a byte set and the rest clear.
const uint8_t highBitMask[]{ 0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF };
static_assert(CHAR_BIT+1 == sizeof(highBitMask), "expecting eight bits per character");
// lowBitMask[n] has the n most significant bits of a byte clear and the remaining
// 8 - n low order bits set.
const uint8_t lowBitMask[]{ 0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00 };
static_assert(CHAR_BIT + 1 == sizeof(lowBitMask), "expecting eight bits per character");
// Converts a bit buffer into base 64 numerals
// Start does not need to be byte aligned, nor does the length need to be a multiple of eight,
// six, or twenty four.
// If insufficient room is provided for the base 64 value, throws exception.
// Encodes the bit field [start, start + length) of bitBuffer as NUL terminated base 64
// numerals, most significant bits first.
//   bitBuffer       bytes holding the bit field; only bytes that contain bits of the field
//                   are read.
//   start           position of the first bit, counted from the most significant bit of
//                   bitBuffer[0]; may exceed 7.
//   length          number of bits to encode; need not be a multiple of six or eight.
//                   If it is not a multiple of six, the last numeral is padded with
//                   trailing zero bits.
//   base64Numerals  receives (length + 5) / 6 numerals followed by a NUL.
// Throws FatalException(sz_text_buffer_overflow) when base64Numerals has no room for the
// numerals plus the terminating NUL, which includes the case of an empty span.
void bits2base64(const uint8_t * bitBuffer, unsigned int start, unsigned int length, std::span<char>base64Numerals) {
    const auto capacity{ base64Numerals.size() };
    // If you hit this assert you probably passed in a negative number, overflowing the
    // string buffer, which would cause you to write all over memory.
    // The capacity is tested for zero separately, because 6 * capacity - 6 wraps around to a
    // huge value for an empty span and would let any length through.
    assert(capacity > 0 && length <= 6 * capacity - 6);
    if (capacity == 0 || length > 6 * capacity - 6)throw FatalException(sz_text_buffer_overflow);
    char* const Outputsz{ base64Numerals.data() };
    char* e{ Outputsz };
    if (length) {
        bitBuffer += start / CHAR_BIT;
        start %= CHAR_BIT;
        const unsigned int zeropadding{ ((length + 5) / 6) * 6 - length };
        assert((length + zeropadding) % 6 == 0);
        assert(length + zeropadding <= 6 * static_cast<unsigned int>(capacity));
        // Lift the part of the first byte that follows the start position.
        unsigned int bitAccumulator = (*bitBuffer++) & lowBitMask[start];
        unsigned int bitsInAccumulator{ 8 - start };
        assert(bitsInAccumulator < 9);
        // count is the number of bits of the field still waiting in the buffer.
        // We need to keep track of it, because the last byte lifted out of the buffer may well
        // have to be an incomplete byte, so when count gets down to below 8, we have to
        // special case loading the bitAccumulator.
        unsigned int count{ 0 };
        if (length < bitsInAccumulator) {
            // The whole field sits inside the first byte: discard the bits that follow the
            // field instead of letting count wrap around, which would make the code below
            // read a byte beyond the field.
            bitAccumulator >>= (bitsInAccumulator - length);
            bitsInAccumulator = length;
        }
        else {
            count = length - bitsInAccumulator;
        }
        // All the numerals that are completely filled by bits of the field.
        char* const endBaseNumeralBuffer{ Outputsz + length / 6 };
        for (; e < endBaseNumeralBuffer; e++) {
            assert(count + bitsInAccumulator + 6 * static_cast<unsigned int>(e - Outputsz) == length);
            if (bitsInAccumulator < 6) {
                if (count > 7) {
                    // A whole byte of the field is still available.
                    bitAccumulator = (bitAccumulator << 8) | (*bitBuffer++);
                    bitsInAccumulator += 8;
                    count -= 8;
                }
                else {
                    // Should be enough bits in buffer for all numerals produced by this
                    // for loop.
                    assert(count + bitsInAccumulator >= 6);
                    // Last, incomplete, byte: keep only its count high order bits.
                    bitAccumulator = (((bitAccumulator << 8) | (*bitBuffer++)) >> (8 - count));
                    bitsInAccumulator += count;
                    count = 0;
                }
                assert(count + bitsInAccumulator + 6 * static_cast<unsigned int>(e - Outputsz) == length);
            }
            assert(bitsInAccumulator > 5);
            *e = index2base64[(bitAccumulator >> (bitsInAccumulator - 6)) & 0x3F];
            bitsInAccumulator -= 6;
        }
        // When we drop out of the for loop, we may have more than 0 bits left but less than
        // six, in which case we then have to special case the last numeral by filling the bit
        // accumulator with our zeropadding.
        if (count) {
            assert(count < 6);
            bitAccumulator = (((bitAccumulator << 8) | (*bitBuffer++)) >> (8 - count));
            bitsInAccumulator += count;
            count = 0;
            assert(bitsInAccumulator + 6 * static_cast<unsigned int>(e - Outputsz) == length);
        }
        if (bitsInAccumulator) {
            // Going to issue one last numeral, zero padded on the right.
            assert(zeropadding + bitsInAccumulator == 6);
            bitAccumulator <<= zeropadding;
            *e++ = index2base64[bitAccumulator & 0x3F];
        }
        assert(count == 0);
        assert(6 * static_cast<unsigned int>(e - Outputsz) == length + zeropadding);
        // Ensures that base 64 representation is the right size to hold the bits.
        assert((length + 5) / 6 == static_cast<unsigned int>(e - Outputsz));
    }
    *e = '\0';
}
// This may produce the runtime error that there are too few numerals to fill the bit buffer.
// Stops at the first invalid numeral - such as a space.
// This is intended for quite short bit fields, not for transmitting megabytes of data over lines
// that are not eight bit safe.
// Returns the actual size of the fill.
// If the bit buffer is not a multiple of six, the last numerals excess bits need to be zero
// If they are not zero, will truncate the excess bits and throw BadDataException.
// Decodes base 64 numerals into the bit field [start, start + length) of bitBuffer.
//   bitBuffer       destination; bits outside the field are left unchanged.
//   start           position of the first bit of the field, counted from the most
//                   significant bit of bitBuffer[0]; may exceed 7.
//   length          size of the field in bits.
//   base64Numerals  numerals to decode; reading stops at the first character that is not
//                   a numeral (such as NUL or a space) or once the field is full.
// Returns the number of bits of the field that were filled. This is less than length when
// the numerals ran out early; the bits that were decoded are stored in the buffer.
// Throws BadDataException when length is not a multiple of six and the bits of the last
// numeral that do not fit in the field are not all zero. The bits that do fit have been
// stored by then.
unsigned int base64_to_bits(uint8_t * bitBuffer, unsigned int start, unsigned int length, const char * base64Numerals) {
    // An empty field must leave the buffer alone and must not even read it.
    if (length == 0) return 0;
    bitBuffer += start / CHAR_BIT;
    start %= CHAR_BIT;
    assert(start < 8);
    // Seed the accumulator with the bits of the first byte that precede the field, so that
    // whole bytes can be written back without disturbing them.
    unsigned int bitAccumulator = (*bitBuffer) >> (8 - start);
    unsigned int bitsInAccumulator{ start };
    unsigned int count{ length }; // bits of the field not yet supplied by a numeral
    unsigned int numeral{ 0 };
    uint8_t overflowBits{ 0 };
    uint8_t * p{ bitBuffer };
    for (
        const char * e = base64Numerals;
        count > 0 && ((numeral = ascii2six[static_cast<unsigned char>(*e)]), (numeral < INV));
        e++
        ) {
        assert((e - base64Numerals) * 6 + count == length);
        assert((p - bitBuffer) * 8 + bitsInAccumulator == start + (e - base64Numerals) * 6);
        bitAccumulator = (bitAccumulator << 6) | numeral;
        bitsInAccumulator += 6;
        if (count < 6) {
            // Last numeral of a field that is not a multiple of six bits. The accumulator
            // is here numeral aligned, rather than bit field aligned: split off the excess
            // low order bits, which are required to be zero.
            const unsigned int excess{ 6 - count };
            overflowBits = static_cast<uint8_t>(bitAccumulator & ((1u << excess) - 1u));
            bitAccumulator >>= excess;
            bitsInAccumulator -= excess;
            count = 0;
        }
        else count -= 6;
        if (bitsInAccumulator > 7) {
            // A complete byte is available: store its eight most significant pending bits.
            bitsInAccumulator -= 8;
            *p++ = static_cast<uint8_t>(bitAccumulator >> bitsInAccumulator);
        }
    }
    assert(bitsInAccumulator < 8);
    // Store the pending bits that do not fill a whole byte, keeping the low order bits of
    // that byte, which lie after the decoded bits. This is done whether the field was
    // completed or the numerals ran out, so that the buffer always agrees with the returned
    // size. Nothing is written when no numeral was consumed.
    if (bitsInAccumulator && count != length) {
        *p = static_cast<uint8_t>(
            (bitAccumulator << (8 - bitsInAccumulator))
            | (*p & lowBitMask[bitsInAccumulator]));
    }
    if (overflowBits) throw BadDataException();
    return length - count;
}
/* Expects pointer to byte buffer and pointer to string.
Expects a string of exactly the correct number of numerals,
terminated by a non base64 character, such as null.
Throws exception if that is not what it gets.
Fills the byte buffer exactly.
Returns a uint8_t containing the excess bits of the last numeral in its low order part.*/
// Decodes base 64 numerals into a buffer of whole bytes.
//   byteBuffer      destination of byteCount bytes, filled exactly.
//   byteCount       number of bytes to fill.
//   base64Numerals  exactly (byteCount * 8 + 5) / 6 numerals, followed by a character that
//                   is not a base 64 numeral, such as NUL.
// When byteCount * 8 is not a multiple of six, the last numeral straddles the end of the
// buffer: its high order bits complete the last byte, and its remaining low order bits are
// returned in the low order part of the result. The result is zero when the numerals fit
// the buffer exactly.
// Throws BadDataException when there are too few or too many numerals.
uint8_t base64_to_bytes(
    uint8_t* byteBuffer,
    uint_fast32_t byteCount,
    const char* base64Numerals
) {
    // Numerals that lie entirely inside the byte buffer, and the bits they carry.
    const auto numeralsCount{ byteCount * 8 / 6 };
    const auto bitsCount{ numeralsCount * 6 };
    if (bitsCount) {
        const auto length{ base64_to_bits(byteBuffer, 0, static_cast<unsigned int>(bitsCount), base64Numerals) };
        if (length < bitsCount) throw BadDataException();
        base64Numerals += numeralsCount;
    }
    // Bits of the last byte not supplied by the whole numerals: 0, 2 or 4.
    const unsigned int missingBitsField{ static_cast<unsigned int>(byteCount * 8 - bitsCount) };
    uint8_t leftoverBits{ 0 };
    if (missingBitsField) {
        // Bits of the straddling numeral that fall beyond the end of the buffer.
        const unsigned int leftoverBitsField{ 6 - missingBitsField };
        // we cast to unsigned character, because otherwise it is likely to be sign
        // extended resulting in indexing outside the range 0-0xFF
        // with an indeterminate and unpredictable number of high
        // order bits set.
        const uint8_t numeral{ ascii2six[static_cast<unsigned char>(*base64Numerals++)] };
        if (numeral > 63) throw BadDataException();
        const unsigned int missingBitsMask{ (1u << missingBitsField) - 1u };
        const unsigned int missingBits{ static_cast<unsigned int>(numeral) >> leftoverBitsField };
        // Replace the low order bits of the last byte with the high order bits of the numeral.
        byteBuffer[byteCount - 1] = static_cast<uint8_t>((byteBuffer[byteCount - 1] & ~missingBitsMask) | missingBits);
        leftoverBits = static_cast<uint8_t>(numeral & ((1u << leftoverBitsField) - 1u));
    }
    // The numerals must stop exactly here.
    if (ascii2six[static_cast<unsigned char>(*base64Numerals)] < 64) throw BadDataException();
    return leftoverBits;
}
// Converts a bit buffer into base 2048 BIPS-39 words
// Using the array ar_sz_bip_0039_wordlist defined in localization.cpp
// The largest word in the array is eight characters, but other languages likely have longer words.
// Start does not need to be byte aligned, nor does the length need to be a multiple of eight or
// eleven.
// Encodes the bit field [start, start + length) of bitBuffer as words taken from
// ar_sz_bip_0039_wordlist, eleven bits per word, most significant bits first.
//   bitBuffer    bytes holding the bit field; only bytes that contain bits of the field
//                are read.
//   start        position of the first bit, counted from the most significant bit of
//                bitBuffer[0]; must not be negative, may exceed 7.
//   length       number of bits to encode; nothing is encoded when it is zero or negative.
//                If it is not a multiple of eleven, the last word is padded with trailing
//                zero bits.
//   szBipsWords  receives the words separated by single spaces, followed by a NUL.
// Throws FatalException when szBipsWords has no room for the words and the terminating
// NUL, which includes the case of an empty span.
void bits2base2048(const uint8_t* bitBuffer, int start, int length, std::span<char>szBipsWords) {
    // Check for an empty span before forming any pointer into it.
    if (szBipsWords.empty()) throw FatalException("not enough room for BIPS-0039 passphrase");
    char* Outputsz{ szBipsWords.data() };
    // Last character of the output, reserved for the terminating NUL.
    char* const end_of_Outputsz{ Outputsz + (szBipsWords.size() - 1) };
    if (length > 0) {
        assert(start >= 0);
        bitBuffer += start / CHAR_BIT;
        // Leading bits of the next byte that lie before the field; only the first byte has any.
        int skip{ start % CHAR_BIT };
        uint_fast32_t bitAccumulator{ 0 };
        int bitsInAccumulator{ 0 };
        int count{ length }; // bits of the field still waiting in the buffer
        // Keep going until both the buffer and the accumulator are exhausted, so that bits
        // left in the accumulator after the last byte was loaded still produce their words.
        while (bitsInAccumulator + count > 0) {
            while (bitsInAccumulator < 11 && count > 0) {
                // The first and the last byte of the field may be incomplete.
                const int available{ 8 - skip };
                const int take{ count < available ? count : available };
                const unsigned int fieldBits{
                    (static_cast<unsigned int>(*bitBuffer++) & (0xFFu >> skip)) >> (available - take) };
                bitAccumulator = (bitAccumulator << take) | fieldBits;
                bitsInAccumulator += take;
                count -= take;
                skip = 0;
            }
            if (bitsInAccumulator < 11) {
                // Final, incomplete, word: zero pad it on the right.
                bitAccumulator <<= (11 - bitsInAccumulator);
                bitsInAccumulator = 11;
            }
            bitsInAccumulator -= 11;
            const auto wordnumber{ (bitAccumulator >> bitsInAccumulator) & 2047 };
            const char* psz_Word{ ar_sz_bip_0039_wordlist[wordnumber] };
            while (*psz_Word) {
                if (Outputsz == end_of_Outputsz) {
                    throw FatalException("not enough room for BIPS-0039 passphrase");
                }
                *Outputsz++ = *psz_Word++;
            }
            // A space separates this word from the next one, if there is a next one.
            if (bitsInAccumulator + count > 0) {
                if (Outputsz == end_of_Outputsz) {
                    throw FatalException("not enough room for BIPS-0039 passphrase");
                }
                *Outputsz++ = ' ';
            }
        }
    }
    *Outputsz = 0;
}
/*
// Not cryptographically strong, not DoS induced collision resistant. Produces
// the same mapping on all machines, for all time.
uint32_t bernstein_hash(const uint8_t* key, unsigned int len) {
constexpr uint_fast32_t INITIAL_VALUE = 5381;
constexpr uint_fast32_t M = 33;
uint_fast32_t hash = INITIAL_VALUE;
for (uint_fast32_t i = 0; i < len; ++i)
hash = M * hash + key[i];
return hash;
}
*/