// wallet/slash6.cpp

//#include <cstddef>
//#include <cstdint>
#include <array>
#include <assert.h>
#include <string>
//#include <initializer_list> // for initializer_list
//#include <type_traits>
//#include <memory>       // for shared_ptr, unique_ptr
#include <span>
#include "ILog.h"
#include "localization.h"
#include "slash6.h"
//	This base64 uses the following characters as the numerals 0 to 63:
//	Table to convert six bit to ascii
//	Numeral table, indexed by six bit value, yielding the ascii numeral.  The literal also
//	supplies a terminating null as a 65th entry.
static constexpr uint8_t index2base64[] =
	"0123456789"                // values  0 to  9
	"ABCDEFGHJKLMNPQRSTUVWXYZ"  // values 10 to 33, no 'I' and no 'O'
	"abcdefghijkmnpqrstuvwxyz"  // values 34 to 57, no 'l' and no 'o'
	"!$*+-_";                   // values 58 to 63

//	Unlike regular base64, and incompatible with it.  Intended to encode stuff small enough
//	that it might be human typed and human transmitted, therefore the decode table maps 'o' and
//	'O' to '0', and 'I' and 'l' to '1', and none of those four letters is used as a numeral.
//	Uses six url safe additional characters !$*+-_ to bring it up to six bits.
static_assert(index2base64[63] == '_', "surprise numeral at 63");

//	Being intended for small bits of data, assumes no whitespace within an entity
//	Encode and decode are called with a bit buffer, consisting of an unsigned byte pointer, a
//	starting bit position relative to the pointer, and a bit count.  We assume that there is room
//	enough in the object pointed to to accommodate the bytes referenced by
//	Bit Position+BitCount.  We don't change bits outside the range.
//	The bit buffer does not need to be aligned, nor does it need to be a multiple of six bits,
//	eight bits, or twenty four bits.
//	If the input bit buffer to be encoded to base sixty four is not a multiple of six bits, the
//	last base sixty four numeral output will represent the bit buffer padded with trailing zeroes.
//	If there is no room in the output span for all the base sixty four digits and the
//	terminating null, the encode routine throws FatalException rather than writing a truncated
//	result.

//	Table to convert ascii to six bit.
//	Indexed by the character code taken as an unsigned char.  Holds the six bit value of a
//	numeral, or 0xff for every character that is not a numeral (a "stop").
//	The element type is plain uint8_t: constexpr is a specifier for the declaration as a
//	whole and is not permitted inside a template type argument.
static constexpr std::array<uint8_t, 0x100> ascii2six_ar{
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, //control characters
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,	//control characters
	0xff, 0x3a, 0xff, 0xff, 0x3b, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3c, 0x3d, 0xff, 0x3e, 0xff, 0xff,	// ! $ * + -  permitted   The characters  "#%&'(),. are stops
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0-9 permitted  The characters  :;<=>? are stops
	0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x01, 0x12, 0x13, 0x14, 0x15, 0x16, 0x00,	// @ is a stop, A-O.  'I' reads as 1 and 'O' reads as 0
	0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0xff, 0xff, 0xff, 0xff, 0x3f, // A-Z and _ permitted, @ and  [\]^ are stops
	0xff, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x01, 0x2d, 0x2e, 0x00,	// ` is a stop, a-o.  'l' reads as 1 and 'o' reads as 0
	0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xff, 0xff, 0xff, 0xff, 0xff,	//a-z  the characters  {|} ~` are stops, as is the mysterious control character 0x7F (del)
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
	0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff  // utf8 multibyte characters, all stops.
};
//	Pin the look alike mappings and the last numeral, so that an accidental edit of the table
//	fails to compile.
static_assert(ascii2six_ar['O'] == 0x00 && ascii2six_ar['o'] == 0x00, "O and o should read as zero");
static_assert(ascii2six_ar['I'] == 0x01 && ascii2six_ar['l'] == 0x01, "I and l should read as one");
static_assert(ascii2six_ar['_'] == 0x3f, "surprise value for numeral _");

//	The ascii to six bit table again, this time as a good old fashioned non owning naked
//	pointer to const, so that a lookup reads ascii2six[c].  The storage it points at is owned
//	by a const static, which exists until the program terminates.
static const uint8_t *ascii2six = ascii2six_ar.data();
//	Compile time execution in C++ is a pain, because expressions are apt to unpredictably lose
//	their constexpr character for reasons that are far from clear.
//	You really have to write compile time code in templates as a language, which is a totally
//	obscure and hard to use language, apt to generate remarkably voluminous error messages
//	with little obvious connection to the actual problem, and surprising results that are very
//	difficult to predict in advance or understand at all.
//	In general, the better solution is to have a routine that is called once and only once at the
//	beginning of the program, which initializes a bunch of static const values, if that solution is
//	adequate, or to have a preprocess routine written in python which generates the required C
//	files and header files.

//	After this experiment in compile time code, I swear off it.

//	Decode does not have an input span of encoded characters, but a char *, because it assumes
//	the string is always terminated by an invalid character, such as the trailing null at the end of
//	string or a space, or any character that is not one of our base sixty four numerals.
//	If there are not enough input base sixty four numerals, it returns a size less than
//	requested.
//	If the requested bit buffer is not a multiple of six bits, and the last base 64 numeral had
//	trailing ones that would not fit in the buffer, rather than the expected trailing zeroes, then
//	it throws BadDataException.  It does not actually change bits outside the buffer.  Trailing
//	ones are likely an error, because obviously we want to decode something from base sixty four
//	that was originally encoded using the same sized buffer.

//	Value held in the ascii to six bit table for every character that is not a numeral.
static const uint8_t INV = UCHAR_MAX;

//	highBitMask[n] is a byte with its n high order bits set, for n from 0 to 8.
const uint8_t highBitMask[] = { 0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF };
//	lowBitMask[n] is a byte with its n high order bits clear and the rest set, for n from 0 to 8.
const uint8_t lowBitMask[] = { 0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00 };
//	One table entry for each possible bit count, zero through CHAR_BIT.
static_assert(sizeof(highBitMask) == CHAR_BIT + 1, "expecting eight bits per character");
static_assert(sizeof(lowBitMask) == CHAR_BIT + 1, "expecting eight bits per character");


//	Converts a bit buffer into base 64 numerals.
//	bitBuffer:  the bytes holding the bits.  Within a byte the high order bit comes first.
//	start:  position of the first bit to encode, counted from the high order bit of
//		bitBuffer[0].  Does not need to be byte aligned.
//	length:  number of bits to encode.  Does not need to be a multiple of eight, six, or
//		twenty four.  If it is not a multiple of six, the last numeral is padded with
//		trailing zero bits.
//	base64Numerals:  receives (length + 5) / 6 numerals followed by a terminating null.
//	If insufficient room is provided for the numerals and the null, or the span is empty,
//	throws FatalException (in a build with asserts enabled, the assert fires first).
//	Only the bytes of bitBuffer that actually hold bits of the field are read.
void bits2base64(const uint8_t * bitBuffer, unsigned int start, unsigned int length, std::span<char>base64Numerals) {
	//	An empty span has no room even for the terminating null.  It has to be tested for
	//	separately, because 6 * size() - 6 wraps around to a huge number when size() is zero.
	assert(!base64Numerals.empty() && length <= 6 * base64Numerals.size() - 6);
	//	If you hit this assert you probably passed in a negative number, overflowing the
	//	string buffer, which would cause you to write all over memory.
	if (base64Numerals.empty() || length > 6 * base64Numerals.size() - 6)throw FatalException(sz_text_buffer_overflow);
	char * const Outputsz{ base64Numerals.data() };
	char * e{ Outputsz };
	if (length) {
		bitBuffer += start / CHAR_BIT;
		start %= CHAR_BIT;
		assert(start == start % 8);
		const unsigned int zeropadding{ ((length + 5) / 6) * 6 - length };
		assert((length + zeropadding) % 6 == 0);
		assert(length + zeropadding <= 6 * static_cast<unsigned int>(base64Numerals.size()));
		unsigned int bitAccumulator = (*bitBuffer++) & lowBitMask[start];
		unsigned int bitsInAccumulator{ 8 - start };
		assert(bitsInAccumulator < 9);
		//	count is the number of bits still to be lifted out of the buffer.
		//	We need to keep track of count, because the last byte lifted out of the buffer may well
		//	have to be an incomplete byte, so when count gets down to below 8, we have to
		//	special case loading the bitAccumulator.
		unsigned int count{ 0 };
		if (length < bitsInAccumulator) {
			//	The whole field lies inside the first byte.  Discard the bits to the right of
			//	the field now, rather than letting count wrap around below zero, which would
			//	later fetch a byte from beyond the end of the field.
			bitAccumulator >>= (bitsInAccumulator - length);
			bitsInAccumulator = length;
		}
		else {
			count = length - bitsInAccumulator;
		}
		const char * const endBaseNumeralBuffer{ Outputsz + length / 6 };
		//	Issue every numeral that is made entirely of bits from the buffer.
		for (; e < endBaseNumeralBuffer; e++) {
			assert(count + bitsInAccumulator + 6 * static_cast<unsigned int>(e - Outputsz) == length);
			if (bitsInAccumulator < 6)
			{
				if (count > 7) {
					//	Lift a whole byte.
					bitAccumulator = (bitAccumulator << 8) | (*bitBuffer++);
					bitsInAccumulator += 8;
					count -= 8;
					assert(count + bitsInAccumulator + 6 * static_cast<unsigned int>(e - Outputsz) == length);
				}
				else {
					assert(count + bitsInAccumulator >= 6); // Should be enough bits in buffer for
					//	all numerals produced by this for loop.
					//	Lift the final, incomplete, byte and keep only its count high order bits.
					bitAccumulator = (((bitAccumulator << 8) | (*bitBuffer++)) >> (8 - count));
					bitsInAccumulator += count;
					count = 0;
					assert(count + bitsInAccumulator + 6 * static_cast<unsigned int>(e - Outputsz) == length);
				}
			}
			assert(bitsInAccumulator > 5);
			*e = index2base64[(bitAccumulator >> (bitsInAccumulator - 6)) & 0x3F];
			bitsInAccumulator -= 6;
		}
		//	When we drop out of the for loop, we may have more than 0 bits left but less than six, in
		//	which case we then have to special case the last numeral by filling the bit accumulator
		//	with our zeropadding.
		if (count) {
			bitAccumulator = (((bitAccumulator << 8) | (*bitBuffer++)) >> (8 - count));
			bitsInAccumulator += count;
			count = 0;
			assert(count + bitsInAccumulator + 6 * static_cast<unsigned int>(e - Outputsz) == length);
		}
		if (bitsInAccumulator) {
			//	Going to issue one last numeral
			assert(zeropadding + bitsInAccumulator == 6);
			if (bitsInAccumulator < 6) {
				bitAccumulator = bitAccumulator << zeropadding; // 0 pad accumulator
				bitsInAccumulator = 6;  //This breaks the invariant checked by the assert.
			}
			assert(bitsInAccumulator == 6);
			*e++ = index2base64[bitAccumulator & 0x3F];
		}
		assert(count == 0);
		assert(6 * static_cast<unsigned int>(e - Outputsz) == length + zeropadding);
		assert((length + 5) / 6 == static_cast<unsigned int>(e - Outputsz));	//Ensures that base 64 representation is the right size to hold the bits.
	}
	*e = '\0';
}

//	Converts base 64 numerals into a bit buffer.
//	bitBuffer:  the bytes to fill.  Within a byte the high order bit comes first.
//	start:  position of the first bit to fill, counted from the high order bit of
//		bitBuffer[0].  Does not need to be byte aligned.
//	length:  number of bits to fill.  If zero, nothing is read and nothing is written.
//	base64Numerals:  the numerals.  Decoding stops at the first invalid numeral - such as
//		a space or the terminating null - or as soon as length bits have been decoded.
//	This is intended for quite short bit fields, not for transmitting megabytes of data over lines
//	that are not eight bit safe.
//	Returns the number of bits of the field covered by the numerals consumed:  length when
//	there were enough numerals, less than length when there were too few.
//	NOTE:  when there were too few numerals, decoded bits that did not complete a byte have
//	not been stored in bitBuffer.
//	If the bit buffer is not a multiple of six, the last numeral's excess bits need to be zero.
//	If they are not zero, the excess bits are truncated, the buffer is filled, and
//	BadDataException is thrown.
unsigned int base64_to_bits(uint8_t * bitBuffer, unsigned int start, unsigned int length, const char * base64Numerals) {
	//	An empty field must leave the buffer untouched.  Without this test the loop below would
	//	still take in one numeral and write its six bits to the buffer, outside the field.
	if (length == 0) return 0;
	bitBuffer += start / CHAR_BIT;
	start -= (start / CHAR_BIT) * CHAR_BIT;
	assert(start == start % 8);
	//	Seed the accumulator with the bits of the first byte that precede the field, so that
	//	they are written back unchanged when that byte is stored.
	unsigned int bitAccumulator = (*bitBuffer)>>(8-start);
	unsigned int bitsInAccumulator{ start };
	unsigned int count{ length };	// bits of the field not yet decoded
	unsigned int numeral;
	uint8_t overflowBits{ '\0' };
	uint8_t * p{ bitBuffer };
	for (
		const char * e = base64Numerals;
		//	The cast to unsigned char keeps the table index in the range 0 to 0xFF.
		((numeral = ascii2six[static_cast<unsigned char>(*e)]), (numeral < INV));
		e++
	){
		assert((e - base64Numerals) * 6 + count == length);
		assert((p-bitBuffer)*8 + bitsInAccumulator == start + (e-base64Numerals)*6 );
		bitAccumulator = (bitAccumulator << 6) | numeral;
		bitsInAccumulator += 6;
		if (count < 6) {
			if (count > 0) {
				const uint8_t lowBitMaskIn[]{ 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F };
				//  Bit accumulator is here numeral aligned, rather than byte aligned
				//	Only the count high order bits of this numeral belong to the field.  Remember
				//	the rest, which should be zero, and drop them from the accumulator.
				overflowBits = bitAccumulator & lowBitMaskIn[6 - count];
				bitAccumulator = (bitAccumulator >> (6 - count));
				bitsInAccumulator -= 6 - count;
				//	Now it is bit field aligned
				count = 0;
			}
		}
		else count -= 6;
		if (bitsInAccumulator > 7) {
			//	A whole byte is ready: store the eight oldest bits in the accumulator.
			*p++ = (bitAccumulator >> (bitsInAccumulator % 8));
			bitsInAccumulator -= 8;
		}
		if (count == 0) {
			assert(bitsInAccumulator < 9);
			if (bitsInAccumulator) {
				//	Store the final partial byte, keeping the bits of that byte that lie beyond
				//	the end of the field as they were.
				*p =
					(bitAccumulator << (8 - bitsInAccumulator))
					|(*p & lowBitMask[bitsInAccumulator]);
			}
			break;
		}
	}
	if (overflowBits) throw BadDataException();
	return length - count;
}

/* Expects pointer to byte buffer and pointer to string.
	Expects a string of exactly the correct number of numerals,
	terminated by a non base64 character, such as null.
	Throws BadDataException if that is not what it gets.
	Fills the byte buffer exactly.
	When the buffer is not a whole number of numerals, the high order bits of the final numeral
	fill the end of the last byte, and the remaining low order bits of that numeral do not fit.
	Returns a uint8_t containing those excess bits of the last numeral in its low order part,
	or zero when the numerals fill the buffer with nothing left over.*/
uint8_t base64_to_bytes(
	uint8_t* byteBuffer,
	uint_fast32_t byteCount,
	const char* base64Numerals
) {
	//	Numerals that fit entirely within the buffer, and the bits they carry.
	const auto numeralsCount{ byteCount * 8 / 6 };
	const auto bitsCount{ numeralsCount * 6 };
	if (bitsCount) {
		//	Only call the decoder when there is something to decode, so that an empty buffer
		//	is never read or written.
		const auto length{ base64_to_bits(byteBuffer, 0, bitsCount, base64Numerals) };
		if (length < bitsCount) throw BadDataException();
	}
	base64Numerals += numeralsCount;
	//	Bits of the last byte that the whole numerals did not reach: 0, 2 or 4.
	const auto missingBitsField{ 8 * byteCount - bitsCount };
	uint8_t leftoverBits{ 0 };
	if (missingBitsField) {
		// we cast to unsigned character, because otherwise it is likely to be sign
		// extended resulting in indexing outside the range 0-0xFF
		// with an indeterminate and unpredictable number of high
		// order bits set.
		const uint8_t numeral{ ascii2six[static_cast<unsigned char>(*base64Numerals++)] };
		if (numeral > 63) throw BadDataException();
		//	The final numeral splits into missingBitsField high order bits, which go into the
		//	buffer, and leftoverBitsField low order bits, which do not fit.
		const auto leftoverBitsField{ 6 - missingBitsField };
		const auto missingBitsMask{ static_cast<uint8_t>((1u << missingBitsField) - 1) };
		const auto missingBits{ static_cast<uint8_t>(numeral >> leftoverBitsField) };
		byteBuffer[byteCount - 1] =
			static_cast<uint8_t>((byteBuffer[byteCount - 1] & ~missingBitsMask) | missingBits);
		//	Assign to the variable that is returned, rather than declaring a new one that would
		//	hide it.
		leftoverBits = static_cast<uint8_t>(numeral & ((1u << leftoverBitsField) - 1));
	}
	//	The string must end here: one more valid numeral means it was too long.
	if (ascii2six[static_cast<unsigned char>(*base64Numerals)] < 64) throw BadDataException();
	return leftoverBits;
}


//	Converts a bit buffer into base 2048 BIPS-39 words
//	Using the array ar_sz_bip_0039_wordlist defined in localization.cpp
//  The largest word in the array is eight characters, but other languages likely have longer words.
//	bitBuffer:  the bytes holding the bits.  Within a byte the high order bit comes first.
//	start:  position of the first bit to encode, counted from the high order bit of
//		bitBuffer[0].  Does not need to be byte aligned.  Assumed not to be negative.
//	length:  number of bits to encode.  Does not need to be a multiple of eight or eleven.
//		If it is not a multiple of eleven, the last word is padded with trailing zero bits.
//		If it is zero or negative, the output is the empty string.
//	szBipsWords:  receives the words, separated by single spaces, followed by a terminating
//		null.
//	Throws FatalException if szBipsWords is empty or too small for the words and the null.
void bits2base2048(const uint8_t* bitBuffer, int start, int length, std::span<char>szBipsWords) {
	//	Test for an empty span before touching any element of it.
	if (szBipsWords.empty()) throw FatalException("not enough room for BIPS-0039 passphrase");
	char* Outputsz{ szBipsWords.data() };
	//	The last position is reserved for the terminating null.
	char* const end_of_Outputsz{ Outputsz + (szBipsWords.size() - 1) };
	//	Appends one character, refusing to use the position reserved for the null.
	const auto put = [&Outputsz, end_of_Outputsz](char c) {
		if (Outputsz == end_of_Outputsz) {
			throw FatalException("not enough room for BIPS-0039 passphrase");
		}
		*Outputsz++ = c;
	};
	if (length > 0) {
		bitBuffer += start / CHAR_BIT;
		start -= (start / CHAR_BIT) * CHAR_BIT;
		assert(start == start % 8);
		uint_fast32_t bitAccumulator = (*bitBuffer++) & lowBitMask[(start)];
		int bitsInAccumulator{ 8 - start };
		assert(bitsInAccumulator < 9);
		int count{ length - bitsInAccumulator }; //count is bits remaining in buffer, may go negative
		//	We need to keep track of count, because the last byte lifted out of the buffer may well
		//	have to be an incomplete byte, so when count gets down to below 8, we have to special
		//	case loading the bitAccumulator.
		//	Keep issuing words until neither the buffer nor the accumulator holds any more bits
		//	of the field.  Testing count alone would stop as soon as the buffer was used up,
		//	losing whatever bits were still waiting in the accumulator.
		while (bitsInAccumulator + count > 0) {
			while (bitsInAccumulator < 11 && count > 0)
			{
				bitAccumulator = (bitAccumulator << 8) | (*bitBuffer++);
				bitsInAccumulator += 8;
				count -= 8;
			}
			if (count < 0) {
				// get rid of bad bits, those lifted from beyond the end of the field
				bitAccumulator >>= (-count);
				bitsInAccumulator += count;
				count = 0;
			}
			if (bitsInAccumulator < 11) {
				//	Only happens once the buffer is used up: pad the last word with zeroes.
				bitAccumulator <<= (11 - bitsInAccumulator);
				bitsInAccumulator = 11;
			}
			const auto wordnumber{ static_cast<uint_fast16_t>((bitAccumulator >> (bitsInAccumulator - 11)) & 2047) };
			bitsInAccumulator -= 11;
			for (const char* psz_Word{ ar_sz_bip_0039_wordlist[wordnumber] }; *psz_Word; ++psz_Word) {
				put(*psz_Word);
			}
			if (bitsInAccumulator + count > 0) {
				//	Another word follows.
				put(' ');
			}
		}
	}
	*Outputsz = 0;
}
/*
//	Not cryptographically strong, not DoS induced collision resistant.  Produces
//	the same mapping on all machines, for all time.
uint32_t bernstein_hash(const uint8_t* key, unsigned int len) {
	constexpr uint_fast32_t INITIAL_VALUE = 5381;
	constexpr uint_fast32_t M = 33;
	uint_fast32_t hash = INITIAL_VALUE;
	for (uint_fast32_t i = 0; i < len; ++i)
		hash = M * hash + key[i];
	return hash;
}
*/