wallet/src/slash6.cpp

//#include <cstddef>
//#include <cstdint>
#include <array>
#include <assert.h>
#include <string>
//#include <initializer_list> // for initializer_list
//#include <type_traits>
//#include <memory>       // for shared_ptr, unique_ptr
#include <span>
#include "ILog.h"
#include "localization.h"
#include "slash6.h"
//	This base64 uses the following characters as the numerals 0 to 63:
//	Table to convert six bit to ascii
static constexpr uint8_t index2base64[]{
	"0123456789"					//	numerals  0 to  9
	"ABCDEFGHJKLMNPQRSTUVWXYZ"		//	numerals 10 to 33, no 'I' and no 'O'
	"abcdefghijkmnpqrstuvwxyz"		//	numerals 34 to 57, no 'l' and no 'o'
	"!$*+-_"						//	numerals 58 to 63, followed by the literal's trailing null
};
/*
	control characters are stops
	! $ * + -  permitted   The characters  "#%&'(),. are stops
	0-9 permitted  The characters  :;<=>? are stops
	A-Z and _ permitted, @ and  [\]^ are stops
	a-z  permitted.  {|} ~` are stops, as is the mysterious control character 0x7F (del)
*/
//	Unlike regular base64, and incompatible with it.  Intended to encode stuff small enough
//	that it might be human typed and human transmitted, therefore maps 'o' and 'O' to '0', 'I' and
//	'l' to '1'
// uses six url safe additional characters !$*+-_ to bring it up to six bits
//
//	But on reflection, useless, since human typed stuff like this should use Bitcoin's base 58 encoding
//	So going to switch to regular base64, despite the unreasonably immense amount of work I put into it.

//	Unfortunately, Wireguard, with which I am going to need to interoperate, uses RFC4648, whose
//	algorithm is fundamentally different - no special treatment for I, O, o, and l, and uses =
//	to handle the case where you have boundary problems between eight and six bit groups.
//	They force everything to four character groups, using an = sign to indicate that the
//	bytes being represented stop before a multiple of three. https://www.base64encode.org


//	Compile time check that the numeral with value 63, the last one in the table, is '_'.
static_assert(index2base64[63] == '_', "surprise numeral at 63");

//	Being intended for small bits of data, assumes no whitespace within an entity
//	Encode and decode are called with a bit buffer, consisting of an unsigned byte pointer, a
//	starting bit position relative to the pointer, and a bit count.  We assume that there is room
//	enough in the object pointed to to accommodate the bytes referenced by
//	Bit Position+BitCount.  We don't change bits outside the range.
//	The bit buffer does not need to be aligned, nor does it need to be a multiple of six bits,
//	eight bits, or twenty four bits.
//	If the input bit buffer to be encoded to base sixty four is not a multiple of six bits, the
//	last base sixty four numeral output will represent the bit buffer padded with trailing zeroes.
//	If there is no room in the output span for all the base sixty four digits and the terminating
//	null, the encode routine throws FatalException rather than producing a truncated
//	result.


//	Compile time execution in C++ is a pain, because expressions are apt to unpredictably lose
//	their constexpr character for reasons that are far from clear.
//
// You can declare anything reasonable to be constexpr, and the compiler will not issue an
// error message until the code that attempts to use what you have declared constexpr is
// invoked from somewhere else  "expression does not evaluate to constant"
//
// an assert in an expression evaluated at compile time does not trigger a run time error,
// Instead the compiler reports that the expression did not evaluate to a constant,
//
//	The error is confusing, because the error points to the declaration where the initialization
//	was invoked,instead of pointing to the assert.

//	To debug code intended to be run at compile time, exercise it at run time with
//	auto ptr(std::make_unique<Class>());
//	at run time;

 class charindex {
public:
	//	index[c] is the six bit value of the character c, or 0xFF when c is not a numeral.
	//	Although not declared const, the array is built at compile time whenever the
	//	enclosing object is created by a constexpr expression.
	std::array< uint8_t, 0x100> index{ 0, };
	charindex() = delete;
	//	p points to a null terminated table of numerals, the character at offset n being
	//	the numeral whose value is n.
	constexpr charindex(const uint8_t* p) {
		//	Start with every character marked as a stop.
		for (unsigned int slot = 0; slot < 0x100; ++slot) {
			index[slot] = 0xFF;
		}
		//	Invert the table: each numeral maps back to its own offset.
		for (uint8_t value = 0; p[value] != 0; ) {
			index[p[value]] = value;
			++value;
			//	A table of 256 or more characters would wrap the counter and never end.
			//	Evaluated at compile time a failure here surfaces as
			//	"expression does not evaluate to constant" rather than a run time break.
			assert(value != 0);
		}
		//	Easily confused letters are read as the digits they resemble.
		index['O'] = 0;
		index['o'] = 0;
		index['I'] = 1;
		index['l'] = 1;
	}
};

//	The ascii to six bit lookup table, built from index2base64 by a constexpr initializer.
static constexpr charindex ascii2six_ar{ index2base64 };

//
//
// You really have to write compile time code in templates as a language, which is a totally
//	obscure and hard to use language, apt to generate remarkably voluminous error messages
//	with little obvious connection to the actual problem, and surprising results that are very
//	difficult to predict in advance or understand at all.

//	Table to convert ascii to six bit as a good old fashioned non owning naked pointer to const,
//	whose storage is owned by a const static which exists until the program terminates.
const uint8_t* const ascii2six{ ascii2six_ar.index.data() };

//	Self test of the two lookup tables.  For every byte value that ascii2six accepts as a
//	numeral, asserts that index2base64 maps the six bit value back to the same character,
//	except for 'I', 'l', 'o' and 'O', which are deliberately read as the digits 1 and 0.
//	Does nothing when asserts are compiled out.
void ascii2test() {
	for (unsigned int i{ 0 }; i < 0x100; i++) {
		const unsigned int j = ascii2six[i];
		if (j < 64) {
			//	index2base64 is only consulted once j is known to be a numeral.  Stop
			//	characters have j == 0xFF, which would index far past the end of the table.
			[[maybe_unused]] const char v = static_cast<char>(i);
			[[maybe_unused]] const char w = static_cast<char>(index2base64[j]);
			assert(v == w || v == 'I' || v == 'l' || v == 'o' || v == 'O');
		}
	}
}


//	Decode does not have an input span of encoded characters, but a char *, because it assumes
//	the string is always terminated by an invalid character, such as the trailing null at the end of
//	string or a space, or any character that is not one of our base sixty four numerals.
//	If there are not enough input base sixty four numerals, it returns a size less than
//	requested.
//	If the requested bit buffer is not a multiple of six bits, and the last base 64 numeral had
//	trailing ones that would not fit in the buffer, rather than the expected trailing zeroes, then
//	it throws BadDataException, and does not change bits outside the buffer.  Such input
//	is likely an error, because obviously we want to decode something from base sixty four
//	that was originally encoded using the same sized buffer.

//	Table value at or above which base64_to_bits stops reading numerals.
static const uint8_t INV = UCHAR_MAX;

//	highBitMask[n] has the n high order bits of a byte set, and the rest clear.
const uint8_t highBitMask[] = {
	0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF
};
static_assert(sizeof(highBitMask) == CHAR_BIT + 1, "expecting eight bits per character");

//	lowBitMask[n] has the n high order bits of a byte clear, and the rest set.
const uint8_t lowBitMask[] = {
	0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00
};
static_assert(sizeof(lowBitMask) == CHAR_BIT + 1, "expecting eight bits per character");


//	Converts a bit buffer into a null terminated string of base 64 numerals.
//		bitBuffer		bytes holding the bits, bit zero being the high order bit of bitBuffer[0]
//		start			position of the first bit to encode, relative to bitBuffer
//		length			number of bits to encode
//		base64Numerals	receives (length + 5) / 6 numerals followed by a terminating null
//	Start does not need to be byte aligned, nor does the length need to be a multiple of eight,
//	six, or twenty four.  If length is not a multiple of six, the last numeral is padded with
//	trailing zero bits.
//	Only bytes that hold at least one of the requested bits are read from bitBuffer.
//	If insufficient room is provided for the numerals and the terminating null, asserts, and
//	when asserts are compiled out throws FatalException.
void bits2base64(const uint8_t * bitBuffer, unsigned int start, unsigned int length, std::span<char>base64Numerals) {
	//	An empty span has no room even for the terminator.  It has to be tested for explicitly,
	//	because 6 * size - 6 wraps around to an enormous unsigned value when size is zero.
	//	If you fail this check with a sane looking span, you probably passed in a negative
	//	length, which would otherwise write all over memory.
	const bool roomEnough{ !base64Numerals.empty() && length <= 6 * (base64Numerals.size() - 1) };
	assert(roomEnough);
	if (!roomEnough)throw FatalException(sz_text_buffer_overflow);
	char * const Outputsz{ base64Numerals.data() };
	char * e{ Outputsz };
	if (length) {
		const uint8_t * src{ bitBuffer + start / CHAR_BIT };
		unsigned int bitOffset{ start % CHAR_BIT };	//	bits to skip at the top of the next byte read
		unsigned int count{ length };				//	bits not yet lifted out of the buffer
		unsigned int bitAccumulator{ 0 };
		unsigned int bitsInAccumulator{ 0 };
		while (count > 0 || bitsInAccumulator > 0) {
			assert(count + bitsInAccumulator + 6 * static_cast<unsigned int>(e - Outputsz) == length);
			while (bitsInAccumulator < 6 && count > 0) {
				//	The first byte may start part way through, and the last byte may end part
				//	way through, so take only the bits that belong to the bit field.
				const unsigned int available{ CHAR_BIT - bitOffset };
				const unsigned int take{ count < available ? count : available };
				const unsigned int wanted{
					(static_cast<unsigned int>(*src++) & lowBitMask[bitOffset]) >> (available - take)
				};
				bitAccumulator = (bitAccumulator << take) | wanted;
				bitsInAccumulator += take;
				count -= take;
				bitOffset = 0;
			}
			if (bitsInAccumulator >= 6) {
				bitsInAccumulator -= 6;
				*e++ = index2base64[(bitAccumulator >> bitsInAccumulator) & 0x3F];
			}
			else {
				//	The buffer is exhausted with one to five bits left over: issue one last
				//	numeral, padded with trailing zeroes.
				assert(count == 0 && bitsInAccumulator > 0);
				*e++ = index2base64[(bitAccumulator << (6 - bitsInAccumulator)) & 0x3F];
				bitsInAccumulator = 0;
			}
			bitAccumulator &= (1u << bitsInAccumulator) - 1u;	//	discard the bits just issued
		}
		//	Ensures that the base 64 representation is the right size to hold the bits.
		assert(static_cast<unsigned int>(e - Outputsz) == (length + 5) / 6);
	}
	*e = '\0';
}

//	Decodes base 64 numerals into a bit buffer.
//		bitBuffer		bytes receiving the bits, bit zero being the high order bit of bitBuffer[0]
//		start			position of the first bit to fill, relative to bitBuffer
//		length			number of bits to fill
//		base64Numerals	numerals, terminated by any character that is not a numeral
//	Stops at the first invalid numeral - such as a space or the trailing null.
//	This is intended for quite short bit fields, not for transmitting megabytes of data over lines
//	that are not eight bit safe.
//	Returns the number of bits actually decoded and stored, which is less than length if there
//	are too few numerals to fill the bit buffer.
//	Bits outside the range start to start + length are never changed, and when length is zero
//	neither the buffer nor the numerals are touched.
//	If length is not a multiple of six, the excess bits of the last numeral need to be zero.
//	If they are not zero, stores the truncated value and then throws BadDataException.
unsigned int base64_to_bits(uint8_t * bitBuffer, unsigned int start, unsigned int length, const char * base64Numerals) {
	if (length == 0) return 0;	//	Nothing requested, so nothing may be written.
	bitBuffer += start / CHAR_BIT;
	start %= CHAR_BIT;
	//	Seed the accumulator with the bits that precede the bit field in its first byte, so that
	//	they are written back unchanged.  A byte aligned field has no such bits to read.
	unsigned int bitAccumulator{ start ? static_cast<unsigned int>(*bitBuffer) >> (CHAR_BIT - start) : 0u };
	unsigned int bitsInAccumulator{ start };
	unsigned int count{ length };	//	bits of the bit field not yet decoded
	unsigned int overflowBits{ 0 };
	uint8_t * p{ bitBuffer };
	for (const char * e = base64Numerals; count > 0; e++) {
		//	Cast to unsigned char so that a negative char cannot index outside the table.
		const unsigned int numeral{ ascii2six[static_cast<unsigned char>(*e)] };
		if (numeral >= INV) break;	//	stop character: ran out of numerals
		assert(static_cast<unsigned int>(e - base64Numerals) * 6 + count == length);
		assert(static_cast<unsigned int>(p - bitBuffer) * 8 + bitsInAccumulator
			== start + static_cast<unsigned int>(e - base64Numerals) * 6);
		bitAccumulator = (bitAccumulator << 6) | numeral;
		bitsInAccumulator += 6;
		if (count < 6) {
			//	Last numeral, and only its high order count bits fit in the bit field.
			//	Bit accumulator is here numeral aligned, rather than byte aligned
			const unsigned int excess{ 6 - count };
			overflowBits = bitAccumulator & ((1u << excess) - 1u);
			bitAccumulator >>= excess;
			bitsInAccumulator -= excess;
			//	Now it is bit field aligned
			count = 0;
		}
		else count -= 6;
		if (bitsInAccumulator > 7) {
			*p++ = static_cast<uint8_t>(bitAccumulator >> (bitsInAccumulator - 8));
			bitsInAccumulator -= 8;
		}
	}
	//	Store whatever decoded bits have not yet made up a whole byte, preserving the low
	//	order bits of that byte.  This is done whether the bit field was filled or the numerals
	//	ran out, so that every bit counted in the return value really is in the buffer.
	assert(bitsInAccumulator < 8);
	if (bitsInAccumulator && count < length) {
		*p = static_cast<uint8_t>(
			(bitAccumulator << (8 - bitsInAccumulator))
			| (*p & lowBitMask[bitsInAccumulator]));
	}
	if (overflowBits) throw BadDataException();
	return length - count;
}

/* Expects pointer to byte buffer and pointer to string.
	Expects a string of exactly the correct number of numerals,
	terminated by a non base64 character, such as null.
	Throws BadDataException if that is not what it gets.
	Fills the byte buffer exactly.
	When byteCount is not a multiple of three, the last numeral straddles the end of the
	buffer: its high order two or four bits complete the last byte, and its remaining low
	order bits are the excess.
	Returns a uint8_t containing the excess bits of the last numeral in its low order part,
	zero when there are no excess bits.*/
uint8_t base64_to_bytes(
	uint8_t* byteBuffer,
	uint_fast32_t byteCount,
	const char* base64Numerals
) {
	//	Numerals that fit entirely inside the buffer, and the bits they carry.
	const auto numeralsCount{ byteCount * 8 / 6 };
	const auto bitsCount{ numeralsCount * 6 };
	if (bitsCount) {
		const auto length{ base64_to_bits(byteBuffer, 0, static_cast<unsigned int>(bitsCount), base64Numerals) };
		if (length < bitsCount) throw BadDataException();
	}
	base64Numerals += numeralsCount;
	//	Bits of the last byte still unfilled: 0, 2 or 4.
	const unsigned int missingBitsField{ static_cast<unsigned int>(byteCount * 8 - bitsCount) };
	uint8_t leftoverBits{ 0 };
	if (missingBitsField) {
		// we cast to unsigned character, because otherwise it is likely to be sign
		// extended resulting in indexing outside the range 0-0xFF
		// with an indeterminate and unpredictable number of high
		// order bits set.
		const uint8_t numeral{ ascii2six[static_cast<unsigned char>(*base64Numerals++)] };
		if (numeral > 63) throw BadDataException();
		//	The six bits of the numeral split into the bits the buffer is missing (high order)
		//	and the bits left over (low order).
		const unsigned int leftoverBitsField{ 6 - missingBitsField };
		const unsigned int missingBitsMask{ (1u << missingBitsField) - 1u };
		const unsigned int missingBits{ static_cast<unsigned int>(numeral) >> leftoverBitsField };
		byteBuffer[byteCount - 1] = static_cast<uint8_t>(
			(byteBuffer[byteCount - 1] & ~missingBitsMask) | missingBits);
		leftoverBits = static_cast<uint8_t>(numeral & ((1u << leftoverBitsField) - 1u));
	}
	//	Any further numeral means the string is longer than the buffer.
	if (ascii2six[static_cast<unsigned char>(*base64Numerals)] < 64) throw BadDataException();
	return leftoverBits;
}


//	Converts a bit buffer into a null terminated string of base 2048 BIPS-39 words separated
//	by single spaces, using the array ar_sz_bip_0039_wordlist defined in localization.cpp
//		bitBuffer		bytes holding the bits, bit zero being the high order bit of bitBuffer[0]
//		start			position of the first bit to encode, relative to bitBuffer, not negative
//		length			number of bits to encode; zero or less produces the empty string
//		szBipsWords		receives the words and the terminating null
//	The largest word in the array is eight characters, but other languages likely have longer words.
//	Start does not need to be byte aligned, nor does the length need to be a multiple of eight or
//	eleven.  If length is not a multiple of eleven, the last word represents the remaining bits
//	padded with trailing zero bits.
//	Throws FatalException if szBipsWords has no room for the words and the terminating null.
void bits2base2048(const uint8_t* bitBuffer, int start, int length, std::span<char>szBipsWords) {
	//	Must be checked before taking any address inside the span.
	if (szBipsWords.empty())throw FatalException("not enough room for BIPS-0039 passphrase");
	char* Outputsz{ szBipsWords.data() };
	//	The last character of the span is reserved for the terminating null.
	char* const end_of_Outputsz{ Outputsz + (szBipsWords.size() - 1) };
	const auto put = [&Outputsz, end_of_Outputsz](char c) {
		if (Outputsz == end_of_Outputsz) {
			throw FatalException("not enough room for BIPS-0039 passphrase");
		}
		*Outputsz++ = c;
	};
	if (length > 0) {
		assert(start >= 0);
		bitBuffer += start / CHAR_BIT;
		int bitOffset{ start % CHAR_BIT };	//	bits to skip at the top of the next byte read
		int count{ length };				//	bits not yet lifted out of the buffer
		uint_fast32_t bitAccumulator{ 0 };
		int bitsInAccumulator{ 0 };
		//	Keep going until the buffer AND the accumulator are both empty, otherwise the bits
		//	left in the accumulator when the buffer runs dry would be silently dropped.
		while (count > 0 || bitsInAccumulator > 0) {
			while (bitsInAccumulator < 11 && count > 0) {
				//	The first byte may start part way through, and the last byte may end part
				//	way through, so take only the bits that belong to the bit field.
				const int available{ CHAR_BIT - bitOffset };
				const int take{ count < available ? count : available };
				const uint_fast32_t wanted{
					(static_cast<uint_fast32_t>(*bitBuffer++) & lowBitMask[bitOffset]) >> (available - take)
				};
				bitAccumulator = (bitAccumulator << take) | wanted;
				bitsInAccumulator += take;
				count -= take;
				bitOffset = 0;
			}
			if (bitsInAccumulator < 11) {
				//	Buffer exhausted: zero pad the last word.
				bitAccumulator <<= (11 - bitsInAccumulator);
				bitsInAccumulator = 11;
			}
			bitsInAccumulator -= 11;
			const auto wordnumber{ static_cast<uint_fast16_t>((bitAccumulator >> bitsInAccumulator) & 2047) };
			bitAccumulator &= (static_cast<uint_fast32_t>(1) << bitsInAccumulator) - 1;	//	discard the bits just issued
			for (const char* psz_Word{ ar_sz_bip_0039_wordlist[wordnumber] }; *psz_Word; psz_Word++) {
				put(*psz_Word);
			}
			//	A separator only if another word is going to follow.
			if (count > 0 || bitsInAccumulator > 0) put(' ');
		}
	}
	*Outputsz = 0;
}
/*
//	Not cryptographically strong, not DoS induced collision resistant.  Produces
//	the same mapping on all machines, for all time.
uint32_t bernstein_hash(const uint8_t* key, unsigned int len) {
	constexpr uint_fast32_t INITIAL_VALUE = 5381;
	constexpr uint_fast32_t M = 33;
	uint_fast32_t hash = INITIAL_VALUE;
	for (uint_fast32_t i = 0; i < len; ++i)
		hash = M * hash + key[i];
	return hash;
}
*/