2022-02-16 00:53:01 -05:00
//#include <cstddef>
//#include <cstdint>
# include <array>
# include <assert.h>
# include <string>
//#include <initializer_list> // for initializer_list
2022-02-18 15:59:12 -05:00
//#include <type_traits>
2022-02-28 00:54:21 -05:00
//#include <memory> // for shared_ptr, unique_ptr
2022-02-16 00:53:01 -05:00
# include <span>
# include "ILog.h"
# include "localization.h"
# include "slash6.h"
// This base64 uses the following characters as the numerals 0 to 63:
// Table to convert six bit to ascii
2022-03-07 23:46:14 -05:00
// Maps a six bit value (0 to 63) to its ASCII numeral.
// Unlike regular base64, and incompatible with it. Intended to encode stuff small enough
// that it might be human typed and human transmitted, so the easily confused letters
// 'I', 'O', 'l' and 'o' are not numerals.
// Uses six url safe additional characters !$*+-_ to bring it up to six bits.
// The literal must contain exactly the 64 numerals and nothing else: any padding
// character would shift every numeral. The 65th array element is the literal's
// terminating NUL.
static constexpr uint8_t index2base64[]{
    "0123456789"
    "ABCDEFGHJKLMNPQRSTUVWXYZ"
    "abcdefghijkmnpqrstuvwxyz"
    "!$*+-_"
};
static_assert(sizeof(index2base64) == 64 + 1, "numeral table must hold exactly 64 numerals");
static_assert(index2base64[0] == '0', "surprise numeral at 0");
static_assert(index2base64[63] == '_', "surprise numeral at 63");
// Being intended for small bits of data, assumes no whitespace within an entity
2022-03-07 23:46:14 -05:00
// Encode and decode are called with a bit buffer, consisting of an unsigned byte pointer, a
// starting bit position relative to the pointer, and a bit count. We assume that there is room
// enough in the object pointed to to accommodate the bytes referenced by
// Bit Position+BitCount. We don't change bits outside the range.
// The bit buffer does not need to be aligned, nor does it need to be a multiple of six bits,
// eight bits, or twenty four bits.
// If the input bit buffer to be encoded to base sixty four is not a multiple of six bits, the
// last base sixty four numeral output will represent the bit buffer padded with trailing zeroes.
// If there is no room in the output span for all the base sixty four digits, the encode routine
// will return a number of bits less than the size of the input bit buffer, less than the bitcount
// it was given.
2022-02-16 00:53:01 -05:00
// Table to convert ascii to six bit.
// Indexed by the unsigned value of an input character.
// Entries below 64 are numeral values; 0xff marks a stop, that is, any character that
// is not one of our numerals.
// 'O' and 'o' decode as 0, and 'I' and 'l' decode as 1.
// The element type is plain uint8_t: constexpr is not a type qualifier and cannot appear
// in a template argument. The constexpr on the object already makes the elements const.
static constexpr std::array<uint8_t, 0x100> ascii2six_ar{
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // control characters
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // control characters
    0xff, 0x3a, 0xff, 0xff, 0x3b, 0xff, 0xff, 0xff, 0xff, 0xff, 0x3c, 0x3d, 0xff, 0x3e, 0xff, 0xff, // ! $ * + - permitted. The characters "#%&'(),. are stops
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0-9 permitted. The characters :;<=>? are stops
    0xff, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11, 0x01, 0x12, 0x13, 0x14, 0x15, 0x16, 0x00, // @ is a stop, A-O permitted, I reads as 1, O reads as 0
    0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 0x20, 0x21, 0xff, 0xff, 0xff, 0xff, 0x3f, // P-Z and _ permitted, [\]^ are stops
    0xff, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x01, 0x2d, 0x2e, 0x00, // ` is a stop, a-o permitted, l reads as 1, o reads as 0
    0x2f, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0xff, 0xff, 0xff, 0xff, 0xff, // p-z permitted, the characters {|}~ are stops, as is the control character 0x7F (del)
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // 0x80 and above: utf8 multibyte characters, all stops
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
};
static_assert(ascii2six_ar['0'] == 0 && ascii2six_ar['O'] == 0 && ascii2six_ar['o'] == 0, "0, O and o must all decode as zero");
static_assert(ascii2six_ar['1'] == 1 && ascii2six_ar['I'] == 1 && ascii2six_ar['l'] == 1, "1, I and l must all decode as one");
static_assert(ascii2six_ar['_'] == 63, "surprise value for the last numeral");
2022-03-07 23:46:14 -05:00
// Table to convert ascii to six bit as a good old fashioned non owning naked pointer to const,
// whose storage is owned by a const static which exists until the program terminates.
2022-02-16 00:53:01 -05:00
// Non owning view of the decode table, so lookups read ascii2six[c] directly.
static const uint8_t* ascii2six = ascii2six_ar.data();
2022-03-07 23:46:14 -05:00
// Compile time execution in C++ is a pain, because expressions are apt to unpredictably lose
// their constexpr character for reasons that are far from clear.
// You really have to write compile time code in templates as a language, which is a totally
// obscure and hard to use language, apt to generate remarkably voluminous error messages
// with little obvious connection to the actual problem, and surprising results that are very
// difficult to predict in advance or understand at all.
// In general, the better solution is to have a routine that is called once and only once at the
// beginning of the program, which initializes a bunch of static const values, if that solution is
// adequate, or to have a preprocess routine written in python which generates the required C
// files and header files.
2022-02-16 00:53:01 -05:00
// After this experiment in compile time code, I swear off it.
2022-03-07 23:46:14 -05:00
// Decode does not have an input span of encoded characters, but a char *, because it assumes
// the string is always terminated by an invalid character, such as the trailing null at the end of
// string or a space, or any character that is not one of our base sixty four numerals.
// If there are not enough input base sixty four numerals, it returns a size less than
// requested.
// If the requested bit buffer is not a multiple of six bits, and the last base 64 numeral had
// trailing ones that would not fit in the buffer, rather than the expected trailing zeroes, then
// it returns a size larger than the buffer, as if it changed stuff outside the buffer but does not
// actually change bits outside the buffer. This is likely an error, because obviously we want
// to decode something from base sixty four that was originally encoded using the same sized
// buffer.
2022-02-16 00:53:01 -05:00
// Largest value a uint8_t can hold. Used as the "not a numeral" sentinel when decoding.
static const uint8_t INV = UCHAR_MAX;

// highBitMask[n] has the n most significant bits of a byte set and the rest clear.
const uint8_t highBitMask[] = {0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF};
static_assert(sizeof(highBitMask) == CHAR_BIT + 1, "expecting eight bits per character");

// lowBitMask[n] has the n most significant bits of a byte clear and the remaining
// 8 - n low bits set.
const uint8_t lowBitMask[] = {0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00};
static_assert(sizeof(lowBitMask) == CHAR_BIT + 1, "expecting eight bits per character");
// Converts bit a bit buffer into base 64 numerals
2022-03-07 23:46:14 -05:00
// Start does not need to be byte aligned, nor does the length need to be a multiple of eight,
// six, or twenty four.
2022-02-16 00:53:01 -05:00
// If insufficient room is provided for the base 64 value, throws exception.
void bits2base64 ( const uint8_t * bitBuffer , unsigned int start , unsigned int length , std : : span < char > base64Numerals ) {
assert ( length < = 6 * base64Numerals . size ( ) - 6 ) ;
2022-03-07 23:46:14 -05:00
// If you hit this assert you probably passed in a negative number, overflowing the
// string buffer
2022-02-16 00:53:01 -05:00
// which would cause you to write all over memory.
// Expects(length <= 6 * base64Numerals.size()-6);
if ( length > 6 * base64Numerals . size ( ) - 6 ) throw FatalException ( sz_text_buffer_overflow ) ;
auto e { & base64Numerals [ 0 ] } ;
if ( length ) {
bitBuffer + = start / CHAR_BIT ;
start - = ( start / CHAR_BIT ) * CHAR_BIT ;
assert ( start = = start % 8 ) ;
unsigned int zeropadding { ( ( length + 5 ) / 6 ) * 6 - length } ;
assert ( ( length + zeropadding ) % 6 = = 0 ) ;
assert ( length + zeropadding < = 6 * ( unsigned int ) ( base64Numerals . size ( ) ) ) ;
unsigned int bitAccumulator = ( * bitBuffer + + ) & lowBitMask [ ( start ) ] ;
unsigned int bitsInAccumulator { 8 - start } ;
assert ( bitsInAccumulator < 9 ) ;
unsigned int count { length - bitsInAccumulator } ; //count is bits lifted out of the buffer.
2022-03-07 23:46:14 -05:00
// We need to keep track of count, because the last byte lifted out of the buffer may well
// have to be an incomplete byte, so when count gets down to below 8, we have to
// special case loading the bitAccumulator.
2022-02-16 00:53:01 -05:00
char * Outputsz { & ( base64Numerals [ 0 ] ) } ;
auto endBaseNumeralBuffer { Outputsz + length / 6 } ;
for ( ; e < endBaseNumeralBuffer ; e + + ) {
assert ( count + bitsInAccumulator + 6 * ( unsigned int ) ( e - Outputsz ) = = length ) ;
if ( bitsInAccumulator < 6 )
{
if ( count > 7 ) {
bitAccumulator = ( bitAccumulator < < 8 ) | ( * bitBuffer + + ) ;
bitsInAccumulator + = 8 ;
count - = 8 ;
assert ( count + bitsInAccumulator + 6 * ( unsigned int ) ( e - Outputsz ) = = length ) ;
}
else {
2022-03-07 23:46:14 -05:00
assert ( count + bitsInAccumulator > = 6 ) ; // Should be enough bits in buffer for
// all numerals produced by this for loop.
2022-02-16 00:53:01 -05:00
bitAccumulator = ( ( ( bitAccumulator < < 8 ) | ( * bitBuffer + + ) ) > > ( 8 - count ) ) ;
bitsInAccumulator + = count ;
count = 0 ;
assert ( count + bitsInAccumulator + 6 * ( unsigned int ) ( e - & base64Numerals [ 0 ] ) = = length ) ;
}
}
assert ( bitsInAccumulator > 5 ) ;
* e = index2base64 [ ( bitAccumulator > > ( bitsInAccumulator - 6 ) ) & 0x3F ] ;
bitsInAccumulator - = 6 ;
}
2022-03-07 23:46:14 -05:00
// When we drop out of the for loop, we may have more than 0 bits left but less than six, in
// which case we then have to special case the last numeral by filling the bit accumulato
// with our zeropadding.
2022-02-16 00:53:01 -05:00
if ( count ) {
bitAccumulator = ( ( ( bitAccumulator < < 8 ) | ( * bitBuffer + + ) ) > > ( 8 - count ) ) ;
bitsInAccumulator + = count ;
count = 0 ;
assert ( count + bitsInAccumulator + 6 * ( unsigned int ) ( e - & base64Numerals [ 0 ] ) = = length ) ;
}
if ( bitsInAccumulator ) {
// Going to issue one last numeral
assert ( zeropadding + bitsInAccumulator = = 6 ) ;
if ( bitsInAccumulator < 6 ) {
bitAccumulator = bitAccumulator < < zeropadding ; // 0 pad accumulator
bitsInAccumulator = 6 ; //This breaks the invariant checked by the assert.
}
assert ( bitsInAccumulator = = 6 ) ;
* e + + = index2base64 [ bitAccumulator & 0x3F ] ;
}
assert ( count = = 0 ) ;
assert ( 6 * ( unsigned int ) ( e - Outputsz ) = = length + zeropadding ) ;
assert ( ( length + 5 ) / 6 = = e - & base64Numerals [ 0 ] ) ; //Ensures that base 64 representation is the right size to hold the bits.
}
* e = ' \0 ' ;
}
// This may produce the runtime error that there are too few numerals to fill the bit buffer.
// Stops at the first invalid numeral - such as a space.
2022-03-07 23:46:14 -05:00
// This is intended for quite short bit fields, not for transmitting megabytes of data over lines
// that are not eight bit safe.
2022-02-16 00:53:01 -05:00
// Returns the actual size of the fill.
// If the bit buffer is not a multiple of six, the last numerals excess bits need to be zero
// If they are not zero will truncate the excess bits and and throw BadDataException.
unsigned int base64_to_bits ( uint8_t * bitBuffer , unsigned int start , unsigned int length , const char * base64Numerals ) {
bitBuffer + = start / CHAR_BIT ;
start - = ( start / CHAR_BIT ) * CHAR_BIT ;
assert ( start = = start % 8 ) ;
unsigned int zeropadding { ( ( length + 5 ) / 6 ) * 6 - length } ;
assert ( ( length + zeropadding ) % 6 = = 0 ) ;
unsigned int bitAccumulator = ( * bitBuffer ) > > ( 8 - start ) ;
unsigned int bitsInAccumulator { start } ;
unsigned int count { length } ;
unsigned int numeral ;
uint8_t overflowBits { ' \0 ' } ;
uint8_t * p { bitBuffer } ;
2022-03-07 23:46:14 -05:00
for (
const char * e = base64Numerals ;
( ( numeral = ascii2six [ static_cast < unsigned char > ( * e ) ] ) , ( numeral < INV ) ) ;
e + +
) {
2022-02-16 00:53:01 -05:00
assert ( ( e - base64Numerals ) * 6 + count = = length ) ;
assert ( ( p - bitBuffer ) * 8 + bitsInAccumulator = = start + ( e - base64Numerals ) * 6 ) ;
bitAccumulator = ( bitAccumulator < < 6 ) | numeral ;
bitsInAccumulator + = 6 ;
if ( count < 6 ) {
if ( count > 0 ) {
const uint8_t lowBitMaskIn [ ] { 0x00 , 0x01 , 0x03 , 0x07 , 0x0F , 0x1F , 0x3F } ;
// Bit accumulator is here numeral aligned, rather than byte aligned
overflowBits = bitAccumulator & lowBitMaskIn [ 6 - count ] ;
bitAccumulator = ( bitAccumulator > > ( 6 - count ) ) ;
bitsInAccumulator - = 6 - count ;
// Now it is bit field aligned
count = 0 ;
}
}
else count - = 6 ;
if ( bitsInAccumulator > 7 ) {
* p + + = ( bitAccumulator > > ( bitsInAccumulator % 8 ) ) ;
bitsInAccumulator - = 8 ;
}
if ( count = = 0 ) {
assert ( bitsInAccumulator < 9 ) ;
if ( bitsInAccumulator ) {
2022-03-07 23:46:14 -05:00
* p =
( bitAccumulator < < ( 8 - bitsInAccumulator ) )
| ( * p & lowBitMask [ bitsInAccumulator ] ) ;
2022-02-16 00:53:01 -05:00
}
break ;
}
}
if ( overflowBits ) throw BadDataException ( ) ;
return overflowBits ? length + 1 : length - count ;
}
/* Expects pointer to byte buffer and pointer to string.
2022-02-18 15:59:12 -05:00
Expects a string of exactly the correct number of numerals ,
terminated by a non base64 character , such as null .
2022-02-16 00:53:01 -05:00
Throws exception if that is not what it gets .
Fills the byte buffer exactly .
Returns a uint8_t containing the excess bits of the last numeral in its low order part . */
2022-03-07 23:46:14 -05:00
uint8_t base64_to_bytes (
uint8_t * byteBuffer ,
uint_fast32_t byteCount ,
const char * base64Numerals
) {
2022-02-16 00:53:01 -05:00
auto numeralsCount { byteCount * 8 / 6 } ;
auto bitsCount { numeralsCount * 6 } ;
auto length { base64_to_bits ( byteBuffer , 0 , bitsCount , base64Numerals ) } ;
if ( length < bitsCount ) throw BadDataException ( ) ;
base64Numerals + = numeralsCount ;
auto leftoverBitsField { byteCount * 8 - length } ;
auto leftoverBits { 0 } ;
if ( leftoverBitsField ) {
// we cast to unsigned character, because otherwise it is likely to be sign
// extended resulting in indexing outside the range 0-0xFF
// with an indeterminate and unpredictable number of high
// order bits set.
uint8_t numeral { ascii2six [ static_cast < unsigned char > ( * base64Numerals + + ) ] } ;
if ( numeral > 63 ) throw BadDataException ( ) ;
auto missingBitsField { 8 * byteCount - bitsCount } ;
assert ( missingBitsField + leftoverBitsField = = 6 ) ;
auto missingBitsMask { ( 1 < < missingBitsField ) - 1 } ;
auto missingBits { static_cast < uint8_t > ( numeral > > leftoverBitsField ) } ;
byteBuffer [ byteCount - 1 ] = ( ( byteBuffer [ byteCount - 1 ] | missingBitsMask ) ^ missingBitsMask ) | missingBits ;
auto leftoverBits { static_cast < uint8_t > ( numeral ^ ( missingBits < < missingBitsField ) ) } ;
}
if ( ascii2six [ static_cast < unsigned char > ( * base64Numerals ) ] < 64 ) throw BadDataException ( ) ;
return leftoverBits ;
}
// Converts bit a bit buffer into base 2048 BIPS-39 words
// Using the array ar_sz_bip_0039_wordlist defined in localization.cpp
// The largest word in the array is eight characters, but other languages likely have longer words.
2022-03-07 23:46:14 -05:00
// Start does not need to be byte aligned, nor does the length need to be a multiple of eight or
// eleven.
2022-02-16 00:53:01 -05:00
void bits2base2048 ( const uint8_t * bitBuffer , int start , int length , std : : span < char > szBipsWords ) {
char * Outputsz { & ( szBipsWords [ 0 ] ) } ;
if ( szBipsWords . size ( ) = = 0 ) throw ;
char * end_of_Outputsz { & ( szBipsWords [ szBipsWords . size ( ) - 1 ] ) } ;
if ( length > 0 ) {
bitBuffer + = start / CHAR_BIT ;
start - = ( start / CHAR_BIT ) * CHAR_BIT ;
assert ( start = = start % 8 ) ;
uint_fast32_t bitAccumulator = ( * bitBuffer + + ) & lowBitMask [ ( start ) ] ;
int bitsInAccumulator { 8 - start } ;
assert ( bitsInAccumulator < 9 ) ;
int count { length - bitsInAccumulator } ; //count is bits remaining in buffer, may go negative
2022-03-07 23:46:14 -05:00
// We need to keep track of count, because the last byte lifted out of the buffer may well
// have to be an incomplete byte, so when count gets down to below 8, we have to special
// case loading the bitAccumulator.
2022-02-16 00:53:01 -05:00
while ( count ) {
while ( bitsInAccumulator < 11 & & count > 0 )
{
bitAccumulator = ( bitAccumulator < < 8 ) | ( * bitBuffer + + ) ;
bitsInAccumulator + = 8 ;
count - = 8 ;
}
if ( count < 0 ) {
// get rid of bad bits
bitAccumulator > > = ( - count ) ;
bitsInAccumulator + = count ;
if ( bitsInAccumulator < 11 ) {
bitAccumulator < < = ( 11 - bitsInAccumulator ) ;
bitsInAccumulator = 11 ;
}
count = 0 ;
}
uint_fast16_t wordnumber { ( bitAccumulator > > ( bitsInAccumulator - 11 ) ) & 2047 } ;
bitsInAccumulator - = 11 ;
const char * psz_Word { ar_sz_bip_0039_wordlist [ wordnumber ] } ;
while ( * psz_Word ) {
2022-03-07 23:46:14 -05:00
if ( Outputsz = = end_of_Outputsz ) {
throw FatalException ( " not enough room for BIPS-0039 passphrase " ) ;
}
2022-02-16 00:53:01 -05:00
* Outputsz + + = * psz_Word + + ;
}
if ( bitsInAccumulator + count > 0 ) {
2022-03-07 23:46:14 -05:00
if ( Outputsz = = end_of_Outputsz ) {
throw FatalException ( " not enough room for BIPS-0039 passphrase " ) ;
}
2022-02-16 00:53:01 -05:00
* Outputsz + + = ' ' ;
}
}
}
* Outputsz + + = 0 ;
}
/*
// Not cryptographically strong, not DoS induced collision resistant. Produces
// the same mapping on all machines, for all time.
uint32_t bernstein_hash ( const uint8_t * key , unsigned int len ) {
constexpr uint_fast32_t INITIAL_VALUE = 5381 ;
constexpr uint_fast32_t M = 33 ;
uint_fast32_t hash = INITIAL_VALUE ;
for ( uint_fast32_t i = 0 ; i < len ; + + i )
hash = M * hash + key [ i ] ;
return hash ;
}
*/