//#include //#include #include #include #include //#include // for initializer_list //#include //#include // for shared_ptr, unique_ptr #include #include "ILog.h" #include "localization.h" #include "slash6.h" // This base64 uses the following characters as the numerals 0 to 63: // Table to convert six bit to ascii static constexpr uint8_t index2base64[]{ "0123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnpqrstuvwxyz!$*+-_" }; /* control characters are stops ! $ * + - permitted The characters "#%&'(),. are stops 0-9 permitted The characters :;<=>? are stops A-Z and _ permitted, @ and [\]^ are stops a-z permitted. {|} ~` are stops, as is the mysterious control character 0x7F (del) */ // Unlike regular baseINV, and incompatible with it. Intended to encode stuff small enough // that it might be human typed and human transmitted, therefore maps 'o' and 'O' to '0', 'I' and // 'l' to '1' // uses six url safe additional characters !$*+-_ to bring it up to six bits // // But on reflection, useless, since human typed stuff like this should use Bitcoin's base 58 encoding // So going to switch to regular base64, despite the unreasonably immense amount of work I put into it. // Unfortunately, Wireguard, with which I am going to need to interoperate, uses RFC4648, whose // algorithm is fundamentally different - no special treatment for I, O, o, and l, and uses = // to handle the case where you have boundary problems between eight and six bit groups. // They force everything to four character groups, using an = sign to indicate that the // bytes being represented stop before a multiple of three. https://www.base64encode.org static_assert(index2base64[63] == '_', "surprise numeral at 63"); // Being intended for small bits of data, assumes no whitespace within an entity // Encode and decode are called with a bit buffer, consisting of an unsigned byte pointer, a // starting bit position relative to the pointer, and a bit count. 
We assume that there is room // enough in the object pointed to to accommodate the bytes referenced by // Bit Position+BitCount. We don't change bits outside the range. // The bit buffer does not need to be aligned, nor does it need to be a multiple of six bits, // eight bits, or twenty four bits. // If the input bit buffer to be encoded to base sixty four is not a multiple of six bits, the // last base sixty four numeral output will represent the bit buffer padded with trailing zeroes. // If there is no room in the output span for all the base sixty four digits, the encode routine // will return a number of bits less than the size of the input bit buffer, less than the bitcount // it was given. // Compile time execution is C++ is a pain, because expressions are apt to unpredictably lose // their constexpr character for reasons that are far from clear. // // You can declare anything reasonable to be constexpr, and the compiler will not issue an // error message until the code that attempts to use what you have declared constexpr is // invoked from somewhere else "expression does not evaluate to constant" // // an assert in an expression evaluated at compile time does not trigger a run time error, // Instead the compiler reports that the expression did not evaluate to a constant, // // The error is confusing, because the error points to the declaration where the initialization // was invoked,instead of pointing to the assert. // To debug code intended to be run at compile time, exercise it at run time with // auto ptr(std::make_unique()); // at run time; class charindex { public: std::array< uint8_t, 0x100> index{ 0, }; // this non const array will become constexpr and be constructed at compile time // when an instance of this class is created in a constexpr expression. 
charindex() = delete; constexpr charindex(const uint8_t* p) { uint8_t pu{ 0 }; do { index[pu++] = 0xFF; } while (pu); uint8_t i{ 0 }; while (pu = static_cast(p[i])) { index[pu] = i; i++; assert(i != 0); //prevents unending execution, // inside a constexp, generates an "expression does not evaluate to constant" // error at compile time, rather than breaking at run time. } index['o'] = index['O'] = 0; index['l'] = index['I'] = 1; } }; static constexpr charindex ascii2six_ar(index2base64); // // // You really have to write compile time code in templates as a language, which is the totally // obscure and hard to use language apt to generate remarkably voluminous error messages // will little obvious connection to the actual problem, and surprising result that are ver // difficult to predict in advance or understand at all. // Table to convert ascii to six bit as a good old fashioned non owning naked pointer to const, // whose storage is owned by a const static which exists until the program terminates. const uint8_t* const ascii2six{ &ascii2six_ar.index[0] }; void ascii2test() { for (unsigned int i{ 0 }; i < 0x100;i++) { char v = i; unsigned int j = ascii2six[i]; char w = index2base64[j]; if (j < 64) { assert(v == w || v == 'I' || v == 'l' || v == 'o' || v == 'O'); } } } // Decode does not have an input span of encoded characters, but a char *, because it assumes // the string is always terminated by an invalid character, such as the trailing null at the end of // string or a space, or any character that is not one of our base sixty four numerals // If the there are not enough input base sixty four numerals, it returns a size less than // requested. 
// if the requested bit buffer is not a multiple of six bits, and the last base 64 numeral had // trailing ones that would not fit in the buffer, rather than the expected trailing zeroes, then // it returns a size larger than the buffer, as if it changed stuff outside the buffer but does not // actually change bits outside the buffer. This is likely an error, because obviously we want // to decode something from base sixty four that was originally decoded using the same sized // buffer. static const uint8_t INV{ UCHAR_MAX }; const uint8_t highBitMask[]{ 0x00, 0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF }; static_assert(CHAR_BIT+1 == sizeof(highBitMask), "expecting eight bits per character"); const uint8_t lowBitMask[]{ 0xFF, 0x7F, 0x3F, 0x1F, 0x0F, 0x07, 0x03, 0x01, 0x00 }; static_assert(CHAR_BIT + 1 == sizeof(lowBitMask), "expecting eight bits per character"); // Converts bit a bit buffer into base 64 numerals // Start does not need to be byte aligned, nor does the length need to be a multiple of eight, // six, or twenty four. // If insufficient room is provided for the base 64 value, throws exception. void bits2base64(const uint8_t * bitBuffer, unsigned int start, unsigned int length, std::spanbase64Numerals) { assert(length <= 6 * base64Numerals.size() - 6); // If you hit this assert you probably passed in a negative number, overflowing the // string buffer // which would cause you to write all over memory. 
// Expects(length <= 6 * base64Numerals.size()-6); if (length > 6 * base64Numerals.size() - 6)throw FatalException(sz_text_buffer_overflow); auto e{ &base64Numerals[0] }; if (length) { bitBuffer += start / CHAR_BIT; start -= (start / CHAR_BIT) * CHAR_BIT; assert(start == start % 8); unsigned int zeropadding{ ((length + 5) / 6) * 6 - length }; assert((length + zeropadding) % 6 == 0); assert(length + zeropadding <= 6 * (unsigned int)( base64Numerals.size())); unsigned int bitAccumulator = (*bitBuffer++) & lowBitMask[(start)]; unsigned int bitsInAccumulator{ 8 - start }; assert(bitsInAccumulator < 9); unsigned int count{ length - bitsInAccumulator }; //count is bits lifted out of the buffer. // We need to keep track of count, because the last byte lifted out of the buffer may well // have to be an incomplete byte, so when count gets down to below 8, we have to // special case loading the bitAccumulator. char * Outputsz{ &(base64Numerals[0]) }; auto endBaseNumeralBuffer{ Outputsz + length / 6 }; for (; e < endBaseNumeralBuffer; e++) { assert(count + bitsInAccumulator + 6 * (unsigned int)(e - Outputsz) == length); if (bitsInAccumulator < 6) { if (count > 7) { bitAccumulator = (bitAccumulator << 8) | (*bitBuffer++); bitsInAccumulator += 8; count -= 8; assert(count + bitsInAccumulator + 6 * (unsigned int)(e - Outputsz) == length); } else { assert(count + bitsInAccumulator >= 6); // Should be enough bits in buffer for // all numerals produced by this for loop. 
bitAccumulator = (((bitAccumulator << 8) | (*bitBuffer++)) >> (8 - count)); bitsInAccumulator += count; count = 0; assert(count + bitsInAccumulator + 6 * (unsigned int)(e - &base64Numerals[0]) == length); } } assert(bitsInAccumulator > 5); *e = index2base64[(bitAccumulator >> (bitsInAccumulator - 6)) & 0x3F]; bitsInAccumulator -= 6; } // When we drop out of the for loop, we may have more than 0 bits left but less than six, in // which case we then have to special case the last numeral by filling the bit accumulato // with our zeropadding. if (count) { bitAccumulator = (((bitAccumulator << 8) | (*bitBuffer++)) >> (8 - count)); bitsInAccumulator += count; count = 0; assert(count + bitsInAccumulator + 6 * (unsigned int)(e - &base64Numerals[0]) == length); } if (bitsInAccumulator) { // Going to issue one last numeral assert(zeropadding + bitsInAccumulator == 6); if (bitsInAccumulator < 6) { bitAccumulator = bitAccumulator << zeropadding; // 0 pad accumulator bitsInAccumulator = 6; //This breaks the invariant checked by the assert. } assert(bitsInAccumulator == 6); *e++ = index2base64[bitAccumulator & 0x3F]; } assert(count == 0); assert(6 * (unsigned int)(e - Outputsz) == length + zeropadding); assert((length + 5) / 6 == e - &base64Numerals[0]); //Ensures that base 64 representation is the right size to hold the bits. } *e = '\0'; } // This may produce the runtime error that there are too few numerals to fill the bit buffer. // Stops at the first invalid numeral - such as a space. // This is intended for quite short bit fields, not for transmitting megabytes of data over lines // that are not eight bit safe. // Returns the actual size of the fill. // If the bit buffer is not a multiple of six, the last numerals excess bits need to be zero // If they are not zero will truncate the excess bits and and throw BadDataException. 
unsigned int base64_to_bits(uint8_t * bitBuffer, unsigned int start, unsigned int length, const char * base64Numerals) { bitBuffer += start / CHAR_BIT; start -= (start / CHAR_BIT) * CHAR_BIT; assert(start == start % 8); unsigned int zeropadding{ ((length + 5) / 6) * 6 - length }; assert((length + zeropadding) % 6 == 0); unsigned int bitAccumulator = (*bitBuffer)>>(8-start); unsigned int bitsInAccumulator{ start }; unsigned int count{ length }; unsigned int numeral; uint8_t overflowBits{ '\0' }; uint8_t * p{ bitBuffer }; for ( const char * e = base64Numerals; ((numeral = ascii2six[static_cast(*e)]), (numeral < INV)); e++ ){ assert((e - base64Numerals) * 6 + count == length); assert((p-bitBuffer)*8 + bitsInAccumulator == start + (e-base64Numerals)*6 ); bitAccumulator = (bitAccumulator << 6) | numeral; bitsInAccumulator += 6; if (count < 6) { if (count > 0) { const uint8_t lowBitMaskIn[]{ 0x00, 0x01, 0x03, 0x07, 0x0F, 0x1F, 0x3F }; // Bit accumulator is here numeral aligned, rather than byte aligned overflowBits = bitAccumulator & lowBitMaskIn[6 - count]; bitAccumulator = (bitAccumulator >> (6 - count)); bitsInAccumulator -= 6 - count; // Now it is bit field aligned count = 0; } } else count -= 6; if (bitsInAccumulator > 7) { *p++ = (bitAccumulator >> (bitsInAccumulator % 8)); bitsInAccumulator -= 8; } if (count == 0) { assert(bitsInAccumulator < 9); if (bitsInAccumulator) { *p = (bitAccumulator << (8 - bitsInAccumulator)) |(*p & lowBitMask[bitsInAccumulator]); } break; } } if (overflowBits) throw BadDataException(); return overflowBits ? length + 1 : length - count; } /* Expects pointer to byte buffer and pointer to string. Expects a string of exactly the correct number of numerals, terminated by a non base64 character, such as null. Throws exception if that is not what it gets. Fills the byte buffer exactly. 
Returns a uint8_t containing the excess bits of the last numeral in its low order part.
  byteBuffer      receives byteCount bytes
  byteCount       number of bytes to fill
  base64Numerals  the numerals, terminated by a character that is not a numeral
Throws BadDataException when there are too few numerals or too many. */
uint8_t base64_to_bytes(
    uint8_t* byteBuffer,
    uint_fast32_t byteCount,
    const char* base64Numerals
) {
    // First decode every numeral that fits entirely inside the byte buffer.
    const auto numeralsCount{ byteCount * 8 / 6 };
    const auto bitsCount{ numeralsCount * 6 };
    const auto length{ base64_to_bits(byteBuffer, 0, static_cast<unsigned int>(bitsCount), base64Numerals) };
    if (length < bitsCount) throw BadDataException();
    base64Numerals += numeralsCount;
    // Bits of the last byte not yet filled: 0, 2 or 4.  They are the high order bits of one
    // further numeral, whose remaining low order bits are the excess we return.
    const auto missingBitsField{ 8 * byteCount - bitsCount };
    uint8_t leftoverBits{ 0 };
    if (missingBitsField) {
        // we cast to unsigned character, because otherwise it is likely to be sign
        // extended resulting in indexing outside the range 0-0xFF
        // with an indeterminate and unpredictable number of high
        // order bits set.
        const uint8_t numeral{ ascii2six[static_cast<uint8_t>(*base64Numerals++)] };
        if (numeral > 63) throw BadDataException();
        const auto leftoverBitsField{ 6 - missingBitsField };
        assert(missingBitsField + leftoverBitsField == 6);
        const auto missingBitsMask{ static_cast<uint8_t>((1u << missingBitsField) - 1) };
        const auto missingBits{ static_cast<uint8_t>(numeral >> leftoverBitsField) };
        // Replace the low order missingBitsField bits of the last byte.
        byteBuffer[byteCount - 1] = static_cast<uint8_t>((byteBuffer[byteCount - 1] & ~missingBitsMask) | missingBits);
        leftoverBits = static_cast<uint8_t>(numeral & ((1u << leftoverBitsField) - 1));
    }
    // The string has to end here: one more numeral means it was too long.
    if (ascii2six[static_cast<uint8_t>(*base64Numerals)] < 64) throw BadDataException();
    return leftoverBits;
}

// Converts a bit buffer into base 2048 BIPS-39 words
// Using the array ar_sz_bip_0039_wordlist defined in localization.cpp
// The largest word in the array is eight characters, but other languages likely have longer words.
// Start does not need to be byte aligned, nor does the length need to be a multiple of eight or
// eleven.
void bits2base2048(const uint8_t* bitBuffer, int start, int length, std::spanszBipsWords) { char* Outputsz{ &(szBipsWords[0]) }; if (szBipsWords.size() == 0)throw ; char* end_of_Outputsz{ &(szBipsWords[szBipsWords.size() - 1]) }; if (length>0) { bitBuffer += start / CHAR_BIT; start -= (start / CHAR_BIT) * CHAR_BIT; assert(start == start % 8); uint_fast32_t bitAccumulator = (*bitBuffer++) & lowBitMask[(start)]; int bitsInAccumulator{ 8 - start }; assert(bitsInAccumulator < 9); int count{ length - bitsInAccumulator }; //count is bits remaining in buffer, may go negative // We need to keep track of count, because the last byte lifted out of the buffer may well // have to be an incomplete byte, so when count gets down to below 8, we have to special // case loading the bitAccumulator. while (count) { while (bitsInAccumulator < 11 && count >0) { bitAccumulator = (bitAccumulator << 8) | (*bitBuffer++); bitsInAccumulator += 8; count -= 8; } if (count < 0) { // get rid of bad bits bitAccumulator >>= (-count); bitsInAccumulator += count; if (bitsInAccumulator < 11) { bitAccumulator <<= (11 - bitsInAccumulator); bitsInAccumulator = 11; } count = 0; } uint_fast16_t wordnumber{ (bitAccumulator >> (bitsInAccumulator - 11)) & 2047 }; bitsInAccumulator -= 11; const char* psz_Word{ ar_sz_bip_0039_wordlist[wordnumber] }; while (*psz_Word) { if (Outputsz == end_of_Outputsz){ throw FatalException("not enough room for BIPS-0039 passphrase"); } *Outputsz++ = *psz_Word++; } if (bitsInAccumulator + count > 0) { if (Outputsz == end_of_Outputsz){ throw FatalException("not enough room for BIPS-0039 passphrase"); } *Outputsz++ = ' '; } } } *Outputsz++ = 0; } /* // Not cryptographically strong, not DoS induced collision resistant. Produces // the same mapping on all machines, for all time. 
uint32_t bernstein_hash(const uint8_t* key, unsigned int len) { constexpr uint_fast32_t INITIAL_VALUE = 5381; constexpr uint_fast32_t M = 33; uint_fast32_t hash = INITIAL_VALUE; for (uint_fast32_t i = 0; i < len; ++i) hash = M * hash + key[i]; return hash; } */