namespace ro { // Decay to pointer is dangerously convenient, // but in some situations it is just convenient // This class provides an std:array one larger // than the compile time string size, which decays // to char*, std::string, and wxString // In some code, this is ambiguous, so casts // must sometimes be explicitly invoked. template class CompileSizedString : public std::array{ public: static constexpr int length{ stringlen }; CompileSizedString() { *(this->rbegin()) = '0'; } CompileSizedString(char *psz) { auto tsz{ this->rbegin() }; *tsz = '0'; if (psz != nullptr) { auto usz = tsz + strlen; while (tsz < usz && *psz != '\0') *tsz++ = *psz++; *tsz = '\0'; } } operator char* () & { char* pc = &(static_cast*>(this)->operator[](0)); return pc; } operator const char* () const& { const char* pc = &(static_cast*>(this)->operator[](0)); return pc; } operator const char* () const&& { const char* pc = &(static_cast*>(this)->operator[](0)); return pc; } operator std::string() const& { return std::string((const char*)*this, this->length); } operator std::string() const&& { return std::string((const char*)*this, this->length); } operator wxString() const& { return wxString::FromUTF8Unchecked((const char*)(*this)); } operator std::span() const& { return std::span(static_cast((char*)*this), stringlen + 1); } operator wxString() const&& { return wxString::FromUTF8Unchecked((const char*)(*this)); } operator std::span() const&& { return std::span(static_cast((char*)*this), stringlen + 1); } }; // This template generates a span over an indexable byte type, // such as a C array or an std::array, but not pointers template < typename T> std::enable_if_t< !std::is_pointer::value && sizeof(std::declval()[0]) == 1, std::span > serialize(const T& a) { int l; const void* pt; if constexpr (std::is_same_v, std::string>) { l = a.length() + 1; pt = a.c_str(); } else { l = std::size(a); pt = &a[0]; } return std::span(static_cast(pt), l); } // Compile time test to see if a type has a blob 
array member // This can be used in if constexpr (is_blob_field_type::value) // By convention, blob fields are an std::array of unsigned bytes // therefore already serializable. template struct is_blob_field_type{ template static constexpr decltype(std::declval().blob.size(), bool()) test() { return sizeof(std::declval().blob[0])==1; } template static constexpr bool test(int = 0) { return false; } static constexpr bool value = is_blob_field_type::template test(); }; template concept blob_type = ro::is_blob_field_type::value; // At present our serial classes consist of std::span and custom classes that publicly inherit from std::span // To handle compound objects, add custom classes inheriting from std::span[n] // template class that generates a std::span of bytes over the blob // field of any object containing a blob record, which is normally sufficient // for a machine independent representation of that object template std::span serialize(const T& pt) { return serialize(pt.blob); } // method that assumes that any char * pointer points a null terminated string // and generates a std::span of bytes over the string including the terminating // null. // we assume the string is already machine independent, which is to say, we assume // it comes from a utf8 locale. 
inline auto serialize(const char* sp) { return std::span(static_cast(static_cast(sp)), strlen(sp) + 1); } inline auto serialize(const decltype(std::declval().ToUTF8()) sz){ return serialize(static_cast(sz)); } /* inline auto serialize(const wxString& wxstr) { return serialize(static_cast(wxstr.ToUTF8())); } If we allowed wxwidgets string to be serializable, all sorts of surprising things would be serializable in surprising ways, because wxWidgets can convert all sorts of things into strings that you were likely not expecting, in ways unlikely to be machine independent, so you if you give an object to be hashed that you have not provided some correct means for serializing, C++ is apt to unhelpfully and unexpectedly turn it into a wxString, If you make wxStrings hashable, suprising things become hashable. However, we do make the strange data structure provided by wxString.ToUTF8() hashable, so that the wxString will not be implicitly hashable, but will be explicitly hashable. */ // data structure containing a serialized unsigned integer // Converts an unsigned integer to VLQ format, and creates a bytespan pointing at it. // VLQ format, Variable Length Quantity (It is a standard used by LLVM and others) // On reflection, VLQ format is not convenient for the intended usage (merkle patricia trees // representing SQL indexes, and a better format is to compress leading zero or leading 0xFF bytes // with the length of the run being implied by a count of the bytes following the run) template class userial : public std::span { public: std::array::digits + 6) / 7> bblob; userial(T i) { byte* p = &bblob[0] + sizeof(bblob); *(--p) = i & 0x7f; i >>= 7; while (i != 0) { *(--p) = (i & 0x7f) | 0x80; i >>= 7; } assert(p >= &bblob[0]); *static_cast*>(this) = std::span(p, &bblob[0] + sizeof(bblob));; } }; // data structure containing a serialized signed integer, // Converts an signed integer to VLQ format, and creates a bytespan pointing at it. 
// VLQ format, Variable Length Quantity (It is a standard used by LLVM and others) template class iserial : public std::span { public: std::array::digits + 7) / 7> bblob; iserial(T i) { // Throw away the repeated leading bits, and g byte* p = &bblob[0] + sizeof(bblob); unsigned count; if (i < 0) { size_t ui = i; count = (std::numeric_limits::digits - std::countl_one(ui)) / 7; } else { size_t ui = i; count = (std::numeric_limits::digits - std::countl_zero(ui)) / 7; } *(--p) = i & 0x7f; while (count-- != 0) { i >>= 7; *(--p) = (i & 0x7f) | 0x80; } assert(p >= &bblob[0]); *static_cast*>(this) = std::span(p, &bblob[0] + sizeof(bblob));; } }; // converts machine dependent representation of an integer // into a span pointin at a compact machine independent representation of an integer // The integer is split into seven bit nibbles in big endian order // (VLQ format), with the high // order bit of the byte indicating that more bytes are to come. // for an unsigned integer, all high order bytes of the form 0x80 are left out. // for a positive signed integer, the same, except that the first byte // of what is left must have zero at bit 6 // for a negative signed integer, all the 0xFF bytes are left out, except // that the first byte of what is left must have a one bit at bit six. // // small numbers get compressed. 
// primarily used by hash and hsh so that the same numbers on different // machines will generate the same hash template userial serialize(T i) { return userial(i); /* we don't need all deserialize functions to have the same name, indeed they have to be distinct because serialized data contains no type information, but for the sake of template code we need all things that may be serialized to be serialized by the serialize command, so that one template can deal with any number of serializable types */ } template iserial serialize(T i) { return iserial(i); /* we don't need all deserialize functions to have the same name, but for the sake of template code we need all things that may be serialized to be serialized by the serialize command, so that one template can deal with any number of serializable types */ } // Turns a compact machine independent representation of an uninteger // into a 64 bit signed integer template T deserialize(const byte* p) { auto oldp = p; T i; if (*p & 0x40)i = -64; else i = 0; while (*p & 0x80) { i = (i | (*p++ & 0x7F)) << 7; } if (p - oldp > (std::numeric_limits::digits + 6) / 7)throw BadDataException(); return i | *p; } // Turns a compact machine independent representation of an integer // into a 64 bit unsigned integer template T deserialize(const byte * p) { auto oldp = p; T i{ 0 }; while (*p & 0x80) { i = (i | (*p++ & 0x7F)) << 7; } if (p - oldp > 9)throw BadDataException(); return i | *p; } // Turns a compact machine independent representation of an integer // into a 64 bit signed integer template T deserialize(std::span g) { byte* p = static_cast(&g[0]); T i{ deserialize(p) }; if (p > &g[0]+g.size())throw BadDataException(); return i; } /* It will be about a thousand years before numbers larger than 64 bits appear in valid well formed input, and bad data structures have to be dealt with a much higher level that knows what the numbers mean, and deals with them according to their meaning Until then the low level code will arbitrarily 
truncate numbers larger than sixty four bits, but numbers larger than
sixty four bits are permissible in input, are valid at the lowest level.

We return uint64_t, rather than uint_fast64_t to ensure that all
implementations misinterpret garbage and malicious input in the same way.
We cannot protect against Machiavelli perverting the input, so we
don't try very hard to prevent Murphy perverting the input,
but we do try to prevent Machiavelli from perverting the input in
ways that will induce peers to disagree.

We use an explicit narrow_cast, rather than simply declaring the function
to be uint64_t, in order to express the intent to uniformly force
possibly garbage data being deserialized to standardized garbage.

We protect against malicious and ill formed data that would cause the
system to go off the rails at a point of the enemy's choosing,
and we protect against malicious and ill formed data that one party
might interpret in one way, and another party might interpret in a
different way.

Ill formed data that just gets converted into well formed, but nonsense data
can cause no harm that well formed nonsense data could not cause.

It suffices, therefore, to ensure that all implementations misinterpret
input containing unreasonably large numbers as the same number.

Very large numbers are valid in themselves, but not going to be valid
as part of valid data structures for a thousand years or so.

The largest numbers occurring in well formed valid data will be
currency amounts, and the total number of the smallest unit of currency
is fixed at 2^64-1 which will suffice for a thousand years.
Or we might allow arbitrary precision floating point with powers of a
thousand, so that sensible numbers to a human are represented by
sensible numbers in the actual representation.

secret keys, scalars are actually much larger numbers, modulo 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ecU but they are represented in a different format, their binary format being fixed size low endian format, as 256 bit numbers, though only 253 bits are actually needed and used, and their human readable format being 44 digits in a base 58 representation.*/ // a compile time test to check if an object class has a machine independent representation template struct is_serializable{ template static constexpr decltype(ro::serialize(std::declval()), bool()) test() { if constexpr (sizeof...(Args2) > 0) { return is_serializable::template test(); } else { return true; } } template static constexpr bool test(int = 0) { return false; } static constexpr bool value = is_serializable::template test(); }; template concept serializable = is_serializable::value; static_assert( !serializable && serializable, char*, std::span>, "concepts needed"); template ro::CompileSizedString< (2 * sizeof(T))>bin2hex(const T& pt) { ro::CompileSizedString< (2 * sizeof(T))>sz; sodium_bin2hex(&sz[0], sizeof(pt.blob) * 2 + 1, &pt.blob[0], pt.blob.size()); return sz; } template T hex2bin(const ro::CompileSizedString< (2 * sizeof(T))>& sz){ T pt; size_t bin_len{ sizeof(T) }; sodium_hex2bin( reinterpret_cast (&pt), sizeof(T), &sz[0], 2 * sizeof(T), nullptr, &bin_len, nullptr ); return pt; } template decltype(std::declval().blob, ro::CompileSizedString < (sizeof(T) * 8 + 5) / 6>()) to_base64_string(const T& p_blob) { ro::CompileSizedString < (sizeof(T) * 8 + 5) / 6> sz; bits2base64( &(p_blob.blob[0]), 0, sizeof(p_blob.blob) * 8, std::span(sz) ); return sz; } } //End ro namespace