From f7876905e3a322aba5202949c32fd91088f8663b Mon Sep 17 00:00:00 2001 From: Cheng Date: Mon, 25 Sep 2023 18:30:42 +1000 Subject: [PATCH] Simplifying the serialization to and from VLQ integer format with C++20 concepts. Moving serialization to a new header, serialization.h --- msvc/wallet.vcxproj | 1 + src/ristretto255.h | 341 -------------------------------------------- src/serialization.h | 340 +++++++++++++++++++++++++++++++++++++++++++ src/stdafx.h | 1 + 4 files changed, 342 insertions(+), 341 deletions(-) create mode 100644 src/serialization.h diff --git a/msvc/wallet.vcxproj b/msvc/wallet.vcxproj index 1a390cf..99865da 100644 --- a/msvc/wallet.vcxproj +++ b/msvc/wallet.vcxproj @@ -144,6 +144,7 @@ + diff --git a/src/ristretto255.h b/src/ristretto255.h index a868115..3a4eb57 100644 --- a/src/ristretto255.h +++ b/src/ristretto255.h @@ -54,348 +54,7 @@ void randombytes_buf(std::span in); void randombytes_buf(std::span in); -namespace ro { - // Decay to pointer is dangerously convenient, - // but in some situations it is just convenient - // This class provides an std:array one larger - // than the compile time string size, which decays - // to char*, std::string, and wxString - // In some code, this is ambiguous, so casts - // must sometimes be explicitly invoked. 
- template - class CompileSizedString : public std::array{ - public: - static constexpr int length{ stringlen }; - CompileSizedString() { - *(this->rbegin()) = '0'; - } - CompileSizedString(char *psz) { - auto tsz{ this->rbegin() }; - *tsz = '0'; - if (psz != nullptr) { - auto usz = tsz + strlen; - while (tsz < usz && *psz != '\0') - *tsz++ = *psz++; - *tsz = '\0'; - } - } - operator char* () & { - char* pc = &(static_cast*>(this)->operator[](0)); - return pc; - } - - operator const char* () const& { - const char* pc = &(static_cast*>(this)->operator[](0)); - return pc; - } - operator const char* () const&& { - const char* pc = &(static_cast*>(this)->operator[](0)); - return pc; - } - operator std::string() const& { - return std::string((const char*)*this, this->length); - } - operator std::string() const&& { - return std::string((const char*)*this, this->length); - } - operator wxString() const& { - return wxString::FromUTF8Unchecked((const char*)(*this)); - } - operator std::span() const& { - return std::span(static_cast((char*)*this), stringlen + 1); - } - operator wxString() const&& { - return wxString::FromUTF8Unchecked((const char*)(*this)); - } - operator std::span() const&& { - return std::span(static_cast((char*)*this), stringlen + 1); - } - }; - - // This template generates a span over an indexable byte type, - // such as a C array or an std::array, but not pointers - template < typename T> - std::enable_if_t< - !std::is_pointer::value && - sizeof(std::declval()[0]) == 1, - std::span - > serialize(const T& a) { - return std::span(static_cast(static_cast(&a[0])), std::size(a)); - } - - // Compile time test to see if a type has a blob array member - // This can be used in if constexpr (is_blob_field_type::value) - // By convention, blob fields are an std::array of unsigned bytes - // therefore already serializable. 
- template struct is_blob_field_type{ - template static constexpr decltype(std::declval().blob.size(), bool()) test() { - return sizeof(std::declval().blob[0])==1; - } - template static constexpr bool test(int = 0) { - return false; - } - static constexpr bool value = is_blob_field_type::template test(); - }; - - template concept blob_type = ro::is_blob_field_type::value; - - - // At present our serial classes consist of std::span and custom classes that publicly inherit from std::span - // To handle compound objects, add custom classes inheriting from std::span[n] - - // template class that generates a std::span of bytes over the blob - // field of any object containing a blob record, which is normally sufficient - // for a machine independent representation of that object - template std::span serialize(const T& pt) { - return serialize(pt.blob); - } - - // method that assumes that any char * pointer points a null terminated string - // and generates a std::span of bytes over the string including the terminating - // null. - // we assume the string is already machine independent, which is to say, we assume - // it comes from a utf8 locale. 
- - inline auto serialize(const char* sp) { return std::span(static_cast(static_cast(sp)), strlen(sp) + 1); } - - inline auto serialize(const decltype(std::declval().ToUTF8()) sz){ - return serialize(static_cast(sz)); - } - /* - inline auto serialize(const wxString& wxstr) { - return serialize(static_cast(wxstr.ToUTF8())); - } - If we allowed wxwidgets string to be serializable, all sorts of surprising things - would be serializable in surprising ways, because wxWidgets can convert all - sorts of things into strings that you were likely not expecting, in ways - unlikely to be machine independent, so you if you give an object to be - hashed that you have not provided some correct means for serializing, C++ is - apt to unhelpfully and unexpectedly turn it into a wxString, - - If you make wxStrings hashable, suprising things become hashable. - However, we do make the strange data structure provided by wxString.ToUTF8() hashable, - so that the wxString will not be implicitly hashable, but will be explicitly hashable. - */ - - // data structure containing a serialized signed integer. - template, int> = 0> - class userial : public std::span { - public: - std::array::digits + 6) / 7> bblob; - userial(T i) { - byte* p = &bblob[0] + sizeof(bblob); - *(--p) = i & 0x7f; - i >>= 7; - while (i != 0) { - *(--p) = (i & 0x7f) | 0x80; - i >>= 7; - } - assert(p >= &bblob[0]); - *static_cast*>(this) = std::span(p, &bblob[0] + sizeof(bblob));; - } - }; - - // data structure containing a serialized signed integer. 
- template, int> = 0> - class iserial : public std::span { - public: - std::array::digits + 7) / 7> bblob; - iserial(T i) { - // Throw away the repeated leading bits, and g - byte* p = &bblob[0] + sizeof(bblob); - unsigned count; - if (i < 0) { - size_t ui = i; - count = (std::numeric_limits::digits - std::countl_one(ui)) / 7; - } - else { - size_t ui = i; - count = (std::numeric_limits::digits - std::countl_zero(ui)) / 7; - } - *(--p) = i & 0x7f; - while (count-- != 0) { - i >>= 7; - *(--p) = (i & 0x7f) | 0x80; - } - assert(p >= &bblob[0]); - *static_cast*>(this) = std::span(p, &bblob[0] + sizeof(bblob));; - } - }; - - - // converts machine dependent representation of an integer - // into a span pointin at a compact machine independent representation of an integer - // The integer is split into seven bit nibbles in big endian order, with the high - // order bit of the byte indicating that more bytes are to come. - // for an unsigned integer, all high order bytes of the form 0x80 are left out. - // for a positive signed integer, the same, except that the first byte - // of what is left must have zero at bit 6 - // for a negative signed integer, all the 0xFF bytes are left out, except - // that the first byte of what is left must have a one bit at bit six. - // - // small numbers get compressed. 
- // primarily used by hash and hsh so that the same numbers on different - // machines will generate the same hash - template std::enable_if_t, ro::userial > - serialize(T i) { - return userial(i); - /* we don't need all deserialize functions to have the same name, - indeed they have to be distinct because serialized data contains - no type information, but for the sake of template code we need all - things that may be serialized to be serialized by the serialize - command, so that one template can deal with any - number of serializable types */ - } - template std::enable_if_t, ro::iserial >serialize(T i) { - return iserial(i); - /* we don't need all deserialize functions to have the same name, but for the sake of template code we need all - things that may be serialized to be serialized by the serialize command, so that one template can deal with any - number of serializable types */ - } - -// Turns a compact machine independent representation of an uninteger -// into a 64 bit signed integer - template std::enable_if_t, T > - deserialize(const byte* p) { - auto oldp = p; - T i; - if (*p & 0x40)i = -64; - else i = 0; - while (*p & 0x80) { - i = (i | (*p++ & 0x7F)) << 7; - } - if (p - oldp > (std::numeric_limits::digits + 6) / 7)throw BadDataException(); - return i | *p; - } - // Turns a compact machine independent representation of an integer - // into a 64 bit unsigned integer - template std::enable_if_t, T > - deserialize(const byte * p) { - auto oldp = p; - T i{ 0 }; - while (*p & 0x80) { - i = (i | (*p++ & 0x7F)) << 7; - } - if (p - oldp > 9)throw BadDataException(); - return i | *p; - } - - // Turns a compact machine independent representation of an integer - // into a 64 bit signed integer - template std::enable_if_t || is_standard_unsigned_integer, T > - deserialize(std::span g) { - byte* p = static_cast(&g[0]); - T i{ deserialize(p) }; - if (p > &g[0]+g.size())throw BadDataException(); - return i; - } - - /* - It will be about a thousand years before 
numbers larger than 64 bits - appear in valid well formed input, and bad data structures have to be - dealt with a much higher level that knows what the numbers mean, - and deals with them according to their meaning - - Until then the low level code will arbitrarily truncate numbers larger - than sixty four bits, but numbers larger than sixty four bits are - permissible in input, are valid at the lowest level. - - We return uint64_t, rather than uint_fast64_t to ensure that all - implementations misinterpret garbage and malicious input in the - same way. - We cannot protect against Machiavelli perverting the input, so we - don't try very hard to prevent Murphy perverting the input, - but we do try to prevent Machiavelli from perverting the input in - ways that will induce peers to disagree. - - We use an explicit narrow_cast, rather than simply declaring th - function to be uint64_t, in order to express the intent to uniformly - force possibly garbage data being deserialized to standardized - garbage. - - We protect against malicious and ill formed data would cause the - system to go off the rails at a point of the enemy's choosing, - and we protect against malicious and ill formed data that one party - might interpret in one way, and another party might interpret in a - different way. - - Ill formed data that just gets converted into well formed, but - nonsense data can cause no harm that well formed nonsense data - could not cause. - - It suffices, therefore, to ensure that all implementations misinterpret - input containing unreasonably large numbers as the same number. - - Very large numbers are valid in themselves, but not going to be valid - as part of valid data structures for a thousand years or so. - - The largest numbers occurring in well formed valid data will be - currency amounts, and the total number of the smallest unit of - currency is fixed at 2^64-1 which will suffice for a thousand years. 
- Or we might allow arbitrary precision floating point with powers of - a thousand, so that sensible numbers to a human are represented by - sensible numbers in the actual representation. - - secret keys, scalars are actually much larger numbers, modulo - 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ecU - but they are represented in a different format, their binary format - being fixed size low endian format, as 256 bit numbers, though only - 253 bits are actually needed and used, and their human readable - format being 44 digits in a base 58 representation.*/ - - // a compile time test to check if an object class has a machine independent representation - template struct is_serializable{ - template - static constexpr decltype(ro::serialize(std::declval()), bool()) test() { - if constexpr (sizeof...(Args2) > 0) { - return is_serializable::template test(); - } - else { - return true; - } - } - template static constexpr bool test(int = 0) { - return false; - } - static constexpr bool value = is_serializable::template test(); - }; - - template - concept serializable = is_serializable::value; - - static_assert( !serializable - && serializable, char*, std::span>, - "concepts needed"); - - template ro::CompileSizedString< (2 * sizeof(T))>bin2hex(const T& pt) { - ro::CompileSizedString< (2 * sizeof(T))>sz; - sodium_bin2hex(&sz[0], sizeof(pt.blob) * 2 + 1, &pt.blob[0], pt.blob.size()); - return sz; - } - - template T hex2bin(const ro::CompileSizedString< (2 * sizeof(T))>& sz){ - T pt; - size_t bin_len{ sizeof(T) }; - sodium_hex2bin( - reinterpret_cast (&pt), - sizeof(T), - &sz[0], 2 * sizeof(T), - nullptr, &bin_len, nullptr - ); - return pt; - } - - template decltype(std::declval().blob, ro::CompileSizedString < (sizeof(T) * 8 + 5) / 6>()) to_base64_string(const T& p_blob) { - ro::CompileSizedString < (sizeof(T) * 8 + 5) / 6> sz; - bits2base64( - &(p_blob.blob[0]), 0, sizeof(p_blob.blob) * 8, - std::span(sz) - ); - return sz; - } - -} //End ro namespace 
namespace ristretto255 { using diff --git a/src/serialization.h b/src/serialization.h new file mode 100644 index 0000000..cae338e --- /dev/null +++ b/src/serialization.h @@ -0,0 +1,340 @@ +namespace ro { + + // Decay to pointer is dangerously convenient, + // but in some situations it is just convenient + // This class provides an std::array one larger + // than the compile time string size, which decays + // to char*, std::string, and wxString + // In some code, this is ambiguous, so casts + // must sometimes be explicitly invoked. NOTE(review): both constructors store the digit '0' (not '\0') in the last element, and the char* constructor walks a reverse iterator bounded by `tsz + strlen` rather than stringlen - confirm intent. + template + class CompileSizedString : public std::array{ + public: + static constexpr int length{ stringlen }; + CompileSizedString() { + *(this->rbegin()) = '0'; + } + CompileSizedString(char *psz) { + auto tsz{ this->rbegin() }; + *tsz = '0'; + if (psz != nullptr) { + auto usz = tsz + strlen; + while (tsz < usz && *psz != '\0') + *tsz++ = *psz++; + *tsz = '\0'; + } + } + operator char* () & { + char* pc = &(static_cast*>(this)->operator[](0)); + return pc; + } + + operator const char* () const& { + const char* pc = &(static_cast*>(this)->operator[](0)); + return pc; + } + operator const char* () const&& { + const char* pc = &(static_cast*>(this)->operator[](0)); + return pc; + } + operator std::string() const& { + return std::string((const char*)*this, this->length); + } + operator std::string() const&& { + return std::string((const char*)*this, this->length); + } + operator wxString() const& { + return wxString::FromUTF8Unchecked((const char*)(*this)); + } + operator std::span() const& { + return std::span(static_cast((char*)*this), stringlen + 1); + } + operator wxString() const&& { + return wxString::FromUTF8Unchecked((const char*)(*this)); + } + operator std::span() const&& { + return std::span(static_cast((char*)*this), stringlen + 1); + } + }; + + // This template generates a span over an indexable byte type, + // such as a C array or an std::array, but not pointers + template < typename T> + std::enable_if_t< + 
!std::is_pointer::value && + sizeof(std::declval()[0]) == 1, + std::span + > serialize(const T& a) { + return std::span(static_cast(static_cast(&a[0])), std::size(a)); + } + + // Compile time test to see if a type has a blob array member + // This can be used in if constexpr (is_blob_field_type::value) + // By convention, blob fields are an std::array of unsigned bytes + // therefore already serializable. + template struct is_blob_field_type{ + template static constexpr decltype(std::declval().blob.size(), bool()) test() { + return sizeof(std::declval().blob[0])==1; + } + template static constexpr bool test(int = 0) { + return false; + } + static constexpr bool value = is_blob_field_type::template test(); + }; + + template concept blob_type = ro::is_blob_field_type::value; + + + // At present our serial classes consist of std::span and custom classes that publicly inherit from std::span + // To handle compound objects, add custom classes inheriting from std::span[n] + + // template class that generates a std::span of bytes over the blob + // field of any object containing a blob record, which is normally sufficient + // for a machine independent representation of that object + template std::span serialize(const T& pt) { + return serialize(pt.blob); + } + + // method that assumes that any char * pointer points at a null terminated string + // and generates a std::span of bytes over the string including the terminating + // null. + // we assume the string is already machine independent, which is to say, we assume + // it comes from a utf8 locale. 
+ + inline auto serialize(const char* sp) { return std::span(static_cast(static_cast(sp)), strlen(sp) + 1); } + + inline auto serialize(const decltype(std::declval().ToUTF8()) sz){ + return serialize(static_cast(sz)); + } + /* + inline auto serialize(const wxString& wxstr) { + return serialize(static_cast(wxstr.ToUTF8())); + } + If we allowed wxwidgets string to be serializable, all sorts of surprising things + would be serializable in surprising ways, because wxWidgets can convert all + sorts of things into strings that you were likely not expecting, in ways + unlikely to be machine independent, so if you give an object to be + hashed that you have not provided some correct means for serializing, C++ is + apt to unhelpfully and unexpectedly turn it into a wxString, + + If you make wxStrings hashable, surprising things become hashable. + However, we do make the strange data structure provided by wxString.ToUTF8() hashable, + so that the wxString will not be implicitly hashable, but will be explicitly hashable. + */ + + // data structure containing a serialized unsigned integer + // Converts an unsigned integer to VLQ format, and creates a bytespan pointing at it. + // VLQ format, Variable Length Quantity (It is a standard used by LLVM and others). NOTE(review): the inherited span is set to point into this object's own bblob and no copy constructor is declared, so a copy's span still refers to the source object's buffer. + template class userial : public std::span { + public: + std::array::digits + 6) / 7> bblob; + userial(T i) { + byte* p = &bblob[0] + sizeof(bblob); + *(--p) = i & 0x7f; + i >>= 7; + while (i != 0) { + *(--p) = (i & 0x7f) | 0x80; + i >>= 7; + } + assert(p >= &bblob[0]); + *static_cast*>(this) = std::span(p, &bblob[0] + sizeof(bblob));; + } + }; + + // data structure containing a serialized signed integer, + // Converts a signed integer to VLQ format, and creates a bytespan pointing at it. 
+ // VLQ format, Variable Length Quantity (It is a standard used by LLVM and others). NOTE(review): the inherited span is set to point into this object's own bblob and no copy constructor is declared, so a copy's span still refers to the source object's buffer. + template class iserial : public std::span { + public: + std::array::digits + 7) / 7> bblob; + iserial(T i) { + // Throw away the repeated leading sign bits; count is the number of 0x80-flagged bytes written ahead of the final byte + byte* p = &bblob[0] + sizeof(bblob); + unsigned count; + if (i < 0) { + size_t ui = i; + count = (std::numeric_limits::digits - std::countl_one(ui)) / 7; + } + else { + size_t ui = i; + count = (std::numeric_limits::digits - std::countl_zero(ui)) / 7; + } + *(--p) = i & 0x7f; + while (count-- != 0) { + i >>= 7; + *(--p) = (i & 0x7f) | 0x80; + } + assert(p >= &bblob[0]); + *static_cast*>(this) = std::span(p, &bblob[0] + sizeof(bblob));; + } + }; + + // converts machine dependent representation of an integer + // into a span pointing at a compact machine independent representation of an integer + // The integer is split into seven bit groups in big endian order + // (VLQ format), with the high + // order bit of the byte indicating that more bytes are to come. + // for an unsigned integer, all high order bytes of the form 0x80 are left out. + // for a positive signed integer, the same, except that the first byte + // of what is left must have zero at bit 6 + // for a negative signed integer, all the 0xFF bytes are left out, except + // that the first byte of what is left must have a one bit at bit six. + // + // small numbers get compressed. 
+ // primarily used by hash and hsh so that the same numbers on different + // machines will generate the same hash + template userial serialize(T i) { + return userial(i); + /* we don't need all deserialize functions to have the same name, + indeed they have to be distinct because serialized data contains + no type information, but for the sake of template code we need all + things that may be serialized to be serialized by the serialize + command, so that one template can deal with any + number of serializable types */ + } + template iserial serialize(T i) { + return iserial(i); + /* we don't need all deserialize functions to have the same name, but for the sake of template code we need all + things that may be serialized to be serialized by the serialize command, so that one template can deal with any + number of serializable types */ + } + +// Turns a compact machine independent representation of a signed integer +// into a signed integer of type T (bit 6 of the first byte supplies the sign); throws BadDataException when more continuation bytes were consumed than the limit derived from numeric_limits digits + template T deserialize(const byte* p) { + auto oldp = p; + T i; + if (*p & 0x40)i = -64; + else i = 0; + while (*p & 0x80) { + i = (i | (*p++ & 0x7F)) << 7; + } + if (p - oldp > (std::numeric_limits::digits + 6) / 7)throw BadDataException(); + return i | *p; + } + // Turns a compact machine independent representation of an unsigned integer + // into an unsigned integer of type T; throws BadDataException when more than 9 continuation bytes were consumed + template T deserialize(const byte * p) { + auto oldp = p; + T i{ 0 }; + while (*p & 0x80) { + i = (i | (*p++ & 0x7F)) << 7; + } + if (p - oldp > 9)throw BadDataException(); + return i | *p; + } + + // Turns the compact machine independent representation at the start of a byte span + // into an integer of type T. NOTE(review): p is handed to the pointer overload by value and is never advanced, so the p > end-of-span test below cannot fire - confirm intent + template T deserialize(std::span g) { + byte* p = static_cast(&g[0]); + T i{ deserialize(p) }; + if (p > &g[0]+g.size())throw BadDataException(); + return i; + } + + /* + It will be about a thousand years before numbers larger than 64 bits + appear in valid well formed input, and bad data structures have to be + dealt with at a much higher level that knows what the 
numbers mean, + and deals with them according to their meaning + + Until then the low level code will arbitrarily truncate numbers larger + than sixty four bits, but numbers larger than sixty four bits are + permissible in input, are valid at the lowest level. + + We return uint64_t, rather than uint_fast64_t to ensure that all + implementations misinterpret garbage and malicious input in the + same way. + We cannot protect against Machiavelli perverting the input, so we + don't try very hard to prevent Murphy perverting the input, + but we do try to prevent Machiavelli from perverting the input in + ways that will induce peers to disagree. + + We use an explicit narrow_cast, rather than simply declaring the + function to be uint64_t, in order to express the intent to uniformly + force possibly garbage data being deserialized to standardized + garbage. + + We protect against malicious and ill formed data that would cause the + system to go off the rails at a point of the enemy's choosing, + and we protect against malicious and ill formed data that one party + might interpret in one way, and another party might interpret in a + different way. + + Ill formed data that just gets converted into well formed, but + nonsense data can cause no harm that well formed nonsense data + could not cause. + + It suffices, therefore, to ensure that all implementations misinterpret + input containing unreasonably large numbers as the same number. + + Very large numbers are valid in themselves, but not going to be valid + as part of valid data structures for a thousand years or so. + + The largest numbers occurring in well formed valid data will be + currency amounts, and the total number of the smallest unit of + currency is fixed at 2^64-1 which will suffice for a thousand years. + Or we might allow arbitrary precision floating point with powers of + a thousand, so that sensible numbers to a human are represented by + sensible numbers in the actual representation. 
+ + secret keys, scalars are actually much larger numbers, modulo + 0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ecU + but they are represented in a different format, their binary format + being fixed size little endian format, as 256 bit numbers, though only + 253 bits are actually needed and used, and their human readable + format being 44 digits in a base 58 representation.*/ + + // a compile time test to check if an object class has a machine independent representation, that is, whether the expression ro::serialize(...) is well formed for it + template struct is_serializable{ + template + static constexpr decltype(ro::serialize(std::declval()), bool()) test() { + if constexpr (sizeof...(Args2) > 0) { + return is_serializable::template test(); + } + else { + return true; + } + } + template static constexpr bool test(int = 0) { + return false; + } + static constexpr bool value = is_serializable::template test(); + }; + + template + concept serializable = is_serializable::value; + + static_assert( !serializable + && serializable, char*, std::span>, + "concepts needed"); + + template ro::CompileSizedString< (2 * sizeof(T))>bin2hex(const T& pt) { + ro::CompileSizedString< (2 * sizeof(T))>sz; + sodium_bin2hex(&sz[0], sizeof(pt.blob) * 2 + 1, &pt.blob[0], pt.blob.size()); + return sz; + } + + template T hex2bin(const ro::CompileSizedString< (2 * sizeof(T))>& sz){ + T pt; + size_t bin_len{ sizeof(T) }; + sodium_hex2bin( + reinterpret_cast (&pt), + sizeof(T), + &sz[0], 2 * sizeof(T), + nullptr, &bin_len, nullptr + ); + return pt; + } + + template decltype(std::declval().blob, ro::CompileSizedString < (sizeof(T) * 8 + 5) / 6>()) to_base64_string(const T& p_blob) { + ro::CompileSizedString < (sizeof(T) * 8 + 5) / 6> sz; + bits2base64( + &(p_blob.blob[0]), 0, sizeof(p_blob.blob) * 8, + std::span(sz) + ); + return sz; + } + +} //End ro namespace \ No newline at end of file diff --git a/src/stdafx.h b/src/stdafx.h index a48208b..82ed699 100644 --- a/src/stdafx.h +++ b/src/stdafx.h @@ -92,6 +92,7 @@ static_assert(wxMAJOR_VERSION == 
3 && wxMINOR_VERSION == 2 && wxRELEASE_NUMBER = #include "rotime.h" #include "slash6.h" #include "ISqlite3.h" +#include "serialization.h" #include "ristretto255.h" #include "secrets.h" #include "mpir_and_base58.h"