2023-09-27 07:55:13 -04:00
|
|
|
|
namespace ro {
|
2023-09-25 04:30:42 -04:00
|
|
|
|
|
|
|
|
|
// Decay to pointer is dangerously convenient,
|
|
|
|
|
// but in some situations it is just convenient
|
|
|
|
|
// This class provides an std::array one larger
|
|
|
|
|
// than the compile time string size, which decays
|
|
|
|
|
// to char*, std::string, and wxString
|
|
|
|
|
// In some code, this is ambiguous, so casts
|
|
|
|
|
// must sometimes be explicitly invoked.
|
|
|
|
|
template <unsigned int stringlen>
|
|
|
|
|
class CompileSizedString : public std::array<char, stringlen + 1>{
|
|
|
|
|
public:
|
|
|
|
|
static constexpr int length{ stringlen };
|
|
|
|
|
CompileSizedString() {
|
|
|
|
|
*(this->rbegin()) = '0';
|
|
|
|
|
}
|
|
|
|
|
CompileSizedString(char *psz) {
|
|
|
|
|
auto tsz{ this->rbegin() };
|
|
|
|
|
*tsz = '0';
|
|
|
|
|
if (psz != nullptr) {
|
|
|
|
|
auto usz = tsz + strlen;
|
|
|
|
|
while (tsz < usz && *psz != '\0')
|
|
|
|
|
*tsz++ = *psz++;
|
|
|
|
|
*tsz = '\0';
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
operator char* () & {
|
|
|
|
|
char* pc = &(static_cast<std::array<char, stringlen + 1>*>(this)->operator[](0));
|
|
|
|
|
return pc;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
operator const char* () const& {
|
|
|
|
|
const char* pc = &(static_cast<const std::array<char, stringlen + 1>*>(this)->operator[](0));
|
|
|
|
|
return pc;
|
|
|
|
|
}
|
|
|
|
|
operator const char* () const&& {
|
|
|
|
|
const char* pc = &(static_cast<const std::array<char, stringlen + 1>*>(this)->operator[](0));
|
|
|
|
|
return pc;
|
|
|
|
|
}
|
|
|
|
|
operator std::string() const& {
|
|
|
|
|
return std::string((const char*)*this, this->length);
|
|
|
|
|
}
|
|
|
|
|
operator std::string() const&& {
|
|
|
|
|
return std::string((const char*)*this, this->length);
|
|
|
|
|
}
|
|
|
|
|
operator wxString() const& {
|
|
|
|
|
return wxString::FromUTF8Unchecked((const char*)(*this));
|
|
|
|
|
}
|
|
|
|
|
operator std::span<byte>() const& {
|
|
|
|
|
return std::span<byte>(static_cast<std::nullptr_t>((char*)*this), stringlen + 1);
|
|
|
|
|
}
|
|
|
|
|
operator wxString() const&& {
|
|
|
|
|
return wxString::FromUTF8Unchecked((const char*)(*this));
|
|
|
|
|
}
|
|
|
|
|
operator std::span<byte>() const&& {
|
|
|
|
|
return std::span<byte>(static_cast<std::nullptr_t>((char*)*this), stringlen + 1);
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
// This template generates a span over an indexable byte type,
|
|
|
|
|
// such as a C array or an std::array, but not pointers
|
2023-09-30 01:13:25 -04:00
|
|
|
|
template<class T>
|
|
|
|
|
concept byte_spannable = requires (T a) {
|
|
|
|
|
std::size(a);
|
|
|
|
|
a[0];
|
|
|
|
|
} && sizeof(std::declval<T>()[0]) == 1;
|
|
|
|
|
|
|
|
|
|
template<byte_spannable T>
|
|
|
|
|
auto serialize(const T& a) {
|
2023-09-29 04:44:49 -04:00
|
|
|
|
int l;
|
|
|
|
|
const void* pt;
|
|
|
|
|
if constexpr (std::is_same_v<std::remove_cvref_t<T>, std::string>) {
|
|
|
|
|
l = a.length() + 1;
|
|
|
|
|
pt = a.c_str();
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
l = std::size(a);
|
|
|
|
|
pt = &a[0];
|
|
|
|
|
}
|
2023-09-29 07:16:13 -04:00
|
|
|
|
return std::span(static_cast<const byte *>(pt), l);
|
2023-09-25 04:30:42 -04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Compile time test to see if a type has a blob array member
|
|
|
|
|
// This can be used in if constexpr (is_blob_field_type<T>::value)
|
|
|
|
|
// By convention, blob fields are an std::array of unsigned bytes
|
|
|
|
|
// therefore already serializable.
|
|
|
|
|
// Compile-time test: `value` is true when T has a member `blob` that offers
// size() and whose indexed element is one byte wide, false otherwise.
// Usable as: if constexpr (is_blob_field_type<T>::value)
template <class T> struct is_blob_field_type {
    // Preferred overload (exact match for the int argument). It drops out of
    // overload resolution when U has no suitable `blob` member.
    template <typename U>
    static constexpr decltype(std::declval<U>().blob.size(), bool()) test(int) {
        return sizeof(std::declval<U>().blob[0]) == 1;
    }

    // Fallback overload. The ellipsis ranks below the int overload, so the
    // two candidates are never ambiguous when both are viable.
    template <typename U>
    static constexpr bool test(...) {
        return false;
    }

    static constexpr bool value = is_blob_field_type::template test<T>(0);
};
|
|
|
|
|
|
|
|
|
|
// Concept wrapper around ro::is_blob_field_type<T>::value, so that templates
// can be constrained with `template <blob_type T>`.
template <class T>
concept blob_type = ro::is_blob_field_type<T>::value;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// At present our serial classes consist of std::span<uint8_t> and custom classes that publicly inherit from std::span<byte>
|
|
|
|
|
// To handle compound objects, add custom classes inheriting from std::span<byte>[n]
|
|
|
|
|
|
|
|
|
|
// template class that generates a std::span of bytes over the blob
|
|
|
|
|
// field of any object containing a blob record, which is normally sufficient
|
|
|
|
|
// for a machine independent representation of that object
|
|
|
|
|
// Serializes an object that satisfies blob_type by serializing its `blob`
// member; the returned span views that member and does not copy it.
template <blob_type T> std::span<const byte> serialize(const T& pt) {
    const auto& raw = pt.blob;
    return serialize(raw);
}
|
|
|
|
|
|
|
|
|
|
// method that assumes that any char * pointer points a null terminated string
|
|
|
|
|
// and generates a std::span of bytes over the string including the terminating
|
|
|
|
|
// null.
|
|
|
|
|
// we assume the string is already machine independent, which is to say, we assume
|
|
|
|
|
// it comes from a utf8 locale.
|
|
|
|
|
|
|
|
|
|
// Returns a span over the NUL-terminated string `sp`, terminator included
// (strlen(sp) + 1 characters).
// Precondition: `sp` is not null.
// The element type is plain `char`, so constness is cast away to build the
// span; callers must treat the span as read-only.
inline auto serialize(const char* sp) {
    assert(sp != nullptr);
    return std::span(const_cast<char*>(sp), strlen(sp) + 1);
}
|
|
|
|
|
|
|
|
|
|
// Serializes the object type returned by wxString::ToUTF8(): it is converted
// to const char* and handed to the const char* overload of serialize.
inline auto serialize(const decltype(std::declval<wxString>().ToUTF8()) sz) {
    const char* const utf8 = static_cast<const char*>(sz);
    return serialize(utf8);
}
|
2023-09-30 16:11:14 -04:00
|
|
|
|
/* Don't do this. Disaster ensues,
|
|
|
|
|
|
2023-09-25 04:30:42 -04:00
|
|
|
|
inline auto serialize(const wxString& wxstr) {
|
|
|
|
|
return serialize(static_cast<const char*>(wxstr.ToUTF8()));
|
|
|
|
|
}
|
|
|
|
|
If we allowed wxwidgets string to be serializable, all sorts of surprising things
|
|
|
|
|
would be serializable in surprising ways, because wxWidgets can convert all
|
|
|
|
|
sorts of things into strings that you were likely not expecting, in ways
|
|
|
|
|
unlikely to be machine independent, so if you give an object to be
|
|
|
|
|
hashed that you have not provided some correct means for serializing, C++ is
|
|
|
|
|
apt to unhelpfully and unexpectedly turn it into a wxString,
|
|
|
|
|
|
|
|
|
|
If you make wxStrings hashable, surprising things become hashable.
|
|
|
|
|
However, we do make the strange data structure provided by wxString.ToUTF8() hashable,
|
|
|
|
|
so that the wxString will not be implicitly hashable, but will be explicitly hashable.
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
// data structure containing a serialized unsigned integer
|
|
|
|
|
// Converts an unsigned integer to VLQ format, and creates a bytespan pointing at it.
|
|
|
|
|
// VLQ format, Variable Length Quantity (It is a standard used by LLVM and others)
|
2023-09-27 07:55:13 -04:00
|
|
|
|
// On reflection, VLQ format is not convenient for the intended usage (merkle patricia trees
|
|
|
|
|
// representing SQL indexes, and a better format is to compress leading zero or leading 0xFF bytes
|
|
|
|
|
// with the length of the run being implied by a count of the bytes following the run)
|
|
|
|
|
|
2023-09-25 04:30:42 -04:00
|
|
|
|
// Holds an unsigned integer encoded as big-endian groups of seven bits, with
// bit 7 set on every byte except the last. The encoding is written
// right-aligned into `bblob`, and the std::span<byte> base class views
// exactly the bytes that were written.
// Because the span points into this object's own `bblob`, copying is
// user-defined so that a copy views its own buffer rather than the source's.
template<std::unsigned_integral T> class userial : public std::span<byte> {
public:
    // Room for the longest encoding of T: ceil(digits / 7) bytes.
    std::array<byte, (std::numeric_limits<T>::digits + 6) / 7> bblob;

    // Encodes `i`, filling `bblob` from its last byte backwards.
    userial(T i) : bblob{} {
        byte* const end = bblob.data() + bblob.size();
        byte* p = end;
        // The final byte carries the low seven bits with bit 7 clear.
        *(--p) = static_cast<byte>(i & 0x7f);
        i >>= 7;
        // Each earlier byte carries the next seven bits with bit 7 set.
        while (i != 0) {
            *(--p) = static_cast<byte>((i & 0x7f) | 0x80);
            i >>= 7;
        }
        assert(p >= bblob.data());
        static_cast<std::span<byte>&>(*this) = std::span<byte>(p, end);
    }

    // Copies the buffer and re-points the span at the copy's own buffer.
    userial(const userial& other) : std::span<byte>(), bblob(other.bblob) {
        rebind(other.size());
    }

    userial& operator=(const userial& other) {
        const std::size_t used = other.size();
        bblob = other.bblob;
        rebind(used);
        return *this;
    }

private:
    // Makes the span base view the last `used` bytes of this object's bblob.
    void rebind(std::size_t used) {
        static_cast<std::span<byte>&>(*this) =
            std::span<byte>(bblob.data() + (bblob.size() - used), used);
    }
};
|
|
|
|
|
|
|
|
|
|
// data structure containing a serialized signed integer,
|
|
|
|
|
// Converts a signed integer to VLQ format, and creates a bytespan pointing at it.
|
|
|
|
|
// VLQ format, Variable Length Quantity (It is a standard used by LLVM and others)
|
|
|
|
|
// Holds a signed integer encoded as big-endian groups of seven bits, with
// bit 7 set on every byte except the last. Redundant leading sign bits are
// dropped, keeping one so that bit 6 of the first byte carries the sign.
// The encoding is written right-aligned into `bblob`, and the std::span<byte>
// base class views exactly the bytes that were written.
// Because the span points into this object's own `bblob`, copying is
// user-defined so that a copy views its own buffer rather than the source's.
template<std::signed_integral T> class iserial : public std::span<byte> {
public:
    // Room for the longest encoding of T: value bits plus one sign bit,
    // seven bits per byte.
    std::array<byte, (std::numeric_limits<T>::digits + 7) / 7> bblob;

    // Encodes `i`, filling `bblob` from its last byte backwards.
    iserial(T i) : bblob{} {
        // Count the repeated leading sign bits in T's own unsigned type, so
        // the result does not depend on the width of size_t.
        using U = std::make_unsigned_t<T>;
        const U ui = static_cast<U>(i);
        const int repeated = (i < 0) ? std::countl_one(ui) : std::countl_zero(ui);
        // Number of continuation bytes that precede the final byte.
        unsigned count =
            static_cast<unsigned>(std::numeric_limits<U>::digits - repeated) / 7;

        byte* const end = bblob.data() + bblob.size();
        byte* p = end;
        // The final byte carries the low seven bits with bit 7 clear.
        *(--p) = static_cast<byte>(i & 0x7f);
        while (count-- != 0) {
            i >>= 7;
            *(--p) = static_cast<byte>((i & 0x7f) | 0x80);
        }
        assert(p >= bblob.data());
        static_cast<std::span<byte>&>(*this) = std::span<byte>(p, end);
    }

    // Copies the buffer and re-points the span at the copy's own buffer.
    iserial(const iserial& other) : std::span<byte>(), bblob(other.bblob) {
        rebind(other.size());
    }

    iserial& operator=(const iserial& other) {
        const std::size_t used = other.size();
        bblob = other.bblob;
        rebind(used);
        return *this;
    }

private:
    // Makes the span base view the last `used` bytes of this object's bblob.
    void rebind(std::size_t used) {
        static_cast<std::span<byte>&>(*this) =
            std::span<byte>(bblob.data() + (bblob.size() - used), used);
    }
};
|
|
|
|
|
|
|
|
|
|
// converts machine dependent representation of an integer
|
|
|
|
|
// into a span pointing at a compact machine independent representation of an integer
|
|
|
|
|
// The integer is split into seven bit nibbles in big endian order
|
|
|
|
|
// (VLQ format), with the high
|
|
|
|
|
// order bit of the byte indicating that more bytes are to come.
|
|
|
|
|
// for an unsigned integer, all high order bytes of the form 0x80 are left out.
|
|
|
|
|
// for a positive signed integer, the same, except that the first byte
|
|
|
|
|
// of what is left must have zero at bit 6
|
|
|
|
|
// for a negative signed integer, all the 0xFF bytes are left out, except
|
|
|
|
|
// that the first byte of what is left must have a one bit at bit six.
|
|
|
|
|
//
|
|
|
|
|
// small numbers get compressed.
|
|
|
|
|
// primarily used by hash and hsh so that the same numbers on different
|
|
|
|
|
// machines will generate the same hash
|
|
|
|
|
// Serializes an unsigned integer by wrapping it in a userial<T>.
// Deserialize functions need not share one name (serialized data carries no
// type information, so they must be told the type), but everything that can
// be serialized goes through an overload called `serialize`, so that one
// template can handle any number of serializable types.
template<std::unsigned_integral T> userial<T> serialize(T i) {
    return userial<T>(i);
}
|
|
|
|
|
// Serializes a signed integer by wrapping it in an iserial<T>.
// Deserialize functions need not share one name, but everything that can be
// serialized goes through an overload called `serialize`, so that one
// template can handle any number of serializable types.
template<std::signed_integral T> iserial<T> serialize(T i) {
    return iserial<T>(i);
}
|
|
|
|
|
|
|
|
|
|
// Turns a compact machine independent representation of an integer
|
|
|
|
|
// into a 64 bit signed integer
|
|
|
|
|
// Decodes a signed integer from the seven-bits-per-byte encoding at `p`.
// Bit 6 of the first byte is the sign; every byte with bit 7 set is followed
// by another byte; the first byte with bit 7 clear ends the number.
// Throws BadDataException when more continuation bytes are present than a
// 64 bit value can need. The limit is enforced while scanning, so decoding
// stops at the limit instead of reading on until a terminating byte turns up.
// `p` must point at readable memory up to and including the terminating byte.
template<std::signed_integral T> T deserialize(const byte* p) {
    // Largest permitted number of bytes that have bit 7 set.
    constexpr int max_continuation = (std::numeric_limits<int64_t>::digits + 6) / 7;
    const byte* const start = p;
    // Start from all-ones above bit 5 when the sign bit is set, else zero.
    T i = (*p & 0x40) ? -64 : 0;
    while (*p & 0x80) {
        if (p - start >= max_continuation) throw BadDataException();
        i = (i | (*p++ & 0x7F)) << 7;
    }
    return i | *p;
}
|
|
|
|
|
// Turns a compact machine independent representation of an integer
|
|
|
|
|
// into a 64 bit unsigned integer
|
|
|
|
|
// Decodes an unsigned integer from the seven-bits-per-byte encoding at `p`.
// Every byte with bit 7 set is followed by another byte; the first byte with
// bit 7 clear ends the number.
// Throws BadDataException when more than nine continuation bytes are present.
// The limit is enforced while scanning, so decoding stops at the limit
// instead of reading on until a terminating byte turns up.
// `p` must point at readable memory up to and including the terminating byte.
template<std::unsigned_integral T> T deserialize(const byte * p) {
    // Ten bytes hold 64 bits at seven bits each: nine continuation bytes
    // followed by one terminating byte.
    constexpr int max_continuation = (std::numeric_limits<uint64_t>::digits + 6) / 7 - 1;
    const byte* const start = p;
    T i{ 0 };
    while (*p & 0x80) {
        if (p - start >= max_continuation) throw BadDataException();
        i = (i | (*p++ & 0x7F)) << 7;
    }
    return i | *p;
}
|
|
|
|
|
|
|
|
|
|
// Turns a compact machine independent representation of an integer
|
|
|
|
|
// into a 64 bit signed integer
|
|
|
|
|
// Decodes an integer of type T from the start of the byte span `g`.
// Throws BadDataException when `g` is empty or contains no terminating byte
// (a byte with bit 7 clear), so the pointer-based decoder is only invoked on
// a number that lies entirely inside the span.
template<std::integral T> T deserialize(std::span<const byte> g) {
    const byte* const first = g.data();
    const byte* const last = first + g.size();
    // Find the terminating byte without stepping past the end of the span.
    const byte* p = first;
    while (p != last && (*p & 0x80)) ++p;
    if (p == last) throw BadDataException();
    return deserialize<T>(first);
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
It will be about a thousand years before numbers larger than 64 bits
|
|
|
|
|
appear in valid well formed input, and bad data structures have to be
|
|
|
|
|
dealt with at a much higher level that knows what the numbers mean,
|
|
|
|
|
and deals with them according to their meaning
|
|
|
|
|
|
|
|
|
|
Until then the low level code will arbitrarily truncate numbers larger
|
|
|
|
|
than sixty four bits, but numbers larger than sixty four bits are
|
|
|
|
|
permissible in input, are valid at the lowest level.
|
|
|
|
|
|
|
|
|
|
We return uint64_t, rather than uint_fast64_t to ensure that all
|
|
|
|
|
implementations misinterpret garbage and malicious input in the
|
|
|
|
|
same way.
|
|
|
|
|
We cannot protect against Machiavelli perverting the input, so we
|
|
|
|
|
don't try very hard to prevent Murphy perverting the input,
|
|
|
|
|
but we do try to prevent Machiavelli from perverting the input in
|
|
|
|
|
ways that will induce peers to disagree.
|
|
|
|
|
|
|
|
|
|
We use an explicit narrow_cast, rather than simply declaring the
|
|
|
|
|
function to be uint64_t, in order to express the intent to uniformly
|
|
|
|
|
force possibly garbage data being deserialized to standardized
|
|
|
|
|
garbage.
|
|
|
|
|
|
|
|
|
|
We protect against malicious and ill formed data that would cause the
|
|
|
|
|
system to go off the rails at a point of the enemy's choosing,
|
|
|
|
|
and we protect against malicious and ill formed data that one party
|
|
|
|
|
might interpret in one way, and another party might interpret in a
|
|
|
|
|
different way.
|
|
|
|
|
|
|
|
|
|
Ill formed data that just gets converted into well formed, but
|
|
|
|
|
nonsense data can cause no harm that well formed nonsense data
|
|
|
|
|
could not cause.
|
|
|
|
|
|
|
|
|
|
It suffices, therefore, to ensure that all implementations misinterpret
|
|
|
|
|
input containing unreasonably large numbers as the same number.
|
|
|
|
|
|
|
|
|
|
Very large numbers are valid in themselves, but not going to be valid
|
|
|
|
|
as part of valid data structures for a thousand years or so.
|
|
|
|
|
|
|
|
|
|
The largest numbers occurring in well formed valid data will be
|
|
|
|
|
currency amounts, and the total number of the smallest unit of
|
|
|
|
|
currency is fixed at 2^64-1 which will suffice for a thousand years.
|
|
|
|
|
Or we might allow arbitrary precision floating point with powers of
|
|
|
|
|
a thousand, so that sensible numbers to a human are represented by
|
|
|
|
|
sensible numbers in the actual representation.
|
|
|
|
|
|
|
|
|
|
secret keys, scalars are actually much larger numbers, modulo
|
|
|
|
|
0x1000000000000000000000000000000014def9dea2f79cd65812631a5cf5d3ecU
|
|
|
|
|
but they are represented in a different format, their binary format
|
|
|
|
|
being fixed size low endian format, as 256 bit numbers, though only
|
|
|
|
|
253 bits are actually needed and used, and their human readable
|
|
|
|
|
format being 44 digits in a base 58 representation.*/
|
|
|
|
|
|
|
|
|
|
// a compile time test to check if an object class has a machine independent representation
|
2023-09-30 16:11:14 -04:00
|
|
|
|
template <typename T, typename... Args> static constexpr bool serializable() {
|
|
|
|
|
if constexpr (requires(T a) {
|
|
|
|
|
serialize(a);
|
|
|
|
|
}) {
|
|
|
|
|
if constexpr (sizeof...(Args) > 0) return serializable<Args...>();
|
|
|
|
|
else return true;
|
|
|
|
|
}
|
|
|
|
|
else return false;
|
2023-09-30 01:13:25 -04:00
|
|
|
|
};
|
|
|
|
|
|
2023-09-25 04:30:42 -04:00
|
|
|
|
template<typename... Args>
|
2023-09-30 16:11:14 -04:00
|
|
|
|
concept has_machine_independent_representation = serializable<Args...>();
|
2023-09-25 04:30:42 -04:00
|
|
|
|
|
2023-10-01 21:49:03 -04:00
|
|
|
|
template<has_machine_independent_representation T>
|
|
|
|
|
T trigger_error(T x) { return x; };
|
|
|
|
|
|
2023-09-30 01:13:25 -04:00
|
|
|
|
static_assert( !has_machine_independent_representation<double>
|
2023-09-30 16:11:14 -04:00
|
|
|
|
&& has_machine_independent_representation<std::span<const byte>, char*, std::span<const char>>,
|
2023-09-25 04:30:42 -04:00
|
|
|
|
"concepts needed");
|
|
|
|
|
|
|
|
|
|
// Hex-encodes the `blob` member of `pt` with sodium_bin2hex into a
// CompileSizedString sized for 2 * sizeof(T) characters, and returns it.
template<class T> ro::CompileSizedString< (2 * sizeof(T))>bin2hex(const T& pt) {
    using hex_string = ro::CompileSizedString<(2 * sizeof(T))>;
    hex_string hex;
    const auto& blob = pt.blob;
    // The maximum hex length passed includes room for the terminator.
    sodium_bin2hex(&hex[0], sizeof(blob) * 2 + 1, &blob[0], blob.size());
    return hex;
}
|
|
|
|
|
|
|
|
|
|
// Decodes 2 * sizeof(T) hex characters from `sz` with sodium_hex2bin directly
// into the object representation of a default-constructed T, and returns it.
// Throws BadDataException when sodium_hex2bin reports failure or does not
// produce exactly sizeof(T) bytes, rather than returning a partly filled T.
template<class T> T hex2bin(const ro::CompileSizedString< (2 * sizeof(T))>& sz){
    T pt;
    size_t bin_len{ sizeof(T) };
    const int rc = sodium_hex2bin(
        reinterpret_cast<unsigned char*>(&pt),
        sizeof(T),
        &sz[0], 2 * sizeof(T),
        nullptr, &bin_len, nullptr
    );
    if (rc != 0 || bin_len != sizeof(T)) throw BadDataException();
    return pt;
}
|
|
|
|
|
|
|
|
|
|
template <class T>decltype(std::declval<T>().blob, ro::CompileSizedString < (sizeof(T) * 8 + 5) / 6>()) to_base64_string(const T& p_blob) {
|
|
|
|
|
ro::CompileSizedString < (sizeof(T) * 8 + 5) / 6> sz;
|
|
|
|
|
bits2base64(
|
|
|
|
|
&(p_blob.blob[0]), 0, sizeof(p_blob.blob) * 8,
|
|
|
|
|
std::span<char>(sz)
|
|
|
|
|
);
|
|
|
|
|
return sz;
|
|
|
|
|
}
|
|
|
|
|
|
2023-09-30 01:13:25 -04:00
|
|
|
|
} //End ro namespace
|