diff --git a/docs/libraries/mkdocs.sh b/docs/libraries/mkdocs.sh index 01ac989..ea1e53c 100644 --- a/docs/libraries/mkdocs.sh +++ b/docs/libraries/mkdocs.sh @@ -1,6 +1,5 @@ #!/bin/bash set -e -echo `dirname $0` cd `dirname $0` docroot="../" banner_height=banner_height:15ex diff --git a/docs/manifesto/mkdocs.sh b/docs/manifesto/mkdocs.sh index 01ac989..ea1e53c 100644 --- a/docs/manifesto/mkdocs.sh +++ b/docs/manifesto/mkdocs.sh @@ -1,6 +1,5 @@ #!/bin/bash set -e -echo `dirname $0` cd `dirname $0` docroot="../" banner_height=banner_height:15ex diff --git a/docs/mkdocs.sh b/docs/mkdocs.sh index 3eb79ae..878ea19 100644 --- a/docs/mkdocs.sh +++ b/docs/mkdocs.sh @@ -1,6 +1,5 @@ #!/bin/bash set -e -echo `dirname $0` cd `dirname $0` docroot="./" banner_height=banner_height:15ex diff --git a/docs/names/mkdocs.sh b/docs/names/mkdocs.sh index 01ac989..ea1e53c 100644 --- a/docs/names/mkdocs.sh +++ b/docs/names/mkdocs.sh @@ -1,6 +1,5 @@ #!/bin/bash set -e -echo `dirname $0` cd `dirname $0` docroot="../" banner_height=banner_height:15ex diff --git a/docs/notes/mkdocs.sh b/docs/notes/mkdocs.sh index 01ac989..ea1e53c 100644 --- a/docs/notes/mkdocs.sh +++ b/docs/notes/mkdocs.sh @@ -1,6 +1,5 @@ #!/bin/bash set -e -echo `dirname $0` cd `dirname $0` docroot="../" banner_height=banner_height:15ex diff --git a/docs/number_encoding.md b/docs/number_encoding.md index 9f0fd5c..a85ba94 100644 --- a/docs/number_encoding.md +++ b/docs/number_encoding.md @@ -66,7 +66,8 @@ the vertex is stored or represented. We could *represent* the vertices by a self terminating bytestring or bitstring, but only if we wanted to put the vertices of a patricia tree inside *another* patricia tree. Which -seems like a stupid thing to do under most circumstances. And what is being represented +seems like a stupid thing to do under most circumstances. +And what is being represented is itself inherently not self terminating. 
The leaves of the patricia tree represent a data structure @@ -77,26 +78,7 @@ index fully represents all the information of the object) The links inside a vertex also represent a short string of bits, the bits that the vertex pointed to has in addition to the bits -that vertex point already has. This string is typically very -short, one bit implicit in it being a left or right vertex, and -zero or few additional bits, but may be very long, thirty two bytes. - -Because it is of variable, and typically short, length, it is -conveniently represented by a short self terminating variable length data structure -- -the variable length representation of the positive integer -you obtain by prepending a one bit to the bit field, -or the signed integer you obtain by prepending zeroes to bitfield -beginning with a one bit to obtain a small positive integer, -and ones to a bitfield beginning with a zero bit to obtain a small -negative integer. - -But, regardless of how it is represented within the vertex, and how it is manipulated -by the code, what is being represented is a bit field with byte and field alignment -and a bit count. And for large bitfields, which do not fit inside the standard computer -word length, it is inconvenient to represent them as integers, you want to represent them -with field alignment. If using the signed representation, the integers zero and minus one -both represent the empty bit string, and we can reserve one of them to represent -switching to a representation more convenient for longer bitstrings. +that vertex point already has. ### Merkle dag We intend to have a vast Merkle dag, and a vast collection of immutable @@ -134,6 +116,21 @@ $10$ being unary for a one bit wide field, and $0$ being that one bit, indicatin two bytes. Thus is represented by the integer itself $+0\text{x}\,8000$ in big endian format. +There are intrinsics and efficient library code to do endian conversions. 
We want +big endian format for bytestring sort order to sort correctly in the tree. + +bits.h declares the gcc intrinsics:\ +`uint32_t __builtin_bswap32 (uint32_t x)`\ +`uint64_t __builtin_bswap64 (uint64_t x)` + +16 bit swap is just a bit-rotate. + +intrin.h declares the equivalent functions:\ +`uint16_t _byteswap_ushort(uint16_t value);`\ +`uint32_t _byteswap_ulong(uint32_t value);`\ +`uint64_t _byteswap_uint64(uint64_t value);` + + If the representation is less than $0\text{x}8080$ then it does not represent an integer value, and reading such data should result in an exception that ends processing of the data or in special case handling for non integers. @@ -203,48 +200,35 @@ in big endian format. And so on and so forth for signed integers of unlimited size. -## base 58 representation of a sequence of unsigned integers - -Values $n$ in the range $0\le n \lt 58/2$ are represented by a single base 58 character. - -Values $n$ in the range $58/2\le n \lt \lfloor 58*2^{-2}\rfloor*58 +58/2$ are represented by two base 58 characters starting with a base 58 character $c$ in the range $58/2\le c\lt 58/2+\lfloor 58*2^{-2}\rfloor$ - -Values $n$ in the range: - -$$\lfloor 58*2^{-2}\rfloor*58 +58/2\le n \lt \lfloor 58*2^{-3}\rfloor*58^2 +\lfloor 58*2^{-2}\rfloor*58 +58/2$$ - -are similarly represented by three base 58 characters starting with a base 58 character $c$ in the range $58/2+\lfloor 58*2^{-2}\rfloor\le c \lt 58/2+\lfloor 58*2^{-2}\rfloor58+\lfloor 58*2^{-3}\rfloor$ . - -Values $n$ in the range: - -$$\lfloor 58*2^{-3}\rfloor*58^2 +\lfloor 58*2^{-2}\rfloor*58 +58/2\le n \lt \lfloor 58*2^{-4}\rfloor*58^3 +\lfloor 58*2^{-3}\rfloor*58^2 +\lfloor 58*2^{-2}\rfloor*58 +58/2$$ - -are similarly represented by four base 58 characters. - -And so on, for arbitrarily large values. A truly enormous number is going to start with `zzzz....`, `z` being the representation of $58-1$ in base 58. 
- -This amounts to shifting the underlying value to the appropriate range, then displaying it as the shifted base 58 value. - -We display a value in the range $0\le n \lt 58/2$ as itself, - -a value $n$ in the range $58/2\le n \lt \lfloor 58*2^{-2}\rfloor*58 +58/2$ as the base 58 representation of $n+58*(58/2-1)$ - # bitstrings Bitstrings in Merkle-patricia tree representing an sql index are typically very short, so should be represented by a -variable length quantity, except for the leaf edge, -which is fixed size and large, so should not be -represented by variable length quantity. +variable length quantity. Which does not need to have the correct +bytestring sort order. -We use the integer zero to represent this special case, -the integer one to represent the zero length bit string, -integers two and three to represent the one bit bitstring, -integers four to seven to represent the two bit bit string, -and so on and so forth. +So for bitstrings of six bits or less, we represent it as a byte with +a leading zero bit, and the bits following the first one bit are +the bitstring, and if the leading bit is one, it is a byte count +of byte aligned bits. Because we know the start alignment, the +beginning of the bitfield is implicit, and the final byte encodes +a bit field of zero to seven bits. This can represent a bytestring +of one to 128 bytes. However unreasonably large values, representing +variable length bytestrings representing unreasonably large bitstrings, +we reserve for future expansion, since the largest bitstring that will +be valid in normal usage will be thirty three bytes, being a full sized +hash followed by a byte representing the zero length bitstring. -In other words, we represent it as the integer obtained -by prepending a leading one bit to the bit string. 
+If the bitstring representing the edge brings us to the end of the field, it +is a leaf edge, which is a different type, being a pointer to what is +being indexed rather than a pointer to another patricia vertex and +may have additional data. + +An edge in a Merkle-patricia sql index contains the bit path +of the thing pointed to, and the completely unrelated hash of the +thing pointed to, which contains its own type information. +But sometimes, often, we are indexing things +*by* their hash, so we need a flag on a leaf edge to denote this case. # Dewey decimal sequences. diff --git a/docs/pandoc_templates/mkdocs.cfg b/docs/pandoc_templates/mkdocs.cfg index 9d0f199..42cc951 100644 --- a/docs/pandoc_templates/mkdocs.cfg +++ b/docs/pandoc_templates/mkdocs.cfg @@ -10,12 +10,17 @@ fi if [[ -z $targetDocroot ]]; then targetDocroot=$docroot fi +echo "${PWD##*/}" options=$osoptions"--toc --number-sections --toc-depth=5 --from markdown+smart+raw_html+fenced_divs+bracketed_spans --to html5 --wrap=preserve --metadata=lang:en --css=$targetDocroot"pandoc_templates/style.css" -Bnavbar -o" for f in * do if [[ -d $f && -x $f/mkdocs.sh ]]; then echo "recursing into" $f $f/mkdocs.sh + dirx=`dirname $0` + dirx="${dirx%/}" # strip trailing slash (if any) + subdirx="${dirx##*/}" + echo "resuming "$subdirx fi if [[ $f =~ (.*)".md"$ ]];then base=${BASH_REMATCH[1]} @@ -37,3 +42,4 @@ fi fi fi done +echo done with "${PWD##*/}" diff --git a/docs/rootDocs/mkdocs.sh b/docs/rootDocs/mkdocs.sh index 35bb001..4717cf0 100644 --- a/docs/rootDocs/mkdocs.sh +++ b/docs/rootDocs/mkdocs.sh @@ -1,6 +1,5 @@ #!/bin/bash set -e -echo `dirname $0` cd `dirname $0` docroot="../" targetDocroot="docs/" diff --git a/docs/setup/mkdocs.sh b/docs/setup/mkdocs.sh index 01ac989..ea1e53c 100644 --- a/docs/setup/mkdocs.sh +++ b/docs/setup/mkdocs.sh @@ -1,6 +1,5 @@ #!/bin/bash set -e -echo `dirname $0` cd `dirname $0` docroot="../" banner_height=banner_height:15ex