leaving potentially inconvenient history behind
96
.gitattributes
vendored
Normal file
@ -0,0 +1,96 @@
|
||||
###############################################################################
|
||||
# Handle line endings automatically for files detected as text
|
||||
# and leave all files detected as binary untouched.
|
||||
* text=auto
|
||||
# Force the following filetypes to have unix eols and encoding, so that Windows does not break them.
|
||||
# If a file is going to be used on linux and windows, we want it invariant,
|
||||
# rather than automatically translated, because automatic translation always screw things up.
|
||||
.gitignore text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
.gitattributes text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
.gitmodules text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.sh text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.c text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.cpp text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.h text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.txt text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.html text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.htm text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.md text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.pandoc text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.css text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.manifest text eol=lf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
|
||||
|
||||
makefile text eol=lf encoding=utf-8
|
||||
Makefile text eol=lf encoding=utf-8
|
||||
|
||||
# Force the following Visual Studio specific filetypes to have Windows eols,
|
||||
# so that Git does not break them
|
||||
*.bat text eol=crlf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.cmd text eol=crlf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.rc text eol=crlf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.sln text eol=crlf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.vcproj text eol=crlf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.vcxproj text eol=crlf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.vcxproj.filters text eol=crlf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
*.vcxproj.user text eol=crlf encoding=utf-8 whitespace=trailing-space,space-before-tab,tabwidth=4
|
||||
|
||||
#Don't let git screw with pdf files
|
||||
*.pdf -text
|
||||
|
||||
# Force binary files to be binary
|
||||
*.gif -text -diff
|
||||
*.jpg -text -diff
|
||||
*.jpeg -text -diff
|
||||
*.png -text -diff
|
||||
*.webp -text -diff
|
||||
|
||||
###############################################################################
|
||||
# Set default behavior for command prompt diff.
|
||||
#
|
||||
# This is need for earlier builds of msysgit that does not have it on by
|
||||
# default for csharp files.
|
||||
# Note: This is only used by command line
|
||||
###############################################################################
|
||||
#*.cs diff=csharp
|
||||
|
||||
###############################################################################
|
||||
# Set the merge driver for project and solution files
|
||||
#
|
||||
# Merging from the command prompt will add diff markers to the files if there
|
||||
# are conflicts (Merging from VS is not affected by the settings below, in VS
|
||||
# the diff markers are never inserted). Diff markers may cause the following
|
||||
# file extensions to fail to load in VS. An alternative would be to treat
|
||||
# these files as binary and thus will always conflict and require user
|
||||
# intervention with every merge. To do so, just uncomment the entries below
|
||||
###############################################################################
|
||||
#*.sln merge=binary
|
||||
#*.csproj merge=binary
|
||||
#*.vbproj merge=binary
|
||||
#*.vcxproj merge=binary
|
||||
#*.vcproj merge=binary
|
||||
#*.dbproj merge=binary
|
||||
#*.fsproj merge=binary
|
||||
#*.lsproj merge=binary
|
||||
#*.wixproj merge=binary
|
||||
#*.modelproj merge=binary
|
||||
#*.sqlproj merge=binary
|
||||
#*.wwaproj merge=binary
|
||||
|
||||
###############################################################################
|
||||
# diff behavior for common document formats
|
||||
#
|
||||
# Convert binary document formats to text before diffing them. This feature
|
||||
# is only available from the command line. Turn it on by uncommenting the
|
||||
# entries below.
|
||||
###############################################################################
|
||||
#*.doc diff=astextplain
|
||||
#*.DOC diff=astextplain
|
||||
#*.docx diff=astextplain
|
||||
#*.DOCX diff=astextplain
|
||||
#*.dot diff=astextplain
|
||||
#*.DOT diff=astextplain
|
||||
#*.pdf diff=astextplain
|
||||
#*.PDF diff=astextplain
|
||||
#*.rtf diff=astextplain
|
||||
#*.RTF diff=astextplain
|
29
.gitconfig
Normal file
@ -0,0 +1,29 @@
|
||||
[core]
|
||||
autocrlf = input
|
||||
whitespace = trailing-space,space-before-tab,tabwidth=4
|
||||
safecrlf
|
||||
[alias]
|
||||
lg = log --reverse --max-count=4 --oneline --pretty='format:%C(yellow)%h %d %Creset%p %C("#60A0FF")%cr %Cgreen %cn %GT trust%Creset%n%s%n'
|
||||
graph = log --max-count=20 --graph --pretty=format:'%C(yellow)%h%Creset %s %Cgreen(%cr) %C(bold blue)%cn %GT%Creset' --abbrev-commit
|
||||
alias = ! git config --get-regexp ^alias\\. | sed -e s/^alias\\.// -e s/\\ /\\ =\\ / | grep -v ^'alias ' | sort
|
||||
fixws = !"\
|
||||
if (! git diff-files --quiet .) && \
|
||||
(! git diff-index --quiet --cached HEAD) ; then \
|
||||
git commit -m FIXWS_SAVE_INDEX && \
|
||||
git add -u :/ && \
|
||||
git commit -m FIXWS_SAVE_TREE && \
|
||||
git rebase --whitespace=fix HEAD~2 && \
|
||||
git reset HEAD~ && \
|
||||
git reset --soft HEAD~ ; \
|
||||
elif (! git diff-files --quiet .) ; then \
|
||||
git add -u :/ && \
|
||||
git commit -m FIXWS_SAVE_TREE && \
|
||||
git rebase --whitespace=fix HEAD~ && \
|
||||
git reset HEAD~ ; \
|
||||
elif (! git diff-index --quiet --cached HEAD) ; then \
|
||||
git commit -m FIXWS_SAVE_INDEX && \
|
||||
git rebase --whitespace=fix HEAD~ && \
|
||||
git reset --soft HEAD~ ; \
|
||||
fi"
|
||||
[commit]
|
||||
gpgSign = true
|
271
.gitignore
vendored
Normal file
@ -0,0 +1,271 @@
|
||||
*.bat
|
||||
## Ignore Visual Studio temporary files, build results, and
|
||||
## files generated by popular Visual Studio add-ons.
|
||||
*.bak
|
||||
|
||||
# User-specific files
|
||||
*.suo
|
||||
*.user
|
||||
*.userosscache
|
||||
*.sln.docstates
|
||||
*.exe
|
||||
*.idb
|
||||
*.vcxproj.filters
|
||||
*.html
|
||||
*.htm
|
||||
wallet.cppcheck
|
||||
|
||||
# User-specific files (MonoDevelop/Xamarin Studio)
|
||||
*.userprefs
|
||||
|
||||
# Build results
|
||||
[Dd]ebug/
|
||||
[Dd]ebugPublic/
|
||||
[Rr]elease/
|
||||
[Rr]eleases/
|
||||
x64/
|
||||
x86/
|
||||
bld/
|
||||
[Bb]in/
|
||||
[Oo]bj/
|
||||
[Ll]og/
|
||||
|
||||
.vscode
|
||||
|
||||
# Visual Studio 2015 cache/options directory
|
||||
.vs/
|
||||
# Uncomment if you have tasks that create the project's static files in wwwroot
|
||||
#wwwroot/
|
||||
|
||||
# MSTest test Results
|
||||
[Tt]est[Rr]esult*/
|
||||
[Bb]uild[Ll]og.*
|
||||
|
||||
# NUNIT
|
||||
*.VisualState.xml
|
||||
TestResult.xml
|
||||
|
||||
# Build Results of an ATL Project
|
||||
[Dd]ebugPS/
|
||||
[Rr]eleasePS/
|
||||
dlldata.c
|
||||
|
||||
# DNX
|
||||
project.lock.json
|
||||
project.fragment.lock.json
|
||||
artifacts/
|
||||
|
||||
*_i.c
|
||||
*_p.c
|
||||
*_i.h
|
||||
*.ilk
|
||||
*.meta
|
||||
*.obj
|
||||
*.pch
|
||||
*.pdb
|
||||
*.pgc
|
||||
*.pgd
|
||||
*.rsp
|
||||
*.sbr
|
||||
*.tlb
|
||||
*.tli
|
||||
*.tlh
|
||||
*.tmp
|
||||
*.tmp_proj
|
||||
*.log
|
||||
*.vspscc
|
||||
*.vssscc
|
||||
.builds
|
||||
*.pidb
|
||||
*.svclog
|
||||
*.scc
|
||||
|
||||
# Chutzpah Test files
|
||||
_Chutzpah*
|
||||
|
||||
# Visual C++ cache files
|
||||
ipch/
|
||||
*.aps
|
||||
*.ncb
|
||||
*.opendb
|
||||
*.opensdf
|
||||
*.sdf
|
||||
*.cachefile
|
||||
*.VC.db
|
||||
*.VC.VC.opendb
|
||||
|
||||
# Visual Studio profiler
|
||||
*.psess
|
||||
*.vsp
|
||||
*.vspx
|
||||
*.sap
|
||||
|
||||
# TFS 2012 Local Workspace
|
||||
$tf/
|
||||
|
||||
# Guidance Automation Toolkit
|
||||
*.gpState
|
||||
|
||||
# ReSharper is a .NET coding add-in
|
||||
_ReSharper*/
|
||||
*.[Rr]e[Ss]harper
|
||||
*.DotSettings.user
|
||||
|
||||
# JustCode is a .NET coding add-in
|
||||
.JustCode
|
||||
|
||||
# TeamCity is a build add-in
|
||||
_TeamCity*
|
||||
|
||||
# DotCover is a Code Coverage Tool
|
||||
*.dotCover
|
||||
|
||||
# NCrunch
|
||||
_NCrunch_*
|
||||
.*crunch*.local.xml
|
||||
nCrunchTemp_*
|
||||
|
||||
# MightyMoose
|
||||
*.mm.*
|
||||
AutoTest.Net/
|
||||
|
||||
# Web workbench (sass)
|
||||
.sass-cache/
|
||||
|
||||
# Installshield output folder
|
||||
[Ee]xpress/
|
||||
|
||||
# DocProject is a documentation generator add-in
|
||||
DocProject/buildhelp/
|
||||
DocProject/Help/*.HxT
|
||||
DocProject/Help/*.HxC
|
||||
DocProject/Help/*.hhc
|
||||
DocProject/Help/*.hhk
|
||||
DocProject/Help/*.hhp
|
||||
DocProject/Help/Html2
|
||||
DocProject/Help/html
|
||||
|
||||
# Click-Once directory
|
||||
publish/
|
||||
|
||||
# Publish Web Output
|
||||
*.[Pp]ublish.xml
|
||||
*.azurePubxml
|
||||
# TODO: Comment the next line if you want to checkin your web deploy settings
|
||||
# but database connection strings (with potential passwords) will be unencrypted
|
||||
#*.pubxml
|
||||
*.publishproj
|
||||
|
||||
# Microsoft Azure Web App publish settings. Comment the next line if you want to
|
||||
# checkin your Azure Web App publish settings, but sensitive information contained
|
||||
# in these scripts will be unencrypted
|
||||
PublishScripts/
|
||||
|
||||
# NuGet Packages
|
||||
*.nupkg
|
||||
# The packages folder can be ignored because of Package Restore
|
||||
**/packages/*
|
||||
# except build/, which is used as an MSBuild target.
|
||||
!**/packages/build/
|
||||
# Uncomment if necessary however generally it will be regenerated when needed
|
||||
#!**/packages/repositories.config
|
||||
# NuGet v3's project.json files produces more ignoreable files
|
||||
*.nuget.props
|
||||
*.nuget.targets
|
||||
|
||||
# Microsoft Azure Build Output
|
||||
csx/
|
||||
*.build.csdef
|
||||
|
||||
# Microsoft Azure Emulator
|
||||
ecf/
|
||||
rcf/
|
||||
|
||||
# Windows Store app package directories and files
|
||||
AppPackages/
|
||||
BundleArtifacts/
|
||||
Package.StoreAssociation.xml
|
||||
_pkginfo.txt
|
||||
|
||||
# Visual Studio cache files
|
||||
# files ending in .cache can be ignored
|
||||
*.[Cc]ache
|
||||
# but keep track of directories ending in .cache
|
||||
!*.[Cc]ache/
|
||||
|
||||
# Others
|
||||
ClientBin/
|
||||
~$*
|
||||
*~
|
||||
*.dbmdl
|
||||
*.dbproj.schemaview
|
||||
*.jfm
|
||||
*.pfx
|
||||
*.publishsettings
|
||||
node_modules/
|
||||
orleans.codegen.cs
|
||||
|
||||
# Since there are multiple workflows, uncomment next line to ignore bower_components
|
||||
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
|
||||
#bower_components/
|
||||
|
||||
# RIA/Silverlight projects
|
||||
Generated_Code/
|
||||
|
||||
# Backup & report files from converting an old project file
|
||||
# to a newer Visual Studio version. Backup files are not needed,
|
||||
# because we have git ;-)
|
||||
_UpgradeReport_Files/
|
||||
Backup*/
|
||||
UpgradeLog*.XML
|
||||
UpgradeLog*.htm
|
||||
|
||||
# SQL Server files
|
||||
*.mdf
|
||||
*.ldf
|
||||
|
||||
# Business Intelligence projects
|
||||
*.rdl.data
|
||||
*.bim.layout
|
||||
*.bim_*.settings
|
||||
|
||||
# Microsoft Fakes
|
||||
FakesAssemblies/
|
||||
|
||||
# GhostDoc plugin setting file
|
||||
*.GhostDoc.xml
|
||||
|
||||
# Node.js Tools for Visual Studio
|
||||
.ntvs_analysis.dat
|
||||
|
||||
# Visual Studio 6 build log
|
||||
*.plg
|
||||
|
||||
# Visual Studio 6 workspace options file
|
||||
*.opt
|
||||
|
||||
# Visual Studio LightSwitch build output
|
||||
**/*.HTMLClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/GeneratedArtifacts
|
||||
**/*.DesktopClient/ModelManifest.xml
|
||||
**/*.Server/GeneratedArtifacts
|
||||
**/*.Server/ModelManifest.xml
|
||||
_Pvt_Extensions
|
||||
|
||||
# Paket dependency manager
|
||||
.paket/paket.exe
|
||||
paket-files/
|
||||
|
||||
# FAKE - F# Make
|
||||
.fake/
|
||||
|
||||
# JetBrains Rider
|
||||
.idea/
|
||||
*.sln.iml
|
||||
|
||||
# CodeRush
|
||||
.cr/
|
||||
|
||||
# Python Tools for Visual Studio (PTVS)
|
||||
__pycache__/
|
||||
*.pyc
|
11
.gitmodules
vendored
Normal file
@ -0,0 +1,11 @@
|
||||
[submodule "libsodium"]
|
||||
path = libsodium
|
||||
url = https://github.com/jedisct1/libsodium.git
|
||||
ignore = dirty
|
||||
[submodule "wxWidgets"]
|
||||
path = wxWidgets
|
||||
url = https://github.com/wxWidgets/wxWidgets.git
|
||||
ignore = dirty
|
||||
[submodule "mpir"]
|
||||
path = mpir
|
||||
url = https://github.com/BrianGladman/mpir.git
|
41
ILog.cpp
Normal file
@ -0,0 +1,41 @@
|
||||
#include "stdafx.h"
|
||||
|
||||
void ILogFatalError(const char* sz) {
|
||||
wxLogFatalError(_wx("%s"), _wx(sz));
|
||||
} // which is like wxLogError(), but also terminates the program with the exit code 3 (using abort() standard function).Unlike for all the other logging functions, this function can't be overridden by a log target.
|
||||
void ILogError(const char* sz) {
|
||||
wxLogError(_wx("%s"), _wx(sz));
|
||||
}
|
||||
//is the function to use for error messages, i.e.the messages that must be shown to the user.The default processing is to pop up a message box to inform the user about it.
|
||||
void ILogWarning(const char* sz) {
|
||||
wxLogWarning(_wx("%s"), _wx(sz));
|
||||
} //for warnings.They are also normally shown to the user, but don't interrupt the program work.
|
||||
void ILogMessage(const char* sz) {
|
||||
wxLogMessage(_wx("%s"), _wx(sz));
|
||||
} // is for all normal, informational messages.*/
|
||||
void ILogVerbose(const char* sz) {
|
||||
wxLogVerbose(_wx("%s"), _wx(sz));
|
||||
}
|
||||
; // is for verbose output.Normally, it is suppressed, but might be activated if the user wishes to know more details about the program progress(another, but possibly confusing name for the same function is wxLogInfo).
|
||||
void ILogDebug(const char* sz) {
|
||||
wxLogDebug(_wx("%s"), _wx(sz));
|
||||
} //is the right function for debug output. It only does anything at all in the
|
||||
//debug mode(when the preprocessor symbol WXDEBUG is defined) and expands to
|
||||
//nothing in release mode(otherwise).Note that under Windows, you must either
|
||||
//run the program under debugger or use a 3rd party program such as DebugView
|
||||
|
||||
void queue_error_message(const char* psz) {
|
||||
// Used where throwing immediately would be disastrous, as in a destructor.
|
||||
auto event = new wxCommandEvent(wxEVT_MENU, myID_ERRORMESSAGE);
|
||||
event->SetString(_wx(psz));
|
||||
// wxQueueEvent(singletonFrame->GetMenuBar(), event);
|
||||
wxQueueEvent(singletonApp, event);
|
||||
}
|
||||
|
||||
void queue_fatal_error(const char* psz) {
|
||||
// Used where throwing immediately would be disastrous, as in a destructor or when constructing the main frame
|
||||
if (!errorCode)errorCode = 10;
|
||||
queue_error_message(psz);
|
||||
singletonFrame->Close();
|
||||
}
|
||||
|
88
ILog.h
Normal file
@ -0,0 +1,88 @@
|
||||
#pragma once
|
||||
extern int errorCode;
|
||||
extern std::string szError;
|
||||
void ILogFatalError(const char*);
|
||||
void ILogError(const char*);
|
||||
void ILogWarning(const char*);
|
||||
void ILogMessage(const char* format);
|
||||
void ILogVerbose(const char*);
|
||||
void ILogDebug(const char*);
|
||||
void queue_error_message(const char*); //Used for error conditions within a destructor because you cannot throw within a destructor
|
||||
void queue_fatal_error(const char*); //Used for fatal error conditions within a destructor in place of FatalException because you cannot throw within a destructor
|
||||
|
||||
class MyException: public std::exception {
|
||||
private:
|
||||
std::string err;
|
||||
public:
|
||||
virtual ~MyException() override = default;
|
||||
MyException() = delete;
|
||||
explicit MyException(const std::string &m) noexcept :err(m){}
|
||||
explicit MyException(const char* sz) noexcept :err(sz) {}
|
||||
virtual const char* what() const override {
|
||||
return err.c_str();
|
||||
}
|
||||
};
|
||||
|
||||
class FatalException : public MyException {
|
||||
public:
|
||||
using MyException::MyException;
|
||||
FatalException() noexcept;
|
||||
};
|
||||
|
||||
class HashReuseException : public MyException {
|
||||
public:
|
||||
using MyException::MyException;
|
||||
HashReuseException() noexcept;
|
||||
};
|
||||
|
||||
class SQLexception : public MyException {
|
||||
public:
|
||||
using MyException::MyException;
|
||||
SQLexception() noexcept;
|
||||
};
|
||||
|
||||
class NonUtf8DataInDatabase : public MyException {
|
||||
public:
|
||||
using MyException::MyException;
|
||||
NonUtf8DataInDatabase() noexcept;
|
||||
};
|
||||
|
||||
class BadDataException : public MyException {
|
||||
public:
|
||||
using MyException::MyException;
|
||||
BadDataException() noexcept;
|
||||
};
|
||||
|
||||
class NonRandomScalarException : public MyException {
|
||||
public:
|
||||
using MyException::MyException;
|
||||
NonRandomScalarException() noexcept;
|
||||
};
|
||||
|
||||
class BadScalarException : public MyException {
|
||||
public:
|
||||
using MyException::MyException;
|
||||
BadScalarException() noexcept;
|
||||
};
|
||||
|
||||
class OversizeBase58String : public MyException {
|
||||
public:
|
||||
using MyException::MyException;
|
||||
OversizeBase58String() noexcept;
|
||||
};
|
||||
|
||||
// This exception is obviously far too generic, because the routine throwing it knows nothing of the context.
|
||||
// does not know what the cryptographic id identifies.
|
||||
// higher level code that does know the context needs to catch the exception and issue a more
|
||||
// relevant errror message, possibly with by more informative rethrow.
|
||||
class BadStringRepresentationOfCryptoIdException : public MyException {
|
||||
public:
|
||||
using MyException::MyException;
|
||||
BadStringRepresentationOfCryptoIdException() noexcept;
|
||||
};
|
||||
|
||||
class NotBase58Exception : public MyException {
|
||||
public:
|
||||
using MyException::MyException;
|
||||
NotBase58Exception() noexcept;
|
||||
};
|
239
ISqlit3Impl.cpp
Normal file
@ -0,0 +1,239 @@
|
||||
// this is implementation class of pure virtual interface base class between sqlite3,
|
||||
// which speaks only C and utf8 char[]
|
||||
// and wxWidgets which speaks only C++ and unicode strings.
|
||||
//
|
||||
// In this code I continually declare stuff public that should be private,
|
||||
// but that is OK, because declared in a cpp file, not a header file,
|
||||
// and thus they remain private to any code outside this particular cpp file.
|
||||
// When the compiler complains that something is inaccessible, I don't muck
|
||||
// around with friend functions and suchlike, which rapidly gets surprisingly
|
||||
// complicated, I just make it public, but only public to this one file.
|
||||
#include <assert.h>
|
||||
#include <string> // for basic_string, allocator, char_traits
|
||||
#include <initializer_list> // for initializer_list
|
||||
#include <memory> // for shared_ptr, unique_ptr
|
||||
#include <span>
|
||||
#include "ISqlite3.h"
|
||||
#include "sqlite3.h"
|
||||
|
||||
static auto error_message(int rc, sqlite3* pdb) {
|
||||
return std::string("Sqlite3 Error: ") + sqlite3_errmsg(pdb) + ". Sqlite3 error number=" + std::to_string(rc);
|
||||
}
|
||||
|
||||
void sqlite3_init() {
|
||||
if (sqlite3_initialize() != SQLITE_OK) {
|
||||
errorCode = 7;
|
||||
szError = "Fatal Error: Sqlite library did not init.";
|
||||
// Cannot log the error, because logging not set up yet, so logging itself causes an exception
|
||||
throw FatalException(szError.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
class IcompiledImpl_sql;
|
||||
|
||||
static int callback(void* NotUsed, int argc, char** argv, char** azColName) {
|
||||
std::string str;
|
||||
str.reserve(256);
|
||||
for (int i = 0; i < argc; i++) {
|
||||
str =str + "\t\"" + azColName[i]+ R"|("=)|" + (argv[i]!=nullptr ? argv[i] : "NULL");
|
||||
}
|
||||
ILogMessage(str.c_str());
|
||||
return 0;
|
||||
}
|
||||
|
||||
class ISqlite3Impl :
|
||||
public ISqlite3
|
||||
{
|
||||
public:
|
||||
sqlite3* pdb;
|
||||
ISqlite3Impl() = delete;
|
||||
ISqlite3Impl(const char* dbName, int flags) {
|
||||
#ifndef NDEBUG
|
||||
pdb = nullptr;
|
||||
#endif
|
||||
int rc =sqlite3_open_v2(dbName, &pdb, flags, nullptr);
|
||||
if (rc != SQLITE_OK) throw SQLexception(error_message(rc, pdb));
|
||||
assert(pdb != nullptr);
|
||||
// pdb can never be nullptr, since the sqlite3_open_v2 command always initializes
|
||||
// it even if open fails
|
||||
}
|
||||
|
||||
void exec(const char* szsql) override {
|
||||
char* zErrMsg = nullptr;
|
||||
int rc = sqlite3_exec(pdb, szsql, callback, nullptr, &zErrMsg);
|
||||
if (rc != SQLITE_OK) {
|
||||
SQLexception e(std::string("SQL Exec Error: ") + zErrMsg);
|
||||
sqlite3_free(zErrMsg);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
~ISqlite3Impl() override {
|
||||
exec("PRAGMA optimize;"); //If we have multiple threads, will want to call this only once, and we will also want to call the check pragma in a separate thread with a separate connection.
|
||||
int rc{ sqlite3_close(pdb) };
|
||||
if (rc == SQLITE_OK) {}
|
||||
else {
|
||||
std::string err(error_message(rc, pdb) + ". Bad destruction of ISqlite3Impl");
|
||||
ILogError(err.c_str());
|
||||
queue_error_message(err.c_str()); //Does not actually pop up a message, which would be extremely bad in a destructor, instead queues an event which causes the message to pop up.
|
||||
}
|
||||
// If called before everything is finalized, will return SQL_BUSY
|
||||
// but that is a coding error.
|
||||
// sqlite3_close_v2 sets everything to shutdown when everything is finalized,
|
||||
// but this is C++. We do our own memory management, and if we need
|
||||
// sqlite3_close_v2 we are doing it wrong.
|
||||
}
|
||||
};
|
||||
|
||||
// Factory method to open database.
|
||||
ISqlite3* Sqlite3_open(const char * db_name) {
|
||||
return new ISqlite3Impl(db_name, SQLITE_OPEN_READWRITE);
|
||||
}
|
||||
|
||||
// Factory method to create database.
|
||||
ISqlite3 * Sqlite3_create(const char* db_name) {
|
||||
return new ISqlite3Impl(db_name, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE);
|
||||
}
|
||||
|
||||
class IcompiledImpl_sql :
|
||||
public Icompiled_sql
|
||||
{
|
||||
friend class ISqlite3Impl;
|
||||
private:
|
||||
sqlite3_stmt *pStmt;
|
||||
ISqlite3Impl *pdbImplOwn;
|
||||
auto e(int rc) {
|
||||
assert(rc != SQLITE_OK);
|
||||
return SQLexception(error_message(rc, pdbImplOwn->pdb));
|
||||
}
|
||||
public:
|
||||
IcompiledImpl_sql() = delete;
|
||||
IcompiledImpl_sql(
|
||||
ISqlite3 * pdbImpl, /* wrapped database handle */
|
||||
const char *zSql /* SQL statement, UTF-8 encoded */
|
||||
) {
|
||||
assert(pdbImpl!=nullptr);
|
||||
assert(zSql!=nullptr);
|
||||
pdbImplOwn = static_cast<ISqlite3Impl*> (pdbImpl); //static downcast
|
||||
// Static downcast is safe because the base is pure virtual,
|
||||
// and we have only one derived type,
|
||||
// If we ever have multiple derived types, I will use an enum
|
||||
// in the pure virtual base class and continue to use static downcasts.
|
||||
// Or, better, derive additional implementation classes from ISqlite3Impl so that
|
||||
// downcast to ISqlite3Impl must always work.
|
||||
assert(pdbImplOwn);
|
||||
// If an instance of the derived class exists, has to a valid upcast.
|
||||
const char *pzTail;
|
||||
int rc = sqlite3_prepare_v3(
|
||||
pdbImplOwn->pdb, /* Database handle */
|
||||
zSql, /* SQL statement, UTF-8 encoded */
|
||||
-1, /* Maximum length of zSql in bytes. */
|
||||
SQLITE_PREPARE_PERSISTENT,
|
||||
&pStmt, /* OUT: Statement handle */
|
||||
&pzTail /* OUT: Pointer to unused portion of zSql */
|
||||
);
|
||||
if (rc != SQLITE_OK) throw e(rc);
|
||||
}
|
||||
|
||||
~IcompiledImpl_sql() override{
|
||||
int rc=sqlite3_finalize(pStmt);
|
||||
if (rc == SQLITE_OK) {}
|
||||
else {
|
||||
std::string err(error_message(rc, pdbImplOwn->pdb) + ". Bad destruction of Icompiled_sql");
|
||||
ILogError(err.c_str());
|
||||
// This error should only ever happen if object failed to compile, in which case we have already handled the error
|
||||
// Hence we do not queue an event to pop up a message, only log the error. (Unless ILogError pops up a message, which it might, but normally does not)
|
||||
}
|
||||
}
|
||||
|
||||
virtual void Isqlite3_bind(int param, std::span<const uint8_t> blob) override {
|
||||
int rc = sqlite3_bind_blob(pStmt, param, &blob[0], static_cast<int>(blob.size_bytes()), SQLITE_STATIC);
|
||||
if (rc != SQLITE_OK) throw e(rc);
|
||||
}
|
||||
|
||||
virtual void Isqlite3_bind(int param, int i) override {
|
||||
int rc = sqlite3_bind_int(pStmt, param, i);
|
||||
if (rc != SQLITE_OK) throw e(rc);
|
||||
}
|
||||
|
||||
virtual void Isqlite3_bind(int param, int64_t i) override {
|
||||
int rc = sqlite3_bind_int64(pStmt, param, i);
|
||||
if (rc != SQLITE_OK) throw e(rc);
|
||||
}
|
||||
|
||||
virtual void Isqlite3_bind(int param) override {
|
||||
int rc = sqlite3_bind_null(pStmt, param);
|
||||
if (rc != SQLITE_OK) throw e(rc);
|
||||
}
|
||||
|
||||
virtual void Isqlite3_bind(int param, const char* str) override {
|
||||
int rc = sqlite3_bind_text(pStmt, param, str, -1, SQLITE_STATIC);
|
||||
if (rc != SQLITE_OK) throw e(rc);
|
||||
}
|
||||
|
||||
virtual sql_result Isqlite3_step() override {
|
||||
int rc = sqlite3_step(pStmt);
|
||||
sql_result ret;
|
||||
switch (rc & 0xFF) {
|
||||
case SQLITE_DONE:
|
||||
ret = DONE;
|
||||
break;
|
||||
case SQLITE_ROW:
|
||||
ret = ROW;
|
||||
break;
|
||||
case SQLITE_BUSY:
|
||||
//ret = BUSY;
|
||||
// As handling busy is hard, we will always use WAL mode and only allow one thread in one process write to the database.
|
||||
// If we need many threads, perhaps in many processes, to write, they will all channel through a single thread
|
||||
// in a single process whose transactions are never substantially larger than thirty two kilobytes.
|
||||
// As a result, we should never get SQL_BUSY codes except in pathological cases that are OK to handle by
|
||||
// terminating or reporting to the user that his operation has failed because database abnormally busy.
|
||||
// When we are building the blockchain, every process but one will see the blockchain as instantaneously changing
|
||||
// from n blocks to n+1 blocks when a single transaction updates the root and anciliary data.
|
||||
// We will build the blockchain hash table in postfix format, with patricia tree nodes that
|
||||
// have skiplink format stored only in memory and rebuilt each startup so that it grows append only,
|
||||
throw SQLexception("Abnormal busy database");
|
||||
break;
|
||||
case SQLITE_MISUSE:
|
||||
//ret = MISUSE;
|
||||
throw e(rc);
|
||||
break;
|
||||
default:
|
||||
//ret = SQL_ERROR;
|
||||
throw e(rc);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
virtual std::span<const uint8_t> Isqlite3_column_blob(int iCol) const override {
|
||||
return std::span<const uint8_t>((const uint8_t*)sqlite3_column_blob(pStmt, iCol), sqlite3_column_bytes(pStmt, iCol));
|
||||
// returns the null pointer if null
|
||||
}
|
||||
virtual int Isqlite3_column_int(int iCol) const override {
|
||||
return sqlite3_column_int(pStmt, iCol);
|
||||
}
|
||||
virtual int64_t Isqlite3_column_int64(int iCol) const override {
|
||||
return sqlite3_column_int64(pStmt, iCol);
|
||||
}
|
||||
virtual char *Isqlite3_column_text(int iCol) const override {
|
||||
return static_cast<std::nullptr_t>(sqlite3_column_text(pStmt, iCol));
|
||||
/* returns pointer to zero length string if null. If we need to distinguish between zero length strings and nulls, need the type function.
|
||||
We can store any type in any column, and read any type from any column, but if something unexpected is in a column, it gets converted to the expected type on being read back. For example an integer gets converted a decimal string if read as a blob or as text.
|
||||
It is very rarely valid to store different types in the same column, except that null is permissible. The difference between null and zero matters, but the case of null is usually dealt with by sql code, not C code. */
|
||||
}
|
||||
virtual void Isqlite3_reset()override {
|
||||
int rc = sqlite3_reset(pStmt);
|
||||
if (rc != SQLITE_OK) throw e(rc);
|
||||
// sqlite3_reset returns extended error codes
|
||||
// https://sqlite.org/c3ref/reset.html
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/* Factory method to prepare a compiled sql statement
|
||||
Uses automatic upcast. You always want to start with the most derived smart pointer if you can, and let the compiler take care of default upcasting.*/
|
||||
Icompiled_sql* sqlite3_prepare(ISqlite3 *pdbImpl, const char * zSql) {
|
||||
return new IcompiledImpl_sql(
|
||||
pdbImpl, /* Database handle */
|
||||
zSql /* SQL statement, UTF-8 encoded */
|
||||
);
|
||||
}
|
96
ISqlite3.h
Normal file
@ -0,0 +1,96 @@
|
||||
#pragma once
|
||||
#include "ILog.h"
|
||||
// this is pure virtual interface base class between sqlite3, which speaks only C and utf8 char[]
|
||||
// and wxWidgets which speaks only C++ and unicode strings.
|
||||
|
||||
// Usage: Call the factory function std::shared_ptr<ISqlite3> sqlite3_open(const char *) to get a shared
|
||||
// pointer to the // Sqlite3 database object. Then call the factory function
|
||||
// sqlite3_prepare(std::shared_ptr<ISqlite3>, const char *) to get a unique pointer to
|
||||
// a compiled SQL statement
|
||||
|
||||
// Its primary purpose is to avoid code that needs both the wxWidgets header files,
|
||||
// and the sqlite3.h header file.
|
||||
//
|
||||
// It speaks only utf8 char[], and needs to be called in wxWidgets code using
|
||||
// wxString.utf8_str() and its return values need to be interpreted in wxWidgets code
|
||||
// using wxString::FromUTF8().
|
||||
//
|
||||
// This header file can be included in code that has the sqlite3.h header file
|
||||
// and in code that has the wxWidgets header file, for it has no dependencies on either one
|
||||
//
|
||||
// In code that has wxWidgets headers, we call members of this interface class,
|
||||
// rather than directly calling sqlite3 functions.
|
||||
//
|
||||
// I originally implemented the pimpl idiom, but it turns out that pimpl has become
|
||||
// substantially more difficult in C++14, because one is effectively rolling one's own
|
||||
// unique pointer.
|
||||
//
|
||||
// It is therefore easier to implement a pure virtual base class with a virtual destructor and
|
||||
// factory function that returns a smart pointer to a member of the derived implementation
|
||||
//
|
||||
/* This code is at a low level abstraction, because it provides low level C++ interface to inherently low level C
|
||||
It is intended to be wrapped in higher level code that does not know about the nuts and bolts of sqlite3, but which supports throwing, templated functions, and all that.*/
|
||||
|
||||
//
|
||||
//___________________________________
|
||||
|
||||
// This class wraps a compiled sql statement.
// Pure virtual interface: the derived implementation owns the underlying
// sqlite3 prepared statement and finalizes it on destruction.  Obtain
// instances through the factory function sqlite3_prepare(ISqlite3*, const char*).
class Icompiled_sql
{
protected:
    Icompiled_sql() = default; // needed for derived constructor
public:
    virtual ~Icompiled_sql() = default; // needed for derived destructor
    // Bind is used when writing stuff into the database. These objects should continue to exist until the write is finalized or reset.
    // The overloads bind, in order: a blob, an int, a 64 bit int, SQL NULL (the
    // one-argument form), and a utf8 text string, to the ?N placeholder whose
    // index is the first argument.
    virtual void Isqlite3_bind( int, const std::span<const uint8_t>) = 0; // https://sqlite.org/c3ref/bind.html
    virtual void Isqlite3_bind(int, int) = 0;
    virtual void Isqlite3_bind(int, int64_t) = 0;
    virtual void Isqlite3_bind(int) = 0; // binds SQL NULL
    virtual void Isqlite3_bind(int, const char*) = 0;
    // Subset of sqlite3 step result codes that callers are expected to handle.
    // DONE: statement finished; ROW: a result row is available.
    enum sql_result { DONE, ROW, BUSY, SQL_ERROR, MISUSE };
    virtual sql_result Isqlite3_step() = 0;
    // when reading, you don't use bind. Sqlite creates a temporary in the memory that it manages. If you want the object to live beyond the next step operation, need to make a copy
    // When writing objects, we reinterpret a pointer to a typed object as a blob pointer, when reading them, we need a typed copy, otherwise calling the destructor could be bad.
    // We don't want Sqlite3 calling destructors on our objects, hence write them as static, and create them from raw bytes on reading.
    virtual std::span<const uint8_t> Isqlite3_column_blob (int) const = 0; // returns the null pointer and zero length if null.
    virtual int Isqlite3_column_int (int) const = 0;
    virtual int64_t Isqlite3_column_int64 (int) const = 0;
    virtual char* Isqlite3_column_text (int) const = 0; // returns pointer to zero length
    // string if null. If we need to distinguish between zero length strings and nulls, need the
    // type function.
    // We can store any type in any column, and read any type from any column, but if something
    // unexpected is in a column, it gets coerced to the expected type on being read back.
    // Thus something stored as a number and read back as blob will come back as the decimal character string.
    // It is very rarely valid to store different types in the same column, except that
    // null is permissible. The difference between null and zero matters, but the case of
    // null is usually dealt with by sql code, not C code.
    virtual void Isqlite3_reset() = 0; // https://sqlite.org/c3ref/reset.html
};
|
||||
|
||||
//___________________________________
|
||||
|
||||
// This class wraps a database. Its derived implementation will hold an old type C pointer
// to an opened database object, which is destroyed when the class object is destroyed.
// Pure virtual interface so that wxWidgets code can drive sqlite3 without
// including sqlite3.h.  Obtain instances through Sqlite3_open / Sqlite3_create.
class ISqlite3
{
protected:
    ISqlite3() = default; // needed for derived constructor
public:
    virtual ~ISqlite3() = default; // needed for derived destructor
    // Executes one or more semicolon-separated SQL statements (utf8),
    // discarding any result rows.
    virtual void exec(const char*) = 0;
};
|
||||
|
||||
// Factory method to open an existing database.
// NOTE(review): despite the usage comment at the top of this header, these
// factories return raw owning pointers; the caller is expected to wrap the
// result in a smart pointer (see class ro::sql and display_wallet for examples).
ISqlite3* Sqlite3_open(const char*);

// Factory method to create a database, returning a raw owning pointer wrapping it.
ISqlite3* Sqlite3_create(const char*);

// Factory method to prepare a compiled sql statement (utf8 SQL text).
// Returns a raw owning pointer; ro::sql wraps it in a unique_ptr.
Icompiled_sql* sqlite3_prepare(ISqlite3*, const char *);

// Process-wide sqlite3 initialisation; call before any other use.
void sqlite3_init();
// Declared extern "C" so this header can expose sqlite3's own C shutdown
// routine without pulling in sqlite3.h.
extern "C" {
    int sqlite3_shutdown(void);
}
|
||||
|
24
LICENSE.md
Normal file
@ -0,0 +1,24 @@
|
||||
---
|
||||
generator:
|
||||
title: LICENSE
|
||||
---
|
||||
Copyright © 2021 reaction.la gpg key 154588427F2709CD9D7146B01C99BB982002C39F
|
||||
|
||||
This distribution of free software contains numerous other
|
||||
distributions with other compatible free software licenses and copyrights.
|
||||
Those files and directories are governed by their own license, and their
|
||||
combination and integration into this project by this license and this
|
||||
copyright, and anything in this distribution not otherwise licensed and
|
||||
copyrighted in this distribution is governed by this license, and this
|
||||
copyright.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this distribution of software except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
<https://directory.fsf.org/wiki/License:Apache-2.0>
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
22
NOTICE.md
Normal file
@ -0,0 +1,22 @@
|
||||
---
|
||||
title: NOTICE
|
||||
---
|
||||
Copyright © 2021 reaction.la gpg key 154588427F2709CD9D7146B01C99BB982002C39F
|
||||
|
||||
The license of this software, and the licenses of the packages on which it
|
||||
relies, grant the four software freedoms:
|
||||
|
||||
0. The freedom to run the program as you wish, for any purpose.
|
||||
1. The freedom to study how the program works, and change it so it
|
||||
does your computing as you wish.
|
||||
2. The freedom to redistribute copies so you can help others.
|
||||
3. The freedom to distribute copies of your modified versions to
|
||||
others.
|
||||
|
||||
This software is licensed under the [apache 2.0 license](LICENSE.html).
|
||||
|
||||
This product includes several packages, each with their own free software licence, referenced in the relevant files or subdirectories.
|
||||
|
||||
Or, in the case of Sqlite, the Sqlite blessing in place of a license, which is
|
||||
morally though not legally obligatory on those that obey the
|
||||
commandments of Gnon. See also the [contributor code of conduct](docs/contributor_code_of_conduct.html).
|
74
README.md
Normal file
@ -0,0 +1,74 @@
|
||||
---
|
||||
title: >-
|
||||
README
|
||||
---
|
||||
[pre alpha documentation (mostly a wish list)](docs/index.htm)
|
||||
|
||||
[copyright © and license](./license.txt)
|
||||
|
||||
pre-requisite, Pandoc to build the html documentation from the markdown files.
|
||||
|
||||
Windows pre-requisites: Visual Studio and git-bash
|
||||
|
||||
To obtain the source code from which the project can be built, including
|
||||
this README, from the bash command line (git-bash in windows).
|
||||
|
||||
```bash
|
||||
git clone missing url
|
||||
cd wallet
|
||||
./winConfigure.sh
|
||||
```
|
||||
|
||||
To configure and build the required third party libraries in windows, then
|
||||
build the program and run unit test for the first time, launch the Visual
|
||||
Studio X64 native tools command prompt in the cloned directory, then:
|
||||
|
||||
```bat
|
||||
winConfigure.bat
|
||||
```
|
||||
|
||||
[cryptographic software is under attack]:./docs/contributor_code_of_conduct.html#code-will-be-cryptographically-signed
|
||||
"Contributor Code of Conduct"
|
||||
{target="_blank"}
|
||||
|
||||
winConfigure.bat also configures the repository you just created to use
|
||||
`.gitconfig` in the repository, causing git to to implement GPG signed
|
||||
commits -- because [cryptographic software is under attack] from NSA
|
||||
entryists, and shills, who seek to introduce backdoors.
|
||||
|
||||
This may be inconvenient if you do not have `gpg` installed and set up.
|
||||
|
||||
`.gitconfig` adds several git aliases:
|
||||
|
||||
1. `git lg` to display the gpg trust information for the last four commits.
|
||||
For this to be useful you need to import the repository public key
|
||||
`public_key.gpg` into gpg, and locally sign that key.
|
||||
1. `git fixws` to standardise white space to the project standards
|
||||
1. `git graph` to graph the commit tree
|
||||
1. `git alias` to display the git aliases.
|
||||
|
||||
```bash
|
||||
# To verify that the signature on future pulls is unchanged.
|
||||
gpg --import public_key.gpg
|
||||
gpg --lsign 096EAE16FB8D62E75D243199BC4482E49673711C
|
||||
# We ignore the Gpg Web of Trust model and instead use
|
||||
# the Zooko identity model.
|
||||
# We use Gpg signatures to verify that remote repository
|
||||
# code is coming from an unchanging entity, not for
|
||||
# Gpg Web of Trust. Web of Trust is too complicated
|
||||
# and too user hostile to be workable or safe.
|
||||
# Never --sign any Gpg key related to this project. --lsign it.
|
||||
# Never check any Gpg key related to this project against a
|
||||
# public gpg key repository. It should not be there.
|
||||
# Never use any email address on a gpg key related to this project
|
||||
# unless it is only used for project purposes, or a fake email,
|
||||
# or the email of someone whom you do not like.
|
||||
```
|
||||
|
||||
To build the documentation in its intended html form from the markdown
|
||||
files, execute the bash script file `docs/mkdocs.sh`, in an environment where
|
||||
`pandoc` is available. On Windows, if Git Bash and Pandoc have been
installed, you should be able to run a shell file in bash by double clicking on it.
|
||||
|
||||
[Pre alpha release](./RELEASE_NOTES.html), which means it does not yet work even well enough for
|
||||
it to be apparent what it would do if it did work.
|
8
RELEASE_NOTES.md
Normal file
@ -0,0 +1,8 @@
|
||||
---
|
||||
title: Release Notes
|
||||
---
|
||||
To build and run [README](./README.html)
|
||||
|
||||
[pre alpha documentation (mostly a wish list)](docs/index.htm)
|
||||
|
||||
This software is pre alpha and should not yet be released. It does not work well enough to even show what it would do if it were working.
|
206
app.cpp
Normal file
@ -0,0 +1,206 @@
|
||||
#include "stdafx.h"
|
||||
|
||||
// Per-thread scratch state; created for the main thread by the App
// constructor and destroyed by the App destructor.
thread_local thread_local__* thl{ nullptr };
// Generates the application entry point and instantiates App as the
// one and only wxWidgets application object.
wxIMPLEMENT_APP(App);
|
||||
|
||||
// Constructs the application object, records it in the global singletonApp,
// and creates the main thread's thread_local__ object if not already present.
App::App()
{
    // Exactly one App may exist at a time.
    assert (singletonApp == nullptr);
    singletonApp = this;
    if (thl == nullptr)thl = new thread_local__();
}
|
||||
|
||||
// Clears the singleton pointer and releases the main thread's
// thread_local__ object created in the constructor.
App::~App()
{
    assert(singletonApp == this);
    singletonApp = nullptr;
    if (thl != nullptr)delete thl;
    thl = nullptr;
}
|
||||
|
||||
bool App::OnInit()
|
||||
{ if (wxApp::OnInit()) {
|
||||
SetVendorName(_T("rho")); /* This causes the non volatile config data to be stored under the rho on
|
||||
windows.*/
|
||||
SetAppName(_T("wallet")); /* This causes the non volatile config data to be stored under rho\wallet
|
||||
We will generally place data in the database, and if additional executables need their own data
|
||||
in the config, they will create their own subkey under Computer\HKEY_CURRENT_USER\Software\rho */
|
||||
pConfig = std::unique_ptr<wxConfigBase>(wxConfigBase::Get());
|
||||
pConfig->SetRecordDefaults(false);
|
||||
/* pConfig corresponds to the Windows Registry entry
|
||||
Computer\HKEY_CURRENT_USER\Software\rho\wallet
|
||||
|
||||
Contrary to wxWidgets documentation, the config data on windows is by default stored in
|
||||
HKCU, HKEY_CURRENT_USER, not in HKLM, HKEY_LOCAL_MACHINE.
|
||||
|
||||
We probably should have placed per user data in an sqlite3 file in
|
||||
wxStandardPaths::GetUserDataDir()
|
||||
|
||||
Data global to all users has to go in an sqlite3 file in wxStandardPaths::GetAppDocumentsDir()
|
||||
or wxStandardPaths::GetLocalDataDir()
|
||||
|
||||
User local database will record the derivation of all secrets, and what wallets along the path
|
||||
are logged in. The local machine database will hold the global consensus blockchain, which contains
|
||||
no privacy sensitive information, and will also hold data global to all users on a particular
|
||||
machine.
|
||||
|
||||
A wallet's secret can be stored in a file - we will eventually provide passwords for files,
|
||||
but passwords provide a false sense of security, because if someone gets a copy of that file,
|
||||
a sophisticated attacker can perform an offline brute force attack, thus a human memorable
|
||||
password only provides protection against casual and opportunistic attackers.
|
||||
If the file is insecure, password needs to impossible to remember, and stored somewhere secure..*/
|
||||
|
||||
Frame* frame = new Frame(pConfig->GetAppName());
|
||||
frame->Show(true); //Frame, being top level unowned window, is owned by the one and only message pump
|
||||
if (m_display_in_front && singletonFrame != nullptr && singletonFrame->m_pLogWindow != nullptr) singletonFrame->m_pLogWindow->GetFrame()->Raise();
|
||||
return true;
|
||||
}
|
||||
else return false;
|
||||
}
|
||||
|
||||
// Runs the wxWidgets main loop.  Binds the deferred-error menu event first,
// and merges the loop's exit code with any errorCode recorded by exception
// handling (loop exit code wins when nonzero).
int App::OnRun()
{
    Bind(wxEVT_MENU, &App::OnError, this, myID_ERRORMESSAGE);
    int exitcode = wxApp::OnRun();
    //wxTheClipboard->Flush();
    return exitcode ? exitcode : errorCode;
}
|
||||
|
||||
// Called by wxWidgets when an exception escapes an event handler.
// Classifies the in-flight exception by rethrowing it, shows a modal error
// dialog, and returns true to keep running (only for MyException — a
// recoverable, operation-level failure) or false to terminate.
bool App::OnExceptionInMainLoop()
{
    bool handled{ false };
    wxString error;
    try {
        throw; // Rethrow the current exception so we can dispatch on its type.
    }
    catch (const FatalException& e) {
        error = wsz_program + _wx(e.what());
        // Preserve a previously recorded error code if one is already set.
        if (!errorCode)errorCode = 10;
    }
    catch (const MyException& e) {
        // If we handle an error at this level, the current action has been abruptly terminated,
        // and we need to inform the user, but we are not going to terminate the program,
        // nor set an error number for exit.
        handled = true;
        error = wsz_operation + _wx(e.what());
    }
    catch (const std::exception& e) {
        error = wsz_program + _wx(e.what());
        errorCode = 9;
    }
    catch (...) {
        error = wsz_program + _wx(sz_unknown_error);
        errorCode = 8;
    }
    wxLogError(_T("%s"), error);
    wxMessageDialog dlg(singletonFrame, error, wsz_error, wxICON_ERROR);
    dlg.SetId(myID_ERRORMESSAGE);
    dlg.ShowModal();
    // returning false to exit the main loop and thus terminate the program.
    return handled;
}
|
||||
|
||||
// Configures the command-line parser with the switch table g_cmdLineDesc
// (declared in app.h) plus logo and usage text.
void App::OnInitCmdLine(wxCmdLineParser& parser)
{
    parser.SetDesc(g_cmdLineDesc);
    // must refuse '/' as parameter starter or cannot use "/path" style paths
    parser.SetSwitchChars(_T("-"));
    //Command line parameters
    parser.SetLogo(wsz_commandLineLogo);
    parser.AddUsageText(wsz_usageText);
}
|
||||
|
||||
// Translates parsed command-line switches into the App's m_* flags.
// Switches (see g_cmdLineDesc): -t unit test, -l show log, -d show log in
// front, -f log focus events, -q quick tests, -c complete tests; a single
// optional positional parameter names a wallet file.  -q and -c are mutually
// exclusive: whichever appears later on the command line wins.
bool App::OnCmdLineParsed(wxCmdLineParser& parser)
{
    for (const auto& arg : parser.GetArguments()) {
        wxString optionName;
        switch (arg.GetKind())
        {
        case wxCMD_LINE_SWITCH:
            optionName = arg.GetShortName();
            if (optionName == _T("t")) {
                m_unit_test = !arg.IsNegated();
            }
            else if (optionName == _T("l")) {
                m_display = !arg.IsNegated();
            }
            else if (optionName == _T("d")) {
                // -d implies -l: chained assignment sets m_display_in_front and
                // ORs the same value into m_display.
                m_display |= m_display_in_front = !arg.IsNegated();
            }
            else if (optionName == _T("f")) {
                m_log_focus_events = !arg.IsNegated();
                if (m_log_focus_events) {
                    Bind(
                        wxEVT_IDLE,
                        +[](wxIdleEvent& event) { //Since this function is only ever used once, never being unbound, using a lambda to avoid naming it.
                            static wxWindow* lastFocus = (wxWindow*)NULL;
                            //wxLogMessage(_T("OnIdle"));
                            wxWindow* curFocus = ::wxWindow::FindFocus();
                            if (curFocus != lastFocus && curFocus)
                            {
                                lastFocus = curFocus;
                                wxString name{ "" };
                                // Build a /Class:name path from the focused window up to the top level.
                                do {
                                    name = wxString(_T("/")) + curFocus->GetClassInfo()->GetClassName() + _T(":") + curFocus->GetName() + name;
                                } while (curFocus = curFocus->GetParent()); // assignment intended: walk up until no parent
                                wxLogMessage(name);
                            }
                            event.Skip(); //Called so we can bind multiple tasks to idle, and they will all be handled.
                        }
                    );
                }
            }
            else if (optionName == _T("q")) {
                m_quick_unit_test = !arg.IsNegated();
                m_complete_unit_test = m_complete_unit_test && !m_quick_unit_test;
            }
            else if (optionName == _T("c")) {
                m_complete_unit_test = !arg.IsNegated();
                m_quick_unit_test = m_quick_unit_test && !m_complete_unit_test;
            }
            break;
        case wxCMD_LINE_OPTION:
            // No value-taking options are declared in g_cmdLineDesc, so this
            // branch should be unreachable.
            assert(false);
            /* switch (arg.GetType()) {
            case wxCMD_LINE_VAL_NUMBER:
                // do something with itarg->GetLongVal();
                break;
            case wxCMD_LINE_VAL_DOUBLE:
                // do something with itarg->GetDoubleVal();
                break;
            case wxCMD_LINE_VAL_DATE:
                // do something with itarg->GetDateVal();
                break;
            case wxCMD_LINE_VAL_STRING:
                // do something with itarg->GetStrVal();
                break;
            }*/
            break;
        case wxCMD_LINE_PARAM:
            m_params.push_back(arg.GetStrVal());
            // This intended to support subcommand processing, but not handling subcommands yet
            // g_cmdLineDesc has been set to disallow multiple arguments.
            break;
        default:
            assert(0);
            break;
        }
    }
    return true;
}
|
||||
|
||||
// Handler for the deferred-error event bound in OnRun.
void App::OnError(wxCommandEvent& event)
{
    // We use this to display errors where throwing would cause problems, as in a destructor
    // Instead we post an event to be handled in due course.
    wxMessageDialog dlg(singletonFrame, event.GetString(), _T("Error"), wxICON_ERROR);
    dlg.SetId(myID_ERRORMESSAGE);
    dlg.ShowModal();
}
|
||||
|
||||
// wxWidgets shutdown hook.  The process exit code is produced by OnRun,
// not here; this just sanity-checks state and logs any recorded error.
int App::OnExit()
{
    // pConfig was taken over in OnInit and must still be alive here.
    assert(pConfig.get());
    // NOTE(review): szError and errorCode are defined elsewhere in the
    // project — presumably the last recorded error text/code; confirm.
    if (errorCode)wxLogDebug("%s", szError);
    return 0;
}
|
58
app.h
Normal file
@ -0,0 +1,58 @@
|
||||
#pragma once
|
||||
|
||||
// The one and only wxWidgets application object.  Owns the persistent
// config, parses the command line into the m_* flags below, and routes
// uncaught exceptions through OnExceptionInMainLoop.
class App : public wxApp
{
public:
    std::unique_ptr<wxConfigBase>pConfig;
    // pConfig corresponds to the Windows Registry entry Computer\HKEY_CURRENT_USER\Software\ro\wallet
    // Don't use the registry for stuff better served by wxStandardPaths and sqlit3 files located
    // in locations specified by wxStandardPaths
    App();
    virtual ~App();
    virtual bool OnInit() wxOVERRIDE;
    virtual int OnExit() wxOVERRIDE;
    virtual int OnRun() wxOVERRIDE;
    // Shows a modal error dialog for errors posted as events (see app.cpp).
    void OnError(wxCommandEvent&);
    virtual void OnInitCmdLine(wxCmdLineParser& parser) wxOVERRIDE;
    virtual bool OnCmdLineParsed(wxCmdLineParser& parser) wxOVERRIDE;
    virtual bool OnExceptionInMainLoop() wxOVERRIDE;
    // Flags set from command-line switches in OnCmdLineParsed:
    bool m_unit_test{ false };          // -t: run unit tests
    bool m_display{ false };            // -l: show the log window
    bool m_display_in_front{ false };   // -d: show the log window in front (implies -l)
    bool m_log_focus_events{ false };   // -f: log focus changes (useful with -l)
    bool m_quick_unit_test{ false };    // -q: quick subset of tests (mutually exclusive with -c)
    bool m_complete_unit_test{ false }; // -c: complete tests (mutually exclusive with -q)
    // Positional command-line parameters (at most one wallet file).
    wxVector<wxString> m_params;
};
|
||||
|
||||
// Unit-test driver, run from an idle event when -t is given (defined elsewhere).
void UnitTest(wxIdleEvent& event);

// Command-line switch table handed to wxCmdLineParser in App::OnInitCmdLine.
// Each switch is negatable (e.g. -t-); OnCmdLineParsed maps them to App flags.
static constexpr wxCmdLineEntryDesc g_cmdLineDesc[] =
{
    { wxCMD_LINE_SWITCH, "h", "help", "displays help on the command line parameters.",
        wxCMD_LINE_VAL_NONE, wxCMD_LINE_OPTION_HELP },
    { wxCMD_LINE_SWITCH, "t", "test", "-t or --test performs unit test, exits on completion of "
        "unit test returning error value.",
        wxCMD_LINE_VAL_NONE, wxCMD_LINE_SWITCH_NEGATABLE},
    { wxCMD_LINE_SWITCH, "q", "quick", "-qt or --quick --test performs those unit tests that do not cause noticeable startup delay.",
        wxCMD_LINE_VAL_NONE, wxCMD_LINE_SWITCH_NEGATABLE},
    { wxCMD_LINE_SWITCH, "c", "complete", "-ct or --complete --test tests everything.",
        wxCMD_LINE_VAL_NONE, wxCMD_LINE_SWITCH_NEGATABLE},
    { wxCMD_LINE_SWITCH, "d", "display", "-d or --display enables display of log in front. "
        "Usually used with unit test as -dct. "
        "If the log is displayed, then does not exit on completion of unit test.",
        wxCMD_LINE_VAL_NONE, wxCMD_LINE_SWITCH_NEGATABLE},
    { wxCMD_LINE_SWITCH, "l", "log", "-l or --log enables display of log behind. "
        "Usually used with unit test as -lt. "
        "If the log is displayed, then does not exit on completion of unit test.",
        wxCMD_LINE_VAL_NONE, wxCMD_LINE_SWITCH_NEGATABLE},
    { wxCMD_LINE_SWITCH, "f", "focus", "-f or --focus causes focus events to be logged for debugging purposes. "
        "Usually used as -lf or -lft, as logging them without displaying them is useless.",
        wxCMD_LINE_VAL_NONE, wxCMD_LINE_SWITCH_NEGATABLE},
    // Single optional positional parameter: the wallet file to open.
    { wxCMD_LINE_PARAM, "", "", "mywallet.wallet",
        wxCMD_LINE_VAL_NONE, /*wxCMD_LINE_PARAM_MULTIPLE|*/wxCMD_LINE_PARAM_OPTIONAL},
    { wxCMD_LINE_NONE }
};
|
||||
|
||||
// Declares wxGetApp(), returning the App instance by reference.
DECLARE_APP(App)
// The one and only App object; set in App's constructor, cleared in its destructor.
inline App *singletonApp{nullptr};
|
102
bit_hacks.h
Normal file
@ -0,0 +1,102 @@
|
||||
#pragma once
|
||||
|
||||
// We should template this to use __popcnt64 if available
// but that is premature optimization.
// Population count: returns the number of set bits in c.
inline uint64_t bitcount(uint64_t c) {
    // Kernighan's method: c &= c - 1 clears the lowest set bit, so the
    // loop executes exactly once per set bit.
    uint64_t set_bits{ 0 };
    while (c) {
        c &= c - 1;
        ++set_bits;
    }
    return set_bits;
}
|
||||
|
||||
// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog "Bit Hacks"
// Find ⌊log2(v)⌋; rounded_log2(0) returns 0.
// We should template this to use lzcnt64, __builtin_clz or _BitScanReverse if available,
// but that is premature optimization.
inline auto rounded_log2(uint32_t v) {
    // Binary search for the highest set bit: for each power-of-two shift
    // (16, 8, 4, 2) test whether the upper part is nonzero, and if so shift
    // it down and accumulate the shift into the result.  The final term
    // handles the remaining 2-bit value.
    uint32_t result{ 0 };
    for (uint32_t shift = 16; shift > 1; shift >>= 1) {
        if (v > ((uint32_t{ 1 } << shift) - 1)) {
            v >>= shift;
            result |= shift;
        }
    }
    result |= (v >> 1);
    // result holds ⌊log2(v)⌋
    return result;
}
|
||||
|
||||
// For trailing bits, consider int __builtin_ctz (unsigned int x)
// http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightLinear

// Count the consecutive trailing zero bits of v.
// trailing_zero_bits(0) == 64: with no set bit, every position counts.
inline auto trailing_zero_bits(uint64_t v) {
    // Plain shift-and-test loop.  The branch-reduction tricks of the
    // reference implementation are behaviorally identical but much harder
    // to read, and this is not performance-critical code.
    if (v == 0) return 64u;
    unsigned int zeros{ 0 };
    while ((v & 1) == 0) {
        v >>= 1;
        ++zeros;
    }
    return zeros;
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
220
db_accessors.h
Normal file
@ -0,0 +1,220 @@
|
||||
#pragma once
|
||||
namespace ro {
|
||||
// Compile time test to see if a type can be directly read from or written to an sqlite3 file
// This can be used in if constexpr (is_sqlite3_field_type<T>::value)
// Detection idiom: the first test() overload is removed by SFINAE unless
// Icompiled_sql::Isqlite3_bind(int, U) is well formed for U, in which case
// it returns true; otherwise the defaulted-parameter fallback returns false.
// NOTE(review): when the bind overload exists, the call test<T>() has two
// viable candidates (one parameterless, one with a defaulted int); a strict
// reading of overload resolution makes that ambiguous — MSVC accepts it,
// confirm on other target compilers.
template <class T> struct is_sqlite3_field_type
{
    template <typename U> static constexpr decltype(std::declval<Icompiled_sql>().Isqlite3_bind(1, std::declval<U>()), bool()) test() {
        return true;
    }
    // Fallback chosen when the bind expression above is ill formed.
    template <typename U> static constexpr bool test(int = 0) {
        return false;
    }
    static constexpr bool value = is_sqlite3_field_type::template test<T>();
};

// Sanity check: int is directly bindable via Isqlite3_bind(int, int).
static_assert(is_sqlite3_field_type<int>::value);
|
||||
|
||||
//Owns a compiled sql statement and destroys it when it is deconstructed.
|
||||
//Has move semantics.
|
||||
class sql : public std::unique_ptr<Icompiled_sql> {
|
||||
public:
|
||||
class null {};
|
||||
sql(ISqlite3* p, const char* sz) :std::unique_ptr<Icompiled_sql>(sqlite3_prepare(p, sz)) {}
|
||||
sql(const std::unique_ptr<ISqlite3>& p, const char* sz) :std::unique_ptr<Icompiled_sql>(sqlite3_prepare(p.get(), sz)) {}
|
||||
// copy constructor
|
||||
sql(const sql& a) = delete;
|
||||
// move constructor
|
||||
sql(sql&& p) :std::unique_ptr<Icompiled_sql>(p.release()) { }
|
||||
// copy assignment
|
||||
sql& operator=(const sql) = delete;
|
||||
// Move assignment
|
||||
sql& operator=(sql&& p) {
|
||||
std::unique_ptr<Icompiled_sql>::reset(p.release());
|
||||
}
|
||||
sql(Icompiled_sql* p) :std::unique_ptr<Icompiled_sql>(p) {}
|
||||
sql(std::unique_ptr<Icompiled_sql>&& p) :std::unique_ptr<Icompiled_sql>(p.release()) { }
|
||||
~sql() = default;
|
||||
template <typename T>auto column(int i) const {
|
||||
if constexpr (ro::is_blob_field_type<T>::value) {
|
||||
auto st = (*this)->Isqlite3_column_blob(i);
|
||||
if (st.size_bytes() != sizeof(T)) throw BadDataException();
|
||||
static_assert (std::is_standard_layout<T>(), "not standard layout");
|
||||
static_assert (std::is_trivial<T>(), "not trivial");
|
||||
return reinterpret_cast<const T*>(&st[0]);
|
||||
}
|
||||
else if constexpr (std::is_integral<T>::value) {
|
||||
if constexpr (sizeof(T) > sizeof(int_least32_t)) {
|
||||
T retval = (*this)->Isqlite3_column_int64(i);
|
||||
return retval;
|
||||
}
|
||||
else {
|
||||
T retval = (*this)->Isqlite3_column_int(i);
|
||||
return retval;
|
||||
}
|
||||
}
|
||||
else if constexpr (std::is_same< T, std::span<const byte>>::value) {
|
||||
return (*this)->Isqlite3_column_blob(i);
|
||||
}
|
||||
else if constexpr (std::is_same< T, const char*>::value) {
|
||||
auto sz{ (*this)->Isqlite3_column_text(i) };
|
||||
// if (!IsValidUtf8String(sz)) throw NonUtf8DataInDatabase();
|
||||
return sz;
|
||||
}
|
||||
else {
|
||||
static_assert(false, "Don't know how to read this datatype from database");
|
||||
return null();
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>const sql& read(int i, T& j) const{
|
||||
if constexpr (ro::is_blob_field_type<T>::value) {
|
||||
auto st = (*this)->Isqlite3_column_blob(i);
|
||||
if (st.size_bytes() != sizeof(T)) throw BadDataException();
|
||||
static_assert (std::is_standard_layout<T>(), "not standard layout");
|
||||
static_assert (std::is_trivial<T>(), "not trivial");
|
||||
j = *reinterpret_cast<const T*>(&st[0]);
|
||||
}
|
||||
else if constexpr (std::is_integral<T>::value) {
|
||||
if constexpr (sizeof(T) > sizeof(int_least32_t)) {
|
||||
j = (*this)->Isqlite3_column_int64(i);
|
||||
}
|
||||
else {
|
||||
j = (*this)->Isqlite3_column_int(i);
|
||||
}
|
||||
}
|
||||
else if constexpr (std::is_same< T, std::span<const byte>>::value) {
|
||||
j = (*this)->Isqlite3_column_blob(i);
|
||||
}
|
||||
else if constexpr (std::is_same< T, const char*>::value) {
|
||||
j = (*this)->Isqlite3_column_text(i);
|
||||
}
|
||||
else {
|
||||
static_assert(false, "Don't know how to read this datatype from database");
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
void bind(int i, null) { (*this)->Isqlite3_bind(i); }
|
||||
template < typename T,
|
||||
typename std::enable_if<is_sqlite3_field_type<T>::value, int >::type dummy_arg = 0 >
|
||||
void bind(int i, T j) {
|
||||
(*this)->Isqlite3_bind(i, j);
|
||||
}
|
||||
template < typename T,
|
||||
typename std::enable_if<!is_sqlite3_field_type<T>::value, int >::type dummy_arg = 0 >
|
||||
void bind(int i, const T& j) {
|
||||
static_assert(ro::is_serializable<T>::value, "Don't know how to store this type in a database");
|
||||
(*this)->Isqlite3_bind(i, ro::serialize(j));
|
||||
}
|
||||
typedef Icompiled_sql::sql_result result;
|
||||
result step() {
|
||||
return (*this)->Isqlite3_step();
|
||||
}
|
||||
void final_step() {
|
||||
if (step() != result::DONE) throw SQLexception("SQL: Unexpected rows remaining");
|
||||
}
|
||||
void reset() {
|
||||
(*this)->Isqlite3_reset();
|
||||
}// https://sqlite.org/c3ref/reset.html
|
||||
|
||||
template<typename T, typename ...Args> void bind(int i, const T& first, const Args& ...args) {
|
||||
bind(i, first);
|
||||
bind(i + 1, args...);
|
||||
}
|
||||
template<typename ...Args> void do_one(const Args& ...args) {
|
||||
reset();
|
||||
if constexpr (sizeof...(args) > 0) {
|
||||
bind(1, args...);
|
||||
}
|
||||
final_step();
|
||||
}
|
||||
template<typename ...Args> auto read_one(const Args& ...args) {
|
||||
reset();
|
||||
if constexpr (sizeof...(args) > 0) {
|
||||
bind(1, args...);
|
||||
}
|
||||
return step() == result::ROW;
|
||||
}
|
||||
};
|
||||
|
||||
// A prepared statement used only for writes: operator() resets, binds its
// arguments from ?1 on, and executes to completion (private inheritance hides
// the read-side interface of sql).
class sql_update :sql {
public:
    using sql::sql;
    sql_update(ISqlite3* p, const char* sz) : sql(p, sz) {}
    template<typename ...Args> void operator()(const Args& ...args) {
        do_one(args...);
    }
};
|
||||
}
|
||||
|
||||
// Writes (or overwrites) row i of the "Misc" key/value table with value j.
// Row indices are the wallet metadata slots (e.g. 1 = file identifier,
// 2 = schema version — see display_wallet.cpp).
class sql_update_to_misc :ro::sql {
public:
    sql_update_to_misc(ISqlite3* p) : sql(p, R"|(REPLACE INTO "Misc" VALUES(?1, ?2);)|") {}
    sql_update_to_misc(const std::unique_ptr<ISqlite3>& p) : sql_update_to_misc(p.get()) {}
    template<typename T>void operator()(int i, const T& j) {
        do_one(i, j);
    }
    // Overload for raw pointers, which would otherwise bind as const T&.
    template<typename T>void operator()(int i, T* j) {
        do_one(i, j);
    }
};
|
||||
|
||||
// Reads the value column "m" of row i of the "Misc" key/value table.
// Usage: call operator()(i) to position on the row (true if it exists),
// then value<T>() or read(j) to extract the value.
class sql_read_from_misc :ro::sql {
public:
    sql_read_from_misc(ISqlite3 *p) : sql(p, R"|(SELECT "m" FROM "Misc" WHERE "index" = ?1;)|") {}
    sql_read_from_misc(const std::unique_ptr<ISqlite3>& p) : sql_read_from_misc(p.get()){}
    // True if row i exists; leaves the statement positioned on it.
    auto operator()(int i) {
        return read_one(i);
    }
    // Extract the value of the current row as T (see sql::column for lifetimes).
    template<typename T>auto value() {
        return column<T>(0);
    }
    // Extract the value of the current row into j.
    template <typename T> void read(T& j) const {
        sql::read<T>(0,j);
    }
};
|
||||
|
||||
class sql_insert_name {
|
||||
public:
|
||||
ro::sql csql_begin;
|
||||
ro::sql csql_into_names;
|
||||
ro::sql csql_namekey_into_keys;
|
||||
ro::sql csql_commit;
|
||||
sql_insert_name(ISqlite3* p) :
|
||||
csql_begin(p, R"|(BEGIN;)|"),
|
||||
csql_into_names(p, R"|(INSERT OR ROLLBACK INTO "Names" VALUES(?1);)|"),
|
||||
csql_namekey_into_keys(p, R"|(INSERT OR ROLLBACK INTO "Keys" VALUES(?1, last_insert_rowid(), 1);)|"),
|
||||
csql_commit(p, R"|(COMMIT;)|") {
|
||||
}
|
||||
sql_insert_name(const std::unique_ptr<ISqlite3>& p) : sql_insert_name(p.get()) {}
|
||||
void operator()(const char* psz, const ristretto255::point& pt) {
|
||||
csql_begin.do_one();
|
||||
try {
|
||||
csql_into_names.do_one(psz);
|
||||
csql_namekey_into_keys.do_one(pt);
|
||||
}
|
||||
catch (const std::exception & e) {
|
||||
csql_commit.do_one();
|
||||
throw e;
|
||||
}
|
||||
csql_commit.do_one();
|
||||
}
|
||||
};
|
||||
|
||||
class sql_read_name :ro::sql {
|
||||
public:
|
||||
sql_read_name(ISqlite3* p) : sql(p, R"|(SELECT * FROM "Names" WHERE OID = ?1;)|") {}
|
||||
sql_read_name(const std::unique_ptr<ISqlite3>& p) : sql_read_name(p.get()) {}
|
||||
bool operator()(int i) {
|
||||
return read_one(i) == Icompiled_sql::ROW;
|
||||
}
|
||||
auto name() const {
|
||||
return sql::column<const char*>(0);
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
// Magic number stored in "Misc" row 1, identifying a file as a wallet
// database (checked in display_wallet.cpp).  INT64_C replaces the MSVC-only
// i64 literal suffix so this header compiles with gcc/clang as well.
constexpr int64_t WALLET_FILE_IDENTIFIER(INT64_C(0x56d34bc5a655dd1f));
// Schema version stored in "Misc" row 2.
constexpr auto WALLET_FILE_SCHEMA_VERSION_0_0(1);
|
62
display_wallet.cpp
Normal file
@ -0,0 +1,62 @@
|
||||
#include "stdafx.h"
|
||||
using ro::base58;
|
||||
// Builds the wallet-display panel: opens the wallet database file, validates
// its identifier and schema, loads the master secret, then lists every key
// (name on the left, base58 public key on the right), verifying that each
// stored public key matches the key derived from the master secret and name.
// Throws MyException on any validation failure.
display_wallet::display_wallet(wxWindow* parent, wxFileName& walletfile) :
    wxPanel(parent, myID_WALLET_UI, wxDefaultPosition, wxDefaultSize, wxTAB_TRAVERSAL, _T("Wallet")),
    m_db(nullptr)
{
    wxLogMessage(_T("Loading %s"), walletfile.GetFullPath());
    if (!walletfile.IsOk() || !walletfile.HasName() || !walletfile.HasExt()) throw MyException("unexpected file name");
    if (!walletfile.FileExists())throw MyException(
        walletfile.GetFullPath().append(" does not exist.").ToUTF8()
    );
    m_db.reset(Sqlite3_open(walletfile.GetFullPath().ToUTF8()));
    sql_read_from_misc read_from_misc(m_db);
    // Misc row 1 must hold the wallet magic number, row 2 the schema version,
    // row 4 the master secret.
    if (!read_from_misc(1) || read_from_misc.value<int64_t>() != WALLET_FILE_IDENTIFIER)throw MyException(sz_unrecognizable_wallet_file_format);
    if (!read_from_misc(2) || read_from_misc.value<int64_t>() != WALLET_FILE_SCHEMA_VERSION_0_0 || !read_from_misc(4))throw MyException(sz_unrecognized_wallet_schema);
    read_from_misc.read(m_MasterSecret);
    if (!m_MasterSecret.valid()) throw MyException(sz_cold_wallets_not_yet_implemented);
    // Two columns: names (left, fixed) and public keys (right, stretchable).
    auto sizer = new wxBoxSizer(wxHORIZONTAL);
    m_lSizer = new wxBoxSizer(wxVERTICAL);
    m_rSizer = new wxBoxSizer(wxVERTICAL);
    sizer->Add(m_lSizer,0, wxGROW, 4);
    sizer->Add(m_rSizer, 50, wxGROW, 4);
    SetSizer(sizer);
    ro::sql read_keys(m_db, R"|(SELECT * FROM "Keys";)|");
    sql_read_name read_name(m_db);
    // m_db.reset(nullptr);// Force error of premature destruction of Isqlite3
    while (read_keys.step() == Icompiled_sql::ROW) {
        // Keys columns: 0 = public key point, 1 = rowid of the owning name, 2 = key use.
        auto pubkey = read_keys.column<ristretto255::point>(0);
        auto id = read_keys.column<int>(1);
        auto use = read_keys.column<int>(2);
        // Only key-use algorithm 1 is understood (cf. sql_insert_name).
        if (use != 1)throw MyException(sz_unknown_secret_key_algorithm);
        if (!read_name(id)) throw MyException(sz_no_corresponding_entry);
        const char* name = read_name.name();
        // Consistency check: the stored public key must equal the point derived
        // from the master secret for this name.
        if (m_MasterSecret(name).timesBase() != *pubkey)throw MyException(std::string(sz_public_key_of) + name + sz_fails_to_correspond);
        m_lSizer->Add(
            new wxStaticText(
                this,
                wxID_ANY,
                name,
                wxDefaultPosition, wxDefaultSize, wxALIGN_RIGHT|wxST_ELLIPSIZE_END
            ),
            10,
            wxEXPAND | // make horizontally stretchable
            wxALL, // and make border all around
            2);
        m_rSizer->Add(
            new wxStaticText(
                this,
                wxID_ANY,
                "#" + base58(*pubkey).operator std::string(),
                wxDefaultPosition, wxDefaultSize, wxALIGN_LEFT | wxST_ELLIPSIZE_END
            ),
            10,
            wxEXPAND | // make horizontally stretchable
            wxALL, // and make border all around
            2);
    }
    this->SetSize(this->GetParent()->GetClientSize());
    // Remember this wallet as the most recently used one.
    singletonFrame->m_LastUsedSqlite.Assign(walletfile);
}
|
||||
// Out-of-line definition kept to satisfy the declaration in the header;
// all members clean up through their own destructors.
display_wallet::~display_wallet() = default;
|
16
display_wallet.h
Normal file
@ -0,0 +1,16 @@
|
||||
#pragma once
|
||||
// Panel that displays the contents of a wallet file: one row per key,
// showing the key's name and its base58-encoded public key.
class display_wallet : public wxPanel
{
public:
	// Opens and validates the wallet database at the given path and builds
	// the UI; throws MyException when the file is missing or malformed.
	display_wallet(wxWindow*, wxFileName&);
	~display_wallet();
private:
	// Owning handle to the wallet's sqlite database; must outlive any
	// prepared statements created against it during construction.
	std::unique_ptr<ISqlite3> m_db;
	// Master secret loaded from the wallet; the constructor derives each
	// key's secret from it to verify the stored public keys.
	ristretto255::CMasterSecret m_MasterSecret;
	// Left column (names) and right column (public keys) of the panel.
	wxBoxSizer* m_lSizer;
	wxBoxSizer* m_rSizer;
};
|
||||
|
||||
|
||||
|
||||
|
2
docs/BlackHat-DC-09-Marlinspike-Defeating-SSL.pdf.URL
Normal file
@ -0,0 +1,2 @@
|
||||
[InternetShortcut]
|
||||
URL=http://www.blackhat.com/presentations/bh-dc-09/Marlinspike/BlackHat-DC-09-Marlinspike-Defeating-SSL.pdf
|
BIN
docs/Breaking_out_of_the_browser.pdf
Normal file
BIN
docs/Efficient_Error-Propagating_Block_Chaining.pdf
Normal file
2
docs/How to be a program manager - Joel on Software.URL
Normal file
@ -0,0 +1,2 @@
|
||||
[InternetShortcut]
|
||||
URL=http://www.joelonsoftware.com/items/2009/03/09.html
|
BIN
docs/InternetProtocol.pdf
Normal file
BIN
docs/PracticalLargeScaleDistributedKeyGeneration.pdf
Normal file
BIN
docs/SecureDistributedKeyGeneration.pdf
Normal file
BIN
docs/SoK_Diving_into_DAG-based_Blockchain_Systems.pdf
Normal file
BIN
docs/ThresholdSignatures.pdf
Normal file
BIN
docs/anonymous_blockchain_transactions.pdf
Normal file
BIN
docs/anonymous_multihop_locks_lightning_network.pdf
Normal file
153
docs/bitcoin_vulnerable_to_currency_controls.md
Normal file
@ -0,0 +1,153 @@
|
||||
---
|
||||
lang: 'en-US'
|
||||
title: How could regulators successfully introduce Bitcoin censorship and other dystopias
|
||||
---
|
||||
[Original document](https://juraj.bednar.io/en/blog-en/2020/11/12/how-could-regulators-successfully-introduce-bitcoin-censorship-and-other-dystopias/) by [Juraj Bednar](https://juraj.bednar.io/en/juraj-bednar-2/)
|
||||
|
||||
Publishing this is a violation of copyright. Needs to be summarized and paraphrased.
|
||||
|
||||
**Note:** A lot of people think this is purely about \>50% attack. Not true, [here’s how this unfolds with 10% of censoring hashrate.](https://threadreaderapp.com/thread/1327206062437621760.html)
|
||||
|
||||
Bitcoin is often said to be anonymous and uncensorable. Thanks to chain analysis, anonymity is to some extent a disputed wishful thinking from the past. And it looks like it won’t be so nice with censorship resistance either.
|
||||
|
||||
My reasoning begins with this quote from Twitter of fluffypony:
|
||||
Note: A lot of people think this is purely about >50% attack. Not true, here’s how this unfolds with 10% of censoring hashrate.
|
||||
|
||||
![blockseer](blockseer.jpg){width=100%}
|
||||
|
||||
( [More information here, for example](https://cointelegraph.com/news/slippery-slope-as-new-bitcoin-mining-pool-censors-transactions) )
|
||||
|
||||
This mining pool censors transactions that are included in the government
|
||||
blacklist. For the time being, the pool just leaves money on the table, so
|
||||
if the pool decides not to include “dirty” transactions, the end result is
|
||||
that they do not earn transaction fees for that transaction (and go for
|
||||
cheaper transactions) and the “dirty” transaction is mined in another block
|
||||
by a different miner. But I think it’s still a dangerous precedent and it
|
||||
gets scary when you think it through.
|
||||
|
||||
I think if governments or anti-money laundering organizations want to censor Bitcoin, that’s exactly the first step. Try it out on one pool. But if at least one pool mines these transactions, we’re fine, right? Not really.
|
||||
|
||||
Let’s think about what these organizations might do next. Spoiler alert, these steps lead to successful censorship of Bitcoin:
|
||||
|
||||
- Miners have invested a lot of money in the mining hardware, data centers, they are paying electricity and taxes – especially large mining operations. They are mostly **not cypherpunks**, but corporations with shareholders, the CEO wears a suit and a tie, the company has a business permit, all stamps in order, ... At the same time they need a bank account and an account at some Bitcoin exchange, because they have to pay suppliers (for energy, rent, taxes, ...) .
|
||||
- If the government comes in and says, “You can’t mine the blocks that spend these UTXOs”, or you’ll lose either bank account, exchange account, business permit or go to jail for money laundering, most of the big miners would comply. Blockseer is just a first example. They have shareholders that are awaiting dividends, they are not rebels against establishment. By not mining certain transactions, they are only losing some transaction fees and at this point, no one would know. Some miners even occasionally mine empty blocks even if mempool is not empty, so **not including a transaction is not an unusual thing to do**.
|
||||
- Very important note here is that **anti-money laundering regulations and blacklists are mostly global** and they are not approved by states’ parliaments or governments. The enforcement is done through network effects. If you want to be connected in the payment network of the world (SEPA, SWIFT, ACH, ...) is dependent on how well you fight money laundering and how well you are implementing AML standards. These standards are created by organizations such as the [FATF-GAFI](https://translate.googleusercontent.com/translate_c?depth=1&pto=aue&rurl=translate.google.com&sl=sk&sp=nmt4&tl=en&u=https://en.wikipedia.org/wiki/Financial_Action_Task_Force&usg=ALkJrhg3fRHeA2Jvu1BnXtmrZfFfIwnzxg) (remember [crypto travel rule](http://www.fatf-gafi.org/publications/fatfrecommendations/documents/regulation-virtual-assets-interpretive-note.html)?). Thus, it is quite possible that such an organization will start publishing blacklists and they will be accepted by miners in all countries under the threat of jail / loss of business license / loss of bank account / loss of exchange account. For the last two, you only need FATF AML network effects and not local law! The FATF-GAFI rules are considered an international standard of fighting against AML. States are just saying that some entities need to fight money laundering by referring to international standards that are not approved by any parliament in any country.
|
||||
- Of course, there will always be small scale miners who have bought ASIC and are mining in their kitchens or balconies. They could not care less about FATF-GAFI travel rules. But it won’t help much...
|
||||
- If these rules are followed by more than 50% of hashrate, there may be a simple addition to the rules: “If you build upon a block that contains prohibited transactions, you are laundering money.“ This is actually a soft fork introduced by the regulator (beware, not even directly by a state).
|
||||
- This creates a weird [Schelling point](https://en.wikipedia.org/wiki/Focal_point_(game_theory)) situation. Even if I am a miner mining in my kitchen and don’t care about any transaction blacklist, if there is even a double digit probability that if I (or my pool rather) find a block and it will become orphaned, because the next block will be found by a big miner, I will think twice about including a blacklisted transaction in the blockchain. The math is simple – I either get an additional \~ 5 USD transaction fee, but I could lose the whole block reward, or I don’t include this transaction and can keep the whole block reward (coinbase + all tx fees). Including a tainted transaction is low upside, huge downside decision. Even if I am not under a jurisdiction of such a rule and I don’t particularly want to censor transactions, if I understand risk-reward properly, I will just omit it. No harm done, someone else can try.
|
||||
- Majority hashrate can introduce such soft fork. In addition to the fact that the company can continue to do business and ensure a return on its investment in hardware, the second reason is economic. You don’t want to mine later orphaned blocks. But it gets even better for the censors. The effect would be the same as if the miners who do not meet the new soft fork rules shut down their mining machines (if there is at least one transaction with “dirty coins” in the mempool). Why? If a lot of miners decide to refuse tainted transactions and build upon the blocks with tainted transactions, every miner that mines them into block is basically burning electricity for nothing. It is the same as if they just stopped mining – the “softfork” hashrate is lower, so the reward per hashrate is higher. Introducing a softfork means more money for complying miners and no money for non-compliant miners. The compliant miners mine more blocks, it is as if the non-compliant miners did not exist. This means that **the miners have an economic incentive to enforce this rule**. Let me repeat this: Even the miners that are morally against this rule are economically motivated to comply.
|
||||
- If governments or FATF-GAFI do not want to wait for the majority of the hashrate, they can implement this rule even faster. Just tell the exchanges “if you want to be connected to the fiat payment network, your Bitcoin nodes can only accept compliant blocks, otherwise you will be laundering money and we will shut you off from the fiat network and send nice SEC agents to your shiny office”. Exchangers are dependent on connectivity to the fiat network, because new capital flows through it to the crypto economy and that’s how they make money – from the fees on trades. Kraken, Coinbase, Binance,... will gradually start running full nodes with a blacklist. They are using blacklists already, but they only refuse deposits from tainted addresses. I am talking about refusing blocks with tainted transactions. Guess what the miners do? At the end of the month, they need to send the mined coins to an exchange to get fiat to pay for electricity. For this, it is absolutely essential, that the exchange “sees” the blocks. The miners need to be on the same chain as the exchange, otherwise they are screwed.
|
||||
|
||||
The first step of this dystopian scenario has already taken place. We have the first (albeit minority) pool, which does not include some transactions. At this point, it means nothing, at worst, the transaction is mined a bit later. After the introduction of full soft fork (whether by the hashate majority or the economic majority of exchangers), Bitcoin’s non-censorship practically ended.
|
||||
|
||||
# Lightning network and tainted coins
|
||||
|
||||
We will talk about one more dystopian scenario. Imagine for a moment that you are running a node of the Bitcoin Lightning Network (BTW if you have never tried, [check out my intro course](https://hackyourself.io/product/174/)). So you have installed something like [Umbrel](https://getumbrel.com/) or [BtcPayServer](https://getumbrel.com/), you are a good Bitcoiner – you run a full-node Bitcoin, some Lightning daemon, you even run it all through Tor. You’ve opened a few channels, providing liquidity to route payments, and earning some fees. You do it all to help the network and verify transactions. So far so good. Or – so far, great!
|
||||
|
||||
One day, a local drug dealer on the dark market will check out your node. He needs to launder the drug Bitcoins. He will do this as follows:
|
||||
|
||||
- He will install two Lightning nodes, node A and node B. We will mark your node L.
|
||||
- He moves his dirty coins to node A onchain.
|
||||
- He buys incoming liquidity on node B (for example, he buys it from [Bitrefill](https://www.bitrefill.com/buy/lightning-channel/?hl=en))
|
||||
- On node A, after the coins are confirmed on chain, he will create a high-capacity channel(s) with your node L.
|
||||
- On node B, he creates a lightning invoice to receive coins. He will pay the invoice from node A.
|
||||
- Node A after using all sending capacity is turned off and deleted – it doesn’t even have to close the channel, as there is no capacity on his side, he will only lose the channel reserve. Or he can close the channel and deal with the reserve in a later transaction.
|
||||
- He closes the channel on node B (cooperative close) and sends nice clean money to his wallet.
|
||||
- When the channel between node A and node L closes (for example you force close it), you get dirty money from drug sales – tainted coins. You can’t even send them away through Lightning because node A no longer exists. They will end up in your on-chain wallet.
|
||||
|
||||
If there are common chain analysis issues and these “dirty” coins are a problem – either legal, problems with depositing them to an exchange or even the fact that these coins are so tainted that no miner will mine the transaction – letting other nodes open channels with you (with their UTXO) is a serious security risk. If someone succeeds and the coins are marked as “dirty” only after the attacker does this operation, it is quite possible that you will not be able to move the coins anymore.
|
||||
|
||||
Of course, people will probably not be satisfied with this situation and will rightly complain to the state or regulator that they have nothing to do with drug sales and that someone has just opened a channel with them. One possible solution is for the state and anti-money-laundering organizations to say “sorry, our bad, this censorship thing was a bad idea”. Another solution is much more likely:
|
||||
|
||||
*“Dear users of the lightning network, we see that you often get dirty coins. We’ve passed a new law that addresses your issue. We therefore recommend that you install this open-source module in your Lightning node. Through the API, it verifies that the UTXO through which the other party wants to open the channel is clean. If it is clean, we return a state-signed proof of purity as a result of an API call. If you attach this proof of purity to the transaction as supplementary data, the compliant miner will happily mine it for you, because of course you could not know that the coins were dirty – we did not know either! Thank you for your cooperation in fighting money laundering!*
|
||||
|
||||
***API Call parameters** : KYC ID of the caller, list of UTXOs in the onchain wallet of the node (why not collect extra data that the state does not need? Have you ever seen a government form that did not ask you when and where you were born? Of course they want to know the age and purity of your UTXOs as well), unsigned transaction by which the other party wants to open the channel*
|
||||
|
||||
***Output** : Answer yes-no, State digitally signed certificate of transaction purity*
|
||||
|
||||
You can get your KYC ID at any branch of the Ministry of the Interior, SEC, just bring two documents and proof of ownership of UTXO – a message signed with your identity with the address keys ”
|
||||
|
||||
(*Crypto-anti money laundering lightning enablement act of 2021*)
|
||||
|
||||
(Why enablement? Because when government wants to regulate something, meaning ban something, they always sell it to you that they are enabling you to do something. You know, if it is not forbidden, it is enabled by default, but for some reason, in 2020, if government regulates it, it enables it... Weird, right?)
|
||||
|
||||
OK, they probably won’t be able to pass such a law in 2021, the soft fork dystopia must happen first. But a similar approach has already been taken by the European Union when verifying reverse charge VAT numbers – if you are a VAT paying entity and the customer is a VAT payer in another EU country and therefore you do not invoice VAT, you can verify their VAT ID on the European Union website (or via an API) and **save the call result**. If it is not valid and you do not have stored evidence that you tried to verify it (and it was valid then), you have (perhaps) a problem. But I don’t know that anyone would enforce this rule.
|
||||
|
||||
# Tadaaaa, I’ll do a coinjoin
|
||||
|
||||
If you have “dirty” coins and the miners refuse to mine transactions containing dirty coins, you will most certainly not do a coinjoin.
|
||||
|
||||
Coinjoin is a standard transaction that has inputs – if they are already marked as dirty, then you will not get such a coinjoin transaction into the blockchain (soft-forked away!).
|
||||
|
||||
If you get it into blockchain and the coins are marked later, you have a problem – you could even put completely clean coins in the coinjoin and suddenly you are marked as a drug dealer on the dark market because some other coinjoin participant was marked and tried to launder money.
|
||||
|
||||
If anyone could just use coinjoin to avoid all this censorship, they would. So let’s do it the other way around – coinjoin is an act of money laundering and if any input is tainted, all outputs are tainted.
|
||||
|
||||
Ironically enough, the only safe coinjoin is if the coinjoin provider (and preferably also use) uses and enforces a blacklist. I’ve heard that some coinjoin providers already do this. I don’t know what is worse – if you enforce the blacklist, you are censoring and hurting fungibility. If you are not enforcing blacklist, you taint all your users’ coins and they will be pissed when they want to use them and are not able to.
|
||||
|
||||
Of course, this topic is already relevant now, because many services (such as exchanges) reject dirty coins – and many also reject coinjoin outputs. Even if you withdraw crypto from an exchange to a Wasabi or Samourai and then send it directly to a mixer, you will get a love letter from your exchange, telling you nicely to stop doing that, or they will close your account next time. Of course, they know your name and you have shown your ID, so if you piss them off, they will also report you to your local anti money laundering unit (in my country, that would be financial police).
|
||||
|
||||
# Change it to Monero and back
|
||||
|
||||
If someone has Bitcoins that are not exactly clean and wants to keep Bitcoins, they can exchange Bitcoin for Monero using a decentralized exchange and then after some time (and gradually) change Monero back for Bitcoin, through a reputable exchange (eg [xmr.to](https://xmr.to/)). This will of course cost a few percent in exchange fees and you are also exposed to XMR/BTC exchange rate risk (although it can also be an upside risk).
|
||||
|
||||
If many people solve this problem in this way, there will be a lot of tainted coins left in the wallets of the exchanges and their clients. I don’t know how people will deal with it.
|
||||
|
||||
The key is to do it before the coins are marked tainted of course (similar to lightning strategy).
|
||||
|
||||
# Possible solutions to censorship issues
|
||||
|
||||
Anonymous cryptocurrencies such as Monero do not suffer from this problem, at least not so much. The sender, recipient, and amount sent are not visible in the Monero transaction. The Monero transaction refers to your input and ten other inputs.
|
||||
|
||||
This might look similar to a bitcoin coinjoin transaction, but there are key differences:
|
||||
|
||||
- In Monero, this is how you make any transaction. Miners will not mine transactions that do not have decoy inputs. That’s just how Monero works. Changing this would not be a soft fork, but a hard fork (you would need to relax the consensus rules).
|
||||
- In Coinjoin, you are signing the coinjoin transaction with your key. You have seen it, understand what you are doing and all the parties approve and sign. In Monero, only you sign, the other people are unwilling participants that your wallet chose. If you appear as one possible input of a transaction in a drug deal, you did not even have to know it – you did not even need to be online when it happened.
|
||||
- Monero uses stealth addresses, so you cannot blacklist an address. You can only blacklist a particular transaction (without view key of the address). If one address of a dark market is revealed, in Bitcoin, you could cluster many more dark market addresses. In Monero, if a transaction is revealed, it can be blacklisted, but that’s about it. You can maybe blacklist a few transactions with a 90% probability that you are wrong about them.
|
||||
- Although I’m not a fan of ASIC resistance, because network security depends on proof of work, Monero is much more likely to decentralize miners and have someone mining “in their kitchen”. Mining takes place on regular computers, not on dedicated devices. At the same time, miners include botnets, which is sad, but such miners are motivated not to censor transactions, because I assume they use Monero themselves and need to anonymously spend the rewards. I assume it is much harder to come to the majority of the Monero hashrate and demand they do something (I am not sure about this though).
|
||||
|
||||
So should we just ditch Bitcoin and switch to Monero? Well, there is a different kind of censorship happening – exchanges are kicking privacy coins out. Most [recently ShapeShift](https://decrypt.co/47508/shapeshift-quietly-delists-monero-privacy-coin).
|
||||
|
||||
Here comes the Bitcoin network effect. It is enough if there is one exchange in the world that exchanges Monero for clean untainted Bitcoin without KYC and then any Bitcoin exchange can change it to fiat or anything else. Such exchange, of course, involves two fees (Monero for Bitcoin and Bitcoin for fiat), but it is still possible.
|
||||
|
||||
I call this rule “crypto to crypto fungibility”. Crypto to crypto exchanges are not so easily regulated and all it takes is one that works reasonably well and it does not matter if someone bans one cryptocurrency. It is a “ban all or none” effect in practice.
|
||||
|
||||
# Two Bitcoins
|
||||
|
||||
It is very likely that hard core Bitcoiners will try to resist such censorship. And that’s good. One question is: how is it possible technically? A soft fork is a completely valid chain, with following the consensus rules and a majority soft fork will just be Bitcoin. It is hard to enforce that a miner **includes a transaction**. Consensus rules are good for excluding transactions. Even if there is a hard fork or a checkpoint that all nodes agree on and that includes a tainted transaction, right from the next block a soft fork can continue and censor transactions, including the outputs of the mined tainted transaction. So you **can not easily “fork yourself off” to a censorship resistant fork**. Censorship decisions are made in each new block. You have to win this fight one block at a time, forever, until the end of timechain.
|
||||
|
||||
All this can result in two types of Bitcoin – KYCed and clean vs “black market” Bitcoin. Whether they will live on one blockchain or Bitcoin will be divided into two forked chains depends mainly on the miners and exchanges and their willingness to succumb to the regulatory pressure of the regulators and violent coercion if they fail to comply.
|
||||
|
||||
A paradoxical solution might be to change the hashing algorithm, which would significantly reduce network security (Bitcoin’s Proof of Work currently makes Bitcoin the safest blockchain on the planet). In this way, two Bitcoins would also probably be created – less safe, less regulated and mined in the kitchens all over the world and on the other hand safer but heavily regulated. Where it goes from there, no one knows. Is this enough to avoid censorship? Probably not. Introducing better privacy might help, but then why not just use Monero?
|
||||
|
||||
Thus, the majority hashrate (i.e. miners who control more than half of the power of the network) decide on censorship. These are companies that have their managers, buildings and state licenses. If decentralized mining pools do not have an absolute majority, it will pay off financially to mine on a regulated pool, as we said above.
|
||||
|
||||
The idea that a large miner will “rebel” and move to p2pool (or use Stratum2 and create their own blocks, not dictated by a pool) and problem solved is very naive. Mining companies that control significant hashrates need to achieve a return on their investment in the first place. They are very conservative, don’t want to risk losing rewards by mining blocks that are later orphaned. So the main incentive is not “the government will kick our door if we mine this transaction”. The incentive is simple “let’s kick out all the hashrate that does not comply, more block rewards for us and make sure that no one will kick out our hashrate”.
|
||||
|
||||
Bitcoiners like to signal the virtue of running their own node and how this makes sure that all rules are followed and helping to decentralize the network. While this is nice and I applaud everyone who runs their own node, decentralization from the point of view of censorship is mainly about miners, and running one’s own node will not help in any way.
|
||||
|
||||
(Of course, we can create new rules – blocks that do not involve censored transactions with a sufficient fee to reject as blocks of censors. This has several problems though – how do you know that everyone sees this transaction? If there is already a consensus about transactions, you would not need miners. So this is nicely said, but very difficult to actually achieve. It would probably also lead to two Bitcoins – guess which version would Coinbase, Kraken, Binance, Bitstamp,... and for that matter Microstrategy run?
|
||||
|
||||
# Conclusion
|
||||
|
||||
The idea of the unstoppability and uncontrollability of Bitcoin is, in my opinion, an outdated concept. In the past, we could not imagine what censors and regulators could do. We thought that a rule like the crypto travel rule from FATF that is already in force was pure sci-fi – how could states agree to regulate all exchanges in the world the same way? They cannot even agree on the type of power outlet! Yet, it happened. FATF rules are enforced globally through network effects. These rules apply in Europe, the US and China as well. Without any need for elected officials to pass it through democratic rituals. The way that power and enforcement works in the last few years has changed dramatically. While Bitcoiners still believe it is not possible, we are being regulated more and more – and using power structures that have nothing to do with the ideals of democracy. One OECD office in Paris is writing worldwide AML regulations. Another office in the same building created the reporting standards that invade our privacy (the Common Reporting Standard – CRS). Payment networks create and enforce their own regulations – even outside their users!
|
||||
|
||||
What can we do to make sure this dystopia does not happen? Build a parallel society that does not rely on regulated services (shops, courts, exchanges, ...). Treat anonymity and privacy as a feature. A core feature. [Reject](https://kycnot.me/) any KYC-requiring service in principle and become an ethical crypto dealer. Buy and sell crypto. Support any services that do not ask for our identity. Promote, build and use decentralized exchanges, ATMs, and local in-person crypto exchange communities. And build a crypto economy that blatantly rejects these ideas, but not only on social media, but in reality.
|
||||
|
||||
If the split of Bitcoin into regulated and unregulated really occurs, the unregulated one should have the greatest network effect, the greatest economic power. It should be the Bitcoin, in which we settle small debts with friends and family. The Bitcoin with which we buy vegetables that someone else grew in their garden. And we should also support cryptocurrencies like Monero, which are not traceable and their censorship is much more difficult to achieve. It is not that hard to admit, that privacy is a good thing to have, even if you are a “Bitcoin is hard money maximalist”. They play along nicely.
|
||||
|
||||
If this Bitcoin’s global censorship really takes place under the leadership of states or other AML organizations, we should have the strength to say “we don’t want this centralized coin, it’s the same shit as your central bank issued digital fiat money.” And “no, thank you.”
|
||||
|
||||
And the time to start building this situation and this network effect is now.
|
||||
|
||||
# Learn more
|
||||
|
||||
A [Twitter thread](https://threadreaderapp.com/thread/1327206062437621760.html) about how this attack unfolds with 10% hashrate enforcing censorship and what is the cost-benefit analysis for individual miners.
|
||||
|
||||
I made a [course](https://hackyourself.io/product/174/) about how to settle small debts among friends and family and use Lightning network to pay through non-KYC exchanges. If you have never tried Lightning network and don’t know where to start, this might be a good start. Open channels when fees are low, you can thank me later.
|
||||
|
||||
I also produce a podcast dedicated to increasing our options, thus increasing our freedom. It’s called [Option Plus Podcast](https://optionplus.io/). There are episodes about opting out, strategies for being more free here and now. If you want to learn more about strategy of parallel societies, I recommend a [Cypherpunk Bitstream episode](https://taz0.org/bitstream/0x0b-the-roots-of-parallel-polis/), where Smuggler and Frank invited me and Martin to talk about Parallel Polis – a strategy to achieve more liberty in a communist dictatorship of former communist Czechoslovakia. Yes, we can use this strategy today.
|
||||
|
||||
If you want to learn more about financial surveillance and how it applies to crypto – and especially how it is made and enforced outside of parliaments and governments, check out [my talk from HCPP on Financial Surveillance and Crypto Utopias](https://juraj.bednar.io/en/talk-en/2019/10/16/financial-surveillance-and-crypto-utopias-recording-from-hcpp19/).
|
||||
|
||||
You can also [follow me on Twitter \@jurbed](https://twitter.com/jurbed).
|
106
docs/block_chain_scaling.md
Normal file
@ -0,0 +1,106 @@
|
||||
---
|
||||
title: Blockchain Scaling
|
||||
---
|
||||
A blockchain is an immutable append only ledger, which ensures that
|
||||
everyone sees the same unchanging account of the past. A principal
|
||||
purpose of blockchain technology is to track ownership of assets on a
|
||||
ledger that is distributed among a large number of computers in such a
|
||||
way that it is not vulnerable to alteration, destruction, or loss at any single location.
|
||||
|
||||
The peers that determine the consensus on each new block only need to
|
||||
have the unspent transaction outputs in fast storage, whereupon they
|
||||
generate a new consensus of unspent transaction outputs. They could
|
||||
throw away all the transactions as soon as there is consensus on the
|
||||
current state that is the result of applying those transactions to the previous
|
||||
state. Or some of them could throw away all the transactions, while others
|
||||
transfer them to distributed and slow storage.
|
||||
|
||||
But this creates the opportunity to inject a fake history with no past
|
||||
through a fifty one attack.
|
||||
|
||||
At scale you have a lot of transactions, a lot of clients, and considerably
|
||||
fewer peers, so you worry about peers conspiring to quietly introduce new
|
||||
unspent transactions with no past through the fifty one percent attack.
|
||||
|
||||
Any dilution has to take place through a process that leaves clearly in the
|
||||
blockchain evidence of dilution that everyone can see. One way to make
|
||||
sure of this is that when any peer asserts that a transaction set leads to a
|
||||
mutable state, and another peer does not agree, peers that persistently
|
||||
agree will never reach consensus with peers that disagree, and we get an
|
||||
automatic fork.
|
||||
|
||||
When there are many transactions, the computers constructing the final
|
||||
consensus hash of the final block, which testifies to the entire immutable
|
||||
consensus past, are necessarily rather few, rather large, and owned by a
|
||||
rather small number of rather wealthy people.
|
||||
|
||||
To keep them honest, we need a widely distributed history of at least the past
|
||||
few weeks.
|
||||
|
||||
We need a large number of people making sure, and able to make sure, that the
|
||||
history is consistent from day to day, not just from block to block.
|
||||
|
||||
Everyone should keep the transactions to which he is a party, and the subset
|
||||
of the Merkle–patricia tree linking them to the past consensus on unspent
|
||||
transactions, and to the current consensus of the entire history of the
|
||||
blockchain, but this is not enough to prevent a 51% attack from injecting new
|
||||
history with no past.
|
||||
|
||||
The full list of unspent transaction outputs needs to be kept locally in very
|
||||
fast storage sorted and accessed by a primary key that keeps the transaction
|
||||
approximately in temporal order, so that older, and less frequently needed,
|
||||
transaction outputs are stored together, and newer and more likely to be
|
||||
needed transaction outputs are stored together.
|
||||
|
||||
As this ledger can potentially grow quite large, it needs to be subdivided
|
||||
into general ledger and subledgers. When the general ledger is
|
||||
maintained on a blockchain, the chain without the subledgers directly on it in
|
||||
full is known as the mainchain. Where a subledger, like the mainchain, is
|
||||
maintained by multiple entities, the subledger is called a “sidechain”. The
|
||||
mainchain contains aggregated and summarized data about the sidechains,
|
||||
and the sidechains can themselves have sidechains.
|
||||
|
||||
Ultimately we want a mainchain that functions like a central bank, with
|
||||
several hundred peers that function like banks, in that many peers on the
|
||||
mainchain maintain a sidechain. Each peer hosts hundreds of thousands of
|
||||
client wallets.
|
||||
|
||||
When the mainchain runs into scaling limits, transactions between
|
||||
individuals will be pushed down into sidechains. A transaction between
|
||||
sidechains will typically have a very large number of inputs from a very
|
||||
large number of sidechains, and a very large number of outputs to a very
|
||||
large number of sidechains. In such a transaction each sidechain usually
|
||||
provides only one or two inputs, usually one input, and receives only one or
|
||||
two outputs, usually one output, that one input and one output representing
|
||||
the aggregate of many transactions with many other sidechains, and each
|
||||
transaction between two sidechains representing the aggregate of many
|
||||
transactions between client wallets.
|
||||
|
||||
But for an input and an output to or from a sidechain to be reasonably
|
||||
short, rather than proportional to the number of peers on the sidechain, we
|
||||
are going to have to have a linear chain of signatures.
|
||||
|
||||
You make a payment with a client wallet that works like a bill of exchange.
|
||||
Your host is a peer on the mainchain. It sends your bill of
|
||||
exchange to the other guy’s host. What appears on the mainchain is the
|
||||
root of a Merkle tree of all the bills of exchange, and the settlements
|
||||
between peers, each such payment being the sum of many bills of exchange, each
|
||||
such payment representing the Merkle tree of many bills of exchange.
|
||||
|
||||
The mainchain records that each sidechain has such and such an amount of money, and owes such and such an amount of money to its client wallets, but only knows totals over all the client wallets of a sidechain. Does not know individual client wallets.
|
||||
|
||||
The individual client wallet has a chain of hashes leading to the root hash of the mainchain consensus that proves it has the money. But the lower levels in this chain of hashes do not appear on the mainchain.
|
||||
|
||||
When one client wallet makes a payment to another client wallet, that
|
||||
payment is final when it is a leaf on the Merkle tree of the consensus hash,
|
||||
but only the upper nodes of the tree, the aggregate payments between
|
||||
mainchain peers, appear on the blockchain.
|
||||
|
||||
The lower nodes of the tree are held in the sidechains, and the very lowest nodes of the tree are held in the client wallets.
|
||||
|
||||
When a transaction between sidechains occurs on the mainchain, the root
|
||||
of the sidechain Merkle tree is placed or referenced on the mainchain.
|
||||
But this does not in itself prove that the sidechain transactions that it
|
||||
summarizes are valid or authorized.
|
||||
|
||||
If any one sidechain peer in good standing on the sidechain objects to the proposed hash of the sidechain state which is to be incorporated on the mainchain, it can demand that transactions necessary to derive the new hash from values attested to by a recent older hash be lifted from the sidechain to the mainchain, putting up some money for what is in effect a bet that the proposed mainchain transaction cannot be justified by the underlying sidechain transactions. If the proposed hash of the sidechain state is supported by valid transactions, and the mainchain peers validate the proposed hash, then the peer that insisted on the sidechain data being raised to the mainchain loses its good standing, and has to pay a fee reflecting the cost of pushing all those transactions onto the mainchain. If not, those sidechain peers who signed the proposed, but unsupported, hash lose their good standing and have to pay the costs of pushing all those transactions onto the mainchain. It should be rare that portions of a sidechain are raised into the mainchain. Trust, to save bandwidth, storage space, and time, but verify.
|
69
docs/blockchain_structure_on_disk.md
Normal file
@ -0,0 +1,69 @@
|
||||
---
|
||||
title: Block chain structure on disk.
|
||||
---
|
||||
|
||||
The question is: One enormous SQLite file, or actually store the chain as a collection of files?
|
||||
|
||||
In the minimum viable product, the blockchain will be quite small, and it will be workable to put it in one big SQLite file.
|
||||
The trouble with one enormous SQLite file is that when it gets big enough, we face a high and steadily increasing risk of one sector on the enormous disk going bad, corrupting the entire database. SQLite does not handle the loss of a single sector gracefully.
|
||||
|
||||
We will eventually need our own database structure designed around
|
||||
Merkle-patricia trees, append only data structures, and accommodating a near
|
||||
certainty of sectors and entire disks continually going bad. When one hundred
|
||||
disks have to be added every year, entire disks will be failing every day or
|
||||
so, and sectors will be failing every second.
|
||||
|
||||
Eventually, a typical peer will have several big racks of disks. When we
|
||||
replace the world monetary system, twenty servers each with twenty disks, two
|
||||
hundred thousand transaction inputs and outputs a second, (for each
|
||||
transaction minimally involves one input and two outputs, a change output and
|
||||
a payment output, and usually a lot more. Each signature is sixty four bytes.
|
||||
Each input and output is at least forty bytes. So, say, on average two inputs
|
||||
and two outputs per payment – say, perhaps 288 bytes per payment, and we will
|
||||
want to do one hundred thousand payments per second. So, about nine hundred
|
||||
terabytes a year. With 2020 disk technology, that is about seventy five twelve
|
||||
terabyte hard drives per year, costing
|
||||
about fifty five thousand dollars per year, to store all the
|
||||
transactions of the world forever.
|
||||
|
||||
If we are constructing one block per five minutes, each block is about ten
|
||||
gigabytes. Sqlite3 cannot possibly handle that – the blocks are going to have
|
||||
to be dispersed over many drives and many physical computers. We are going to
|
||||
have to go to our own custom low level format, in which a block is distributed
|
||||
over many drives and many servers, the upper part of the block Merkle-patricia
|
||||
tree duplicated on every shard, but the lower branches of the tree each in
|
||||
a separate shard. Instead of a file structure with many files on one enormous
|
||||
disk, we have one enormous data structure on servers, each server with many
|
||||
disks.
|
||||
|
||||
Optimal solution is to store recently accessed data in one big SQLite file,
|
||||
while also storing the data in a large collection of blocks, once it has become
|
||||
subject to wide consensus. Older blocks, fully incorporated in the current
|
||||
consensus, get written to disk in our own custom Merkle-patricia tree format,
|
||||
with append only Merkle-patricia tree node locations, [a sequential append only
|
||||
collection of binary trees in postfix tree format](
|
||||
merkle_patricia-dac.html#a-sequential-append-only-collection-of-postfix-binary-trees).
|
||||
|
||||
Each file, incorporating a
|
||||
range of blocks, has its location on disk, time, size, and the roots of its
|
||||
Merkle-patricia trees recorded in the SQL database. On program launch, the
|
||||
size, touch time, and root hash of the newest block in the file are checked. If
|
||||
there is a discrepancy, we do a full check of the Merkle-patricia tree, editing
|
||||
it as necessary to an incomplete Merkle-patricia tree, download missing data
|
||||
from peers, and rebuild the blocks, thus winding up with newer touch dates.
|
||||
Our per peer configuration file tells us where to find the block files, and if
|
||||
they are not stored where expected, we rebuild. If stored where expected, but
|
||||
touch dates unavailable or incorrect (perhaps because this is the first time the
|
||||
program launched) then the entire system of Merkle-patricia trees is validated,
|
||||
making sure the data on disk is consistent.
|
||||
|
||||
How do we tell the one true blockchain, from some other evil blockchain?
|
||||
Well, the running definition is consensus, that you can interact with other
|
||||
peers because they agree on the running root hash. So you downloaded this
|
||||
software from somewhere, and when you downloaded it, you got the means to
|
||||
contact a bunch of peers, whom we suppose agree, and each have evidence that
|
||||
other peers agree. And, having downloaded what they agree on, you then treat
|
||||
it as gospel and as more authoritative than what others say, so long as touch
|
||||
dates, file sizes, locations, and the hash of the most recent block in the file
|
||||
are consistent, and the internal contents of each file are consistent with root
|
||||
of the most recent tree.
|
476
docs/blockdag_consensus.md
Normal file
@ -0,0 +1,476 @@
|
||||
---
|
||||
title: Blockdag Consensus
|
||||
---
|
||||
|
||||
# Hedera, Bitcoin Proof of Work, and Paxos
|
||||
|
||||
## Paxos
|
||||
|
||||
All consensus algorithms that work are equivalent to Paxos.
|
||||
|
||||
All consensus algorithms that continue to work despite Byzantine Fault
|
||||
and Brigading are equivalent to Byzantine Fault Tolerant Paxos.
|
||||
|
||||
But Paxos is not in fact an algorithm. It rather is an idea that underlies
|
||||
actual useful algorithms, and in so far as it is described as algorithm, it is
|
||||
wrong, for the algorithm as described describes many different things that
|
||||
you are unlikely to be interested in doing, or even comprehending, and the
|
||||
algorithm as described is incapable of doing all sorts of things that you are
|
||||
likely to need done. Even worse it is totally specific to one particular
|
||||
common use case, which it studiously avoids mentioning, and does not
|
||||
mention any of the things that you actually need to couple it in to this
|
||||
specific case, making the description utterly mysterious, because the
|
||||
writer has all the specific details of this common case in mind, but is carefully avoiding any mention of what he has in mind. These things are
|
||||
out of scope of the algorithm as given in the interests of maximum
|
||||
generality, but the algorithm as given is not in fact very general and makes
|
||||
no sense and is no use without them.
|
||||
|
||||
Despite the studious effort to be as generic as possible by omitting all of
|
||||
the details required to make it actually do anything useful, the algorithm as
|
||||
given is the simplest and most minimal example of the concept,
|
||||
implementing one specific form of Paxos in one specific way, and as
|
||||
given, will very likely not accomplish what you need to do.
|
||||
|
||||
Paxos assumes that each peer knows exactly how many peers there should
|
||||
be, though some of them may be permanently or temporarily unresponsive
|
||||
or permanently or temporarily out of contact.
|
||||
|
||||
In Paxos, every peer repeatedly sends messages to every other peer, and
|
||||
every peer keeps track of those messages, which if you have a lot of peers
|
||||
adds up to a lot of overhead.
|
||||
|
||||
Hedera assumes that each peer knows exactly how many peers there
|
||||
should be, *and that each peer eventually gets through*.
|
||||
|
||||
Which is a much stronger assumption than that made by Paxos or Bitcoin.
|
||||
|
||||
In Hedera, each peer's state eventually becomes known to every other
|
||||
peer, even though it does not necessarily communicate directly with every
|
||||
other peer, which if you have a whole lot of peers still adds up to a whole
|
||||
lot of overhead, though not as much as Paxos. It can handle more peers
|
||||
than Paxos, but if too many peers, still going to bite.
|
||||
|
||||
A blockdag algorithm such as Hedera functions by in effect forking all the
|
||||
time, and resolving those forks very fast, but if you have almost as many
|
||||
forks as you have peers, resolving all those forks is still going to require
|
||||
receiving a great deal of data, processing a great deal of data, and sending
|
||||
a great deal of data.
|
||||
|
||||
Hedera and Paxos can handle a whole lot of transactions very fast, but
|
||||
they cannot reach consensus among a very large number of peers in a
|
||||
reasonable time.
|
||||
|
||||
Bitcoin does not know or care how many peers there are, though it does
|
||||
know and care roughly how much hashing power there is, but this is
|
||||
roughly guesstimated over time, over a long time, over a very long time,
|
||||
over a very very long time. It does not need to know exactly how much
|
||||
hashing power there is at any one time.
|
||||
|
||||
If there are a very large number of peers, this only slows Bitcoin
|
||||
consensus time down logarithmically, not linearly, while the amount of
|
||||
data per round that any one peer has to handle under Hedera is roughly
|
||||
$\bigcirc\big(N\log(N)\big)$ where N is the number of peers. Bitcoin can handle an
|
||||
astronomically large number of peers, unlike Hedera and Paxos, because
|
||||
Bitcoin does not attempt to produce a definitive, known and well defined
|
||||
consensus. It just provides a plausible guess of the current consensus, and
|
||||
over time you get exponentially greater certainty about the long past
|
||||
consensuses. No peer ever knows the current consensus for sure, it just
|
||||
operates on the recent best guess of its immediate neighbours in the
|
||||
network of what the recent consensus likely is. If it is wrong, it eventually
|
||||
finds out.
|
||||
|
||||
## Equivalence of Proof of Work and Paxos
|
||||
|
||||
Bitcoin is of course equivalent to Byzantine Fault Tolerant Paxos, but I
|
||||
compare it to Paxos because Paxos is difficult to understand, and Byzantine
|
||||
Fault Tolerant Paxos is nigh incomprehensible.
|
||||
|
||||
In Paxos, before a peer suggests a value to its peers, it must obtain
|
||||
permission from a majority of peers for that suggestion. And when it seeks
|
||||
permission from each peer, it learns if a value has already been accepted
|
||||
by that peer. If so, it has to accept that value, only propose that value in
|
||||
future, and never propose a different value. Which if everyone always gets
|
||||
through, means that the first time someone proposes a value, that value,
|
||||
being the first his peers have seen, will be accepted by someone, if only by
|
||||
that peer himself.
|
||||
|
||||
Paxos is in effect a method for figuring out who was "first", in an
|
||||
environment where, due to network delays and lost packets, it is difficult
|
||||
to figure out, or even define, who was first. But if most packets mostly get
|
||||
through quickly enough, the peer that was first by clock time will usually
|
||||
get his way. Similarly Bitcoin, the first miner to construct a valid block at
|
||||
block height $N$ usually winds up defining the consensus for the block at
|
||||
block height $N$.
|
||||
|
||||
This permission functionality of Paxos is equivalent to the gossip process
|
||||
in Bitcoin, where a peer learns what the current block height is, and seeks
|
||||
to add another block, rather than attempt to replace an existing block.
|
||||
|
||||
In Paxos, once one peer accepts one value, it will eventually become the
|
||||
consensus value, assuming that everyone eventually gets through and that
|
||||
the usual network problems do not foul things up. Thus Paxos can provide
|
||||
a definitive result eventually, while Bitcoin's results are never definitive,
|
||||
merely exponentially probable.
|
||||
|
||||
In Paxos, a peer learns of the definitive and final consensus when it
|
||||
discovers that a majority of peers have accepted one value. Which if
|
||||
several values are in play can take a while, but eventually it is going to
|
||||
happen. In Bitcoin, when the blockchain forks, eventually more hashing
|
||||
power piles on one branch of the fork than the other, and eventually
|
||||
everyone can see that more hashing power has piled on one fork than the
|
||||
other, but there is no moment when a peer discovers than one branch is
|
||||
definitive and final. It just finds that one branch is becoming more and
|
||||
more likely, and all the other branches less and less likely.
|
||||
|
||||
Thus paxos has a stronger liveness property than bitcoin, but this
|
||||
difference is in practice not important, for paxos may take an indefinitely
|
||||
long time before it can report a definite and final consensus, while Bitcoin
|
||||
takes a fairly definite time to report it is nearly certain about the consensus
|
||||
value and that value is unlikely to change.
|
||||
|
||||
# Bitcoin does not scale to competing with fiat currency
|
||||
|
||||
Bitcoin is limited to ten transactions per second. Credit card networks
|
||||
handle about ten thousand transactions per second.
|
||||
|
||||
We will need a crypto coin that enables seven billion people to buy a lollipop.
|
||||
|
||||
Blockdag consensus can achieve sufficient speed.
|
||||
|
||||
There are thirty or more proposed blockdag systems, and the number grows rapidly.
|
||||
|
||||
While blockdags can handle very large numbers of transactions, it is not
|
||||
obvious to me that any of the existing blockdag algorithms can handle
|
||||
very large numbers of peers. When actually implemented, they always
|
||||
wind up privileging a small number of special peers, resulting in hidden
|
||||
centralization, as somehow these special and privileged peers all seem to
|
||||
be in the same data centre as the organization operating the blockchain.
|
||||
|
||||
Cardano has a very clever, too clever by half, algorithm to generate
|
||||
random numbers known to everyone and unpredictable and uncontrollable
|
||||
by anyone, with which to distribute specialness fairly and uniformly over
|
||||
time, but this algorithm runs in one centre, rather than using speed of light
|
||||
delay based fair randomness algorithms, which makes me wonder if it is
|
||||
distributing specialness fairly, or operating at all.
|
||||
|
||||
I have become inclined to believe that there is no way around making
|
||||
some peers special, but we need to distribute the specialness fairly and
|
||||
uniformly, so that every peer gets his turn being special at a certain block
|
||||
height, with the proportion of block heights at which he is special being
|
||||
proportional to his stake.
|
||||
|
||||
If the number of peers that have a special role in forming the next block is
|
||||
very small, and the selection and organization of those peers is not
|
||||
furtively centralized to make sure that only one such group forms, but
|
||||
rather organized directly by those special peers themselves, we wind up with
|
||||
forks sometimes, I hope infrequently, because the special peers should
|
||||
most of the time successfully self organize into a single group that
|
||||
contains almost all of the most special peers. If however, we have another,
|
||||
somewhat larger group of peers that have a special role in deciding which
|
||||
branch of the fork is the most popular, two phase blockdag, I think we can
|
||||
preserve blockdag speed without blockdag de-facto concentration of power.
|
||||
|
||||
The algorithm will only have bitcoin liveness, rather than paxos liveness,
|
||||
which is the liveness most blockdag algorithms seek to achieve.
|
||||
|
||||
I will have to test this empirically, because it is hard to predict, or even to
|
||||
comprehend, limits on consensus bandwidth.
|
||||
|
||||
## Bitcoin is limited by its consensus bandwidth
|
||||
|
||||
Not by its network bandwidth.
|
||||
|
||||
Bitcoin makes the miners wade through molasses. Very thick molasses.
|
||||
That is what proof of work is. If there is a fork, it discovers consensus by
|
||||
noticing which fork has made the most progress through the molasses.
|
||||
|
||||
This takes a while. And if there are more forks, it takes longer. To slow
|
||||
down the rate of forks, it makes the molasses thicker. If the molasses is
|
||||
thicker, this slows down fork formation more than it slows down the
|
||||
resolution of forks. It needs to keep the rate of new blocks down slow
|
||||
enough that a miner usually discovers the most recent block before it
|
||||
attempts to add a new block. And if a miner does add a new block at
|
||||
roughly the same time as another miner adds a new block, quite a few
|
||||
more blocks have to be added before the fork is resolved. And as the
|
||||
blocks get bigger, it takes longer for them to circulate. So bigger blocks
|
||||
need thicker molasses. If forks form faster than they can be resolved, no
|
||||
consensus.
|
||||
|
||||
## The network bandwidth limit
|
||||
|
||||
The net bandwidth limit on adding transactions is not a problem.
|
||||
|
||||
What bites every blockchain is consensus bandwidth limit, how fast all the
|
||||
peers can agree on the total order of transactions, when transactions are
|
||||
coming in fast.
|
||||
|
||||
Suppose a typical transaction consists to two input coins, a change output
|
||||
coin, and the actual payment. (I use the term coin to refer to transaction
|
||||
inputs and outputs, although they don’t come in any fixed denominations
|
||||
except as part of anti tracking measures)
|
||||
|
||||
Each output coin consists of payment amount, suppose around sixty four bits,
|
||||
and a public key, two hundred and fifty six bits. It also has a script
|
||||
reference on any special conditions as to what constitutes a valid spend,
|
||||
which might have a lot of long arguments, but it generally will not, so the
|
||||
script reference will normally be one byte.
|
||||
|
||||
The input coins can be a hash reference to a coin in the consensus
|
||||
blockchain, two fifty six bits, or they can be a reference by total order
|
||||
within the blockchain, sixty four bits.
|
||||
|
||||
We can use a Schnorr group signature, which is five hundred and twelve
|
||||
bits no matter how many coins are being signed, no matter how many
|
||||
people are signing, and no matter if it is an n of m signature.
|
||||
|
||||
So a typical transaction, assuming we have a good compact representation
|
||||
of transactions, should be around 1680 bits, maybe less.
|
||||
|
||||
At scale you inevitably have a large number of clients and a small number
|
||||
of full peers. Say several hundred peers, a few billion clients, most of them
|
||||
lightning gateways. So we can assume every peer has a good connection.
|
||||
|
||||
A typical, moderately good, home connection is thirty Mbps download but
|
||||
its upload connection is only ten Mbps or so.
|
||||
|
||||
So if our peers are typical decent home connections, and they will be a lot
|
||||
better than that, bandwidth limits them to adding transactions at 10Mbps,
|
||||
six thousand transactions per second, Visa card magnitude. Though if such
|
||||
a large number of transactions are coming in so fast, blockchain storage
|
||||
requirements will be very large, around 24 TiB, about three or four
|
||||
standard home desktop system disk drives. But by the time we get to that
|
||||
scale all peers will be expensive dedicated systems, rather than a
|
||||
background process using its owners spare storage and spare bandwidth,
|
||||
running on the same desktop that its owner uses to
|
||||
shop at Amazon.
|
||||
|
||||
Which if everyone in the world is buying their lollipops on the blockchain
|
||||
will still need most people using the lightning network layer, rather than
|
||||
the blockchain layer, but everyone will still routinely access the blockchain
|
||||
layer directly, thus ensuring that problems with their lightning
|
||||
gateways are resolved by a peer they can choose, rather than resolved by
|
||||
their lightning network wallet provider, thus ensuring that we can have a
|
||||
truly decentralized lightning network.
|
||||
|
||||
We will not necessarily *get* a truly decentralized lightning layer, but a base
|
||||
layer capable of handling a lot of transactions makes it physically possible.
|
||||
|
||||
So if bandwidth is not a problem, why is bitcoin so slow?
|
||||
|
||||
The bottleneck in bitcoin is that to avoid too many forks, which waste time
|
||||
with fork resolution, you need a fair bit of consensus on the previous block
|
||||
before you form the next block.
|
||||
|
||||
And bitcoin consensus is slow, because the way a fork is resolved is that
|
||||
blocks that received one branch fork first continue to work on that branch,
|
||||
while blocks that received the other branch first continue to work on that
|
||||
branch, until one branch gets ahead of the other branch, whereupon the
|
||||
leading branch spreads rapidly through the peers. With proof of stake, that
|
||||
is not going to work, since one can lengthen a branch as fast as you please. Instead,
|
||||
each branch has to be accompanied by evidence of the weight of stake of
|
||||
peers on that branch. Which means the winning branch can start spreading
|
||||
immediately.
|
||||
|
||||
# Blockdag to the rescue
|
||||
|
||||
On a blockdag, you don’t need a fair bit of consensus on the previous
|
||||
block to avoid too many forks forming. Every peer is continually forming
|
||||
his own fork, and these forks reach consensus about their left great grand
|
||||
child, or left great great … great grandchild. The blocks that eventually
|
||||
become the consensus as leftmost blocks form a blockchain. So we can
|
||||
roll right ahead, and groups of blocks that deviate from the consensus,
|
||||
which is all of them but one, eventually get included, but later in the total
|
||||
order than they initially thought they were.
|
||||
|
||||
In a blockdag, each block has several children, instead of just one. Total
|
||||
order starting from any one block is depth first search. The left blocks
|
||||
come before the right blocks, and the child blocks come before the parent
|
||||
block. Each block may be referenced by several different parent blocks, but
|
||||
only the first reference in the total order matters.
|
||||
|
||||
Each leftmost block defines the total order of all previous blocks, the
|
||||
total order being the dag in depth first order.
|
||||
|
||||
Each peer disagrees with all the other peers about the total order of recent
|
||||
blocks and recent transactions, each is its own fork, but they all agree
|
||||
about the total order of older blocks and older transactions.
|
||||
|
||||
## previous work
|
||||
|
||||
[There are umpteen proposals for blockdags](./SoK_Diving_into_DAG-based_Blockchain_Systems) most of them garbage, but the general principle is sound.
|
||||
|
||||
For a bunch of algorithms that plausibly claim to approach the upload
|
||||
limit, see:
|
||||
|
||||
* [Scalable and probabilistic leaderless bft consensus through metastability](https://files.avalabs.org/papers/consensus.pdf)
|
||||
|
||||
This explains the underlying concept, that a peer looks at the dag,
|
||||
makes its best guess as to which way consensus is going, and joins
|
||||
the seeming consensus, which makes it more likely to become the
|
||||
actual consensus.
|
||||
|
||||
Which is a good way of making arbitrary choices where it does not
|
||||
matter which choice everyone makes, provided that they all make
|
||||
the same choice, even though it is an utterly disastrous way of
|
||||
making choices where the choice matters.
|
||||
|
||||
This uses an algorithm that rewards fast mixing peers by making
|
||||
their blocks appear earlier in the total order. This algorithm does
|
||||
not look incentive compatible to me. It looks to me that if all the
|
||||
peers are using that algorithm, then any one peer has an incentive
|
||||
to use a slightly different algorithm.
|
||||
|
||||
The authors use the term Byzantine fault incorrectly, referring to
|
||||
behavior that suggests the unpredictable failures of an unreliable
|
||||
data network as Byzantine failure. No, a Byzantine fault suggests
|
||||
Byzantine defection, treachery, and failure to follow process. It is
|
||||
named after Byzantium because of the stuff that happened during
|
||||
the decline of the Byzantine empire.
|
||||
|
||||
* [Prism: Deconstructing the blockchain to approach physical limits](https://arxiv.org/pdf/1810.08092.pdf)
|
||||
|
||||
A messy, unclear, and overly complicated proposed implementation
|
||||
of the blockdag algorithm, which, however, makes the important
|
||||
point that it can go mighty fast, that the physical limits on
|
||||
consensus are bandwidth, storage, and communication delay, and
|
||||
that we can approach these limits.
|
||||
|
||||
* [Blockmania: from block dags to consensus](https://arxiv.org/pdf/1809.01620.pdf)
|
||||
|
||||
This brings the important concept, that the tree structure created by
|
||||
gossiping the blockdag around _is_ the blockdag, and also is the data
|
||||
you need to create consensus, bringing together things that were
|
||||
separate in Prism, radically simplifying what is complicated in
|
||||
Prism by uniting data and functionality that Prism divided.
|
||||
|
||||
This study shows that the Blockmania implementation of the
|
||||
blockdag is equivalent to the Practical Byzantine Fault Tolerant
|
||||
consensus algorithm, only a great deal faster, more efficient, and
|
||||
considerably easier to understand.
|
||||
|
||||
The Practical Byzantine Fault Tolerant consensus algorithm is an
|
||||
implementation of the Paxos protocol in the presence of Byzantine
|
||||
faults, and the Paxos protocol is already hard enough to understand.
|
||||
|
||||
So anyone who wants to implement consensus in a system where
|
||||
Byzantine failure and Byzantine defection is possible should forget
|
||||
about Paxos, and study blockdags.
|
||||
|
||||
* [A highly scalable, decentralized dag–based consensus algorithm](https://eprint.iacr.org/2018/1112.pdf)
|
||||
|
||||
Another blockdag algorithm, but one whose performance has been tested. Can handle high bandwidth, lots of transactions, and achieves fast Byzantine fault resistant total order consensus in time $O(6λ)$, where λ is the upper bound of the network’s gossip period.
|
||||
|
||||
* [Blockchain-free cryptocurrencies: A framework for truly decentralised fast transactions](https://eprint.iacr.org/2016/871.pdf)
|
||||
|
||||
These transactions are indeed truly decentralized, fast, and free from
|
||||
blocks, assuming all participants download the entire set of
|
||||
transactions all the time.
|
||||
|
||||
The problem with this algorithm is that when the blockchain grows enormous, most participants will become clients, and only a few giant peers will keep the whole transaction set, and this system, because it does not provide a total order of all transactions, will then place all the power in the hands of the peers.
|
||||
|
||||
We would like the clients to have control of their private
|
||||
keys, thus must publish their public keys with the money they
|
||||
spend, in which case the giant peers must exchange blocks of
|
||||
information containing those keys, and it is back to having blocks.
|
||||
|
||||
The defect of this proposal is that convergence does not
|
||||
converge to a total order on all past transactions, but merely a total
|
||||
set of all past transactions. Since the graph is a graph of
|
||||
transactions, not blocks, double spends are simply excluded, so a
|
||||
total order is not needed. While you can get by with a total set, a
|
||||
total order enables you to do many things a total set does not let
|
||||
you do. Such as publish two conflicting transactions and resolve them.
|
||||
|
||||
Total order can represent consensus decisions that total set cannot
|
||||
easily represent, perhaps cannot represent at all. We need a
|
||||
blockdag algorithm that gives us consensus on the total order of
|
||||
blocks, not just the set of blocks.
|
||||
|
||||
In a total order, you do not just converge to the same set, you
|
||||
converge to the same order of the set. Having the same total order
|
||||
    of the set makes it, among other things, a great deal easier
|
||||
and faster to check that you have the same set. Plus your set can
|
||||
contain double spends, which you are going to need if the clients
|
||||
themselves can commit transactions through the peers, if the clients
|
||||
themselves hold the secret keys and do not need to trust the peers.
|
||||
|
||||
# Proposed blockdag implementation
|
||||
|
||||
The specific details of many of these proposed systems are rather silly and
|
||||
often vague, typical academic exercises unconcerned with real world
|
||||
issues, but the general idea that the academics intend to illustrate is sound
|
||||
and should work, certainly can be made to work. They need to be
|
||||
understood as academic illustrations of the idea of the general algorithm
|
||||
for fast and massive blockdag consensus, and not necessarily intended as
|
||||
ready to roll implementations of that idea.
|
||||
|
||||
Here is an even more vague outline of my variant of this idea, I name
|
||||
Yabca “Yet another blockdag consensus algorithm”,
|
||||
|
||||
I propose proof of stake. The stake of a peer is not the stake it owns, but
|
||||
the stake that it has injected into the blockchain on behalf of its clients
|
||||
and that its clients have not spent yet. Each peer pays on behalf of its
|
||||
clients for the amount of space it takes up on the blockchain, though it does
|
||||
not pay in each block. It makes an advance payment that will cover many
|
||||
transactions in many blocks. The money disappears, built in deflation,
|
||||
instead of built in inflation. Each block is a record of what a peer has
|
||||
injected.
|
||||
|
||||
The system does not pay the peers for generating a total order of
|
||||
transactions. Clients pay peers for injecting transactions. We want the
|
||||
power to be in the hands of people who own the money, thus governance will
|
||||
have a built in bias towards appreciation and deflation, rather than
|
||||
inflation.
|
||||
|
||||
The special sauce that makes each proposed blockdag different from each
|
||||
of the others is how each peer decides what consensus is forming about
|
||||
the leftmost edge of the dag, the graph analysis that each peer performs.
|
||||
And this, my special sauce, I will explain when I have something running.
|
||||
|
||||
Each peer adopts as its leftmost child for its latest block, a previous block
|
||||
that looks like a good candidate for consensus, which looks like a good
|
||||
candidate for consensus because the left child has a left child that looks
|
||||
like consensus actually is forming around that grandchild, in part because
|
||||
the left child has a … left child has a … left child that looks like it might
|
||||
have consensus, until eventually, as new blocks pile on top of old blocks, we
|
||||
actually do get consensus about the left most child sufficiently deep in
|
||||
the dag from the latest blocks.
|
||||
|
||||
The blockdag can run fast because all the forks that are continually
|
||||
forming eventually get stuffed into the consensus total order somewhere.
|
||||
So we don’t have to impose a speed limit to prevent excessive forking.
|
||||
|
||||
# Cost of storage on the blockchain.
|
||||
|
||||
Tardigrade charges $120 per year per terabyte of storage, $45 per terabyte of download
|
||||
|
||||
We have a pile of theory, though no practical experience, that a blockdag can approach the physical limits, that its limits are going to be bandwidth and storage.
|
||||
|
||||
Storage on the blockdag is going to cost more, because massively
|
||||
replicated, so say three hundred times as much, and is going to be
|
||||
optimized for tiny fragments of data while Tardigrade is optimized for
|
||||
enormous blocks of data, so say three times as much on top of that, a
|
||||
thousand times as expensive to store should be in the right ballpark.
|
||||
|
||||
When you download, you are downloading from only a single peer on the blockdag, but you are downloading tiny fragments dispersed over a large pile of data, so again, a thousand times as expensive to download sounds like it might be in the right ballpark.
|
||||
|
||||
Then storing a chain of keys and the accompanying roots of total state,
|
||||
with one new key per day for ten years will cost about two dollars over ten
|
||||
years.
|
||||
|
||||
Ten megabytes is a pretty big pile of human readable documentation. Let
|
||||
us suppose you want to store ten megabytes of human readable data and
|
||||
read and write access costs a thousand times what tardigrade costs, will
|
||||
cost about twelve dollars.
|
||||
|
||||
So, we should consider the blockdag as an immutable store of arbitrary
|
||||
typed data, a reliable broadcast channel, where some types are executable,
|
||||
and, when executed, cause a change in mutable total state, typically that
|
||||
a new unspent coin record is added, and an old unspent coin record is
|
||||
deleted.
|
||||
|
||||
In another use, a valid update to a chain of signatures should cause a
|
||||
change in the signature associated with a name, the association being
|
||||
mutable state controlled by immutable data. Thus we can implement
|
||||
corporations on the blockdag by a chain of signatures, each of which
|
||||
represents [an n of m multisig](./PracticalLargeScaleDistributedKeyGeneration.pdf “Practical Large Scale Distributed Key Generation”).
|
BIN
docs/blockseer.jpg
Normal file
After Width: | Height: | Size: 167 KiB |
46117
docs/byzantine_paxos.pdf
Normal file
11
docs/check_html.sh
Normal file
@ -0,0 +1,11 @@
|
||||
#!/bin/bash
# Check every project HTML file for HTML5 compliance with tidy.
# Run from anywhere; the script changes into its own directory first.

# Quote the dirname expansion so paths containing spaces work.
cd "$(dirname "$0")"

# tidy needs a scratch output path; fall back to /tmp if TMP is unset,
# otherwise "-o $TMP/fred.html" would expand to the root path /fred.html.
: "${TMP:=/tmp}"

for f in ../*.html *.html libraries/*.html
do
    echo " " "$f"
    # -e: show errors/warnings only; -q: suppress non-document output;
    # -access 4: strictest accessibility checks; output goes to scratch file.
    tidy -lang en_us --doctype html5 -utf8 -access 4 -e -q -o "$TMP/fred.html" "$f"
done
echo "checked all html files for html5 compliance."
|
||||
|
||||
|
||||
|
1149
docs/client_server.md
Normal file
BIN
docs/code.ico
Normal file
After Width: | Height: | Size: 4.2 KiB |
334
docs/contracts_on_blockchain.md
Normal file
@ -0,0 +1,334 @@
|
||||
---
|
||||
title:
|
||||
Contracts on the blockchain
|
||||
---
|
||||
# Terminology
|
||||
|
||||
A rhocoin is an unspent transaction output, and it is the public key
|
||||
that identifies that unspent transaction output, the private key that
|
||||
can sign with that public key, the point which is the mathematical
|
||||
object of which that public key is the text representation, and the
|
||||
scalar which is the mathematical object that the secret can construct.
|
||||
|
||||
A public key and its associated secret key can do all sorts of
|
||||
things as well as control rhocoins – logons, identifying participants in
|
||||
a conversation, and signing a message, among them.
|
||||
|
||||
We talk about points and scalars, meaning points on the elliptic curve
|
||||
and scalars, large numbers modulo the order of the curve, the
|
||||
mathematical objects underlying these keys, when we combine them
|
||||
mathematically in interesting ways, for example adding several points to
|
||||
create a public key that requires several secrets, possessed by several
|
||||
different people, to use. Scalars can be added and multiplied, points
|
||||
can be added or subtracted from other points, and points can be
|
||||
multiplied by scalars. When we design contracts on the blockchain, we
|
||||
should refer to scalars and points, but the rest of the time, we should
|
||||
talk about coins, public keys, and private keys. Normal people should
|
||||
never need to hear of points and scalars, and should not need to know
|
||||
what they are. Only people writing software to manage blockchain based
|
||||
interactions need to talk about or think about points and scalars.
|
||||
|
||||
# Instant irrevocable transactions
|
||||
|
||||
## Why we need Schnorr signatures
|
||||
|
||||
In order that people can meet in person to exchange fiat for blockchain
|
||||
money in person, they need a transfer that is instant and irrevocable.
|
||||
|
||||
Bob wants to buy blockchain money for cash. Ann wants to sell for cash.
|
||||
They agree to meet in person, and do the exchange – paper money in a
|
||||
brown paper bag.
|
||||
|
||||
Ann and Bob cooperate over the network to create a postdated transaction
|
||||
spending an as yet nonexistent coin whose public key is the sum of a
|
||||
public key whose secret key is known only to Bob, and a public key whose
|
||||
secret key is known only to Ann, and *after* that transaction is
|
||||
created, Ann issues a transaction placing value an unspent transaction
|
||||
output in that coin, creates a rhocoin for that public key, knowing that
|
||||
if nothing further is done, the coin eventually comes back to her. Then,
|
||||
before the postdated transaction becomes placeable on the blockchain,
|
||||
they meet in person, and once the cash is in her hands and she has
|
||||
counted it, she then reveals her secret key to Bob, and his wallet can
|
||||
instantly tell him that the coin is in his wallet right and spendable
|
||||
right now – but will become spendable by Ann at block number so and so.
|
||||
Bob can now spend that coin, but Ann still cannot spend it and needs to
|
||||
spend it before the postdated transaction becomes spendable on the
|
||||
blockchain. He presumably spends it to a coin wholly controlled by him,
|
||||
or uses it in some other transaction, and Ann discards the now useless
|
||||
postdated transaction.
|
||||
|
||||
Note that such a joint Schnorr signature is absolutely indistinguishable
|
||||
on the blockchain from any other signature, so no one, except for Ann
|
||||
and Bob, can tell there was anything different about this transaction.
|
||||
|
||||
To represent this contract to normies, we call it a coin commitment, a
|
||||
coin locked to someone else’s wallet for a certain period. We refrain
|
||||
from talking about points and scalars. Ann creates a coin that she
|
||||
cannot use for anything except paying Bob, until a certain time has
|
||||
passed, and once this coin is registered on the blockchain, Bob knows
|
||||
she has created this coin and what it is worth, (though no one except
|
||||
Ann and Bob knows it is for this purpose) but once the coin is on the
|
||||
blockchain, she can use it to instantly pay Bob, in a message over the
|
||||
network that his wallet will immediately register as payment completed,
|
||||
without needing a third party escrow agent that governments will want to
|
||||
register and regulate, as you have to use in order to do instant payments
|
||||
with bitcoin, or offline by Bob manually typing in the secret that makes
|
||||
the coin spendable by Bob, though it can be texted in the clear, since
|
||||
no one but Bob can make use of it. It only needs to be kept secret until
|
||||
the time comes for Bob to receive the payment. But if she does not
|
||||
instantly pay Bob, she can convert it into a regular coin, spendable by
|
||||
her and only by her at any time, once its lockup time has expired.
|
||||
conversely, once she has given Bob the secret, Bob can use it any
|
||||
transaction, such as a transaction spending it to himself, before its
|
||||
time is up. The blockchain contains no information showing this
|
||||
committed coin is any different from any other, the information that
|
||||
makes this coin different being in the secrets in Ann’s and Bob’s
|
||||
wallets, not in the blockchain where governments are likely to look for
|
||||
someone to regulate. The information that makes this coin different is
|
||||
that the secret that controls it is split between Ann’s wallet and
|
||||
Bob’s wallet, and Ann and Bob have already created a secret
|
||||
transaction, stored in Ann’s wallet, spending the coin to a destination
|
||||
determined by Ann, which transaction remains in her wallet until the
|
||||
time is up, or until Bob, having received the secret from Ann, spends
|
||||
the coin. This transaction is made possible, not by any terribly clever
|
||||
cryptographic mathematics, but by the fact that our blockchain, unlike
|
||||
the others, is organized around client wallets chatting privately with
|
||||
other client wallets. Every other blockchain has necessary cryptographic
|
||||
mathematics to do the equivalent, usually more powerful and general
|
||||
than anything on the rhocoin blockchain, and Monero has immensely
|
||||
superior cryptographic capabilities, but somehow, they don’t, the
|
||||
difference being that rhocoin is designed to avoid uses of the internet
|
||||
that render a blockchain vulnerable to regulation, rather than to use
|
||||
clever cryptography to avoid regulation. The attack points whereby
|
||||
government is getting hold of crypto currencies are not the
|
||||
cryptography, which is usually bulletproof, but the domain name system,
|
||||
the certificate authorities, and the world wide web, which is completely
|
||||
vulnerable.
|
||||
|
||||
# General purpose scripts
|
||||
|
||||
Ethereum has a general purpose script language, which seems like
|
||||
overkill, and indeed it was overkill, since humans started to misbehave,
|
||||
and other humans started to judge transactions, so that the laws of
|
||||
mathematics failed to dictate the outcomes, and those human judges are
|
||||
predictably misbehaving.
|
||||
|
||||
Bitcoin also has a fully general stack-based language
|
||||
pay-to-scripthash (P2SH) also called Bitcoin script. But this led to
|
||||
problems, so they effectively disabled it, by making only a small set of
|
||||
scripts permissible. Seems that we have not yet figured out how to
|
||||
safely enable run time user designed contracts on the blockchain. We
|
||||
instead need a short list of payment rules fixed at compile time. We
|
||||
shall not create a script language capable of embedding arbitrary
|
||||
contracts in the blockchain at runtime, as it would be impossible for one of
|
||||
the parties to figure out the gotchas cooked up by the other party.
|
||||
|
||||
Rather than our model being the infamous click through contract and shrink
|
||||
wrap contract, our model should be the returnable writs of The Lion of
|
||||
Justice, Henry the First. The Anglo Saxon legal system has been going
|
||||
downhill since the death of Henry the second. It is time for a restoration.
|
||||
If we cannot restore, bypass. People wanted to use the legal system of The
|
||||
Lion of Justice, rather than the ecclesiastical legal system, and if we do
|
||||
things right, people will want to use our legal system.
|
||||
|
||||
A returnable writ is a royal command that has blanks that the parties to a
|
||||
dispute or a contract can fill in, but they cannot write their own writ
|
||||
ad hoc.
|
||||
|
||||
The trouble with excessive flexibility is that the parties to a dispute are
|
||||
likely to have asymmetric knowledge of the implications of the contract, which
|
||||
problem can be mitigated by having as few possible contracts as possible, and
|
||||
those contracts authorized by the consensus of the blockchain. We can
|
||||
increase flexibility by having multi transaction transactions, where different
|
||||
elements of the aggregate transaction invoke different writs, but too much
|
||||
flexibility is likely to bite people.
|
||||
|
||||
# Atomic Swaps on separate blockchains
|
||||
|
||||
A proof of stake currency is like a corporation, like shares in a
|
||||
corporation. So we are going to have many corporations, and individuals
|
||||
will want to exchange shares in one corporation, with shares in
|
||||
another. We would like to do this without direct linking of
|
||||
blockchains, without trusted intermediaries, because a trusted
|
||||
intermediary is a target for regulation, where the state imposes
|
||||
mandatory trust, while protecting profoundly untrustworthy behavior.
|
||||
|
||||
Bob owns some crypto currency in the foo blockchain, and wants to
|
||||
exchange it with Carol’s crypto currency in the bar blockchain.
|
||||
|
||||
Bob agrees with Carol to give her three units of his foo currency, for
|
||||
five units of her bar currency.
|
||||
|
||||
But, how do we make it so that the transaction is completed? We don’t
|
||||
want Carol giving five units, and Bob replying “Hah hah fooled you”.
|
||||
|
||||
So Carol creates an output of five units that can be spent by a secret
|
||||
key that only she knows after ten blocks, but until then can be spent by
|
||||
Bob’s secret, plus a preimage that only she knows. The output contains
|
||||
Bob’s public key, Carol’s public key, and the public hash of a secret
|
||||
preimage known only to Carol. Bob waits until that output becomes final
|
||||
and spendable in the bar blockchain. Bob then creates an output of three
|
||||
units that can be spent by a secret key that Bob knows after five blocks
|
||||
in the foo chain, but until then, can be spent by a carol’s secret key,
|
||||
plus the preimage of a value known only to Carol. The output also
|
||||
contains Carol’s public key, Bob’s public key, and the public hash of a
|
||||
secret preimage known only to Carol, except that the public keys in
|
||||
Bob’s output are in the opposite order to Carol’s, and the times are
|
||||
different. After a while, both outputs become final and spendable in
|
||||
their respective blockchains. Carol then uses her preimage to spend
|
||||
those three units in the foo chain, thereby automatically revealing it
|
||||
to Bob, which preimage Bob immediately employs to spend the output on
|
||||
the bar chain.
|
||||
|
||||
To spend an output, it has to be an input, one of many, to a
|
||||
transaction, and the whole transaction has to be signed by every
|
||||
signature required by every input, as each input defines a valid
|
||||
signature. So an input/output carries information amounting to a
|
||||
quantity of money, possibly a user name, and a signature definition. In
|
||||
the simplest and most common case, a public key defines what would
|
||||
constitute a valid signature.
|
||||
|
||||
The immutability of the output comes from the fact that is part of
|
||||
transaction, and the entire transaction has to be as signed, that the
|
||||
transaction has to be signed by the signatures for all the inputs. For
|
||||
this case, contract conditional on pre-image for a certain range of
|
||||
block numbers, the signature block has to the pre-image as well as the
|
||||
regular signature.
|
||||
|
||||
# Micro and Nanopayments.
|
||||
|
||||
The blockchain is heavy, thus unsuitable for small and fast payments,
|
||||
such as the payments needed for storage and bandwidth while pirating
|
||||
files.
|
||||
|
||||
Small fast payments require trust, thus trusted intermediaries backed up
|
||||
by the blockchain.
|
||||
|
||||
How do we know if we can trust an intermediary?
|
||||
|
||||
Because he has a history of signed contracts, that it is easy to prove
|
||||
whether a contract has been honored, and no one has produced a contract
|
||||
that he signed, alleged that contract was dishonored, and he could not
|
||||
prove it was honored.
|
||||
|
||||
Assume Bob is a trusted intermediary between Ann and Carol. Ann wants to
|
||||
pay people, Carol among them, probabilistically – in lottery tickets
|
||||
that if won will result in payments on the blockchain.
|
||||
|
||||
The protocol is that Ann creates the unspent transaction output, which
|
||||
comes to exist, unspent, on the blockchain, and no one can spend it
|
||||
except Bob says so, since any transaction spending that output will need
|
||||
a Bob signature. So if Bob is an OK guy, no double spending shall ever
|
||||
happen. If no double spending has ever happened, we can probably trust
|
||||
Bob for transactions similar to past transactions. If no past double
|
||||
spends, likely no future double spends.
|
||||
|
||||
Ann promises to give Carol a lottery ticket for services rendered, Carol
|
||||
gives Ann a signed hash of a secret preimage, and renders those
|
||||
services. Ann issues a lottery ticket by creating a random number, and
|
||||
giving Carol a signed hash of Carol’s hash and the lottery identifier.
|
||||
The ticket will be valid if the hash of Ann’s secret preimage, and
|
||||
Carol’s secret preimage has certain properties – typically that its
|
||||
value in big endian order modulo 2^64^ is less than a certain amount.
|
||||
The ticket commits Ann to the lottery conditions, being a hash of their
|
||||
secrets, and the conditions agreed to.
|
||||
|
||||
Carol shows Bob the lottery ticket, and asks
|
||||
|
||||
> “will I get paid if the secrets under the hashes meet the required
|
||||
> condition.”
|
||||
|
||||
Bob tells Carol
|
||||
|
||||
> “Sure. I will issue a payment for block number such of the blockchain,
|
||||
> the current block, if the preimage meets the conditions.”
|
||||
|
||||
thereby assuring Carol that Ann is on the up and up and providing the
|
||||
data needed to create a valid transaction if the lottery ticket is
|
||||
valid. Bob provides a link to the transaction output that will be used
|
||||
showing that Carol has put real money where her mouth is, asserts it is
|
||||
unspent, and promises not to validate any further potentially winning
|
||||
lottery tickets for this block period, unless shown evidence that
|
||||
previously validated lottery tickets were losing tickets.
|
||||
|
||||
Carol, who has some reason to trust Bob, now knows that Ann is issuing
|
||||
valid lottery tickets – even if this was a losing lottery ticket. Bob
|
||||
will refuse to issue more validations for this lottery and this block
|
||||
period, unless Carol provides proof that this was a losing lottery
|
||||
ticket.
|
||||
|
||||
If Carol goes dark at this point, the money is frozen till the next
|
||||
block period, which causes minor and temporary inconvenience for Ann and
|
||||
Bob but does not benefit Carol.
|
||||
|
||||
Suppose it is a winning lottery ticket – she informs Bob and Carol so
|
||||
that they know to issue a new lottery, and now injects it into the
|
||||
blockchain with the required data, and hashes that links to the
|
||||
conversations between herself, Bob, and Ann. If the payment goes
|
||||
through – the transaction inputs are real and have not been previously
|
||||
spent. then done. If the transaction fails for block n of the block
|
||||
chain, when Bob said it would succeed, she uses the proof of failure -
|
||||
that blockchain input was invalid or spent in this block when Bob said
|
||||
it would be valid and unspent for this block, plus the previous
|
||||
conversations between herself, Bob, and Carol, to prove to the world
|
||||
that Bob was cheating.
|
||||
|
||||
If, on the other hand, there are lots of transactions of this form that
|
||||
have successfully gone through, and none that failed in this fashion,
|
||||
these provide compelling evidence that Bob is an honest dealer.
|
||||
|
||||
If Bob goes dark at some point (going dark means ceasing to respond, or
|
||||
issuing responses that will be rejected and ignored as invalid) the
|
||||
money remains unspent and unspendable, Ann and Carol are inconvenienced,
|
||||
no one wins, and no proof of bad behavior is generated. But Bob cannot
|
||||
stay up for losing lottery tickets, but then conveniently go dark for
|
||||
winning lottery tickets because he issues the data required for a
|
||||
winning lottery ticket (or data that would prove he is a cheater) before
|
||||
he knows whether the ticket is winning or not.
|
||||
|
||||
If Carol goes dark, she does not get paid, but does not get proof of bad
|
||||
behavior by Ann or Bob.
|
||||
|
||||
If Ann goes dark, she does not pay Carol. Carol knows Ann is behaving
|
||||
badly, ceases to deal with her, but may not get proof of bad behavior by
|
||||
Ann that she can show to other people. But getting such proof is not
|
||||
very useful anyway, since Carol’s identity is cheap and expendable,
|
||||
while Bob’s identity is durable and valuable.
|
||||
|
||||
Random stranger Ann contacts random stranger Carol, offers to pay in
|
||||
lottery tickets for data. Perhaps she wants to watch some movies. Shows
|
||||
proof that lottery prize recently existed in a recent block on the
|
||||
blockchain, which shows that Ann is not a total flake. Gets some data on
|
||||
trust. Provides lottery ticket. Bob says lottery ticket could win,
|
||||
depending on a secret that Carol has committed to, but not revealed.
|
||||
Carol checks that in the past Bob has paid out on lots of lottery
|
||||
tickets and not defected on any lottery ticket. More trust ensues. Ann
|
||||
now has reason to trust Carol, Carol has reason to trust Ann, without
|
||||
anyone being exposed to large risks and without any actual blockchain
|
||||
transactions taking place. It takes an expensive blockchain transaction
|
||||
to create the lottery prize, but having created it, Ann and Bob can do
|
||||
very large numbers of transactions off blockchain on trust.
|
||||
|
||||
# Institutions on the blockchain
|
||||
|
||||
A lot of people, instead of controlling outputs on the bitcoin
|
||||
blockchain by secret keys, have a login account, username and password,
|
||||
with an “exchange”. That “exchange” (the institution, not the
|
||||
particular transaction mediated by the institution) owes them bitcoins,
|
||||
payable on demand. And from time to time an exchange goes belly up,
|
||||
blaming the federal government or hackers, and just does not pay its
|
||||
clients the bitcoins it owes.
|
||||
|
||||
We call them “exchanges”, not “banks”, because the word “banks” implies
|
||||
a government guarantee, and a long past of honoring transactions, which
|
||||
these exchanges seldom have, but they are performing bank like
|
||||
functions.
|
||||
|
||||
We would like to have evidence of the institutions reserve fraction, of
|
||||
its term transformation (maturity transformation).
|
||||
|
||||
Solution: Institution runs its own side chain, with a small number of
|
||||
peers. The Merkle-patricia dag of unspent transaction outputs has in
|
||||
each node the sum of the money in each subtree, and the hash of subtree
|
||||
hashes and sums. Thus the standard client wallet will report the total
|
||||
owing/shares on issue
|
154
docs/contributor_code_of_conduct.md
Normal file
@ -0,0 +1,154 @@
|
||||
---
|
||||
title: Contributor Code of Conduct
|
||||
---
|
||||
|
||||
# Peace on Earth to all men of good will
|
||||
|
||||
May you do good and not evil. May you find forgiveness for yourself and
|
||||
forgive others. May you share freely, never taking more than you give.
|
||||
|
||||
# Operational Security
|
||||
|
||||
A huge problem with software that relates to privacy and/or to money is
|
||||
that frequently strange and overcomplicated design decisions are made,
|
||||
(passive tense because it is strangely difficult to find who made those
|
||||
decisions), decisions whose only apparent utility is to provide paths for
|
||||
hostile organizations to exploit subtle, complex, and unobvious security holes.
|
||||
|
||||
These holes are often designed so that they can only be utilized efficiently
|
||||
by a huge organization with a huge datacentre that collects enormous
|
||||
numbers of hashes and enormous amounts of data, and checks enormous
|
||||
numbers of hashes against an even more enormous number of potential
|
||||
pre-images generated from that data.
|
||||
|
||||
Another huge problem is that if we get penetrated by enemy shills,
|
||||
entryists, and buggers, as the Patriot Front is and the Jan Sixth protestors
|
||||
were, we are likely to wind up like the January sixth protestors, who as I
|
||||
write this are imprisoned indefinitely being tortured by black guards
|
||||
recently imported from the northern part of black Africa, awaiting
|
||||
trial with no likelihood of any actual trial for years.
|
||||
|
||||
## No namefags
|
||||
|
||||
A participant who can be targeted is likely to introduce unobvious security
|
||||
flaws into the software architecture. All contributors should make some
|
||||
effort to protect themselves against a third party subsequently coercing
|
||||
them to use the reputation that they have obtained by contributing to make
|
||||
subsequent harmful contributions.
|
||||
|
||||
All contributors will use a unique name and avatar for the purpose of
|
||||
contributing to this project, and shall not link it to other names of theirs
|
||||
that are potentially subject to pressure. In the event of videoconferencing,
|
||||
the participants shall wear a mask over the lower part of their face that
|
||||
conceals the shape of their mouth and jaw and a rigid hat like a fedora that
|
||||
conceals the shape of the upper part of their head.
|
||||
|
||||
Apart from your mouth, the parts of your face that communicate non
|
||||
verbal information turn out to be surprisingly useless for identifying
|
||||
individuals.
|
||||
|
||||
If you wear glasses, you should not wear your usual glasses, because facial
|
||||
recognition software is very good at recognizing glasses, and easily
|
||||
distracted, confused, and thrown off by unusual glasses.
|
||||
|
||||
Even if there are gaping holes in our security, which there will be, and
|
||||
even if everyone knows another name of a participant, which they will, no
|
||||
need to make the hole even bigger by mentioning it in public. People who lack
|
||||
security are likely to result in code that lacks security. They come under
|
||||
pressure to introduce an odd architecture for inexplicable reasons. We see
|
||||
this happening all the time in cryptographic products.
|
||||
|
||||
# Code will be cryptographically signed
|
||||
|
||||
Of necessity, we will rest our developer identities on GPG keys, until we
|
||||
can eat our own dogfood and use our own system's cryptographic keys.
|
||||
Login identities shall have no password reset, because that is a security
|
||||
hole. If people forget their password, they should just create a new login
|
||||
that uses the same GPG key.
|
||||
|
||||
# No race, sex, religion, nationality, or sexual preference
|
||||
|
||||
![On the internet nobody knows you are a dog](./images/nobody_know_you_are_a_dog.webp)
|
||||
|
||||
Everyone shall be white, male, heterosexual, and vaguely Christian, even
|
||||
if they quite obviously are not, but no one shall unnecessarily and
|
||||
irrelevantly reveal their actual race, sex, religion, or political orientation.
|
||||
|
||||
All faiths shall be referred to respectfully. Even if they happen to be
|
||||
making war on us, this software may not be very relevant to that kind of
|
||||
warfare, in which case that discussion can be held elsewhere.
|
||||
|
||||
All sovereigns shall be referred to respectfully, if they are referred to at all,
|
||||
which they should not be. If this software is likely to frustrate their
|
||||
objectives, or even contribute to their overthrow, no need to make it
|
||||
personal, no need to trigger our enemies. War will come to us soon
|
||||
enough, no need to go looking for it.
|
||||
|
||||
# No preaching supererogation
|
||||
|
||||
Status must be on the basis of code, good code, and clever code, not on
|
||||
cheap claims of superior virtue.
|
||||
|
||||
When someone plays the holier than thou card, he does not intend to share
|
||||
what we are sharing. Out of envy and covetousness, he intends to deny us
|
||||
what we are sharing, to deny us that which is ours.
|
||||
|
||||
If he is holier than we are, he not only wants what we have, which we will
|
||||
gladly share. He wants us to not have what we have.
|
||||
|
||||
Christians are required to turn the other cheek, and people attempting to
|
||||
maintain a politically neutral environment need to turn the other cheek.
|
||||
But you very quickly run out of cheeks, and then it is on. You cannot be
|
||||
politically neutral when the other guy is not being neutral. You have to
|
||||
bring a gun to a gunfight and a faith to a holy war. People who start
|
||||
politics in an environment intended to be politically neutral have to be
|
||||
purged, and a purge cannot be conducted in a politically neutral manner.
|
||||
You have to target the enemy faith and purge it as the demon worshiping
|
||||
heresy that it is, or else those attempting to maintain political neutrality
|
||||
will themselves be purged as heretics, as happened to the Open Source and
|
||||
Free Software movements. You may not be interested in war, but war is
|
||||
interested in you.
|
||||
|
||||
We want to maintain a politically, racially, religiously, and ethnically
|
||||
neutral environment, but it takes two to tango. You cannot maintain a
|
||||
politically neutral environment in a space where an enemy faction wants
|
||||
their politics to rule. Neutrality cannot actually be neutral. It merely means
|
||||
that the quietly ruling faction is quiet, tolerant of its opponents, and does
|
||||
not demand affirmations of faith. If an enemy faith wants to take over,
|
||||
the ruling faith can no longer be quiet and tolerant of that opponent.
|
||||
|
||||
## No claims of doing good to random unknown beneficiaries
|
||||
|
||||
We are doing this for ourselves, our friends, our kin, and our posterity, not
|
||||
for strangers a thousand miles away, and we only care about strangers a
|
||||
thousand miles away to the extent that they are likely to enable us to make
|
||||
money by making them secure.
|
||||
|
||||
If someone mentions the troubles of people a thousand miles away, it
|
||||
should only be in the frame that we will likely have similar troubles soon
|
||||
enough, or that those people a thousand miles away, of a different race,
|
||||
religion, and language, could use our product to their, and our, mutual
|
||||
advantage, not because he cares deeply for the welfare of far away
|
||||
strangers that he has never met in places he could not find on a map.
|
||||
|
||||
## No victim classes, no identity politics, and no globalism
|
||||
|
||||
The Open Source and Free Software movements were destroyed by
|
||||
official victimhood. Status and leadership must be on the basis of code,
|
||||
good code, and clever code, not on cheap claims of superior oppression.
|
||||
|
||||
The experience of the Open Source and Free Software movement
|
||||
demonstrates that if victimhood is high status, code and code quality must
|
||||
be low status. If victimhood is high status then “you did not build that”.
|
||||
Rather, if victimhood is high status, then good code, silicon fabs, and
|
||||
rockets spontaneously emerged from the fertile soil of sub-Saharan Africa,
|
||||
and was stolen by white male rapists from the brave and stunning black
|
||||
warrior women of sub-Saharan Africa, and social justice demands that the
|
||||
courageous advocate for the brave and stunning black warrior women of
|
||||
sub-Saharan Africa takes what you have, what you gladly would share,
|
||||
away from you.
|
||||
|
||||
Unless, when a female contributor unnecessarily and irrelevantly informs
|
||||
everyone she is female, she is told that she is seeking special treatment on
|
||||
account of sex, and is not going to get it, no organization or group that
|
||||
attempts to develop software is going to survive. Linux is a dead man walking.
|
368
docs/crypto_currency.md
Normal file
@ -0,0 +1,368 @@
|
||||
---
|
||||
title: Crypto currency
|
||||
---
|
||||
|
||||
The objective is to implement the blockchain in a way that scales to one hundred thousand transactions per second, so that it can replace the dollar, while being less centralized than bitcoin currently is, though not as decentralized as purists would like, and preserving privacy better than bitcoin now does, though not as well as Monero does. It is a bitcoin with minor fixes to privacy and centralization, major fixes to client host trust, and major fixes to scaling.
|
||||
|
||||
The problem of bitcoin clients getting scammed by bitcoin peers will be fixed through Merkle-patricia, which is a well known and already widely deployed fix – though people keep getting scammed due to lack of a planned bitcoin client-host architecture. Bitcoin was never designed to be client host, but it just tends to happen, usually in a way that quite unnecessarily violates privacy, client control, and client safety.
|
||||
|
||||
Monero’s brilliant and ingenious cryptography makes scaling harder, and all mining based blockchains tend to the same centralization problem as afflicts bitcoin. Getting decisions quickly about a big pile of data necessarily involves a fair bit of centralization, but the Paxos proof of stake protocol means the center can move at the speed of light in fiber, and from time to time, will do so, sometimes to locations unknown and not easy to find. We cannot avoid having a center, but we can make the center ephemeral, and we can make it so that not everyone, or even all peers, know the network address of the processes holding the secrets that signed the most recent block.
|
||||
|
||||
Scaling accomplished by a client host hierarchy, where each host has many clients, and each host is a blockchain peer.
|
||||
|
||||
A hundred or so big peers, who do not trust each other, each manage a copy of the blockchain.
|
||||
|
||||
The latest block is signed by peers representing a majority of the stake, which is likely to be considerably less than a hundred or so peers.
|
||||
|
||||
Peer stake is delegated from clients – probably a small minority of big clients – not all clients will delegate. Delegation makes privacy more complicated and leakier. Delegations will be infrequent – you can delegate the stake held by an offline cold wallet, whose secret lives in pencil on paper in a cardboard file in a safe, but a peer to which the stake was delegated has to have its secret on line.
|
||||
|
||||
Each peer’s copy of the blockchain is managed, within a rack on the premises of a peer, by a hundred or so shards. The shards trust each other, but that trust does not extend outside the rack, which is probably in a room with a lock on the door and a security camera watching the rack.
|
||||
|
||||
Most people transacting on the blockchain are clients of a peer. The blockchain is in the form of a sharded Merkle-patricia tree, hence the clients do not have to trust their host – they can verify any small fact about the blockchain in that they can verify that peers reflecting a majority of stake assert that so and so is true, and each client can verify that the peers have not rewritten the past.
|
||||
|
||||
Scale is achieved through the client peer hierarchy, and, within each peer, by sharding the blockchain.
|
||||
|
||||
Clients verify those transactions that concern them, but cannot verify that all transactions are valid, because the blockchain is too big. Each peer verifies the entire blockchain from beginning to end. If the blockchain replaces the US dollar as the world currency, then it will rapidly become far too large for any one computer to verify the whole thing, so will have to be verified by a group of mutually trusting and trusted shards, but each such group of shards is a peer. The shards trust shards of the same peer, which are likely running on the same rack in the same locked room under the gaze of the same security camera, but they don’t trust shards of some other peer.
|
||||
|
||||
In each transaction, each client verifies that the other client is seeing the same history and recent state of the blockchain, and in this sense, the blockchain is a consensus of all clients, albeit that consensus is mediated through a small number of large entities that have a lot of power.
|
||||
|
||||
The architecture of power is rather like a corporation, with stake as shares.
|
||||
In a corporation CEO can do anything, except the board can fire him and
|
||||
choose a new CEO at any time. The shareholders could in theory fire the
|
||||
board at any time, but in practice, if less than happy with the board, have
|
||||
to act by transacting through a small number of big shareholders.
|
||||
Centralization is inevitable, but in practice, by and large corporations do
|
||||
an adequate job of pursuing shareholder interests, and when they fail to do
|
||||
so, as with woke capital, Star Wars, or the great minority mortgage
|
||||
meltdown, it is usually due to heavy handed state intervention. Google’s
|
||||
board is mighty woke, but in the Damore affair, human resources decided
|
||||
that they were not woke enough, and in the Soy wars debacle, the board
|
||||
was not woke at all but gave power over Star Wars brand name to women
|
||||
who threatened them with \#metoo. And if this form of distributed power
|
||||
does not always work all that well, it fails less badly than anything else we
|
||||
have tried. Delegated power representing assets, rather than people, results
|
||||
in centralized power that, by and large, mostly, pursues the interests of
|
||||
those assets. Delegated power representing people, not so much.
|
||||
|
||||
In bitcoin, power is in the hands of a very small number of very large miners. This is a problem, both in concentration of power, which seems difficult to avoid if making decisions rapidly about very large amounts of data, and in that miner interests differ from stakeholder interests. Miners consume very large amounts of power, so have fixed locations vulnerable to state power. They have generally relocated to places outside the US hegemony, into the Chinese or Russian hegemonies, or the periphery of those hegemonies, but this is not a whole lot of security.
|
||||
|
||||
Proof of stake has the advantage that stake is ultimately knowledge of secret keys, and while the state could find the peers representing a majority of stake, they are more mobile than miners, and the state cannot easily find the clients that have delegated stake to one peer, and could easily delegate it to a different peer, the underlying secret likely being offline on pencil and paper in someone’s safe, and hard to figure out whose safe.
|
||||
|
||||
Obviously, at full scale we are always going to have immensely more clients than full peers, likely by a factor of hundreds of thousands, but we need to have enough peers, which means we need to reward peers for being peers, for providing the service of storing blockchain data, propagating transactions, verifying the blockchain, and making the data readily available, rather than for the current pointless bit crunching and waste of electricity employed by current mining.
|
||||
|
||||
Bitcoin proposes to solve the scaling problem by the [Lightning Network, which is a re-invention of correspondent banking and the General Ledger, SubLedger system](https://www.forbes.com/sites/francescoppola/2016/06/17/thunder-and-lightning-in-the-bitcoin-world/). Obviously re-inventing General Ledger and Subledger will improve scaling, but [Central Clearing houses are also needed](https://gendal.me/2013/11/24/a-simple-explanation-of-how-money-moves-around-the-banking-system/).
|
||||
|
||||
The power over the blockchain, and the revenues coming from transaction and storage fees, have to go to this large number of peers, rather than, as at present, mostly to four miners located in China.
|
||||
|
||||
Also, at scale, we are going to have to shard, so that a peer is actually a pool of machines, each with a shard of the blockchain, perhaps with all the machines run by one person, perhaps run by a group of people who trust each other, each of whom runs one machine managing one shard of the blockchain.
|
||||
|
||||
Rewards, and the decision as to which chain is final, has to go to weight of stake, but also to proof of service – to peers, who store and check the blockchain and make it available. For the two to be connected, the peers have to get stake delegated to them by providing services to clients.
|
||||
|
||||
All durable keys should live in client wallets, because they can be secured off the internet. So how do we implement weight of stake, since only peers are sufficiently well connected to actually participate in governance?
|
||||
|
||||
To solve this problem, stakes are held by client wallets. Stakes that are in the clear get registered with a peer, the registration gets recorded in the blockchain, and the peer gets influence, and to some
|
||||
extent rewards, proportional to the stake registered with it, conditional on the part it is doing to supply data storage, verification, and bandwidth.
|
||||
|
||||
My original plan was to produce a better bitcoin from pair based
|
||||
cryptography. But pair based cryptography is slow. Peers would need a
|
||||
blade of computers when the volume surpassed bitcoin levels.
|
||||
|
||||
Maybe not so slow. [There is an assembly library](https://github.com/herumi/mcl) that promises three ops per millisecond
|
||||
|
||||
So instead, swipe, I mean build upon, the cryptonote foundation. (Which already implements the split between network node and wallet.) Two substantial currencies have been built from cryptonote: Monero and bytecoin. Also Boolberry.
|
||||
But, on the other hand [MimbleWimble clearly has the best cryptography – at the bleeding edge](https://github.com/ignopeverell/grin/blob/master/doc/grin4bitcoiners.md).
|
||||
|
||||
> no address. All outputs in Grin are unique and have
|
||||
> no common data with any previous output. Instead of relying on a
|
||||
> known address to send money, transactions have to be built interactively,
|
||||
> with 2 (or more) wallets exchanging data with one
|
||||
> another. Practically, this isn’t so much of a problem as there
|
||||
> are multiple ways for 2 programs to interact privately and securely.
|
||||
> And this interaction could even take place over email or Signal
|
||||
> (or carrier pigeons).
|
||||
|
||||
For example, suppose each peer has a thousand client wallets, and the capacity to connect to any other peer, that peers have fully accessible ports, and that the client wallets, who being behind consumer grade NATS generally do not have fully accessible ports, set up a direct client wallet encrypted connection through their NATS using their peer connections to initialize the connection.
|
||||
|
||||
But obviously this software is not written yet. Still vaporware, but vaporware that sounds very promising.
|
||||
|
||||
Mimblewimble solves the problem of disk storage limiting scale.
|
||||
|
||||
How does it go on bandwidth limiting scale?
|
||||
|
||||
On bandwidth, it kind of sucks. We are going to need shardable peers.
|
||||
|
||||
We need a client peer host architecture that is future compatible with people who have serious money using a special purpose microcomputer with an lcd touchscreen, like an android but incapable of being reprogrammed, because it runs code in rom, and whose only essential functions are: Enter password, copy wallet from one memory card to another, show you what you are signing, and allow you to sign it. Or perhaps a walled garden computer incapable of running any code except code signed by the builder, (except your adversary has physically got at it and replaced it by an evil twin) but otherwise a full internet capable androidish device. From which it follows that not only our host, but our client needs to be accessible through socket io.
|
||||
|
||||
Bitcoin can do about 3 transactions per second. That’s a far cry from the 2000 TPS that Visa rams through every second of every day.
|
||||
|
||||
Bitcoin takes at least ten minutes to confirm your transaction.
|
||||
|
||||
Inside the computer, transaction amounts will be represented as big
|
||||
integers with a fixed precision limit, initially sixty four bits. On the
|
||||
blockchain, in the canonical representation, they will be represented as
|
||||
arbitrary precision integers times one thousand raised to a signed arbitrary
|
||||
precision quantity, which for a very long time will be a one byte quantity.
|
||||
The initial smallest representable unit, corresponding to the internal
|
||||
representation inside the computer, $1µρ$, will be represented on the
|
||||
blockchain as $1*1000^{96}$, so that we do not have to think about\
|
||||
whether that byte is
|
||||
signed or unsigned. If, after millennia of deflation, which I think and hope
|
||||
likely, it approaches zero, we will have to start thinking of it as a signed
|
||||
quantity, and if, after millennia of inflation, which I hope is far less
|
||||
likely, it approaches 128, we will start thinking of it as unsigned quantity.
|
||||
|
||||
If rhocoin takes over the world, and the smallest unit is initially worth ten
|
||||
trillion dollars times 2^-64^, economic growth and various engineered and
|
||||
inadvertent currency leaks will result in slow deflation. If it deflates at
|
||||
two percent a year, then in six hundred years of so, there is going to be a
|
||||
problem with the smallest currency unit becoming too large. I would like my
|
||||
works to outlast those of Ozymandias. But by that time the equivalent of banks
|
||||
will have appeared, and banks can issue the equivalent of banknotes in
|
||||
arbitrarily small units. Entities will appear that aggregate large numbers of
|
||||
small transactions on their network into a small number of large transactions
|
||||
on the main network. As the network comes to span the stars, transactions
|
||||
global to several stars will necessarily become too slow, leading to systems
|
||||
that aggregate transactions in local currency over time and space into larger,
|
||||
slower, and less frequent transactions on the main network. We don’t have to
|
||||
worry about that kind of scaling for a very long time. The deflation problem
|
||||
will likely be rendered irrelevant by the decentralization problem as we go
|
||||
into space. Figure that one out later – need the Merkle-patricia blockchain
|
||||
and paxos consensus formation on digital assets, and once we can construct and
|
||||
prove arbitrary consensus on arbitrary digital assets, we can build anything.
|
||||
|
||||
But trouble is, I want my data format to outlast the stars. Ozymandias
|
||||
merely built in stone, which probably lasted a millennium or two. I
|
||||
have more durable building materials at hand than Ozymandias did.
|
||||
|
||||
I intend to initially define the smallest representable quantity as something
|
||||
larger than $2^{-62}$ of the currency at issue, and then drop it to the lowest
|
||||
value the UI can handle, probably yoctorho, $yρ$, when the software
|
||||
supports that. And, having dropped, it is unlikely to change further for
|
||||
many millennia or so.
|
||||
|
||||
If someone reads this in a few millennia, and the exponent, still eight bits
|
||||
on the blockchain, wraps through zero or one hundred and twenty eight,
|
||||
drink to me as the first builder who truly built to live forever.
|
||||
|
||||
One solution is to have the canonical blockchain format, and the base
|
||||
communication format that every client and peer must support, even
|
||||
though obviously any two peers can agree to any format to communicate
|
||||
between each other, represent money in binary form as variable precision
|
||||
base one thousand floating point, and to the users in units of the metric
|
||||
prefixes tera, giga, mega, kilo ... milli, micro (and eventually nano, pico,
|
||||
femto). When deflation runs us out of prefixes, in a few millennia or so,
|
||||
perhaps the prefixes will wrap back to from zepto to yotta, but we can
|
||||
worry about that UI detail in the far future, supposing that the language
|
||||
has not radically changed by then.
|
||||
|
||||
We have a configurable limit on the smallest representable quantity, which
|
||||
just happens to correspond to translating everything to sixty four bit
|
||||
integers, but that limit can be changed as necessary without breaking the
|
||||
canonical format - thus the canonical format will suffice forever. The sixty
|
||||
four bit integers will be an internal implementation detail, a particular
|
||||
internal representation of unsigned arbitrary precision floating point base
|
||||
one thousand, which can change in any one peer without breaking
|
||||
anything, and with machines using different internal representations still
|
||||
able to communicate with each other.
|
||||
|
||||
M2, total US money supply, is about ten trillion, MB, the hard central bank
|
||||
issuance that roots M2, the base money, is about three trillion, the difference
|
||||
being term transformation.
|
||||
|
||||
Assuming we want to replace all money everywhere, and support
|
||||
transactions down to one thousandth of a cent, $2^{64}-1$ millicents is over
|
||||
one hundred trillion, which will suffice. (We don’t allow negative account
|
||||
values in the base money.)
|
||||
|
||||
So, assuming at full volume, the currency is worth ten trillion, the base
|
||||
unit will be worth around 0.005 millicents. And at initial release, we want
|
||||
the total value of the currency to be about twenty million, so the base unit
|
||||
of the currency will be initially worth about 1E-10 cents. We want plenty of
|
||||
headroom for additional currency issue, so will initially issue only one
|
||||
sixty fourth of the possible currency, and want to initially issue sixteen
|
||||
million worth, so want the smallest unit of currency to be\
|
||||
$2^{-64}*64*\$16\,000\,000$, which is approximately $\$2*10^{-10}$
|
||||
|
||||
Assuming we only have $2^{60}$ of the smallest base unit, and that when we
|
||||
are competing on an equal footing with other media of exchange, it has a
|
||||
capitalization of two trillion, then again the smallest base unit will be
|
||||
worth about two millicents, which is inconveniently small. So our named
|
||||
unit has to be a million or a billion times larger.
|
||||
|
||||
If my plans work out, the currency will be controlled by the whales, who
|
||||
have net positive value in the currency, hence want permanent deflation,
|
||||
rather than the bankers, who owe a lot of promises to pay in the currency,
|
||||
backed by promises that when push comes to shove are likely to result in
|
||||
the delivery of property, rather than currency, and therefore have regular
|
||||
banking crises, resulting in regular demands to debase the currency,
|
||||
resulting in permanent inflation. So, assuming permanent deflation, make
|
||||
the smallest base unit the microrho, $1µρ$. So, when we are competing in
|
||||
the big leagues, our named unit will be worth about two dollars. Which is
|
||||
inconveniently small, but I anticipate further deflation eventually.
|
||||
|
||||
Traditional coinage had as its lowest value coin the half reale, or the
|
||||
maravedi, one third of a reale. The most common coin in use was the peso,
|
||||
the piece of eight rendered so famous in pirate lore, which was eight reales
|
||||
or twenty four maravedi, subsequently divided into one hundred cents.
|
||||
The famous doubloon was sixteen reales, or forty eight maravedi.
|
||||
|
||||
An eight reale coin, peso, was worth about a hundred dollars in today's
|
||||
money, so people were disinclined to use coins for small transaction, or
|
||||
disinclined to be minutely precise about the value of a transaction. So we
|
||||
want, after taking over the world economy, our standard unit of currency
|
||||
to be worth about four dollars, which is about 80000 times our smallest
|
||||
unit. But we want to use powers of a thousand, milli, kilo, mega, etc, So
|
||||
our base unit is going to be the microrho, or $μρ$, and our standard unit, the
|
||||
rho or $ρ$, is going to be worth about ten trillion$*1000000*2^{-64}$ which
|
||||
is about half a dollar. Or we could make our smallest representable unit the
|
||||
$nρ$, which might leave us with an inconveniently large value of the rho, and
|
||||
everyone using millirho followed by a decimal point and the rest in $μρ$,
|
||||
which is inconvenient. But, if we always display quantities in the metric
|
||||
unit such that the quantity is less than a thousand of that unit, but equal to
|
||||
or greater than one of that unit, it is OK.
|
||||
|
||||
If we make our smallest possible base unit the $nρ$, then the maximum
|
||||
possible currency on issue, until we go to internally representing values
|
||||
within the computer as 128 bit, which is not that hard, since our
|
||||
representation on the blockchain and to humans is arbitrary precision
|
||||
times powers of a thousand, then the maximum transaction of which
|
||||
computers are capable is going to be eighteen billion rho, which is not a
|
||||
limitation. What is a limitation is that at scale, people will commonly be
|
||||
transacting in $mρ$. On the other hand, if we start out transacting in $ρ$, and
|
||||
end up transacting in $mρ$, that is continual propaganda for the currency as
|
||||
a store of value. At scale, the $mρ$ will be a roughly right sized value to get
|
||||
your mind around in small transactions, and the $ρ$ the right sized value for
|
||||
asking how your solvency is going and how much a car or a house is
|
||||
worth.
|
||||
|
||||
We need to strongly support sidechains and chaumian cash, sidechains so that we can have competing protocols and higher volumes. Cryptonote has something arguably better than Chaumian cash.
|
||||
|
||||
Our financial system is corrupt and oppressive. Cryptocurrencies represent an opportunity to route around that system, and make lots of money doing so.
|
||||
|
||||
Cryptocurrency is real, and presents the opportunity to make enormous amounts of money. Also, cryptocurrency scams are real, and present the opportunity to lose enormous amounts of money.
|
||||
|
||||
The successful altcoin will be genuinely decentralized, as bitcoin was designed to be, originally was, and to some extent still is. Most of the altcoins, possibly all of them except the Bitcoins and Ethereum, are furtively centralized.
|
||||
|
||||
It will use, or at least offer the option, of Zooko type wallet names.
|
||||
|
||||
It will be scalable to enormous numbers of transactions with low transaction costs, as Steemit and Ripple are, but Bitcoin and Ethereum are not.
|
||||
|
||||
It will support sidechains, and exchanges will be sidechained.
|
||||
|
||||
It will be a blogging and tweeting platform, as Steemit is, and will be a decentralized blogging and tweeting platform, as Steemit is not.
|
||||
|
||||
Every website [reporting on the altcoin boom and the initial coin offering boom](https://coinmarketcap.com/coins/) has an incentive to not look too closely at the claimed numbers. Looks to me that only Bitcoin and Steemit.com have substantial numbers of real users making real arms length transactions. Maybe Ethereum and Ripple also. The rest are unlikely to have any significant number of real, arms length, users.
|
||||
|
||||
The crypto coin business is full of scammers, and there is no social pressure against scammers, no one wants to look too closely, because a close look would depress the market.
|
||||
|
||||
Most of the alt currencies are just me-too copies of bitcoin, not adding any substantial value, and/or they cannot scale, and they are deceptive about how centralized and how vulnerable to state attack they are. Nearly all of them are furtively centralized, as Bitcoin never was. They all claim to be decentralized, but when you read the white paper, as with Waves, or observe actual practice, as with Steemit, they are usually completely centralized, and thus completely vulnerable to state pressure, and quite likely state seizure as an unregulated financial product, thus offer no real advantage over conventional financial products.
|
||||
|
||||
The numbers [show](https://coinmarketcap.com/coins/) that Bitcoin is number one, ethereum number two, ripple number four, and steemit.com number eighteen, but my wild assed guess is that Bitcoin is number one, steemit number two, ethereum number three. I have absolutely no idea where ripple stands. No one is providing data that would enable us to estimate real, arms length users.
|
||||
|
||||
Bitcoin exchanges are banks, and banks naturally become fractional reserve institutions. Bitcoin exchanges are furtively and secretly investing customer deposits, without reporting the resulting term transformation.
|
||||
|
||||
Genuinely free market banks, and bitcoin exchanges are genuinely free market banks, have a financial incentive to engage in term transformation – borrow short, lend long. Which is great for everyone until a rainy day comes, rains on everyone, and everyone withdraws their deposits all at the same time, and suddenly all those long term loans cannot be liquidated except at a loss, whereupon the ~~banks~~exchanges turn to the state, and so begin the transition from a backed currency to a state currency, ceasing to be free market banks.
|
||||
|
||||
The trouble with fractional reserve is that free market banks, banks trading in a backed, rather than state, currency, tend to deny, understate and misrepresent the term transformation risk, making them slowly, and often unintentionally, drift into becoming scams. If the reserve fraction is visible to customers, then we could rely on caveat emptor. Right now, however, every bitcoin exchange is drifting into becoming a scam.
|
||||
|
||||
We need, and we could easily have but do not have, a system where the amount of bitcoins owed to customers by an exchange is knowable and provable, and the amount of bitcoins owned by an exchange is knowable and provable, so that the reserve fraction is visible, whereupon the exchange would have to provide information about the extent and nature of its term transformation, or else would likely lose customers, or at least would lose large, long term customers. This would involve the decentralized cryptocurrency making each exchange a sidechain operating a centralized cryptocurrency backed by the decentralized cryptocurrency. Which would also help mightily with scaling.
|
||||
|
||||
Bitcoin and ethereum are truly decentralized, in that they are protocols that any entity can use, and that in practice lots of entities do use. If the government grabs some hosts, or some hosts do bad things, they can just be ignored, and the system continues elsewhere. They also use Zooko type identities, which in practice means your wallet name looks like line noise. This is outstandingly user hostile, and a reason so many people use exchanges, but it provides the core of resistance to state power.
|
||||
|
||||
Unfortunately, Bitcoin and Ethereum face scaling limits. Maybe ethereum will fix its scaling limits. Bitcoin does not seem to be fixing them. This makes Bitcoin and Ethereum transactions inherently expensive, which is likely to prevent them from replacing the corrupt and oppressive US government controlled financial system.
|
||||
|
||||
Steemit.com has a far superior design which does not result in scaling limits – although we have yet to see how its witness election system will perform at scale – as the system scales, money holders have less incentive to vote, less incentive to vote responsibly, and voting will inherently cost more.
|
||||
|
||||
Steemit.com is also highly centralized. The altcoin that will win needs to be scalable all the way to Visa and Mastercard levels, needs to be visibly decentralized, visibly resistant to state seizure, and needs to have a mechanism that makes the fractional reserves of exchanges visible to exchange users.
|
||||
|
||||
Bitcoin was genuinely decentralized from the beginning, and over time became more centralized. Big exchanges and a small number of big miners are on the path to inadvertently turning it into another branch of the oppressive and corrupt government fiat money system.
|
||||
|
||||
The new altcoin offerings are for the most part not genuinely decentralized. They have a plan for becoming genuinely decentralized some time in the future, but the will and ability to carry the plan through has not been demonstrated.
|
||||
|
||||
I like the steemit design. The witness system is scalable, the witness election system has problems which may be fixable, or may be inherent.
|
||||
|
||||
But I have a suspicion that investing in steemit is only going to profit whoever owns steemit.com, not the owners of steemit currency.
|
||||
|
||||
According to Steemit documentation, it looks like a well designed cryptocurrency that deserves to replace Bitcoin, because it is more scalable, more user friendly, and more immediately usable.
|
||||
|
||||
Well, that is what it looks like. Except its front end is the steemit.com website, and any one website can easily be seized by the feds. If actually decentralized, it should be a bunch of websites using a common crypto currency and a common identity system,
|
||||
|
||||
Remember usenet: A common protocol, and an internal name system. The particular host through which you accessed it did not matter all that much, because all hosts had to behave much the same. Steemit should be something like usenet with money, and it is not.
|
||||
|
||||
The way usenet worked, anyone (meaning anyone’s computer and his client program) could join as a client by having an agreement with a host, and anyone (meaning anyone’s powerful and well connected computer system) could join as a host by having an agreement with a few existing members.
|
||||
|
||||
A successful altcoin needs to be a blogging platform like Steemit, but it also needs to be a federation, like Usenet or Mastodon. Many of the blogs will be offering goods or services for cryptocurrency.
|
||||
|
||||
Then one could be more sure that success of the federation currency would benefit owners of the currency, rather than owners of a single central website.
|
||||
|
||||
Needs to be Mastodon with the ability to support a blog like post, and like Steemit, and unlike Mastodon, to send and receive money. Steemit.com is wordpress.com with the ability to send and receive money.
|
||||
|
||||
Bitcoin has a decentralized name system, rooted in Zooko style names that are not human intelligible. Its resistance to state power comes partly from the fact that there are several miners and anyone can be a miner, and partly from its decentralized name system.
|
||||
|
||||
Steemit has a communication and blogging system. But if I hold steemit currency, steemit.com connects that to my phone number, which the government connects to my true name. All that handy dandy data that the government would like all in one place that you can serve a warrant on or mount a raid on. Or just sell for profit.
|
||||
|
||||
Need a decentralized communication, identity, name, and blogging system, unlike Steemit.com’s centralized communication and blogging system, and a name system that is resistant to government intervention and control, like Bitcoin’s name system. Thus the blogs offering goods and services for crypto currency will be resistant to regulation or seizure by the state. When a ruler meddles as much as our state does, he gives dangerously great power to those dangerously close to him. The regulatory state inevitably drifts into anarcho tyranny, or, like Venezuela, into violent and chaotic anarchy.
|
||||
|
||||
But we also want human readable names. How can we square Zooko’s triangle? (As Aaron Swartz famously asked, and then infamously gave a very stupid answer.) I will give my answer as to how a crypto currency can square Zooko’s triangle in a following post. (The answer being, much as namecoin does it.)
|
||||
|
||||
Now since any crypto currency system is a generalized secure name system with money, how do we make this system available for general access between computers?
|
||||
|
||||
Our wallet client will provide an interface to something that looks and acts very much like your browser bookmarks system. Except that links in the system correspond to a new kind of url, perhaps ro: This will be registered the same way magnet, https, mailto, and http are registered. In windows they are registry entries of the form
|
||||
|
||||
> Computer\\HKEY_CLASSES_ROOT\\http\\shell\\open\\command
|
||||
|
||||
except, of course, that ours shall be
|
||||
|
||||
> Computer\\HKEY_CLASSES_ROOT\\ro\\shell\\open\\command
|
||||
|
||||
In our name system, links consist of a wallet name followed by a path. The target wallet maps these names to a server somewhere, likely on his system, and a client protocol, such as http, on your system.
|
||||
|
||||
The target may want a client walletname, or the client username and shared secret, which is usually stored in the link, but if it is not, has to be typed into the wallet software when you are opening the link. Any required user name and password negotiation is done in the wallet UI, not in the UI of the client being launched.
|
||||
|
||||
If the client protocol is http, this results in the wallet creating on your system a port which maps to a port on the destination system, and then launching your browser. If a username and password is needed, then the wallet does the negotiation and launches the browser with a transient cookie.
|
||||
|
||||
Thus, suppose the url ro:example_name/foo maps to http protocol with some target system determined by the owner of example_name.
|
||||
|
||||
Then some port, perhaps 3237 on your system, will be mapped to port 80 on the target system, then the url ro:example_name/foo/bar will result in the command to launch your browser to http://localhost:3237/bar
|
||||
|
||||
This is not a system for attaching to our legacy browser system. It is global connection and protocol negotiation system which can be used for legacy systems such as http. That browsers will mishandle these translated urls is a browser bug. They should talk directly to the wallet client, and say "give me a socket for this ro protocol url."
|
||||
|
||||
TCP identified protocols by small numbers, and target machines by rather larger numbers. This totally failed to scale, and we have to replace it with a [better scheme](./protocol_specification.html), with support for urls such as "magnet" and "http" as a degenerate special case of this more general and more powerful scheme.
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
The coin to invest in, the coin that I will invest in both in money and as a software contributor, will solve the scaling problem, will be capable of scaling all the way to wiping out the US\$ as a world currency. It will have integral support for sidechains with payments out of one sidechain to another sidechain being endorsed by sidechain signature which can be generated by arbitrarily complex rules idiosyncratic to that sidechain provided that conformity to the rules has verification of bounded computational time that the central chain can evaluate. It will have an efficient system for securing history in which Merkle trees do not grow to enormous depth, so that it is possible to efficiently verify any one small part of history without needing to verify all transactions that have ever taken place. (Because scalability implies we abandon everyone verifying everything down to the last byte.)
|
||||
|
||||
It will be decentralized in the sense that if the police grab every single major contributor, software writer, and server, they cannot change the rules and make the currency act differently, they can only seize the money of the people that they have grabbed.
|
||||
|
||||
A Merkle tree is a tree where every node contains the hash of its immediate children. Thus the hash of the root of any subtree guarantees the contents of all its descendants, just as the hash of a file guarantees the contents of the entire file.
|
||||
|
||||
This means that we can keep on adding to the tree, while keeping the past immutable, which is a useful feature for tracking who owns what, and who owes what. If many people see the current hash at time X, you cannot change details about the past of time X without revealing what you have been up to.
|
||||
|
||||
Any tree can be severely unbalanced, for example a binary tree where every node has a right hand child, and very few nodes have a left hand child, in which case the depth of the tree is approximately proportional to the total number of nodes in the tree – and the tree grows to enormous depth when the total number of node is enormous.
|
||||
|
||||
Or it can be approximately balanced, in which case the depth of the tree is approximately proportional to the log of the number of nodes, which is always a reasonably small number even if the number of nodes is enormous.
|
||||
|
||||
And a hash that testifies to every transaction that anyone ever did is going to be the hash of an enormous number of nodes. But if it is at the root of a tree of moderate depth, then we can validate any part of the tree for conformity with the rules without validating the entire tree for conformity to the rules.
|
||||
|
||||
A blockchain is a Merkle tree that is chain like, rather than tree like. Its depth grows linearly with its size, thus in time it becomes very deep. Every node must store, or at least have processed and summarized, the entire tree. Thus if many equal nodes, cost of adding transactions is proportional to the number of nodes
|
||||
|
||||
Thus, if we want a decentralized system, this can get very expensive.
|
||||
|
||||
We want a system that can resist state power, a system where if the state grabs a few individuals and coerces them, it can seize their money, and perhaps all the money that they manage for other people, but cannot seize the entire system. If it wants to grab control of everyone’s money, has to grab everyone, or at least grab most people. Thus reducing the cost by having a few people authorized to validate the blockchain is a bad option, since the state could grab those people, or those people could conspire together to scam everyone.
|
||||
|
||||
A blockchain runs on a set of nodes, each of which may be under the control of a separate company or organization. These nodes connect to each other in a dense peer-to-peer network, so that no individual node acts as a central point of control or failure. Each node can generate and digitally sign transactions which represent operations in some kind of ledger or database, and these transactions rapidly propagate to other nodes across the network in a gossip-like way.
|
||||
|
||||
## The way bitcoin works
|
||||
|
||||
Each node independently verifies every new incoming transaction for validity, in terms of: (a) its compliance with the blockchain’s rules, (b) its digital signature and (c) any conflicts with previously seen transactions. If a transaction passes these tests, it enters that node’s local list of provisional unconfirmed transactions (the “memory pool”), and will be forwarded on to its peers. Transactions which fail are rejected outright, while others whose evaluation depends on unseen transactions are placed in a temporary holding area (the “orphan pool”).
|
||||
|
||||
At periodic intervals, a new block is generated by one of the “validator” nodes on the network, containing a set of as-yet unconfirmed transactions. Every block has a unique 32-byte identifier called a “hash”, which is determined entirely by the block’s contents. Each block also includes a timestamp and a link to a previous block via its hash, creating a literal “block chain” going back to the very beginning.
|
||||
|
||||
Just like transactions, blocks propagate across the network in a peer-to-peer fashion and are independently verified by each node. To be accepted by a node, a block must contain a set of valid transactions which do not conflict with each other or with those in the previous blocks linked. If a block passes this and other tests, it is added to that node’s local copy of the blockchain, and the transactions within are “confirmed”. Any transactions in the node’s memory pool or orphan pool which conflict with those in the new block are immediately discarded.
|
||||
|
||||
Every chain employs some sort of strategy to ensure that blocks are generated by a plurality of its participants. This ensures that no individual or small group of nodes can seize control of the blockchain’s contents. Most public blockchains like bitcoin use “proof-of-work” which allows blocks to be created by anyone on the Internet who can solve a pointless and fiendishly difficult mathematical puzzle. By contrast, in private blockchains, blocks tend to be signed by one or more permitted validators, using an appropriate scheme to prevent minority control.
|
||||
|
||||
Depending on the consensus mechanism used, two different validator nodes might simultaneously generate conflicting blocks, both of which point to the same previous one. When such a “fork” happens, different nodes in the network will see different blocks first, leading them to have different opinions about the chain’s recent history. These forks are automatically resolved by the blockchain software. In bitcoin, the probability of this conflict continuing drops rapidly and exponentially, but never goes to zero.
|
||||
|
||||
This document is licensed under the [CreativeCommons Attribution-Share Alike 3.0 License](http://creativecommons.org/licenses/by-sa/3.0/)
|
35
docs/cyperhpunk_manifesto.md
Normal file
@ -0,0 +1,35 @@
|
||||
---
|
||||
description: >-
|
||||
“A Cypherpunk’s Manifesto” was written by Eric Hughes and published on March 9, 1993.
|
||||
robots: 'index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1'
|
||||
title: >-
|
||||
Eric Hughes: A Cypherpunk’s Manifesto
|
||||
viewport: 'width=device-width, initial-scale=1.0'
|
||||
---
|
||||
**Privacy is necessary for an open society in the electronic age. Privacy is not secrecy. A private matter is something one doesn’t want the whole world to know, but a secret matter is something one doesn’t want anybody to know. Privacy is the power to selectively reveal oneself to the world.**
|
||||
|
||||
![The following essay was written by Eric Hughes and published on March 9, 1993. A Cypherpunk’s Manifesto was originally published on activism.net](./eric.jpg "Eric Hughes: A Cypherpunk’s Manifesto"){width="100%"}
|
||||
|
||||
If two parties have some sort of dealings, then each has a memory of their interaction. Each party can speak about their own memory of this; how could anyone prevent it? One could pass laws against it, but the freedom of speech, even more than privacy, is fundamental to an open society; we seek not to restrict any speech at all. If many parties speak together in the same forum, each can speak to all the others and aggregate together knowledge about individuals and other parties. The power of electronic communications has enabled such group speech, and it will not go away merely because we might want it to.
|
||||
|
||||
Since we desire privacy, we must ensure that each party to a transaction have knowledge only of that which is directly necessary for that transaction. Since any information can be spoken of, we must ensure that we reveal as little as possible. In most cases personal identity is not salient. When I purchase a magazine at a store and hand cash to the clerk, there is no need to know who I am. When I ask my electronic mail provider to send and receive messages, my provider need not know to whom I am speaking or what I am saying or what others are saying to me; my provider only need know how to get the message there and how much I owe them in fees. When my identity is revealed by the underlying mechanism of the transaction, I have no privacy. I cannot here selectively reveal myself; I must *always* reveal myself.
|
||||
|
||||
Therefore, privacy in an open society requires anonymous transaction systems. Until now, cash has been the primary such system. An anonymous transaction system is not a secret transaction system. An anonymous system empowers individuals to reveal their identity when desired and only when desired; this is the essence of privacy.
|
||||
|
||||
Privacy in an open society also requires cryptography. If I say something, I want it heard only by those for whom I intend it. If the content of my speech is available to the world, I have no privacy. To encrypt is to indicate the desire for privacy, and to encrypt with weak cryptography is to indicate not too much desire for privacy. Furthermore, to reveal one’s identity with assurance when the default is anonymity requires the cryptographic signature.
|
||||
|
||||
We cannot expect governments, corporations, or other large, faceless organizations to grant us privacy out of their beneficence. It is to their advantage to speak of us, and we should expect that they will speak. To try to prevent their speech is to fight against the realities of information. Information does not just want to be free, it longs to be free. Information expands to fill the available storage space. Information is Rumor’s younger, stronger cousin; Information is fleeter of foot, has more eyes, knows more, and understands less than Rumor.
|
||||
|
||||
We must defend our own privacy if we expect to have any. We must come together and create systems, which allow anonymous transactions to take place. People have been defending their own privacy for centuries with whispers, darkness, envelopes, closed doors, secret handshakes, and couriers. The technologies of the past did not allow for strong privacy, but electronic technologies do.
|
||||
|
||||
We the Cypherpunks are dedicated to building anonymous systems. We are defending our privacy with cryptography, with anonymous mail forwarding systems, with digital signatures, and with electronic money.
|
||||
|
||||
Cypherpunks write code. We know that someone has to write software to defend privacy, and since we can’t get privacy unless we all do, we're going to write it. We publish our code so that our fellow Cypherpunks may practice and play with it. Our code is free for all to use, worldwide. We don’t much care if you don’t approve of the software we write. We know that software can’t be destroyed and that a widely dispersed system can’t be shut down.
|
||||
|
||||
Cypherpunks deplore regulations on cryptography, for encryption is fundamentally a private act. The act of encryption, in fact, removes information from the public realm. Even laws against cryptography reach only so far as a nation’s border and the arm of its violence. Cryptography will ineluctably spread over the whole globe, and with it the anonymous transactions systems that it makes possible.
|
||||
|
||||
For privacy to be widespread it must be part of a social contract. People must come together and deploy these systems for the common good. Privacy only extends so far as the cooperation of one’s fellows in society. We the Cypherpunks seek your questions and your concerns and hope we may engage you so that we do not deceive ourselves. We will not, however, be moved out of our course because some may disagree with our goals.
|
||||
|
||||
The Cypherpunks are actively engaged in making the networks safer for privacy. Let us proceed together apace.
|
||||
|
||||
Onward.
|
245
docs/dovecot.md
Normal file
@ -0,0 +1,245 @@
|
||||
---
|
||||
lang: en
|
||||
title: Install Dovecot on Debian 10
|
||||
---
|
||||
# Purpose
|
||||
|
||||
We want postfix working with Dovecot so that we can send and access our emails from email client such as thunderbird client on another computer.
|
||||
|
||||
# Enable SMTPS in postfix
|
||||
|
||||
## prerequisite
|
||||
|
||||
You have already enabled [postfix TLS] and made sure that it is working by checking your logs of emails successfully sent and received.
|
||||
|
||||
[postfix TLS]:set_up_build_environments.html#tls
|
||||
|
||||
## setup postfix to talk to dovecot
|
||||
|
||||
We are going to enable `smtps`, port 465, which your email client probably
|
||||
refers to as `SSL/TLS` and `ufw` refers to as `'Postfix SMTPS'`
|
||||
|
||||
We are *not* going to enable `submission`, port 587, which your email client
|
||||
probably refers to as `STARTTLS`, and `ufw` refers to as `'Postfix Submission'`,
|
||||
because `STARTTLS` is vulnerable to downgrade attacks if
|
||||
your enemies have substantial power over the network, and many major
|
||||
email clients do not support it for that reason. Since we are using normal
|
||||
passwords, a successful downgrade attack will leak the password, enabling
|
||||
the enemy to read and modify mail from that client, and to send spearphish,
|
||||
shill, scam, and spam emails as the client identity.
|
||||
|
||||
Passwords are a vulnerability, and in a hostile, untrustworthy, and
|
||||
untrusting world need to be replaced by ZKA resting on a BIPS style
|
||||
wallet secret, but we have to make do with `smtps` until we create something better.
|
||||
|
||||
```bash
|
||||
nano /etc/postfix/master.cf
|
||||
```
|
||||
|
||||
You will find the lines we are about to change already in the `master.cf` file,
|
||||
but commented out, and some of them need to be amended.
|
||||
|
||||
```default
|
||||
smtps inet n - y - - smtpd
|
||||
-o syslog_name=postfix/smtps
|
||||
-o smtpd_tls_wrappermode=yes
|
||||
-o smtpd_sasl_auth_enable=yes
|
||||
-o smtpd_relay_restrictions=permit_sasl_authenticated,reject
|
||||
-o smtpd_recipient_restrictions=permit_mynetworks,permit_sasl_authenticated,reject
|
||||
-o smtpd_sasl_type=dovecot
|
||||
-o smtpd_sasl_path=private/auth
|
||||
```
|
||||
|
||||
Now we tell postfix to talk to dovecot over lmtp
|
||||
|
||||
```bash
|
||||
postconf -e mailbox_transport=lmtp:unix:private/dovecot-lmtp
|
||||
postconf -e smtputf8_enable=no
|
||||
```
|
||||
|
||||
Obviously this is not going to work until after we install and configure
|
||||
dovecot, so don't restart and test postfix yet.
|
||||
|
||||
# Install Dovecot
|
||||
|
||||
```bash
|
||||
apt -qy update && apt -qy upgrade
|
||||
apt -qy install dovecot-imapd dovecot-pop3d dovecot-lmtpd
|
||||
dovecot --version
|
||||
# These instructions assume version 2.3 or above
|
||||
nano /etc/dovecot/dovecot.conf
|
||||
```
|
||||
|
||||
```default
|
||||
protocols = imap pop3 lmtp
|
||||
!include_try /usr/share/dovecot/protocols.d/*.protocol
|
||||
```
|
||||
|
||||
## Authentication
|
||||
|
||||
Edit the authentication file for Dovecot and update following values.
|
||||
|
||||
```bash
|
||||
nano /etc/dovecot/conf.d/10-auth.conf
|
||||
```
|
||||
|
||||
```default
|
||||
disable_plaintext_auth = yes
|
||||
auth_mechanisms = plain
|
||||
auth_username_format = %n
|
||||
```
|
||||
|
||||
## Setup Mailbox Directory
|
||||
|
||||
After that, edit the mail configuration file to configure the location of the Mailbox. Make sure to set this to the correct location where your email server is configured to save users’ emails.
|
||||
|
||||
```bash
|
||||
nano /etc/dovecot/conf.d/10-mail.conf
|
||||
```
|
||||
|
||||
```default
|
||||
mail_location = maildir:~/Maildir
|
||||
mail_privileged_group = mail
|
||||
```
|
||||
|
||||
```bash
|
||||
adduser dovecot mail
|
||||
```
|
||||
|
||||
We already told postfix to talk to dovecot. Now we must tell dovecot to talk to postfix using lmtp.
|
||||
|
||||
```bash
|
||||
nano /etc/dovecot/conf.d/10-master.conf
|
||||
```
|
||||
|
||||
Delete the old `service lmtp` definition, and replace it with:
|
||||
|
||||
```default
|
||||
service lmtp {
|
||||
unix_listener /var/spool/postfix/private/dovecot-lmtp {
|
||||
mode = 0600
|
||||
user = postfix
|
||||
group = postfix
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Delete the old `service auth` definition, and replace it with:
|
||||
|
||||
```bash
|
||||
# Postfix smtp-auth
|
||||
service auth {
|
||||
unix_listener /var/spool/postfix/private/auth {
|
||||
mode = 0660
|
||||
user = postfix
|
||||
group = postfix
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
## Setup SSL
|
||||
|
||||
```bash
|
||||
nano /etc/dovecot/conf.d/10-ssl.conf
|
||||
```
|
||||
|
||||
```default
|
||||
ssl=required
|
||||
ssl_cert = </etc/letsencrypt/live/rhocoin.org/fullchain.pem
|
||||
ssl_key = </etc/letsencrypt/live/rhocoin.org/privkey.pem
|
||||
ssl_prefer_server_ciphers = yes
|
||||
ssl_min_protocol = TLSv1.2
|
||||
```
|
||||
|
||||
## Auto-create Sent and Trash Folder
|
||||
```bash
|
||||
nano /etc/dovecot/conf.d/15-mailboxes.conf
|
||||
```
|
||||
|
||||
Add the line `auto = subscribe` to the special folders entries:
|
||||
|
||||
```default
|
||||
mailbox Trash {
|
||||
  auto = subscribe
|
||||
special_use = \Trash
|
||||
}
|
||||
|
||||
mailbox Junk {
|
||||
  auto = subscribe
|
||||
special_use = \Junk
|
||||
}
|
||||
|
||||
mailbox Drafts {
|
||||
  auto = subscribe
|
||||
special_use = \Drafts
|
||||
}
|
||||
|
||||
mailbox Trash {
|
||||
  auto = subscribe
|
||||
special_use = \Trash
|
||||
}
|
||||
|
||||
mailbox Sent {
|
||||
  auto = subscribe
|
||||
special_use = \Sent
|
||||
}
|
||||
```
|
||||
|
||||
## Manage Dovecot Service
|
||||
|
||||
To enable Dovecot service.
|
||||
|
||||
```bash
|
||||
systemctl enable dovecot.service
|
||||
systemctl restart postfix dovecot
|
||||
systemctl status dovecot
|
||||
systemctl status postfix
|
||||
ss -lnpt | grep master
|
||||
ss -lnpt | grep dovecot
|
||||
```
|
||||
|
||||
## Open ports
|
||||
|
||||
- don't enable IMAP - 143
|
||||
- IMAPS - 993
|
||||
- don't enable POP3 - 110
|
||||
- POP3S - 995
|
||||
|
||||
```bash
|
||||
ufw allow IMAPS
|
||||
ufw allow POP3S
|
||||
ss -lnpt | grep master
|
||||
ss -lnpt | grep dovecot
|
||||
ufw status verbose
|
||||
```
|
||||
|
||||
You did set ufw to default deny incoming, so that IMAP and POP3 are blocked.
|
||||
|
||||
# Configure Desktop Email Client
|
||||
|
||||
Edit 🠆 Account Settings 🠆 Account Actions 🠆 Add Mail Account
|
||||
|
||||
Select manual configuration, SSL/TLS, and normal password.
|
||||
|
||||
Now send and receive some test emails, as you did before, but this time
|
||||
you will be receiving them on your desktop, rather than logging in and using thunderbird
|
||||
|
||||
As before:
|
||||
|
||||
```bash
|
||||
cat /var/log/mail.log | grep -E '(warning|error|fatal|panic)'
|
||||
```
|
||||
|
||||
# Next steps
|
||||
|
||||
Now that you have an email service that people can access from their
|
||||
desktop using an email client such as thunderbird, you probably
|
||||
[want several other domain names and hosts to use it](set_up_build_environments.html#virtual-domains-and-virtual-users).
|
||||
|
||||
# Credits
|
||||
|
||||
This tutorial is largely based on the excellent [linuxbabe] tutorial
|
||||
|
||||
[linuxbabe]:https://www.linuxbabe.com/mail-server/secure-email-server-ubuntu-postfix-dovecot
|
||||
"Install Dovecot IMAP server on Ubuntu & Enable TLS Encryption"
|
||||
{target="_blank"}
|
46
docs/download_and_build_on_windows.md
Normal file
@ -0,0 +1,46 @@
|
||||
---
|
||||
title: Download and build on windows
|
||||
---
|
||||
|
||||
You will need an up to date edition of Visual Studio, Git-Bash for
|
||||
windows, and Pandoc
|
||||
|
||||
In a Git bash command prompt, `cd` to the directory where you intend to
|
||||
install the source code.
|
||||
|
||||
```bash
|
||||
git clone --recursive git@cpal.pw:~/wallet
|
||||
```
|
||||
|
||||
Then launch the visual studio X64 native tools command prompt. `cd` to the
|
||||
wallet directory that was just created
|
||||
|
||||
```DOS
|
||||
winConfigure
|
||||
```
|
||||
|
||||
If all goes well, `winConfigure.bat` will call `"%ProgramFiles%"\Git\git-bash winConfigure.sh`
|
||||
to launch `winConfigure.sh` in the bash command prompt,
|
||||
`winConfigure.sh` will configure the libraries to be built, then
|
||||
`winConfigure.bat` will call `msbuild`, which is Microsoft’s equivalent to
|
||||
`make` on steroids, `msbuild` will then build the libraries, and finally build
|
||||
and launch the wallet with the unit test flag set to perform and display the
|
||||
unit test. To rebuild the wallet after you have changed something, use
|
||||
Visual Studio.
|
||||
|
||||
To rebuild the html files after changing the markdown files, call the bash
|
||||
script `mkdocs.sh`, which calls Pandoc.
|
||||
|
||||
If something goes wrong, and it is not obvious what went wrong, go back
|
||||
to the git-bash command prompt and try `./winConfigure.sh`
|
||||
|
||||
Pandoc needs to be available from the git-bash command prompt to build
|
||||
html documentation from the markdown documentation. It gets called
|
||||
from `mkdocs.sh`. It normally is available if you have installed Pandoc.
|
||||
|
||||
Git-bash needs to be available from native tools command prompt. If
|
||||
installed, it normally is as
|
||||
|
||||
```DOS
|
||||
"%ProgramFiles%"\Git\git-bash
|
||||
```
|
97
docs/duck_typing.md
Normal file
@ -0,0 +1,97 @@
|
||||
---
|
||||
title: Duck Typing
|
||||
---
|
||||
Assume naming system based on Zooko’s triangle. At what point should
|
||||
human readable names with mutable and context dependent meanings be nailed
|
||||
down as globally unique identifiers?
|
||||
|
||||
The most flexible, most convenient, most powerful, most general, and thus
|
||||
most prone to disaster form of run time typing is duck typing, wherein the
|
||||
human readable name gets translated on the fly in the run time context,
|
||||
and the run time context is whatever is on the end users machine.
|
||||
|
||||
The optimal, most powerful, most convenient typing that is
|
||||
safe is duck typing that defaults to *compile time* translation of
|
||||
human readable type identifiers into globally unique type identifiers,
|
||||
translating using the environment present on the end user’s machine.
|
||||
|
||||
The generality of run time duck typing is dangerous when you expect your
|
||||
program to run on someone else’s machine.
|
||||
|
||||
In fact it is dangerous even on your own machine. Python executing duck
|
||||
typed code produces surprising and strange results. C++ refuses to compile it
|
||||
without endless incomprehensible boilerplate. Haskel, on the other hand, for
|
||||
all its faults, just simply does as you thought you were telling it to do.
|
||||
|
||||
Python’s duck typing causes endless install grief. Once a complex program
|
||||
moves away from its home environment, endless install problems arise,
|
||||
because the objects returned are not exactly the objects expected.
|
||||
Successive versions of the API return objects that look less and less like
|
||||
the previous versions.
|
||||
|
||||
This, of course, is exactly the problem that COM and its successor NET
|
||||
quite successfully solved, but the solution relies on compilation. The
|
||||
compiled code running on a newer API is guaranteed to receive the sort of
|
||||
objects that the compile time API would have given it or fail cleanly, even
|
||||
though the api and the compiled code were compiled by different people on
|
||||
different and dissimilar machines.
|
||||
|
||||
Sometimes you want run time typing for flexibility, some times you don’t.
|
||||
|
||||
If your program is going to be used in foreign environment, you usually
|
||||
want types identified by human readable names, which can stand for many
|
||||
things, translated into types identified into globally unique identifiers
|
||||
by the translation environment on your machine, the machine on which you
|
||||
are debugging the code, rather than the end user’s machine. Duck typing is
|
||||
optimal when developing code that will only run on your own machine in
|
||||
your own environment.
|
||||
|
||||
Source code, therefore, should come with libraries mapping human readable names
|
||||
to the globally unique type names on which that source code was tested and
|
||||
depends.
|
||||
|
||||
The greatest flexibility is to have choice as to when local names will be
|
||||
bound to globally unique identifiers, compile time, or run time. To avoid
|
||||
install hell, should default to compile time, except where run time duck
|
||||
typing is explicitly invoked.
|
||||
|
||||
Forty nine times out of fifty the compiler can type objects better than
|
||||
you can, and ninety nine times out of a hundred the run time can type
|
||||
objects better than you can, and with vastly less effort, but it is that
|
||||
one time out of fifty, and the one time out of a hundred, that bites you.
|
||||
|
||||
Haskel has by far the best duck typing system, producing predictable,
|
||||
expected, and intended results, without rigidity or incomprehensible boiler
|
||||
plate, unlike C++ metacode. The programmer expresses his intention in a
|
||||
programmer intuitive way, and Haskel correctly divines his intent, translates
|
||||
into rigid compile time types as appropriate to the situation, or produces a
|
||||
relevant type error.
|
||||
|
||||
The C++11 `auto` type and `decltype` are sort of
|
||||
compile time duck typing, steps in that direction. `Decltype`
|
||||
is duck typing inside the elaborate C++ straitjacket. If `auto`
|
||||
references a `decltype`, that is pretty close to duck
|
||||
typing. Real duck typing is doubtless a lot better, but it is
|
||||
a good step.
|
||||
|
||||
However the C++11 `auto` and `decltype`
|
||||
require you to explicitly write polymorphism into your code using the
|
||||
convoluted complex template formalism and inheritance, whereas Python is
|
||||
casually polymorphic by default – and thus apt to produce wildly unexpected results.
|
||||
|
||||
But even when you are not in fact supporting polymorphism, `auto`,
|
||||
`decltype` and the C\# `var` find the correct types
|
||||
better than you do, avoiding unintended type conversions, which are a huge
|
||||
source of C++ bugs. So it is better to use auto and decltype
|
||||
wherever possible, whenever you do not want to explicitly force type
|
||||
conversion, wherever the exact type is not all that important to you.
|
||||
|
||||
Sometimes you really do care that this thing is a uint, it has got to be
|
||||
a uint, we are taking advantage of the fact that it is a uint, and if we
|
||||
turned it into a ulong or a short, or whatever, then the mechanism would
|
||||
break. But usually, particularly with the more complex types,
|
||||
the precise type is irrelevant noise and a useless distraction. You
|
||||
generally want to know what is being done, not how it is being done.
|
||||
Further, if you explicitly specify how it is being done, you are likely to
|
||||
get it wrong, resulting in mysterious and disastrous type conversions.
|
||||
|
BIN
docs/eltoo.pdf
Normal file
BIN
docs/eric.jpg
Normal file
After Width: | Height: | Size: 184 KiB |
185
docs/estimating_frequencies_from_small_samples.md
Normal file
@ -0,0 +1,185 @@
|
||||
---
|
||||
lang: en
|
||||
title: Estimating frequencies from small samples
|
||||
# katex
|
||||
---
|
||||
# The problem to be solved
|
||||
|
||||
Because protocols need to be changed, improved, and fixed from time to
|
||||
time, it is essential to have a protocol negotiation step at the start of every networked interaction, and protocol requirements at the start of every store
|
||||
and forward communication.
|
||||
|
||||
But we also want anyone, anywhere, to be able to introduce new
|
||||
protocols, without having to coordinate with everyone else, as attempts to
|
||||
coordinate the introduction of new protocols have ground to a halt, as
|
||||
more and more people are involved in coordination and making decisions.
|
||||
The IETF is paralyzed and moribund.
|
||||
|
||||
So we need a large enough address space that anyone can give his
|
||||
protocol an identifier without fear of stepping on someone else’s identifier.
|
||||
But this involves inefficiently long protocol identifiers, which can become
|
||||
painful if we have lots of protocol negotiation, where one system asks
|
||||
another system what protocols it supports. We might have lots of
|
||||
protocols in lots of variants each with long names.
|
||||
|
||||
So our system forms a guess as to the likelihood of a protocol, and then
|
||||
sends or requests enough bits to reliably identify that protocol. But this
|
||||
means it must estimate probabilities from limited data. If one’s data is
|
||||
limited, priors matter, and thus a Bayesian approach is required.
|
||||
|
||||
# Bayesian Prior
|
||||
|
||||
The Bayesian prior is the probability of a probability, or, if this recursion
|
||||
is philosophically troubling, the probability of a frequency. We have an
|
||||
urn containing a very large number of samples, from which we have taken
|
||||
few or no samples. What proportion of samples in the urn will be
|
||||
discovered to have property X?
|
||||
|
||||
Let our prior estimate of probability that the proportion of samples in
|
||||
the urn that are X is ρ be $Ρ_{prior}(ρ)$
|
||||
|
||||
This is the probability of a probability. The probability is the sum over all the prior probabilities of probabilities.
|
||||
|
||||
Then our estimate of the chance $P_X$ that the first sample will be X is
|
||||
$$P_X = \int_0^1 ρ\,Ρ_{prior}(ρ)\,dρ$$
|
||||
|
||||
Then if we take one sample out of the urn, and it is indeed X, then we
|
||||
update all our priors by:
|
||||
$$P_{new}(ρ) = \frac{ρ × Ρ_{prior}(ρ)}{P_X}$$
|
||||
|
||||
# Beta Distribution
|
||||
|
||||
The Beta distribution is
|
||||
$$P_{αβ}(ρ) = \frac{ρ^{α-1} × (1-ρ)^{β-1}}{B(α,β)}$$
|
||||
where
|
||||
$$B(α,β) = \frac{Γ(α) × Γ(β)}{Γ(α + β)}$$
|
||||
|
||||
$Γ(α) = (α − 1)!$ for positive integer α\
|
||||
$Γ(1) = 1 =0!$\
|
||||
$B(1,1) = 1$\
|
||||
$B(1,2) = ½$\
|
||||
$Γ(α+1) = α Γ(α)$ for all α
|
||||
|
||||
Let us call this probability distribution, the prior of our prior
|
||||
|
||||
It is convenient to take our prior to be a Beta distribution, for if our prior
|
||||
the proportion of samples that are X is the Beta distribution $α,β$, and we
|
||||
take three samples, one of which is X, and two of which are not X, then
|
||||
our new distribution is the Beta distribution $α+1,β+2$
|
||||
|
||||
If our distribution is the Beta distribution α,β, then the probability
|
||||
that the next sample will be X is $\frac{α}{α+β}$
|
||||
|
||||
If $α$ and $β$ are large, then the Beta distribution approximates a delta
|
||||
function
|
||||
|
||||
If $α$ and $β$ equal $1$, then the Beta distribution assumes all probabilities
|
||||
equally likely.
|
||||
|
||||
That, of course, is a pretty good prior, which leads us to the conclusion
|
||||
that if we have seen $n$ samples that are green, and $m$ samples that are not
|
||||
green, then the probability of the next sample being green is $\frac{n+1}{n+m+2}$
|
||||
|
||||
Realistically, until we have seen diverse results there is a finite probability
|
||||
that all samples are X, or all not X, but no beta function describes this
|
||||
case.
|
||||
|
||||
If our prior for the question “what proportion of men are mortal?” was a
|
||||
beta distribution, we would not be convinced that all men are mortal until
|
||||
we had first checked all men – thus a beta distribution is not always a
|
||||
plausible prior, though it rapidly converges to a plausible prior as more
|
||||
data comes in.
|
||||
|
||||
So perhaps a fairly good prior is half of one, and half of the other. The
|
||||
principle of maximum entropy tell us to choose our prior to be $α=1$,
|
||||
$β=1$, but in practice, we usually have some reason to believe all
|
||||
samples are alike, so need a prior that weights this possibility.
|
||||
|
||||
# Weight of evidence
|
||||
|
||||
The weight of evidence is the inverse of entropy of $P(ρ)$
|
||||
$$\int_0^1 Ρ_{prior}\small(ρ\small) × \ln\big({Ρ_{prior} \small(ρ\small)}\big) dρ$$
|
||||
the lower the entropy, the more we know about the distribution P(ρ),
|
||||
hence the principle of maximum entropy – that our distribution should
|
||||
faithfully represent the weight of our evidence, no stronger and no
|
||||
weaker.
|
||||
|
||||
The principle of maximum entropy leaves us with the question of what
|
||||
counts as evidence. To apply, we need to take into account *all*
|
||||
evidence, and everything in the universe has some relevance.
|
||||
|
||||
Thus to answer the question “what proportion of men are mortal” the
|
||||
principle of maximum entropy, naively applied, leads to the conclusion
|
||||
that we cannot be sure that all men are mortal until we have first checked
|
||||
all men. If, however, we include amongst our priors the fact that
|
||||
all men are kin, then that all men are X, or no men are X has to have a
|
||||
considerably higher prior weighting than the proposition that fifty
|
||||
percent of men are X.
|
||||
|
||||
The Beta distribution is mathematically convenient, but
|
||||
unrealistic. That the universe exists, and we can observe it,
|
||||
already gives us more information than the uniform distribution, thus the
|
||||
principle of maximum entropy is not easy to apply.
|
||||
|
||||
Further, in networks, we usually care about the current state of the
|
||||
network, which is apt to change, thus we frequently need to apply a decay
|
||||
factor, so that what was once known with extremely high probability, is now
|
||||
only known with reasonably high probability. There is always some
|
||||
unknown, but finite, substantial, and growing, probability of a large
|
||||
change in the state of the network, rendering past evidence
|
||||
irrelevant.
|
||||
|
||||
Thus any adequately flexible representation of the state of the network
|
||||
has to be complex, a fairly large body of data, more akin to a spam filter
|
||||
than a boolean.
|
||||
|
||||
# A more realistic prior
|
||||
|
||||
Suppose our prior, before we take any samples from the urn, is that the probability that the proportion of samples in the urn that are X is ρ is
|
||||
$$\frac{1}{3}P_{11} (ρ) + \frac{1}{3}δ(ρ) + \frac{1}{3}δ(1-ρ)$$
|
||||
|
||||
We are allowing for a substantial likelihood of all X, or all not X.
|
||||
|
||||
If we draw out $m + n$ samples, and find that $m$ of them are X, and $n$ of
|
||||
them are not X, then the $δ$ terms drop out, and our prior is, as usual the
|
||||
Beta distribution
|
||||
$$P_{m+1,n+1}(ρ) = \frac{ρ^m × (1-ρ)^n }{B(m+1,n+1)}$$
|
||||
if neither m nor n is zero.
|
||||
|
||||
But suppose we draw out n samples, and all of them are X, or none of
|
||||
them are X.
|
||||
|
||||
Without loss of generality, we may suppose all of them are X.
|
||||
|
||||
Then what is our prior after n samples, all of them X?
|
||||
|
||||
After one sample, n=1, our new estimate is
|
||||
|
||||
$$\frac{2}{3} × \bigg(\frac{ρ}{B(1,1)} + δ(1−ρ)\bigg)$$
|
||||
$$=\frac{1}{3}\frac{ρ}{B(2,1)} + \frac{2}{3}δ(1−ρ)$$
|
||||
|
||||
We see the beta distributed part of the probability distribution keeps
|
||||
getting smaller, and the delta distributed part of the probability keeps
|
||||
getting higher.
|
||||
|
||||
And our estimate that the second sample will also be X is
|
||||
$$\frac{8}{9}$$
|
||||
|
||||
After two samples, n=2, our new estimate is
|
||||
|
||||
Probability $\frac{1}{4}$
|
||||
|
||||
Probability distribution $\frac{1}{4}\frac{ρ^2}{B(3,1)}+\frac{3}{4}δ(1−ρ)$
|
||||
|
||||
And our estimate that the third sample will also be X is $\frac{15}{16}$
|
||||
|
||||
By induction, after n samples, all of them members of category X, our new
|
||||
estimate for one more sample is
|
||||
$$1-(n+2)^{-2}=\frac{(n+3)×(n+1)}{(n+2)^2}$$
|
||||
|
||||
Our estimate that the run will continue forever is
|
||||
$$\frac{(n+1)}{n+2}$$
|
||||
|
||||
Which corresponds to our intuition on the question “all men are mortal” If we find no immortals in one hundred men, we think it highly improbable that we will encounter any immortals in a billion men.
|
||||
|
||||
In contrast, if we assume the beta distribution, this implies that the likelihood of the run continuing forever is zero.
|
150
docs/generating_numbers_unpredictable_to_an_attacker.md
Normal file
@ -0,0 +1,150 @@
|
||||
---
|
||||
title: Generating numbers unpredictable to an attacker
|
||||
---
|
||||
```default
|
||||
From: Kent Borg <kentborg@borg.org> 2021-03-30
|
||||
To: Cryptography Mailing List
|
||||
```
|
||||
|
||||
Entropy is important to RNGs but unfortunately RNG people are at risk of
|
||||
devoutly worshiping at the altar of "Countable Entropy", blinded to
|
||||
realities, ready with jeers for anyone who does not share their extreme
|
||||
theology.
|
||||
|
||||
These people are so in the thrall of the theoretical that they are blinded to
|
||||
the practical and any other theories.
|
||||
|
||||
And for practical purposes, it is the unguessability of the RNG that
|
||||
matters. Any source of unguessable data is a good thing to use to drive an
|
||||
RNG. Even sources that are dismissed as "squish" by the most devout and
|
||||
most blinded can be good. And there is a great example that these disciples
|
||||
can't see.
|
||||
|
||||
# Time Distribution is Hard
|
||||
|
||||
NTP is really great, I love it, so cool. It can set my computer's clock with a
|
||||
precision measured in milliseconds. Very impressive it can do this just by
|
||||
applying algorithms to hardware that is already present.
|
||||
|
||||
If one wants better, the next option is to get time from GPS. According to
|
||||
gps.gov a specialized receiver, at a fixed location, can know the time
|
||||
within 40ns. This is pretty cool, too. It is good enough to synchronize RF
|
||||
signals between CDMA cell sites so phones can communicate with more
|
||||
than one site at the same time.
|
||||
|
||||
GPS also depends on billions of dollars of infrastructure with an annual
|
||||
budget that must be in the millions. People think GPS is about location,
|
||||
but at its core it is really about time distribution. From end-to-end a design
|
||||
where every component is doing its best to carefully keep and distribute
|
||||
precise time. If one pays attention to details and has the money for good
|
||||
hardware (far more than just a smartphone), to get 40ns is very cool.
|
||||
|
||||
# Guessing Time is Even Harder
|
||||
|
||||
With all that money and constructive effort, one can do 40ns. What are you
|
||||
going to do to do better? Get specific. (Warning, it's not going to be easy.)
|
||||
|
||||
# Cheap Unguessability
|
||||
|
||||
A 1 GHz clock has a cycle time of 1ns. (Is it even possible to buy an Intel-based
|
||||
machine that runs slower than 1GHz these days?) 1ns is a lot
|
||||
smaller than 40ns. You don't know the value of my clock.
|
||||
|
||||
Intel chips have a timestamp counter that increments with every tick of the
|
||||
system clock. You don't know the value of my counter.
|
||||
|
||||
The system clock isn't fed to the CPU, the CPU is fed a much lower
|
||||
frequency, that it then multiplies up using analog on-chip PLL circuitry.
|
||||
That clock is then (carefully) distributed on-chip. And even then, different
|
||||
parts of the chip are in different clock domains, because clock distribution
|
||||
and synchronization is hard.
|
||||
|
||||
So the "system clock" doesn't exist outside the CPU, it is only a "CPU clock",
|
||||
and not all parts of the CPU are even privy to it.
|
||||
|
||||
No one at any distance outside that chip knows the value of the timestamp
|
||||
counter. A program might contain the instruction to read the timestamp
|
||||
counter, but by the time anything is done with that value, it will have
|
||||
changed.
|
||||
|
||||
Is there "entropy" in that system clock? Some, but only some. The PLL will
|
||||
have some jitter, the precision of the lower frequency input clock will have
|
||||
iffy precision and be subject to drift.
|
||||
|
||||
Is there "unguessability" in that system clock? Plenty! At least to any
|
||||
observer at any distance (i.e., outside the computer).
|
||||
|
||||
Remember, it takes billions of dollars and lots of careful design and
|
||||
cooperation to distribute 40ns. time. No such effort nor expense has been made
|
||||
to tell the world the precise value of my 1ns period (or less) CPU clock.
|
||||
|
||||
No one outside my computer knows its precise value.
|
||||
|
||||
# Back on Topic
|
||||
|
||||
Intel hardware has a great source of unguessability in its timestamp
|
||||
counter. All you need is an uncorrelated sampling of this clock. Say, a
|
||||
network interrupt.
|
||||
|
||||
I know the squish patrol is now all upset, because external observers can
|
||||
be the ones sending these packets with careful timing. So what? The
|
||||
timing can't be careful enough. The value that is read from the timestamp
|
||||
counter in servicing that interrupt depends on knowing edge timings far
|
||||
more closely than 1ns, for every time the observer guesses a value on the
|
||||
wrong side of one of these edges, one bit of unguessability slips by.
|
||||
|
||||
# RNGs are Still Hard
|
||||
|
||||
A (1) uncorrelated sampling of a (2) fast clock is, indeed, a good source of
|
||||
unguessability.
|
||||
|
||||
But, make sure both those things be true.
|
||||
|
||||
Is virtualization messing with how these things work? Is variable clock
|
||||
scaling messing with it? Have interrupts been virtualized in some
|
||||
predictable way? Is the timestamp counter being messed with in an
|
||||
attempt to have it not appear to be warped by clock scaling and effectively
|
||||
running much slower? Is some OS scheduling algorithm synchronizing
|
||||
interrupt servicing with timestamp values?
|
||||
|
||||
Just because there is an underappreciated way to feed an RNG doesn't
|
||||
mean there aren't plenty of ways to still mess it up. ("Um, it turns out the
|
||||
RNG isn't in production builds." Who will notice?)
|
||||
|
||||
Implementation matters.
|
||||
|
||||
But the fact remains time distribution is hard, the period of a gigahertz clock is small. No one at any distance knows its value. An awful lot of computers out there can use this to drive their RNGs.
|
||||
|
||||
-kb, the Kent who laments that Arm CPUs didn't have something like a timestamp counter last he looked.
|
||||
|
||||
# Attacks
|
||||
|
||||
```default
|
||||
From: Barney Wolff <barney@databus.com> 2021-05-31
|
||||
To: Cryptography Mailing List
|
||||
```
|
||||
|
||||
Surely this depends on how many guesses an attacker is
|
||||
allowed before being detected and blocked. If there's
|
||||
no penalty for guessing wrong, as with an offline attack,
|
||||
I doubt the GHz ticker can contribute more than about 20
|
||||
bits or so.
|
||||
|
||||
# Implementation
|
||||
|
||||
```default
|
||||
From: jrzx <jrzx@protonmail.ch> 2021-06-06
|
||||
To: Cryptography Mailing List
|
||||
```
|
||||
|
||||
Every network or disk event provides several bits of unguessability. You
|
||||
are going to accumulate 128 bits in a hundred milliseconds or so.
|
||||
|
||||
Accumulate the bits into Knuth's additive number generator 3.2.2, then
|
||||
hash the seed.
|
||||
|
||||
Continue accumulating randomness into the seed when you get
|
||||
uncorrelated events. Continue hashing the seed when you need more
|
||||
random numbers.
|
||||
|
||||
The attacker performing an offline attack will have to guess all 128 bits.
|
1137
docs/identity.md
Normal file
BIN
docs/images/nobody_know_you_are_a_dog.webp
Normal file
After Width: | Height: | Size: 6.5 KiB |
BIN
docs/images/postfix_cfg.png
Normal file
After Width: | Height: | Size: 302 KiB |
BIN
docs/images/postfix_cfg1.png
Normal file
After Width: | Height: | Size: 125 KiB |
BIN
docs/images/postfix_cfg1.webp
Normal file
After Width: | Height: | Size: 23 KiB |
BIN
docs/images/postfix_cfg2.png
Normal file
After Width: | Height: | Size: 94 KiB |
BIN
docs/images/postfix_cfg2.webp
Normal file
After Width: | Height: | Size: 16 KiB |
BIN
docs/images/postfix_cfg3.png
Normal file
After Width: | Height: | Size: 48 KiB |
BIN
docs/images/postfix_cfg3.webp
Normal file
After Width: | Height: | Size: 9.1 KiB |
BIN
docs/images/working_mailersever.webp
Normal file
After Width: | Height: | Size: 20 KiB |
148
docs/index.md
Normal file
@ -0,0 +1,148 @@
|
||||
---
|
||||
title: How to Save the World
|
||||
---
|
||||
I have almost completed an enormous design document for an uncensorable social network intended to contain a non evil scalable proof of stake currency, and I have a wallet that can generate secrets, but the wallet is missing no end of critical features – it is pre-pre alpha. When it is early pre alpha, I am going to publish it on Gitea, and call for assistance.
|
||||
|
||||
Here is a link to one version of the [white paper](social_networking.html), focusing primarily on social media. (But though information wants to be free, programmers need to get paid.)
|
||||
|
||||
Here is a link to [another version](white_paper.html) of the white paper, focusing primarily on money and getting rich by protecting capitalism from the state.
|
||||
|
||||
# Speech and commerce
|
||||
|
||||
As the internet goes, so goes the world. For freedom of speech to
|
||||
exist, there must be freedom of speech on the internet, and if there is
|
||||
freedom of speech on the internet, there is freedom of speech, for
|
||||
governments will find it very hard to stop it. If freedom of information,
|
||||
file sharing and open source code on the internet, then there is freedom
|
||||
of information, if there is freedom of association on the internet, then
|
||||
there is freedom of association – and, the big one, the one we have least,
|
||||
the one under most severe threat, if there is freedom of commerce on the
|
||||
internet …
|
||||
|
||||
We can establish these freedoms by technological and business means
|
||||
instead of political means. These means turned out to be more difficult
|
||||
than expected in the heady days of the [cypherpunk](cypherpunk_program.html)
|
||||
movement.
|
||||
|
||||
To secure all these, we need the right software, software that
|
||||
successfully applies the cryptographic tools that have been developed.
|
||||
|
||||
Governments are getting worse, governments *always* get worse,
|
||||
yet what is outside the government’s power is getting stronger.
|
||||
|
||||
It is the nature of governments to always get worse over time, resulting
|
||||
in them either collapsing or being bypassed by new forms of government.
|
||||
|
||||
The cypherpunk program was that governments would be bypassed, as
|
||||
organization moved to the internet, hidden behind cryptography. The
|
||||
cypherpunk program died, yet lives – for China’s industrialization is
|
||||
being organized through the VPNs of firms whose servers are located in the
|
||||
cayman islands. These firms do transactions largely by trading each other’s
|
||||
IOUs in private conversations rather than through regular bank
|
||||
accounts. Cypherpunks imagined that they would be living in tropical
|
||||
paradises running businesses nominally located in tax havens. It has not
|
||||
come true for them, but an increasing proportion of the world’s business
|
||||
does work that way.
|
||||
|
||||
In the cypherpunk vision, people of moderate wealth would escape the
|
||||
power of government – unfortunately what is happening is merely
|
||||
billionaires escaping the power of government. To revive and accomplish
|
||||
the cypherpunk vision, we need to make these capabilities and methods more
|
||||
widely available – available not just to the super rich but to the better
|
||||
off middle class – not necessarily the ordinary middle class, but rather
|
||||
the sort of middle class person who has a passport in more than one
|
||||
country and does not need to show up at the office at 9AM every
|
||||
morning. From thence it will eventually trickle down to the regular
|
||||
middle class.
|
||||
|
||||
At the same time as we see a billion people industrializing in an
|
||||
industrialization run from islands on the internet, we also see a variety
|
||||
of private use of force organizations also organized over the internet
|
||||
popping up – thus for example the extortion operation against oil
|
||||
companies in Nigeria was in part run over the internet from South Africa.
|
||||
Somali pirates were largely eradicated by private security firms whose
|
||||
home nation is far from clear.
|
||||
|
||||
We are seeing entirely legal and government approved mercenaries, not
|
||||
quite legal and sort of government approved mercenaries, illegal but
|
||||
government tolerated militias and armed mosques, illegal distributors of
|
||||
recreational chemicals very successfully resisting government power, and
|
||||
assorted extortionists and terrorists. Yes, extortionists and terrorists
|
||||
are bad things, but that people are ever less inclined to rely on
|
||||
government provision of protection against them is a good thing.
|
||||
|
||||
The power of states is increasing, in the sense that taxes and
|
||||
regulation is increasing, that government ownership is increasing, that
|
||||
large firms function by special privilege granted by the government to
|
||||
those firms to the detriment of those less privileged – but at the same
|
||||
time, that which is outside the power of the state is growing
|
||||
stronger. It is a pattern that recurs every few hundred years,
|
||||
leading to the renewal, or the collapse, of civilization.
|
||||
|
||||
# Major concepts
|
||||
|
||||
- PKI and SSL needs to be obsoleted and replaced. As Bruce
|
||||
Schneier said in Secrets and Lies: 〝SSL is just simply a (very
|
||||
slow) Diffie-Hellman key-exchange method. Digital certificates
|
||||
provide no actual security for electronic commerce; it’s a complete sham〞
|
||||
|
||||
The underlying problem is that our mental name handling
|
||||
mechanism is intended for the relatively small social groups of the
|
||||
Neolithic. True names fail when we attempt to scale to the internet.
|
||||
The current name system is rooted in governmental and quasi
|
||||
governmental entities, who use this power to gently encourage
|
||||
nominally private institutions to censor the internet. Similarly, the
|
||||
encryption system of https allows the government to intercept any
|
||||
website with a man in the middle attack. To fix this, we need a
|
||||
name system rooted in the blockchain, with encryption rooted in
|
||||
Zooko’s triangle, as with crypto currency
|
||||
|
||||
- [Zooko’s triangle](zookos_triangle.html), The solution is an ID system based on Zooko’s
|
||||
triangle, allowing everyone to have as many IDs as they want, but
|
||||
no one else can forge their IDs, ensuring that each identity has a
|
||||
corresponding public key, thus making end to end encryption easy.
|
||||
These identities may correspond to people you can instant message,
|
||||
or web sites, particularly secure web sites that require logon, such
|
||||
as banks, or indeed any service. Thus, they also correspond to
|
||||
bank accounts, that work like Swiss numbered bank account, in that your identity is a secret.
|
||||
- Protocol negotiation at the levels equivalent to TCP and UDP, and
|
||||
default encryption and authentication at those levels, as with ssh.
|
||||
- Ability to introduce new protocols and upgrade old protocols without central coordination, just as Zooko allows us to introduce
|
||||
new identities without central coordination. Central authority is failing, has become an obstacle, instead of the fast way to get things done.
|
||||
- File sharing with upload credits.
|
||||
- Single signon, buddy list user interface for web page logon.
|
||||
- Messaging system integrated with single signon – message
|
||||
authentication, all messages end to end encrypted. Zooko identity
|
||||
means yurls, which means a problem in getting people onto our buddy list.
|
||||
- Money transfer integrated with instant messaging.
|
||||
- Money transfer uses ripple.
|
||||
- Each money transfer creates a record of accompanying obligation,
|
||||
equivalent record on both sides of the transaction. You can put
|
||||
money in a message, and for the recipient to get it out of the
|
||||
message, he has to sign a receipt that says this money is for such
|
||||
and such, and he took the money – a receipt that only the person who
|
||||
sent the money and the person who received the money can read, and
|
||||
any financial intermediaries cannot read, though they will need
|
||||
proof that the requested receipt exists, without them being able to
|
||||
read what the receipt is for. The records provide a basis for
|
||||
generating reputation of Zooko based identities.
|
||||
|
||||
This web page is intended to keep track of the various technologies
|
||||
needed to implement liberty on the internet. There are lots of them, and
|
||||
they are all fairly complex and many of them subtle and very difficult to
|
||||
understand, so this web page will always be severely incomplete. Right now
|
||||
it is almost totally incomplete, I have just got started listing stuff:
|
||||
|
||||
# Details
|
||||
|
||||
This list severely incomplete, when finished will be at least a screen’s
|
||||
worth, probably several screens.
|
||||
|
||||
- [how to build an operating system that is largely immune to viruses, Trojans and spyware](safe_operating_system.html)
|
||||
- [how to stop
|
||||
phishing and browser session hijacking, how to do browser security
|
||||
right.](how_browser_security_should_be_done.html)
|
||||
- [How to do VPNs right](how_to_do_VPNs.html)
|
||||
- [How to prevent malware](safe_operating_system.html)
|
||||
- [The cypherpunk program](cypherpunk_program.html)
|
||||
- [Replacing TCP and UDP](replacing_TCP.html)
|
1
docs/katex.min.css
vendored
Normal file
1
docs/katex.min.js
vendored
Normal file
932
docs/libraries.md
Normal file
@ -0,0 +1,932 @@
|
||||
---
|
||||
title: Libraries
|
||||
---
|
||||
|
||||
# Git submodules
|
||||
|
||||
Libraries are best dealt with as [Git submodules].
|
||||
|
||||
[Git submodules]: https://github.com/psi4/psi4/wiki/External-subprojects-using-Git-and-CMake
|
||||
|
||||
[build libraries]:https://git-scm.com/book/en/v2/Git-Tools-Submodules
|
||||
|
||||
Git submodules leak complexity and surprising and inconvenient behavior
|
||||
all over the place if one is trying to make a change that affects multiple
|
||||
modules simultaneously. But having your libraries separate from your git
|
||||
repository results in non portable surprises and complexity. Makes it hard
|
||||
for anyone else to build your project, because they will have to, by hand,
|
||||
tell your project where the libraries are on their system.
|
||||
|
||||
You need an enormous pile of source code, the work of many people over
|
||||
a very long time, and GitSubmodules allows this to scale, because the
|
||||
local great big pile of source code references many independent and
|
||||
sovereign repositories in the cloud. If you have one enormous pile of
|
||||
source code in one enormous git repository, things get very very slow. If
|
||||
you rely on someone else's compiled code, things break and you get
|
||||
accidental and deliberate backdoors, which is a big concern when you are
|
||||
doing money and cryptography.
|
||||
|
||||
GitSubmodules is hierarchical, but source code has strange loops. The Bob
|
||||
module uses the Alice module and the Carol module, but Alice uses Bob
|
||||
and Carol, and Carol uses Alice and Bob. How do you make sure that all
|
||||
your modules are using the same commit of Alice?
|
||||
|
||||
Well, if modules have strange loops you make one of them the master, and
|
||||
the rest of them direct submodules of that master, brother subs to each
|
||||
other, and they are all using the same commit of Alice as the master. And
|
||||
you should try to write or modify the source code so that they all call their
|
||||
brother submodules through the one parent module above them in the
|
||||
hierarchy, that they use the source code of their brothers through the
|
||||
source code of their master, rather than directly incorporating the header
|
||||
files of their brothers at compile time, albeit the header file of the master
|
||||
that they include may well include the header of their brother, so that they
|
||||
are indirectly, through the master header file, including the brother header
|
||||
file.
|
||||
|
||||
# Git subtrees
|
||||
|
||||
Git subtrees are an alternative to submodules, and many people
|
||||
recommend them because they do not break the git model the way
|
||||
submodules do.
|
||||
|
||||
But subtrees do not scale. If you have an enormous pile of stuff in your
|
||||
repository, Git has to check every file to see if it has changed every time,
|
||||
which rather rapidly becomes painfully slow if one is incorporating a lot
|
||||
of projects reflecting a lot of work by a lot of people. GitSubmodules
|
||||
means you can incorporate unlimited amounts of stuff, and Git only has to
|
||||
check the particular module that you are actually working on.
|
||||
|
||||
Maybe subtrees would work better if one was working on a project where
|
||||
several parts were being developed at once, thus a project small enough
|
||||
that scaling is not an issue. But such projects, if successful, grow into
|
||||
projects where scaling is an issue. And if you are a pure consumer of a
|
||||
library, you don't care that you are breaking the git model, because you are
|
||||
seldom making synchronized changes in module and submodule.
|
||||
|
||||
The submodule model works fine, provided the divisions between one
|
||||
submodule and the next are such that one is only likely to make changes in
|
||||
one module at a time.
|
||||
|
||||
# Passphrases
|
||||
|
||||
All wallets now use random words - but you cannot carry an eighteen word random phrase through an airport in your head
|
||||
|
||||
Should use [grammatically correct passphrases](https://github.com/lungj/passphrase_generator).
|
||||
|
||||
Using those dictionaries, the phrase (adjective noun adverb verb adjective
|
||||
noun) can encode sixty eight bits of entropy. Two such phrases suffice,
|
||||
being stronger than the underlying elliptic curve. With password
|
||||
strengthening, we can randomly leave out one of the adjectives or adverbs
|
||||
from one of the passphrases.
|
||||
|
||||
# Polkadot, substrate and gitcoin
|
||||
|
||||
It has become painfully apparent that building a blockchain is a very large project.
|
||||
|
||||
Polkadot is a blockchain ecosystem, and substrate a family of libraries for
|
||||
constructing blockchains. It is a lot easier to refactor an existing
|
||||
blockchain than to start entirely from scratch.
|
||||
|
||||
Polkadot is designed to make its ecosystem subordinate to the primary
|
||||
blockchain, which I do not want - but it also connects its ecosystem to
|
||||
bitcoin by De-Fi (or promises to do so, I don't know how well it works) so
|
||||
accepting that subordination is a liquidity event. We can fix things so
|
||||
that the tail will wag the dog once the tail gets big enough, as China licensed
|
||||
from ARM, then formed a joint venture with ARM, then hijacked the joint
|
||||
venture, once it felt it no longer needed to keep buying the latest ARM
|
||||
intellectual property. Licensing was a fully subordinate relationship, the
|
||||
joint venture was cooperation between unequal parties, and now ARM
|
||||
China is a fully independent and competing technology, based on the old
|
||||
ARM technology, but advancing it separately, independently, and in its
|
||||
own direction. China forked the ARM architecture.
|
||||
|
||||
Accepting a fully subordinate relationship to get connected, and then
|
||||
defecting on subordination when strong enough, is a sound strategy.
|
||||
|
||||
[Gitcoin]:https://gitcoin.co/
|
||||
"Build and Fund the Open Web Together"
|
||||
|
||||
And talking about connections: [Gitcoin]
|
||||
|
||||
Gitcoin promises connection to money, and connection to a community of
|
||||
open source developers. It is Polkadot's money funnel from VCs to
|
||||
developers. The amount of cash in play is rather meagre, but it provides a
|
||||
link to the real money, which is ICOs.
|
||||
|
||||
I suspect that its git hosting has been co-opted by the enemy, but that is
|
||||
OK, provided our primary repo is not co-opted by the enemy.
|
||||
|
||||
# Installers
|
||||
|
||||
Wine to run Windows 10 software under Linux is a bad idea, and
|
||||
Windows Subsystem for Linux to run Linux software under Windows 10
|
||||
is a much worse idea – it is the usual “embrace and extend” evil plot by
|
||||
Microsoft against open source software, considerably less competently
|
||||
executed than in the past.
|
||||
|
||||
## The standard gnu installer
|
||||
|
||||
```bash
|
||||
./configure && make && make install
|
||||
```
|
||||
|
||||
## The standard windows installer
|
||||
|
||||
Wix creating an `*.msi` file.
|
||||
|
||||
Which `*.msi` file can be wrapped in an executable, but there is no sane
|
||||
reason for this and you are likely to wind up with installs that consist of an
|
||||
executable that wraps an msi that wraps an executable that wraps an msi.
|
||||
|
||||
To build an `*.msi`, you need to download the Wix toolset, which is referenced in the relevant Visual Studio extensions, but cannot be downloaded from within the Visual Studio extension manager.
|
||||
|
||||
The Wix Toolset, however, requires the net framework in order to install it
|
||||
and use it, which is the cobbler’s children going barefoot. You want a
|
||||
banana, and have to install a banana tree, a monkey, and a jungle.
|
||||
|
||||
There is a [good web page](https://stackoverflow.com/questions/1042566/how-can-i-create-an-msi-setup) on WIX resources
|
||||
|
||||
There is an automatic wix setup: Visual Studio-> Tools-> Extensions&updates ->search Visual Studio Installer Projects
|
||||
|
||||
Which is the Microsoft utility for building wix files. It creates a quite adequate wix setup by gui, in the spirit of the skeleton windows gui app.
|
||||
|
||||
## [NSIS](https://nsis.sourceforge.io/Download) Nullsoft Scriptable Install System.
|
||||
|
||||
People who know what they are doing seem to use this install system, and they
|
||||
write nice installs with it.
|
||||
|
||||
To build setup program:
|
||||
|
||||
1. Build both x64 and Win32 Release configs
|
||||
1. When you construct wallet.nsi in nullsoft, add it to your project.
|
||||
1. When building a deliverable, Right click on the WalletSetup.nsi file in Visual Studio project and select properties.
|
||||
1. Set Excluded from Build to No
|
||||
1. OK Properties
|
||||
1. Right click .nsi file again and choose Compile.
|
||||
1. Set the .nsi file properties back to Excluded from Build.
|
||||
|
||||
This manual building of the setup is due to the fact that we need both x64
|
||||
and Win32 exes for the setup program and Visual Studio doesn’t provide a
|
||||
way to do this easily.
|
||||
|
||||
# Package managers
|
||||
|
||||
Lately, however, package managers have appeared: Conan and [vcPkg](https://blog.kitware.com/vcpkg-a-tool-to-build-open-source-libraries-on-windows/). Conan lacks wxWidgets, and has far fewer packages than [vcpkg](https://libraries.io/github/Microsoft/vcpkg).
|
||||
|
||||
I have attempted to use package managers, and not found them very useful. It
|
||||
is easier to deal with each package as its own unique special case. The
|
||||
uniform abstraction that a package manager attempts to provide invariably
|
||||
leaks badly, while piling cruft on top of the library. Rather than
|
||||
simplifying library use, piles its own idiosyncratic complexification on top
|
||||
of the complexities of the library, often inducing multiplicative complexity,
|
||||
as one attempts to deal with the irregularities and particulars of a
|
||||
particular library though a package manager that is unaware of and incapable
|
||||
of dealing with the particularity of that particular package, and is
|
||||
unshakeably convinced that the library is organized in way that is different
|
||||
from the way it is in fact organized.
|
||||
|
||||
# Multiprecision Arithmetic
|
||||
|
||||
I will need multiprecision arithmetic if I represent information in a base or
|
||||
dictionary that is not a power of two.
|
||||
|
||||
[MPIR]:http://mpir.org/
|
||||
{target="_blank"}
|
||||
|
||||
[GMP]:https://gmplib.org
|
||||
{target="_blank"}
|
||||
|
||||
The best libraries are [GMP] for Linux and
|
||||
[MPIR] for windows. These are reasonably
|
||||
compatible, and generally only require very trivial changes to produce a Linux
|
||||
version and a windows version. Boost attempts to make the changes invisible,
|
||||
but adds needless complexity and overhead in doing so, and obstructs control.
|
||||
MPIR has a Visual Studio repository on Github, and a separate Linux repository
|
||||
on Github. GMP builds on a lot of obscure platforms, but not really supported
|
||||
on Windows.
|
||||
|
||||
For supporting Windows and Linux only, MPIR all the way is the way to go. For
|
||||
compatibility with little used and obscure environments, you might want to
|
||||
have your own custom thin layer that maps GMP integers and MPIR integers to
|
||||
your integers, but that can wait till we have conquered the world.
|
||||
|
||||
My most immediate need for MPIR is the extended Euclidean algorithm
|
||||
for modular multiplicative inverse, which it, of course, supports,
|
||||
`mpz_gcdext`, greatest common divisor extended, but which is deeply
|
||||
hidden in the [documentation](http://www.mpir.org/mpir-3.0.0.pdf).
|
||||
|
||||
# [wxWidgets](./libraries/building_and_using_libraries.html#instructions-for-wxwidgets)
|
||||
|
||||
# Networking
|
||||
|
||||
## notbit client
|
||||
|
||||
A bitmessage client written in C. Designed to run on a linux mail server
|
||||
and interface bitmessage to mail. Has no UI, intended to be used with the linux mail UI.
|
||||
|
||||
Unfortunately, setting up a linux mail server is a pain in the ass. Needs the Zooko UI.
|
||||
|
||||
But its library contains everything you need to share data around a group of people, many of them behind NATs.
|
||||
|
||||
Does not implement NAT penetration. Participants behind a NAT are second class unless they implement port forwarding, but participants with unstable IPs are not second class.
|
||||
|
||||
## Game Networking sockets
|
||||
|
||||
[Game Networking Sockets](https://github.com/ValveSoftware/GameNetworkingSockets)
|
||||
|
||||
A reliable udp library with congestion control which has vastly more development work done on it than any other reliable udp networking library, but which is largely used to work with Steam gaming, and Steam's closed source code. Has no end of hooks to closed source built into it, but works fine without those hooks.
|
||||
|
||||
Written in C++. Architecture overly specific and married to Steam. Would
|
||||
have to be married to Tokio to have massive concurrency. But you don't
|
||||
need to support hundreds of clients right away.
|
||||
|
||||
Well, perhaps I do, because in the face of DDOS attack, you need to keep
|
||||
a lot of long lived inactive connections around for a long time, any of
|
||||
which could receive a packet at any time. I need to look at the
|
||||
GameNetworkingSockets code and see how it listens on lots and lots of
|
||||
sockets. If it uses [overlapped IO], then it is golden. Get it up first, and it put inside a service later.
|
||||
|
||||
[Overlapped IO]:client_server.html#the-select-problem
|
||||
{target="_blank"}
|
||||
|
||||
The nearest equivalent Rust application gave up on congestion control, having programmed themselves into a blind alley.
|
||||
|
||||
## Tokio
|
||||
|
||||
Tokio is a Rust framework for writing highly efficient highly scalable
|
||||
services. Writing networking for a service with large numbers of clients is
|
||||
very different between Windows and Linux, and I expect Tokio to take care
|
||||
of the differences.
|
||||
|
||||
There really is not any good C or C++ environment for writing services
|
||||
except Wt, which is completely specialized for the case of writing a web
|
||||
service whose client is the browser, and which runs only on Linux.
|
||||
|
||||
## wxWidgets
|
||||
wxWidgets has basic networking capability built in and integrated with its
|
||||
event loop, but it is a bit basic, and is designed for a gui app, not for a
|
||||
server – though probably more than adequate for initial release. It only
|
||||
supports http, but not https or websockets.
|
||||
[LibSourcery](https://sourcey.com/libsourcey) is a far more powerful
|
||||
networking library, which supports https and websockets, and is designed to
|
||||
interoperate with nginx and node.js. But integrating it with wxWidgets is
|
||||
likely to be nontrivial.
|
||||
|
||||
WxWidgets sample code for sockets is in %WXWIN%/samples/sockets. There is a
|
||||
[recently updated version on github]. Their example code supports TCP and
|
||||
UDP. But some people argue that the sampling is insufficiently responsive -
|
||||
you really need a second thread that damned well sits on the socket, rather
|
||||
than polling it. And that second thread cannot use wxSockets.
|
||||
|
||||
[recently updated version on github]:https://github.com/wxWidgets/wxWidgets/tree/master/samples/sockets
|
||||
|
||||
Programming sockets and networking in C is a mess. The [much praised guide
|
||||
to sockets](https://beej.us/guide/bgnet/html/single/bgnet.html) goes on for
|
||||
pages and pages describing a “simple” example client server. Trouble is that
|
||||
C, and old type Cish C++ exposes all the dangly bits. The [QT client server
|
||||
example](https://stackoverflow.com/questions/5773390/c-network-programming),
|
||||
on the other hand, is elegant, short, and self explanatory.
|
||||
|
||||
The code project has [example code written in C++](https://www.codeproject.com/Articles/13071/Programming-Windows-TCP-Sockets-in-C-for-the-Begin), but it is still mighty intimidating compared to the QT client server example. I have yet to look at the wxWidgets client server examples – but looking for wxWidgets networking code has me worried that it is a casual afterthought, not adequately supported or adequately used.
|
||||
|
||||
ZeroMQ is Linux, C, and Cish C++.
|
||||
|
||||
Boost Asio is highly praised, but I tried it, and concluded its architecture
|
||||
is broken, trying to make simplicity and elegance where it cannot be made,
|
||||
resulting in leaky abstractions which leak incomprehensible complexity the
|
||||
moment you stray off the beaten path – I feel they have lost control of their
|
||||
design, and are just throwing crap at it trying to make something that
|
||||
cannot work, work. I similarly found the Boost time libraries failed, leaking
|
||||
complexity that they tried to hide, with the hiding merely adding complexity.
|
||||
|
||||
[cpp-httplib](https://github.com/yhirose/cpp-httplib) is wonderful in its
|
||||
elegance, simplicity, and ease of integration. You just include a single
|
||||
header. Unfortunately, it is strictly http/https, and we need something that
|
||||
can deal with the inherently messy lower levels.
|
||||
|
||||
[Poco](http://pocoproject.org/) does everything, and is C++, but hey, let us first see how far we can get with wxWidgets.
|
||||
|
||||
Further, the main reason for doing https integration with the existing
|
||||
browser web ecosystem, whose security is fundamentally broken, due the
|
||||
state’s capacity to seize names, and the capacity of lots of entities to
|
||||
intercept ssl. It might well be easier to fork opera or embed chromium. I
|
||||
notice that Chromium has features supporting payment built into it, a bunch
|
||||
of “PaymentMethod\*\*\*\*\*Event”
|
||||
|
||||
The best open source browser, and best privacy browser, is Opera, in that it comes from an entity less evil than Google.
|
||||
|
||||
[Opera](https://bit.ly/2UpSTFy) needs to be configured with [a bunch of privacy add ons](https://gab.com/PatriotKracker80/posts/c3kvL3pBbE54NEFaRGVhK1ZiWCsxZz09) [HTTPS Everywhere Add-on](https://bit.ly/2ODbPeE),
|
||||
[uBlock](https://bit.ly/2nUJLqd), [DisconnectMe](https://bit.ly/2HXEEks), [Privacy-Badger](https://bit.ly/2K5d7R1), [AdBlock Plus](https://bit.ly/2U81ddo), [AdBlock for YouTube](https://bit.ly/2YBzqRh), two tracker blockers, and three ad blockers.
|
||||
|
||||
It would be great if we could make our software another addon, possibly chatting by websocket to the wallet.
|
||||
|
||||
The way it would work would be to add another protocol to the browser:
|
||||
ro://name1.name2.name3/directory/directory/endpoint. When you connect to such
|
||||
an endpoint, your wallet, possibly a wallet with no global name, connects to
|
||||
the named wallet, and gets IP, a port, a virtual server name, a cookie
|
||||
unique for your wallet, and the hash of the valid ssl certificate for that
|
||||
name, and then the browser makes a connection to the that server, ignoring
|
||||
the CA system and the DNS system. The name could be a DNS name and the
|
||||
certificate a CA certificate, in which case the connection looks to the
|
||||
server like any other, except for the cookie which enables it to send
|
||||
messages, typically a payment request, to the wallet.
|
||||
|
||||
# Safe maths
|
||||
|
||||
[Safeint]:https://github.com/dcleblanc/SafeInt
|
||||
{target="_blank"}
|
||||
|
||||
We could implement transaction outputs and inputs as a fixed amount of
|
||||
fungible tokens, limited to $2^{64}-1$ tokens, using [Safeint] That will be
|
||||
future proof for a long time, but not forever.
|
||||
|
||||
Indeed, anything that does not use Zksnarks is not future proof for the
|
||||
indefinite future.
|
||||
|
||||
Or we could implement decimal floating point with unlimited exponents
|
||||
and mantissa implemented on top of [MPIR]
|
||||
|
||||
Or we could go ahead with the canonical representation being unlimited
|
||||
decimal exponent and unlimited mantissa, but the wallet initially only
|
||||
generates, and can only handle, transactions that can be represented by [Safeint], and always converts the mantissa plus decimal exponent to and
|
||||
from a safeint.
|
||||
|
||||
If we rely on safeint, and our smallest unit is the microrho, there is room for
|
||||
eighteen trillion rho. We can start actually using the unlimited precision of
|
||||
the exponent and the mantissa in times to come - not urgent, merely
|
||||
architect it into the canonical format.
|
||||
|
||||
From the point of view of the end user, this will merely be an upgrade that
|
||||
allows nanorho, picorho, femptorho, attorho, zeptorho, yoctorho, and allows a decimal point in yoctorho quantities. And then we go to a new unit, the jim, with one thousand yottajim equals one yoctorho, a billion yoctojim equals one attorho, a trillion exajim equals one attorho.
|
||||
|
||||
To go all the way around to two byte exponents, for testing purposes, will
|
||||
need some additional new units after the jim. (And we should impose a
|
||||
minimum unit size of $10^{-195}$ rho or $10^{-6}$ rho, thereby ensuring
|
||||
that transaction size is bounded while allowing compatibility for future expansion.)
|
||||
|
||||
Except in test and development code, any attempt to form a transaction
|
||||
involving quantities with exponents less than $1000^{-2}$ will cause a
|
||||
gracefully handled exception, and in all code any attempt to display
|
||||
or perform calculations on transaction inputs and outputs for which no
|
||||
display units exist will cause an ungracefully handled exception.
|
||||
|
||||
In the first release configuration parameters, the lowest allowed exponent
|
||||
will be $1000^{-2}$, corresponding to microrho, and the highest allowed
|
||||
exponent $1000^4$, corresponding to terarho, and machines will be
|
||||
programmed to vote "incapable" and "no" on any proposal to change those
|
||||
parameters. However they will correctly handle transactions beyond those
|
||||
limits provided that when quantities are expressed in the smallest unit of
|
||||
any of the inputs and outputs, the sum of all the inputs and of all the
|
||||
outputs remains below $2^{64}$. To ensure that all releases are future
|
||||
compatible, the blockchain should have some exajim transactions, and
|
||||
unspent transaction outputs but the peers should refuse to form any more
|
||||
of them. The documentation will say that arbitrarily small and large new
|
||||
transaction outputs used to be allowed, but are currently not allowed, to
|
||||
reduce the user interface attack surface that needs to be security checked
|
||||
and to limit blockchain bloat, and since there is unlikely to be demand for
|
||||
this, this will probably not be fixed for a very long time.
|
||||
|
||||
Or perhaps it would be less work to support humungous transactions from
|
||||
the beginning, subject to some mighty large arbitrary limit to prevent
|
||||
denial of service attack, and eventually implementing native integer
|
||||
handling of normal sized transactions as an optimization, for transactions where all quantities fit within machine sized words, and rescaled intermediate outputs will be less than $64 - \lceil log_2($number of inputs and outputs$) \rceil$ bits.
|
||||
|
||||
Which leads me to digress how we are going to handle protocol updates:
|
||||
|
||||
## handling protocol updates
|
||||
|
||||
1. Distribute software capable of handling the update.
|
||||
1. A proposed protocol update transaction is placed on the blockchain.
|
||||
1. Peers indicate capability to handle the protocol update. Or ignore it,
|
||||
or indicate that they cannot. If a significant number of peers
|
||||
indicate capability, peers that lack capability push their owners for
|
||||
an update.
|
||||
1. A proposal to start emitting data that can only handled by more
|
||||
recent peers is placed on the blockchain.
|
||||
1. If a significant number of peers vote yes, older peers push more
|
||||
vigorously for an update.
|
||||
1. If a substantial supermajority votes yes by a date specified in the
|
||||
proposal, then they start emitting data in the new format on a date
|
||||
shortly afterwards. If no supermajority by the due date, the
|
||||
proposal is dead.
|
||||
|
||||
# [Zlib compression libraries.](./libraries/zlib.html)
|
||||
|
||||
Built it, easy to use, easy to build, easy to link to. Useful for large amounts of text, provides, but does not use, CRC32
|
||||
|
||||
[Cap\'n Proto](./libraries/capnproto.html)
|
||||
|
||||
[Crypto libraries](./libraries/crypto_library.html)
|
||||
|
||||
[Memory Safety](./libraries/memory_safety.html).
|
||||
|
||||
[C++ Automatic Memory Management](./libraries/cpp_automatic_memory_management.html)
|
||||
|
||||
[C++ Multithreading](./libraries/cpp_multithreading.html)
|
||||
|
||||
[Catch testing library](https://github.com/catchorg/Catch2)
|
||||
[Boost](https://github.com/boostorg/boost)
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
## Boost
|
||||
|
||||
My experience with Boost is that it is no damned good: They have an over
|
||||
elaborate pile of stuff on top of the underlying abstractions, which pile has high runtime cost, and specializes the underlying stuff in ways that only
|
||||
work with boost example programs and are not easily generalized to do what
|
||||
one actually wishes done.
|
||||
|
||||
Their abstractions leak.
|
||||
|
||||
[Boost high precision arithmetic `gmp_int`]:https://gmplib.org/
|
||||
|
||||
[Boost high precision arithmetic `gmp_int`] A messy pile built on top of
|
||||
GMP. Its primary benefit is that it makes `gmp` look like `mpir`. It is easier to use [MPIR] directly.
|
||||
|
||||
The major benefit of boost `gmp` is that it runs on some machines and
|
||||
operating systems that `mpir` does not, and is for the most part source code
|
||||
compatible with `mpir`.
|
||||
|
||||
A major difference is that boost `gmp` uses long integers, which are on sixty
|
||||
four bit windows `int32_t`, where `mpir` uses `mpir_ui` and `mpir_si`, which are
|
||||
on sixty four bit windows `uint64_t` and `int64_t`. This is apt to induce no
|
||||
end of major porting issues between operating systems.
|
||||
|
||||
Boost `gmp` code running on windows is apt to produce radically different
|
||||
results to the same boost `gmp` code running on linux. Long `int` is just not
|
||||
portable, and should never be used. This kind of issue is absolutely typical
|
||||
of boost.
|
||||
|
||||
In addition to the portability issue, it is also a typical example of boost
|
||||
abstractions denying you access to the full capability of the thing being
|
||||
abstracted away. It is silly to have a thirty two bit interface between sixty
|
||||
four bit hardware and unlimited arithmetic precision software.
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
## Database
|
||||
|
||||
The blockchain is a massively distributed database built on top of a pile of
|
||||
single machine, single disk, databases communicating over the network. If you
|
||||
want a single machine, single disk, database, go with SQLite, which in WAL
|
||||
mode implements synch interaction on top of hidden asynch.
|
||||
|
||||
[SQLite](https://www.Sqlite.org/src/doc/trunk/README.md) has its own way of doing things, which does not play nice with Github.
|
||||
|
||||
The efficient and simple way to handle interaction with the network is via
|
||||
callbacks rather than multithreading, but you usually need to handle
|
||||
databases, and under the hood, all databases are multithreaded and blocking.
|
||||
If they implement callbacks, it is usually on top of a multithreaded layer,
|
||||
and the abstraction is apt to leak, apt to result in unexpected blocking on a
|
||||
supposedly asynchronous callback.
|
||||
|
||||
SQLite recommends at most one thread that writes to the database, and
|
||||
preferably only one thread that interacts with the database.
|
||||
|
||||
## The Invisible Internet Project (I2P)
|
||||
|
||||
[Comes](https://geti2p.net/en/) with an I2P webserver, and the full api for streaming stuff. These
|
||||
appear as local ports on your system. They are not tcp ports, but higher
|
||||
level protocols, *and* UDP. (Sort of UDP - obviously you have to create a
|
||||
durable tunnel, and one end is the server, the other the client.)
|
||||
|
||||
Inconveniently, written in java.
|
||||
|
||||
## Internet Protocol
|
||||
|
||||
[QUIC] UDP with flow control and reliability. Intimately married to http/2,
|
||||
https/2, and google chrome. Cannot call as library, have to analyze code,
|
||||
extract their ideas, and rewrite. And, looking at their code, I think they
|
||||
have written their way into a blind alley.
|
||||
|
||||
But QUIC is http/2, and there is a gigantic ecosystem supporting http/2.
|
||||
|
||||
We really have no alternative but to somehow interface to that ecosystem.
|
||||
|
||||
[QUIC]: https://github.com/private-octopus/picoquic
|
||||
|
||||
[QUIC] is UDP with flow control, reliability, and SSL/TLS encryption, but no
|
||||
DDoS resistance, and total insecurity against CA attack.
|
||||
|
||||
## Boost Asynch
|
||||
|
||||
Boost implements event oriented multithreading in IO service, but don’t like
|
||||
it because it fails to interface with Microsoft’s implementation of asynch
|
||||
internet protocol, WSAAsync, and WSAEvent. Also because brittle,
|
||||
incomprehensible, and their example programs do not easily generalize to
|
||||
anything other than that particular example.
|
||||
|
||||
To the extent that you need to interact with a database, you need to process
|
||||
connections from clients in many concurrent threads. Connection handlers are
|
||||
run in thread, that called `io_service::run()`.
|
||||
|
||||
You can create a pool of threads processing connection handlers (and waiting
|
||||
for finalizing database connection), by running `io_service::run()` from
|
||||
multiple threads. See Boost.Asio docs.
|
||||
|
||||
## Asynch Database access
|
||||
|
||||
MySQL 5.7 supports [X Plugin / X Protocol](https://dev.mysql.com/doc/refman/5.7/en/document-store-setting-up.html), which allows asynchronous query execution and NoSQL. But X devapi was created to support node.js and stuff. The basic idea is that you send text messages to mysql on a certain port, and asynchronously get text messages back, in google protobuffs, in php, JavaScript, or sql. No one has bothered to create a C++ wrapper for this, it being primarily designed for php or node.js
|
||||
|
||||
SQLite nominally has synchronous access, and the use of one read/write
|
||||
thread, many read threads is recommended. But under the hood, if you enable
|
||||
WAL mode, access is asynchronous. The nominal synchrony sometimes leaks into
|
||||
the underlying asynchrony.
|
||||
|
||||
By default, each `INSERT` is its own transaction, and transactions are
|
||||
excruciatingly slow. Wal normal mode fixes this. All writes are writes to the
|
||||
writeahead file, which gets cleaned up later.
|
||||
|
||||
The authors of SQLite recommend against multithreading writes, but we
|
||||
do not want the network waiting on the disk, nor the disk waiting on the
|
||||
network, therefore, one thread with asynch for the network, one purely
|
||||
synchronous thread for the SQLite database, and a few number crunching
|
||||
threads for encryption, decryption, and hashing. This implies shared
|
||||
nothing message passing between threads.
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
[Facebook Folly library]provides many tools, with such documentation as
|
||||
exists amounting to “read the f\*\*\*\*\*g header files”. They are reputed
|
||||
to have the highest efficiency queuing for interthread communication, and it
|
||||
is plausible that they do, because facebook views efficiency as critical.
|
||||
Their [queuing header file](https://github.com/facebook/folly/blob/master/folly/MPMCQueue.h)
|
||||
gives us
|
||||
`MPMCQueue`.
|
||||
|
||||
[Facebook Folly library]:https://github.com/facebook/folly/blob/master/folly/
|
||||
|
||||
On the other hand, boost gives us a lockless interthread queue, which should
|
||||
be very efficient. Assuming each thread is an event handler, rather than
|
||||
pseudo synchronous, we queue up events in the boost queue, and handle all
|
||||
unhandled exceptions from the event handler before getting the next item from
|
||||
the queue. We keep enough threads going that we do not mind threads blocking
|
||||
sometimes. The queue owns objects not currently being handled by a
|
||||
particular thread. Objects are allocated in a particular thread, and freed in
|
||||
a particular thread, which process very likely blocks briefly. Graphic
|
||||
events are passed to the master thread by the wxWindows event code, but we
|
||||
use our own multithreaded event code to handle everything else. Posting an
|
||||
event to the gui code will block briefly.
|
||||
|
||||
I was looking at boost’s queues and lockless mechanisms from the point of
|
||||
view of implementing my own thread pool, but this is kind of stupid, since
|
||||
boost already has a thread pool mechanism written to handle the asynch IO
|
||||
problem. Thread pools are likely overkill. Node.js does not need them,
|
||||
because its single thread does memory to memory operations.
|
||||
|
||||
Boost provides us with an [`io_service` and `boost::thread` group], used to
|
||||
give effect to asynchronous IO with a thread pool. `io_service` was specifically written to perform io, but can be used for any
|
||||
thread pool activity whatsoever. You can “post” tasks to the io_service,
|
||||
which will get executed by one of the threads in the pool. Each such task has
|
||||
to be a functor.
|
||||
|
||||
[`io_service` and `boost::thread` group]:http://thisthread.blogspot.com/2011/04/multithreading-with-asio.html
|
||||
|
||||
Since supposedly nonblocking operations always leak and block, all we can do
|
||||
is try to have blocking minimal. For example nonblocking database operations
|
||||
always block. Thus our threadpool needs to be many times larger than our set
|
||||
of hardware threads, because we will always wind up doing blocking operations.
|
||||
|
||||
The C++11 multithreading model assumes you want to do some task in parallel,
|
||||
for example you are multiplying two enormous matrices, so you spawn a bunch
|
||||
of threads, then you wait for them all to complete using `join`, or all to
|
||||
deliver their payload using futures and promises. This does not seem all that
|
||||
useful, since the major practical issue is that you want your system to
|
||||
continue to be responsive while it is waiting for some external hardware to
|
||||
reply. When you are dealing with external events, rather than grinding a
|
||||
matrix in parallel, event oriented architecture, rather than futures,
|
||||
promises, and joins is what you need.
|
||||
|
||||
Futures, promises, and joins are useful in the rather artificial case that
|
||||
responding to a remote procedure call requires you to make two or more
|
||||
remote procedure calls, and wait for them to complete, so that you then have
|
||||
the data to respond to a remote procedure call.
|
||||
|
||||
Futures, promises, and joins are useful on a server that launches one thread
|
||||
per client, which is often a sensible way to do things, but does not fit that
|
||||
well to the request response pattern, where you don’t have a great deal of
|
||||
client state hanging around, and you may well have ten thousand clients. If
|
||||
you can be pretty sure you are only going to have a reasonably small number
|
||||
of clients at any one time, and significant interaction between clients,
|
||||
one thread per client may well make a lot of sense.
|
||||
|
||||
I was planning to use boost asynch, but upon reading the boost user threads,
|
||||
sounds fragile, a great pile of complicated unintelligible code that does
|
||||
only one thing, and if you attempt to do something slightly different,
|
||||
everything falls apart, and you have to understand a lot of arcane details,
|
||||
and rewrite them.
|
||||
|
||||
[Nanomsg](http://nanomsg.org/) is a socket library that provides a layer on
|
||||
top of everything that makes everything look like sockets, and provides
|
||||
sockets specialized to various communication patterns, avoiding the roll your
|
||||
own problem. In the zeroMQ thread, people complained that [a simple hello
|
||||
world TCP-IP program tended to be disturbingly large and complex]
|
||||
Looks to me that [Nanomsg] wraps a lot of that complexity.
|
||||
|
||||
[a simple hello world TCP-IP program tended to be disturbingly large and complex]:http://250bpm.com/blog
|
||||
|
||||
# Sockets
|
||||
|
||||
A simple hello world TCP-IP program tends to be disturbingly large and
|
||||
complex, and windows TCP-IP is significantly different from posix TCP-IP.
|
||||
|
||||
Waiting on network events is deadly, because they can take arbitrarily large
|
||||
time, but multithreading always bites. People who succeed tend to go with
|
||||
single thread asynch, similar to, [or part of, the window event handling
|
||||
loop].
|
||||
|
||||
[or part of, the window event handling loop]:https://www.codeproject.com/Articles/13071/Programming-Windows-TCP-Sockets-in-C-for-the-Begin
|
||||
|
||||
Asynch code should take the form of calling a routine that returns
|
||||
immediately, but passing it a lambda callback, which gets executed in the
|
||||
most recently used thread.
|
||||
|
||||
Interthread communication bites – you don’t want several threads accessing
|
||||
one object, as synch will slow you down, so if you multithread, better to
|
||||
have a specialist thread for any one object, with lockless queues passing
|
||||
data between threads. One thread for all writes to SQLite, one thread for
|
||||
waiting on select.
|
||||
|
||||
Boost Asynch supposedly makes sockets all look alike, but I am frightened of
|
||||
their work guard stuff – looks to me fragile and incomprehensible. Looks to
|
||||
me that no one understands boost asynch work guard, not even the man who
|
||||
wrote it. And they should not be using boost bind, which has been obsolete
|
||||
since lambdas have been available, indicating bitrot.
|
||||
|
||||
Because work guard is incomprehensible and subject to change, will just keep
|
||||
the boost io object busy with a polling timer.
|
||||
|
||||
And I am having trouble finding boost asynch documented as a sockets library.
|
||||
Maybe I am just looking in the wrong place.
|
||||
|
||||
[A nice clean tutorial depicting strictly synchronous tcp.](https://www.binarytides.com/winsock-socket-programming-tutorial/)
|
||||
|
||||
[Libpcap and Win10PCap](https://en.wikipedia.org/wiki/Pcap#Wrapper_libraries_for_libpcap) provide very low level, OS independent, access to packets, OS independent because they are below the OS, rather than above it. [Example code for visual studio.](https://www.csie.nuk.edu.tw/~wuch/course/csc521/lab/ex1-winpcap/)
|
||||
|
||||
[Simple sequential procedural socket programming for windows sockets.](https://www.binarytides.com/winsock-socket-programming-tutorial/)
|
||||
|
||||
If I program from the base upwards, the bottom most level would be a single
|
||||
thread sitting on a select statement. Whenever the select fired, would
|
||||
execute a corresponding functor transferring data between userspace and system
|
||||
space.
|
||||
|
||||
One thread, and only one thread, responsible for timer events and
|
||||
transferring network data between userspace and systemspace.
|
||||
|
||||
If further work required in userspace that could take significant time (disk
|
||||
operations, database operations, cryptographic operations) that functor under
|
||||
that thread would stuff another functor into a waitless stack, and a bunch
|
||||
of threads would be waiting for that waitless stack to be signaled, and one
|
||||
of those other threads would execute that functor.
|
||||
|
||||
The reason we have a single userspace thread handling the select and transfers
|
||||
between userspace and systemspace is that that is a very fast and very common
|
||||
operation, and we don’t want to have unnecessary thread switches, wherein
|
||||
one thread does something, then immediately afterwards another thread does
|
||||
almost the same thing. All quickie tasks should be handled sequentially by
|
||||
one thread that works a state machine of functors.
|
||||
|
||||
The way to do asynch is to wrap sockets in classes that reflect the intended
|
||||
use and function of the socket. Call each instance of such a class a
|
||||
connection. Each connection has its own state machine state and its own
|
||||
**message dispatcher, event handler, event pump, message pump**.
|
||||
|
||||
A single thread calls select and poll, and drives all connection instances in
|
||||
all transfers of data between userspace and systemspace. Connections also
|
||||
have access to a thread pool for doing operations (such as file, database and
|
||||
cryptography) that may involve waits.
|
||||
|
||||
The hello world program for this system is to create a derived server class
|
||||
that does a trivial transformation on input, and has a path in server name
|
||||
space, and a client class that sends a trivial input, and displays the result.
|
||||
|
||||
Microsoft WSAAsync\[Socketprocedure\] is a family of socket procedures
|
||||
designed to operate with, and be driven by, the Window ui system, wherein
|
||||
sockets are linked to windows, and driven by the windows message loop. Could
|
||||
benefit considerably by being wrapped in connection classes.
|
||||
|
||||
I am guessing that wxWidgets has a similar system for driving sockets,
|
||||
wherein a wxSocket is plugged in to the wxWidget message loop. On windows,
|
||||
wxWidget wraps WSASelect, which is the behavior we need.
|
||||
|
||||
Microsoft has written the asynch sockets you need, and wxWidgets has wrapped
|
||||
them in an OS independent fashion.
|
||||
|
||||
WSAAsyncSelect
|
||||
|
||||
WSAEventSelect
|
||||
|
||||
select
|
||||
|
||||
Using wxSockets commits us to having a single thread managing everything. To
|
||||
get around the power limit inherent in that, have multiple peers under
|
||||
multiple names accessing the same database, and have a temporary and
|
||||
permanent redirect facility – so that if you access `peername`, your
|
||||
connection, and possibly your link, get rewritten to `p2.peername` by peers
|
||||
trying to balance load.
|
||||
|
||||
Microsoft tells us:
|
||||
|
||||
> When receiving, applications use the WSARecv or WSARecvFrom functions to supply
|
||||
buffers into which data is to be received. If one or more buffers are posted
|
||||
prior to the time when data has been received by the network, that data could
|
||||
be placed in the user’s buffers immediately as it arrives. Thus, it can
|
||||
avoid the copy operation that would otherwise occur at the time the recv or
|
||||
recvfrom function is invoked.
|
||||
|
||||
Moral is, we should use the sockets that wrap WSA.
|
||||
|
||||
# Tcl
|
||||
|
||||
Tcl is a really great language, and I wish it would become the language of my new web, as JavaScript is the language of the existing web.
|
||||
|
||||
But it has been semi abandoned for twenty years.
|
||||
|
||||
It consists of a string (which is implemented under the hood as a copy on
|
||||
write rope, with some substrings of the rope actually being run time typed
|
||||
C++ types that can be serialized and deserialized to strings) and a name
|
||||
table, one name table per interpreter, and at least one interpreter per
|
||||
thread. The entries in the name table can be strings, C++ functions, or run
|
||||
time typed C++ types, which may or may not be serializable or deserializable,
|
||||
but conceptually, it is all one big string, and the name table is used to
|
||||
find C and C++ functions which interpret the string following the command.
|
||||
Execution consists of executing commands found in the string, which transform
|
||||
it into a new string, which in turn gets transformed into a new string,
|
||||
until it gets transformed into the final result. All code is metacode. If
|
||||
elements of the string need to be deserialized to and from a C++ run time
|
||||
type, (because the command does not expect that run time type) but cannot be,
|
||||
because there is no deserialization for that run time type, you get a run
|
||||
time error, but most of the time you get, under the hood, C++ code executing
|
||||
C++ types – it is only conceptually a string being continually transformed
|
||||
into another string. The default integer is infinite precision, because
|
||||
integers are conceptually arbitrary length strings of numbers.
|
||||
|
||||
To sandbox third party code, including third party gui code, just restrict
|
||||
the nametable to have no dangerous commands, and to be unable to load c++
|
||||
modules that could provide dangerous commands.
|
||||
|
||||
It is faster to bring up a UI in Tcl than in C. We get, for free, OS
|
||||
independence.
|
||||
|
||||
Tcl used to be the best level language for attaching C programs to, and for
|
||||
testing C programs, or it would be if SWIG actually worked. The various C
|
||||
components of Tcl provide an OS independent layer on top of both Linux and
|
||||
Windows, and it has the best multithread and asynch system.
|
||||
|
||||
It is also a metaprogramming language. Every Tcl program is a metaprogram – you always write code that writes code.
|
||||
|
||||
The Gui is necessarily implemented as asynch, something like the JavaScript
|
||||
dom in html, but with explicit calls to the event/idle loop. Multithreading
|
||||
is implemented as multiple interpreters, at least one interpreter per thread,
|
||||
sending messages to each other.
|
||||
|
||||
# Time
|
||||
|
||||
After spending far too much time on this issue, which has sucked in far
|
||||
too many engineers and far too much thought, and generated far too many
|
||||
libraries, I found the solution was c++11 Chrono: For short durations, we
|
||||
use the steady time in milliseconds, where each machine has its own
|
||||
epoch, and no two machines have exactly the same milliseconds. For
|
||||
longer durations, we use the system time in seconds, where all machines
|
||||
are expected to be within a couple of seconds of each other. For the human
|
||||
readable system time in seconds to be displayed on a particular machine,
|
||||
we use the ISO format 2012‑01‑14_15:39:34+10:00 (timezone with 10
|
||||
hour offset equivalent to Greenwich time 2012‑01‑14_05:39:34+00:00)
|
||||
|
||||
[For long durations, we use signed system time in seconds, for short durations unsigned steady time in milliseconds.](./libraries/rotime.cpp)
|
||||
|
||||
Windows and Unix both use time in seconds, but accessed and manipulated in
|
||||
incompatible ways.
|
||||
|
||||
Boost has numerous different and not altogether compatible time libraries,
|
||||
all of them overly clever and all of them overly complicated.
|
||||
|
||||
wxWidgets has OS independent time based on milliseconds past the epoch, which
|
||||
however fails to compress under Cap\'n Proto.
|
||||
|
||||
I was favourably impressed by the approach to time taken in tcp packets,
|
||||
that the time had to be approximately linear, and in milliseconds or larger,
|
||||
but they were entirely relaxed about the two ends of a tcp connection
|
||||
using different clocks with different, and variable, speeds.
|
||||
|
||||
It turns out you can go a mighty long way without a global time, and to the
|
||||
extent that you do need a global time, should be equivalent to that used in
|
||||
email, which magically hides the leap seconds issue.
|
||||
|
||||
# UTF‑8 strings
|
||||
|
||||
Are supported by the wxWidgets wxString, which provide support to and
|
||||
from wide character variants and locale variants. (We don't want locale
|
||||
variants, they are obsolete. The whole world is switching to UTF, but
|
||||
our software and operating environments lag)
|
||||
|
||||
`wxString::ToUTF8()` and `wxString::FromUTF8()` do what you would expect.
|
||||
|
||||
On visual studio, need to set your source files to have bom, so that Visual
|
||||
Studio knows that they are UTF‑8, need to set the compiler environment in
|
||||
Visual Studio to UTF‑8 with `/Zc:__cplusplus /utf-8 %(AdditionalOptions)`
|
||||
|
||||
And you need to set the run time environment of the program to UTF‑8
|
||||
with a manifest.
|
||||
|
||||
You will need to place all UTF‑8 string literals and string constants in a
|
||||
resource file, which you will use for translated versions.
|
||||
|
||||
If you fail to set the compilation and run time environment to UTF‑8 then
|
||||
for extra confusion, your debugger and compiler will *look* as if they are
|
||||
handling UTF‑8 characters correctly as single byte characters, while at
|
||||
least wxString alerts you that something bad is happening by run time
|
||||
translating to the null string.
|
||||
|
||||
Automatic string conversion in wxWidgets is *not* UTF‑8, and if you have
|
||||
any unusual symbols in your string, you get a run time error and the empty
|
||||
string. So wxString automagic conversions will rape you in the ass at
|
||||
runtime, and for double the confusion, your correctly translated UTF‑8
|
||||
strings will look like errors. Hence the need to make sure that the whole
|
||||
environment from source code to run time execution is consistently UTF‑8,
|
||||
which has to be separately ensured in three separate places.
|
||||
|
||||
When wxWidgets is compiled using `#define wxUSE_UNICODE_UTF8 1`,
|
||||
it provides UTF‑8 iterators and caches a character index, so that accessing
|
||||
a character by index near a recently used character is fast. The usual
|
||||
iterators `wx.begin()`, `wx.end()`, const and reverse iterators are available.
|
||||
I assume something bad happens if you advance a reverse iterator after
|
||||
writing to it.
|
||||
|
||||
wxWidgets compiled with `#define wxUSE_UNICODE_UTF8 1` is the
|
||||
way of the future, but not the way of the present. Still a work in progress
|
||||
Does not build under Windows. Windows now provide UTF8 entries to all
|
||||
its system functions, which should make it easy.
|
||||
|
||||
wxWidgets provides `wxRegEx` which, because wxWidgets provides index
|
||||
by entity, should just work. Eventually. Maybe the next release.
|
||||
|
||||
# [UTF8-CPP](http://utfcpp.sourceforge.net/ "UTF-8 with C++ in a Portable Way")
|
||||
|
||||
A powerful library for handling UTF‑8. This somewhat duplicates the
|
||||
facilities provided by wxWidgets with `wxUSE_UNICODE_UTF8==1`
|
||||
|
||||
For most purposes, wxString should suffice, when it actually works with
|
||||
UTF8. Which it does not yet on windows. We shall see. wxWidgets
|
||||
recommends not using wxString except to communicate with wxWidgets,
|
||||
and not using it as general UTF‑8 system. Which is certainly the current
|
||||
state of play with wxWidgets.
|
||||
|
||||
For regex to work correctly, probably need to do it on wxString's native
|
||||
UTF‑16 (windows) or UTF‑32 (unix), but it supposedly works on `UTF8`,
|
||||
assuming you can successfully compile it, which you cannot.
|
||||
|
||||
# Cap\'n Proto
|
||||
|
||||
[Designed for a download from github and run cmake install.](https://capnproto.org/install.html) As all software should be.
|
||||
|
||||
But for mere serialization to of data to a form invariant between machine
|
||||
architectures and different compilers and different compilers on the same
|
||||
machine, overkill for our purposes. Too much capability.
|
||||
|
||||
# Awesome C++
|
||||
|
||||
[Awesome C++] A curated list of awesome C/C++ frameworks, libraries, resources, and shiny things
|
||||
|
||||
[Awesome C++]:https://cpp.libhunt.com
|
||||
"A curated list of awesome C/C++ frameworks, libraries, resources, and shiny things"
|
||||
{target="_blank"}
|
||||
|
||||
I encountered this when looking at the Wt C++ Web framework, which seems to be mighty cool except I don't think I have any use for a web framework. But [Awesome C++] has a very pile of things that I might use.
|
||||
|
||||
Wt has the interesting design principle that every open web page maps to a
|
||||
windows class, every widget on the web page, maps to a windows class,
|
||||
every row in the sql table maps to a windows class. Cool design.
|
||||
|
||||
# Opaque password protocol
|
||||
|
||||
[Opaque] is PAKE done right.
|
||||
|
||||
[Opaque]:https://blog.cryptographyengineering.com/2018/10/19/lets-talk-about-pake/
|
||||
"Let’s talk about PAKE" {target="_blank"}
|
||||
|
||||
Server stores a per user salt, the users public key, and the user's secret key
|
||||
encrypted with a secret that only the user ever learns.
|
||||
|
||||
Secret is generated by the user from the salt and his password by
|
||||
interaction with the server without the user learning the salt, nor the hash of the salt, nor the server the password or the hash of the password.
|
||||
User then strengthens the secret generated from salt and password
|
||||
applying a large work factor to it, and decrypts the private key with it.
|
||||
User and server then proceed with standard public key cryptography.
|
||||
|
||||
If the server is evil, or the bad guys seize the server, everything is still
|
||||
encrypted and they have to run, not a hundred million trial passwords
|
||||
against all users, but a hundred million passwords against *each* user. And
|
||||
user can make the process of trying a password far more costly and slow than
|
||||
just generating a hash. Opaque zero knowledge is designed to be as
|
||||
unfriendly as possible to big organizations harvesting data on an industrial
|
||||
scale. The essential design principle of this password protocol is that
|
||||
breaking a hundred million passwords by password guessing should be a
|
||||
hundred million times as costly as breaking one password by password
|
||||
guessing. The protocol is primarily designed to obstruct the NSA's mass
|
||||
harvesting.
|
||||
|
||||
It has the enormous advantage that if you have one strong password which
|
||||
you use for many accounts, one evil server cannot easily attack your
|
||||
accounts on other servers. To do that, it has to try every password - which
|
||||
runs into your password strengthening.
|
21
docs/libraries/app.cpp
Normal file
@ -0,0 +1,21 @@
|
||||
#include "stdafx.h"
|
||||
|
||||
|
||||
app::app()
|
||||
{
|
||||
}
|
||||
|
||||
app::~app()
|
||||
{
|
||||
}
|
||||
|
||||
bool app::OnInit()
|
||||
{
|
||||
wxFrame* mainFrame = new wxFrame(nullptr, wxID_ANY, L"Hello World … °C");
|
||||
//wxFrame* mainFrame = new wxFrame(nullptr, wxID_ANY, wcp);
|
||||
mainFrame->Show(true);
|
||||
return true;
|
||||
}
|
||||
|
||||
wxIMPLEMENT_APP(app);
|
||||
|
10
docs/libraries/app.h
Normal file
@ -0,0 +1,10 @@
|
||||
#pragma once
|
||||
|
||||
class app :
|
||||
public wxApp
|
||||
{
|
||||
public:
|
||||
app();
|
||||
virtual ~app();
|
||||
virtual bool OnInit() override;
|
||||
};
|
616
docs/libraries/building_and_using_libraries.md
Normal file
@ -0,0 +1,616 @@
|
||||
---
|
||||
title: Building the project and its libraries in Visual Studio
|
||||
---
|
||||
|
||||
# General instructions
|
||||
|
||||
At present the project requires the environment to be set up by hand, with a
|
||||
lot of needed libraries separately configured and separately built.
|
||||
|
||||
We need to eventually make it so that it is one git project with submodules
|
||||
which can be build with one autotools command with submodules, and one visual
|
||||
studio command with subprojects, so that
|
||||
|
||||
```bash
|
||||
git clone --recursive git://example.com/foo/rhocoin.git
|
||||
cd rhocoin
|
||||
devenv rhocoin.sln /build
|
||||
```
|
||||
|
||||
will build all the required libraries.
|
||||
|
||||
And similarly we want autotools to build all the submodules
|
||||
|
||||
```bash
|
||||
git clone --recursive git://example.com/foo/rhocoin.git
|
||||
cd rhocoin
|
||||
./configure; make && make install
|
||||
```
|
||||
|
||||
so that the top level configure and make does the `./configure; make && make install` in each submodule.
|
||||
|
||||
which might also create a deb file that could be used in
|
||||
|
||||
```bash
|
||||
apt-get -qy install ./rhocoin.deb
|
||||
```
|
||||
|
||||
But we are a long way from being there yet. At present the build environment
|
||||
is painfully hand made, and has to be painfully remade every time some updates
|
||||
a library on which it relies.
|
||||
|
||||
To build in Visual Studio under Microsoft windows
|
||||
|
||||
- Set the environment variable `SODIUM` to point to the Libsodium directory containing the directory `src/libsodium/include` and build the static linking, not the DLL, library following the libsodium instructions.
|
||||
- Set the environment variable `WXWIN` to point to the wxWidgets directory containing the directory `include/wx` and build the static linking library from the ide using the provided project files
|
||||
|
||||
If you are building this project using the Visual Studio ide, you should use the ide to build the libraries, and if you are building this project using the makefiles, you should use the provided make files to build the libraries. In theory this should not matter, but all too often it does matter.
|
||||
|
||||
When building libsodium and wxWidgets in Visual Studio, have to retarget the
|
||||
solution to use the current Microsoft libraries, retarget to use x64, and
|
||||
change the code generation default in every project versions from
|
||||
Multithreaded Dll to Multithreaded
|
||||
|
||||
Sqlite is not incorporated as an already built library, but as source code.,
|
||||
as the sqlite3 amalgamation file, one very big C file.
|
||||
|
||||
# Instructions for wxWidgets
|
||||
|
||||
## Setting wxWidgets project in Visual Studio
|
||||
|
||||
First set up your environment variables as described in [Directory Structure
|
||||
Microsoft Windows](../set_up_build_environments.html#dirstruct).
|
||||
|
||||
Run the wxWidgets windows setup, wxMSW-X.X.X-Setup.exe. The project will
|
||||
build with wxMSW-3.1.2, and will not build with earlier versions. Or just
|
||||
unzip the file into the target directory, as the install does not in fact do
|
||||
any useful configuration.
|
||||
|
||||
Build instructions are in `%WXWIN%\docs\msw\install.txt` The setup program
|
||||
for wxWidgets should set the environment variable WXWIN correctly, but does
|
||||
not do so. Manually set the WXWIN environment variable
|
||||
|
||||
When building in Visual Studio, have to retarget the solution to use the
|
||||
current libraries, retarget to use x64, and change the code generation
|
||||
default in every project versions from Multithreaded Dll to Multithreaded
|
||||
(select all projects except the custom build project, then go to
|
||||
properties/C++/code generation/Runtime). The DLL change has to be done
|
||||
separately for release and debug builds, since Debug uses the MTd libraries,
|
||||
and Release uses the MT libraries.
|
||||
|
||||
A Visual Studio project needs the library build by the wxWidget Visual Studio
|
||||
project, an nmake project needs the library built by the wxWidget makefile.vs
|
||||
|
||||
If you build roWallet under nmake, using Visual Studio tools, you should
|
||||
build your wxWidgets libraries using nmake – makefile.vc, not the
|
||||
wxWidget Visual Studio project files.
|
||||
|
||||
If you build roWallet using the Visual Studio project, you should build your
|
||||
wxWidgets libraries using Visual Studio and the wxWidgets Visual Studio
|
||||
project files.
|
||||
|
||||
UDP sockets seem seriously under supported and undocumented in
|
||||
wxWidgets, though theoretically present.
|
||||
|
||||
The [discussion](http://www.wxwidgets.org/search/?q=datagrams "wx Widgets Datagrams"),
|
||||
however, makes up for the paucity of documentation.
|
||||
|
||||
wxWidgets somehow neglects to mention that you need to use the
|
||||
different and entirely incompatible UDP specific system calls, `recvfrom()`
|
||||
and `sendto()` and instead of `read()` and `write()`
|
||||
|
||||
If using C sockets, need to pull in completely different header files on
|
||||
Unix than on Windows, including Winsock2 rather than Winsock on
|
||||
windows, but these completely different header files pull in almost the
|
||||
same routines that work in almost the same way.
|
||||
|
||||
```C
|
||||
#ifdef _WIN64
|
||||
#include <winsock2.h>
|
||||
#else
|
||||
#include <sys/socket.h>
|
||||
#include <netinet/in.h>
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/wait.h>
|
||||
#include <netdb.h>
|
||||
#endif
|
||||
```
|
||||
|
||||
If using wxWidgets, need to build with
|
||||
|
||||
```c++
|
||||
#define WXWIN_COMPATIBILITY_3_0 0
|
||||
#define wxUSE_COMPILER_TLS 2
|
||||
#define wxUSE_STD_CONTAINERS 1
|
||||
#define wxUSE_IPV6 1
|
||||
```
|
||||
|
||||
in `%WXWIN%\include\wx\msw\setup.h`
|
||||
|
||||
And for gnu builds, `./configure && make && make install`, have to
|
||||
change them separately and again in configure. What `configure` actually
|
||||
does is difficult to understand and predict, so I have put asserts in the code
|
||||
to detect and complain about unsatisfactory settings.
|
||||
|
||||
```C++
|
||||
#ifdef _WIN64
|
||||
constexpr bool b_WINDOWS = true;
|
||||
#else
|
||||
constexpr bool b_WINDOWS = false;
|
||||
#endif
|
||||
static_assert( __cplusplus >= 201703l, "Out of date C syntax");
|
||||
static_assert(wxUSE_IPV6 == 1, "IP6 unavailable in wxWidgets");
|
||||
static_assert(WXWIN_COMPATIBILITY_3_0 == 0, "wxWidgets api out of date");
|
||||
static_assert(wxUSE_COMPILER_TLS == (b_WINDOWS ? 2 : 1), "out of date workarounds in wxWidgets for windows bugs");
|
||||
static_assert(wxUSE_STD_CONTAINERS_COMPATIBLY == 1, "interoperability between stl and wxWidgets broken");
|
||||
static_assert(wxUSE_STD_CONTAINERS == 1, "wxWidgets api out of date");
|
||||
```
|
||||
|
||||
The two wxWidgets libraries that you build can co-exist because stored in
|
||||
different directories of `%WXWIN%`. Unfortunately the visual studio build
|
||||
projects default to the multithreaded dll, which breaks every other library,
|
||||
because multithreaded, rather than multithreaded dll, is the usual windows
|
||||
default used by statically linked libraries. so each subproject in the
|
||||
wxWidgets Visual Studio project has to be changed to link to multithreaded,
|
||||
rather than multithreaded DLL. This is a bug, or at least an inconvenient
|
||||
deviation from usual practice, in the current release of wxWidgets.
|
||||
|
||||
If built by a visual studio project, the wxWidgets build constructs a header
|
||||
file in a build location for visual studio projects to use, if built by nmake
|
||||
under visual studio tools, the build constructs a header file in another
|
||||
build location for nmake projects to use.
|
||||
|
||||
```c++
|
||||
#ifdef _DEBUG
|
||||
#pragma comment(lib, "wxbase31ud.lib")
|
||||
#else
|
||||
#pragma comment(lib, "wxbase31u.lib")
|
||||
#endif
|
||||
```
|
||||
|
||||
You are going to have to setup the environment variables %GSL%, %SODIUM% and
|
||||
%WXWIN% on your windows machines for my visual studio project files to work
|
||||
and going to have to build libsodium and wxWidgets in Visual Studio.
|
||||
|
||||
## Moving a sample from the samples directory
|
||||
|
||||
To an unrelated directory tree.
|
||||
|
||||
Create an empty desktop project using Visual Studio Wizard. Change the build
|
||||
type displayed at the top of Visual Studio from Debug X86 to Debug X64
|
||||
|
||||
In Solution Explorer (a palette-window of the VisualStudio-mainwindow), click
|
||||
on the project name, then click on the properties icon, the little wrench,
|
||||
and then in the left pane of the Property Pages dialog box, expand
|
||||
Configuration Properties and select VC++ Directories. Additional include- or
|
||||
lib-paths are specifyable there.
|
||||
|
||||
Set General/C++ Language Standard to ISO C++17 Standard (/std:c++17)
|
||||
|
||||
Add to the include paths (`properties/configuration properties/VC++
|
||||
Directories/Include Directories`
|
||||
|
||||
```
|
||||
$(GSL)\include
|
||||
$(WXWIN)\include\msvc
|
||||
$(WXWIN)\include
|
||||
$(SODIUM)\src\libsodium\include
|
||||
```
|
||||
|
||||
Add to the lib path (`properties/configuration properties/VC++
|
||||
Directories/Library Directories`) the location of the wxWidgets libraries
|
||||
|
||||
```
|
||||
$(WXWIN)\lib\vc_x64_lib\
|
||||
$(SODIUM)\Build\Debug\x64
|
||||
```
|
||||
|
||||
Set Linker/System to Windows (/SUBSYSTEM:WINDOWS). This is always set to
|
||||
CONSOLE, for no sane reason, even if you tell it to create an empty windows
|
||||
project.
|
||||
|
||||
Put unit test command line arguments (-dt) in Configuration/Debugging/Command
|
||||
Arguments.
|
||||
|
||||
Add the header, source, and resource files. The C++ option will then become
|
||||
available in properties. Be mindful that if you edit a wxWidgets \*.rc file
|
||||
in Visual Studio, Visual Studio destroys it.
|
||||
|
||||
Set C/C++/Code Generation/Runtime Library to Multi-threaded Debug (/MTd) in
|
||||
place of multi threaded DLL. When you switch back and forth between release
|
||||
and debug, this is apt to be set incorrectly, for reasons that are hard to
|
||||
keep track of.
|
||||
|
||||
Set C++/Preprocessor to \_DEBUG, \_WINDOWS, in place of \_DEBUG, \_CONSOLE
|
||||
for the Debug version, and \_NDEBUG, \_WINDOWS for the release version. If
|
||||
you compile a release version without defining \_NDEBUG, a flood of mystery
|
||||
linker error messages ensue, caused by the fact that I use \_NDEBUG to select
|
||||
library version, which is an improvement on the previous Visual Studio
|
||||
behavior, where Visual Studio cheerfully generated an executable that
|
||||
mysteriously just died at runtime because of mismatched libraries.
|
||||
|
||||
These instructions lifted wholesale from:\
|
||||
[Creating wxWidgets Programs with Visual Studio 2017](https://usingcpp.wordpress.com/2018/02/15/creating-wxwidgets-programs-with-visual-studio-2017-part-1/)\
|
||||
[Hello World Example](https://docs.wxwidgets.org/stable/overview_helloworld.html)
|
||||
|
||||
When you add the sqlite3.c amalgamation, make sure to mark it as not using
|
||||
precompiled headers before the first attempt to compile it, otherwise it
|
||||
breaks horribly, (C++ format precompiled headers being incompatible with C
|
||||
precompiled headers) and when you belatedly turn off the precompiled headers,
|
||||
some data that Visual Studio has generated hangs around, so that turning off
|
||||
the precompiled headers fails to fix the problem.
|
||||
|
||||
Similarly, if you make a typo in the include paths, it remains stuck on the old
|
||||
include paths even if you fix it. To do a real clean, close down visual
|
||||
studio, delete the directories generated by visual studio, and *then* your
|
||||
edits to the configuration will propagate properly
|
||||
|
||||
wxWidgets for visual studio should be [installed by the windows installer]
|
||||
(https://www.wxwidgets.org/downloads/), then follow the instructions in
|
||||
`%WXWIN%\docs\msw\install.md`, after adjusting the visual studio project
|
||||
files to build with multithreaded, rather than multithreaded DLL, to
|
||||
avoid DLL hell, we are not building with Microsoft DLLs. Set Project to X64.
|
||||
|
||||
Visual Studio resource editor will destroy a resource file that is intended
|
||||
for wxWidgets. WxWidgets resource files should only be edited in a text
|
||||
editor.
|
||||
|
||||
The native Icon format of wxWidgets is xpm, which does not have built in
|
||||
support for multiple icon sizes, multiple color resolutions, and partial
|
||||
transparency. The greenfish icon editor works to convert any icon format to
|
||||
any other, (though it forgets to include the const qualifier) but, sad to
|
||||
say, native wxWidgets icons, xpm, are lowest common denominator, therefore,
|
||||
crap. And xpm format is also crap, a crap representation of crap data, making
|
||||
windows native bitmap format look good by comparison.
|
||||
|
||||
Most of the documentation for wxWidgets is the samples directory, which is
|
||||
set up to build only in the samples directory. To move a project out of the
|
||||
samples directory, and get it compiling in the environment you have set up
|
||||
for your code in Visual Studio, copy the sample, and copy the resources its
|
||||
needs into your target directory, then correct the sample, so that instead of
|
||||
looking for resources relative to itself, in the wxWidgets directory, it
|
||||
looks for its rc file and stuff in its new home directory. You will need the
|
||||
resource files from the root of the samples, and possibly some files from
|
||||
samples/images.
|
||||
|
||||
Due to a persistent bug in visual studio, probably need to delete the
|
||||
generated project studio files so that the new values will take effect. Do
|
||||
not touch \*.rc files with Visual Studio resource editor, or it will break
|
||||
them.
|
||||
|
||||
## wxWidgets on Windows with Mingw
|
||||
|
||||
I never succeeded in doing this, so there are probably no end of gotchas of
|
||||
which I am unaware.
|
||||
|
||||
1. [Run the wxWidgets windows setup, wxMSW-X.X.X-Setup.exe]
|
||||
(https://github.com/wxWidgets/wxWidgets/releases/). The project will build
|
||||
with wxMSW-3.1.2, and will not build with earlier versions
|
||||
|
||||
2. [Build wxWidgets]
|
||||
(http://wiki.codeblocks.org/index.php/WxWindowsQuickRef). See also the
|
||||
instructions in wxWidgets-X.X.X/docs/msw/install.txt, or
|
||||
docs/msw-X.X.X/install.md. CodeBlocks on windows wants you to build in the
|
||||
command window.
|
||||
|
||||
3. Install [Code::Blocks](http://www.codeblocks.org/downloads).
|
||||
|
||||
4. Do the Code::Blocks [hello world tutorial]
|
||||
(http://wiki.codeblocks.org/index.php?title=WxSmith_tutorial%3a_Hello_world).
|
||||
|
||||
But Code::Blocks looks suspiciously dead.
|
||||
|
||||
## wxWidgets on Ubuntu
|
||||
|
||||
Install build essentials, and gtk (gtk being the build essentials for Ubuntu
|
||||
GUI) The native and home environment of wxWidgets is the Mate fork of Gnome,
|
||||
and its behavior in all other environments is derived from this behavior.
|
||||
|
||||
sudo apt-get install libgtk-3-dev build-essential checkinstall
|
||||
|
||||
These instructions copied from
|
||||
[How to compile and install wxWidgets on Ubuntu/Debian/Linux Mint](https://www.binarytides.com/install-wxwidgets-Ubuntu/) more or
|
||||
less wholesale.
|
||||
|
||||
Download [`Source code tar.gz`](https://github.com/wxWidgets/wxWidgets/releases/) from the latest releases.
|
||||
|
||||
Place the source in the directory `code/wxWidgets/`.
|
||||
|
||||
See the instructions in
|
||||
`code/wxWidgets/docs/gtk/install.txt`
|
||||
|
||||
```bash
|
||||
cd code/wxWidgets
|
||||
mkdir gtk-build
|
||||
cd gtk-build/
|
||||
./configure --disable-shared --enable-unicode
|
||||
make
|
||||
```
|
||||
|
||||
## Memory management in wxWidgets
|
||||
|
||||
In C++17, if you are using exceptions, everything is owned by the stack, and
|
||||
heap values are owned by stack variables which release the heap value in
|
||||
their destructor. `std::unique_ptr` and `std::shared_ptr`, and your own
|
||||
custom heap owning objects.
|
||||
|
||||
In wxWidgets, ownership is complicated, because different aspects of
|
||||
ownership are owned in different hierarchies.
|
||||
|
||||
The destructors of modal windows get called in the normal fashion, but modeless windows are always referenced in derived code by non owning
|
||||
pointers pointing into the heap. They are owned by their parent window
|
||||
when their constructor calls its base constructor, and their derived
|
||||
destructor somehow mysteriously gets called by wxWidgets after
|
||||
wxWidgets finishes destroying all the windows that the higher level
|
||||
window owns.
|
||||
|
||||
Suppose you have a complicated window with a complicated hierarchy of
|
||||
modeless windows in a complicated hierarchy of sizers.
|
||||
|
||||
Suppose it wants to destroy one of its many children?
|
||||
|
||||
Then it asks the child for the child's sizer, detaches it, calls destroy
|
||||
on the child, which somehow eventually calls the highly derived
|
||||
child destructor, and tells the sizers to do layout again.
|
||||
|
||||
A sizer full of windows can be treated as if it was an owning window by
|
||||
calling wxSizer::Clear on it with the argument to delete child windows.
|
||||
But otherwise the owning window that wants to delete one particular
|
||||
window by derived code has to do so by non owning pointers pointing into
|
||||
the heap
|
||||
|
||||
wxWidgets has a windows hierarchy, with windows owning windows, with memory
|
||||
ownership by windows, and events being passed up the hierarchy.
|
||||
|
||||
wxWidgets has the bind hierarchy, where an object can get events from an
|
||||
object with a different lifetime, so it has to unbind in its destructor.
|
||||
(Except in the normal case where all graphical objects get constructed at the
|
||||
start of the program and destructed at the end of the program.)
|
||||
|
||||
And wxWidgets has the sizer hierarchy, which allocates screen space to
|
||||
child windows within the parent window.
|
||||
|
||||
Sizer objects have to be detached, and then explicitly deleted, and child
|
||||
windows have to be detached from the sizer hierarchy, then explicitly
|
||||
destroyed. And then you call layout on the master sizer of the owning window.
|
||||
|
||||
And then you have the tab hierarchy. You have to tell the main window the tab
|
||||
order of its child windows, which is by default the order of their creation.
|
||||
|
||||
If you are potentially managing an indefinitely large number of windows,
|
||||
like a browser, you need the wxNotebook control. (Which lacks a close
|
||||
button on the tabs)
|
||||
|
||||
So, most complex case: You have sizer that acts like a window within a
|
||||
larger window. It is a sizer within a larger sizer. It gets constructed in
|
||||
response to user events, and destructed in response to user events, while
|
||||
the main window continues. The windows within the frame are direct
|
||||
children of larger window, and they are also children of the sizer, which is
|
||||
a child of the main sizer, which is owned and controlled by the larger
|
||||
window.
|
||||
|
||||
But, for future integration with wxNotebook, probably best to put it into an
|
||||
object of type wxPanel. So you define an object that will contain ordinary
|
||||
dumb pointers to all these objects, which takes the parent window and the
|
||||
parent sizer as arguments. On construction it constructs all the wxWidgets
|
||||
child windows that it needs, with the main window as their window parent,
|
||||
puts them into its sizer and prepends its sizer to the main sizer as their
|
||||
sizer grandparent, binds them, calls layout on the main sizer, and on
|
||||
destruction unbinds them, detaches them from their sizer, calls delete on the
|
||||
sizer objects, then destroy on the window objects and then calls layout on
|
||||
the main sizer.
|
||||
|
||||
But the way the experts do it is to make that object a wxPanel window.
|
||||
|
||||
On construction, first construct windows, then put them into the sizer, then
|
||||
bind them, then call layout. On destruction, first unbind, then detach from
|
||||
sizer, then destroy sizer objects, then destroy windows objects, then call
|
||||
layout on the main sizer.
|
||||
|
||||
## wxAppConsole
|
||||
|
||||
Generates an event based program with no gui, console input and console
|
||||
output. Should use only objects in wxBase. wxBase is a separate library, and
|
||||
you should avoid linking to all the other libraries, except wxNet, and not
|
||||
even wxCore, because if you are linking to another library, the other
|
||||
routines in the other library will expect a gui.
|
||||
|
||||
Our program that synchronizes with other peers is going to need to run as a
|
||||
daemon on Linux and service on windows, with no UI except it talks to the
|
||||
client wallet, which will run as a regular gui program, possibly on another
|
||||
machine far away.
|
||||
|
||||
It is probably easier if we use the same library, wxWidgets, for OS
|
||||
independence for both daemon and client.
|
||||
|
||||
## wxWidgets support
|
||||
|
||||
[wxWidgets mailing list.]
|
||||
(https://www.wxwidgets.org/support/mailing-lists/guide/) I have never used
|
||||
this. I have often had my share of grief on wxWidgets, but by the time I
|
||||
reduced it to a problem capable of being addressed on the mailing list, the
|
||||
problem was always solved.
|
||||
|
||||
Which tells me that wxWidgets is pretty good, and the considerable grief that
|
||||
it generates is only that which is inevitable on a multiplatform gui.
|
||||
|
||||
# Instructions for Libsodium
|
||||
|
||||
Supports Ristretto25519, the crypto tool that I am competent to use. Nothing else seems to support this.
|
||||
|
||||
[Libsodium](https://download.libsodium.org/libsodium/content/) is the authoritative library, derived most directly from Bernstein.
|
||||
|
||||
git clone https://github.com/jedisct1/libsodium.git
|
||||
cd libsodium
|
||||
|
||||
Compiled as a separate project under visual studio. Retarget the solution to use the current libraries, retarget to use x64, and change the code generation default (properties/C++/Code Generation/Runtime Library). For your visual studio project to include the sodium.h file, you have to define SODIUM_STATIC before including the header file when linking to the static sodium library.
|
||||
|
||||
```h
|
||||
#define SODIUM_STATIC
|
||||
#include <sodium.h>
|
||||
#pragma comment(lib, "libsodium.lib")
|
||||
```
|
||||
|
||||
To link to libsodium.lib have to add `$(SODIUM)\Build\Debug\x64` to Visual Studio/project properties/linker/General/Additional Library Directories, where SODIUM is the environment variable pointing to the root libsodium directory, and similarly for the Release version of your code `$(SODIUM)\Build\Release\x64`. Assembly code is disabled by default, need to re-enable it once unit test works with encrypted utd communication.
|
||||
|
||||
We need to eventually re-organize as a git subproject and autotools subproject, but, for the moment, it is a separate library project, hence the environment variable SODIUM.
|
||||
|
||||
Might be better to not use environment variables $(SODIUM) and to use Git submodules and autotools submodules instead, as wxWidgets does internally.
|
||||
|
||||
Set the libsodium project properties to DebugLib x64
|
||||
|
||||
Set the libsodium project properties/General/configuration type to static (should be static already if you have selected Debug Lib and ReleaseLib)
|
||||
|
||||
It provides all the crypto algorithms you need, including Argon2 for password stretching, and freedom from NSA. Neglects to include base 58 encoding. Wrote my own base 64 library, which differs from the standard base 64 library in being oriented to small items, addressing arbitrary bit fields rather than blocks of three bytes, in being url safe, and in mapping 0, o, and O to zero, and mapping 1, i, I, and l to one, to allow for human entry.
|
||||
|
||||
Going to need an algorithm for generating passphrases, but passphrases will be hashed and argoned,
|
||||
|
||||
# Instructions for Sqlite
|
||||
|
||||
Sqlite is not incorporated as an already built library, nor as a library at
|
||||
all, but as source code.
|
||||
|
||||
When you add the sqlite3.c amalgamation, make sure to mark it as not using
|
||||
precompiled headers before the first attempt to compile it, otherwise it
|
||||
breaks horribly, (C++ format precompiled headers being incompatible with C
|
||||
precompiled headers) and when you belatedly turn off the precompiled headers,
|
||||
some data that Visual Studio has generated hangs around, so that turning off
|
||||
the precompiled headers fails to fix the problem.
|
||||
|
||||
Similarly, if you make a typo in the include paths, it remains stuck on the old
|
||||
include paths even if you fix it. To do a real clean, close down visual
|
||||
studio, delete the directories generated by visual studio, and *then* your
|
||||
edits to the configuration will propagate properly
|
||||
|
||||
[SQLite](https://sqlite.org/index.html)
|
||||
|
||||
[Download](https://sqlite.org/download.html)
|
||||
|
||||
When creating the database, should turn WAL on: `PRAGMA journal_mode=WAL; PRAGMA synchronous=1;` but before distributing it, make sure the WAL file has been cleaned out with sqlite3_close().
|
||||
|
||||
Wal mode ensures that we never get a busy error from normal business, only under abnormal conditions, provided that only one thread in one process ever writes to the database. Which means we do not face the hard problem of handling the busy error.
|
||||
|
||||
We have also chosen our threading model so that database connections and their associated compiled sql statements are thread local, but one database connection can be connected to many databases, and each database can have many database connections from many threads and many processes
|
||||
|
||||
Wal mode in the default settings means that writes are normally instantaneous, because they do not hit the disk, but every so often, on completing a transaction, four megabytes get written to disk (which saves a whole lot of read/writes to disk, replacing them with sequential writes without too many reads). So it is probably not a good idea to download large amounts of data from the internet on the UI thread, because every so often the write thread is going to do four megabytes of actual write to actual disk.
|
||||
|
||||
In Wal mode, recently “written” transaction data will be read from memory.
|
||||
So it is a premature optimization to keep data in program memory, when the
|
||||
same data is probably in Wal memory or in cache.
|
||||
|
||||
Because Wal writes are near instantaneous, multiple threads writing seldom
|
||||
block each other – but if one thread is busy writing while the previous
|
||||
thread is busy flushing four megabytes of actual data to disk, the Wal file
|
||||
may grow excessively. According to the documentation, a write thread does the
|
||||
flushing, but it looks to me that a background thread does the flushing.
|
||||
|
||||
But the UI thread will typically only be doing reads and writes every now and
|
||||
then, so if one has only one continually writing thread, you don’t have to
|
||||
use restart or full which means you very rarely get busy calls, so do not
|
||||
have to handle them elegantly and efficiently.
|
||||
|
||||
I suspect that the wal file gets cleaned mighty fast, so before doing anything clever, measure by running `PRAGMA wal_checkpoint` every minute or so, which returns a table of one row and three columns telling you how much stuff needs to be written when the checkpoint started, and how much was going to be written when it completed, leaving another thread working on actually writing it. (Two PRAGMA wal_checkpoints in rapid succession are apt to give the same answer) Also log how long the checkpoint took. If the second column is substantially larger than a thousand, and the third column is kind of small, you have a checkpoint problem. This problem is unlikely to happen, because the disk has a much bigger pipe than the network, so it looks to me that with the usual settings, wal and passive checkpointing, all the actual disk write IO is going to take place in a background thread, because we are just not going to be disk bound on updates and writes. We may find ourselves disk bound on random access reads, but Wal, transactions, and checkpoints are not relevant to that problem.
|
||||
|
||||
```bash
|
||||
del C:\Users\studi\AppData\Local\RoBlockchain\RoData3cx8bdx.sqlite
|
||||
sqlite3 C:\Users\studi\AppData\Local\RoBlockchain\RoData3cx8bdx.sqlite
|
||||
```
|
||||
|
||||
Where the file `RoData3cx8bdx.sqlite` contains:
|
||||
|
||||
```sql
|
||||
PRAGMA encoding = "UTF-8";
|
||||
PRAGMA journal_mode=WAL;
|
||||
PRAGMA foreign_keys = OFF;
|
||||
PRAGMA synchronous = 1;
|
||||
CREATE TABLE w( --wallets
|
||||
PublicKey BLOB PRIMARY KEY NOT NULL UNIQUE,
|
||||
Name TEXT NOT NULL UNIQUE, -- automatically creates index
|
||||
PetName TEXT,
|
||||
title TEXT
|
||||
);
|
||||
CREATE TABLE MasterSecret(
|
||||
PublicKey BLOB PRIMARY KEY NOT NULL UNIQUE,
|
||||
Secret BLOB,
|
||||
Expires INTEGER NOT NULL,
|
||||
FOREIGN KEY(PublicKey) REFERENCES w(PublicKey)
|
||||
);
|
||||
INSERT INTO w VALUES(X'deadbeef','Adam',NULL,NULL);
|
||||
.dump
|
||||
.output RoData3cx8bdx.sql
|
||||
.dump
|
||||
.exit
|
||||
```
|
||||
|
||||
We are stashing everything in the user specific directory `wxStandardPaths::GetAppDocumentsDir()`
|
||||
and the user specific database, which is wrong. Eventually we will need two databases, one global to all users on a particular machine, which you select when you install, and one for each particular user, that gets generated when the particular user first runs his wallet.
|
||||
|
||||
Further confusing matters, wxConfigBase settings are always per user on windows.
|
||||
|
||||
At the moment, only one database and one config object.
|
||||
|
||||
In our own code, we don’t need to issue `PRAGMA synchronous = 1; PRAGMA
|
||||
foreign_keys = ON;` because we modify the sqlite3.c with the following:
|
||||
|
||||
I need to customize the sqlite3.c amalgamation with my own custom compile options as follows.
|
||||
|
||||
```C
|
||||
//My custom compile options
|
||||
#define SQLITE_DQS 0 //Doublequote names, single quote strings. This setting disables the double-quoted string literal misfeature.
|
||||
#define SQLITE_THREADSAFE 2 //One thread, one database connection. Data structures such as compiled SQL are threadlocal. But sqlite3 is empowered to do its own multithreading. Many databases per database connection. Database connection and compiled sql statements are threadlocal. last_insert_rowid() is not subject to race conditions in this mode, returning the most recent rowid generated by the thread.
|
||||
#define SQLITE_DEFAULT_MEMSTATUS 0 //Don’t track memory usage. Disables the ability of the program using sqlite3 to monitor its memory usage. This setting causes the sqlite3_status() interfaces that track memory usage to be disabled. This helps the sqlite3_malloc() routines run much faster, and since SQLite uses sqlite3_malloc() internally, this helps to make the entire library faster.
|
||||
#define SQLITE_DEFAULT_WAL_SYNCHRONOUS 1 // in WAL mode, recent changes to the database might be rolled back by a power loss, but the database will not be corrupted. Furthermore, transaction commit is much faster in WAL mode using synchronous=NORMAL than with the default synchronous=FULL. For these reasons, it is recommended that the synchronous setting be changed from FULL to NORMAL when switching to WAL mode. This compile-time option will accomplish that.
|
||||
#define SQLITE_DEFAULT_FOREIGN_KEYS 0 //Dont handle foreign key constraints. Programmer has to do it himself.
|
||||
#define SQLITE_LIKE_DOESNT_MATCH_BLOBS 1 //Blobs are not strings. Historically, SQLite has allowed BLOB operands to the LIKE and GLOB operators. But having a BLOB as an operand of LIKE or GLOB complicates and slows the LIKE optimization. When this option is set, it means that the LIKE and GLOB operators always return FALSE if either operand is a BLOB. That simplifies the implementation of the LIKE optimization and allows queries that use the LIKE optimization to run faster.
|
||||
#define SQLITE_MAX_EXPR_DEPTH 0 //Setting the maximum expression parse-tree depth to zero disables all checking of the expression parse-tree depth, which simplifies the code resulting in faster execution, and helps the parse tree to use less memory.
|
||||
#define SQLITE_OMIT_DECLTYPE 1 // By omitting the (seldom-needed) ability to return the declared type of columns from the result set of query, prepared statements can be made to consume less memory.
|
||||
#define SQLITE_OMIT_DEPRECATED 1
|
||||
#define SQLITE_DQS 0 //Don’t accept double quoted string literals.
|
||||
#define SQLITE_OMIT_PROGRESS_CALLBACK 1
|
||||
#define SQLITE_OMIT_SHARED_CACHE 1
|
||||
#define SQLITE_OMIT_UTF16 1
|
||||
#define SQLITE_USE_ALLOCA 1 //Make use of alloca() for dynamically allocating temporary stack space for use within a single function, on systems that support alloca(). Without this option, temporary space is allocated from the heap
|
||||
#define SQLITE_OMIT_LOAD_EXTENSION 1
|
||||
#define SQLITE_TEMP_STORE 3 //Temporary files are in memory
|
||||
#define SQLITE_OMIT_AUTOINIT 1 //The SQLite library needs to be initialized using a call to sqlite3_initialize() before certain interfaces are used. This initialization normally happens automatically the first time it is needed. However, with the SQLITE_OMIT_AUTOINIT option, the automatic initialization is omitted. This helps many API calls to run a little faster (since they do not have to check to see if initialization has already occurred and then run initialization if it has not previously been invoked) but it also means that the application must call sqlite3_initialize() manually. If SQLite is compiled with -DSQLITE_OMIT_AUTOINIT and a routine like sqlite3_malloc() or sqlite3_vfs_find() or sqlite3_open() is invoked without first calling sqlite3_initialize(), the likely result will be a segfault
|
||||
//end my custom compile options*/
|
||||
```
|
||||
|
||||
[To compile the standard sqlite3.exe tool](https://www.sqlite.org/howtocompile.html).
|
||||
|
||||
We don’t need to issue `PRAGMA journal_mode=WAL; PRAGMA schema.synchronous
|
||||
= 1;` in our own code except if we create a new database, which our code will
|
||||
typically not do, because our install will copy a working database.
|
||||
|
||||
In our code, we do need to issue `PRAGMA optimize;` on close.
|
||||
|
||||
Missing from their short summary of the C interface is:
|
||||
|
||||
```C
|
||||
/*
|
||||
** Return UTF-8 encoded English language explanation of the most recent
|
||||
** error.
|
||||
*/
|
||||
SQLITE_API const char *sqlite3_errmsg(sqlite3 *db)
|
||||
```
|
||||
|
||||
The general rule being that for any unexpected value of rc, any value other than `SQLITE_OK`, `SQLITE_ROW`, and `SQLITE_DONE`, display that message. `sqlite3_exec` wraps this, but we should probably not use `sqlite3_exec`.
|
||||
|
||||
SQlite’s pragma for identifying that something is the right application file format does not seem to work, or maybe it does work and there is some magic that I am not aware of. But this does not matter, because it only protects against Murphy, not Machiavelli, and we are going to need to guard against Machiavelli.
|
||||
|
||||
We need to make sure that compiled sql statements are only compiled after the database connection is live, and destroyed before the database is closed.
|
||||
|
||||
The order of construction and destruction within an object is
|
||||
|
||||
- First, and only for the constructor of the most derived class as described below, virtual base classes shall be initialized in the order they appear on a depth-first left-to-right traversal of the directed acyclic graph of base classes, where “left-to-right” is the order of appearance of the base class names in the derived class base-specifier-list.
|
||||
- Then, direct base classes shall be initialized in declaration order as they appear in the base-specifier-list (regardless of the order of the mem-initializers).
|
||||
- Then, non-static data members shall be initialized in the order they were declared in the class definition (again regardless of the order of the mem-initializers).
|
||||
- Finally, the compound-statement of the constructor body is executed. \[ Note: the declaration order is mandated to ensure that base and member subobjects are destroyed in the reverse order of initialization.
|
||||
|
||||
Objects on the stack are destroyed in the reverse of the order that they were declared.
|
||||
|
||||
The gui object that enables the user to manipulate the contents of the database should contain the database connection, and the compiled sql objects that manipulate the database, declared in that order, so that they get destroyed in that reverse order, compiled sql being destroyed before the database connection is destroyed, and so that in order to create the gui to manipulate the database, we have to first construction a database connection. Since the creation could throw, and we need to handle the throw in the gui, we need a gui already constructed, which then attempts to construct more gui, and informs the user if the construction fails.
|
||||
|
||||
So the outer gui figures out a plausible database name, and then attempts to construct the inner gui whose constructor then attempts to open a database connection and make sure the database is in a good state, and throws if it cannot construct a good connection to a good database.
|
||||
|
||||
To protect against Machiavelli, peers have to by default check the early part of the block chain to make sure it has the correct hash, preferably in obfuscated and undocumented code, accessed through a define located in a rather random header file so that Machiavelli will have to audit my code, then supply his victims with the new code as well as the new database. Or just hard code the genesis block, though Machiavelli will have an easier time finding that one. Maybe both. Hard code the genesis block, and have obfuscated hard code for the early blocks.
|
611
docs/libraries/cpp_automatic_memory_management.md
Normal file
@ -0,0 +1,611 @@
|
||||
---
|
||||
title:
|
||||
C++ Automatic Memory Management
|
||||
---
|
||||
# Memory Safety
|
||||
Modern, mostly memory safe C++, is enforced by:\
|
||||
|
||||
- gsl
|
||||
- Microsoft safety checker
|
||||
- Guidelines
|
||||
- language checker
|
||||
|
||||
`$ clang-tidy test.cpp -checks=clang-analyzer-cplusplus*, cppcoreguidelines-*, modernize-*` will catch most of the issues that esr
|
||||
complains about, in practice usually all of them, though I suppose that as
|
||||
the project gets bigger, some will slip through.
|
||||
|
||||
static_assert(__cplusplus >= 201703, "C++ version out of date");
|
||||
|
||||
The gsl adds span for pointer arithmetic, where the
|
||||
size of the array pointed to is kept with the pointer for safe iteration and
|
||||
bounds checking during pointer maths. This should be available in the standard template library with C20.
|
||||
|
||||
Modern C++ handles arrays as arrays where possible, but they quickly
|
||||
decay to pointers – which you avoid using spans. std::array is a C array
|
||||
whose size is known at compile time, and which is protected from decay to
|
||||
a pointer. std::vector is a dynamically resizable and insertable array
|
||||
protected from decay to a pointer – which can have significant overheads.
|
||||
std::make_unique, std::make_shared create pointers to memory managed
|
||||
objects. (But single objects, not an array, use spans for pointer
|
||||
arithmetic)
|
||||
|
||||
auto sp = std::make_shared<int>(42);
|
||||
std::weak_ptr<T> wp{sp};
|
||||
|
||||
# Array sizing and allocation
|
||||
|
||||
/* This code creates a bunch of "brown dog" strings on the heap to test automatic memory management. */
|
||||
char ca[]{ "red dog" }; //Automatic array sizing
|
||||
std::array<char,8> arr{"red dog"}; //Requires #include <array>
|
||||
/* No automatic array sizing, going to have to count your initializer list. */
|
||||
/* The pointer of the underlying array is referenced by &arr[0] but arr is not the underlying array, nor a pointer to it. */
|
||||
/* [0] invokes operator[], and operator[] is the member function that accesses the underlying array.*/
|
||||
/* The size of the underlying array is referenced by arr.size();*/
|
||||
/* size known at compile time, array can be returned from a function getting the benefits of stack allocation.*/
|
||||
// can be passed around like POD
|
||||
char *p = new char[10]{ "brown dog" }; //No automatic array
|
||||
// sizing for new
|
||||
std::unique_ptr<char[]>puc{ p }; // Now you do not have
|
||||
// to remember to delete p
|
||||
auto puc2 = std::move(puc); /* No copy constructor. Pass by reference, or pass a view, such as a span.*/
|
||||
    std::unique_ptr<char[]> puc3{ new char[10]{ "brown dog" } }; /* Must be unique_ptr<char[]>, not unique_ptr<char>, so that delete[] rather than delete is invoked. */
|
||||
/* Array size unknown at compile or run time, needs a span, and you have to manually count the initialization list. */
|
||||
/* Compiler guards against overflow, but does not default to the correct size.*/
|
||||
/* You can just guess a way too small size, and the compiler in its error message will tell you what the size should be. */
|
||||
auto pu = std::make_unique<char[]>(10); // uninitialized,
|
||||
// needs procedural initialization.
|
||||
|
||||
/* span can be trivially created from a compile time declared array, an std:array or from a run time std:: vector, but then these things already have the characteristics of a span, and they own their own storage. */
|
||||
/* You would use a span to point into an array, for example a large blob containing smaller blobs.*/
|
||||
|
||||
// Placement New:
|
||||
char *buf = new char[1000]; //pre-allocated buffer
|
||||
char *p = buf;
|
||||
MyObject *pMyObject = new (p) MyObject();
|
||||
    p += ((sizeof(MyObject) + 7) / 8) * 8; // round up to 8-byte alignment
|
||||
    /* Problem is that you will have to explicitly call the destructor on each object before freeing your buffer. */
|
||||
/* If your objects are POD plus code for operating on POD, you don’t have to worry about destructors.*/
|
||||
// A POD object cannot do run time polymorphism.
|
||||
/* The pointer referencing it has to be of the correct compile time type, and it has to explicitly have the default constructor when constructed with no arguments.*/
|
||||
/* If, however, you are building a tree in the pre-allocated buffer, no sweat. */
|
||||
/* You just destruct the root of the tree, and it recursively destructs all its children. */
|
||||
/* If you want an arbitrary graph, just make sure you have owning and non owning pointers, and the owning pointers form a tree. */
|
||||
/* Anything you can do with run time polymorphism, you can likely do with a type flag.*/
|
||||
|
||||
static_assert ( std::is_pod<MyType>() , "MyType for some reason is not POD" );
|
||||
class MyClass
|
||||
{
|
||||
public:
|
||||
MyClass()=default; // Otherwise unlikely to be POD
|
||||
MyClass& operator=(const MyClass&) = default; // default assignment Not actually needed, but just a reminder.
|
||||
};
|
||||
|
||||
### alignment
|
||||
|
||||
```c++
|
||||
// every object of type struct_float will be aligned to alignof(float) boundary
|
||||
// (usually 4)
|
||||
struct alignas(float) struct_float {
|
||||
// your definition here
|
||||
};
|
||||
|
||||
// every object of type sse_t will be aligned to 256-byte boundary
|
||||
struct alignas(256) sse_t
|
||||
{
|
||||
float sse_data[4];
|
||||
};
|
||||
|
||||
// the array "cacheline" will be aligned to 128-byte boundary
|
||||
alignas(128) char cacheline[128];
|
||||
```
|
||||
|
||||
# Construction, assignment, and destruction
|
||||
|
||||
six things: ([default
|
||||
constructor](https://en.cppreference.com/w/cpp/language/default_constructor),
|
||||
[copy
|
||||
constructor](https://en.cppreference.com/w/cpp/language/copy_constructor),
|
||||
[move
|
||||
constructor](https://en.cppreference.com/w/cpp/language/move_constructor),
|
||||
[copy
|
||||
assignment](https://en.cppreference.com/w/cpp/language/copy_assignment),
|
||||
[move
|
||||
assignment](https://en.cppreference.com/w/cpp/language/move_assignment)
|
||||
and [destructor](https://en.cppreference.com/w/cpp/language/destructor))
|
||||
are generated by default – except when they are not.
|
||||
|
||||
So it is arguably a good idea to explicitly declare them as default or
|
||||
deleted.
|
||||
|
||||
Copy constructors
|
||||
|
||||
A(const A& a)
|
||||
|
||||
Copy assignment
|
||||
|
||||
    A& operator=(const A& other)
|
||||
|
||||
Move constructors
|
||||
|
||||
class_name ( class_name && other)
|
||||
A(A&& o)
|
||||
D(D&&) = default;
|
||||
|
||||
Move assignment operator
|
||||
|
||||
V& operator=(V&& other)
|
||||
|
||||
Move constructors
|
||||
|
||||
class_name ( class_name && )
|
||||
|
||||
## rvalue references
|
||||
|
||||
Move constructors and copy constructors primarily exist to tell the
|
||||
compiler how to handle temporary values, rvalues, that have references to possibly
|
||||
costly resources.
|
||||
|
||||
`class_name&&` is rvalue reference, the canonical example being a reference to a compiler generated temporary.
|
||||
|
||||
The primary purpose of rvalue references is to support move semantics in
|
||||
objects that reference resources, primarily unique_pointer.
|
||||
|
||||
`std::move(t)` is equivalent to `static_cast<typename std::remove_reference<decltype(t)>::type&&>(t)`, causing move
|
||||
semantics to be generated by the compiler.
|
||||
|
||||
`t`, the compiler assumes, is converted by your move constructor or move assignment into a valid state where your destructor will not need to do anything very costly.
|
||||
|
||||
`std::forward(t)` causes move semantics to be invoked iff the thing referenced
|
||||
is an rvalue, typically a compiler generated temporary, *conditionally*
|
||||
forwarding the resources.
|
||||
|
||||
where `std::forward` is defined as follows:
|
||||
|
||||
template< class T > struct remove_reference {
|
||||
typedef T type;
|
||||
};
|
||||
template< class T > struct remove_reference<T&> {
|
||||
typedef T type;
|
||||
};
|
||||
template< class T > struct remove_reference<T&&> {
|
||||
typedef T type;
|
||||
};
|
||||
|
||||
template<class S>
|
||||
S&& forward(typename std::remove_reference<S>::type& a) noexcept
|
||||
{
|
||||
return static_cast<S&&>(a);
|
||||
}
|
||||
|
||||
`std::move(t)` and `std::forward(t)` don't actually perform any action
|
||||
in themselves, rather they cause the code referencing `t` to use the intended
|
||||
copy and intended assignment.
|
||||
|
||||
## constructors and destructors
|
||||
|
||||
If you declare the destructor deleted that prevents the compiler from
|
||||
generating its own, possibly disastrous, destructor, but then, of
|
||||
course, you have to define your own destructor with the exact same
|
||||
signature, which would ordinarily stop the compiler from doing that
|
||||
anyway.
|
||||
|
||||
When you declare your own constructors, copiers, movers, and deleters,
|
||||
you should generally mark them noexcept.
|
||||
|
||||
struct foo {
|
||||
foo() noexcept {}
|
||||
foo( const foo & ) noexcept { }
|
||||
foo( foo && ) noexcept { }
|
||||
~foo() {}
|
||||
};
|
||||
|
||||
Destructors are noexcept by default. If a destructor throws an exception as
|
||||
a result of a destruction caused by an exception, the result is undefined,
|
||||
and usually very bad. This problem is resolved in complicated ad hoc
|
||||
ways that are unlikely to be satisfactory.
|
||||
|
||||
If you need to define a copy constructor, probably also need to define
|
||||
an assignment operator.
|
||||
|
||||
t2 = t1; /* calls assignment operator, same as "t2.operator=(t1);" */
|
||||
Test t3 = t1; /* calls copy constructor, same as "Test t3(t1);" */
|
||||
|
||||
## casts
|
||||
|
||||
You probably also want casts. The surprise thing about a cast operator
|
||||
is that its return type is not declared, nor permitted to be declared,
|
||||
DRY. Operator casts are the same thing as constructors, except declared
|
||||
in the source class instead of the destination class, hence most useful
|
||||
when you are converting to a generic C type, or to the type of an
|
||||
external library that you do not want to change.
|
||||
|
||||
struct X {
|
||||
int y;
|
||||
operator int(){ return y; }
|
||||
operator const int&(){ return y; } /* C habits would lead you to incorrectly expect "return &y;", which is what is implied under the hood. */
|
||||
operator int*(){ return &y; } // Hood is opened.
|
||||
};
|
||||
|
||||
Mpir, the Visual Studio skew of GMP infinite precision library, has some
|
||||
useful and ingenious template code for converting C type functions of
|
||||
the form `SetAtoBplusC(void * a, void * b, void * c);` into C++
|
||||
expressions of the form `a = b+c*d;`. It has a bunch of intermediate
|
||||
types with no real existence, `__gmp_expr<>` and `__gmp_binary_expr<>`
|
||||
and methods with no real existence, which generate the appropriate
|
||||
calls, a templated function of potentially unlimited complexity, to
|
||||
convert such an expression into the relevant C type calls using
|
||||
pointers. See section mpir-3.0.0.pdf, section 17.5 “C++ Internals”.
|
||||
|
||||
I don’t understand the Mpir code, but I think what is happening is that
|
||||
at run time, the binary expression operating on two base types creates a
|
||||
transient object on the stack containing pointers to the two base types,
|
||||
and the assignment operator and copy create operator then call the
|
||||
appropriate C code, and the operator for entities of indefinite
|
||||
complexity creates base type values on the stack and a binary expression
|
||||
operator pointing to them.
|
||||
|
||||
Simpler, but introducing a redundant copy, to always generate
|
||||
intermediate values on the stack, since we have fixed length objects
|
||||
that do not need dynamic heap memory allocation, not that costly, and
|
||||
they are not that big, at worst thirty two bytes, so clever code is apt
|
||||
to cost in overheads of pointer management
|
||||
|
||||
That just means we are putting 256 bits of intermediate data on the
|
||||
stack instead of 128, hardly a cost worth worrying about. And in the
|
||||
common bad case, (a+b)\*(c+d) clever coding would only save one stack
|
||||
allocation and redundant copy.
|
||||
|
||||
# Template specialization
|
||||
|
||||
namespace N {
|
||||
template<class T> class Y { /*...*/ }; // primary template
|
||||
template<> class Y<double> ; // forward declare specialization for double
|
||||
}
|
||||
template<>
|
||||
class N::Y<double> { /*...*/ }; // OK: specialization in same namespace
|
||||
|
||||
is used when you have sophisticated template code, because you have to
|
||||
use recursion for looping as the Mpir system uses it to evaluate an
|
||||
arbitrarily complex recursive expression – but I think my rather crude
|
||||
implementation will not be nearly so clever.
|
||||
|
||||
extern template int fun(int);
|
||||
/*prevents redundant instantiation of fun in this compilation unit – and thus renders the code for fun unnecessary in this compilation unit.*/
|
||||
|
||||
# Template traits, introspection
|
||||
|
||||
Template traits: C++ has no syntactic sugar to ensure that your template
|
||||
is only called using the classes you intend it to be called with.
|
||||
|
||||
Often you want different templates for classes that implement similar functionality in different ways.
|
||||
|
||||
This is the entire very large topic of template time, compile time code, which is a whole new ball of wax that needs to be dealt with elsewhere
|
||||
|
||||
# Abstract and virtual
|
||||
|
||||
An abstract base class is a base class that contains a pure virtual
|
||||
function ` virtual void features() = 0;`.
|
||||
|
||||
A class can have a virtual destructor, but not a virtual constructor.
|
||||
|
||||
If a class contains virtual functions, then the default constructor has
|
||||
to initialize the pointer to the vtable. Otherwise, the default
|
||||
constructor for a POD class is empty, which implies that the default
|
||||
destructor is empty.
|
||||
|
||||
The copy and swap copy assignment operator, a rather slow and elaborate
|
||||
method of guaranteeing that an exception will leave the system in a good
|
||||
state, is never generated by default, since it always relates to rather
|
||||
clever RAII.
|
||||
|
||||
An interface class is a class that has no member variables, and where
|
||||
all of the functions are pure virtual! In other words, the class is
|
||||
purely a definition, and has no actual implementation. Interfaces are
|
||||
useful when you want to define the functionality that derived classes
|
||||
must implement, but leave the details of how the derived class
|
||||
implements that functionality entirely up to the derived class.
|
||||
|
||||
Interface classes are often named beginning with an I. Here’s a sample
|
||||
interface class:
|
||||
|
||||
class IErrorLog
|
||||
{
|
||||
public:
|
||||
virtual bool openLog(const char *filename) = 0;
|
||||
virtual bool closeLog() = 0;
|
||||
|
||||
virtual bool writeError(const char *errorMessage) = 0;
|
||||
|
||||
virtual ~IErrorLog() {} // make a virtual destructor in case we delete an IErrorLog pointer, so the proper derived destructor is called
|
||||
// Notice that the virtual destructor is declared to be trivial, but not declared =0;
|
||||
};
|
||||
|
||||
[Override
|
||||
specifier](https://en.cppreference.com/w/cpp/language/override)
|
||||
|
||||
struct A
|
||||
{
|
||||
virtual void foo();
|
||||
void bar();
|
||||
};
|
||||
|
||||
struct B : A
|
||||
{
|
||||
void foo() const override; // Error: B::foo does not override A::foo
|
||||
// (signature mismatch)
|
||||
void foo() override; // OK: B::foo overrides A::foo
|
||||
void bar() override; // Error: A::bar is not virtual
|
||||
};
|
||||
|
||||
Similarly [Final
|
||||
specifier](https://en.cppreference.com/w/cpp/language/final)
|
||||
|
||||
[To obtain aligned
|
||||
storage](http://www.cplusplus.com/reference/type_traits/aligned_storage/)for
|
||||
use with placement new
|
||||
|
||||
    void* p = std::aligned_alloc(alignof(MyClass), sizeof(MyClass));
|
||||
MyClass* pmc = new (p) MyClass; //Placement new.
|
||||
// ...
|
||||
pmc->~MyClass(); //Explicit call to destructor.
|
||||
    std::free(p); // aligned_alloc memory is released with free
|
||||
|
||||
# GSL: Guideline Support Library
|
||||
|
||||
The Guideline Support Library (GSL) contains functions and types that
|
||||
are suggested for use by the C++ Core Guidelines maintained by the
|
||||
Standard C++ Foundation. This repo contains [Microsoft’s implementation
|
||||
of GSL](https://github.com/Microsoft/GSL).
|
||||
|
||||
git clone https://github.com/Microsoft/GSL.git
|
||||
cd gsl
|
||||
git tag
|
||||
git checkout tags/v2.0.0
|
||||
|
||||
Which implementation mostly works on gcc/Linux, but is canonical on
|
||||
Visual Studio.
|
||||
|
||||
For usage of spans ([the replacement for bare naked non owning pointers
|
||||
subject to pointer
|
||||
arithmetic)](http://codexpert.ro/blog/2016/03/07/guidelines-support-library-review-spant/)
|
||||
|
||||
For usage of string spans ([String
|
||||
spans](http://codexpert.ro/blog/2016/03/21/guidelines-support-library-review-string_span/)
|
||||
These are pointers to char arrays. There does not seem to be a UTF‑8
|
||||
string_span.
|
||||
|
||||
GSL is a preview of C++20, as boost contained a preview of C++11.
|
||||
|
||||
It is disturbingly lacking in official documentation, perhaps because
|
||||
still subject to change.
|
||||
|
||||
[Unofficial
|
||||
documentation](http://modernescpp.com/index.php/c-core-guideline-the-guidelines-support-library)
|
||||
|
||||
It provides an optional fix for C’s memory management problems, while
|
||||
still retaining backward compatibility to the existing pile of rusty
|
||||
razor blades and broken glass.
|
||||
|
||||
# The Curiously Recurring Template Pattern
|
||||
|
||||
[CRTP](https://www.fluentcpp.com/2017/05/16/what-the-crtp-brings-to-code/),
|
||||
makes the relationship between the templated base class or classes and
|
||||
the derived class cyclic, so that the derived class tends to function as
|
||||
real base class. Useful for mixin classes.
|
||||
|
||||
template <typename T> class Mixin1{
|
||||
public:
|
||||
// ...
|
||||
void doSomething() //using the other mixin classes and the derived class T
|
||||
{
|
||||
T& derived = static_cast<T&>(*this);
|
||||
// use derived...
|
||||
}
|
||||
private:
|
||||
    Mixin1(){}; // prevents the class from being used outside the mixin
|
||||
friend T;
|
||||
};
|
||||
|
||||
    template <typename T> class Mixin2{
|
||||
public:
|
||||
// ...
|
||||
void doSomethingElse()
|
||||
{
|
||||
T& derived = static_cast<T&>(*this);
|
||||
// use derived...
|
||||
}
|
||||
private:
|
||||
Mixin2(){};
|
||||
friend T;
|
||||
};
|
||||
|
||||
    class composite: public Mixin1<composite>, public Mixin2<composite>{
|
||||
        composite( int x, char * y): Mixin1<composite>(x), Mixin2<composite>(y[0]) { ...}
|
||||
composite():composite(7,"a" ){ ...}
|
||||
}
|
||||
|
||||
# Aggregate initialization
|
||||
|
||||
A class of aggregate type has no constructors – the aggregate
|
||||
constructor is implied default.
|
||||
|
||||
A class can be explicitly defined to take aggregate initialization
|
||||
|
||||
    class T {
|
||||
T(std::initializer_list<const unsigned char> in){
|
||||
        for (auto i{in.begin()}; i != in.end(); i++){
|
||||
do stuff with i
|
||||
}
|
||||
}
|
||||
|
||||
but that does not make it of aggregate type. Aggregate type has *no*
|
||||
constructors except default and deleted constructors
|
||||
|
||||
# functional programming
|
||||
|
||||
To construct a lambda in the heap:
|
||||
|
||||
auto p = new auto([a,b,c](){})
|
||||
|
||||
Objects inside the lambda are constructed in the heap.
|
||||
|
||||
similarly placement `new`, and `unique_ptr`.
|
||||
|
||||
To template a function that takes a lambda argument:
|
||||
|
||||
template <typename F>
|
||||
void myFunction(F&& lambda){
|
||||
//some things
|
||||
|
||||
You can put a lambda in a class using decltype,and pass it around for
|
||||
continuations, though you would probably need to template the class:
|
||||
|
||||
template<class T>class foo {
|
||||
public:
|
||||
T func;
|
||||
foo(T in) :func{ in } {}
|
||||
auto test(int x) { return func(x); }
|
||||
};
|
||||
....
|
||||
auto bar = [](int x)->int {return x + 1; };
|
||||
    foo<decltype(bar)> foobar(bar);
|
||||
|
||||
But we had to introduce a name, bar, so that decltype would have
|
||||
something to work with, which lambdas are intended to avoid. If we are
|
||||
going to have to introduce a compile time name, easier to do it as an
|
||||
old fashioned function, method, or functor, as a method of a class that
|
||||
is very possibly pod.
|
||||
|
||||
If we are sticking a lambda around to be called later, might copy it by
|
||||
value into a templated class, or might put it on the heap.
|
||||
|
||||
auto bar = []() {return 5;};
|
||||
|
||||
You can give it to a std::function:
|
||||
|
||||
auto func_bar = std::function<int()>(bar);
|
||||
|
||||
In this case, it will get a copy of the value of bar. If bar had
|
||||
captured anything by value, there would be two copies of those values on
|
||||
the stack; one in bar, and one in func_bar.
|
||||
|
||||
When the current scope ends, func_bar will be destroyed, followed by
|
||||
bar, as per the rules of cleaning up stack variables.
|
||||
|
||||
You could just as easily allocate one on the heap:
|
||||
|
||||
    auto bar_ptr = std::make_unique<decltype(bar)>(bar);
|
||||
|
||||
    std::function<int(int)> increm{ [](int arg){ return arg + 1; } };
|
||||
|
||||
presumably does this behind the scenes
|
||||
|
||||
On reflection we could probably use this method to produce a
|
||||
templated function that stored a lambda somewhere in a templated class
|
||||
derived from a virtual base class for execution when the event triggered
|
||||
by the method fired, and returned a hashcode to the templated object for
|
||||
the event to use when the event fired. The event gets the event handler
|
||||
from the hashcode, and the virtual base class in the event handler fires
|
||||
the lambda in the derived class, and the lambda works as a continuation,
|
||||
operating in the context wherein it was defined, making event oriented
|
||||
programming almost as intuitive as procedural programming.
|
||||
|
||||
But then we have a problem, because we would like to store event
|
||||
handlers in the database, and restore them when program restarts, which
|
||||
requires pod event handlers, or event handlers constructible from POD
|
||||
data, which a lambda is not.
|
||||
|
||||
We could always have some event handlers which are inherently not POD
|
||||
and are never sent to a database, while other event handlers are, but
|
||||
this violates the dry design principle. To do full on functional
|
||||
programming, use std::function and std::bind, which can encapsulate
|
||||
lambdas and functors, but are slow because of dynamic allocation
|
||||
|
||||
C++ does not play well with functional programming. Most of the time you
|
||||
can do what you want with lambdas and functors, using a pod class that
|
||||
defines operator(\...)
|
||||
|
||||
# auto and decltype(variable)
|
||||
|
||||
In good c++, a tremendous amount of code behavior is specified by type
|
||||
information, often rather complex type information, and the more one’s
|
||||
code description is in types, the better.
|
||||
|
||||
But specifying types everywhere violates the dry principle, hence,
|
||||
wherever possible, use auto and decltype(variable) to avoid redundant
|
||||
and repeated type information. Wherever you can use an auto or a
|
||||
decltype for a type, use it.
|
||||
|
||||
In good event oriented code, events are not triggered procedurally, but
|
||||
by type information or data structures, and they are not handled
|
||||
procedurally, as by defining a lambda, but by defining a derived type.
|
||||
|
||||
# Variable length Data Structures
|
||||
|
||||
C++ just does not handle them well, except you embed a vector in them,
|
||||
which can result in messy reallocations.
|
||||
|
||||
One way is to drop back into old style C, and tell C++ not to fuck
|
||||
around.
|
||||
|
||||
struct Packet
|
||||
{
|
||||
unsigned int bytelength;
|
||||
unsigned int data[];
|
||||
|
||||
private:
|
||||
// Will cause compiler error if you misuse this struct
|
||||
        Packet(const Packet&); // constructors have no return type
|
||||
void operator=(const Packet&);
|
||||
};
|
||||
Packet* CreatePacket(unsigned int length)
|
||||
{
|
||||
        Packet *output = (Packet*) malloc(sizeof(Packet) + length*sizeof(unsigned int)); /* header plus the flexible array member */
|
||||
output->bytelength = length;
|
||||
return output;
|
||||
}
|
||||
|
||||
Another solution is to work around C++’s inability to handle variable
|
||||
sized objects by fixing your hash function to handle disconnected data.
|
||||
|
||||
# for_each
|
||||
|
||||
template<class InputIterator, class Function>
|
||||
Function for_each(InputIterator first, InputIterator last, Function fn){
|
||||
while (first!=last) {
|
||||
fn (*first);
|
||||
++first;
|
||||
}
|
||||
return move(fn);
|
||||
}
|
||||
|
||||
# Range-based for loop
|
||||
|
||||
    for(auto x: temporary_with_begin_and_end_members){ code; }
    for(auto& x: temporary_with_begin_and_end_members){ code; }
    for(auto&& x: temporary_with_begin_and_end_members){ code; }
|
||||
for (T thing = foo(); auto& x : thing.items()) { code; }
|
||||
|
||||
The types of the begin_expr and the end_expr do not have to be the same,
|
||||
and in fact the type of the end_expr does not have to be an iterator: it
|
||||
just needs to be able to be compared for inequality with one. This makes
|
||||
it possible to delimit a range by a predicate (e.g. “the iterator
|
||||
points at a null character”).
|
||||
|
||||
If range_expression is an expression of a class type C that has both a
|
||||
member named begin and a member named end (regardless of the type or
|
||||
accessibility of such member), then begin_expr is \_\_range.begin() and
|
||||
end_expr is \_\_range.end();
|
||||
|
||||
for (T thing = foo(); auto x : thing.items()) { code; }
|
||||
|
||||
Produces code equivalent to:
|
||||
|
||||
T thing = foo();
|
||||
auto bar = thing.items();
|
||||
    auto enditer = bar.end();
|
||||
for (auto iter = bar.begin(); iter != enditer; ++iter) {
|
||||
        auto x = *iter;
|
||||
code;
|
||||
}
|
519
docs/libraries/cpp_multithreading.md
Normal file
@ -0,0 +1,519 @@
|
||||
---
|
||||
title: C++ Multithreading
|
||||
---
|
||||
Computers have to handle many different things at once, for example
|
||||
screen, keyboard, drives, database, internet.
|
||||
|
||||
These are best represented as communicating concurrent processes, with
|
||||
channels, as in Go routines. Even algorithms that are not really handling
|
||||
many things at once, but are doing a single thing, such as everyone’s
|
||||
sample program, the sieve of Eratosthenes, are cleanly represented as
|
||||
communicating concurrent processes with channels.
|
||||
|
||||
[asynch await]:../client_server.html#the-equivalent-of-raii-in-event-oriented-code
|
||||
|
||||
On the other hand, also, not quite so cleanly, represented by [asynch await] which makes for much lighter weight code, more cleanly interfaceable with C++.
|
||||
|
||||
Concurrency is not the same thing as parallelism.
|
||||
|
||||
A node.js program is typically thousands of communicating concurrent
|
||||
processes, with absolutely no parallelism, in the sense that node.js is single
|
||||
threaded, but a node.js program typically has an enormous number of code
|
||||
continuations, each of which is in effect the state of a concurrent
|
||||
communicating process. Lightweight threads as in Go are threads that on
|
||||
hitting a pause get their stack state stashed into an event handler and
|
||||
executed by event oriented code, so one can always accomplish the same
|
||||
effect more efficiently by writing directly in event oriented code.
|
||||
|
||||
And it is frequently the case that when you cleverly implement many
|
||||
concurrent processes with more than one thread of execution, so that some
|
||||
of your many concurrent processes are executed in parallel, your program
|
||||
runs slower, rather than faster.
|
||||
|
||||
C++ multithreading is written around a way of coding that in practice does
|
||||
not seem all that useful – parallel bitbashing. The idea is that you are
|
||||
doing one thing, but dividing that one thing up between several threads to get
|
||||
more bits bashed per second, the archetypical example being a for loop
|
||||
performed in parallel, and then all the threads join after the loop is
|
||||
complete.
|
||||
|
||||
The normal case however is that you want to manage a thousand things at
|
||||
once, for example a thousand connections to the server. You are not
|
||||
worried about how many millions of floating point operations per second,
|
||||
but you are worried about processes sitting around doing nothing while
|
||||
waiting for network or disk operations to complete.
|
||||
|
||||
For this, you need concurrent communicating processes, as in Go or event
|
||||
orientation as in nginx or node.js, not necessarily parallelism,
|
||||
which C++ threads are designed around.
|
||||
|
||||
The need to deal with many peers and a potentially enormous number of
|
||||
clients suggests multiprocessing in the style of Go and node.js, rather than
|
||||
what C++ multiprocessing is designed around, suggests a very large
|
||||
number of processes that are concurrent, but not all that parallel, rather
|
||||
than a small number of processes that are concurrent and also substantially
|
||||
parallel. Representing a process by a thread runs into troubles at around
|
||||
sixty four threads.
|
||||
|
||||
It is probably efficient to represent interactions between peers as threads,
|
||||
but client/peer are going to need either events or Go lightweight threads,
|
||||
and client/client interactions are going to need events.
|
||||
|
||||
Existing operating systems run far more than sixty four threads, but this
|
||||
only works because grouped into processes, and most of those processes
|
||||
inactive. If you have more than sixty four concurrently active threads in an
|
||||
active process, with the intent that half a dozen or so of those active
|
||||
concurrent threads will be actually executing in parallel, as for example a
|
||||
browser with a thread for each tab, and sixty four tabs, that active process
|
||||
is likely to be not very active.
|
||||
|
||||
Thus scaling Apache, whether as threads on windows or processes under
|
||||
Linux, is apt to die.
|
||||
|
||||
# Need the solutions implemented by Tokio, Actix, Node.js and Go
|
||||
|
||||
Not the solutions supplied by the C++ libraries, because we are worrying
|
||||
about servers, not massive bit bashing.
|
||||
|
||||
Go routines and channels can cleanly express both the kind of problems
|
||||
that node.js addresses, and also address the kind of problem that C++
|
||||
threads address, typically that you divide a task into a dozen subtasks, and
|
||||
then wait for them all to complete before you take the next step, which are
|
||||
hard to express as node.js continuations. Goroutines are a more flexible
|
||||
and general solution, that make it easier to express a wider range of
|
||||
algorithms concisely and transparently, but I am not seeing any mass rush
|
||||
from node.js to Go. Most of the time, it is easy enough to write in code
|
||||
continuations inside an event handler.
|
||||
|
||||
The general concurrent task that Google’s massively distributed database
|
||||
is intended to express is that you have a thousand tasks each of which
|
||||
generate a thousand outputs, which get sorted, and each of the enormous
|
||||
number of items that sort into the same equivalence group gets aggregated
|
||||
in a commutative operation, which can therefore be handled by any
|
||||
number of processes in any order, and possibly the entire sort sequence
|
||||
gets aggregated in an associative operation, which can therefore be
|
||||
handled by any number of processes in any order.
|
||||
|
||||
The magic in the Google massively parallel database is that one can define a
|
||||
a massively parallel operation on a large number of items in a database
|
||||
simultaneously, much as one defines a join in SQL, and one can define
|
||||
another massively parallel operation as commutative and or associative
|
||||
operations on the sorted output of such a massively parallel operation. But
|
||||
we are not much interested in this capability. Though something
|
||||
resembling that is going to be needed when we have to shard.
|
||||
|
||||
# doing node.js in C++
|
||||
|
||||
Dumb idea. We already have the node.js solution in a Rust library.
|
||||
|
||||
Actix and Tokio are the (somewhat Cish) solutions.
|
||||
|
||||
## Use Go
|
||||
|
||||
Throw up hands in despair, and provide an interface linking Go to secure
|
||||
Zooko ids, similar to the existing interface linking it to Quic and SSL.
|
||||
|
||||
This solution has the substantial advantage that it would then be relatively
|
||||
easy to drop in the existing social networking software written in Go, such
|
||||
as Gitea.
|
||||
|
||||
We probably don’t want Go to start managing C++ spawned threads, but
|
||||
the Go documentation seems to claim that when a Go heavyweight thread
|
||||
gets stuck at a C mutex while executing C code, Go just spawns another to
|
||||
deal with the lightweight threads when the lightweight threads start piling
|
||||
up.
|
||||
|
||||
When a C++ thread wants to despatch an event to Go, it calls a Go routine
|
||||
with a select and a default, so that the Go routine will never attempt to
|
||||
pause the C++ spawned thread on the assumption that it is a Go spawned
|
||||
thread. But it would likely be safer to call Goroutines on a thread that was
|
||||
originally spawned by Go.
|
||||
|
||||
## doing it in C the C way
|
||||
|
||||
Processes represented as threads. Channels have a mutex. A thread grabs
|
||||
total exclusive ownership of a channel whenever it takes something out or
|
||||
puts something in. If a channel is empty or full, it then waits on a
|
||||
condition on the mutex, and when the other thread grabs the mutex and
|
||||
makes the channel ready, it notices that the other process or processes are
|
||||
waiting on condition, the condition is now fulfilled, and sends a
|
||||
notify_one.
|
||||
|
||||
Or, when the channel is neither empty nor full, we have an atomic spin lock,
|
||||
and when sleeping might become necessary, then we go to full mutex resolution.
|
||||
|
||||
Which implies a whole pile of data global to all threads, which will have
|
||||
to be atomically changed.
|
||||
|
||||
This can be done by giving each thread two buffers for this global data
|
||||
subject to atomic operations, and single pointer or index that points to the
|
||||
currently ruling global data set. (The mutex is also of course global, but
|
||||
the flag saying whether to use atomics or mutex is located in a data
|
||||
structure managed by atomics.)
|
||||
|
||||
When a thread wants to atomically update a large object (which should be
|
||||
sixty four byte aligned) it constructs a copy of the current object, and
|
||||
atomically updates the pointer to the copy, if the pointer was not changed
|
||||
while it was constructing. The object is immutable while being pointed at.
|
||||
|
||||
Or we could have two such objects, with the thread spinning if one is in
|
||||
use and the other already grabbed, or momentarily sleeping if an atomic
|
||||
count indicates other threads are spinning on a switch awaiting
|
||||
completion.
|
||||
|
||||
The read thread, having read, stores its read pointer atomically with
|
||||
`memory_order_release`, ored with the flag saying if it is going to full
|
||||
mutex resolution. It then reads the write pointer with
|
||||
`memory_order_acquire`, that the write thread atomically wrote with
|
||||
`memory_order_release`, and if all is well, keeps on reading, and if it is
|
||||
blocked, or the write thread has gone to mutex resolution, sets its mutex
|
||||
resolution flag and proceeds to mutex resolution. When it is coming out of
|
||||
mutex resolution, about to release the mutex, it clears its mutex resolution
|
||||
flag. The mutex is near the flags by memory location, all part of one object
|
||||
that contains a mutex and atomic variables.
|
||||
|
||||
So the mutex flag is atomically set when the mutex has not yet been
|
||||
acquired, but the thread is unconditionally going to acquire it, but non
|
||||
atomically cleared when the mutex still belongs to the thread, but is
|
||||
unconditionally going to release it.
|
||||
|
||||
If many read threads reading from one channel, then each thread has to
|
||||
`memory_order_acquire` the read pointer, and then, instead of
|
||||
`memory_order_release`ing it, has to do an
|
||||
`atomic_compare_exchange_weak_explicit`, and if it changed while it was
|
||||
reading abort its reads and start over.
|
||||
|
||||
Similarly if many write threads writing to one channel, each write thread
|
||||
will have first spin lock acquire the privilege of being the sole write thread
|
||||
writing, or spin lock acquire a range to write to. Thus in the most general
|
||||
case, we have a spin locked atomic write state that specifies an area that
|
||||
has been written to, an area that is being written to, and an area that is
|
||||
available to be acquired for writing, a spin locked atomic read state, and
|
||||
mutex that holds both the write state and the read state. In the case of a
|
||||
vector buffer with multiple writers, the atomic states are three wrapping
|
||||
atomic pointers that go through the buffer in the same direction.
|
||||
|
||||
We would like to use direct memory addresses, rather than vector or deque
|
||||
addresses, which might require us to write our own vector or deque. See
|
||||
the [thread safe deque](https://codereview.stackexchange.com/questions/238347/a-simple-thread-safe-deque-in-c "A simple thread-safe Deque in C++"), which however relies entirely on locks and mutexes,
|
||||
and whose extension to atomic locks is not obvious.
|
||||
|
||||
Suppose you are doing atomic operations, but some operations might be
|
||||
expensive and lengthy. You really only want to spin lock on amending data
|
||||
that is small and all close together in memory, so on your second spin,
|
||||
the lock has likely been released.
|
||||
|
||||
Well, if you might need to sleep a thread, you need a regular mutex, but
|
||||
how are you going to interface spin locks and regular mutexes?
|
||||
|
||||
You could cleverly do it with notifies, but I suspect it is costly compared
|
||||
to just using a plain old vanilla mutex. Instead you have some data
|
||||
protected by atomic locks, and some data protected by regular old
|
||||
mutexes, and any time the data protected by the regular old mutex might
|
||||
change, you atomically flag a change coming up, and every thread then
|
||||
grabs the mutex in order to amend or even look at the data, until on
|
||||
coming out of the mutex with the data, they see the flag saying the mutex
|
||||
protected data might change is now clear.
|
||||
|
||||
After one has flagged the change coming up, and grabbed the mutex, what
|
||||
happens if another thread is cheerfully amending the data in a fast
|
||||
operation, having started before you grabbed the mutex? The other thread
|
||||
has to be able to back out of that, and then try again, this try likely to be
|
||||
with mutex resolution. But what if the other thread wants to write into a
|
||||
great big vector, and reallocations of the vector are mutex protected. And
|
||||
we want atomic operations so that not everyone has to grab the mutex every
|
||||
time.
|
||||
|
||||
Well, any time you want to do something to the vector, it fits or it does not.
|
||||
And if it does not fit, then mutex time. You want all threads to switch
|
||||
to mutex resolution, before any thread actually goes to work reallocating
|
||||
the vector. So you are going to have to use the costly notify pattern. “I am
|
||||
out of space, so going to sleep until I can use the mutex to amend the
|
||||
vector. Wake me up when last thread using atomics has stopped using
|
||||
atomics that directly reference memory, and has switched to reading the
|
||||
mutex protected data, so that I can change the mutex protected data.”
|
||||
|
||||
The std::vector documentation says that vector access is just as efficient as
|
||||
array access, but I am a little puzzled by this claim, as a vector can be
|
||||
moved, and specifically requests that you have a no throw move operation for
|
||||
optimization, and having a no copy is standard where it contains things that
|
||||
might have ownership. (Which leads to complications when one has containers
|
||||
of containers, since C++ is apt to helpfully generate a broken copy
|
||||
implementation.)
|
||||
|
||||
Which would suggest that vector access is through indirection, and
|
||||
indirects with threading create problems.
|
||||
|
||||
## lightweight threads in C
|
||||
|
||||
A lightweight thread is just a thread where, whenever a lightweight thread
|
||||
needs to be paused by its heavyweight thread, the heavyweight thread
|
||||
stores the current stack state in the heap, and moves on to deal with other
|
||||
lightweight threads that need to be taken care of. Which collection of
|
||||
preserved lightweight thread stack states amount to a pile of event
|
||||
handlers that are awaiting events, and having received events, are then
|
||||
waiting for a heavyweight thread to process that event handler.
|
||||
|
||||
Thus one winds up with what I suspect is the Tokio solution, a stack that
|
||||
is a tree, rather than a stack.
|
||||
|
||||
Hence the equivalence between node.js and nginx event oriented
|
||||
programming, and Go concurrent programming.
|
||||
|
||||
# costs
|
||||
|
||||
Windows 10 is limited to sixty four threads total. If you attempt to create
|
||||
more threads than that, it still works, but performance is apt to bite, with
|
||||
arbitrary and artificial thread blocking. Hence goroutines, that implement
|
||||
unofficial threads inside the official threads.
|
||||
|
||||
Thread creation and destruction is fast, five to twenty microseconds, so
|
||||
thread pools do not buy you much, except that your memory is already
|
||||
going to be cached. Another source says 40 microseconds on windows,
|
||||
and fifty kilobytes per thread. So, a gigabyte of ram could have twenty
|
||||
thousand threads hanging around. Except that the windows thread
|
||||
scheduler dies on its ass.
|
||||
|
||||
There is a reasonable discussion of thread costs [here](https://news.ycombinator.com/item?id=22456642)
|
||||
|
||||
General message is that lots of languages have done it better, often
|
||||
immensely better, Go among them.
|
||||
|
||||
Checking the C++ threading libraries, they all single mindedly focus on
|
||||
the particular goal of parallelizing computationally intensive work. Which
|
||||
is not in fact terribly useful for anything you are interested in doing.
|
||||
|
||||
# Atomics
|
||||
|
||||
```C++
|
||||
typedef enum memory_order {
|
||||
memory_order_relaxed, // relaxed
|
||||
memory_order_consume, // consume
|
||||
/* No one, least of all compiler writers, understands what
|
||||
"consume" does.
|
||||
It has consequences which are difficult to understand or predict,
|
||||
and which are apt to be inconsistent between architectures,
|
||||
libraries, and compilers. */
|
||||
memory_order_acquire, // acquire
|
||||
memory_order_release, // release
|
||||
memory_order_acq_rel, // acquire/release
|
||||
memory_order_seq_cst // sequentially consistent
|
||||
/* "sequentially consistent" interacts with the more commonly\
|
||||
used acquire and release in ways difficult to understand or
|
||||
predict, and in ways that compiler and library writers
|
||||
disagree on. */
|
||||
} memory_order;
|
||||
```
|
||||
|
||||
I don’t think I understand how to use atomics correctly.
|
||||
|
||||
`Atomic_compare_exchange_weak_explicit` inside a while loop is
|
||||
a spin lock, and spin locks are complicated, apt to be inefficient,
|
||||
potentially catastrophic, and avoiding catastrophe is subtle and complex.
|
||||
|
||||
To cleanly express a concurrent algorithm you need a thousand
|
||||
communicating processes, as goroutines or node.js continuations, nearly
|
||||
all of which are sitting around waiting for another thing to send them
|
||||
a message or be ready to receive their message, while atomics give you a
|
||||
fixed small number of threads all barreling full speed ahead. Whereupon
|
||||
you find yourself using spin locks.
|
||||
|
||||
Rather than moving data between threads, you need to move threads between
|
||||
data, between one continuation and the next.
|
||||
|
||||
Well, if you have a process that interacts with Sqlite, each thread has to
|
||||
have its own database connection, in which case it needs to be a pool of
|
||||
threads. Maybe you have a pool of database threads that do work received
|
||||
from a bunch of asynch tasks through a single fixed sized fifo queue, and
|
||||
send the results back through another fifo queue, with threads waking up
|
||||
when the queue gets more stuff in it, and going to sleep when the queue
|
||||
empties, with the last thread signalling “wake me up when there is
|
||||
something to do”, and pushback happening when buffer is full.
|
||||
|
||||
Go demonstrates that you can cleanly express algorithms as concurrent
|
||||
communicating processes using fixed size channels. An unbuffered
|
||||
channel is just a coprocess, with a single thread of execution switching
|
||||
between the two coprocesses, without any need for locks or atomics, but
|
||||
with a need for stack fixups. But Node.js seems to get by fine with code
|
||||
continuations instead of Go’s stack fixups.
|
||||
|
||||
A buffered channel is just a fixed size block of memory with alignment,
|
||||
size, and atomic wrapping read and write pointers.
|
||||
|
||||
Why do they need to be atomic?
|
||||
|
||||
So that the read thread can acquire the write pointer to see how much data
|
||||
is available, and release the read pointer so that the write thread can
|
||||
acquire the read pointer to see how much space is available, and
|
||||
conversely the write thread acquires the read pointer and releases the write
|
||||
pointer. And when the write thread updates the write pointer it updates it *after*
|
||||
writing the data and does a release on the write pointer atomic, so that
|
||||
when the read thread does an acquire on the write pointer, all the data that
|
||||
the write pointer says was written will actually be there in the memory that
|
||||
read thread is looking at.
|
||||
|
||||
Multiple routines can send data into a single channel, and, with select, a
|
||||
single routine can receive data from many channels.
|
||||
|
||||
But, with go style programming, you are apt to have far more routines
|
||||
than actual hardware threads servicing them, so you are still going to need
|
||||
to sleep your threads, making atomic channels an optimization of limited
|
||||
value.
|
||||
|
||||
Your input buffer is empty. If you have one thread handling the one
|
||||
process for that input stream, going to have to sleep it. But this is costly.
|
||||
Better to have continuations that get executed when data is available in the
|
||||
channel, which means your channels are all piping to one thread, that then
|
||||
calls the appropriate code continuation. So how is one thread going to do a
|
||||
select on a thousand channels?
|
||||
|
||||
Well, we have a channel full of channels that need to be serviced. And
|
||||
when that channel empties, mutex.
|
||||
|
||||
Trouble is, I have not figured out how to have a thread wait on multiple
|
||||
channels. The C++ wait function does not implement a select. Well, it
|
||||
does, but you need a condition statement that looks over all the possible
|
||||
wake conditions. And it looks like all those wake conditions have to be on
|
||||
a single mutex, on which there is likely to be a lot of contention.
|
||||
|
||||
It seems that every thread grabs the lock, modifies the data protected by
|
||||
the lock, performs waits on potentially many condition variables all using
|
||||
the same lock and protected by the same lock, condition variables that
|
||||
look at conditions protected by the lock, then releases the lock
|
||||
immediately after firing the notify.
|
||||
|
||||
But it could happen that if we try to avoid unnecessarily grabbing the
|
||||
mutex, one thread sees the other thread awake, just when it is going to
|
||||
sleep, so I fear I have missed a spin lock somewhere in this story.
|
||||
|
||||
If we want to avoid unnecessary resort to mutex, we have to spin lock on a
|
||||
state machine that governs entry into mutex resolution. Each thread makes
|
||||
its decision based on the current state of channel and state machine, an
|
||||
does a `Atomic_compare_exchange_weak_explicit` to amend the state of the
|
||||
state machine. If the state machine has not changed, the decision goes
|
||||
through. If the state machine was changed, presumably by the other thread,
|
||||
it re-evaluates its decision and tries again.
|
||||
|
||||
Condition variables are designed to support the case where you have one
|
||||
thread or a potentially vast pool of threads waiting for work, but are not
|
||||
really designed to address the case where one thread is waiting for work
|
||||
from a potentially vast pool of threads, and I rather think I will have to
|
||||
handcraft a handler for this case from atomics and, ugh, dangerous spin
|
||||
loops implemented in atomics.
|
||||
|
||||
A zero capacity Go channel sort of corresponds to a C++ binary
|
||||
semaphore. A finite and small Go channel sort of corresponds to C++
|
||||
finite and small semaphore. Maybe the solution is semaphores, rather than
|
||||
atomic variables. But I am just not seeing a match.
|
||||
|
||||
I notice that notifications seems to be built out of a critical section, with
|
||||
lots of grabbing a mutex and releasing a mutex, with far too much
|
||||
grabbing a mutex and releasing a mutex. Under the hood, likely a too-clever
|
||||
and complicated use of threads piling up on the same critical
|
||||
section. So maybe we need some spin state atomic state machine system
|
||||
that drops spinning threads to wait on a semaphore. Each thread on a
|
||||
channel drops the most recent state channel after reading, and most recent
|
||||
state after writing, onto an atomic variable.
|
||||
|
||||
But the most general case is many to many, with many processes doing a
|
||||
select on many channels. We want a thread to sleep if all the channels on
|
||||
which it is doing a select are blocked on the operation it wants to do, and
|
||||
we want processes waiting on a channel to keep being woken up, one at a
|
||||
time, as long a channel has stuff that processes are waiting on.
|
||||
|
||||
# C++ Multithreading
|
||||
|
||||
`std::async` is designed to support the case where threads spawn more
|
||||
threads if there is more work to do, and the pool of threads is not too large,
|
||||
and threads terminate when they are out of work, or do the work
|
||||
sequentially if doing it in parallel seems unlikely to yield benefits. C++ by
|
||||
default manages the decision for you.
|
||||
|
||||
Maybe the solution is to use threads where we need stack state, and
|
||||
continuations serviced by a single thread where we expect to handle one
|
||||
and only one reply. Node.js gets by fine on one thread and one database
|
||||
connection.
|
||||
|
||||
```C++
|
||||
#include <thread>
|
||||
static_assert(__STDCPP_THREADS__==1, "Needs threads");
|
||||
// As thread resources have to be managed, need to be wrapped in
|
||||
// RAII
|
||||
class ThreadRAII {
|
||||
std::thread & m_thread;
|
||||
public:
|
||||
// As a thread object is moveable but not copyable, the thread obj
|
||||
// needs to be constructed inside the invocation of the ThreadRAII
|
||||
// constructor. */
|
||||
ThreadRAII(std::thread & threadObj) : m_thread(threadObj){}
|
||||
~ThreadRAII(){
|
||||
// Check if thread is joinable then detach the thread
|
||||
if(m_thread.joinable()){
|
||||
m_thread.detach();
|
||||
}
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
Examples of thread construction
|
||||
|
||||
```C++
|
||||
void foo(char *){
|
||||
…
|
||||
}
|
||||
|
||||
class foo_functor
|
||||
{
|
||||
public:
|
||||
void operator()(char *){
|
||||
…
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
int main(){
|
||||
ThreadRAII thread_one(std::thread (foo, "one"));
|
||||
ThreadRAII thread_two(
|
||||
std::thread (
|
||||
(foo_functor()),
|
||||
"two"
|
||||
)
|
||||
);
|
||||
const char three[]{"three"};
|
||||
ThreadRAII thread_lambda(
|
||||
std::thread(
|
||||
[three](){
|
||||
…
|
||||
}
|
||||
)
|
||||
);
|
||||
}
|
||||
```
|
||||
|
||||
C++ has a bunch of threading facilities that are designed for the case that
|
||||
a normal procedural program forks a bunch of tasks to do stuff in parallel,
|
||||
and then when they are all done, merges the results with join or promise
|
||||
and future, and then the main program does its thing.
|
||||
|
||||
This is not so useful when the main program is a event oriented, rather
|
||||
than procedural.
|
||||
|
||||
If the main program is event oriented, then each thread has to stick around
|
||||
for the duration, and has to have its own event queue, which C++ does not
|
||||
directly provide.
|
||||
|
||||
In this case threads communicate by posting events, and primitives that do
|
||||
thread synchronization (promise, future, join) are not terribly useful.
|
||||
|
||||
A thread grabs its event queue, using the mutex, pops out the next event,
|
||||
releases the mutex, and does its thing.
|
||||
|
||||
If the event queue is empty, then, without releasing it, the thread
|
||||
processing events waits on a [condition variable](https://thispointer.com//c11-multithreading-part-7-condition-variables-explained/). (which wait releases the
|
||||
mutex). When another thread grabs the event queue mutex and stuffs
|
||||
something into the event queue, it fires the [condition variable](https://thispointer.com//c11-multithreading-part-7-condition-variables-explained/), which
|
||||
wakes up and restores the mutex of the thread that will process the event
|
||||
queue.
|
||||
|
||||
Mutexes need to construct RAII objects, one of which we will use in
|
||||
constructing the condition object.
|
52
docs/libraries/git_bash_undocumented.md
Normal file
@ -0,0 +1,52 @@
|
||||
---
|
||||
title: Git Bash undocumented command line
|
||||
---
|
||||
|
||||
git-bash is a `mintty.exe` wrapper and bash wrapper – it winds up invoking
|
||||
other processes that do the actual work. While git-bash.exe is undocumented, `mintty.exe` and [`bash.exe`](https://www.gnu.org/software/bash/manual/bash.html) [are documented](http://www.gnu.org/gethelp/).
|
||||
|
||||
`git-bash.exe` sets up the environment in windows for `bash.exe`, then launches the bash shell
|
||||
|
||||
Example Windows shortcut to bash script: `/x/src/wallet/docs/mkdocs.sh`
|
||||
|
||||
"C:\Program Files\Git\git-bash.exe" --cd=X:\src\wallet --needs-console --no-hide --command=usr\bin\bash.exe --login -i docs/mkdocs.sh
|
||||
|
||||
Notice that the paths to the left of the invocation of `bash` are in Windows
|
||||
format, and the paths to the right of the invocation of bash are in gnu
|
||||
format.
|
||||
|
||||
Albeit this way of executing a bash script in windows is too clever by half,
|
||||
since you should be able to execute it just by clicking on it.
|
||||
|
||||
`--cd=D:\src`
|
||||
Sets the initial working directory to `/d/src` (windows path, launches bash
|
||||
with the corresponding gnu path)
|
||||
|
||||
`--no-cd`
|
||||
does not set working directory.
|
||||
|
||||
`--cd-to-home`
|
||||
Sets the working directory to home.
|
||||
|
||||
`--command=`command-line
|
||||
Executes `<command-line>` instead of the embedded string resource.
|
||||
|
||||
`--minimal-search-path`
|
||||
Ensures that only `/cmd/` is added to the `PATH` instead of `/mingw??/bin` and `/usr/bin/`
|
||||
|
||||
`--no-minimal-search-path`
|
||||
Normal search path
|
||||
|
||||
`--needs-console`
|
||||
Ensures that there is a Win32 console associated with the spawned process
|
||||
|
||||
`--no-needs-console`
|
||||
Fails to ensure that there is a Win32 console
|
||||
|
||||
`--hide`
|
||||
Hides the console window. This makes sense if you are launching a script and
|
||||
not expecting any feedback. But it means that the script has no means to
|
||||
give you an error message.
|
||||
|
||||
`--no-hide`
|
||||
Does not hide the console window.
|
1
docs/libraries/pandoc_templates/after.pandoc
Normal file
@ -0,0 +1 @@
|
||||
<p style="background-color: #ccffcc; font-size: 80%;"><a rel="license" href="http://creativecommons.org/licenses/by/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by/4.0/80x15.png" /></a><br />This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</a>.</p>
|
1
docs/libraries/pandoc_templates/before.pandoc
Normal file
@ -0,0 +1 @@
|
||||
<p><a href="./index.html"> To Home page</a></p>
|
21
docs/libraries/pandoc_templates/header.pandoc
Normal file
@ -0,0 +1,21 @@
|
||||
<style>
|
||||
body {
|
||||
max-width: 30em;
|
||||
margin-left: 1em;
|
||||
}
|
||||
p.center {text-align:center;}
|
||||
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
td, th {
|
||||
border: 1px solid #999;
|
||||
padding: 0.5rem;
|
||||
text-align: left;
|
||||
}
|
||||
h1.title{
|
||||
text-align: center; font-size: xxx-large;
|
||||
}
|
||||
</style>
|
||||
<link rel="shortcut icon" href="../../rho.ico">
|
||||
|
31
docs/libraries/pandoc_templates/style.css
Normal file
@ -0,0 +1,31 @@
|
||||
body {
|
||||
max-width: 30em;
|
||||
margin-left: 1em;
|
||||
}
|
||||
p.center {text-align:center;
|
||||
}
|
||||
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
td, th {
|
||||
border: 1px solid #999;
|
||||
padding: 0.5rem;
|
||||
text-align: left;
|
||||
}
|
||||
code{white-space: pre-wrap;
|
||||
}
|
||||
span.smallcaps{font-variant: small-caps;
|
||||
}
|
||||
span.underline{text-decoration: underline;
|
||||
}
|
||||
div.column{display: inline-block; vertical-align: top; width: 50%;
|
||||
}
|
||||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;
|
||||
}
|
||||
ul.task-list{list-style: none;
|
||||
}
|
||||
.display.math{display: block; text-align: center; margin: 0.5rem auto;
|
||||
}
|
||||
h1.title{text-align: center; font-size: xxx-large;
|
||||
}
|
74
docs/libraries/review_of_crypto_libraries.md
Normal file
@ -0,0 +1,74 @@
|
||||
---
|
||||
title: Review of Cryptographic libraries
|
||||
---
|
||||
|
||||
# Noise Protocol Framework
|
||||
|
||||
[Noise](https://noiseprotocol.org/) is an architecture and a design document,
|
||||
not source code. Example source code exists for it, though the
[C example](https://github.com/rweather/noise-c) uses a build architecture that may not
|
||||
fit with what you want, and uses protobuf, while you want to use Cap’n
|
||||
Proto or roll your own serialization. It also is designed to use several
|
||||
different implementations of the core crypto protocols, one of them being
|
||||
libsodium, while you want a pure libsodium only version. It might be easier
|
||||
to implement your own version, using the existing version as a guide.
|
||||
Probably have to walk through the existing version.
|
||||
|
||||
# [Libsodium](./building_and_using_libraries.html#instructions-for-libsodium)
|
||||
|
||||
# I2P
|
||||
|
||||
The [Invisible Internet Project](https://geti2p.net/en/about/intro) does a great deal of the chat capability that you want. You need to interface with their stuff, rather than duplicate it. In particular, your wallet identifiers need to be I2P identifiers, or have corresponding I2P identifiers, and your anonymized transactions should use the I2P network.
|
||||
|
||||
They have a substitute for UDP, and a substitute for TCP, and your anonymized transactions are going to use that.
|
||||
|
||||
# Amber
|
||||
|
||||
[Amber](https://github.com/bernedogit/amber)
|
||||
|
||||
Not as fast and efficient as libsodium, and further from Bernstein. Supports base 58, but [base58check](https://en.bitcoin.it/wiki/Base58Check_encoding#Base58_symbol_chart) is specifically bitcoin protocol, supporting run time typed checksummed cryptographically strong values. Note that any value you are displaying in base 58 form might as well be bitstreamed, for the nearest match between base 58 and base two is that 58^7^ is only very slightly larger than 2^41^, so you might as well use your prefix free encoding for the prefix.
|
||||
|
||||
[Curve25519](https://github.com/msotoodeh/curve25519)
|
||||
|
||||
Thirty two byte public key, thirty two byte private key.
|
||||
|
||||
Key agreement is X25519
|
||||
|
||||
Signing is ED25519. Sixty four byte signature.
|
||||
|
||||
Trouble is that amber does not include Bernstein’s assembly language optimizations.
|
||||
|
||||
[ED25519/Donna](https://github.com/floodyberry/ed25519-donna) does include Bernstein’s assembly language optimizations, but is designed to compile against OpenSSL. Probably needs some customization to compile against Amber. Libsodium is designed to be uncontaminated by NSA.
|
||||
|
||||
ED25519 does not directly support [Schnorr signatures](schnorr-signatures.pdf), being nonprime. Schnorr signatures can do multisig, useful for atomic exchanges between blockchains, which are multisig, or indeed arbitrary algorithm sig. With some cleverness and care, they support atomic exchanges between independent block chains.
|
||||
|
||||
explanation of how to do [Schnorr multisignatures](https://www.ietf.org/archive/id/draft-ford-cfrg-cosi-00.txt) [using ED25519](https://crypto.stackexchange.com/questions/50448/schnorr-signatures-multisignature-support#50450)
|
||||
|
||||
Amber library packages all these in what is allegedly easy to incorporate form, but does not have Schnorr multisignatures.
|
||||
|
||||
[Bernstein paper](https://ed25519.cr.yp.to/software.html).
|
||||
|
||||
The fastest library I can find for pairing based crypto is [herumi](https://github.com/herumi/mcl).
|
||||
|
||||
How does this compare to [Curve25519](https://github.com/bernedogit/amber)?
|
||||
|
||||
There is a good discussion of the performance tradeoff for crypto and IOT in [this Internet Draft](https://datatracker.ietf.org/doc/draft-ietf-lwig-crypto-sensors/), currently in IETF last call:
|
||||
|
||||
From the abstract:
|
||||
|
||||
> This memo describes challenges associated with securing resource-
|
||||
> constrained smart object devices. The memo describes a possible
|
||||
> deployment model where resource-constrained devices sign message
|
||||
> objects, discusses the availability of cryptographic libraries for
|
||||
> small devices and presents some preliminary experiences with those
|
||||
> libraries for message signing on small devices. Lastly, the memo
|
||||
> discusses trade-offs involving different types of security
|
||||
> approaches.
|
||||
|
||||
The draft contains measurement and evaluations of libraries, allegedly
|
||||
including herumi. But I don’t see any references to the Herumi library in
|
||||
that document, nor any evaluations of the time required for pairing based
|
||||
cryptography in that document. Relic-Toolkit is not Herumi and is supposedly
|
||||
markedly slower than Herumi.
|
||||
|
||||
Looks like I will have to compile the libraries myself and run tests on them.
|
BIN
docs/libraries/schnorr-signatures.pdf
Normal file
8
docs/libraries/stdafx.cpp
Normal file
@ -0,0 +1,8 @@
|
||||
// stdafx.cpp : source file that includes just the standard includes
|
||||
// wxHello.pch will be the pre-compiled header
|
||||
// stdafx.obj will contain the pre-compiled type information
|
||||
|
||||
#include "stdafx.h"
|
||||
|
||||
// TODO: reference any additional headers you need in STDAFX.H
|
||||
// and not in this file
|
20
docs/libraries/stdafx.h
Normal file
@ -0,0 +1,20 @@
|
||||
// stdafx.h : include file for standard system include files,
|
||||
// or project specific include files that are used frequently, but
|
||||
// are changed infrequently
|
||||
//
|
||||
|
||||
#pragma once
|
||||
#define wxUSE_UNICODE 1
|
||||
#define wxUSE_UNICODE_WCHAR 1
|
||||
#include <wx/wxprec.h>
|
||||
#include <array>
|
||||
#include "app.h"
|
||||
|
||||
#ifdef _DEBUG
|
||||
#pragma comment(lib, "wxbase31ud.lib")
|
||||
#else
|
||||
#pragma comment(lib, "wxbase31u.lib")
|
||||
#endif
|
||||
|
||||
|
||||
// TODO: reference additional headers your program requires here
|
595
docs/lightning_layer.md
Normal file
@ -0,0 +1,595 @@
|
||||
---
|
||||
title:
|
||||
Lightning Layer
|
||||
---
|
||||
# This discussion of the lightning layer may well be obsoleted
|
||||
|
||||
by the elegant cryptography of [Scriptless Scripts] using adaptive Schnorr
|
||||
signatures and of [Anonymous Multi-Hop Locks].
|
||||
|
||||
Contingent payments can reveal a key to an alien blockchain on the bitcoin blockchain, and [zero knowledge contingent payments on the bitcoin chain] can reveal any arbitrarily complex secret that fulfils any arbitrarily complex condition.
|
||||
|
||||
[Scriptless Scripts]:https://tlu.tarilabs.com/cryptography/scriptless-scripts/introduction-to-scriptless-scripts.html
|
||||
"Introduction to Scriptless Scripts – Tari Labs University"
|
||||
|
||||
[Anonymous Multi-Hop Locks]: anonymous_multihop_locks_lightning_network.pdf
|
||||
"Anonymous Multi-Hop Locks for Blockchain Scalability and Interoperability"
|
||||
|
||||
[zero knowledge contingent payments on the bitcoin chain]:https://bitcoincore.org/en/2016/02/26/zero-knowledge-contingent-payments-announcement/
|
||||
"The first successful Zero-Knowledge Contingent Bitcoin Payment"
|
||||
|
||||
I need to understand the [Anonymous Multi-Hop Locks] primitive, and
|
||||
rewrite this accordingly.
|
||||
|
||||
Scriptless scripts have the huge advantage of being self enforcing – it is
|
||||
impossible to defect from the script, because there is no script – there are
|
||||
just actions that are cryptographically possible or impossible.
|
||||
|
||||
But scriptless scripts cannot in themselves solve the hard problem, that all
|
||||
participants in a multilateral transaction need to know _in a short time_ that
|
||||
the whole multilateral transaction has definitely succeeded or definitely
|
||||
failed. This inherently requires a reliable broadcast channel, though if
|
||||
everyone is cooperating, they don’t have to actually put anything on that
|
||||
channel. But they need the capability to resort to that channel if something
|
||||
funny happens, and that capability has to be used or lost within a time limit.
|
||||
|
||||
So, the adapter secret not only has to become known to the participants, it has to become known to the participants within timing limits.
|
||||
|
||||
[Anonymous Multi-Hop Locks] can ensure that the lightning network is
|
||||
always in a definite state, and that those parts of it that are undergoing
|
||||
state change are locked, but need to be embedded in a protocol that
|
||||
ensures that those locks always go away in a short time with commitment
|
||||
of the total transaction, or rollback of the total transaction, and that the
|
||||
participants in the transaction always know whether the transaction was
|
||||
committed or rolled back within a short time. Scriptless scripts are
|
||||
timeless, and need to be embedded in scripted scripts that have timing
|
||||
constraints, and which require information to be broadcast over a reliable
|
||||
broadcast channel if the information is available, yet certain time limits are
|
||||
nonetheless exceeded.
|
||||
|
||||
My conclusion was that full circle unbreakability of lightning network
|
||||
transactions within time limits needs a reliable broadcast, and I envisaged
|
||||
a hierarchy of reliable broadcasters, (sidechains, with some sidechains
|
||||
representing a group of bilateral lightning network gateways that act as
|
||||
one multilateral lightning network gateway) But this conclusion may be
|
||||
wrong or overly simple – though we are still going to need sidechains and
|
||||
hierarchical reliable broadcasting, because it can do no end of things that
|
||||
are very difficult otherwise.
|
||||
|
||||
But reliable broadcast mechanism both supplies and requires a solution to
|
||||
distributed Byzantine fault tolerant consensus, so the problem of getting a
|
||||
lock up and bringing it down is a general distributed Byzantine fault
|
||||
tolerant consensus problem, and perhaps viewing it as a reliable broadcast
|
||||
problem is a misperception and misanalysis.
|
||||
|
||||
Rather, the blockdag requires a mechanism to establish a total order of
|
||||
blocks, and the distributed multihop lock requires a mechanism to
|
||||
establish the state of the lock, the classic distributed state machine
|
||||
problem addressed by Practical Byzantine Fault Tolerant distributed
|
||||
consensus. Albeit implementing Practical Byzantine Fault Tolerant
|
||||
distributed consensus as a state machine over a blockdag may well be a
|
||||
simpler and more humanly intelligible form of this algorithm. But a state
|
||||
machine for BEGIN … COMMIT is going to be a fundamentally different
|
||||
state machine to the one that constructs a total order of transactions.
|
||||
|
||||
# Lightning layer concept
|
||||
|
||||
The lightning layer is a design that implements something like full reserve
|
||||
correspondence banking on top of the blockchain layer, thereby solving
|
||||
the problem of the blockchain exposing too much information about
|
||||
everyone’s transactions to everyone, allowing a blockchain solution to
|
||||
scale to completely replacing fiat currency, and allowing instant on the
|
||||
spot transactions.
|
||||
|
||||
A lightning gateway allows instant trustless unilateral transactions (Alice
|
||||
pays Bob) between two or more people, but you have to lock up a
|
||||
significant amount of money in each gateway, and if you have more than
|
||||
two participants, and one of the participants goes off the internet, then
|
||||
the protocols necessary for the gateway to keep going between the remaining
|
||||
participants become a bit more complicated. Multiparty gateways will be
|
||||
(eventually) implemented as sidechains.
|
||||
|
||||
A lightning gateway consists of coin (unspent transaction output) on the
|
||||
blockchain that need a joint signature to be spent. The parties to coin
|
||||
construct a transaction splitting it up between them before they create that
|
||||
coin on the blockchain, and then do not drop that transaction to the
|
||||
blockchain layer, keeping the transaction for when one them decides to break
|
||||
up the gateway. To change ownership of the coin, to increase the amount of the
|
||||
coin owned by one of them and decrease the amount owned by the other, they
|
||||
generate a new transaction, and then do not drop that transaction to the
|
||||
blockchain layer either.
|
||||
|
||||
And the problem is that we don’t want unilateral transactions. We want Alice
|
||||
pays Bob, and Bob gets a proof that he paid Alice, completing the circle.
|
||||
|
||||
And, to work like correspondence banking, we want Alice pays Bob, Bob pays
|
||||
Carol, Carol pays Dan, Dan pays Erin, Erin pays Frank, and Frank gets proof
|
||||
that he paid Ann, completing the circle.
|
||||
|
||||
And we want every unilateral transaction in the circle to go through, or none
|
||||
of them to go through. We want the whole circle of unilateral transactions
|
||||
making up a complete multilateral transaction to succeed or every unilateral
|
||||
transaction in the whole circle to fail.
|
||||
|
||||
And we want it to normally and usually succeed fast, and if it fails, to take
|
||||
a reasonably short time to fail. And if it fails, we want all participants to
|
||||
know it has definitely failed after a reasonably short timeout.
|
||||
|
||||
# The Bitcoin lightning layer is not
|
||||
|
||||
It is not correspondence banking with predictable behavior enforced by
|
||||
cryptography.
|
||||
|
||||
It is correspondence banking that uses cryptography, where the participants
|
||||
trustworthy behaviour is enforced from behind the scenes by unclear authority,
|
||||
which is likely to lead to the existing problem where authority is supposed to
|
||||
protect you from the bankers, but who protects you from authority?
|
||||
|
||||
For the lightning network to change from a single consistent state of who owns
|
||||
what part of each gateway unspent transaction output to another single
|
||||
consistent state is equivalent to the well known hard problems of a reliable
|
||||
broadcast channel, the Byzantine generals problem, the two generals problem,
|
||||
and acid transactions on a distributed database.
|
||||
|
||||
So when I looked at the lightning documents for a lightning layer on top of
|
||||
bitcoin I expected to see discussion of smart contracts on the bitcoin layer
|
||||
or lightning network protocols on the lightning layer for resolving these hard
|
||||
problems and eventually reaching a resolution, so that the network would
|
||||
always eventually reach a consistent and intended state. Not seeing them.
|
||||
|
||||
Instead what I do see is that the lightning network is safe because
|
||||
“misconduct” will result in you losing your bitcoins.
|
||||
|
||||
It will? Who do you lose them to? How is *misconduct* defined? Who decides
|
||||
*misconduct*? The guy who gets your bitcoins? Do we have a central banker of
|
||||
the lightning network somewhere?
|
||||
|
||||
We should not be talking about “misconduct”. We should be talking about the
|
||||
lightning network entering an inconsistent state due to lost messages, nodes
|
||||
going down at inopportune moments, and nodes deviating from the protocol, or
|
||||
entering a state that some of the participants in the transaction did not
|
||||
intend or expect.
|
||||
|
||||
The cryptographic term for misconduct is “Byzantine failure”, which indeed
|
||||
normally results from wickedness, but can result from bugs or data corruption.
|
||||
|
||||
While the usual and archetypal cause of Byzantine failure is that someone
|
||||
wrote or modified software for the purposes of betrayal, lying, cheating, and
|
||||
stealing, it happens often enough as a result of data corruption or running a
|
||||
program that was compiled under a compiler and in an environment different
|
||||
from that it was tested and developed on.
|
||||
|
||||
A gateway does a unilateral transaction between the parties on the lightning
|
||||
layer who control the gateway unspent transaction output by
|
||||
generating a new transaction on the bitcoin layer breaking up the gateway
|
||||
between the parties, and the parties refrain from committing that transaction
|
||||
to the blockchain, and instead endlessly generate new transactions, which do not
|
||||
get played either, thereby changing what part of the gateway unspent
|
||||
transaction output is owned by each party to the gateway.
|
||||
|
||||
*What happens if someone commits an out of date bitcoin layer transaction to
|
||||
the blockchain?*
|
||||
|
||||
Does the central banker confiscate the bitcoin of the party who committed the
|
||||
transaction. How does he do that?
|
||||
|
||||
Suppose Alice’s gateway to Bob is blocked because she now owns the whole of
|
||||
that gateway unspent transaction output, and her gateway to Frankie is blocked
|
||||
because Frankie owns all of that gateway.
|
||||
|
||||
So she organizes a transaction that moves bitcoin from the Ann/Bob gateway to
|
||||
the Ann/Frankie gateway. So what Alice intends is that Alice pays Bob, Bob
|
||||
pays Carol, Carol pays Dan, Dan pays Erin, Erin pays Frank, and Frank pays
|
||||
Alice. Except that in the middle the transaction Carol and Erin ungracefully
|
||||
disconnect from the network, so that either Ann generated a bitcoin layer
|
||||
transaction giving bitcoin to Bob, but Frankie did not generate a bitcoin
|
||||
layer transaction to Ann, or the other way around.
|
||||
|
||||
*What happens when a transaction fails and leaves the parties in an inconsistent
|
||||
state?*
|
||||
|
||||
Does the central banker decide that Carol and Erin were engaged in
|
||||
misconduct and confiscate their bitcoin?
|
||||
|
||||
# Trustless Unilateral transactions
|
||||
|
||||
If you wanted to buy or sell cryptocurrency for cash or gold, you could
|
||||
arrange over the internet to meet someone in person, set up a lightning
|
||||
gateway with him, and then, when you met him in person, pay him instantly, on
|
||||
the spot, in a trustless unilateral transaction without need to wait some
|
||||
considerable time for the payment to clear.
|
||||
|
||||
Suppose you wanted to sell bitcoin for gold, in person, both parties are going
|
||||
to meet, probably with a weapon in their pocket. You could create a jointly
|
||||
signed unspent transaction output, after you and the other party jointly sign
|
||||
a bitcoin layer transaction giving the bitcoin back to you. And then, when
|
||||
meeting, create a jointly signed bitcoin layer transaction giving the bitcoin
|
||||
to the man who is giving you the gold. Except what is going to stop you from
|
||||
committing the earlier transaction to the blockchain a few seconds before the
|
||||
meeting?
|
||||
|
||||
OK, let us suppose we have supersedeable transactions. They have to be
|
||||
committed to the blockchain for a time before they take effect, and if someone
|
||||
submits a transaction with the same signatures but a higher priority, the one
|
||||
with lower priority fails to take effect. Then you can endlessly generate
|
||||
fresh transactions, each with higher priority than the previous one, and never
|
||||
commit them to the blockchain unless the gateway between the parties is
|
||||
abandoned.
|
||||
|
||||
It would take a little while for the gateway to become available, but once it
|
||||
was available, instant irrevocable payments become possible.
|
||||
|
||||
And if one has an account with a service provider over the internet, one could
|
||||
set up a gateway with that service provider, and after each session or each
|
||||
service, make a small instant payment, without the cost and delay of making
|
||||
transactions on the blockchain.
|
||||
|
||||
It would be possible to do such instant two party transactions with bitcoin
|
||||
today, although the wallets are generally not set up to support it, nor is the
|
||||
way the main blockchain processes transactions, but with BTC blockchain
|
||||
working as it today, such transactions are not trustless. If you want to do a
|
||||
BTC transaction you are trusting the lightning network, which means you
|
||||
are trusting you know not whom.
|
||||
|
||||
At the time this is written, the lightning network for bitcoin has not been
|
||||
adequately implemented, or even fully designed. Existing implementations on
|
||||
top of the bitcoin blockchain still require some trust in intermediaries, and
|
||||
thus require trust in some mysterious authority with the mysterious
|
||||
capability to punish the intermediaries, and to have a real lightning network on
|
||||
bitcoin requires changes in bitcoin which the miners are not going along with,
|
||||
and which perhaps are underscoped and not fully thought out, ad hoc changes
|
||||
to fit with what already existed, and what was politically feasible. And it
|
||||
seems that rather less was politically feasible than one might hope.
|
||||
|
||||
# Cryptographic implementation of trustless unilateral transactions
|
||||
|
||||
Ann and Bob create on the blockchain a coin (unspent transaction output,
|
||||
utxo) whose corresponding key is the sum of secret key known only to
|
||||
Ann, and a secret key known only to Bob. Before they create this coin,
|
||||
they create a transaction on it, signed by their joint key, that creates two
|
||||
coins, one with a secret key known only to Bob, and one with a secret key
|
||||
known only to Ann. They keep this transaction to themselves and do not
|
||||
place it on the blockchain.
|
||||
|
||||
# Multilateral (circle) transactions
|
||||
|
||||
The lightning layer will function like correspondence banking, only with
|
||||
good behavior cryptographically enforced on the lightning vertices, rather
|
||||
than by state supervision of the “banks”. This will require a blockchain
|
||||
layer designed to support it.
|
||||
|
||||
Correspondence banking merges large numbers of small transactions into a small
|
||||
number of large pooled transactions which are eventually settled on the
|
||||
blockchain in one big transaction, with several parties to the transaction
|
||||
representing a very large number of parties engaged in a very large number of
|
||||
transactions.
|
||||
|
||||
But correspondence banking works by trust, thus the intermediaries have to
|
||||
be well known and subject to pressure, which is apt to mean subject to
|
||||
government pressure – and government has interests that are in conflict with
|
||||
those of people attempting to use a medium of exchange and a store of
|
||||
value.
|
||||
|
||||
lightning network correspondence banking
|
||||
------------------------------------------------------------------------ -----------------------------------------------------------------------------------
|
||||
merges many small two party transactions into a few large transactions merges many small two party transactions into a few large transactions
|
||||
lightning vertex bank
|
||||
lightning gateway bank account
|
||||
trustless need to trust the banks
|
||||
instant Slow in that you can never be sure if a transaction will be honored for some time
|
||||
Payer id visible to payer vertex, payee id visible to payee vertex Government issued id of payer and payee visible to all intermediaries.
|
||||
|
||||
We have a separate blockchain of supersedeable transactions. A
|
||||
transaction gets committed to the primary blockchain after it has been
|
||||
sitting on the supersedable chain for a while, if, at the time it is
|
||||
evaluated for commitment, all of its inputs are valid unspent outputs on
|
||||
the primary blockchain, and none of them are inputs to a currently valid
|
||||
higher priority transaction on the blockchain of supersedable
|
||||
transactions, the priority being an arbitrary length binary value.
|
||||
|
||||
If one party tries to break up a gateway with an out of date
|
||||
distribution, the other party notices it sitting on the supersedable
|
||||
blockchain, and issues a more up to date transaction. Normally this
|
||||
should never happen, since when one of the parties wants to break up a
|
||||
gateway, the other party should agree to a regular transaction. However,
|
||||
one party may go offline and stay offline, especially if the last
|
||||
transaction reduced the value of their interest in the gateway to
|
||||
zero.
|
||||
|
||||
A gateway in the lightning network layer is a jointly signed coin on the
|
||||
blockchain layer. Bob’s account with BigCrypto is a coin
|
||||
on the blockchain layer, for which there exists a jointly signed
|
||||
transaction distributing that block between two blocks, one wholly owned by
|
||||
Bob, and one wholly owned by BigCrypto, thus giving BigCrypto effective
|
||||
offchain control of one part of that block, and Bob effective control of the
|
||||
other part of the block, and by generating new superseding transaction, they
|
||||
can effectively transfer ownership of part of the block instantly without
|
||||
anything going on the blockchain.
|
||||
|
||||
But …
|
||||
|
||||
# Acid
|
||||
|
||||
We have to make sure that transactions are acid on the lightning network as a
|
||||
whole, that transactions are atomic, consistent, isolated, and durable.
|
||||
|
||||
## Atomic and consistent
|
||||
|
||||
Ann has an account with Bob, Bob has an account with Carol.
|
||||
|
||||
To change what is Ann’s account requires a transaction signed by
|
||||
Bob and Ann, and similarly for Carol.
|
||||
|
||||
Ann wants to pay Carol, but does not want to sign a reduction in her account
|
||||
with Bob, unless she is sure that Carol gets the corresponding increase. Bob
|
||||
does not want to sign an increase in Carol’s account, unless it gets Ann’s
|
||||
signature on a decrease in her account. Not to mention that Ann probably
|
||||
does not want to sign a decrease on her account without getting a receipt
|
||||
from Carol. Full circle transaction. We need to guarantee that either the
|
||||
full circle goes through, or none of the separate unilateral transactions in
|
||||
the circle go through.
|
||||
|
||||
## Reliable broadcast channel
|
||||
|
||||
The solution to atomicity and maintaining consistency between different
|
||||
entities on the lightning network is the reliable broadcast channel.
|
||||
|
||||
Such as the blockchain itself. Create a special zero value transaction that
|
||||
has no outputs and carries its own signature, but can be a required input to
|
||||
other transactions, and whose construction requires the cooperation of all
|
||||
the parties. Each gateway constructs a transaction that is only valid if a code
|
||||
is placed on the blockchain that requires the cooperation of all the gateways
|
||||
within a short time. Once the code exists, and everyone knows it exists,
|
||||
they proceed with bilateral transactions that do not require the code and
|
||||
only potentially go on the blockchain. If not everyone knows it exists, and
|
||||
it does not appear on the blockchain within a short time, then the
|
||||
transaction fails. If everyone knows it exists, the transaction succeeds. If
|
||||
not everyone knows it exists, but it appears on the blockchain within the
|
||||
time limit, the transaction succeeds, and each party could potentially play
|
||||
the transaction, and thus effectively owns the corresponding part of the
|
||||
gateway coin, regardless of whether they play it or not.
|
||||
|
||||
A reliable broadcast channel is something that somehow works like a
|
||||
classified ad did back in the days of ink on paper newspapers. The physical
|
||||
process of producing the newspaper guaranteed that every single copy had the
|
||||
exact same classified ad in it, and that ad must have been made public on a
|
||||
certain date. Easy to do this with a printing press that puts ink on
|
||||
paper. Very hard to do this, with electronic point to point communications.
|
||||
|
||||
But let us assume we somehow have a reliable broadcast channel:
|
||||
|
||||
All the parties agree on a Merkle tree, which binds them if the joint
|
||||
signature to that Merkle tree appears on the reliable broadcast channel
|
||||
within a certain short time period.
|
||||
|
||||
And, if some of them have the joint signature, then knowing that they could
|
||||
upload it to the reliable broadcast channel, they each agree to superseding
|
||||
unilateral transactions. If Bob expects payment from Ann and expects to
|
||||
make payment to Carol, and he has the joint signature, and knows Carol has a
|
||||
copy of the authenticated joint signature, because Carol sent him the
|
||||
signature and he sent Ann the signature, then he knows Carol can
|
||||
*make* him pay her, and knows he can *make* Ann pay him. So he just goes
|
||||
right ahead with unilateral transactions that supersede the transaction that
|
||||
relies on the reliable broadcast channel. And if every party to the
|
||||
transaction does that, none of them actually broadcast the signature the
|
||||
reliable broadcast channel. Which in consequence, by merely being available
|
||||
enforces correct behaviour, and is seldom likely to need to actually
|
||||
broadcast anything. And when something is actually broadcast on that
|
||||
channel, chances are that all the transactions that that broadcast enables
|
||||
will have been superseded.
|
||||
|
||||
Each party, when receives a copy of the joint signature that he *could* upload
|
||||
to the reliable broadcast channel, sends a copy to the counter party that he
|
||||
expects to pay him, and each party, when he receives a copy from the party he
|
||||
expects to pay, performs the unilateral payment to that party that supersedes
|
||||
the transaction using the reliable broadcast network.
|
||||
|
||||
And if a party has a copy of the joint signature and the document that it
|
||||
signs for the full circle transaction, but finds himself unable to perform
|
||||
the superseding unilateral transactions with his counterparties, (perhaps
|
||||
their internet connection or their computer went down) then he uploads the
|
||||
signature to the reliable broadcast channel.
|
||||
|
||||
When the signature is uploaded to reliable broadcast channel, this does not
|
||||
give the reliable broadcast channel any substantial information about the
|
||||
amount of the transaction, and who the parties to the transaction are, but the
|
||||
node of the channel sees IP addresses, and this could frequently be used to
|
||||
reconstruct a pretty good guess about who is transacting with whom and why.
|
||||
As we see with Monero, a partial information leak can be put together with
|
||||
lots of other sources of information to reconstruct a very large information
|
||||
leak.
|
||||
|
||||
But most of the time, the channel is not likely to be used, which means it
|
||||
will have only small fragments of data, not enough to put together to form
|
||||
a meaningful picture, hence the privacy leak is unlikely to be very useful
|
||||
to those snooping on other people’s business.
|
||||
|
||||
### Other use cases for a reliable broadcast channel
|
||||
|
||||
The use case of joint signatures implies an immutable data structure of the
|
||||
tuple oid, hash, public key, and two scalars.
|
||||
|
||||
But another use case is to publicly root private immutable data.
|
||||
|
||||
If you continually upload the latest version, you wind up uploading most of
|
||||
tree, or all of it, which does not add significantly to the cost of each
|
||||
interaction recorded. The simplest sql friendly data structure is (oid of
|
||||
this item, public key, hash, your index of hash, oids of two child hashes)
|
||||
with the reliable broadcast channel checking that the child hashes do in fact
|
||||
generate the hash, and that the tuple (public key, index of hash) is unique.
|
||||
|
||||
If the data is aged out after say, three months, cannot directly check
|
||||
uniqueness and correctness for the nodes that are the roots of big trees. How
|
||||
do you know someone has not made up several different and mutually
|
||||
inconsistent pasts for immutable append only data structure associated with
|
||||
this key?
|
||||
|
||||
To work around this problem, allow unbalanced Merkle trees, consisting of
|
||||
(oid of this item, public key, hash, your tree node index, index of the
|
||||
highest index leaf governed by this hash, oids of two child hashes) If an
|
||||
unbalanced node referencing an old tree root is uploaded at intervals of less
|
||||
than three months, it can be used to prove the contents and uniqueness of
|
||||
the old balanced binary tree root, since the most recent unbalanced node must
|
||||
have also proved contents and uniqueness, using a less recent unbalanced
|
||||
node, and unbalanced nodes must also be unique on the tuple (public key,
|
||||
index of node, index of highest leaf node governed) Someone can forget his
|
||||
old past, and, after three months, start making up a new past, but the
|
||||
witness for the new past can only begin on the day he starts the upload. He
|
||||
cannot construct several realities, and six months later, choose which
|
||||
reality he finds convenient. Or rather he can, but he cannot provide a six
|
||||
month earlier witness to it.
|
||||
|
||||
You upload several nodes that constitute the unbalanced tree right side path
|
||||
that points at the balanced rest of the old tree every two months, superseding
|
||||
the previous right hand side of the unbalanced tree, and thus maintaining a
|
||||
chain of proofs stretching into the past that proves that throughout this
|
||||
period, there is one and only one immutable data structure associated with
|
||||
this public key.
|
||||
|
||||
A common case is that the key certifying the state of the immutable data may
|
||||
change, with the torch being passed from key to key. In which case the
|
||||
upload of total state needs to reference the used key, and an earlier,
|
||||
normally the earliest, signing key, with links in the chain of keys
|
||||
authorizing keys being renewed at less than the timeout interval for data to
|
||||
be immutable, but unavailable from the reliable broadcast network. If the
|
||||
witness asserts that key is authorized by a chain of keys going back to an
|
||||
earlier or the earliest keys, then it relies on its previous witness, rather
|
||||
than re-evaluating the entire, possibly very long, chain of keys every time.
|
||||
|
||||
But, if the witness cannot do that, then the entire, possibly very very long,
|
||||
chain of keys and signatures has to be uploaded for the witness to record the
|
||||
earlier, or earliest key, as authorizing the current ever changing key.
|
||||
Similarly if the client starts uploading to a new witness. But such very
|
||||
long proofs will only have to be done once in a very long while, and done
|
||||
once for everyone.
|
||||
|
||||
Because of Byzantine failure or network failure, such a chain may fork. The
|
||||
protocol has to be such that if a fork develops by network failure,
|
||||
it will be fixed, with one of the forks dying when the network functions
|
||||
better, and if it fails by Byzantine failure,
|
||||
we get two sets of reliable broadcast channels,
|
||||
each testifying that the other reliable broadcast channel is unreliable,
|
||||
and each testifying that a different fork is the valid fork,
|
||||
and which fork you follow depends on which reliable broadcast channel you
|
||||
subscribe to.
|
||||
|
||||
Another use case is for wallet recovery, with mutable data structures
|
||||
encrypted by the private key whose primary key is the public key.
|
||||
|
||||
## implementing and funding a reliable broadcast channel
|
||||
|
||||
Tardigrade has a somewhat similar architecture to the proposed Reliable
|
||||
Broadcast network, and charges $120 per year per TB of storage, $45 per
|
||||
terabyte of download. So for uploading a single signature, and downloading
|
||||
it six times, which is one hash, one elliptic point, and two scalars, one
|
||||
hundred and twenty eight bytes, so the cost of doing what tardigrade does
|
||||
with reliable broadcast network operated by a single operator would be
|
||||
$4 × 10^{-7}$ dollars. Which might as well be free, except we have to charge some tiny amount to prevent DDoS.
|
||||
|
||||
But, when the system is operating at scale, will want the reliable broadcast
|
||||
network to have many operators, who synchronize with each other so that the
|
||||
data is available to each of them and all of them and from each of them and
|
||||
all of them, and can testify when the
|
||||
data became available, so the cost will be
|
||||
many times that. Which is still insignificant. If the network is composed
|
||||
of a hundred operators, and there are substantial overheads to maintain
|
||||
synchrony and truth, we are still only looking a cost of $0.0001 per
|
||||
transaction. Maybe we should charge for opening the account, and then
|
||||
every hundredth transaction.
|
||||
|
||||
We also want the operators to be genuinely independent and separate from each
|
||||
other. We don’t want a single inherently authorized reliable broadcast channel,
|
||||
because it is inherently a low cost target for the fifty one percent attack.
|
||||
I have been thinking about implementing a reliable broadcast channel as
|
||||
byzantine Paxos protocol, but this gives us a massive low cost fifty one
|
||||
percent attack vulnerability. If the reliable broadcast channel is cheap
|
||||
enough to be useful, it is cheap enough for the fifty one percent attack.
|
||||
We want cheap testimony of valuable facts, which makes consensus mechanisms
|
||||
unlikely to work.
|
||||
|
||||
A better way reliably implementing a reliable
|
||||
broadcast channel is as a network of trusted witnesses, each of which keeps an
|
||||
eye on the reliability of other witnesses, because each periodically uploads
|
||||
the unbalanced tree testifying to its total state on several of the others,
|
||||
and each makes it data reciprocally available to several of the others, and
|
||||
each monitors the availability of several of the others, and each provides a
|
||||
signed chain of its total state with each witness, or some witnesses. Because
|
||||
the data is reciprocally available, each can testify to the uptime and
|
||||
reliability of each of the others, and none can hide its state. Each makes
|
||||
each of the nodes in each of its balanced trees available by index, the top
|
||||
node in each of its unbalanced right hand tree is signed, and nodes in the
|
||||
unbalanced tree constitute a separate sql like table, which times out
|
||||
considerably faster than the nodes with a single index.
|
||||
|
||||
Downtime, failure to provide oids on request, and losing its recent state,
|
||||
will be detected and will whack its reputation.
|
||||
|
||||
And it is up to the individual human to decide which trusted witness to follow,
|
||||
which human decision roots all the automatic computer decisions.
|
||||
|
||||
## isolated
|
||||
|
||||
Isolated means that one transaction cannot mangle another transaction.
|
||||
Easiest way to do this is that one gateway of the lightning network cannot
|
||||
handle a transaction while another is pending. Which is going to be a problem
|
||||
when we have nodes handling a lot of traffic, but a more efficient fix can
|
||||
wait till we have traffic problems.
|
||||
|
||||
For a more efficient fix, we will need relative as well as absolute
|
||||
transactions. A relative transaction, on being appended to an absolute
|
||||
transaction, subtracts something from one of the outputs, and adds it to
|
||||
another outputs. If this would produce a negative output, the compound
|
||||
transaction is invalid. But this can wait until we have busy gateways.
|
||||
|
||||
## durable
|
||||
|
||||
Durable means that if something bad happens to your node, you can probably
|
||||
recover, and if you don’t recover, no one else has problems.
|
||||
|
||||
If one party to a gateway goes down and does not come up, possibly because he
|
||||
has lost the secret key to the gateway, the other party drops the most recent
|
||||
transaction of the lightning layer to the blockchain layer.
|
||||
|
||||
If one party to a gateway goes down, but eventually comes up again, possibly
|
||||
with lost data, they resynchronize.
|
||||
|
||||
Suppose you lose your wallet?
|
||||
|
||||
At present the standard way of recovering a wallet is that you create a wallet
|
||||
using your BIPS master password, and it scans the blockchain for coins whose
|
||||
public key corresponds to the secret keys it might have generated.
|
||||
|
||||
Which works to recover your money, but does not recover your metadata, and
|
||||
will not recover a lightning gateway, since your public key is not on the
|
||||
blockchain, rather the sum of your public key, and the public key of other
|
||||
parties to the gateway.
|
||||
|
||||
Metadata is becoming more important. If you lose your Wasabi wallet metadata,
|
||||
you have a big problem, and if we use the wallet as the basis for private end
|
||||
to end encrypted messages that can carry money, and for uncensorable social
|
||||
networking, the wallet will have a lot of very valuable metadata.
|
||||
|
||||
We really need cloud backup of wallets, with the wallet encrypted by a secret
|
||||
key derived from its master key, and cloud backup paid for through a lightning
|
||||
gateway. And this needs to be built into the wallet, the way recovery by
|
||||
scanning the blockchain is built into wallets today.
|
||||
|
||||
# trustless, fast, and free from blockchain analysis
|
||||
|
||||
That full circle transactions are atomic and consistent means that the
|
||||
lightning network can operate without trust or reputation – completely
|
||||
pseudonymous parties with no reputation can become nodes in the network,
|
||||
making it very hard for the state to apply know your customer pressure.
|
||||
|
||||
If the full content of each gateway’s ownership never becomes known
|
||||
except to the parties to the gateway, then the effect is to mingle and
|
||||
merge coins on the blockchain, creating fungibility and untraceability,
|
||||
as well as instant transactions.
|
||||
|
||||
If, on the other hand, we had a rather small number of trusted nodes that
|
||||
have a special, central, and privileged position on the
|
||||
lightning network, this would recreate the traceability problem of
|
||||
correspondence banking. The lightning network has to trust the reliable
|
||||
broadcast channels with reliability and broadcasting, but the information on
|
||||
that channel is meaningless to anyone other than the parties to the
|
||||
transaction.
|
1461
docs/merkle_patricia-dac.md
Normal file
72
docs/mkdocs.sh
Normal file
@ -0,0 +1,72 @@
|
||||
#!/bin/bash
# mkdocs.sh — rebuild HTML from every *.md in docs/, docs/libraries/, and the
# repository root, via pandoc with the shared templates in pandoc_templates/.
# Make-style: a file is rebuilt only when the .md is newer than its .html.
set -e
cd "$(dirname "$0")"

# Pandoc needs different flags per platform: on Windows shells (cygwin/msys)
# force LF line endings and treat warnings as fatal.
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
    osoptions=""
elif [[ "$OSTYPE" == "darwin"* ]]; then
    osoptions=""
elif [[ "$OSTYPE" == "cygwin" ]]; then
    osoptions="--fail-if-warnings --eol=lf "
elif [[ "$OSTYPE" == "msys" ]]; then
    osoptions="--fail-if-warnings --eol=lf "
fi

templates="./pandoc_templates/"
options=$osoptions"--toc -N --toc-depth=5 --wrap=preserve --metadata=lang:en --include-in-header=$templates/header.pandoc --include-before-body=$templates/before.pandoc --include-after-body=$templates/after.pandoc --css=$templates/style.css -o"

# build_md FILE — rebuild FILE's .html if FILE (a *.md) is newer.
# Scans the first four lines of FILE for a katex marker, which turns on
# pandoc's --katex math rendering for that document.
build_md() {
    local f=$1
    local base=${f%.md}    # strip the .md suffix
    if [ "$f" -nt "$base.html" ]; then
        local katex=""
        local i line
        for i in 1 2 3 4; do
            read -r line
            # NOTE(review): the original used the anchored regex `katex$` for
            # docs/ and the unanchored `katex` for libraries/. The unanchored
            # form matches everything either loop matched (and is immune to a
            # stray trailing CR), so it is used for both.
            if [[ $line =~ katex ]]; then
                katex=" --katex=./"
            fi
        done <"$f"
        # $katex and $options are intentionally unquoted: they hold zero or
        # more whitespace-separated pandoc flags.
        pandoc $katex $options "$base.html" "$base.md"
        echo "$base.html from $f"
    #else
    #    echo "    $base.html up to date"
    fi
}

for f in *.md; do
    build_md "$f"
done

cd libraries
for f in *.md; do
    build_md "$f"
done

# Top-level *.md files get a plainer conversion: no TOC, no numbering, no
# katex scan, header/style templates only.
cd ../..
templates=docs/pandoc_templates/
for f in *.md; do
    base=${f%.md}
    if [ "$f" -nt "$base.html" ]; then
        pandoc $osoptions --wrap=preserve --from markdown --to html --metadata=lang:en --include-in-header=$templates/header.pandoc --css=$templates/style.css -o "$base.html" "$base.md"
        echo "$base.html from $f"
    #else
    #    echo "    $base.html up to date"
    fi
done
|
BIN
docs/multischnorr-20151012.pdf
Normal file
600
docs/multisignature.md
Normal file
@ -0,0 +1,600 @@
|
||||
---
|
||||
title:
|
||||
Multisignature
|
||||
# katex
|
||||
---
|
||||
|
||||
To do a Schnorr multisignature, you just list all the signatures that
|
||||
went into it, and the test key is just adding all the public keys
|
||||
together. Which gives you pretty fast, though totally non anonymous,
|
||||
voting, with no need for an elaborate, ingenious, and difficult to
|
||||
understand key distribution system.
|
||||
|
||||
Supposedly [Schnorr multisignature can be done on
|
||||
ED25519](https://datatracker.ietf.org/meeting/99/materials/slides-99-cfrg-collective-edwards-curve-digital-signature-algorithm-draft-ford-cfrg-cosi-00-00)
|
||||
, but I do not see any useful information on how to do it, and Libsodium
|
||||
fails to mention Schnorr.
|
||||
|
||||
However we can easily do it with ristretto25519.
|
||||
|
||||
[Bernstein explains how to do Schnorr, and briefly visits Schnorr
|
||||
multisignature.](./multischnorr-20151012.pdf)
|
||||
|
||||
# Notation for elliptic curve cryptography
|
||||
|
||||
The group operation where one obtains a third elliptic point from two elliptic
|
||||
points is represented by addition, $A=B+C$, capitals represent members of the
|
||||
group, (elliptic points, public keys), lower case represents integers modulo
|
||||
the order of the group (scalars, private keys), and $h()$ is a hash function
|
||||
that maps an arbitrary stream of arguments of arbitrary type to an integer
|
||||
modulo the order of the group.
|
||||
|
||||
An upper case letter represents the result of multiplying the base point of
|
||||
the group by the value represented by the lower case letter. Thus if\
|
||||
$B_{base}$ is the base point then $K=kB_{base}\,$
|
||||
|
||||
$h(\cdots)$ is a hash function that generates an integer modulo the order of the group (hence our use of $h$ rather than $H$) from an arbitrary stream of arguments of arbitrary type. It is hard to reverse, the only efficient way of reversing it being to guess all possible pre images from which it might be constructed.
|
||||
|
||||
# Single Schnorr Signature
|
||||
|
||||
Signer secret key is $k$, his public key is $K$
|
||||
|
||||
## Sign message with $k$
|
||||
|
||||
- Generate an unpredictable $r$ from the message and the secret key. If we ever use the same $r$ with two
|
||||
different values of $c$, we reveal
|
||||
our public key, which is why we
|
||||
make $r$ depend on the same message as we are signing.
|
||||
|
||||
we could use some randomness to generate the konce (key used once, single use secret key), but this would mean that we would sign the same thing twice with two different signatures, which in a much signed item, such as a key or a Zooko id, is undesirable.
|
||||
|
||||
- Compute $r = h(k,M)$, a per message secret scalar. Or $r$ can be an
|
||||
unpredictable secret scalar, randomly generated, but we have to make sure it is truly random and never repeated, which is apt to fail because of computer determinism.
|
||||
- Compute $R$, a per message elliptic point, a Konce, meaning an
|
||||
elliptic point that can be used only once, never to be reused because
|
||||
made public.
|
||||
- Compute $c = h(R,$ Message$)$
|
||||
- Compute $s = r+c*k\,$\
|
||||
Note that:\
|
||||
$S= R+c*K$
|
||||
|
||||
signature is $c, s$
|
||||
|
||||
## Verify signature
|
||||
|
||||
- Check that $c, s$ are valid scalars, which is trivial.
|
||||
- Check that $K$, the signing public key, is a valid member of the
|
||||
prime order group, which is not always trivial.
|
||||
- Compute $R = S − c*K$\
|
||||
The reason the signer makes $s$ public rather than $S$, is that to prevent a signature from being faked by someone who does not know the secret underlying $S$.
|
||||
- Check that $c = h(R,$ Message$)$
|
||||
|
||||
# Schnorr multisignature
|
||||
|
||||
Obvious problem with multisig – you need proof that each putative signer
|
||||
is able to produce a singlesig, hence no space advantage for konces, which is the major crypto currency use. but for voting on
|
||||
the Paxos protocol, major advantage, since the singlesig is apt to be
|
||||
durable and cached, so the signature signifying approval of the
|
||||
blockchain is single Schnorr signature connected to a possibly large
|
||||
collection of individual signing keys. To prove the multisig on a new
|
||||
block is valid, you need to have and have checked a single sig for each
|
||||
of the signatories, but it does not have to be a new singlesig. It can
|
||||
be an old cached singlesig, so you don’t need to download and check
|
||||
each of the singlesigs with each new block. You only need to check a
|
||||
single signature, assuming you already have old singlesigs.
|
||||
|
||||
So, Schnorr multisig, at least in the simple version, presupposes a
|
||||
large existing data structure with good cryptographic properties,
|
||||
presumably a Merkle-patricia tree, listing each party entitled to sign,
|
||||
the self signed signature of that party, and the voting weight and
|
||||
voting rights of that party, and the hash at the root of that tree is a
|
||||
Schnorr multisig.
|
||||
|
||||
This, of course, occupies slightly more space than the simple tuple of
|
||||
signatures, but has the advantage that it does not need to change very
|
||||
often, and usually only a small part of it needs to change, while the
|
||||
multisig by itself is relatively small.
|
||||
|
||||
But it is not that small. For each possible multisig, you have to list
|
||||
the keys of all the signers, and then look up their rights in the Merkle
|
||||
patricia tree, which is OK if the members of the board are hiring the
|
||||
CEO, but is going to suck mightily if the shareholders are electing the
|
||||
board.
|
||||
|
||||
However, chances are in practice, it will commonly be the case that the
|
||||
same group votes over and over again, so, if space and verification time
|
||||
is a concern, you can record previous multisigs and their
|
||||
verification in the Merkle tree, and record the signatures of the latest
|
||||
signing as a delta on some past signature, and do the verification as a
|
||||
delta on some past verification.
|
||||
|
||||
Suppose you are doing the Paxos protocol, and every consensus block has
|
||||
to be signed by one hundred peers, every few minutes. Well, you are
|
||||
still going to have to verify the single sigs of the peers signing the
|
||||
group signature, which gives you no savings in time or space over simple
|
||||
tuple of singlesigs. You are still going to have to verify one hundred
|
||||
peers. But if it is usually the same clique signing each time, it
|
||||
will take you very little time or space to prove that this consensus
|
||||
hash was signed by the same clique as signed the other hundred consensus
|
||||
hashes, which you verified a long time ago, and do not need to repeat
|
||||
the verification. Of course their voting rights might have changed, but
|
||||
if the cliques do not change much, their voting rights probably don’t
|
||||
change much, so you don’t have to do full verification every single
|
||||
time.
|
||||
|
||||
If the latest sig is compatible with the current and older voting rights,
|
||||
the sig should reference the older voting rights Merkle-patricia tree of
|
||||
voting rights, and the peers should validate it on the basis of the
|
||||
current voting rights, thus validating that the older voting right
|
||||
suffices. This could produce a false signature and a fork if the voting
|
||||
rights radically change, and old voters don’t like the change, but in
|
||||
such a large scale public event, it will be highly visible and detected.
|
||||
Each client wallet when talking to a client wallet on another fork will
|
||||
discover that its root is out of date. We implement default wallet
|
||||
behavior to accept the root with the most recent voting rights list, and
|
||||
of two roots with the same voting rights list, the root most recently
|
||||
signed.
|
||||
|
||||
## Generate Signature
|
||||
|
||||
Kevin and Josephine have private keys $k_{Kevin}$ and $k_{Josephine}$, and public
|
||||
keys $K_{Kevin}$ and $K_{Josephine}$,
|
||||
and want to generate a multisig to verify to a verifier that both of them signed
|
||||
|
||||
- Kevin generates an unpredictable scalar $r_{Kevin}$ which is never
|
||||
twice the same, is random rather than deterministic as in single sig
|
||||
(otherwise two multisig signings of the same message will reveal his
|
||||
private key, since he cannot predict or control $c$)\
|
||||
Josephine similarly computes $r_{Josephine}\,$
|
||||
- Kevin shares $R_{Kevin}$ with each of the other
|
||||
signatories, and they with him.
|
||||
- Each of the signatories computes $R = R_{Kevin} +R_{Josephine} +\dots$
|
||||
- Each of the signatories computes $c = h(R,$ Message$)$
|
||||
- Kevin computes $s_{Kevin}=r_{Kevin} + c*k_{Kevin}$\
|
||||
Josephine computes $s_{Josephine}=r_{Josephine} + c*k_{Josephine}$, and
|
||||
similarly each of the other signatories.\
|
||||
$S_{Kevin} = s_{Kevin}*B_{base} = R_{Kevin} + c*K_{Kevin}^{}$
|
||||
- Kevin shares $s_{Kevin}$ with each of the other signatories, and they with him
|
||||
Each of the signatories computes\
|
||||
$s = s_{Kevin} + s_{Josephine} +\dots$\
|
||||
(thus Kevin cannot predict or control $s$, thus has to use a random
|
||||
rather than deterministic konce)
|
||||
- signature is $c, s$
|
||||
|
||||
## Verify Multisignature
|
||||
|
||||
- Check that $c, s$ are valid scalars, which is trivial.
|
||||
- Check that each of the signing keys is a valid member of the prime
|
||||
order group, and a singlesig exists for each signing key of the multisig.
|
||||
- $S=s*B_{base}\,$
|
||||
- Compute $R = S − c*(K_{Kevin} + K_{Josephine} +\dots )$
|
||||
- Check that $c = h(R,$ Message$)$, proving that $S = s*B = R + c*(K_{Kevin}+K_{Josephine}+\dots)$
|
||||
|
||||
Often the checker does not know nor care whether there are multiple
|
||||
co-signers, because the co-signers have already generated an assertion that
|
||||
$(K_{Kevin}+K_{Josephine}+\dots)$ is the public key, and hence does not
|
||||
have to check the singlesig.
|
||||
|
||||
If there are quite a few signers, this should be implemented as
|
||||
server-client.
|
||||
|
||||
Each of the clients submits the signing key and a konce (key used once),
|
||||
server replies with\
|
||||
$K=\sum\limits_{client}K_{client}$, the sum of all signing keys\
|
||||
$R=\sum\limits_{client}R_{client}$, the sum of all konces\
|
||||
and $c=h(R,$ Message$)$, the value to be signed.\
|
||||
Each of the clients checks $c$ and returns $s_{client}$
|
||||
Server replies with\
|
||||
$s=\sum\limits_{client}s_{client}\,$
|
||||
|
||||
Each client checks the signature $c, s$
|
||||
|
||||
For a two party signature, this collapses to a simpler algorithm. Client
|
||||
sends his konce. Server returns $s_{server}$, the sum of all signing keys,
|
||||
and the sum of all konces.
|
||||
Then client computes the signing key, returns it, and the server checks to
|
||||
see if it works. Should be a three packet handshake.
|
||||
|
||||
It is commonly the case that one party is the payee or beneficiary of
|
||||
the thing being signed, and the other party does not particularly need or
|
||||
want the signature, other than he needs to know it exists for his
|
||||
records, and may need to resend the signing secret, accompanied by the
|
||||
signing key and the identifier of the thing being signed.
|
||||
|
||||
In this case, the payee sends the konce (key used once, the single use
|
||||
secret), and does not need
|
||||
the beneficiary’s konce, nor the completed signature,
|
||||
and treats the transaction as completed, because he has seen the thing
|
||||
being signed, and signed it. Doing it the other way around adds an extra
|
||||
point of failure.
|
||||
|
||||
For a large number of parties, you are going to have hold the protocol
|
||||
open for a considerable time.
|
||||
|
||||
For a vast number of parties, you are never going to get everyone to
|
||||
complete the protocol all the way through, so have to use threshold
|
||||
cryptography, where if everyone gets enough information, even if they
|
||||
do not get all of it, they can calculate the signature.
|
||||
|
||||
# Threshold Signatures
|
||||
|
||||
A threshold signature has the interesting feature that it is a randomness
|
||||
beacon. If there is one honest participant party to the signing it generates a
|
||||
random value unpredictable to all participants. This has obvious utility in
|
||||
selecting the witness set and leader in blockdag algorithms equivalent to
|
||||
Practical Byzantine Fault Tolerant Consensus, and in distributing discrete
|
||||
shares in a way that is fair and on average over time approximates
|
||||
continuous stake as closely as possible
|
||||
|
||||
The participants sign off on assertion that their stake is such and such, and
|
||||
the signature itself controls the random distribution of fractional voting
|
||||
shares in the next signature as whole shares, so that voting shares, as
|
||||
nearly as possible, approximate ownership shares.
|
||||
|
||||
This is not in fact a useful discussion of Threshold signatures, so much as a
|
||||
list of links. I don’t entirely trust myself to implement threshold
|
||||
signatures.
|
||||
|
||||
Suredbits describes the [FROST algorithm for Schnorr distributed key generation and signing].
|
||||
This algorithm is not robust. If any one of the
|
||||
participants goes down in the middle you have to start all over, but it is
|
||||
clean, simple, and easy to understand.
|
||||
|
||||
[FROST algorithm for Schnorr distributed key generation and signing]:https://suredbits.com/schnorr-applications-frost/
|
||||
"Schnorr Applications: FROST"
|
||||
|
||||
Each of the participants acts as the trusted dealer for his own share, hands
|
||||
out shares in it to everyone else, and the final key is the sum of everyone's
|
||||
share.
|
||||
|
||||
This description is easily the most intelligible description of distributed
|
||||
key generation and signature generation that I have read, because it is
|
||||
adapted to the special case of Schnorr signatures, which have the huge
|
||||
advantage of linearity. But, not robust.
|
||||
|
||||
The practical limit for non robust schemes is about fifty participants. If
|
||||
you have a hundred participants it keeps starting over and over again, and
|
||||
eventually this causes more and more people to drop out in the middle as
|
||||
they lose patience.
|
||||
|
||||
[Practical Large Scale Distributed Key Generation]:
|
||||
./PracticalLargeScaleDistributedKeyGeneration.pdf
|
||||
|
||||
[Revisiting the Distributed Key Generation for Discrete-Log Based Cryptosystems]:
|
||||
./threshold_shnorr.pdf
|
||||
|
||||
[Practical Large Scale Distributed Key Generation] references
|
||||
[Revisiting the Distributed Key Generation for Discrete-Log Based Cryptosystems]
|
||||
which references Schnorr signatures.
|
||||
|
||||
It is a follow up to the (unscalable and arguably inherently centralized)
|
||||
[Secure Distributed Key Generation for Discrete-Log Based
|
||||
Cryptosystems](./SecureDistributedKeyGeneration.pdf)
|
||||
|
||||
The basic algorithm is that you generate a distributed key, from which
|
||||
a subset of the key recipients can generate a Schnorr signature.
|
||||
|
||||
[Revisiting the Distributed Key Generation for Discrete-Log Based
|
||||
Cryptosystems](./threshold_shnorr.pdf) gives reasonably detailed instructions
|
||||
for implementing threshold Schnorr, without any trusted central authority but
|
||||
assumes various cryptographic primitives that we do not in fact have, among
|
||||
them a reliable broadcast channel.
|
||||
|
||||
## Reliable Broadcast Channel
|
||||
|
||||
A key cryptographic primitive in threshold signatures, and indeed in almost
|
||||
every group cryptographic protocol, is the reliable broadcast channel – that
|
||||
any participant can reliably send a message that is available to all
|
||||
participants.
|
||||
|
||||
In actual practice we have unreliable point to point two party communications,
|
||||
from which we have to construct a broadcast channel.
|
||||
|
||||
Practical applications of these cryptographic protocols seem to be relying on
|
||||
a trusted broadcaster, who is apt to be untrustworthy when there is money,
|
||||
power, or valuable secrets lying on the table.
|
||||
|
||||
Trouble is that in practice, certain messages are likely to be hidden from
|
||||
certain participants, and other participants will be unaware that they are
|
||||
hidden, or they will receive a discrepant message and incorrectly believe they
|
||||
are receiving the same message as others.
|
||||
|
||||
In a large group, one can assume that more than half the participants are
|
||||
honest, so one can construct a broadcast channel using the Paxos protocol.
|
||||
|
||||
Every distributed cryptographic protocol needs a secure broadcast
|
||||
channel, and every blockchain is a secure broadcast channel.
|
||||
|
||||
One of the requirements of secure reliable broadcast channel is that _it
|
||||
stays up_. But a secure broadcast channel for a lightning type
|
||||
transaction is going to be created and shut down. And if it can be
|
||||
legitimately shut down, it can be shut down at exactly the wrong moment
|
||||
for some of the participants and exactly the right time for some of the
|
||||
participants. Hence the use of a “trusted” broadcast authority, who
|
||||
stays up.
|
||||
|
||||
We could attain the same effect by a hierarchy of secure reliable broadcast
|
||||
channels, in which a narrow subchannel involving a narrower set of
|
||||
participants can be set up on the broader channel, and shutdown, with its
|
||||
final shutdown signature available in the broader channel, such that someone
|
||||
who has the right code can find on the durable broadcast channel, the signature he needs.
|
||||
|
||||
But interesting protocols are likely to involve small groups for which we want
|
||||
the transaction to fail if any of the participants are defecting.
|
||||
|
||||
For example, the lightning protocol is cryptographically enforced
|
||||
correspondence banking, and an eternal problem in correspondence banking is
|
||||
insider check kiting. A shill sends a check to another shill, so that one
|
||||
correspondence banker can scam another correspondence banker, so the group
|
||||
attempting to organize the transaction is going to consist of two shills, one
|
||||
scammer, one pigeon, and one innocent third party roped in to obscure who is
|
||||
doing the scamming and who is being scammed, giving a majority of three evil
|
||||
participants against two good and trusting participants.
|
||||
|
||||
By and large, the more money and power that is on the table, the smaller the
|
||||
group engaging in the cryptographic protocol is apt to be.
|
||||
|
||||
We want the transaction to fail in such cases. Generalizing to all
|
||||
group cryptographic protocols, we want the broadcast channel to fail and
|
||||
to be seen to fail in such cases.
|
||||
|
||||
The Byzantine Paxos protocol is designed for a large group and is intended to
|
||||
keep going permanently in the face of the hardware or software failure of some
|
||||
participants, and Byzantine defection by a small conspiracy of participants.
|
||||
|
||||
For a reliable broadcast channel to be reliable, you are relying on it to
|
||||
stay up, because if it goes down and stays down, its state for transactions
|
||||
near the time it went down cannot be clearly defined.
|
||||
|
||||
For a reliable broadcast channel to be in a well defined state on shutdown,
|
||||
it has to have continued broadcasting its final state to anyone interested
|
||||
for some considerable time after it reached its final state. So you are
|
||||
trusting someone to keep it going and available. In this sense, no group
|
||||
cryptographic transaction can be entirely trustless.
|
||||
|
||||
I intend that the rho blockchain will primarily be a notarization system that
|
||||
just happens to special case notarizing rhocoin transactions. The notaries
|
||||
will be a collection of durable broadcast channels, each one typically
|
||||
maintained by a single host, albeit a notarization will not be evidence usable
|
||||
on the blockchain until its notary block is Merkle chained by the blockchain.
|
||||
If the blockchain automatically trusts notary signatures, they will rapidly
|
||||
cease to be trustworthy. The chain, not the signature, makes it officially
|
||||
official. The notary signature and oid is merely a promise to make it
|
||||
official. The blockchain will treat notaries as untrusted, so that everyone
|
||||
else can treat them as trusted at low risk.
|
||||
|
||||
## scaling
|
||||
|
||||
According to [Practical Large Scale Distributed Key Generation](./PracticalLargeScaleDistributedKeyGeneration.pdf) their algorithm is of
|
||||
order ${[log (n)]}^3$, meaning it should produce a Schnorr signature for
|
||||
thousands of voters with hundreds of thousands of shares, in a potentially
|
||||
decentralized manner, without a trusted dealer, making it useful for digital
|
||||
corporations, and capable of producing a chain of signatures
|
||||
(shareholders sign the board, board signs the CEO, CEO signs every corporate
|
||||
officer identity, CEO organizes the election of the board), capable of being
|
||||
evaluated by everyone interacting with the business over the net.
|
||||
|
||||
Obviously the reliable broadcast protocol of such a very large scale key
|
||||
generation will look more like a regular blockchain, since many entities will
|
||||
drop out or fail to complete directly.
|
||||
|
||||
# [Blind SchnorrSignature.](https://www.math.uni-frankfurt.de/~dmst/teaching/WS2013/Vorlesung/Pointcheval,Stern.pdf)
|
||||
|
||||
[See also](https://eprint.iacr.org/2019/877.pdf).
|
||||
|
||||
[and](https://suredbits.com/schnorr-applications-blind-signatures/)
|
||||
|
||||
Blind Schnorr signatures are vulnerable to the [Wagner attack], which
|
||||
can be defeated by refusing to do large numbers of blind signatures in
|
||||
parallel, and/or randomly failing to complete some blind signatures.
|
||||
|
||||
[Wagner attack]:https://www.iacr.org/archive/crypto2002/24420288/24420288.pdf
|
||||
|
||||
# Regular Elliptic Signature
|
||||
|
||||
Signer secret scalar $k$. Signer public point $K=k*B$. $B$ is base point.
|
||||
|
||||
Signature is scalar $s$ and point $R$, such that
|
||||
$S = s*B = h(R,$ Message$)*K+ R$
|
||||
|
||||
## Signing
|
||||
|
||||
Generate random secret scalar $r$, public point $R=r*B$
|
||||
|
||||
calculate public scalar $s = h(R,$ Message$)*k + r$
|
||||
|
||||
Reveal s and R for message.
|
||||
|
||||
## [Blind signing using a regular elliptic signature](https://pdfs.semanticscholar.org/e58a/1713858a5b9355a9e18adfe3abfc05de244e.pdf)
|
||||
|
||||
# Pairing based cryptography
|
||||
|
||||
In pairing based cryptography the computational Diffie--Hellman problem
|
||||
is believed to be infeasible while the simpler decisional
|
||||
Diffie--Hellman problem can be easily solved using the pairing function.
|
||||
|
||||
In a cryptographic group, given $G$, $xG$, and $yG$ it is hard to find
|
||||
$xyG$, unless you know $x$ or $y$.
|
||||
|
||||
In a pairing based group, given $G$, $xG$, $yG$ and $xyG$ you can
|
||||
easily *verify* that $xyG$ is correct, even though you are unable to
|
||||
calculate the correct value.
|
||||
|
||||
Pairing based cryptography can do all sorts of really cool things, and I
|
||||
am not sure how fast it can do them, but we do not in fact have any
|
||||
obvious need for it.
|
||||
|
||||
## Proposed uses for pairing based crypto
|
||||
|
||||
We want money payments to operate in private invitation only messaging
|
||||
and blog groups, without causing the group to be exposed.
|
||||
|
||||
We also need pairing based crypto to interface between crypto currency,
|
||||
and the corporate form, though threshold signatures. So we have a white
|
||||
face, a grey face, and a black face. The white face is the interface
|
||||
between crypto cash and to the corporate form that exists both
|
||||
cryptographically and on government registries, the grey face is the
|
||||
interface to the corporate form that exists only cryptographically, not
|
||||
registered with the state (with the result that scams will abound) and
|
||||
the black face is secret invitation only blogs and messaging groups,
|
||||
within which it is possible to make secret payments.
|
||||
|
||||
These invitation only blogs and messaging groups will exist with and
|
||||
within open searchable blogs and messaging groups, hidden by the secret
|
||||
handshake protocol.
|
||||
|
||||
The structure will be ultimately rooted in [Zooko’s triangle](./zookos_triangle.html), but normal
|
||||
people will most of the time sign in by zero knowledge password
|
||||
protocol, your identity will be derivative from someone else’s Zooko
|
||||
based identity.
|
||||
|
||||
Useful links on this topic are “XDH assumption”, “pairing based
|
||||
cryptography”, “Bilinear Diffie-Hellman”, and “gap Diffie—Hellman
|
||||
(GDH) groups”.
|
||||
|
||||
A list of libraries now
|
||||
[available](https://gist.github.com/artjomb/f2d720010506569d3a39). PBC
|
||||
looks like the best. MIRACL uses a strong copyleft license. AGPL: all
|
||||
the software linked against free software (free in GNU/FSF sense) is
|
||||
also free software and freely available. GNU Public License is the most
|
||||
famous of such “strong copyleft” FOSS licenses. The GPL copyleft
|
||||
clause triggers when an application is distributed outside of company
|
||||
boundaries. And servicing customers from a company server running the
|
||||
code constitutes distribution.
|
||||
|
||||
MIRACL is written in C, and can be used from C, but is designed for C++.
|
||||
Comes with inline C++ wrappers.
|
||||
|
||||
But hell, I like C++. But C++ is not going to fly on android. Scala does
|
||||
not support Visual Studio, and visual studio does not really support
|
||||
android, though it confidently believes that it does.
|
||||
|
||||
Useful threshold signatures requires pairing based cryptography. And
|
||||
pairing based cryptography also needed for useful implementation of the
|
||||
secret handshake (green beard, Masonic Lodge) problem. Allegedly good
|
||||
for zero knowledge password protocol, though I am pretty sure that
|
||||
problem has been solved without using pairing based cryptography.
|
||||
|
||||
Pairing based cryptography is usually described using the notation that
|
||||
the group operation is multiplication, and the one way function
|
||||
combining an integer with a member of the group to produce another
|
||||
member of the group is exponentiation. But pairing based cryptography is
|
||||
easier to understand if we call the group operation addition, whereupon
|
||||
we call the application of an integer modulo the order of the group
|
||||
multiplication, instead of calling it exponentiation.
|
||||
|
||||
In pairing based cryptography, the group supports addition (it is
|
||||
commutative and associative), and *also supports something like multiplication*
|
||||
(it is also commutative and associative), albeit the result of multiplication
|
||||
is not a member of the original group, but a member of another group,
|
||||
the pair group.
|
||||
|
||||
That it supports something like multiplication is described as “Bilinear
|
||||
Diffie-Hellman”, and if you call it “something like multiplication”
|
||||
people who are experts in the field will conclude you are an idiot.
|
||||
|
||||
So, let us, as usual, use the notation that members of the group are capital
|
||||
letters, and integers modulo the order of the group are lower case
|
||||
italics. Let us denote members of the pair group, the result of
|
||||
multiplying members of the original group with each other, with Greek letters.
|
||||
|
||||
Which notation allows us to leave out all the academic stuff about $\forall P \in
|
||||
G$. If it is a regular capital, it stands for any member of $G$, unless
|
||||
otherwise specified. In proper academic language (apart from the fact
|
||||
that I am leaving out all the $\forall P \in G$ stuff):
|
||||
|
||||
Let $G$ be a cyclic group of prime order written additively and $ϓ$
|
||||
another cyclic group of the same order written multiplicatively. A pairing is
|
||||
a map: $e : G × G → ϓ$ , which satisfies the following properties:
|
||||
|
||||
- Bilinearity: $\forall a, b, P, Q : e(aP, bQ) = e(P, Q)^{ab}$
|
||||
(notice that the left hand side of the equals sign is written
|
||||
additively, and the right hand side written multiplicatively)
|
||||
- Non-degeneracy: e ≠ 1
|
||||
- Computability: there exists an efficient algorithm to compute e
|
||||
|
||||
Whereupon in this notation:
|
||||
$B_{base}$, the base point, is widely known, Ann’s private key is $a$, her public key is $A = aB_{base}$
|
||||
|
||||
In C++ it is useful to represent both groups additively, allowing the
|
||||
operation of the addition of any member of either group to another
|
||||
member of the same group, the multiplication of any member of either
|
||||
group by an integer, producing another member of the same group, and the
|
||||
operation e(P,Q), where P and Q are members of the first group, as infix
|
||||
multiplication producing a member of the second group.
|
||||
|
||||
In this notation the magic equality becomes
|
||||
|
||||
Bilinearity: $\forall a, b, P, Q :$
|
||||
$(a*P)*(b*Q) == (a*b)*(P*Q)$
|
||||
|
||||
Requiring us, in C++, to create an infix multiplication operator for the
|
||||
mapping, an infix addition operator for each group, and an infix
|
||||
multiplication by integer operator for each group.
|
||||
|
||||
To sign a document with the secret key $a$, publish $M = a*h($Message$)*B_{base}$
|
||||
|
||||
To test the signature, check that
|
||||
|
||||
$A*(h(Message)B_{base})=B_{base}*M$
|
||||
|
||||
Which it should because $(a*B_{base})*(h($Message$)*B_{base}) =
|
||||
B_{base}*(a*h($Message$)*B_{base})$ by bilinearity.
|
||||
|
||||
The threshold variant of this scheme is called GDH threshold signature
|
||||
scheme, Gap Diffie Hellman threshold signature scheme.
|
||||
|
||||
This scheme is also good for blind signatures, because if you sign an
|
||||
arbitrary point of the group, which the person asking for the signature
|
||||
knows is sum of an already signed point of the group, multiplied by a
|
||||
random secret, plus the thing he actually wants signed, he can then
|
||||
subtract the two signed quantities to find a third signed quantity,
|
||||
corresponding to a well formed token, that token unknown to the signer.
|
||||
|
||||
## Secret Handshakes
|
||||
|
||||
[Paraphrasing](./secret_handshakes.pdf)
|
||||
|
||||
The Secret society of evil has a frequently changing secret key
|
||||
$k_{evil}$ Ann has a secret key $k_{Ann}$ and public key $K_{Ann} =
|
||||
k_{Ann}B$, Bob has a secret key $k_{Bob}$ and public key $K_{Bob} =
|
||||
k_{Bob}B$
|
||||
|
||||
Let $h(…)$ represent a hash of its serialized arguments, which
|
||||
hash is an integer modulo the order of the group. Let $H(…) = h(…)B$.
|
||||
Streams are concatenated with a boundary marker, and accidental
|
||||
occurrences of the boundary marker within a stream are escaped out.
|
||||
|
||||
The evil overlord of the evil society of evil issues Ann and Bob a
|
||||
signed hash of their public keys. For Ann, the signature is
|
||||
$k_{evil}H($“Ann”, $K_{Ann},$ “evil secret society of evil”$)$, similarly for Bob
|
||||
|
||||
The problem is that Ann does not know whether Bob is also a member of
|
||||
the secret society of evil, and wants to send him a message that is only
|
||||
going to be intelligible to him if he secretly has a key signed by the
|
||||
secret society of evil, but is not going to be recognizable to third
|
||||
parties as signed by the secret society of evil.
|
||||
|
||||
So, they use as part of their shared secret whereby they encrypt
|
||||
messages, the secret that Ann can calculate:\ $[k_{evil}H($“Ann”,
|
||||
$K_{Ann},$ “evil secret society of evil”$)]*H($“Bob”, $K_{Bob},$ “evil
|
||||
secret society of evil”$)$
|
||||
|
||||
Bob calculates:
|
||||
$H($“Ann”, $K_{Ann},$ “evil secret society of evil”$)
|
||||
*[k_{evil}H($“Bob”, $K_{Bob},$ “evil secret society of evil”$)]$
|
||||
|
||||
Thus proving possession of their evil keys to each other without
|
||||
revealing them to each other, and without revealing that possession to
|
||||
someone who does not have the expected evil key.
|
||||
|
||||
Overly complicated and excessively clever. In practice, if you have a
|
||||
secret society, you have a secret chatroom, in which case the routing
|
||||
metadata on messages going to and from the chatroom are traceable if
|
||||
they pass through enemy networks.
|
||||
|
||||
But suppose you want battlespace IFF (Identify Friend or Foe). You want to
|
||||
send a message directly to an unknown target that will identify as friendly
|
||||
if the other guy is a friendly, but not necessarily let him know enemy if he
|
||||
is enemy. If he already knows you are enemy, you don’t want to give him
|
||||
the means to use IFF to identify your friends.
|
350
docs/name_system.md
Normal file
@ -0,0 +1,350 @@
|
||||
---
|
||||
title: Name System
|
||||
---
|
||||
We intend to establish a system of globally unique wallet names, to resolve
|
||||
the security hole that is the domain name system, though not all wallets will
|
||||
have globally unique names, and many wallets will have many names.
|
||||
|
||||
Associated with each globally unique name is set of name servers. When one’s
|
||||
wallet starts up, then if your wallet has globally unique name, it logs in
|
||||
to its name server, which will henceforth direct people to that wallet. If
|
||||
the wallet has a network accessible tcp and/or UDP address it directs people
|
||||
to that address (one port only, protocol negotiation will occur once the
|
||||
connection is established, rather than protocols being defined by the port
|
||||
number). If not, will direct them to a UDT4 rendezvous server, probably itself.
|
||||
|
||||
We probably need to support [uTP for the background download of bulk data].
|
||||
This also supports rendezvous routing, though perhaps in a different and
|
||||
incompatible way, excessively married to the bittorrent protocol. We might
|
||||
find it easier to construct our own throttling mechanism in QUIC,
|
||||
accumulating the round trip time and square of the round trip time excluding
|
||||
outliers, to form a short term and long term average and variance of the
|
||||
round trip time, and throttling lower priority bulk downloads and big
|
||||
downloads when the short term average rises above the long term average by
|
||||
more than the long term variance. The long term data is zeroed when the IP
|
||||
address of the default gateway(router) is acquired, and is timed out over a
|
||||
few days. It is also ceilinged at a couple of seconds.
|
||||
|
||||
[uTP for the background download of bulk data]: https://github.com/bittorrent/libutp
|
||||
|
||||
In this day and age, a program that lives only on one machine really is not
|
||||
much of a program, and the typical user interaction is a user driving a gui
|
||||
on one machine which is a gui to a program that lives on a machine a thousand
|
||||
miles away.
|
||||
|
||||
We have a problem with the name system, the system for obtaining network
|
||||
addresses, in that the name system is subject to centralized state control,
|
||||
and the TCP-SSL system is screwed by the state, which is currently seizing
|
||||
crimethink domain names, and will eventually seize untraceable crypto
|
||||
currency domain names.
|
||||
|
||||
In today’s environment, it is impossible to speak the truth under one’s true
|
||||
name, and dangerous to speak the truth even under any durable and widely used
|
||||
identity. Therefore, people who post under names tend to be unreliable.
|
||||
Hence the term “namefag”. If someone posts under his true name, he is a
|
||||
“namefag” – probably unreliable and lying. Even someone who posts under a
|
||||
durable pseudonym is apt to show excessive restraint on many topics.
|
||||
|
||||
The aids virus does not itself kill you. The aids virus “wants” to stick
|
||||
around to give itself lots of opportunities to infect other people, so wants
|
||||
to disable the immune system for obvious reasons. Then, without an immune
|
||||
system, something else is likely to kill you.
|
||||
|
||||
When I say “wants”, of course the aids virus is not conscious, does not
|
||||
literally want anything at all. Rather, natural selection means that a virus
|
||||
that disables the immune system will have opportunities to spread, while a
|
||||
virus that fails to disable the immune system only has a short window of
|
||||
opportunity to spread before the immune system kills it, unless it is so
|
||||
virulent that it likely kills its host before it has the opportunity to
|
||||
spread.
|
||||
|
||||
Similarly, a successful memetic disease that spreads through state power,
|
||||
through the state system for propagation of official truth “wants” to disable
|
||||
truth speaking and truth telling – hence the replication crisis, peer
|
||||
review, and the death of science. We are now in the peculiar situation that
|
||||
truth is best obtained from anonymous sources, which is seriously suboptimal.
|
||||
Namefags always lie. The drug companies are abandoning drug development,
|
||||
because science just does not work any more. No one believes their research,
|
||||
and they do not believe anyone else’s research.
|
||||
|
||||
It used to be that there were a small number of sensitive topics, and if you
|
||||
stayed away from those, you could speak the truth on everything else, but now
|
||||
it is near enough to all of them that it might as well be all of them, hence
|
||||
the replication crisis. Similarly, the aids virus tends to wind up totally
|
||||
suppressing the immune system, even though more selective shutdown would
|
||||
serve its interests more effectively, and indeed the aids virus starts by
|
||||
shutting down the immune system in a more selective fashion, but in the end
|
||||
cannot help itself from shutting down the immune system totally.
|
||||
|
||||
The memetic disease, the demon, does not “want” to shut down truth telling
|
||||
wholesale. It “wants” to shut down truth telling selectively, but inevitably,
|
||||
there is collateral damage, so it winds up shutting down truth telling
|
||||
wholesale.
|
||||
|
||||
To exorcise the demon, we need a prophet, and since the demon occupies the
|
||||
role of the official state church, we need a true king. Since there is a
|
||||
persistent shortage of true Kings, I am here speaking as an engineer rather
|
||||
than a prophet, so I am discussing the anarcho agorist solution to anarcho
|
||||
tyranny, the technological solution, not the true king solution.
|
||||
|
||||
Because of the namefag problem and the state snatching domain names, we need,
|
||||
in order to operate an untraceable blockchain based currency not only a
|
||||
decentralized system capable of generating consensus on who owns what cash,
|
||||
we need a system capable of generating consensus on who owns which human
|
||||
readable globally unique names, and the mapping between human readable names,
|
||||
Zooko triangle names (which correspond to encryption public keys), and
|
||||
network addresses, a name system resistant to the state’s attempts to link
|
||||
names to jobs, careers, and warm bodies that can be beaten up or imprisoned,
|
||||
to link names to property, to property that can be confiscated or destroyed.
|
||||
|
||||
A transaction output can hold an amount of currency, or a minimum amount of
|
||||
currency and a name. Part of the current state, which every block contains,
|
||||
is unused transaction outputs sorted by name.
|
||||
|
||||
If we make unused transaction outputs sorted by name available, might as well
|
||||
make them available sorted by key.
|
||||
|
||||
In the hello world system, we will have a local database mapping names to
|
||||
keys and to network addresses. In the minimum viable product, a global
|
||||
consensus database. We will, however, urgently need a rendezvous system that
|
||||
allows people to set up wallets and peers without opening ports on stable
|
||||
network address to the internet. Arguably, the minimum viable product will
|
||||
have a global database mapping between keys and names, but also a nameserver
|
||||
system, wherein a host without a stable network address can login to a host
|
||||
with a stable network address, enabling rendezvous. When one identity has its
|
||||
name servers registered in the global consensus database, it always tries to
|
||||
login to those and keep the connection alive with a ping that starts out
|
||||
frequent, and then slows down on the Fibonacci sequence, to one ping every
|
||||
1024 seconds plus a random number modulo 1024 seconds. At each ping, tells the
|
||||
server when the next ping is coming, and if the server does not get the
|
||||
expected ping, server sends a nack. If the server gets no ack, logs the
|
||||
client out. If the client gets no ack, retries, if still no ack, tries to
|
||||
login to the next server.
|
||||
|
||||
In the minimum viable product, we will require everyone operating a peer
|
||||
wallet to have a static IP address and port forwarding for most functionality
|
||||
to work, which will be unacceptable or impossible for the vast majority of
|
||||
users, though necessarily we will need them to be able to receive money
|
||||
without port forwarding, a static IP, or a globally identified human readable
|
||||
name, by hosting their client wallet on a particular peer. Otherwise no one
|
||||
could get crypto currency, because they would first need to set up a peer.
|
||||
|
||||
Because static IP is a pain, we should also support nameserver on the state
|
||||
run domain name system, as well as nameserver on our peer network, but that
|
||||
can wait a while. And in the end, when we grow so big that every peer is
|
||||
itself a huge server farm, when we have millions of users and a thousand or
|
||||
so peers, the natural state of affairs is for each peer to have a static IP.
|
||||
|
||||
Eventually we want people to be able to do without static IPs and
|
||||
portforwarding, which is going to require a UDP layer. On the other hand, we
|
||||
only intend to have a thousand or so full peers, even if we take over and
|
||||
replace the US dollar as the world monetary system. Our client wallets are
|
||||
going to be the primary beneficiaries of rendezvous UDT4.8 routing over UDP.
|
||||
|
||||
We also need names that you can send money to, and names under which you can
|
||||
receive money. The current cryptocash system involves sending money to
|
||||
cryptographic identifiers, which is a pain. We would like to be able to send
|
||||
and receive money without relying on identifiers that look like line noise.
|
||||
|
||||
So we need a system similar to namecoin, but namecoin relies on proof of
|
||||
work, rather than proof of stake, and the state’s computers can easily mount
|
||||
a fifty one percent attack on proof of work. We need a namecoin like system
|
||||
but based on proof of stake, rather than proof of work, so that for the state
|
||||
to take it over, it would need to pay off fifty one percent of the
|
||||
stakeholders – and thus pay off the people who are hiding behind the name
|
||||
system to perform untraceable crypto currency transactions and to speak the
|
||||
unspeakable.
|
||||
|
||||
For anyone to get started, we are going to have to enable them to operate a
|
||||
client wallet without IP and port forwarding, by logging on to a peer wallet.
|
||||
The minimum viable product will not be viable without a client wallet that
|
||||
you can use like any networked program. A client wallet logged onto a peer
|
||||
wallet automatically gets the name `username.peername`. The peer could give
|
||||
the name to someone else though error, malice or equipment failure, but the
|
||||
money will remain in the client’s wallet, and will be spendable when he
|
||||
creates another username with another peer. Money is connected to wallet
|
||||
master secret, which should never be revealed to anyone, not with the
|
||||
username. So you can receive money with a name associated with an evil nazi
|
||||
identity as one username on one peer, and spend it with a username associated
|
||||
with a social justice warrior on another peer. No one can tell that both
|
||||
names are controlled by the same master secret. You send money to a username,
|
||||
but it is held by the wallet, in effect by the master secret, not by the
|
||||
user name. That people have usernames, that money goes from one username to
|
||||
another, makes transferring money easy, but by default the money goes through
|
||||
the username to the master secret behind the quite discardable username,
|
||||
thus becomes anonymous, not merely pseudonymous after being received. Once
|
||||
you have received the money, you can lose the username, throw it away, or
|
||||
suffer it being confiscated by the peer, and you, not the username, still
|
||||
have the money. You only lose the money if someone else gets the master
|
||||
secret.
|
||||
|
||||
You can leave the money in the username, in which case the peer hosting your
|
||||
username can steal it, but for a hacker to steal it he needs to get your
|
||||
master secret and logon password, or you transfer it to the master secret on
|
||||
your computer, in which case a hacker can steal it, but the peer cannot, and
|
||||
also you can spend it from a completely different username. Since most people
|
||||
using this system are likely to be keen on privacy, and have no good reason
|
||||
to trust the peer, the default will be for the money to go from the username
|
||||
to the master secret.
|
||||
|
||||
Transfers of money go from one username to another username, and this is
|
||||
visible to the person who sent it and the person who received it, but if the
|
||||
transfer is to the wallet and the master secret behind the username, rather
|
||||
than to the username, this is not visible to the hosts. Money is associated
|
||||
with a host and this association is visible, but it does not need to be the
|
||||
same host as your username. By default, money is associated with the host
|
||||
hosting the username that receives it, which is apt to give a hint to which
|
||||
username received it, but you can change this default. If you are receiving
|
||||
crypto currency under one username, and spending it under another username on
|
||||
another host, it is apt to be a good idea to change this default to the host
|
||||
that is hosting the username that you use for spending, because then spends
|
||||
will clear more quickly. Or if both the usernames and both the hosts might
|
||||
get investigated by hostile people, change the default to a host that is
|
||||
hosting your respectable username that you do not use much.
|
||||
|
||||
We also need a state religion that makes pretty lies low status, but that is
|
||||
another post.
|
||||
|
||||
# Mapping between globally unique human readable names and public keys
|
||||
|
||||
The blockchain provides a Merkle-patricia dag of human readable names. Each
|
||||
human readable name links to a list of signatures transferring ownership from
|
||||
one public key to the next, terminating in an initial assignment of the name
|
||||
by a previous block chain consensus. A client typically keeps a few leaves
|
||||
of this tree. A host keeps the entire tree, and provides portions of the tree
|
||||
to each client.
|
||||
|
||||
When two clients link up by human readable name, they make sure that they are
|
||||
working off the same early consensus, the same initial grant of user name by
|
||||
an old blockchain consensus, and also off the same more recent consensus,
|
||||
for possible changes in the public key that has rightful ownership of that
|
||||
name. If they see different Merkle hashes at the root of their trees, the
|
||||
connection fails. Thus the blockchain they are working from has to be the
|
||||
same originally, and also the same more recently.
|
||||
|
||||
This system ensures we know and agree what the public key associated with a
|
||||
name is, but how do we find the network address?
|
||||
|
||||
# Mapping between public keys and network addresses
|
||||
|
||||
## The Nameserver System
|
||||
|
||||
Typically someone is logged in to a host with an identity that looks like an
|
||||
email address, `paf.foo.bar`, where `bar` is the name of a host that is
|
||||
reliably up, and reliably on the network, and relatively easy to find
|
||||
|
||||
You can ask the host `bar` for the public key and *the network address* of
|
||||
`foo.bar`, or conversely the login name and network address associated with
|
||||
this public key. Of course these values are completely subject to the caprice
|
||||
of the owner of `bar`. And, having obtained the network address of `foo.bar`,
|
||||
you can then get the network address of `paf.foo.bar`
|
||||
|
||||
Suppose someone owns the name `paf`, and you can find the global consensus as
|
||||
to what public key controls `paf`, but he does not have a stable network
|
||||
address. He can instead provide a nameserver – another entity that will
|
||||
provide a rendezvous. If `paf` is generally logged in to `foo`, you can
|
||||
contact `foo`, to get rendezvous data for `paf.foo`, which is, supposing `foo`
|
||||
to be well behaved, rendezvous data for `paf`
|
||||
|
||||
Starting from a local list of commonly used name server names, keys, and
|
||||
network addresses, you eventually get a live connection to the owner of that
|
||||
public key, who tells you that at the time he received your message, the
|
||||
information is up to date, and, for any globally unique human readable names
|
||||
involved in setting up the connection, he is using the same blockchain as you
|
||||
are using.
|
||||
|
||||
Your local list of network addresses may well rapidly become out of date.
|
||||
Information about network addresses flood fills through the system in the
|
||||
form of signed assertions about network addresses by owners of public keys,
|
||||
with timeouts on those assertions, and where to find more up to date
|
||||
information if the assertion has timed out, but we do not attempt to create a
|
||||
global consensus on network addresses. Rather, the authoritative source of
|
||||
information about a network address of a public key comes from successfully
|
||||
performing a live connection to the owner of that public key. You can, and
|
||||
probably should, choose some host as the decider on the current tree of
|
||||
network addresses, but we don’t need to agree on the host. People can work
|
||||
off slightly different mappings about network addresses with no global and
|
||||
complete consensus. Mappings are always incomplete, out of date, and usually
|
||||
incomplete and out of date in a multitude of slightly different ways.
|
||||
|
||||
We need a global consensus, a single hash of the entire blockchain, on what
|
||||
public keys own what crypto currency and what human readable names. We do not
|
||||
need a global consensus on the mapping between public keys and network
|
||||
addresses.
|
||||
|
||||
What you would like to get is an assertion that `paf.foo.bar` has public key
|
||||
such and such, and whatever you need to make network connection to
|
||||
`paf.foo.bar`, but likely `paf.foo.bar` has transient public key, because his
|
||||
identity is merely a username and login at `foo.bar`, and transient network
|
||||
address, because he is behind nat translation. So you ask `bar` about
|
||||
`foo.bar`, and `foo.bar` about `paf.foo.bar`, and when you actually contact
|
||||
`paf.foo.bar`, then, and only then, you know you have reliable information.
|
||||
But you don’t know how long it is likely to remain reliable, though
|
||||
`paf.foo.bar` will tell you (and no other source of information is
|
||||
authoritative, or as likely to be accurate).
|
||||
|
||||
Information about the mapping between public keys and network addresses that
|
||||
is likely to be durable flood fills through the network of nameservers.
|
||||
|
||||
# Logon identity
|
||||
|
||||
Often, indeed typically, `ann.foo` contacts `bob.bar`, and `bob.bar` needs
|
||||
continuity information, needs to know that this is truly the same `ann.foo`
|
||||
as contacted him last time – which is what we currently do with usernames and
|
||||
passwords.
|
||||
|
||||
The name `foo` is rooted in a chain of signatures of public keys and requires
|
||||
a global consensus on that chain. But the name `ann.foo` is rooted in logon
|
||||
on `foo`. So `bob.bar` needs to know that `ann.foo` can log on with `foo`,
|
||||
which `ann.foo` does by providing `bob.bar` with a public key signed by `foo`,
|
||||
which might be a transient public key generated the last time she logged
|
||||
on, which will disappear the moment her session on her computer shuts down,
|
||||
or might be a durable public key. But if it is a durable public key, this
|
||||
does not give her any added security, since `foo` can always make up a new
|
||||
public key for anyone he decides to call `ann.foo` and sign it, so he might
|
||||
as well put a timeout on the key, and `ann.foo` might as well discard it when
|
||||
her computer turns off or goes into sleep mode. So, it is in everyone’s
|
||||
interests (except that of attackers) that only root keys are durable.
|
||||
|
||||
`foo`’s key is durable, and information about it is published. `ann.foo`’s
|
||||
key is transient, and information about it always obtained directly from
|
||||
`ann.foo` as a result of `ann.foo` logging in with someone, or as a result of
|
||||
someone contacting `foo` with the intent of logging in to `ann.foo`.
|
||||
|
||||
But suppose, as is likely, the network address of `foo` is not actually all
|
||||
that durable, is perhaps behind a NAT. In that case, it may well be that to
|
||||
contact `foo`, you need to contact `bar`.
|
||||
|
||||
So, `foo!bar` is `foo` logged in on `bar`, but not by a username and
|
||||
password, but rather logged on by his durable public key, attested by the
|
||||
blockchain consensus. So, you get an assertion, flood filled through the
|
||||
nameservers, that the network address of the public key that the blockchain
|
||||
asserts is the rightful controller of `foo`, is likely to be found at `foo!`
|
||||
(public key of `bar`), or likely to be found at `foo!bar`.
|
||||
|
||||
Logons by durable public key will work exactly like logons by username and
|
||||
password, or logons by derived name. It is just that the name of the entity
|
||||
logged on has a different form.
|
||||
|
||||
Just as openssh has logons by durable public key, logons by public key
|
||||
continuity, and logons by username and password, but once you are logged on,
|
||||
it is all the same, you will be able to logon to `bob.bar` as `ann.bob.bar`,
|
||||
meaning a username and password at `bob.bar`, as `ann.foo`, meaning `ann` has
|
||||
a single signon at `foo`, a username and password at `foo`, or as `ann`,
|
||||
meaning `ann` logs on to `bob.bar` with a public key attested by the
|
||||
blockchain consensus as belonging to `ann`.
|
||||
|
||||
And if `ann` is currently logged on to `bob.bar` with a public key attested
|
||||
by the blockchain consensus as belonging to `ann`, you can find the current
|
||||
network address of `ann` by asking `bob.bar` for the network address of
|
||||
`ann!bob.bar`
|
||||
|
||||
`ann.bob.bar` is whosoever `bob.bar` decides to call `ann.bob.bar`, but
|
||||
`ann!bob.bar` is an entity that controls the secret key of `ann`, who is at
|
||||
this moment logged onto `bob.bar`.
|
||||
|
||||
If `ann` asserts her current network address is likely to last a long time,
|
||||
and is accessible without going through `bob.bar`, then that network
|
||||
address information will flood fill through the
|
||||
network. Less useful network address information, however, will not get far.
|
266
docs/number_encoding.md
Normal file
@ -0,0 +1,266 @@
|
||||
---
|
||||
lang: en
|
||||
title: Number encoding
|
||||
---
|
||||
|
||||
# The problem to be solved
|
||||
|
||||
As computers and networks grow, any fixed length fields
|
||||
in protocols tend to become obsolete. Therefore, for future
|
||||
upwards compatibility, we want to have variable precision
|
||||
numbers.
|
||||
|
||||
## Use case
|
||||
|
||||
QR codes and prefix free number encoding is useful in cases where we want data to be self describing – this bunch of bits is to be interpreted in a certain way, used in a certain action, means one thing, and not another thing. At present there is no standard for self description. QR codes are given meanings by the application, and could carry completely arbitrary data whose meaning and purpose comes from outside, from the context.
|
||||
|
||||
Ideally, it should make a connection, and that connection should then launch an interactive environment – the url case, where the url downloads a javascript app to address a particular database entry on a particular host.
|
||||
|
||||
A fixed length field is always in danger of
|
||||
running out, so one needs a committee to allocate numbers.
|
||||
With an arbitrary length field there is always plenty of
|
||||
headroom, we can just let people use what numbers seem good
|
||||
to them, and if there is a collision, well, one or both of
|
||||
the colliders can move to another number.
|
||||
|
||||
For example, the hash of a public key structure has to contain an algorithm
|
||||
identifier as to the hashing algorithm, to accommodate the possibility that
|
||||
in future the existing algorithm becomes too weak, and we must introduce
|
||||
new algorithms while retaining compatibility with the old. But there could
|
||||
potentially be quite a lot of algorithms, though in practice initially there
|
||||
will only be one, and it will be a long time before there are two.
|
||||
|
||||
When I say "arbitrarily large" I do not mean arbitrarily large, since this creates the possibility that someone could break something by sending a number bigger than the software can handle. There needs to be an absolute limit, such as sixty four bits, on representable numbers. But the limit should be larger than is ever likely to have a legitimate use.
|
||||
|
||||
# Solutions
|
||||
|
||||
## Zero byte encoding
|
||||
|
||||
Cap'n Proto zero compresses out zero bytes, and uses an encoding such that uninformative and predictable fields are zero.
|
||||
|
||||
## 62 bit compressed numbers
|
||||
|
||||
QUIC expresses a sixty two bit number as one to four sixteen bit numbers. This is the fastest to encode and decode.
|
||||
|
||||
## Leading bit as number boundary
|
||||
|
||||
But it seems to me that the most efficient reasonably fast and elegant
|
||||
solution is a variant on utf8 encoding, though not quite as fast as the
|
||||
encoding used by QUIC:
|
||||
|
||||
Split the number into seven bit fields. For the leading fields, a one bit is
|
||||
prepended making an eight bit byte. For the last field, a zero bit is prepended.
|
||||
|
||||
This has the capability to represent very large values, which is potentially
|
||||
dangerous. The implementation has to impose a limit, but the limit can
|
||||
be very large, and can be increased without breaking compatibility, and
|
||||
without all implementations needing to changing their limit in the same
|
||||
way at the same time.
|
||||
|
||||
## Prefix Free Number Encoding
|
||||
|
||||
In this class of solutions, numbers are embedded as variable sized groups of bits within a bitstream, in a way that makes it possible to find the boundary between one number and the next. It is used in data compression, but seldom used in compressed data transmission, because far too slow.
|
||||
|
||||
This class of problem is that of a
|
||||
[universal code for integers](http://en.wikipedia.org/wiki/Universal_code_%28data_compression%29).
|
||||
|
||||
The particular coding I propose here is a variation on
|
||||
Elias encoding, though I did not realize it when I
|
||||
invented it.
|
||||
|
||||
On reflection, my proposed encoding is too clever by half,
|
||||
better to use Elias δ coding, with large arbitrary
|
||||
limits on the represented numbers, rather than
|
||||
clever custom coding for each field. For the intended purpose of wrapping packets, of collecting UDP packets into messages, and messages into channels, limit the range of representable values to the range j: 0 \< j \< 2\^64, and pack all the fields representing the place of this UDP package in a bunch of messages in a bunch of channels into a single bitstream header that is then rounded into an integral number of bytes.
|
||||
|
||||
We have two bitstream headers, one of which always starts with the number 5 to identify the protocol (unknown protocols are immediately ignored), and then another number to identify the encryption stream and the position in the encryption stream (no windowing). Then we decrypt the rest of the packet starting on a byte boundary. The decrypted packet then has additional bitstream headers.
|
||||
|
||||
For unsigned integers, we restrict the range to less than 2\^64-9. We then add 8 before encoding, and subtract 8 after decoding, so that our Elias δ encoded value always starts with two zero bits, which we always throw away. Thus the common values 0 to 7 inclusive are represented by a six bit value – I want to avoid wasting too much implied probability on the relatively low probability value of zero.
|
||||
|
||||
The restriction on the range is apt to produce unexpected errors, so I suppose we special case the additional 8 values, so that we can represent every signed integer.
|
||||
|
||||
For signed integers, we convert to an unsigned integer\
|
||||
`uint_fast64_t y; y = x < 0 ? 2*((uint_fast64_t)(-x)+1) : 2*(uint_fast64_t)x;`\
|
||||
And then represent as a positive integer. The decoding algorithm has to know whether to call the routine for signed or unsigned. By using unsigned maths where values must always be positive, we save a bit. Which is a lot of farting around to save on one bit.
|
||||
|
||||
We would like a way to represent an arbitrarily large
|
||||
number, a Huffman style representation of the
|
||||
numbers. This is not strictly Huffman encoding,
|
||||
since we want to be able to efficiently encode and decode
|
||||
large numbers, without using a table, and we do not have
|
||||
precise knowledge of what the probabilities of numbers are
|
||||
likely to be, other than that small numbers are
|
||||
substantially more probable than large numbers. In
|
||||
the example above, we would like to be able to represent
|
||||
numbers up to O(2^32^), but efficiently represent
|
||||
the numbers one, and two, and reasonably efficiently
|
||||
represent the numbers three and four. So to be
|
||||
strictly correct, “prefix free number encoding”. As we
|
||||
shall see at the end, prefix free number encoding always
|
||||
corresponds to Huffman encoding for some reasonable weights
|
||||
– but we are not worrying too much about weights, so are
|
||||
not Huffman encoding.
|
||||
|
||||
### Converting to and from the representation
|
||||
|
||||
Assume X is a prefix free sequence of bit strings – that is to say, if we
|
||||
are expecting a member of this sequence, we can tell where the member
|
||||
ends.
|
||||
|
||||
Let \[m…n\] represent a sequence of integers m to n-1.
|
||||
|
||||
Then the function X→\[m…n\] is the function that converts a bit string of X
|
||||
to the corresponding integer of \[m…n\], and similarly for \[m…n\]→X.
|
||||
|
||||
Thus X→\[m…n\] and \[m…n\]→X provide us with a prefix free representation of
|
||||
numbers greater than or equal to m, and less than n.
|
||||
|
||||
Assume the sequence X has n elements, and we can generate and recognize
|
||||
each element.
|
||||
|
||||
Let ℓ(X,k) be a new sequence, constructed by taking the first element of
|
||||
X, and appending to it the 2^k^ bit patterns of length k, the
|
||||
next element of X and appending to it the 2^k+1^ bit patterns of
|
||||
length k+1, and so on and so forth.
|
||||
|
||||
ℓ is a function that gives us this new sequence from an existing sequence
|
||||
and an integer.
|
||||
|
||||
The new sequence ℓ(X,k) will be a sequence of prefix free bit patterns
|
||||
that has 2^n+k+1^ - 2^k^ elements.
|
||||
|
||||
We can proceed iteratively, and define a sequence ℓ(ℓ(X,j),k), which class
|
||||
of sequences is useful and efficient for numbers that are typically quite
|
||||
small, but could often be very large. We will more precisely
|
||||
prescribe what sequences are useful and efficient for what purposes when
|
||||
we relate our encoding to Huffman coding.
|
||||
|
||||
To generate the m+1[th]{.small} element of ℓ(X,k), where X is a
|
||||
sequence that has n elements:
|
||||
|
||||
Let j = m + 2^k^
|
||||
|
||||
Let p = floor(log~2~(j)) that is to say, p is the position of
|
||||
the high order bit of j, zero if j is one, one if j is two
|
||||
or three, two if j is four, five, six, or seven, and so on and so forth.
|
||||
|
||||
We encode p into its representation using the encoding \[k…n+k\]→X, and
|
||||
append to that the low order p bits of j.
|
||||
|
||||
To do the reverse operation, decode from the prefix free representation to
|
||||
the zero based sequence position, to perform the function ℓ(X,k)→\[0…2^n+k+1^-2^k^\],
|
||||
we extract p from the bit stream using the decoding of X→\[j…n+j\], then
|
||||
extract the next p bits of the bit stream, construct k from 2^p^-2^j^
|
||||
plus the number represented by those bits.
|
||||
|
||||
Now all we need is an efficient sequence X for small numbers.
|
||||
|
||||
Let ℒ(n) be a such a sequence with n values. \
|
||||
The first bit pattern of ℒ(n) is 0\
|
||||
The next bit pattern of ℒ(n) is 10\
|
||||
The next bit pattern of ℒ(n) is 110\
|
||||
The next bit pattern of ℒ(n) is 1110\
|
||||
…\
|
||||
The next to last bit pattern of ℒ(n) is 11…110, containing n-2 one bits
|
||||
and one zero bit.\
|
||||
The last bit pattern of ℒ(n) breaks the sequence, for it is 11…11,
|
||||
containing n-1 one bits and no zero bit.
|
||||
|
||||
The reason why we break the sequence, not permitting the
|
||||
representation of unboundedly large numbers, is that
|
||||
computers cannot handle unboundedly large numbers – one
|
||||
must always specify a bound, or else some attacker will
|
||||
cause our code to crash, producing results that we did not
|
||||
anticipate, that the attacker may well be able to make use
|
||||
of.
|
||||
|
||||
Perhaps a better solution is to waste a bit, thereby
|
||||
allowing future expansion. We use a representation
|
||||
that can represent arbitrarily large numbers, but clients
|
||||
and servers can put some arbitrary maximum on the size of
|
||||
the number. If that maximum proves too low, future clients
|
||||
can just expand it without breaking backward compatibility.
|
||||
This is similar to the fact that different file systems
|
||||
have different arbitrary maxima for the nesting of
|
||||
directories, the length of paths, and the length of
|
||||
directory names. Provided the maxima are generous
|
||||
it does not matter that they are not the same.
|
||||
|
||||
Thus the numbers 1 to 2 are represented by \[1…3\] →
|
||||
ℒ(2), 1 being the pattern “0”, and 2 being the
|
||||
pattern “1”
|
||||
|
||||
The numbers 0 to 5 are represented by \[0…6\] → ℒ(6), being the patterns\
|
||||
“0”, “10”, “110”, “1110”, “11110”, “11111”
|
||||
|
||||
Thus \[0…6\] → ℒ(6)(3) is a bit pattern that represents the number
|
||||
3, and it is “1110”
|
||||
|
||||
This representation is only useful if we expect our numbers
|
||||
to be quite small.
|
||||
|
||||
\[0…6\] → ℓ(ℒ(2),1) is the sequence “00”, “01”,
|
||||
“100”, “101”, “110”, “111” representing the
|
||||
numbers zero to five, representing the numbers 0 to
|
||||
less than 2^2+1^ – 2^1^
|
||||
|
||||
\[1…15\] → ℓ(ℒ(3),1) is similarly the sequence\
|
||||
“00”, “01”,\
|
||||
“1000”, “1001”, “1010”, “1011”,\
|
||||
“11000”, “11001”, “11010”, “11011”,“11100”, “11101”, “11110”, “11111”,\
|
||||
representing the numbers one to fourteen, representing the
|
||||
numbers 1 to less than 1 + 2^3+1^ – 2^1^
|
||||
|
||||
We notice that ℓ(ℒ(n),k) has 2^n+k^ – 2^k^
|
||||
patterns, and the shortest patterns are length 1+k, and the
|
||||
largest patterns of length 2n+k-2
|
||||
|
||||
This representation in general requires twice as many bits
|
||||
as to represent large numbers as the usual, non self
|
||||
terminating representation does (assuming k to be small)
|
||||
|
||||
We can iterate this process again, to get the bit string sequence:\
|
||||
ℓ(ℓ(ℒ(n),j),k)\
|
||||
which sequence has 2\^(2^n+j^ - 2^j^ + k) - 2^k^
|
||||
elements.
|
||||
|
||||
This representation is asymptotically efficient for very
|
||||
large numbers, making further iterations pointless.
|
||||
|
||||
ℓ(ℒ(5),1) has 62 elements, starting with a two bit pattern, and ending
|
||||
with a nine bit pattern. Thus ℓ(ℓ(ℒ(5),1),2) has
|
||||
2^64^-4 elements, starting with a four bit pattern, and finishing
|
||||
with a 72 bit pattern.
|
||||
|
||||
### prefix free encoding as Huffman coding
|
||||
|
||||
Now let us consider a Huffman representation of the
|
||||
numbers when we assign the number `n` the
|
||||
weight `1/(n*(n+1)) = 1/n – 1/(n+1)`
|
||||
|
||||
In this case the weight of the numbers in the range `n ... m` is `1/n – 1/(m+1)`
|
||||
|
||||
So our bit patterns are:\
|
||||
0 (representing 1)\
|
||||
100 101 representing 2 to 3\
|
||||
11000 11001 11010 11011 representing 4 to 7\
|
||||
1110000 1110001 1110010 1110011 1110100 1110101
|
||||
1110110 1110111 representing 8 to 15
|
||||
|
||||
We see that the Huffman coding of the numbers that are
|
||||
weighted as having probability `1/(n*(n+1))`
|
||||
|
||||
Is our friend \[1…\] → ℓ(ℒ(n),0), where n is very large.
|
||||
|
||||
Thus this is good in a situation where we are quite unlikely to encounter
|
||||
a big number. However a very common situation, perhaps the most
|
||||
common situation, is that we are quite likely to encounter numbers smaller
|
||||
than a given small amount, but also quite likely to encounter numbers
|
||||
larger than a given huge amount – that the probability of encountering a
|
||||
number in the range 0…5 is somewhat comparable to the probability of
|
||||
encountering a number in the range 5000…50000000.
|
||||
|
||||
We want an encoding that corresponds to a Huffman encoding where numbers are logarithmically distributed up to some enormous limit, corresponding to an encoding where for all n, n bit numbers are represented with an only slightly larger number of bits, n+O(log(n)) bits.
|
||||
|
||||
In such a case, we should represent such values by members of a
|
||||
prefix free sequence `ℓ(ℓ(ℒ,j),k)`
|
1
docs/pandoc_templates/after.pandoc
Normal file
@ -0,0 +1 @@
|
||||
<p style="background-color: #ccffcc; font-size: 80%;"><a rel="license" href="http://creativecommons.org/licenses/by/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://i.creativecommons.org/l/by/4.0/80x15.png" /></a><br />This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by/4.0/">Creative Commons Attribution 4.0 International License</a>.</p>
|
1
docs/pandoc_templates/before.pandoc
Normal file
@ -0,0 +1 @@
|
||||
<p><a href="./index.html"> To Home page</a></p>
|
21
docs/pandoc_templates/header.pandoc
Normal file
@ -0,0 +1,21 @@
|
||||
<style>
|
||||
body {
|
||||
max-width: 30em;
|
||||
margin-left: 1em;
|
||||
}
|
||||
p.center {text-align:center;}
|
||||
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
td, th {
|
||||
border: 1px solid #999;
|
||||
padding: 0.5rem;
|
||||
text-align: left;
|
||||
}
|
||||
h1.title{
|
||||
text-align: center; font-size: xxx-large;
|
||||
}
|
||||
</style>
|
||||
<link rel="shortcut icon" href="../rho.ico">
|
||||
|
31
docs/pandoc_templates/style.css
Normal file
@ -0,0 +1,31 @@
|
||||
body {
|
||||
max-width: 30em;
|
||||
margin-left: 1em;
|
||||
}
|
||||
p.center {text-align:center;
|
||||
}
|
||||
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
td, th {
|
||||
border: 1px solid #999;
|
||||
padding: 0.5rem;
|
||||
text-align: left;
|
||||
}
|
||||
code{white-space: pre-wrap;
|
||||
}
|
||||
span.smallcaps{font-variant: small-caps;
|
||||
}
|
||||
span.underline{text-decoration: underline;
|
||||
}
|
||||
div.column{display: inline-block; vertical-align: top; width: 50%;
|
||||
}
|
||||
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;
|
||||
}
|
||||
ul.task-list{list-style: none;
|
||||
}
|
||||
.display.math{display: block; text-align: center; margin: 0.5rem auto;
|
||||
}
|
||||
h1.title{text-align: center; font-size: xxx-large;
|
||||
}
|
695
docs/parsers.md
Normal file
@ -0,0 +1,695 @@
|
||||
---
|
||||
title: Parsers
|
||||
---
|
||||
This rambles a lot. Thoughts in progress: Summarizing my thoughts here at the top.
|
||||
|
||||
Linux scripts started off using lexing for parsing, resulting in complex and
|
||||
incomprehensible semantics, producing unexpected results. (Try naming a
|
||||
file `-r`, or a directory with spaces in the name.)
|
||||
|
||||
They are rapidly converging in actual usage to the operator precedence
|
||||
syntax and semantics\
|
||||
`command1 subcommand arg1 … argn infixoperator command2 subcommand …`
|
||||
|
||||
Which is parsed as\
|
||||
`((staticclass1.staticmethod( arg1 … argn)) infixoperator ((staticclass2.staticmethod(…)))`
|
||||
|
||||
With line feed acting as `}{` operator, start of file acting as a `{` operator, end
|
||||
of file acting as a `}` operator, suggesting that in a sane language, indent
|
||||
increase should act as `{` operator, indent decrease should act as a `}`
|
||||
operator.
|
||||
|
||||
Command line syntax sucks, because programs interpret their command
|
||||
lines using a simple lexer, which lexes on spaces. Universal resource
|
||||
identifier syntax sucks, because it was originally constructed so that it
|
||||
could be a command line argument, hence no spaces, and because it was
|
||||
designed to be parsed by a lexer.
|
||||
|
||||
But EBNF parsers also suck, because they do not parse the same way
|
||||
humans do. Most actual programs can be parsed by a simple parser, even
|
||||
though the language in principle requires a more powerful parser, because
|
||||
humans do not use the nightmarish full power of a grammar that an EBNF
|
||||
definition winds up defining.
|
||||
|
||||
Note that [LLVM language creation tools](https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/)
|
||||
tutorial does not user an EBNF
|
||||
parser. These tools also make creating a new language with JIT semantics
|
||||
very easy.
|
||||
|
||||
We are programming in languages that are not parsed the way the
|
||||
programmer is parsing them.
|
||||
|
||||
Programming languages ignore whitespace, because programmers tend to
|
||||
express their meaning with whitespace for the human reader, and
|
||||
whitespace grammar is not altogether parallel to the EBNF grammar.
|
||||
There is a mismatch in grammars.
|
||||
|
||||
Seems to me that human parsing is combination of low level lexing, Pratt
|
||||
parsing on operator right and left binding power, and a higher level of
|
||||
grouping that works like lexing. Words are lexed by spaces and
|
||||
punctuation, grouped by operator binding power, with operator
|
||||
recognition taking into account the types on the stack, groups of parsed
|
||||
words are bounded by statement separators, which can be lexed out,
|
||||
groups of statements are grouped and bounded by indenting.
|
||||
|
||||
Some levels in the hierarchy are lexed out, others are operator binding
|
||||
power parsed out. There are some “operators” that mean group separator
|
||||
for a given hierarchical level, which is a tell that reveals lex style parsing,
|
||||
for example semi colon in C++, full stop and paragraph break in text.
|
||||
|
||||
The never ending problems from mixing tab and spaces indenting can be
|
||||
detected by making a increase or decrease of ident by a space a bracket
|
||||
operator, and an increase or decrease by a tab a non matching bracket
|
||||
operator.
|
||||
|
||||
Pratt parsing parsers operators by their left and right binding power –
|
||||
which is a superset of operator precedence parsing. EBNF does not
|
||||
directly express this concept, and programming this concept into EBNF is
|
||||
complicated, indirect, and imperfect – because it is too powerful a
|
||||
superset, that can express anything, including things that do not make
|
||||
sense to the human writing the stuff to be parsed.
|
||||
|
||||
Pratt parsing finalizes an expression by visiting the operators in reverse
|
||||
polish order, thus implicitly executing a stack of run time typed operands,
|
||||
which eventually get compiled and eventually executed as just-in-time typed
|
||||
or statically typed operands and operators.
|
||||
|
||||
For [identity](identity.html), we need Cryptographic Resource Identifiers,
|
||||
which cannot conform to the “Universal” Resource Identifier syntax and semantics.
|
||||
|
||||
Lexers are not powerful enough, and the fact that they are still used
|
||||
for uniform resource identifiers, relative resource identifiers, and command
|
||||
line arguments is a disgrace.
|
||||
|
||||
Advanced parsers, however, are too powerful, resulting in syntax that is
|
||||
counter intuitive. That ninety percent of the time a program file can be
|
||||
parsed by a simple parser incapable of recognizing the full set of
|
||||
syntactically correct expressions that the language allows indicates that the
|
||||
programmer’s mental model of the language has a more simple structure.
|
||||
|
||||
# Pratt Parsing
|
||||
|
||||
I really love the Pratt Parser, because it is short and simple, because if you
|
||||
add to the symbol table you can add new syntax during compilation,
|
||||
because what it recognizes corresponds to human intuition and human
|
||||
reading.
|
||||
|
||||
But it is just not actually a parser. Given a source with invalid expressions
|
||||
such as unary multiplication and unbalanced parentheses, it will cheerfully
|
||||
generate a parse. It also lacks the concept out of which all the standard
|
||||
parsers are constructed, that expressions are of different kinds, different
|
||||
nonterminals.
|
||||
|
||||
To fix Pratt parsing, it would have to recognize operators as bracketing, as
|
||||
prefix unary, postfix unary, or infix, and that some operators do not have an
|
||||
infix kinds, and it would have to recognize that operands have types, and that
|
||||
an operator produces a type from its inputs. It would have to attribute a
|
||||
nonterminal to a subtree. It would have to recognize ternary operators as
|
||||
operators.
|
||||
|
||||
And that is a major rewrite and reinvention.
|
||||
|
||||
Lalr parsers appear to be closer to the programmer mental model, but looking at
|
||||
Pratt Parsing, there is a striking resemblance between C and what falls out
|
||||
Pratt’s model:
|
||||
|
||||
The kind of “lexing” the Pratt parser does seems to have a natural
|
||||
correspondence to the kind of parsing the programmer does as his eye rolls
|
||||
over the code. Pratt’s deviations from what would be correct behavior in
|
||||
simple arithmetic expressions composed of numerals and single character
|
||||
symbols seem to strikingly resemble expressions that engineers find
|
||||
comfortable.
|
||||
|
||||
When `expr` is called, it is provided the right binding power of the
|
||||
token that called it. It consumes tokens until it meets a token whose left
|
||||
binding power is equal or lower than the right binding power of the operator
|
||||
that called it. It collects all tokens that bind together into a tree before
|
||||
returning to the operator that called it.
|
||||
|
||||
The Pratt `peek` peeks ahead to see if what is coming up is an
|
||||
operator, therefore needs to check what is coming up against a symbol table,
|
||||
which existing implementations fail to explicitly implement.
|
||||
|
||||
The Pratt algorithm, as implemented by Pratt and followers, assumes that all
|
||||
operators can be unary prefix or infix (hence the nud/led distinction). It
|
||||
should get the nature of the upcoming operator from the symbol table (infix,
|
||||
unary, or both, and if unary, prefix or postfix).
|
||||
|
||||
Although implementers have not realized it, they are treating all “non
|
||||
operator” tokens as unary postfix operators. Instead of, or as well as, they
|
||||
need to treat all tokens (where items recognized from a symbol table are
|
||||
pre-aggregated) as operators, with ordinary characters as postfix unary,
|
||||
spaces as postfix unary with weaker binding power, and a token consisting of
|
||||
a utf8 iterator plus a byte count as equivalent to a left tree with right
|
||||
single character leaves and a terminal left leaf.
|
||||
|
||||
Pratt parsing is like lexing, breaking a stream of characters into groups,
|
||||
but the grouping is hierarchical. The algorithm annotates a linear text with
|
||||
hierarchy.
|
||||
|
||||
Operators are characterized by a global order of left precedence, a global
|
||||
order of right precedence (the difference giving us left associativity and
|
||||
right associativity)
|
||||
|
||||
If we extend the Pratt algorithm with the concept of unitary postfix
|
||||
operators, we see it is treating each ordinary unrecognized character as a
|
||||
unitary postfix operator, and each whitespace character as a unitary postfix
|
||||
operator of weaker binding power.
|
||||
|
||||
[Apodaca]:https://dev.to/jrop/pratt-parsing
|
||||
|
||||
Pratt and [Apodaca] are primarily interested in the case of unary minus, so
|
||||
they handle the case of a tree with a potentially null token by
|
||||
distinguishing between nud (no left context) and led (the right hand side of
|
||||
an operator with left context).
|
||||
|
||||
Pratt assumes that in correct source text, `nud` is only going to encounter an
|
||||
atomic token, in which case it consumes the token, constructs a leaf vertex
|
||||
which points into the source, and returns, or a unary prefix operator, or an
|
||||
opening bracket. If it encounters an operator, it calls `expr` with the right
|
||||
binding power of that operator, and when `expr`has finished parsing, returns
|
||||
a corresponding vertex.
|
||||
|
||||
Not at all clear to me how it handles brackets. Pratt gets by without the
|
||||
concept of matching tokens, or hides it implicitly. Seems to me that correct
|
||||
parsing is that a correct vertex has to contain all matching tokens, and the
|
||||
expressions contained therein, so a vertex corresponding to a bracketed
|
||||
expression has to point to the open and closing bracket terminals, and the
|
||||
contained expression. I would guess that his algorithm winds up with a
|
||||
tree that just happens to contain matching tokens in related positions in the tree.
|
||||
|
||||
Suppose the typical case, a tree of binary operators inside a tree of binary
|
||||
operators: In that case, when `expr` is called, the source pointer is pointing
|
||||
to the start of an expression. `expr` calls `nud` to parse the expression, and if
|
||||
that is all she wrote (because ` peek` reveals an operator with lower left
|
||||
binding power than the right binding power that `expr` was called with)
|
||||
returns the edge to the vertex constructed by `nud`. Otherwise, it parses out
|
||||
the operator, and calls `led` with the right binding power of the operator it has encountered, to get the right hand argument of the binary operator. It
|
||||
then constructs a vertex containing the operator, whose left edge points to
|
||||
the node constructed by `nud` and whose right hand edge points to the node
|
||||
constructed by `led`. If that is all she wrote, returns, otherwise iterates
|
||||
its while loop, constructing the ever higher root of a right leaning tree of
|
||||
all previous roots, whose ultimate left most leaf is the vertex constructed by
|
||||
`nud`, and whose right hand vertexes were constructed by `led`.
|
||||
|
||||
The nud/led distinction is not sufficiently general. They did not realize
|
||||
that they were treating ordinary characters as postfix unitary operators.
|
||||
|
||||
Trouble is, I want to use the parser as the lexer, which ensures that as the
|
||||
human eye slides over the text, the text reads the way it is in fact
|
||||
structured. But if we do Pratt parsing on single characters to group them
|
||||
into larger aggregates, `p*--q*s` is going to be misaggregated by the parser
|
||||
to `(( (p*)−) − (q*s)`, which is meaningless.
|
||||
|
||||
And, if we employ Pratt’s trick of nud/led distinction, will evaluate as
|
||||
`p*(-(-q*s))` which gives us a meaningful but wrong result ` p*q*s`
|
||||
|
||||
If we allow multicharacter operators then they have to be lexed out at the
|
||||
earliest stage of the process – the Pratt algorithm has to be augmented by
|
||||
aggregate tokens, found by attempting to the following text against a symbol
|
||||
table. Existing Pratt algorithms tend to have an implicit symbol table of
|
||||
one character symbols, everything in the symbol table being assumed to be
|
||||
potentially either infix or unary prefix, and everything else outside the
|
||||
implicit symbol table unary postfix.
|
||||
|
||||
If we extend the Pratt algorithm with the concept of unitary postfix
|
||||
operators, we see it is treating each ordinary unrecognized character as a
|
||||
unitary postfix operator, and each whitespace character as a unitary postfix
|
||||
operator of weaker binding power.
|
||||
|
||||
Suppose a token consists of a utf8 iterator and a byte count.
|
||||
|
||||
So, all the entities we work with are trees, but recursion terminates because
|
||||
some nodes of the tree have been collapsed to variables that consist of a
|
||||
utf8 iterator and a byte count, *and some parts of the tree have been
|
||||
partially collapsed to vertexes that consist of a ut8 iterator, a byte count,
|
||||
and an array of trees*.
|
||||
|
||||
C++ forbids `“foo bar()”` to match `“foobar()”`, but
|
||||
allows `“foobar ()”` to match, which is arguably an error.
|
||||
|
||||
`“foobar(”` has to lex out as a prefix operator. But is not
|
||||
really a prefix unitary operator. It is a set of matching operators, like
|
||||
brackets and the ternary operator bool?value:value. The commas and the closing
|
||||
bracket are also part of it. Which brings us to recognizing ternary
|
||||
operators. The naive single character Pratt algorithm handles ternary
|
||||
operators correctly (assuming that the input text is valid) which is
|
||||
surprising. So it should simply also match the commas and right bracket as a
|
||||
particular case of ternary and higher operators in the initial symbol search,
|
||||
albeit doing that so that it is simple and correct and naturally falls out of
|
||||
the algorithm is not necessarily obvious.
|
||||
|
||||
Operator precedence gets you a long way, but it messed up because it did not
|
||||
recognize the distinction between right binding power and left binding
|
||||
power. Pratt gets you a long way further.
|
||||
|
||||
But Pratt messes up because it does not explicitly recognize the difference
|
||||
between unitary prefix and unitary postfix, nor does it explicitly recognize
|
||||
operator matching – that a group of operators are one big multi argument
|
||||
operator. It does not recognize that brackets are expressions of the form
|
||||
symbol-expression-match, let alone that ternary operators are expressions of
|
||||
the form expression-symbol-match-expression.
|
||||
|
||||
Needs to be able to recognize that expressions of the form
|
||||
expression-symbol-expression-match-expression-match\...expression are
|
||||
expressions, and convert the tree into prefix form (polish notation with
|
||||
arguments bracketed) and into postfix form (reverse polish) with a count of
|
||||
the stack size.
|
||||
|
||||
Needs to have a stack of symbols that need left matches.
|
||||
|
||||
# Lalr
|
||||
|
||||
Bison and yacc are
|
||||
[joined at the hip](https://tomassetti.me/why-you-should-not-use-flex-yacc-and-bison/) to seven bit ascii and BNF, (through flex and lex)
|
||||
whereas [ANTLR](https://tomassetti.me/ebnf/)
|
||||
recognizes unicode and the far more concise and intelligible EBNF. ANTLR
|
||||
generates ALL parsers, which allow syntax that allows statements that are ugly
|
||||
and humanly unintelligible, while Bison when restricted to LALR parsers allows
|
||||
only grammars that forbid certain excesses, but generates unintelligible error
|
||||
messages when you specify a grammar that allows such excesses.
|
||||
|
||||
You could hand write your own lexer, and use it with BisonC++. Which seemingly
|
||||
everyone does.
|
||||
|
||||
ANTLR allows expressions that take long time to parse, but only polynomially
|
||||
long, fifth power, and prays that humans seldom use such expressions, which in
|
||||
practice they seldom do. But sometimes they do, resulting in hideously bad
|
||||
parser performance, where the parser runs out of memory or time. Because
|
||||
the parser allows non LALR syntax, it may find many potential meanings
|
||||
halfway through a straightforward lengthy expression that is entirely clear
|
||||
to humans because the non LALR syntax would never occur to the human. In
|
||||
ninety percent of files, there is not a single expression that cannot be
|
||||
parsed by very short lookahead, because even if the language allows it,
|
||||
people just do not use it, finding it unintelligible. Thus, a language that
|
||||
allows non LALR syntax locks you in against subsequent syntax extension,
|
||||
because the extension you would like to make already has some strange and non
|
||||
obvious meaning in the existing syntax.
|
||||
|
||||
This makes it advisable to use a parser that can enforce a syntax definition
|
||||
that does not permit non LALR expressions.
|
||||
|
||||
On the other hand, LALR parsers walk the tree in Reverse Polish
|
||||
order, from the bottom up. This makes it hard to debug your grammar, and
|
||||
hard to report syntax errors intelligibly. And sometimes you just cannot
|
||||
express the grammar you want as LALR, and you wind up writing a superset of
|
||||
the grammar you want, and then ad-hoc forbidding otherwise legitimate
|
||||
constructions, in which case you have abandoned the simplicity and
|
||||
directness of LALR, and the fact that it naturally tends to restrict you to
|
||||
humanly intelligible syntax.
|
||||
|
||||
Top down makes debugging your syntax easier, and issuing useful error
|
||||
messages a great deal easier. It is hard to provide any LALR handling of
|
||||
syntax errors other than just stop at the first error, but top down makes it
|
||||
a lot harder to implement semantics, because Reverse Polish order directly
|
||||
expresses the actions you want to take in the order that you need to take
|
||||
them.
|
||||
|
||||
LALR allows left recursion, so that you can naturally make minus and divide
|
||||
associate in the correct and expected order, while with LL, you wind up
|
||||
doing something weird and complicated – you build the tree, then you have
|
||||
another pass to get it into the correct order.
|
||||
|
||||
Most top down parsers, such as ANTLR, have a workaround to allow left
|
||||
recursion. They internally turn it into right recursion by the standard
|
||||
transformation, and then optimize out the ensuing tail recursion. But that
|
||||
is a hack, which increases the distance between your expression tree and
|
||||
your abstract syntax tree, still increases the distance between your grammar
|
||||
and your semantics during parser execution. You are walking the hack,
|
||||
instead of walking your own grammar’s syntax tree in Reverse Polish order.
|
||||
Implementing semantics becomes more complex. You still wind up with added
|
||||
complexity when doing left recursion, just moved around a bit.
|
||||
|
||||
LALR allows you to more directly express the grammar you want to express. With
|
||||
top down parsers, you can accomplish the same thing, but you have to take a
|
||||
more roundabout route to express the same grammar, and again you are likely
|
||||
to find you have allowed expressions that you do not want and which do not
|
||||
naturally have reasonable and expected semantics.
|
||||
|
||||
ANTLR performs top down generation of the expression tree. Your code called by
|
||||
ANTLR converts the expression tree into the Abstract Syntax tree, and the
|
||||
abstract syntax tree into the High Level Intermediate Representation.
|
||||
|
||||
The ANTLR algorithm can be slow as a week of sundays, or wind up eating
|
||||
polynomially large amounts of memory till it crashes. To protect against
|
||||
this problem, [he
|
||||
suggests using the fast SLL algorithm first, and should it fail, then use
|
||||
the full on potentially slow and memory hungry LL\* algorithm.](https://github.com/antlr/antlr4/issues/374) Ninety
|
||||
percent of language files can be parsed by the fast algorithm, because people
|
||||
just do not use too clever by half constructions. But it appears to me that
|
||||
anything that cannot be parsed by SLL, but can be parsed by LL\*, is not good
|
||||
code – that what confuses an SLL parser also confuses a human, that the
|
||||
alternate readings permitted by the larger syntax are never code that people
|
||||
want to use.
|
||||
|
||||
Antlr does not know or care if your grammar makes any sense until it tries to
|
||||
analyze particular texts. But you would like to know up front if your
|
||||
grammar is valid.
|
||||
|
||||
LALR parsers are bottom up, so have terrible error messages when they analyze
|
||||
a particular example of the text, but they have the enormous advantage that
|
||||
they will analyze your grammar up front and guarantee that any grammatically
|
||||
correct statement is LALR. If a LALR parser can analyze it, chances are that
|
||||
a human can also. ANTLR permits grammars that permit unintelligible statements.
|
||||
|
||||
The [LRX parser](http://lrxpg.com/downloads.html) looks the most
|
||||
suitable for your purpose. It has a restrictive license and only runs in the
|
||||
visual studio environment, but you only need to distribute the source code it
|
||||
builds the compiler from as open source, not the compiler compiler. It halts
|
||||
at the first error message, since incapable of building intelligible multiple
|
||||
error messages. The compiler it generates builds a syntax tree and a symbol
|
||||
table.
|
||||
|
||||
The generically named [lalr](https://github.com/cwbaker/lalr)
|
||||
looks elegantly simple, and not joined at the hip to all sorts of strange
|
||||
environment. Unlike Bison C++, should be able to handle unicode strings,
|
||||
with its regular expressions. It only handles BNF, not EBNF, but that
|
||||
is a relatively minor detail. Its regular expressions are under documented,
|
||||
but regular expression syntax is pretty standard. It does not build a symbol
|
||||
table.
|
||||
|
||||
And for full generality, you really need a symbol table where the symbols get
|
||||
syntax, which is a major extension to any existing parser. That starts to
|
||||
look like hard work. The lalr algorithm does not add syntax on the fly. The
|
||||
lrxpg parser does build a symbol tree on the fly, but not syntax on the
|
||||
fly – but its website just went down. No one has attempted to write a
|
||||
language that can add syntax on the fly. They build a syntax capable of
|
||||
expressing an arbitrary graph with symbolic links, and then give the graph
|
||||
extensible semantics. The declaration/definition semantic is not full
|
||||
parsing on the definition, but rather operates on the tree.
|
||||
|
||||
In practice, LALR parsers need to be extended beyond LALR with operator
|
||||
precedence. Expressing operator precedence within strict LALR is apt to be
|
||||
messy. And, because LALR walks the tree in reverse polish order, you want
|
||||
the action that gets executed at parse time to return a value that the
|
||||
generated parser puts on a stack managed by the parser, which stack is
|
||||
available when the action of the operator that consumes it is called. In
|
||||
which case the definition/declaration semantic declares a symbol that has a
|
||||
directed graph associated with it, which graph is then walked to interpret
|
||||
what is on the parse stack. The data of the declaration defines metacode
|
||||
that is executed when the symbol is invoked, the directed graph associated
|
||||
with the symbol definition being metacode executed by the action that parser
|
||||
performs when the symbol is used. The definition/declaration semantic allows
|
||||
arbitrary graphs containing cycles (full recursion) to be defined, by the
|
||||
declaration adding indirections to a previously constructed directed graph.
|
||||
|
||||
The operator-precedence parser can parse all LR(1) grammars where two
|
||||
consecutive nonterminals and epsilon never appear in the right-hand side of any
|
||||
rule. They are simple enough to write by hand, which is not generally the case
|
||||
with more sophisticated right shift-reduce parsers. Second, they can be written
|
||||
to consult an operator table at run time. Considering that “universal” resource
|
||||
locators and command lines are parsed with mere lexers, perhaps a hand written
|
||||
operator-precedence parser is good enough. After all, Forth and Lisp have less.
|
||||
|
||||
C++ variadic templates are a purely functional metalanguage operating on the
|
||||
parse stack. Purely functional languages suck, as demonstrated by the fact
|
||||
that we are now retroactively shoehorning procedural code (if constexpr) into
|
||||
C++ template meta language. Really, you need the parse stack of previously
|
||||
encountered arguments to potentially contain arbitrary objects.
|
||||
|
||||
When a lalr parser parses an if-then-else statement, then if the parser
|
||||
grammar defines “if” as the nonterminal, which may contain an “else”
|
||||
clause, it is going to execute the associated actions in the reverse order.
|
||||
But if you define “else” as the nonterminal, which must be preceded by an
|
||||
“if” clause, then the parser will execute the associated actions in the
|
||||
expected order. But suppose you have an else clause in curly brackets
|
||||
inside an if-then-else. Then the parse action order is necessarily going to
|
||||
be different from the procedural. Further, the very definition of an if-then-else clause implies a parse time in which all actions are performed, and a procedural time in which only one action is performed.
|
||||
|
||||
Definition code metacode must operate on the parser stack, but declaration
|
||||
metacode may operate on a different stack, implying a coroutine relationship
|
||||
between declaration metacode and definition metacode. The parser, to be
|
||||
intelligible, has to perform actions in as close to left to right order as
|
||||
possible hence my comment that the “else” nonterminal must contain the “if”
|
||||
nonterminal, not the other way around – but what if the else nonterminal
|
||||
contains an “if then else” inside curly braces? The parser actions can and
|
||||
will happen in different order to the run time actions. Every term of the
|
||||
if-then-else structure is going to have its action performed in syntax order,
|
||||
but the syntax order has to be capable of implying a different procedural
|
||||
order, during which not all actions of an if-then-else structure will be
|
||||
performed. And similarly with loops, where every term of the loop causes a
|
||||
parse time action to be performed once in parse time order, but procedural
|
||||
time actions in a different order, and performed many times.
|
||||
|
||||
This implies that any fragment of source code in a language that uses the
|
||||
declaration/definition syntax and semantic gets to do stuff in three phases
|
||||
(Hence in C, you can define a variable or a function without declaring it,
|
||||
resulting in link time errors, and in C++ define a class without declaring
|
||||
its methods and data, resulting in compilation errors at a stage of
|
||||
compilation that is ill defined and inexplicit)
|
||||
|
||||
The parser action of the declaration statement constructs a declaration data
|
||||
structure, which is metacode, possibly invoking the metacode generated by
|
||||
previous declarations and definitions. When the term declared is then used,
|
||||
then the metacode of the definition is executed. And the usage may well
|
||||
invoke the metacode generated by the action associated at parse time with the
|
||||
declaration statement, but attempting to do so causes an error in the parser
|
||||
action if the declaration action has not yet been encountered in parse action
|
||||
order.
|
||||
|
||||
So, we get parser actions which construct definition and declaration metacode
|
||||
and subsequent parser actions, performed later during the parse of subsequent
|
||||
source code that invoke that metacode by name to construct metacode. But, as
|
||||
we see in the case of the if-then-else and do-while constructions, there must
|
||||
be a third execution phase, in which the explicitly procedural code
|
||||
constructed, but not executed, by the metacode, is actually executed
|
||||
procedurally. Which, of course, in C++ is performed after the link and load
|
||||
phase. But we want procedural metacode. And since procedural metacode must
|
||||
contain conditional and loops, there has to be a third phase during parsing,
|
||||
executed as a result of parse time actions, that procedurally performs ifs
|
||||
and loops in metacode. So a declaration can invoke the metacode constructed
|
||||
by previous declarations – meaning that a parse time action executes metacode
|
||||
constructed by previous parse time actions. But, to invoke procedural
|
||||
metacode from a parse time action, a previous parse time action has to have
|
||||
invoked metacode constructed by an even earlier parse time action to
|
||||
construct procedural metacode.
|
||||
|
||||
Of course all three phases can be collapsed into one, as a definition can act
|
||||
as both a declaration and a definition, two phases in one, but there have to
|
||||
be three phases, that can be the result parser actions widely separated in
|
||||
time, triggered by code widely separated in the source, and thinking of the
|
||||
common and normal case is going to result in mental confusion, collapsing
|
||||
things that are distinct, because the distinction is commonly unimportant and
|
||||
elided. Hence the thick syntactic soup with which I have been struggling when I
|
||||
write C++ templates defining classes that define operators and then attempt
|
||||
to use the operators.
|
||||
|
||||
In the language of C we have parse time actions, link time actions, and
|
||||
execution time actions, and only at execution time is procedural code
|
||||
constructed as a result of earlier actions actually performed procedurally.
|
||||
|
||||
We want procedural metacode that can construct procedural metacode. So we
|
||||
want execution time actions performed during parsing. So let us call the
|
||||
actions definitional actions, linking actions, and execution actions. And if
|
||||
we are going to have procedural actions during parsing, we are going to have
|
||||
linking actions during parsing. (Of course, in actually existent C++, second
|
||||
stage compilation does a whole lot of linker actions, resulting in
|
||||
excessively tight coupling between linker and compiler, and the inability of
|
||||
other languages to link to C++, and the syntax soup that ensues when I define
|
||||
a template class containing inline operators.)
|
||||
|
||||
# Forth the model
|
||||
|
||||
We assume the equivalent of Forth, where the interpreter directly interprets
|
||||
and executes human readable and writeable text, by looking the symbols in the
|
||||
text and performing the actions they command, which commands may command the
|
||||
interpreter to generate compiled and linked code, including compiled code that
|
||||
generates compiled and linked code, commands the interpreter to add names for
|
||||
what it has compiled to the name table, and then commands the interpreter to
|
||||
execute those routines by name.
|
||||
|
||||
Except that Forth is absolutely typeless, or has only one type, fixed
|
||||
precision integers that are also pointers, while we want a language in which
|
||||
types are first class values, as manipulable as integers, except that they
|
||||
are immutable, a language where a pointer to a pointer to an integer cannot
|
||||
be added to a pointer, and subtraction of one pointer from another pointer of
|
||||
the same type pointing into the same object produces an integer, where you
|
||||
cannot point a pointer out of the range of the object it refers to, nor
|
||||
increment a reference, only the referenced value.
|
||||
|
||||
Lexing merely needs symbols to be listed. Parsing merely needs them to be, in C++ terminology, declared but not defined. Pratt parsing puts operators in forth order, but knows and cares nothing about types, so is naturally adapted to a Forth like language which has only one type, or values have run time types, or generating an intermediate language which undergoes a second state compilation that produces statically typed code.
|
||||
|
||||
In forth, symbols pointed to memory addresses, and it was up to the command whether it would load an integer from an address, stored an integer at that address, execute a subroutine at that address, or go to that address, the ultimate in unsafe typelessness.
|
||||
|
||||
Pratt parsing is an outstandingly elegant solution to parsing, and allows compile time extension to the parser, though it needs a lexer driven by the symbol table if you have multi character operators, but I am still lost in the problem of type safety.
|
||||
|
||||
Metaprogramming in C++ is done a lazily evaluated purely functional language
|
||||
where a template is usually used to construct a type from type arguments. I
|
||||
want to construct types procedurally, and generate code procedurally, rather
|
||||
than by invoking pure functions.
|
||||
|
||||
In Pratt parsing, the language is parsed sequentially in parser order, but
|
||||
the parser maintains a tree of recursive calls, and builds a tree of pointers
|
||||
into the source, such that it enters each operator in polish order, and
|
||||
finishes up each operator in reverse polish order.
|
||||
|
||||
On entering in polish order, this may be an operand with a variable number of
|
||||
arguments (unary minus or infix minus) so it cannot know the number of operands
|
||||
coming up, but on exiting in reverse polish order, it knows the number and
|
||||
something about the type of the arguments, so it has to look for an
|
||||
interpretation of the operator that can handle that many arguments of those
|
||||
type. Which may not necessarily be a concrete type.
|
||||
|
||||
Operators that change the behavior of the lexer or the parser are typically
|
||||
acted upon in polish order. Compilation to byte code that does not yet have
|
||||
concrete types is done in reverse polish order, so operators that alter the
|
||||
compilation to byte code are executed at that point. Operators that manipulate
|
||||
that byte code during the linking to concrete types act at link time, when the
|
||||
typeless byte code is invoked with concrete types.
|
||||
|
||||
Naming puts a symbol in the lexer symbol table.
|
||||
|
||||
Declaring puts a symbol in the parser symbol table
|
||||
|
||||
Defining compiles, and possibly links, the definition, and attaches that data
|
||||
to the symbol where it may be used or executed in subsequent compilation and
|
||||
linking steps when that symbol is subsequently invoked. If the definition
|
||||
contains procedural code, it is not going to be executed procedurally until
|
||||
compiled and linked, which will likely occur when the symbol is invoked later.
|
||||
|
||||
An ordinary procedure definition without concrete types is the equivalent of an
|
||||
ordinary C++ template. When it is used with concrete types, the linker will
|
||||
interpret the operations it invokes in terms of those concrete types, and fail
|
||||
if they don’t support those operations.
|
||||
|
||||
A metacode procedure gets put into the lexer symbol table when it is named,
|
||||
into the parser symbol table when it is defined. When it is declared, its
|
||||
definition may be used when its symbol is encountered in polish order by the
|
||||
parser, and may be executed at that time to modify the behavior of parser and
|
||||
linker. When a named, declared, and defined symbol is encountered by the
|
||||
parser in reverse polish order, its compiled code may be used to generate
|
||||
linked code, and its linked and compiled code may manipulate the compiled code
|
||||
preparatory to linking.
|
||||
|
||||
When a symbol is declared, it gets added to the parser and lexer symbol table. When it is defined, it gets added to the linker symbol table. When defined with a concrete type, also gets added to the linker symbol table with those concrete types, as an optimization.
|
||||
|
||||
If an operation could produce an output of variant type, then it is an additive
|
||||
algebraic type, which then has to handled by a switch statement.
|
||||
|
||||
There are five steps: Lexing, parsing, compiling, linking, and running, and
|
||||
any fragment of source code may experience some or all of these steps, with the
|
||||
resulting entries in the symbol table then being available to the next code
|
||||
fragment, Forth style. Thus `77+9` gets lexed into `77, +, 9`, parsed into `+(77, 9)`, compiled into `77 9 +`,
|
||||
linked into `77, 9 +<int, int>` and executed into `int(86)` and the rest of the source code proceeds to parse, compile, link, and
|
||||
run as if you had written `86`.
|
||||
|
||||
Further the source code can create run time code, code that gets declared,
|
||||
defined, and linked during the compile that is executed during the compile,
|
||||
modifying the behavior of the lexer, the parser, the compiler, and the linker
|
||||
over the course of a single compile and link. This enables a forth style
|
||||
bootstrapping, where the lexer, parser, compiler and linker lexes, compiles,
|
||||
and links, most of its own potentially modifiable source code in every compile,
|
||||
much as every c++ compile includes the header files for the standard template
|
||||
library, so that much of your program is rewritten by template metacode that
|
||||
you included at the start of the program.
|
||||
|
||||
Compiled but not linked code could potentially operate on variables of any
|
||||
type, though if the variables did not have a type required by an operator, you
|
||||
would get a link time error, not get a compile time error. This is OK because
|
||||
linking of a fragment of source is not a separate step, but usually happens
|
||||
before the lexer has gotten much further through the source code, happens as
|
||||
soon as the code fragment is invoked with variables of defined type, though
|
||||
usually of as yet undefined value.
|
||||
|
||||
A console program is an operator whose values are of the type iostream, it gets
|
||||
linked as soon as the variable type is defined, and executed when you assign
|
||||
defined values to iostream.
|
||||
|
||||
Because C++ metacode is purely functional, it gets lazily evaluated, so the
|
||||
syntax and compiler can cheerfully leave it undefined when, or even if, it
|
||||
gets executed. Purely functional languages only terminate by laziness. But
|
||||
if we want to do the same things with procedural metacode, no option but to
|
||||
explicitly define what get executed when. In which case pure lalr syntax is
|
||||
going to impact the semantics, since lalr syntax defines the order of parse
|
||||
time actions, and order of execution impacts the semantics. I am not
|
||||
altogether certain as to whether the result is going to be intelligible and
|
||||
predictable. Pratt syntax, however, is going to result in predictable execution order.
|
||||
|
||||
The declaration, obviously, defines code that can be executed by a subsequent
|
||||
parse action after the declaration parse action has been performed, and the
|
||||
definition code that can be compiled after the definition parse action
|
||||
performed.
|
||||
|
||||
The compiled code can be linked when invoked with variables of defined
|
||||
type and undefined value, and executed when invoked with variables of defined
|
||||
type and a value.
|
||||
|
||||
Consider what happens when the definition defines an overload for an infix
|
||||
operator. The definition of the infix operator can only be procedurally
|
||||
executed when the parser calls the infix action with the arguments on the parse
|
||||
stack, which happens long after the infix operator is overloaded.
|
||||
|
||||
The definition has to be parsed when the parser encounters it. But it is
|
||||
procedural code, which cannot be procedurally executed until later, much
|
||||
later. So the definition has to compile, not execute, procedural code, then
|
||||
cause the data structure created by the declaration to point to that compiled
|
||||
code. And then later when the parser encounters an actual use of the infix
|
||||
operator, the compiled procedural code of the infix definition is actually
|
||||
executed to generate linked procedural code with explicit and defined types,
|
||||
which is part of the definition of the function or method in whose source code
|
||||
the infix operator was used.
|
||||
|
||||
One profoundly irritating feature of C++ code, probably caused by LL parsing,
|
||||
is that if the left hand side of an infix expression has an appropriate
|
||||
overloaded operator, it works, but if the right hand side, it fails. Here we
|
||||
see parsing having an incomprehensible and arbitrary influence on semantics.
|
||||
|
||||
C++ is a strongly typed language. With types, any procedure has typed
|
||||
inputs and outputs, and should only do safe and sensible things for that type.
|
||||
C++ metacode manipulates types as first class objects, which implies that if we
|
||||
were to do the same thing procedurally, types need a representation, and
|
||||
procedural commands to make new types from old, and to garbage collect, or
|
||||
memory manage, operations on these data objects, as if they were strings,
|
||||
floating point numbers, or integers of known precision. So you could construct
|
||||
or destruct an object of type type, generate new types by doing type operations
|
||||
on old types, for example add two types or multiply two types to produce an
|
||||
algebraic type, or create a type that is a const type or pointer type to an
|
||||
existing type, which type actually lives in memory somewhere, in a variable
|
||||
like any other variable. And, after constructing an algebraic type by
|
||||
procedurally multiply two types, and perhaps storing in a variable of type
|
||||
type, or invoking a function (aka C++ template type) that returns a type
|
||||
dynamically, create an object of that type – or an array of objects of that
|
||||
type. For every actual object, the language interpreter knows the type,
|
||||
meaning the object of type X that you just constructed is somehow linked to the
|
||||
continuing existence of the object of type type that has the value type X that
|
||||
you used to construct it, and cannot be destroyed until all the objects created
|
||||
using it are destroyed. Since the interpreter knows the type of every object,
|
||||
including objects of type type, and since every command to do something with an
|
||||
object is type aware, this can prevent the interpreter from being commanded to
|
||||
do something stupid. Obviously type data has to be stored somewhere, and has
|
||||
to be immutable, at least until garbage collected because no longer referenced.
|
||||
|
||||
Can circular type references exist? Well, not if they are immutable, because
|
||||
if a type references a type, that type must already exist, and so cannot
|
||||
reference a type that does not yet exist. It could reference a function that
|
||||
generates types, but that reference is not circular. It could have values that
|
||||
are constexpr, and values that reference static variables. If no circular
|
||||
references possible, garbage collection by reference counting works
|
||||
|
||||
Types are algebraic types, sums and products of existing types, plus modifiers
|
||||
such as `const, *,` and `&`.
|
||||
|
||||
Type information is potentially massive, and if we are executing a routine that
|
||||
refers to a type by the function that generates it, we don’t want that
|
||||
equivalent of a C++ template invoked every time, generating a new immutable
|
||||
object every time that is an exact copy of what it produced the last time it
|
||||
went through the loop. Rather, the interpreter needs to short-circuit the
|
||||
construction by looking up a hash of that type constructing template call, to
|
||||
check if it has been called with those function inputs, and already produced an
|
||||
object of that type. And when a function that generates a type is executed,
|
||||
needs to look for duplications of existing types. A great many template
|
||||
invocations simply choose the right type out of a small set of possible types.
|
||||
It is frequently the case that the same template may be invoked with an
|
||||
enormous variety of variables, and come up with very few different concrete
|
||||
results.
|
||||
|
||||
When the interpreter compiles a loop or a recursive call, the type information
|
||||
is likely to be an invariant, which should get optimized out of the loops. But
|
||||
when it is directly executing source code commands which command it to compile
|
||||
source code, such optimization is impossible.
|
||||
|
||||
But, as in forth, you can tell the interpreter to store the commands in a
|
||||
routine somewhere, and when they are stored, the types have already been
|
||||
resolved. Typically the interpreter is going to finish interpreting the source
|
||||
code, producing stored programs each containing a limited amount of type
|
||||
information.
|
BIN
docs/paxos-simple.pdf
Normal file
362
docs/paxos_protocol.md
Normal file
@ -0,0 +1,362 @@
|
||||
---
|
||||
title: Paxos
|
||||
---
|
||||
Paxos addresses the arrow theorem, and the difficulty of having a reliable
|
||||
broadcast channel.
|
||||
|
||||
You want a fifty percent vote, so you don’t want anyone voting for two
|
||||
candidates, at least not until one vote has been invalidated by timeout,
|
||||
and you want to somehow have a single arbitrarily selected agenda
|
||||
to vote up or down.
|
||||
|
||||
And, having been voted up, you don’t want anything else to be voted up,
|
||||
so that you can definitely know when an agenda has been selected.
|
||||
|
||||
But Paxos assumes that many of these problems such as who is eligible to vote
|
||||
and what their vote is worth, have solutions that have been somewhat
|
||||
arbitrarily predetermined by the engineer setting things up, and that we don’t
|
||||
have the problem of the Roman Senate and popular assembly, where only about a
|
||||
third of the Senate actually showed up to vote, and an insignificant number of
|
||||
those eligible to vote in popular assembly showed up, most of them clients of
|
||||
senators, so Paxos is not worried about people gaming the system to exclude
|
||||
voters they do not want, nor worried about people gaming the somewhat
|
||||
arbitrary preselection of the agenda to be voted up and down.
|
||||
|
||||
# Analysing [Paxos Made Simple] in terms of Arrow and Reliable Broadcast
|
||||
|
||||
[Paxos Made Simple]:./paxos-simple.pdf
|
||||
|
||||
The trouble with Lamport’s proposal, described in [Paxos Made Simple] is that
|
||||
it assumes no byzantine failure, and that therefore the reliable broadcast
|
||||
channel is trivial, and it assumes that any proposal will be acceptable, that
|
||||
all anyone cares about is converging on one proposal, therefore it always
|
||||
converges on the first proposal accepted by one acceptor.
|
||||
|
||||
> 1. A proposer chooses a new proposal number n and sends a `prepare n`
|
||||
> request to each member of some set of acceptors, asking it to respond
|
||||
> with:
|
||||
> a. A promise never again to accept a proposal numbered less than
|
||||
> `n`, and
|
||||
> b. The proposal with the highest number less than `n` that it has
|
||||
> accepted, if any.
|
||||
> 2. If the proposer receives the requested responses from a majority of
|
||||
> the acceptors, then it can issue a `accept n v` request with number
|
||||
> `n` and value `v`, where `v` is the value of the highest-numbered
|
||||
> proposal among the responses, or is any value selected by the proposer
|
||||
> if the responders reported no proposals accepted.
|
||||
|
||||
So the proposer either signs on with the existing possible consensus `v`, and
|
||||
notifies a bunch of acceptors with the consensus, or initiates new possible
|
||||
consensus `v`
|
||||
|
||||
The assumption that `n` can be arbitrary seems to assume the proposers are
|
||||
all agents of the same human, so do not care which proposal is accepted. But
|
||||
we intend that they be agents of different humans. But let us figure out
|
||||
how everything fits together before critiquing that.
|
||||
|
||||
> if an acceptor ignores a `prepare` or `accept` request because it has already
|
||||
> received a prepare request with a higher number, then it should probably
|
||||
> inform the proposer, who should then abandon its proposal. This is a
|
||||
> performance optimization that does not affect correctness.
|
||||
|
||||
If a majority of acceptors accept some proposal, then we have a result. But
|
||||
we do not yet have everyone, or indeed anyone, knowing the result.
|
||||
Whereupon we have the reliable broadcast channel problem, which Lamport hand
|
||||
waves away. The learners are going to learn it. Somehow. And once we have
|
||||
a result accepted, we then happily go on to the next round.
|
||||
|
||||
Well, suppose the leader’s proposal is just intolerable? Lamport assumes a
|
||||
level of concord that is unlikely to exist.
|
||||
|
||||
Well screw them, they have to propose the same value already accepted by an
|
||||
acceptor. So we are going to get a definite result. Worst case outcome, is
|
||||
that proposers keep issuing new higher numbered proposals before any proposal
|
||||
is accepted.
|
||||
|
||||
Lamport is assuming no byzantine failure – but, assuming no byzantine failure,
|
||||
this is going to generate a definite result sooner or later.
|
||||
|
||||
But because we could have overlapping proposals with no acceptances, Lamport
|
||||
concludes, we need a “distinguished proposer”, a leader,
|
||||
the primus inter pares, the Chief executive officer.
|
||||
As an efficiency requirement, not a requirement to reach consensus.
|
||||
|
||||
The trouble with Lamport’s Paxos algorithm is that as soon as it becomes known
|
||||
that one acceptor has accepted one proposal, everyone should converge to it.
|
||||
|
||||
But suppose everyone becomes aware of the proposal, and 49% of acceptors
|
||||
think it is great, and 51% of acceptors think it sucks intolerably?
|
||||
|
||||
If a leader’s proposal could be widely heard, and widely rejected, we have a
|
||||
case not addressed by Lamport’s Paxos protocol.
|
||||
|
||||
Lamport does not appear to think about the case that the leader’s proposals
|
||||
are rejected because they are just objectionable.
|
||||
|
||||
# Analysing [Practical Byzantine Fault Tolerance]
|
||||
|
||||
[Practical Byzantine Fault Tolerance]:./byzantine_paxos.pdf
|
||||
|
||||
[Practical Byzantine Fault Tolerance] differs from [Paxos Made Simple] in
|
||||
having “views” where the change of leader (what they call “the primary”) is
|
||||
accomplished by a change of “view”, and in having three phases, pre-prepare,
|
||||
prepare, and accept, instead of two phases, prepare and accept.
|
||||
|
||||
Pre-prepare is the “primary” (leader, CEO, primus inter pares) notifying the
|
||||
“replicas” (peers) of the total order of a client message.
|
||||
|
||||
Prepare is the “replicas” (peers, reliable broadcast channel) notifying each
|
||||
other of association between total order and message digest.
|
||||
|
||||
Accept is the “replicas” and the client learning that $33\%+1$ of the
|
||||
“replicas” (peers, reliable broadcast channel) agree on the total order of the
|
||||
client’s message.
|
||||
|
||||
# Analysing Raft Protocol
|
||||
|
||||
The [raft protocol] is inherently insecure against Byzantine attacks, because
|
||||
the leader is fully responsible for managing log replication on the other
|
||||
servers of the cluster
|
||||
|
||||
[raft protocol]: https://ipads.se.sjtu.edu.cn/_media/publications/wang_podc19.pdf
|
||||
|
||||
We obviously want the pool to be replicated peer to peer, with the primus
|
||||
inter pares (leader, what [Practical Byzantine Fault Tolerance] call
|
||||
“the primary”) organizing the peers to vote for one block after they have
|
||||
already come close to agreement, and the only differences are transactions
|
||||
not yet widely circulated, or disagreements over which of two conflicting
|
||||
spends is to be incorporated in the next block.
|
||||
|
||||
I am pretty sure that this mapping of byzantine Paxos to blockchain is
|
||||
garbled, confused, and incorrect. I am missing something, misunderstanding
|
||||
something, there are a bunch of phases that matter which I am leaving out,
|
||||
unaware I have left them out. I will have to revisit this.
|
||||
|
||||
The Paxos protocol should be understood as a system wherein peers agree on a
|
||||
total ordering of transactions. Each transaction happens within a block, and
|
||||
each block has a sequential integer identifier. Each transaction within a
|
||||
valid block must be non conflicting with every other transaction within a
|
||||
block and consistent with all past transactions, so that although the
|
||||
block defines a total order on every transaction within the block, all
|
||||
transactions can be applied in parallel.
|
||||
|
||||
The problem is that we need authoritative agreement on what transactions are
|
||||
part of block N.
|
||||
|
||||
Proposed transactions flood fill through the peers. A single distinguished
|
||||
entity must propose a block, the pre-prepare message, notice of this
|
||||
proposal, the root hash of the proposal, flood fills through the peers, and
|
||||
peers notify each other of this proposal and that they are attempting to
|
||||
synchronize on it. Synchronizing on the block and validating it are likely to
|
||||
require huge amounts of bandwidth and processing power, and will take
|
||||
significant time.
|
||||
|
||||
If a peer successfully synchronizes, he issues a prepare message. If something
|
||||
is wrong with the block, he issues a nack, a vote against the proposed
|
||||
block, but the nack is informational only. It signals that peers should get
|
||||
ready for a view change, but it is an optimization only.
|
||||
|
||||
If a peer receives a voting majority of prepare messages, he issues a commit
|
||||
message.
|
||||
|
||||
And that is the Paxos protocol for that block of transactions. We then go
|
||||
into the Paxos protocol for the block of prepare messages that proves a
|
||||
majority voted “prepare”. The block of prepare messages chains to the block
|
||||
of transactions, and the block of transactions chains to the previous block
|
||||
of prepare messages.
|
||||
|
||||
And if time goes by, and we have not managed a commit, perhaps because there
|
||||
are a lot of nacks due to bad transactions, perhaps because the primus inter
|
||||
pares claims to have transactions that not everyone has, and then is unable
|
||||
to provide them, (maybe the internet went down for it) peers become open to
|
||||
another pre-prepare message from the next in line to be primus inter pares.
|
||||
|
||||
In order that we can flood fill, we have to be able to simultaneously
|
||||
synchronize on several different views of the pool of transactions. If
|
||||
synchronization on a proposed block is stalling, perhaps because of missing
|
||||
transactions, we end up synchronizing on multiple proposed blocks proposed by
|
||||
multiple entities. Which is great if one runs to completion, and the others
|
||||
do not, but we are likely to run into multiple proposed blocks succeeding. In
|
||||
which case, we have what [Castro and Liskov](./byzantine_paxos.pdf) call a
|
||||
view change.
|
||||
|
||||
If things are not going anywhere, a peer issues a view change message which
|
||||
flood fills around the pool, nominating the next peer in line for primus
|
||||
inter pares, or the peer nominated in existing pool of view change messages.
|
||||
When it has a majority for a view change in its pool, it will then pay
|
||||
attention to a pre-prepare message from the new primus inter pares, (which it
|
||||
may well have already received) and the new primus inter pares restarts
|
||||
the protocol for deciding on the next block, issuing a new pre-prepare
|
||||
message (which is likely one that many of the peers have already synchronized
|
||||
on).
|
||||
|
||||
A peer has a pre-prepare message. He has a recent vote that the entity that
|
||||
issued the pre-prepare message is primus inter pares. He has, or synchronizes
|
||||
on, that block whose root hash is the one specified in that pre-prepare
|
||||
message issued by that primus-inter-pares. When he has the block, and it is
|
||||
valid, then away we go. He flood fills a prepare message, which prepare
|
||||
message chains to the block, and to the latest vote for primus inter pares.
|
||||
|
||||
Each new state of the blockchain has a final root hash at its peak, and each
|
||||
peer that accepts the new state of that blockchain has a pile of commits that
|
||||
add up to a majority committing to this new peak. But it is probable that
|
||||
different peers will have different piles of commits. Whenever an entity
|
||||
wants to change the blockchain state (issues a pre-prepare message), it will
|
||||
propose an addition to the blockchain that contains one particular pile of
|
||||
commits validating the previous state of the blockchain. Thus each block
|
||||
contains one specific view of the consensus validating the previous root. But
|
||||
it does not contain the consensus validating itself. That is in the pool,
|
||||
not yet in the blockchain.
|
||||
|
||||
A change of primus inter pares is itself a change in blockchain state, so
|
||||
when the new primus inter pares issues a new proposed commit, which is to say
|
||||
a new pre-prepare message, it is going to have, in addition to the pile of
|
||||
commits that they may have already synchronized on, a pile of what [Castro
|
||||
and Liskov](./byzantine_paxos.pdf) call view change messages, one specific
|
||||
view of that pile of view change messages. A valid block is going to have a
|
||||
valid pre-prepare message from a valid primus inter pares, and, if necessary,
|
||||
the vote for that primus inter pares. But the change in primus inter pares,
|
||||
for each peer, took place when that peer had a majority of votes for the
|
||||
primus inter pares, and the commit to the new block took place when that peer
|
||||
had the majority of votes for the block. For each peer, the consensus
|
||||
depends on the votes in his pool, not the votes that subsequently get
|
||||
recorded in the block chain.
|
||||
|
||||
So, a peer will not normally accept a proposed transaction from a client if
|
||||
it already has a conflicting transaction. There is nothing in the protocol
|
||||
enforcing this, but if the double spend is coming from Joe Random Scammer,
|
||||
that is just extra work for that peer and all the other peers.
|
||||
|
||||
But once a peer has accepted a double spend transaction, finding consistency
|
||||
is likely to be easier in the final blockchain, where that transaction simply
|
||||
has no outputs. Otherwise for the sake of premature optimization, we
|
||||
complicate the algorithm for reaching consensus.
|
||||
|
||||
# Fast Byzantine multi paxos
|
||||
|
||||
# Generating the next block
|
||||
|
||||
But in the end, we have to generate the next block, which includes some transactions and not others
|
||||
|
||||
In the bitcoin blockchain, the transactions flood fill through all miners,
|
||||
one miner randomly wins the right to decide on the block, forms the block,
|
||||
and the block floodfills through the blockchain.
|
||||
|
||||
In our chain, the primus inter pares proposes a block, peers synchronize on
|
||||
it, which should be fast because they have already synchronized with each
|
||||
other, and if there is nothing terribly wrong with it, send in their
|
||||
signatures. When the primus inter pares has enough signatures, he then sends
|
||||
out the signature block, containing all the signatures.
|
||||
|
||||
If we have a primus inter pares, and his proposal is acceptable, then things
|
||||
proceed straight forwardly through the same synchronization process as we use
|
||||
in flood fill, except that we are focusing on flood filling older
|
||||
information to make sure everyone is in agreement.
|
||||
|
||||
After a block is agreed upon, the peers focus on flood filling all the
|
||||
transactions that they possess around. This corresponds to the client phase
|
||||
of Fast Byzantine Collapsed MultiPaxos. When the time for the next block
|
||||
arrives, they stop floodfilling, apart from floodfilling any old transactions
|
||||
that they received before the previous block was agreed, but which were not
|
||||
included in the previous block, and focus on achieving agreement on those old
|
||||
transactions, plus a subset of their new transactions. They try to achieve
|
||||
agreement by postponing new transactions, and flooding old transactions around.
|
||||
|
||||
When a peer is in the Paxos phase, a synchronization event corresponds to
|
||||
what [Castro and Liskov 4.2](./byzantine_paxos.pdf) call a group commit.
|
||||
Synchronization events with the primary inter pares result in what [Castro
|
||||
and Liskov](./byzantine_paxos.pdf) call a pre-prepare multicast, though if
|
||||
the primus inter pares times out, or its proposal is rejected, we then go
|
||||
into what [Castro and Liskov 4.4](./byzantine_paxos.pdf) call view changes.
|
||||
In their proposal, there is a designated sequence, and if one primus inter
|
||||
pares fails, then you go to the next, and the next, thus reducing the
|
||||
stalling problem when two entities are trying to be primus inter pares.
|
||||
|
||||
They assume an arbitrary sequence number, pre-assigned. We will instead go
|
||||
through the leading signatories of the previous block, with a succession of
|
||||
timeouts. “Hey, the previous primus inter pares has not responded. Let\’s
|
||||
hear it from the leading signatory of the previous block. Hey. No response
|
||||
from him either. Let us try the second leading signatory”. Trying for a
|
||||
different consensus nominator corresponds to what Castro and Liskov call
|
||||
“view change”
|
||||
|
||||
The primus inter pares, Castro and Liskov\’s primary, Castro and Liskov\’s
|
||||
view, issues a proposed root hash for the next block (a short message).
|
||||
Everyone chats to everyone else, announcing that they are attempting to
|
||||
synchronize on that root hash, also short messages, preparatory to long
|
||||
messages as they attempt to synchronize. If they succeed in generating the
|
||||
proposed block, and there is nothing wrong with the block, they send
|
||||
approvals (short messages) to everyone else. At some point the primus inter
|
||||
pares wraps up a critical mass of approvals in an approval block (a
|
||||
potentially long message). When everyone has a copy of the proposed block,
|
||||
and the approval block, then the block chain has added another immutable
|
||||
block.
|
||||
|
||||
Castro and Liskov’s pre prepare message is the primary telling everyone
|
||||
“Hey, let us try for the next block having this root hash”
|
||||
|
||||
Castro and Liskov’s prepare message is each peer telling all the other peers
|
||||
“Trying to synchronize on this announcement”, confirming that the primus
|
||||
inter pares is active, and assumed to be sane.
|
||||
|
||||
Castro and Liskov’s commit message is each peer telling all the other peers
|
||||
“I see a voting majority of peers trying to synchronize on this root hash”.
|
||||
At this point Castro and Liskov’s protocol is complete – but of course
|
||||
there is no guarantee that we will be able to synchronize on this root hash -
|
||||
it might contain invalid transactions, it might reference transactions that
|
||||
get lost because peers go down or internet communications are interrupted. So
|
||||
our protocol is not complete.
|
||||
|
||||
The peers have not committed to the block. They have committed to commit to
|
||||
the block if they have it and it is valid.
|
||||
|
||||
So, we have a Paxos process where they agree to try for a block of
|
||||
transactions with one specific root hash. Then everyone tries to sync on the
|
||||
block. Then we have a second Paxos process where they agree that they have a
|
||||
block of signatories agreeing that they have the block of transactions and it
|
||||
is valid.
|
||||
|
||||
The block of transactions Merkle chains to previous blocks of signatories and
|
||||
transactions, and the block of signatories Merkle chains to previous blocks
|
||||
of transactions and signatories. Each short message of commitment contains a
|
||||
short proof that it chains to previous commitments, which a peer already has
|
||||
and has already committed to.
|
||||
|
||||
When a peer has the block of transactions that a voting majority of peers
|
||||
have agreed to commit to, and the block is valid, it announces it has the
|
||||
block, and that the block has majority support, and it goes into the flood
|
||||
fill transactions phase.
|
||||
|
||||
In the flood fill transactions phase, it randomly synchronizes its pool data
|
||||
with other random peers, where each peer synchronizes with the other peer by
|
||||
each peer giving the other the pool items it does not yet possess.
|
||||
|
||||
It also enters into a Paxos consensus phase where peers agree on the
|
||||
authoritative block of signatures, and the time for the next block of
|
||||
transactions, so that each not only has a majority of signatures, but the
|
||||
same block of signatures forming a majority. When the time for forming the
|
||||
next block arrives, it switches to flood filling only old data around, the
|
||||
point being to converge on common set of transactions.
|
||||
|
||||
After convergence on a common set of transactions has been going for a while,
|
||||
they expect an announcement of a proposed consensus on those transactions by
|
||||
primus inter pares, the pre-prepare message of the Paxos protocol.
|
||||
|
||||
They then proceed to Paxos consensus on the intended block, by way of the
|
||||
prepare and commit messages, followed, if all goes well, by a voting majority
|
||||
of peers announcing that they have the block and it is valid.
|
||||
|
||||
Our implementation of Byzantine Paxos differs from that of Castro and Liskov
|
||||
in that a block corresponds to what they call a stable checkpoint, and also
|
||||
corresponds to what they call a group transaction. If every peer swiftly
|
||||
received every transaction, and then rejected conflicting transactions, then
|
||||
it would approximate their protocol, but you cannot say a transaction is
|
||||
rollback proof until you reach a stable checkpoint, and if rollbacks are
|
||||
possible, people are going to game the system. We want authoritative
|
||||
consensus on what transactions have been accepted more than we want prompt
|
||||
response, as decentralized rollback is apt to be chaotic, unpredictable, and
|
||||
easily gamed. They prioritized prompt response, while allowing the
|
||||
possibility of inconsistent response – which was OK in their application,
|
||||
because no one wanted to manipulate the system into giving inconsistent
|
||||
responses, and inconsistent responses did not leave angry clients out of
|
||||
money.
|
BIN
docs/pedersons_secret_sharing.pdf
Normal file
122
docs/peering_through_nat.md
Normal file
@ -0,0 +1,122 @@
|
||||
---
|
||||
lang: en
|
||||
title: Peering through NAT
|
||||
---
|
||||
A library to peer through NAT is a library to replace TCP, the domain
|
||||
name system, SSL, and email.
|
||||
|
||||
The NAT mapping timeout is officially 20 seconds, but I have no idea
|
||||
what this means in practice. I suspect each NAT discards port mappings
|
||||
according to its own idiosyncratic rules, but 20 seconds may be a widely respected minimum.
|
||||
|
||||
An experiment on [hole punching] showed that most NATs had a way
|
||||
longer timeout, and concluded that the way to go was to just repunch as
|
||||
needed. They never bothered with keep alive. They also found that a lot of
|
||||
the time, both parties were behind the same NAT, sometimes because of
|
||||
NATs on top of NATs
|
||||
|
||||
[hole punching]:http://www.mindcontrol.org/~hplus/nat-punch.html
|
||||
"How to communicate peer-to-peer through NAT firewalls"
|
||||
{target="_blank"}
|
||||
|
||||
Another source says that "most NAT tables expire within 60 seconds, so
|
||||
NAT keepalive allows phone ports to remain open by sending a UDP
|
||||
packet every 25-50 seconds".
|
||||
|
||||
The no brainer way is that each party pings the other at a mutually agreed
|
||||
time every 15 seconds. Which is a significant cost in bandwidth. But if a
|
||||
server has 4Mib/s of internet bandwidth, it can support keepalives for a couple
|
||||
of million clients. On the other hand, someone on cell phone data with thirty
|
||||
peers is going to make a significant dent in his bandwidth.
|
||||
|
||||
With client to client keepalives, probably a client will seldom have more
|
||||
than dozen peers. Suppose each keepalive is sent 15 seconds after the
|
||||
counterparty's previous packet, or an expected keepalive is not received,
|
||||
and each keepalive acks received packets. If not receiving expected acks
|
||||
or expected keepalives, we send nack keepalives (hello-are-you-there
|
||||
packets) one per second, until we give up.
|
||||
|
||||
This algorithm should not be baked in stone, but rather should be an
|
||||
option in the connection negotiation, so that we can do new algorithms as
|
||||
the NAT problem changes, as it continually does.
|
||||
|
||||
If two parties are trying to setup a connection through a third party broker,
|
||||
they both fire packets at each other (at each other's IP as seen by the
|
||||
broker) at the same broker time minus half the broker round trip time. If
|
||||
they don't get a packet in the sum of the broker round trip times, keep
|
||||
firing with slow exponential backoff until connection is achieved, or until
|
||||
exponential backoff approaches the twenty second limit.
|
||||
|
||||
Their initial setup packets should be steganographed as TCP startup
|
||||
handshake packets.
|
||||
|
||||
We assume a global map of peers that form a mesh whereby you can get
|
||||
connections, but not everyone has to participate in that mesh. They can be
|
||||
clients of such a peer, and only inform selected counterparties as to whom
|
||||
they are a client of.
|
||||
|
||||
The protocol for a program to open port forwarding is part of Universal Plug and Play, UPnP, which was invented by Microsoft but is now ISO/IEC 29341 and is implemented in most SOHO routers.
|
||||
|
||||
But it is generally turned off by default, or manually. Needless to say, if relatively benign Bitcoin software can poke a hole in the
|
||||
firewall and set up a port forward, so can botnet malware.
|
||||
|
||||
The standard for poking a transient hole in a NAT is STUN, which only works for UDP – but generally works – not always, but most of the time. This problem everyone has dealt with, and there are standards, but not libraries, for dealing with it. There should be a library for dealing with it – but then you have to deal with names and keys, and have a reliability and bandwidth management layer on top of UDP.
|
||||
|
||||
But if our messages are reasonably short and not terribly frequent, as client messages tend to be, link level buffering at the physical level will take care of bandwidth management, and reliability consists of message received, or message not received. For short messages between peers, we can probably go UDP and retry.
|
||||
|
||||
STUN and ISO/IEC 29341 are incomplete, and most libraries that supply implementations are far too complete – you just want a banana, and you get the entire jungle.
|
||||
|
||||
Ideally we would like a fake or alternative TCP session setup, and then you get a regular standard TCP connection on a random port, assuming that the target machine has that service running, and the default path for exporting that service results in window with a list of accessible services, and how busy they are. Real polish would be hooking the domain name resolution so that names in the peer top level domain return a true IP, and and then intercepts TCP session setup for that IP so that it will result in TCP session setup going through the NAT penetration mechanism if the peer is behind a NAT. One can always install one’s own OSI layer three or layer two, as a vpn does or the host for a virtual machine. Intercept the name lookup, and then tell the layer three to do something special when a tcp session is attempted on the recently acquired IP address, assuming the normal case where an attempt to setup a TCP session on an IP address follows very quickly after a name lookup.
|
||||
|
||||
Note that the internet does not in fact use the OSI model though everyone talks as if it did. Internet layers correspond only vaguely to OSI layers, being instead:
|
||||
|
||||
1. Physical
|
||||
2. Data link
|
||||
3. Network
|
||||
4. Transport
|
||||
5. Application
|
||||
|
||||
And I have no idea how one would write or install one’s own network or transport layer, but something is installable, because I see no end of software that installs something, as every vpn does.
|
||||
|
||||
------------------------------------------------------------------------
|
||||
|
||||
Assume an identity system that finds the entity you want to
|
||||
talk to.
|
||||
|
||||
If it is behind a firewall, you cannot notify it, cannot
|
||||
send an interrupt, cannot ring its phone.
|
||||
|
||||
Assume the identity system can notify it. Maybe it has a
|
||||
permanent connection to an entity in the identity system.
|
||||
|
||||
Your target agrees to take the call. Both parties are
|
||||
informed of each other’s IP address and port number on which
|
||||
they will be taking the call by the identity system.
|
||||
|
||||
Both parties send off introduction UDP packets to the
|
||||
other’s IP address and port number – thereby punching holes
|
||||
in their firewall for return packets. When they get
|
||||
a return packet, an introduction acknowledgement, the
|
||||
connection is assumed established.
|
||||
|
||||
It is that simple.
|
||||
|
||||
Of course networks are necessarily non deterministic,
|
||||
therefore all beliefs about the state of the network need to
|
||||
be represented in a Bayesian manner, so any
|
||||
assumption must be handled in such a manner that the
|
||||
computer is capable of doubting it.
|
||||
|
||||
We have finite, and slowly changing, probability that our
|
||||
packets get into the cloud, a finite and slowly changing
|
||||
probability that our messages get from the cloud to our
|
||||
target. We have finite probability that our target
|
||||
has opened its firewall, finite probability that our
|
||||
target can open its firewall, which transitions to
|
||||
extremely high probability when we get an
|
||||
acknowledgement – which prior probability diminishes over
|
||||
time.
|
||||
|
||||
As I observe in [Estimating Frequencies from Small Samples](./estimating_frequencies_from_small_samples.html) any adequately flexible representation of the state of
|
||||
the network has to be complex, a fairly large body of data,
|
||||
more akin to a spam filter than a Boolean.
|
26
docs/petname_language.md
Normal file
@ -0,0 +1,26 @@
|
||||
---
|
||||
title:
|
||||
Petname Language
|
||||
---
|
||||
Many different cryptographic identifiers get a petname, but the primary one of thought and concern is petnames for Zooko identifiers.
|
||||
|
||||
A Zooko identifier arrives as display name, nickname, public key, and signature binding the display name and nickname to the public key.
|
||||
|
||||
A petname for a Zooko name is derived from the nickname:
|
||||
|
||||
- remove all leading and trailing whitespace.
|
||||
- If there are no alphabetic characters, prefix it with one at random.
|
||||
- Move any leading characters that are not alphabetic from prefix to affix.
|
||||
- Replace all whitespaces with hyphens.
|
||||
- Replace all special characters with their nearest permitted equivalent.\
|
||||
\"#%&'(),.:;<=>?@[]\\^{|} ~\` are special characters that allow escape
|
||||
from plain text. In displayed text, @ will signify a petname
|
||||
corresponding to a Zooko name.\
|
||||
If someone's nickname is Bob, he will likely get the petname Bob,
|
||||
which will be displayed in text as `@Bob`, indicating it is likely to
|
||||
be translated in transmission and reception.
|
||||
- If the result is a duplicate of an existing petname, append a number that renders it unique.
|
||||
- The user gets the opportunity to revise the petname, but his petname has to be a valid identifier that conforms to the above rules, or else it gets revised again.
|
||||
- The user may know that several petnames correspond to one entity, but he cannot assign several nicknames to one petname.
|
||||
|
||||
We will also eventually have local ids for receive addresses.
|
684
docs/proof_of_stake.md
Normal file
@ -0,0 +1,684 @@
|
||||
---
|
||||
title:
|
||||
Proof of Stake
|
||||
---
|
||||
::: {style="background-color : #ffdddd; font-size:120%"}
|
||||
![run!](tealdeer.gif)[TL;DR Map a blockdag algorithm equivalent to the
|
||||
Generalized MultiPaxos Byzantine
|
||||
protocol to the corporate form:]{style="font-size:150%"}
|
||||
|
||||
The proof of stake crypto currency will work like
|
||||
shares. Crypto wallets, or the humans controlling the wallets,
|
||||
correspond to shareholders.
|
||||
Peer computers in good standing on the blockchain, or the humans
|
||||
controlling them, correspond to company directors.
|
||||
The primus inter pares peer, or the human controlling it, corresponds to the CEO.
|
||||
:::
|
||||
|
||||
We need proof of stake because our state regulated system of notaries,
|
||||
bankers, accountants, and lawyers has gone off the rails, and because
|
||||
proof of work means that a tiny handful of people who are [burning a
|
||||
whole lot of computer power]
|
||||
control your crypto money. Apart from being wasteful, they don’t
|
||||
necessarily have your best interests at heart, producing a bias towards
|
||||
inflation as in Monero, and/or high transaction fees as in Bitcoin.
|
||||
|
||||
[burning a whole lot of computer power]: https://news.bitcoin.com/businessman-buys-two-electric-power-stations-to-do-bitcoin-mining-in-russia/
|
||||
|
||||
The entire bitcoin network
|
||||
[currently consumes just over 46 terawatt hours of energy every year].
|
||||
This is almost as much as the annual energy consumption of Portugal,
|
||||
with its population of roughly 10 million. Simply settling a transaction
|
||||
in the bitcoin network consumes around 427 kilowatt hours. This amount
|
||||
of energy is enough to supply an average German four-person household
|
||||
with electricity for more than a month.
|
||||
|
||||
[currently consumes just over 46 terawatt hours of energy every year]: https://arstechnica.com/tech-policy/2018/02/european-bankers-scoff-at-bitcoin-for-its-risk-huge-energy-inefficiency/
|
||||
|
||||
Notaries, bankers, accountants and lawyers, are professionals whom
|
||||
people hire because they don’t trust each other. They are in the trust
|
||||
business. And then there is some failure of trust, some bad behavior,
|
||||
as for example Enron’s accounting, and then they turn to the government
|
||||
to make trust mandatory, whereupon due to regulatory capture and the
|
||||
malincentives of the state, the untrustworthy behavior becomes standard
|
||||
and compulsory.
|
||||
|
||||
[Sarbanes-Oxley] was the response to the misconduct of Enron’s
|
||||
accountants, and was theoretically intended to make behavior similar to
|
||||
that of Enron’s accountants forbidden, but the practical consequence was
|
||||
that in substantial part, it made such behavior compulsory. Which is
|
||||
why Gab is now doing an Initial Coin Offering (ICO) instead of an
|
||||
Initial Public Offering (IPO).
|
||||
|
||||
[Sarbanes-Oxley]:sox_accounting.html
|
||||
"Sarbanes-Oxley accounting"
|
||||
|
||||
Because current blockchains are proof of work, rather than proof of
|
||||
stake, they give coin holders no power. Thus an initial coin offering
|
||||
(ICO) is not a promise of general authority over the assets of the
|
||||
proposed company, but a promise of future goods or services that will be
|
||||
provided by the company. A proof of stake ICO could function as a more
|
||||
direct substitute for an initial public offering (IPO). Thus we want it
|
||||
to be easy to issue your own coins, and [to perform coin swaps between
|
||||
chains without the need for an exchange] that would provide a potential
|
||||
target for regulation.
|
||||
|
||||
[to perform coin swaps between chains without the need for an exchange]: ./contracts_on_blockchain.html#atomic-swaps-on-separate-blockchains
|
||||
|
||||
The block chain, an immutable append only data structure, is a
|
||||
replacement for public notaries, and on that foundation of public
|
||||
notarization, we can and should recreate banking, the corporate form,
|
||||
accounting, and, eventually, lawyering, though the Ethereum attempt to
|
||||
build lawyering is premature and wicked. We will not be able to build
|
||||
lawyering until the crypto currency system is well established and has a
|
||||
good reputation and record system linked to it, and attempting to build
|
||||
lawyering into it prematurely will result in bad conduct.
|
||||
|
||||
The blockchain governed by proof work is failing. We need a better
|
||||
solution for notarizing, based on some form of Paxos, and on that better
|
||||
system of notarizing, we should build banking. And on that banking, the
|
||||
corporate form, and on that corporate form, accounting. We will worry
|
||||
about lawyering and justice at some much later date, after crypto
|
||||
currency values stabilize.
|
||||
|
||||
We build public notarization, then build money on top or that. When
|
||||
that is working and stable, we promptly build accounting and a
|
||||
replacement of the joint stock corporation, which replacement for the
|
||||
joint stock corporation will manifest as the “Initial coin offering”,
|
||||
like Gab’s initial coin offering.
|
||||
|
||||
The business of Enron’s accountants failed because no one trusted them
|
||||
any more.
|
||||
|
||||
Facing massive loss of trust in the wake of the Enron scandal,
|
||||
accountants rushed to the government, and demanded the government
|
||||
provide them with mandatory state supplied and state enforced trust, and
|
||||
got [Sarbanes-Oxley], with the result that they failed upwards instead of
|
||||
downwards. This created a situation where it is impossible for a mid
|
||||
sized company to go public. So venture capitalists, instead of having
|
||||
an initial public offering of a successful venture, attempt to sell the
|
||||
successful venture to Google. Hence Gab’s Initial Coin Offering.
|
||||
Google will not buy them, for obvious reasons, and they cannot do an
|
||||
initial public offering, because of [Sarbanes-Oxley], hence the initial
|
||||
coin offering.
|
||||
|
||||
Actually existent governments and bankers are evil and untrustworthy,
|
||||
and the burdens of using trust mediated by bankers and governments to do
|
||||
business are rapidly becoming intolerable. Accounting and HR have
|
||||
become vast onerous bureaucracies, burdensome tentacles of the state in
|
||||
every business, making businesses larger than a family and smaller than
|
||||
giant multinational corporation with a skyscraper full of Harvard
|
||||
lawyers each drawing \$300 per hour, increasingly impractical.
|
||||
|
||||
Proof of work is a terrible idea, and is failing disastrously, but we
|
||||
need to replace it with something better than bankers and government.
|
||||
|
||||
The gig economy represents the collapse of the corporate form under the
|
||||
burden of HR and accounting.
|
||||
|
||||
The initial coin offering (in place of the initial public offering)
|
||||
represents an attempt to recreate the corporate form using crypto
|
||||
currency, to which existing crypto currencies are not well adapted.
|
||||
|
||||
The corporate form is collapsing in part because of [Sarbanes-Oxley],
|
||||
which gives accountants far too much power, and those responsible for
|
||||
deliverables and closing deals far too little, and in part because HR
|
||||
provides a vector for social justice warriors to infect corporations.
|
||||
|
||||
When a corporation goes social justice it abandons its original
|
||||
functions, and instead dedicates itself, its resources and the wealth of
|
||||
its share holders full time to infecting other corporations with social
|
||||
justice, like a zombie turning those still living into zombies.
|
||||
|
||||
I have been working on the problem of creating a crypto currency well
|
||||
adapted for this purpose, and what I conclude always implies pre-mining,
|
||||
that existing owners of a crypto currency shall be like shareholders in
|
||||
an existing business, transferring shares between each other.
|
||||
|
||||
Which immediately leads us to the idea of a mass of competing
|
||||
currencies, with share swaps – which automatically has scalability,
|
||||
particularly if some of these crypto corporations have as their
|
||||
major asset shares in the main currency, and a system of enforcement
|
||||
that makes it difficult for some shareholders to steal that dangerously
|
||||
liquid asset from other shareholders, in which case they are sidechains,
|
||||
rather than separate shares systems.
|
||||
|
||||
Western civilization is based on double entry accounting, the joint
|
||||
stock corporation, and the scientific method, all of which have come
|
||||
under massive attack.
|
||||
|
||||
Crypto currencies need to make accounting and corporations workable
|
||||
again, and are needed for this purpose.
|
||||
|
||||
The joint stock corporation has always existed, and our earliest records
|
||||
of them come from the time of the Roman Empire in the West, but before
|
||||
Charles the Second they were grants of Kingly power to private
|
||||
individuals for state purposes – they were socialism.
|
||||
|
||||
Under Charles the Second, they were still theoretically socialism, but
|
||||
now it was expected and high status to get rich in the course of
|
||||
pursuing state purposes, socialism similar to “Socialism with Chinese
|
||||
characteristics” which is not very socialist at all, or “The party
|
||||
decides what communism is”, which is not very communist at all.
|
||||
|
||||
Under Charles the second we first see the Randian Hero Engineer Chief
|
||||
executive officer, using other people’s money and other people’s labour
|
||||
to advance technology and apply technological advance to the creation of
|
||||
value. There was a sort of Silicon Valley under Charles the second, but
|
||||
instead of laying down optical fibre they were laying down canals and
|
||||
conquering the Indies.
|
||||
|
||||
During late Victorian times, we see the corporation become wholly
|
||||
private. Corporations are now people, a situation parodied by Gilbert
|
||||
and Sullivan – but at the same time, we see the regulatory state
|
||||
rendering them socialist again by the back door, a problem starting in
|
||||
Victorian times, and now getting out of control.
|
||||
|
||||
Human Resources has long been an ever more burdensome branch of the
|
||||
socialist state inserted parasitically into every business, and
|
||||
[Sarbanes-Oxley] has now destroyed double entry accounting, which is
|
||||
fundamental to the survival of western civilization.
|
||||
|
||||
Under [Sarbanes-Oxley], the tail wags the dog. Instead of monitoring
|
||||
actual reality, accounting decides on official reality.
|
||||
|
||||
Power in business has been taken out of the hands of those who provide
|
||||
the capital, those responsible for closing deals, and those responsible
|
||||
for deliverables, and into the hands of Accounting and Human Resources,
|
||||
who have government granted power to obstruct, disrupt, delay, and
|
||||
criminalize those responsible for providing capital, those responsible
|
||||
for closing deals, and those responsible for deliverables.
|
||||
|
||||
We need to construct crypto currency around this function. The original
|
||||
intent was for buying drugs, buying guns, violating copyright, money
|
||||
laundering, and capital flight.
|
||||
|
||||
These are all important and we need to support them all, especially
|
||||
violating copyright, capital flight and buying guns under repressive
|
||||
regimes. But we now see big demand for crypto currencies to support a
|
||||
replacement for Charles the Second’s corporate form, which is being
|
||||
destroyed by HR, and to restore double entry accounting, which is being
|
||||
destroyed by [Sarbanes-Oxley].
|
||||
|
||||
[Sarbanes-Oxley] is more deeply immoral than forcing doctors to perform
|
||||
abortions, forcing businesses to pay for abortions, and forcing males to
|
||||
pay for abortions by single women with whom they have no relationship
|
||||
through “free” medical care.
|
||||
|
||||
People lost their trust in accountants, so accountants went to the
|
||||
government, and asked the government to force corporations to act as if
|
||||
they trusted accountants – which undermines the cooperation and good
|
||||
behavior on which our economy is based, by undermining the accounting
|
||||
system that distinguishes positive sum behavior from negative sum
|
||||
behavior, the accounting system that tracks the creation of value. This
|
||||
makes the entire crypto anarchy black market program legitimate and
|
||||
necessary. We are morally obligated to obey a government that supports
|
||||
our civilization and holds it together, keeping peace and order,
|
||||
defending the realm from enemies internal and external, protecting the
|
||||
property of taxpayers, enforcing contracts, and honoring warriors. We
|
||||
are not morally obligated to obey a hostile regime that seeks to destroy
|
||||
western civilization. If the state will not uphold trust, cooperation,
|
||||
and positive sum behavior, we must find other ways. Undermining
|
||||
accounting undermines cooperation.
|
||||
|
||||
Used to be that the state church was the state church. Then Education
|
||||
media complex was the State Church, Harvard seamlessly transitioning
|
||||
from being the headquarters of the State Church of New England, to being
|
||||
the headquarters of the American Empire running the entire Education
|
||||
Media complex of most of the world.
|
||||
|
||||
Now the social media are the state church.
|
||||
|
||||
This is a problem because the unofficially official state belief system
|
||||
is more and more out of touch with reality, resulting in an increasingly
|
||||
oppressive social media, that continually censors and bans an ever
|
||||
increasing number of people for an ever increasing number of crime
|
||||
thoughts. Thus we need for an anarchic and chaotic social media system
|
||||
to oppose and resist it. We need the chans, and things like the chans.
|
||||
|
||||
If we had a saner and more functional state belief system, one that did
|
||||
not compel belief in so many egregious lies, and force so many
|
||||
compelled affirmations of egregious point-deer-make-horse lies, then it
|
||||
would be right to repress a chaotic, chan style, darknet style social
|
||||
media system. If the state church was sane, then censorship to support
|
||||
the state belief system would be morally justified, and resisting it
|
||||
morally wrong, but because the quasi state social media are egregiously
|
||||
untruthful and therefore egregiously repressive, anarchic and chaotic
|
||||
social media is morally justified and necessary.
|
||||
|
||||
Our system of client wallets needs to support the minimum chat system
|
||||
necessary to agree to transfer of value to value, and to support
|
||||
lightning networks to prevent traceability, but we need to architect it
|
||||
so that wallets are potentially capable of becoming a social media
|
||||
platform. This capability is not part of the minimum viable product, and
|
||||
will not be part of early releases, but the minimum viable product needs
|
||||
to be able to support and prove conversations comprising a contract to
|
||||
exchange value. Design of the minimum viable product needs to be done in
|
||||
a way that supports the future capability of wallets supporting general
|
||||
conversations similar to the chans. If the state church had stuck to
|
||||
commanding people to believe that God is three and God is one, no one
|
||||
can prove anything different, but when the state church commands us to
|
||||
believe that blacks are being unfairly targeted by police, and therefore
|
||||
quotas limiting arrests of minority groups are totally justified, and
|
||||
simultaneously totally nonexistent, not only can we prove something
|
||||
different, but if we were to believe the official truth and official
|
||||
science that our social media requires us to believe, we are likely to get
|
||||
killed, thus today’s state church is necessarily more repressive than it
|
||||
was back when it was openly a state church. God being both three and
|
||||
one is harder to falsify than arrest quotas being both morally
|
||||
obligatory and nonexistent, thus higher levels of repression are
|
||||
required to enforce official belief.
|
||||
|
||||
When the state religion was transliterated from the next world to this,
|
||||
it became readily falsifiable, resulting in the necessity of disturbing
|
||||
levels of repression, to which repression crypto anarchy is the morally
|
||||
appropriate response. With a saner state religion, crypto anarchy would
|
||||
be a morally objectionable attack on order and social cohesion, but the
|
||||
current officially unofficial state religion is itself a morally
|
||||
objectionable attack on order and social cohesion. Hence we need a
|
||||
distributed system of wallet to wallet encrypted chat with no
|
||||
monopolistic center. In the minimum viable product we will not attempt
|
||||
to compete with chat and social media, but we need to have the ability
|
||||
to discuss payments in wallets, with no monopolistic
|
||||
center, and will set this up in a way compatible with future expansion
|
||||
to a future challenge against current social media. In war, all things
|
||||
are permitted, and the state is making war on society. A minimum system
|
||||
that allows people to pay each other unsupervised
|
||||
is not very different from a centreless system that allows people to
|
||||
associate to discuss any hate fact, nor very different from a centerless
|
||||
lightning network.
|
||||
|
||||
# General structure
|
||||
|
||||
The blockchain is managed by the peers. Peers are fully accessible on
|
||||
the internet, in that they have a port on which one can receive UDP messages
|
||||
from any peer or any client on the internet. If they are behind a NAT,
|
||||
the port is forwarded to them by port forwarding, a load balancer, or
|
||||
failover redirection. Each peer keeps a copy of the entire blockchain.
|
||||
Clients only keep a copy of the data that is relevant to themselves, and
|
||||
are commonly behind NATs, so that their communications with each other
|
||||
must be mediated by peers.
|
||||
|
||||
The blockchain contains data linked by hashes, so that any peer can
|
||||
provide any client with a short proof that the data supplied is part of
|
||||
the current global consensus, the current global consensus being
|
||||
represented by a short hash, which is linked to every previous global
|
||||
consensus. This chain of hashes ensures that the record of past events
|
||||
is immutable. Data can be lost, or can become temporarily or permanently
|
||||
inaccessible, but it cannot be changed. The fundamental function of the
|
||||
blockchain is to function as a public notary, to ensure that everyone
|
||||
sees the same immutable story about past events. The hash dag is
|
||||
structured so that a peer can also produce a short proof to a client
|
||||
that it has presented all events, or all the most recent events,
|
||||
that relate to a given human readable name or a given public key. For
|
||||
the hash dag to be capable of being analysed with reasonable efficiency,
|
||||
the hash dag must correspond to a very small number of approximately
|
||||
balanced Merkle trees.
|
||||
|
||||
In order to produce short proofs that can be proven to contain all
|
||||
transactions relevant to a given key or human readable name, there have
|
||||
to be Merkle-patricia trees organized primarily by block number, but
|
||||
also Merkle trees organized by key and by name, and the root hash of the
|
||||
current consensus has to depend on the root hash of each of these three
|
||||
trees, and all past versions of these trees, the root hashes of all past
|
||||
consensuses. The blockchain will contain a Merkle-patricia dag of all
|
||||
unspent transaction outputs, and all spent transaction outputs, enabling
|
||||
a short proof that a transaction output was spent in block n, and that
|
||||
as of block n, another transaction output was not yet spent.
|
||||
|
||||
In order to produce a short proof, the consensus of the block chain has to
|
||||
be organized as a collection of balanced binary Merkle trees, with the state of
|
||||
the consensus at any one point in time being represented as the list of roots
|
||||
of the Merkle trees.
|
||||
|
||||
This enables people with limited computing power and a quite small
|
||||
amount of data to prove that the state of the consensus at any one time for
|
||||
which they have that quite small amount of data, includes the consensus at
|
||||
any earlier time for which they have that quite small amount of data.
|
||||
|
||||
We need this capability because at scale, full peers are enormous
|
||||
and handle enormous amounts of data, so we wind up with few peers and
|
||||
an enormous number of clients, and the clients need to be able to monitor
|
||||
the peers to resist being scammed.
|
||||
|
||||
Clients need the capability to know that at any moment, the current
|
||||
consensus of the peers includes that past in which value was committed to
|
||||
public keys on the blockchain whose secrets he controls.
|
||||
|
||||
We build a system of payments and globally unique human readable names
|
||||
mapping to [Zooko’s triangle] names (Zooko’s quadrangle) on top of this
|
||||
public notary functionality.
|
||||
|
||||
[Zooko’s triangle]: ./zookos_triangle.html
|
||||
|
||||
All wallets shall be client wallets, and all transaction outputs shall
|
||||
be controlled by private keys known only to client wallets, but most
|
||||
transactions or transaction outputs shall be registered with one
|
||||
specific peer. The blockchain will record a peer’s uptime, its
|
||||
provision of storage and bandwidth to the blockchain, and the amount of
|
||||
stake registered with a peer. To be a peer in good standing, a peer has
|
||||
to have a certain amount of uptime, supply a certain amount of bandwidth
|
||||
and storage to the blockchain, and have a certain amount of stake
|
||||
registered to it. Anything it signed as being in accordance with the
|
||||
rules of the blockchain must have been in accordance with the rules of
|
||||
the blockchain. Thus client wallets that control large amounts of stake
|
||||
vote which peers matter, peers vote which peer is primus inter pares,
|
||||
and the primus inter pares settles double spending conflicts and
|
||||
suchlike.
|
||||
|
||||
In the classic Byzantine Fault Resistant algorithms, the leader is decided
|
||||
prospectively, and then decides the total order of all transactions. In a
|
||||
blockdag algorithm, the leader is determined retrospectively rather than
|
||||
prospectively. Each peer in each gossip event (each vertex of the dag)
|
||||
makes a decision that implies a total order of all transactions, which
|
||||
decisions are unlikely to agree, and then which vertex is the one that
|
||||
actually does set the total order is decided retrospectively, equivalent to
|
||||
continuous retrospective leader election in the classic Byzantine fault
|
||||
resistant algorithms.
|
||||
|
||||
Proof of stake works like the corporate form, or can work like the
|
||||
corporate form, with the crypto currency as shares, the wallets, or the
|
||||
humans controlling the wallets, as shareholders, the peers in good
|
||||
standing, or the humans controlling the peers in good standing as the
|
||||
board, and the primus inter pares, or the human controlling the primus
|
||||
inter pares, as the CEO.
|
||||
|
||||
Thus the crypto currency works, or can work, like shares in a
|
||||
corporation. Proof of stake means that the shareholders can less easily
|
||||
be screwed over, since the shareholders elect the board from time to
|
||||
time, and the board elects the CEO from time to time.
|
||||
|
||||
But we need many corporations, not just one. OK, each crypto
|
||||
corporation corresponds to a sidechain, with its primus inter pares as a
|
||||
named peer on the mainchain. Buying and selling shares corresponds to
|
||||
swapping shares. The mainchain, if all goes well, has the special
|
||||
privilege of being the source of root names, and its shares are the most
|
||||
liquid, the most readily exchangeable, and this is the primary thing
|
||||
that makes it “main”.
|
||||
|
||||
# Implementation of proof of stake
|
||||
|
||||
Good blockdag protocols with high consensus bandwidth rely on forming
|
||||
a consensus about the total past of the blockchain during the gossip
|
||||
protocol where they share transactions around.
|
||||
|
||||
During gossip, they also share opinions on the total past of the blockchain.
|
||||
|
||||
If each peer tries to support past consensus, tries to support the opinion of
|
||||
what looks like it might be the majority of peers by stake that it sees in
|
||||
past gossip events, then we get rapid convergence to a single view of the
|
||||
less recent past, though each peer initially has its own view of the very
|
||||
recent past.
|
||||
|
||||
Blockdag algorithms of this class of consensus algorithm, if correct and
|
||||
correctly implemented, are equivalent to Practical Byzantine Fault
|
||||
Tolerant Consensus with continuous leader election, with the important
|
||||
differences being that the leader is elected retrospectively rather than
|
||||
prospectively, with later peers deciding whom the past leader was and
|
||||
adopting his opinion of the total past, rather than whom the next leader
|
||||
will be, and if consensus fails to happen, we get a fork, rather than the
|
||||
algorithm stalling. The one third limit is fork detection, which is not quite
|
||||
the same thing as the one third limit in Practical Byzantine Fault Resistant
|
||||
Consensus, though in a sense equivalent, or closely related.
|
||||
|
||||
Satoshi’s design for bitcoin was that every wallet should be a peer on the
|
||||
mainchain, every peer should be a miner.
|
||||
|
||||
In Satoshi’s design every peer needs to be a miner, and every wallet a peer,
|
||||
because that is where the power is. If you don’t have power over your money,
|
||||
you are going to get ripped off and oppressed one way or the other.
|
||||
|
||||
This design fell apart under scaling pressure, with there being two or perhaps
|
||||
three mining pools that control the blockchain, a massively centralized
|
||||
system, and people are increasingly reliant on client wallets and exchanges in
|
||||
a system never designed to securely support client wallets or exchanges,
|
||||
leading to notorious exchange failures and ripoffs. Getting miners to validate
|
||||
your transactions is slow and expensive, and getting slower and more
|
||||
expensive. The lightning network is likely to wind up with all transactions
|
||||
going through two big exchanges, as credit card transactions do.
|
||||
|
||||
Wallets are by Satoshi’s design linked to output keys. And now every exchange
|
||||
demands to see your face and evidence of your true name. What I am seeing now
|
||||
is scaling failure and anonymity failure.
|
||||
|
||||
Bitcoin has failed due to scaling problems, resulting in high costs of
|
||||
transactions, high delay, heavy use of client wallets in a system in which
|
||||
client wallets are inherently unsafe, heavy use of exchanges in a system where
|
||||
exchanges are inherently unsafe.
|
||||
|
||||
For scaling, we need to design a system with a limited number of peers, more
|
||||
than a dozen, less than a thousand, and an enormous number of client wallets,
|
||||
billions of client wallets. And we need to give client wallets power and
|
||||
prevent the peers from having too much power.
|
||||
|
||||
Power to the wallets means our system has to run on proof of stake, rather
|
||||
than proof of work. But since a wallet is not always on the internet, it cannot
|
||||
routinely exercise power moment to moment. So, we need a system where unspent
|
||||
transaction outputs are hosted by particular blockchain peers, or large
|
||||
transaction outputs are hosted by particular peers, but controlled by client
|
||||
wallets, and the power of a peer depends on hosting sufficient value.
|
||||
|
||||
The architecture will be somewhat similar to email, where you run an email
|
||||
client on your computer, whose server is an email agent somewhere out in the
|
||||
internet. An email agent is a peer in relation to other email agents, and a
|
||||
host and server in relation to your email client.
|
||||
|
||||
Wallets will rendezvous with other wallets through peers, but the peer will set
|
||||
up a direct connection between wallets, where wallets send encrypted packets
|
||||
directly to each other, and the peer cannot know what wallets are talking to
|
||||
what wallets about what transactions. By analogy with email agents, we will
|
||||
call peers on the blockchain blockchain agents when they perform the role of
|
||||
servicing wallets, but, following the tradition established by Satoshi, peers
|
||||
when they perform the role of cooperating with each other to operate the
|
||||
blockchain.
|
||||
|
||||
The blockchain will be a directed acyclic graph indexed by Merkle trees. Being
|
||||
a directed acyclic graph, the peers will have to download and process the
|
||||
entire blockchain. The Merkle trees will be approximately balanced, thus of
|
||||
depth of order log N, hence a blockchain agent can provide a wallet a short
|
||||
chain of hashes linking any fact about the blockchain to the current root of
|
||||
the blockchain.
|
||||
|
||||
A wallet has a login relationship to a blockchain agent, which is a peer to
|
||||
all the other blockchain hosts on that blockchain, just as an email client has
|
||||
a login relationship with an email agent. A wallet is the client of its
|
||||
blockchain agent. Your wallet client will be software and a user interface
|
||||
that manages several wallets. Thus a wallet will be like an email account,
|
||||
which has a single login relationship with a single host, and your wallet
|
||||
client, which may manage several wallets, is like an email client, which is
|
||||
the client of an email agent or a small number of email agents.
|
||||
|
||||
What then stops the blockchain agent from lying to the wallet about the root
|
||||
and contents of the blockchain?
|
||||
|
||||
Wallets perform transactions by interacting directly with other wallets
|
||||
through an encrypted connection. [If both wallets are behind a firewall the
|
||||
connection is set up by servers, but it does not pass through the servers]
|
||||
(how_to_punch_holes_in_firewalls.html). For the interaction to succeed, for a
|
||||
transaction to go through, both wallets must have the same Merkle root for the
|
||||
blockchain, or one Merkle root must be a recent subsidiary of the other. The
|
||||
wallet receives a short proof (log N hashes where N is the number of events on
|
||||
the block chain in the last few months) that proves that the past that Bob’s
|
||||
wallet received from its peer is part of the same past as the past that Ann’s
|
||||
wallet received from her peer, that both wallets are on the same blockchain,
|
||||
that the data about the past that one wallet has is consistent with the data
|
||||
about the past that the other wallet has.
|
||||
|
||||
Sometimes a wallet name will be a Zooko name, consisting of a public key,
|
||||
secret key, nickname, and petname. Sometimes it will also have a globally
|
||||
unique human readable name controlled by a Zooko name. Peers will usually have
|
||||
globally unique human readable names controlled by a Zooko name.
|
||||
However, wallets will most commonly have login names, `wallet_name@peer_name`.
|
||||
All transaction outputs and inputs have an associated secret key, and the
|
||||
hash of the transaction must be signed by Schnorr multisignature of all the
|
||||
inputs.
|
||||
|
||||
Wallets should retain not the full block chain, but relevant transactions and
|
||||
outputs, and for each transaction and output, log N hashes connecting that to
|
||||
hashes close to the root.
|
||||
|
||||
Peers check the entire blockchain for validity, and construct short proofs
|
||||
showing to wallets that the facts that they
|
||||
care about are the same for everyone, that the past does not get rewritten,
|
||||
and every wallet on the same blockchain sees the same past. Wallets only
|
||||
download short and relevant parts of the blockchain.
|
||||
|
||||
Peers on the blockchain are not
|
||||
behind NATs, or if they are they have port forwarding, or some similar NAT
|
||||
penetration set up. One wallet sets up a connection to another wallet through
|
||||
a peer, but once the connection is set up, information does not pass through
|
||||
the peer, but rather information is sent directly from wallet to wallet. The
|
||||
peer negotiates a port for two wallets that wish to communicate and informs
|
||||
each of the other’s external IP address. It also negotiates a shared secret,
|
||||
and informs each wallet of the id associated with the shared secret. Both
|
||||
parties send off introduction UDP packets to the other’s IP address and port
|
||||
number – thereby punching holes in their firewall for return packets. When
|
||||
they get a return packet, an introduction acknowledgement, the connection is
|
||||
assumed established.
|
||||
|
||||
Where a wallet has a login type name, `wallet_name@peer_name`, the peer could
|
||||
potentially engage in a man in the middle attack against the wallet. However,
|
||||
during transactions, it is necessary to prove control of the secret key of an
|
||||
unspent transaction output, which the man in the middle cannot do, with the
|
||||
result that the connection will break with an error message blaming the peer
|
||||
that set up the connection.
|
||||
|
||||
When a group of wallets want to perform a transaction, they rendezvous on one
|
||||
particular wallet, who constructs the transaction, and feeds it to his
|
||||
blockchain agent. A rendezvous on a particular wallet is a room on that
|
||||
wallet. A wallet can have several rooms, and each room can have several other
|
||||
wallets connected to it. One wallet connected to a room, and also connected to
|
||||
another wallet by a separate connection, can invite that other wallet to that
|
||||
room, without needing to go through a peer. This prevents peers from reliably
|
||||
knowing who is party to a transaction.
|
||||
|
||||
## Byzantine failure.
|
||||
|
||||
Byzantine failure, if intentional and malicious, is lying, either explicitly - giving one party information inconsistent with the information given to another party, the classic example being one Byzantine General telling one other general he is going to advance, and another general that he is going to retreat, so that the general expecting to be part of an advance finds himself alone and he is killed and his army destroyed.
|
||||
|
||||
In a blockdag, this is always going to become visible eventually, but the problem is, it may become visible too late.
|
||||
|
||||
Mechanisms to protect against Byzantine failure look superficially like
|
||||
proof of stake shareholder democracy but they are subtly different. They
|
||||
protect against the ten percent attack, but assume that any one outcome
|
||||
selected by any one correctly functioning peer is equally acceptable, that
|
||||
the problem is selecting one of many equally possible and equally
|
||||
acceptable outcomes, that the decision of any peer of the two thirds of
|
||||
peers not engaged in byzantine failure is equally OK.
|
||||
|
||||
We need fifty one percent rule, shareholder authority, and we need to
|
||||
protect against Byzantine failure, the ten percent attack, both.
|
||||
|
||||
We need computer algorithms that prevent the ten percent attack, one third
|
||||
minus one, and we need social mechanisms that detect and penalize one third
|
||||
plus one.
|
||||
|
||||
We need computer shareholder democracy and human shareholder democracy layered
|
||||
on top of and underneath byzantine fault resistance.
|
||||
|
||||
Byzantine fault tolerance looks superficially like shareholder democracy, but
|
||||
it is not. We need all the mechanisms, each to address its own problem space.
|
||||
|
||||
If two thirds plus one voluntarily follow the
|
||||
policy of half plus one, and one third plus one testify that one rather
|
||||
arbitrarily selected outcome is consistent with the policy commanded by
|
||||
half plus one, then that is the one verified outcome of the blockchain.
|
||||
|
||||
And, to make sure we have mostly honest participants, human level social
|
||||
enforcement, which means that Byzantine faults need to have some probability
|
||||
of being detected and Streisanded.
|
||||
|
||||
But, in the event of complete breakdown of a major connection of the
|
||||
network, we need the one third plus one to reliably verify that there
|
||||
is no other one third plus one, by sampling geographically distant
|
||||
and network address distant groups of nodes.
|
||||
|
||||
So, we have fifty percent by weight of stake plus one determining policy,
|
||||
and one third of active peers on the network that have been nominated by
|
||||
fifty percent plus one of weight of stake to give effect to policy
|
||||
selecting particular blocks, which become official when fifty percent plus
|
||||
one of active peers on the network that have been nominated by fifty percent
|
||||
plus one of weight of stake have acked the outcome selected by one third
|
||||
plus one of active peers.
|
||||
|
||||
In the rare case where half the active peers see timeouts from the other
|
||||
half of the active peers, and vice versa, we could get two blocks, each
|
||||
endorsed by one third of the active peers, which case would need to be
|
||||
resolved by a fifty one percent vote of weight of stake voting for the
|
||||
acceptable outcome that is endorsed by the largest group of active peers,
|
||||
but the normal outcome is that half the weight of stake receives
|
||||
notification (the host representing them receives notification) of one
|
||||
final block selected by one third of the active peers on the network,
|
||||
without receiving notification of a different final block.
|
||||
|
||||
# Funds need to be fully traceable, and fully untraceable
|
||||
|
||||
A crypto currency needs to be totally traceable and totally untraceable.
|
||||
A common application is money lending in the third world. The money
|
||||
lender needs to be able to prove he loaned the peasant such and such an
|
||||
amount, on such and such terms, with such and such an agreement, and the
|
||||
[peasant needs to be able to prove he repaid the money lender such and
|
||||
such an amount]
|
||||
in relation to that agreement and in fulfillment of those terms.
|
||||
|
||||
[peasant needs to be able to prove he repaid the money lender such and
|
||||
such an amount]: http://www.scmp.com/week-asia/business/article/2148658/how-bitcoin-and-cryptocurrencies-went-wall-street-high-streets
|
||||
|
||||
The peasant’s daughter is working in Saudi Arabia. She buys some crypto
|
||||
currency, perhaps for cash, and sends it to her mother. She trusts her
|
||||
mother, her mother trusts her, she does not need any traceability, but
|
||||
the government wants to tax transfers, and the financial system that has
|
||||
a government monopoly wants to charge her and her mother a fee for getting in
|
||||
her way.
|
||||
|
||||
The parties to a transaction can send proof of transaction, and proof
|
||||
that the transaction is in accord with an agreement, and proof of what
|
||||
the agreement was, to anyone, or make it public. But if they take no
|
||||
action to make it public, no one can know who the transaction took place
|
||||
between, nor what the transaction was about.
|
||||
|
||||
An unhappy customer can make his transaction public. A happy customer
|
||||
can send a favorable acknowledgment to the supplier, that the supplier
|
||||
will likely make public. But if neither of them does anything to make it
|
||||
public, it remains untraceable by default, unless one of the parties
|
||||
does something special to change the default.
|
||||
|
||||
A company has an initial coin offering instead of an initial share
|
||||
offering. It goes bust. People who claim to be owed money by the company
|
||||
want to find the people who bought the initial coins. They do not want
|
||||
to be found.
|
||||
|
||||
# the corporate form
|
||||
|
||||
To better support the corporate form, the crypto currency maintains a
|
||||
name system, of globally unique human readable names on top of [Zooko’s
|
||||
triangle] names.
|
||||
|
||||
[Zooko’s triangle]: ./zookos_triangle.html
|
||||
|
||||
Transactions between [Zooko’s triangle] identities will be untraceable,
|
||||
because amounts will be in fixed sized amounts, and transactions will
|
||||
mingle many people paying many recipients. Transactions with globally
|
||||
unique human readable names will usually be highly traceable, since that
|
||||
is what the name is for – but you don’t have to use the name in the
|
||||
payment, and by default, do not.
|
||||
|
||||
A wallet name will typically look like an email address,
|
||||
`human readable login name @ human readable peer name`, but the
|
||||
transaction outputs it controls will be largely controlled by public
|
||||
keys, which are not provably connected to the wallet, unless the wallet
|
||||
operator chooses for it to be provable.
|
||||
|
||||
If someone makes a traceable and provable payment to a human readable
|
||||
name, he can then associate an arbitrary URL with that payment, such as
|
||||
a review of the service or product provided by the entity with the human
|
||||
readable name, so that people can find out what people who paid that
|
||||
entity are saying.
|
||||
|
||||
So if you make a provable payment to a recipient with a human readable
|
||||
name, [you will have some assurance of getting what you paid
|
||||
for](trust_and_privacy_on_the_blockchain.html).
|
||||
|
||||
Peers may have human readable names, and wallets may have names of the
|
||||
form `LoginName@PeerName`.
|
||||
|
219
docs/recognizing_categories_and_instances.md
Normal file
@ -0,0 +1,219 @@
|
||||
---
|
||||
title: Recognizing categories, and recognizing particulars as forming a category
|
||||
# katex
|
||||
---
|
||||
This is, of course, a deep unsolved problem in philosophy.
|
||||
|
||||
However, it seems to be soluble as computer algorithm. Programs that do
|
||||
this, ought to look conscious.
|
||||
|
||||
There are a lot of programs solving things that I thought were AI hard, for
|
||||
example recognizing pornography, recognizing faces in images, predicting what
|
||||
music, or what books, or what movies, a particular customer might like.
|
||||
|
||||
We have clustering algorithms that work on points in spaces of reasonably
|
||||
small dimension. However, instances are sparse vectors in space of
|
||||
unenumerably large dimension.
|
||||
|
||||
Consider, for example, the problem of grouping like documents to like, for
|
||||
spam filtering. Suppose the properties of the document are all substrings of
|
||||
the document of twenty words or less and 200 characters or less. In that case,
|
||||
there are as many dimensions as there are two hundred character strings.
|
||||
|
||||
# Dimensional reduction
|
||||
|
||||
The combinatorial explosion occurs because we have taken the wrong approach
|
||||
to reducing problems that originate in the physical world of very large
|
||||
dimension, large because each quality of the objects involved or potentially
|
||||
involved is a dimension.
|
||||
|
||||
The cool magic trick that makes this manageable is dimensional reduction.
|
||||
Johnson and Lindenstrauss discovered in the early 1980s that if one has
|
||||
$O(2^n)$ points in a space of very large dimension, a random projection onto a
|
||||
space of dimension $O(n)$ does not much affect distances and angles.
|
||||
|
||||
Achlioptas found that this is true for the not very random mapping wherein
|
||||
elements of the matrix mapping the large space to the smaller space have the
|
||||
form $1$, with probability $\frac{1}{6}$, $0$ with probability $\frac{4}{6}$,
|
||||
$-1$ with probability $\frac{1}{6}$, though a sparse matrix is apt to
|
||||
distort a sparse vector
|
||||
|
||||
There exists a set of points of size $m$ that needs dimension
|
||||
$$\displaystyle{O(\frac{\log(m)}{ε^2})}$$
|
||||
in order to preserve the distances
|
||||
between all pairs of points within a factor of $1±ε$
|
||||
|
||||
The time to find the nearest neighbour is logarithmic in the number of points,
|
||||
but exponential in the dimension of the space. So we do one pass with rather
|
||||
large epsilon, and another pass, using an algorithm proportional to the small
|
||||
number of candidate neighbours times the dimensionality with a small number
|
||||
of candidate neighbours found in the first pass.
|
||||
|
||||
So in a space of unenumerably large dimension, such as the set of substrings
|
||||
of an email, or perhaps substrings of bounded length with bounds at spaces,
|
||||
carriage returns, and punctuation, we deterministically hash each substring,
|
||||
and use the hash to deterministically assign a mapping between the vector
|
||||
corresponding to this substring, and a vector in the reduced space.
|
||||
|
||||
The optimal instance recognition algorithm, for normally distributed
|
||||
attributes, and for already existent, already known categories, is Mahalanobis
|
||||
distance
|
||||
|
||||
Is not the spam characteristic of an email just its $T\cdot(S-G)$, where $T$ is
|
||||
the vector of the email, and $S$ and $G$ are the average vectors of good
|
||||
email and spam email?
|
||||
|
||||
Variance works, instead of probability – Mahalanobis distance, but this is
|
||||
most reasonable for things that have reasonable dimension, like attributing
|
||||
skulls to races, while dimensional reduction is most useful in spaces of
|
||||
unenumerably large dimension, where distributions are necessarily non
|
||||
normal.
|
||||
|
||||
But variance is, approximately, the log of probability, so Mahalanobis is
|
||||
more or less Bayes filtering.
|
||||
|
||||
So we can reasonably reduce each email into twenty questions space, or, just
|
||||
to be on the safe side, forty questions space. (Will have to test how many
|
||||
dimensions empirically retain angles and distances)
|
||||
|
||||
We then, in the reduced space, find natural groupings, a natural grouping
|
||||
being an elliptic region in high dimensional space where the density is
|
||||
anomalously large, or rather a normal distribution in high dimensional space
|
||||
such that assigning a particular email to a particular normal distribution
|
||||
dramatically reduces the entropy.
|
||||
|
||||
We label each such natural grouping with the statistically improbable phrase
|
||||
that best distinguishes members of the grouping from all other such groupings.
|
||||
|
||||
The end user can then issue rules that mails belonging to certain groupings
|
||||
be given particular attention – or lack of attention, such as being sent
|
||||
direct to spam.
|
||||
|
||||
The success of face recognition, etc, suggests that this might be just a
|
||||
problem of clever algorithms. Pile enough successful intelligence like
|
||||
algorithms together, integrate them well, perhaps we will have sentience.
|
||||
Analogously with the autonomous cars. They had no new algorithms, they just
|
||||
made the old algorithms actually do something useful.
|
||||
|
||||
# Robot movement
|
||||
|
||||
Finding movement paths is full of singularities, looks to me that we force it
|
||||
down to two and half dimensions, force the obstacles to stick figures, and
|
||||
then find a path to the destination. Hence the mental limit on complex knot
|
||||
problems.
|
||||
|
||||
Equivalently, we want to reduce the problem space to a collection of regions
|
||||
in which pathfinding algorithms that assume continuity work, and then
|
||||
construct a graph of such regions where nodes correspond to such convex region
|
||||
within which continuity works, and edges correspond to an overlap between two
|
||||
such convex regions. Since the space is enormous, drastic reduction is
|
||||
needed.
|
||||
|
||||
In the case of robot movement we are likely to wind up with a very large
|
||||
graph of such convex regions within which the assumption of singularity free
|
||||
movement is correct, and because the graph is apt to be very large, finding
|
||||
an efficient path through the graph is apt to be prohibitive, which is apt to
|
||||
cause robot ground vehicles to crash because they cannot quickly figure out
|
||||
the path to evade an unexpected object and makes it impractical for a robot
|
||||
to take a can of beer from the fridge.
|
||||
|
||||
We therefore use the [sybil guard algorithm] to reduce the graph by treating
|
||||
groups of highly connected vertices as a single vertex.
|
||||
|
||||
[sybil guard algorithm]:./sybil_attack.html
|
||||
|
||||
# Artificial Intelligence
|
||||
|
||||
[Gradient descent is not what makes Neural nets work] Comment by Bruce on
|
||||
Echo State Networks.
|
||||
|
||||
[Gradient descent is not what makes Neural nets work]:https://scottlocklin.wordpress.com/2012/08/02/30-open-questions-in-physics-and-astronomy/
|
||||
|
||||
Echo state Network is your random neural network, which mixes a great pile of
|
||||
randomness into your actual data, to expand it into a much larger pile of
|
||||
data that implicitly contains all the uncorrupted original information in its
|
||||
very great redundancy, albeit in massively mangled form. Then “You just fit
|
||||
the output layer using linear regression. You can fit it with something more
|
||||
complicated, but why bother; it doesn’t help.”
|
||||
|
||||
A generalization of “fitting the output layer using linear regression” is
|
||||
finding groupings, recognizing categories, in the space of very large dimension
|
||||
that consists of the output of the output layer.
|
||||
|
||||
Fitting by linear regression assumes we already have a list of instances that
|
||||
are known to be type specimens of the category, assumes that the category is
|
||||
already defined and we want an efficient way of recognizing new instances as
|
||||
members of this category. But living creatures can form new categories,
|
||||
without having them handed to them on a platter. We want to be able to
|
||||
discover that a group of instances belong together.
|
||||
|
||||
So we generate a random neural network, identify those outputs that provide
|
||||
useful information identifying categories, and prune those elements of the
|
||||
network that do not contribute useful information identifying useful categories.
|
||||
|
||||
That it does not help tells me you are doing a dimensional reduction on the
|
||||
outputs of an echo state network.
|
||||
|
||||
You are generating vectors in a space of uncountably large dimension, which
|
||||
vectors describe probabilities, and probabilities of probabilities (Bayesian
|
||||
regress, probability of a probability of a frequency, to correct priors, and
|
||||
priors about priors) so that if two vectors are distant in your space, one is
|
||||
uncorrelated with the other, and if two things are close, they are
|
||||
correlated.
|
||||
|
||||
Because the space is of uncountably large dimension, the vectors are
|
||||
impossible to manipulate directly, so you are going to perform a random
|
||||
dimensional reduction on a set of such vectors to a space of manageably large
|
||||
dimension.
|
||||
|
||||
At a higher level you eventually need to distinguish the direction of
|
||||
causation in order to get an action network, a network that envisages action
|
||||
to bring the external world through a causal path to an intended state, which
|
||||
state has a causal correlation to *desire*, a network whose output state is
|
||||
*intent*, and whose goal is desire. When the action network selects one
|
||||
intended state of the external world rather than another, that selection is
|
||||
*purpose*. When the action network selects one causal path rather than
|
||||
another, that selection is *will*.
|
||||
|
||||
The colour red is not a wavelength, a phenomenon, but is a qualia, an
|
||||
estimate of the likelihood that an object has a reflectance spectrum in
|
||||
visible light peaked in that wavelength, but which estimate of probability
|
||||
can then be used as if it were a phenomenon in forming concepts, such as
|
||||
blood, which in turn can be used to form higher level concepts, as when the
|
||||
Old Testament says of someone who needed killing “His blood is on his own
|
||||
head”.
|
||||
|
||||
Concepts are Hegelian Neural Networks: “Neurons that fire together, wire
|
||||
together”
|
||||
|
||||
This is related to random dimensional reduction. You have a collection of
|
||||
vectors in space of uncountably large dimension. Documents, emails, what you
|
||||
see when you look in a particular direction, what you experience at a
|
||||
particular moment. You perform a random dimensional reduction to a space of
|
||||
manageable dimension, but large enough to probabilistically preserve
|
||||
distances and angles in the original space – typically twenty or a hundred
|
||||
dimensions.
|
||||
|
||||
By this means, you are able to calculate distances and angles in your
|
||||
dimensionally reduced space which approximately reflect the distances and
|
||||
angles in the original space, which was probably of dimension
|
||||
$10^{100^{100^{100}}}$, the original space being phenomena that occurred
|
||||
together, and collections of phenomena that occurred together that you have
|
||||
some reason for bundling into a collection, and your randomly reduced space
|
||||
having dimension of order that a child can count to in a reasonably short
|
||||
time.
|
||||
|
||||
And now you have vectors such that you can calculate the inner product and
|
||||
cross product on them, and perform matrix operations on them. This gives you
|
||||
qualia. Higher level qualia are *awareness*
|
||||
|
||||
And, using this, you can restructure the original vectors, for example
|
||||
structuring experiences into events, structuring things in the visual field
|
||||
into objects, and then you can do the same process on collections of events,
|
||||
and collections of objects that have something common.
|
||||
|
||||
Building a flying machine was very hard, until the Wright brothers said
|
||||
“three axis control, pitch, yaw, and roll”
|
||||
|
||||
Now I have said the words “dimensional reduction of vectors in a space of
|
||||
uncountably large dimension, desire, purpose, intent, and will”
|
1082
docs/replacing_TCP.md
Normal file
312
docs/scale_clients_trust.md
Normal file
@ -0,0 +1,312 @@
|
||||
---
|
||||
title: Scaling, trust and clients
|
||||
---
|
||||
|
||||
# Client trust
|
||||
|
||||
When there are billions of people using the blockchain, it will inevitably
|
||||
only be fully verified by a few hundred or at most a few thousand major
|
||||
peers, who will inevitably have [interests that do not necessarily coincide]
|
||||
with those of the billions of users, who will inevitably have only client
|
||||
wallets.
|
||||
|
||||
[interests that do not necessarily coincide]:https://vitalik.ca/general/2021/05/23/scaling.html
|
||||
"Vitalik Buterin talks blockchain scaling"
|
||||
|
||||
And a few hundred seems to be the minimum size required to stop peers
|
||||
with a lot of clients from doing nefarious things. At scale, we are going to
|
||||
approach the limits of distributed trust.
|
||||
|
||||
There are several cures for this. Well, not cures, but measures that can
|
||||
alleviate the disease
|
||||
|
||||
None of these are yet implemented, and we will not get around to
|
||||
implementing them until we start to take over the world. But it is
|
||||
necessary that what we do implement be upwards compatible with this scaling design:
|
||||
|
||||
## proof of stake
|
||||
|
||||
Make the stake of a peer the value of coins (unspent transaction outputs)
|
||||
that were injected into the blockchain through that peer. This ensures that
|
||||
the interests of the peers will be aligned with the whales, with the interests
|
||||
of those that hold a whole lot of value on the blockchain. Same principle
|
||||
as a well functioning company board. A company board directly represents
|
||||
major shareholders, whose interests are for the most part aligned with
|
||||
ordinary shareholders. (This is apt to fail horribly when an accounting or
|
||||
law firm is on the board, or a converged investment fund.) This measure
|
||||
gives power to the whales, who do not want their hosts to do nefarious things.
|
||||
|
||||
## client verification
|
||||
|
||||
every single client verifies the transactions that it is directly involved in,
|
||||
and a subset of the transactions that gave rise to the coins that it receives.
|
||||
|
||||
If it verified the ancestry of every coin it received all the way back, it
|
||||
would have to verify the entire blockchain, but it can verify the biggest
|
||||
ancestor of the biggest ancestor and a random subset of ancestors, thus
|
||||
invalid transactions are going to immediately generate problems. If every
|
||||
client unpredictably verifies a small number of transactions, the net effect
|
||||
is going to be that most transactions are going to be unpredictably verified
|
||||
by several clients.
|
||||
|
||||
## sharding, many blockchains
|
||||
|
||||
Coins in a shard are shares in [sovereign cipher corporations] whose
|
||||
primary asset is a coin on the primary blockchain that vests power over
|
||||
their name and assets in a frequently changing public key. Every time
|
||||
money moves from the main chain to a sidechain, or from one sidechain to
|
||||
another, the old coin is spent, and a new coin is created. The public key on
|
||||
the mainchain coin corresponds to [a frequently changing secret that is distributed]
|
||||
between the peers on the sidechain in proportion to their stake.
|
||||
|
||||
The mainchain transaction is a big transaction between many sidechains,
|
||||
that contains a single output or input from each side chain, with each
|
||||
single input or output from each sidechain representing many single
|
||||
transactions between sidechains, and each single transaction between
|
||||
sidechains representing many single transactions between many clients of
|
||||
each sidechain.
|
||||
|
||||
The single big mainchain transaction merkle chains to the total history of
|
||||
each sidechain, and each client of a sidechain can verify any state
|
||||
information about his sidechain against the most recent sidechain
|
||||
transaction on the mainchain, and routinely does.
|
||||
|
||||
## lightning layer
|
||||
|
||||
The [lightning layer] is the correct place for privacy and contracts – because
|
||||
we do not want every transaction, let alone every contract, appearing on
|
||||
the mainchain. Keeping as much stuff as possible *off* the blockchain helps
|
||||
with both privacy and scaling.
|
||||
|
||||
## zk-snarks
|
||||
|
||||
Zk-snarks are not yet a solution. They have enormous potential
|
||||
benefits for privacy and scaling, but as yet, no one has quite found a way.
|
||||
|
||||
A zk-snark is a succinct proof that code *was* executed on an immense pile
|
||||
of data, and produced the expected, succinct, result. It is a witness that
|
||||
someone carried out the calculation he claims he did, and that calculation
|
||||
produced the result he claimed it did. So not everyone has to verify the
|
||||
blockchain from beginning to end. And not everyone has to know what
|
||||
inputs justified what outputs.
|
||||
|
||||
The innumerable privacy coins around based on zk-snarks are just not
|
||||
doing what has to be done to make a zk-snark privacy currency that is
|
||||
viable at any reasonable scale. They are intentionally scams, or by
|
||||
negligence, unintentionally scams. All the zk-snark coins are doing the
|
||||
step from set $N$ of valid coins, valid unspent transaction outputs, to set
|
||||
$N+1$, in the old fashioned Satoshi way, and sprinkling a little bit of
|
||||
zk-snark magic privacy pixie dust on top (because the task of producing a
|
||||
genuine zk-snark proof of coin state for step $N$ to step $N+1$ is just too big
|
||||
for them). Which is, intentionally or unintentionally, a scam.
|
||||
|
||||
Not yet an effective solution for scaling the blockchain, for to scale the
|
||||
blockchain, you need a concise proof that any spend in the blockchain was
|
||||
only spent once, and while a zk-snark proving this is concise and
|
||||
capable of being quickly evaluated by any client, generating the proof is
|
||||
an enormous task. Lots of work is being done to render this task
|
||||
manageable, but as yet, last time I checked, not manageable at scale.
|
||||
Rendering it efficient would be a total game changer, radically changing
|
||||
the problem.
|
||||
|
||||
The fundamental problem is that in order to produce a compact proof that
|
||||
the set of coins, unspent transaction outputs, of state $N+1$ was validly
|
||||
derived from the set of coins at state $N$, you actually have to have those
|
||||
sets of coins, which is not very compact at all, and generate a compact
|
||||
proof about a tree lookup and cryptographic verification for each of the
|
||||
changes in the set.
|
||||
|
||||
This is an inherently enormous task at scale, which will have to be
|
||||
factored into many, many subtasks, performed by many, many machines.
|
||||
Factoring the problem up is hard, for it not only has to be factored, divided
|
||||
up, it has to be divided up in a way that is incentive compatible, or else
|
||||
the blockchain is going to fail at scale because of peer misconduct,
|
||||
transactions are just not going to be validated. Factoring a problem is hard,
|
||||
and factoring that has to be mindful of incentive compatibility is
|
||||
considerably harder. I am seeing a lot of good work grappling with the
|
||||
problem of factoring, dividing the problem into manageable subtasks, but
|
||||
it seems to be totally oblivious to the hard problem of incentive compatibility at scale.
|
||||
|
||||
Incentive compatibility was Satoshi's brilliant insight, and the client trust
|
||||
problem is failure of Satoshi's solution to that problem to scale. Existing
|
||||
zk-snark solutions fail at scale, though in a different way. With zk-snarks,
|
||||
the client can verify the zk-snark, but producing a valid zk-snark in the
|
||||
first place is going to be hard, and will rapidly get harder as the scale
|
||||
increases.
|
||||
|
||||
A zk-snark that succinctly proves that the set of coins (unspent transaction
|
||||
outputs) at block $N+1$ was validly derived from the set of coins at
|
||||
block $N$, and can also prove that any given coin is in that set or not in that
|
||||
set is going to have to be a proof about many, many, zk-snarks produced
|
||||
by many, many machines, a proof about a very large dag of zk-snarks,
|
||||
each zk-snark a vertex in the dag proving some small part of the validity
|
||||
of the step from consensus state $N$ of valid coins to consensus state
|
||||
$N+1$ of valid coins, and the owners of each of those machines that produced a tree
|
||||
vertex for the step from set $N$ to set $N+1$ will need a reward proportionate
|
||||
to the task that they have completed, and the validity of the reward will
|
||||
need to be part of the proof, and there will need to be a market in those
|
||||
rewards, with each vertex in the dag preferring the cheapest source of
|
||||
child vertexes. Each of the machines would only need to have a small part
|
||||
of the total state $N$, and a small part of the transactions transforming state
|
||||
$N$ into state $N+1$. This is hard but doable, but I am just not seeing it done yet.
|
||||
|
||||
I see good [proposals for factoring the work], but I don't see them
|
||||
addressing the incentive compatibility problem. It needs a whole picture
|
||||
design, rather than a part of the picture design. A true zk-snark solution
|
||||
has to shard the problem of producing state $N+1$, the set of unspent
|
||||
transaction outputs, from state $N$, so it should also shard the problem of
|
||||
producing a consensus on the total set and order of transactions.
|
||||
|
||||
[proposals for factoring the work]:https://hackmd.io/@vbuterin/das
|
||||
"Data Availability Sampling Phase 1 Proposal"
|
||||
|
||||
### The problem with zk-snarks
|
||||
|
||||
Last time I checked, [Cairo] was not ready for prime time.
|
||||
|
||||
[Cairo]:https://starkware.co/cairo/
|
||||
"Cairo - StarkWare Industries Ltd."
|
||||
|
||||
Maybe it is ready now.
|
||||
|
||||
The two basic problems with zk-snarks are that even though a zk-snark
|
||||
proving something about an enormous data set is quite small and can be
|
||||
quickly verified by anyone, it requires enormous computational resources to
|
||||
generate the proof, and how does the end user know that the verification
|
||||
verifies what it is supposed to verify?
|
||||
|
||||
To solve the first problem, need distributed generation of the proof,
|
||||
constructing a zk-snark that is a proof about a dag of zk-snarks,
|
||||
effectively a zk-snark implementation of the map-reduce algorithm for
|
||||
massive parallelism. In general map-reduce requires trusted shards that
|
||||
will not engage in Byzantine defection, but with zk-snarks they can be
|
||||
untrusted, allowing the problem to be massively distributed over the
|
||||
internet.
|
||||
|
||||
To solve the second problem, need an [intelligible scripting language for
|
||||
generating zk-snarks], a scripting language that generates serial verifiers
|
||||
and massively parallel map-reduce proofs.
|
||||
|
||||
[intelligible scripting language for
|
||||
generating zk-snarks]:https://www.cairo-lang.org
|
||||
"Welcome to Cairo
|
||||
A Language For Scaling DApps Using STARKs"
|
||||
|
||||
Both problems are being actively worked on. Both problems need a good deal
|
||||
more work, last time I checked. For end user trust in client wallets
|
||||
relying on zk-snark verification to be valid, at least some of the end
|
||||
users of client wallets will need to themselves generate the verifiers from
|
||||
the script.
|
||||
|
||||
For trust based on zk-snarks to be valid, a very large number of people
|
||||
must themselves have the source code to a large program that was
|
||||
executed on an immense amount of data, and must themselves build and
|
||||
run the verifier to prove that this code was run on the actual data at least
|
||||
once, and produced the expected result, even though very few of them will
|
||||
ever execute that program on actual data, and there is too much data for
|
||||
any one computer to ever execute the program on all the data.
|
||||
|
||||
Satoshi's fundamental design was that all users should verify the
|
||||
blockchain, which becomes impractical when the blockchain approaches four
|
||||
hundred gigabytes. A zk-snark design needs to redesign blockchains from
|
||||
the beginning, with distributed generation of the proof, but the proof for
|
||||
each step in the chain, from mutable state $N$ to mutable state $N+1$, from set
|
||||
$N$ of coins, unspent transaction outputs, to set $N+1$ of coins only being
|
||||
generated once or generated a quite small number of times, with its
|
||||
generation being distributed over all peers through map-reduce, while the
|
||||
proof is verified by everyone, peer and client.
|
||||
|
||||
For good verifier performance, with acceptable prover performance, one
|
||||
should construct a stark that can be verified quickly, and then produce
|
||||
a libsnark proof that it was verified at least once ([libsnark proof generation
|
||||
being costly], but the proofs are very small and quickly verifiable).
|
||||
|
||||
At the end of the day, we still need the code generating and executing the
|
||||
verification of zk-snarks to be massively replicated, in order that all
|
||||
this rigmarole with zk-snarks and starks is actually worthy of producing
|
||||
trust.
|
||||
|
||||
[libsnark proof generation being costly]:https://eprint.iacr.org/2018/046.pdf
|
||||
"Scalable computational integrity:
|
||||
section 1.3.2: concrete performance"
|
||||
|
||||
This is not a problem I am working on, but I would be happy to see a
|
||||
solution. I am seeing a lot of scam solutions, that sprinkle zk-snarks over
|
||||
existing solutions as magic pixie dust, like putting wings on a solid fuel
|
||||
rocket and calling it a space plane.
|
||||
|
||||
[lightning layer]:lightning_layer.html
|
||||
|
||||
[sovereign cipher corporations]:social_networking.html#many-sovereign-corporations-on-the-blockchain
|
||||
|
||||
[a frequently changing secret that is distributed]:multisignature.html#scaling
|
||||
|
||||
# sharding within each single very large peer
|
||||
|
||||
Sharding within a single peer is an easier problem than sharding the
|
||||
blockchain between mutually distrustful peers capable of Byzantine
|
||||
defection, and the solutions are apt to be more powerful and efficient.
|
||||
|
||||
When we go to scale, when we have very large peers on the blockchain,
|
||||
we are going to have to have sharding within each very large peer, which will
|
||||
multiprocess in the style of Google's massively parallel multiprocessing,
|
||||
where scaling and multiprocessing is embedded in interactions with the
|
||||
massively distributed database, either on top of an existing distributed
|
||||
database such as Rlite or Cockroach, or we will have to extend the
|
||||
consensus algorithm so that the shards of each cluster form their own
|
||||
distributed database, or extend the consensus algorithm so that peers can
|
||||
shard. As preparation for the latter possibility, we need to have each peer
|
||||
only form gossip events with a small and durable set of peers with which it
|
||||
has lasting relationships, because the events, as we go to scale, tend to
|
||||
have large and unequal costs and benefits for each peer. Durable
|
||||
relationships make sharding possible, but we will not worry too much about
|
||||
sharding until a forty terabyte blockchain comes in sight.
|
||||
|
||||
When we go to scale, we are going to have to have sharding, which will
|
||||
multiprocess in the style of Google’s massively parallel multiprocessing,
|
||||
where scaling and multiprocessing is embedded in interactions with the
|
||||
massively distributed database, either on top of an existing distributed
|
||||
database such as Rlite or Cockroach, or we will have to extend the
|
||||
consensus algorithm so that the shards of each cluster form their own
|
||||
distributed database, or extend the consensus algorithm so that peers can
|
||||
shard. As preparation for the latter possibility, we need to have each peer
|
||||
only form gossip events with a small and durable set of peers with which it
|
||||
has lasting relationships, because the events, as we go to scale, tend to
|
||||
have large and unequal costs and benefits for each peer. Durable
|
||||
relationships make sharding possible, but we will not worry too much about
|
||||
sharding until a forty terabyte blockchain comes in sight.
|
||||
|
||||
For sharding, each peer has a copy of a subset of the total blockchain, and
|
||||
some peers have a parity set of many such subsets, each peer has a subset
|
||||
of the set of unspent transaction outputs as of consensus on total order at
|
||||
one time, and is working on constructing a subset of the set of unspent
|
||||
transactions as of a recent consensus on total order, each peer has all the
|
||||
root hashes of all the balanced binary trees of all the subsets, but not all
|
||||
the subsets, each peer has durable relationships with a set of peers that
|
||||
have the entire collection of subsets, and two durable relationships with
|
||||
peers that have parity sets of all the subsets.
|
||||
|
||||
Each subset of the append only immutable set of transactions is represented
|
||||
by a balanced binary tree of hashes representing $2^n$ blocks of
|
||||
the blockchain, and each subset of the mutable set of unspent transaction
|
||||
outputs is a subsection of the Merkle-patricia tree of transaction outputs,
|
||||
which is part of a directed acyclic graph of all consensus sets of all past
|
||||
consensus states of transaction outputs, but no one keeps that entire graph
|
||||
around once it gets too big, as it rapidly will, only various subsets of it.
|
||||
|
||||
But they keep the hashes around that can prove that any subset of it was
|
||||
part of the consensus at some time.
|
||||
|
||||
Gossip vertexes immutably added to the immutable chain of blocks will
|
||||
contain the total hash of the state of unspent transactions as of a previous
|
||||
consensus block, thus the immutable and ever growing blockchain will contain
|
||||
an immutable record of all past consensus Merkle-patricia trees of
|
||||
unspent transaction outputs, and thus of the past consensus about the
|
||||
dynamic and changing state resulting from the immutable set of all past
|
||||
transactions.
|
||||
|
||||
For very old groups of blocks to be discardable, it will from time to time be
|
||||
necessary to add repeat copies of old transaction outputs that are still
|
||||
unspent, so that the old transactions that gave rise to them can be
|
||||
discarded, and one can then re-evaluate the state of the blockchain starting
|
||||
from the middle, rather than the very beginning.
|