String IO. Completed MPIR 2.6.0 section 5.12.

This commit is contained in:
Alex Dyachenko 2014-04-23 20:44:37 -04:00
parent f4d3fd57d6
commit adb5be1ece
8 changed files with 398 additions and 66 deletions

View File

@ -303,9 +303,9 @@ typedef __gmp_randstate_struct gmp_randstate_t[1];
/* Output of mp?_out_raw_m */
typedef struct
{
unsigned char* allocated;
char* allocated;
size_t allocatedSize;
unsigned char* written;
char* written;
size_t writtenSize;
} __mpir_out_struct;
typedef __mpir_out_struct mpir_out_struct[1];

View File

@ -19,6 +19,7 @@ along with the MPIR Library. If not, see http://www.gnu.org/licenses/.
using System;
using System.IO;
using System.Text;
using Microsoft.VisualStudio.TestTools.UnitTesting;
namespace MPIR.Tests.HugeIntTests
@ -41,6 +42,149 @@ namespace MPIR.Tests.HugeIntTests
Assert.AreEqual(ms.Length, ms.Position);
}
}
/// <summary>
/// Round-trips a large value through Write(TextWriter) and Read(TextReader) using the
/// overloads that take no explicit base, and checks that the whole stream was consumed.
/// </summary>
[TestMethod]
public void InputOutputStr()
{
    using (var original = new HugeInt("0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"))
    using (var parsed = new HugeInt())
    using (var buffer = new MemoryStream())
    {
        original.Value = original ^ 100;

        // The trailing 'true' is leaveOpen: the MemoryStream must survive the writer's disposal.
        using (var output = new StreamWriter(buffer, Encoding.UTF8, 1024, true))
        {
            original.Write(output);
        }

        // Rewind so the reader starts from the first byte that was written.
        buffer.Position = 0;
        using (var input = new StreamReader(buffer, Encoding.UTF8, false, 1024, true))
        {
            parsed.Read(input);
        }

        Assert.AreEqual(original, parsed);
        Assert.AreEqual(buffer.Length, buffer.Position);
    }
}
/// <summary>
/// Writes a value in base 16 after a hand-written "0x" prefix, reads it back with the
/// Read overload that takes no base, and verifies the exact text stored in the stream.
/// </summary>
[TestMethod]
public void InputOutputStrHex()
{
    using (var original = new HugeInt("0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"))
    using (var parsed = new HugeInt())
    using (var buffer = new MemoryStream())
    {
        original.Value = original ^ 100;

        // The trailing 'true' is leaveOpen: the MemoryStream must survive the writer's disposal.
        using (var output = new StreamWriter(buffer, Encoding.UTF8, 1024, true))
        {
            // Write(TextWriter, base) emits digits only, so the test supplies the prefix itself.
            output.Write("0x");
            original.Write(output, 16);
        }

        buffer.Position = 0;
        using (var input = new StreamReader(buffer, Encoding.UTF8, false, 1024, true))
        {
            parsed.Read(input);
        }

        Assert.AreEqual(original, parsed);
        Assert.AreEqual(buffer.Length, buffer.Position);

        // The decoded stream is expected to start with U+FEFF ahead of the prefix and digits.
        var expectedText = (char)0xFEFF + "0x" + original.ToString(16);
        var actualText = Encoding.UTF8.GetString(buffer.ToArray());
        Assert.AreEqual(expectedText, actualText);
    }
}
/// <summary>
/// Same round trip as the base-16 test, but requests lowercase digits from both
/// Write(TextWriter, base, lowercase) and ToString(base, lowercase).
/// </summary>
[TestMethod]
public void InputOutputStrHexLower()
{
    using (var original = new HugeInt("0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"))
    using (var parsed = new HugeInt())
    using (var buffer = new MemoryStream())
    {
        original.Value = original ^ 100;

        // The trailing 'true' is leaveOpen: the MemoryStream must survive the writer's disposal.
        using (var output = new StreamWriter(buffer, Encoding.UTF8, 1024, true))
        {
            // The prefix is written by the test; the Write call emits digits only.
            output.Write("0x");
            original.Write(output, 16, true);
        }

        buffer.Position = 0;
        using (var input = new StreamReader(buffer, Encoding.UTF8, false, 1024, true))
        {
            parsed.Read(input);
        }

        Assert.AreEqual(original, parsed);
        Assert.AreEqual(buffer.Length, buffer.Position);

        // The decoded stream is expected to start with U+FEFF ahead of the prefix and digits.
        var expectedText = (char)0xFEFF + "0x" + original.ToString(16, true);
        var actualText = Encoding.UTF8.GetString(buffer.ToArray());
        Assert.AreEqual(expectedText, actualText);
    }
}
/// <summary>
/// Writes a value in base 8 after a hand-written leading '0', reads it back with the
/// Read overload that takes no base, and verifies the exact text stored in the stream.
/// </summary>
[TestMethod]
public void InputOutputStrOctal()
{
    using (var original = new HugeInt("0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"))
    using (var parsed = new HugeInt())
    using (var buffer = new MemoryStream())
    {
        // Note: this test uses 1 as the right-hand operand, unlike the sibling tests which use 100.
        original.Value = original ^ 1;

        // The trailing 'true' is leaveOpen: the MemoryStream must survive the writer's disposal.
        using (var output = new StreamWriter(buffer, Encoding.UTF8, 1024, true))
        {
            // The leading zero is written by the test; the Write call emits digits only.
            output.Write('0');
            original.Write(output, 8);
        }

        buffer.Position = 0;
        using (var input = new StreamReader(buffer, Encoding.UTF8, false, 1024, true))
        {
            parsed.Read(input);
        }

        Assert.AreEqual(original, parsed);
        Assert.AreEqual(buffer.Length, buffer.Position);

        // The decoded stream is expected to start with U+FEFF ahead of the prefix and digits.
        var expectedText = (char)0xFEFF + "0" + original.ToString(8);
        var actualText = Encoding.UTF8.GetString(buffer.ToArray());
        Assert.AreEqual(expectedText, actualText);
    }
}
/// <summary>
/// Writes a value in base 2 after a hand-written "0b" prefix, reads it back with the
/// Read overload that takes no base, and verifies the exact text stored in the stream.
/// </summary>
[TestMethod]
public void InputOutputStrBinary()
{
    using (var original = new HugeInt("0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"))
    using (var parsed = new HugeInt())
    using (var buffer = new MemoryStream())
    {
        original.Value = original ^ 100;

        // The trailing 'true' is leaveOpen: the MemoryStream must survive the writer's disposal.
        using (var output = new StreamWriter(buffer, Encoding.UTF8, 1024, true))
        {
            // The prefix is written by the test; the Write call emits digits only.
            output.Write("0b");
            original.Write(output, 2);
        }

        buffer.Position = 0;
        using (var input = new StreamReader(buffer, Encoding.UTF8, false, 1024, true))
        {
            parsed.Read(input);
        }

        Assert.AreEqual(original, parsed);
        Assert.AreEqual(buffer.Length, buffer.Position);

        // The decoded stream is expected to start with U+FEFF ahead of the prefix and digits.
        var expectedText = (char)0xFEFF + "0b" + original.ToString(2);
        var actualText = Encoding.UTF8.GetString(buffer.ToArray());
        Assert.AreEqual(expectedText, actualText);
    }
}
/// <summary>
/// Round-trips a value in base 62. No prefix is written; the base is passed explicitly
/// to both Write and Read, and the exact text stored in the stream is verified.
/// </summary>
[TestMethod]
public void InputOutputStr62()
{
    using (var original = new HugeInt("0x10123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"))
    using (var parsed = new HugeInt())
    using (var buffer = new MemoryStream())
    {
        original.Value = original ^ 100;

        // The trailing 'true' is leaveOpen: the MemoryStream must survive the writer's disposal.
        using (var output = new StreamWriter(buffer, Encoding.UTF8, 1024, true))
        {
            original.Write(output, 62);
        }

        buffer.Position = 0;
        using (var input = new StreamReader(buffer, Encoding.UTF8, false, 1024, true))
        {
            parsed.Read(input, 62);
        }

        Assert.AreEqual(original, parsed);
        Assert.AreEqual(buffer.Length, buffer.Position);

        // The decoded stream is expected to start with U+FEFF ahead of the digits.
        var expectedText = (char)0xFEFF + original.ToString(62);
        var actualText = Encoding.UTF8.GetString(buffer.ToArray());
        Assert.AreEqual(expectedText, actualText);
    }
}
//more tests coming here
}
}

View File

@ -18,29 +18,3 @@ along with the MPIR Library. If not, see http://www.gnu.org/licenses/.
*/
#include "Stdafx.h"
void CustomFree (void* ptr)
{
CustomFree(ptr, 0);
}
// Frees ptr by looking up the currently installed free function through
// mp_get_memory_functions and calling it with the given size.
void CustomFree(void* ptr, size_t size)
{
    void (*release) (void*, size_t);

    // Only the third slot (free) is requested; the other two are skipped with NULL.
    mp_get_memory_functions (NULL, NULL, &release);
    release(ptr, size);
}
// Allocates size bytes by looking up the currently installed allocate function
// through mp_get_memory_functions and returning whatever it returns.
void* CustomAllocate(size_t size)
{
    void* (*acquire) (size_t);

    // Only the first slot (allocate) is requested; the other two are skipped with NULL.
    mp_get_memory_functions(&acquire, NULL, NULL);

    void* block = acquire(size);
    return block;
}
void* CustomReallocate(void* old, size_t size)
{
void* (*reallocateFunc) (void*, size_t, size_t);
mp_get_memory_functions(NULL, &reallocateFunc, NULL);
return reallocateFunc(old, 0, size);
}

View File

@ -19,11 +19,6 @@ along with the MPIR Library. If not, see http://www.gnu.org/licenses/.
#pragma once
void* CustomAllocate (size_t alloc_size);
void* CustomReallocate (void* ptr, size_t new_size);
void CustomFree (void* ptr);
void CustomFree (void* ptr, size_t size);
enum EvaluationOptions : __int8
{
None = 0x0,

View File

@ -89,6 +89,7 @@ along with the MPIR Library. If not, see http://www.gnu.org/licenses/.
void dummy_ternary(mpz_ptr d, mpz_srcptr a, mpz_srcptr b, mpz_srcptr c) { };
using namespace System::Runtime::InteropServices;
using namespace System::Text;
namespace MPIR
{
@ -195,7 +196,7 @@ namespace MPIR
auto result = gcnew System::Text::StringBuilder();
result->Append(sign);
result->Append(gcnew String(str));
CustomFree(str);
(*__gmp_free_func)(str, 0);
return result->ToString();
}
@ -508,27 +509,30 @@ namespace MPIR
#define chunkSize 1024
void HugeInt::Write(Stream^ stream)
size_t HugeInt::Write(Stream^ stream)
{
mpir_out_struct out;
mpz_out_raw_m(out, _value);
auto buffer = gcnew array<unsigned char>(chunkSize);
auto ptr = out->written;
auto toWrite = (int)out->writtenSize;
while(out->writtenSize > 0)
while(toWrite > 0)
{
auto len = Math::Min(chunkSize, (int)out->writtenSize);
auto len = Math::Min(chunkSize, toWrite);
Marshal::Copy(IntPtr(ptr), buffer, 0, len);
stream->Write(buffer, 0, len);
ptr += len;
out->writtenSize -= len;
toWrite -= len;
}
CustomFree(out->allocated, out->allocatedSize);
(*__gmp_free_func)(out->allocated, out->allocatedSize);
return out->writtenSize;
}
void HugeInt::Read(Stream^ stream)
size_t HugeInt::Read(Stream^ stream)
{
unsigned char csize_bytes[4];
mpir_out_struct out;
@ -566,5 +570,168 @@ namespace MPIR
}
}
// Writes the number to writer as text: formats it with ToString(base, lowercase),
// sends the resulting string to the writer, and returns that string's length.
size_t HugeInt::Write(TextWriter^ writer, int base, bool lowercase)
{
    String^ text = ToString(base, lowercase);
    writer->Write(text);

    size_t written = text->Length;
    return written;
}
// Character-to-digit lookup table used by HugeInt::ReadNoWhite.
//
// X (0xff) marks a character that is not a digit in any base.
//
// The array holds 480 entries (30 rows of 16) and is read through one of two
// overlapping 256-entry windows, each indexed by character code:
//   - offset 0:   '0'-'9' map to 0-9, and both 'A'-'Z' and 'a'-'z' map to 10-35
//                 (case-insensitive);
//   - offset 224: '0'-'9' map to 0-9, 'A'-'Z' map to 10-35 and 'a'-'z' map to 36-61
//                 (case-sensitive; ReadNoWhite selects this window for bases above 36
//                 by adding 224 to the table pointer).
// The last two rows of the first window double as the first two rows of the second.
#define X 0xff
const unsigned char digit_value_tab[] =
{
// ---- window at offset 0: letters are case-insensitive ----
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, X, X, X, X, X, X,
X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,
X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
// ---- window at offset 224 starts here: letters are case-sensitive ----
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, X, X, X, X, X, X,
X,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,
25,26,27,28,29,30,31,32,33,34,35,X, X, X, X, X,
X,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,
51,52,53,54,55,56,57,58,59,60,61,X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,
X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X
};
// Reads a number in the given base from reader, first consuming any leading
// white space. The count of skipped characters is handed to ReadNoWhite, whose
// return value is passed straight back to the caller.
size_t HugeInt::Read(TextReader^ reader, int base)
{
    size_t skipped = 0;

    // Peek() yields a negative value at end of input, which ends the scan.
    for (int next = reader->Peek(); next >= 0 && Char::IsWhiteSpace(next); next = reader->Peek())
    {
        reader->Read();
        skipped++;
    }

    return ReadNoWhite(reader, base, skipped);
}
#define PEEK_NEXT_CHAR \
reader->Read(); \
c = reader->Peek(); \
nread++;
// Looks up the digit value of a character code obtained from TextReader::Peek().
// The lookup window holds only 256 entries, while Peek() yields -1 at end of input
// and any UTF-16 code unit otherwise. Codes outside 0..255 are therefore reported
// as 0xff (the table's "not a digit" marker) instead of being used as an index.
static int DigitValueOf(const unsigned char* digit_value, int c)
{
    return (c < 0 || c > 0xFF) ? 0xFF : digit_value[c];
}

// adapted from inp_str, which is shared by mpq_inp_str
//
// Parses an optionally negative integer from reader, starting at the current
// position (leading white space must already have been consumed), and stores it
// in this instance.
//
//   reader - text source; reading stops before the first character that is not a
//            valid digit in the effective base, or at end of input.
//   base   - 0 to detect the base from a leading 0x/0X (16), 0b/0B (2) or 0 (8),
//            defaulting to 10; otherwise 2..62.
//   nread  - number of characters the caller has already consumed; included in
//            the returned total.
//
// Returns nread plus the number of characters consumed here.
// Throws ArgumentException when base is not 0 or 2..62, and Exception when the
// input does not start with a digit (after an optional '-').
size_t HugeInt::ReadNoWhite(TextReader^ reader, int base, size_t nread)
{
    // Reject bases the conversion tables do not cover before touching the reader.
    if (base < 0 || base == 1 || base > 62)
        throw gcnew ArgumentException("Invalid base", "base");

    const unsigned char* digit_value = digit_value_tab;
    if (base > 36)
    {
        // For bases > 36, use the collating sequence
        // 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
        digit_value += 224;
    }

    bool negative = false;
    int c = reader->Peek();

    if (c == '-')
    {
        negative = true;
        PEEK_NEXT_CHAR;
    }

    // End of input (-1) and non-Latin-1 characters both come back as 0xff here.
    if (DigitValueOf(digit_value, c) >= (base == 0 ? 10 : base))
        throw gcnew Exception("No digits found");

    // If BASE is 0, try to find out the base by looking at the initial characters.
    if (base == 0)
    {
        base = 10;
        if (c == '0')
        {
            base = 8;
            PEEK_NEXT_CHAR;
            switch (c)
            {
                case 'x':
                case 'X':
                    base = 16;
                    PEEK_NEXT_CHAR;
                    break;

                case 'b':
                case 'B':
                    base = 2;
                    PEEK_NEXT_CHAR;
                    break;
            }
        }
    }

    // Skip leading zeros
    while (c == '0')
    {
        PEEK_NEXT_CHAR;
    }

    size_t alloc_size = 100;
    size_t str_size = 0;
    char* str = (char *) (*__gmp_allocate_func) (alloc_size);

    // The reader may throw while digits are being collected; the finally block
    // guarantees the native buffer is released on every path.
    try
    {
        while (c != EOF)
        {
            int dig = DigitValueOf(digit_value, c);
            if (dig >= base)
                break;

            if (str_size >= alloc_size)
            {
                size_t old_alloc_size = alloc_size;
                alloc_size = alloc_size * 3 / 2;
                str = (char *) (*__gmp_reallocate_func) (str, old_alloc_size, alloc_size);
            }

            // The buffer holds digit values, not characters, as mpn_set_str expects.
            str[str_size++] = dig;
            reader->Read();
            c = reader->Peek();
        }
        nread += str_size;

        // Make sure the string is not empty, mpn_set_str would fail.
        if (str_size == 0)
        {
            _value->_mp_size = 0;
        }
        else
        {
            mp_size_t xsize = (((mp_size_t)
                (str_size / __mp_bases[base].chars_per_bit_exactly))
                / GMP_NUMB_BITS + 2);
            MPZ_REALLOC (_value, xsize);

            // Convert the byte array in base BASE to our bignum format.
            xsize = mpn_set_str (_value->_mp_d, (unsigned char *) str, str_size, base);
            _value->_mp_size = negative ? -xsize : xsize;
        }
    }
    finally
    {
        (*__gmp_free_func) (str, alloc_size);
    }

    return nread;
}
#pragma endregion
};

View File

@ -1398,12 +1398,12 @@ namespace MPIR
//construction
void AllocateStruct()
{
_value = (mpz_ptr)CustomAllocate(sizeof(__mpz_struct));
_value = (mpz_ptr)((*__gmp_allocate_func)(sizeof(__mpz_struct)));
}
void DeallocateStruct()
{
mpz_clear(_value);
CustomFree(_value);
(*__gmp_free_func)(_value, 0);
_value = nullptr;
}
void FromString(String^ value, int base);
@ -1743,27 +1743,6 @@ namespace MPIR
/// <returns>The number of digits the number would take written in the specified base, possibly 1 too big, not counting a leading minus.</returns>
mp_size_t ApproximateSizeInBase(int base) { return mpz_sizeinbase(_value, base); }
/// <summary>
/// Output the integer to the <paramref name="stream"/> in raw binary format.
/// <para>The number is written in a portable format, with 4 bytes of size information, and that many bytes of limbs.
/// Both the size and the limbs are written in decreasing significance order (i.e., in big-endian).
/// </para>The output can be read with Read(Stream).
/// <para>The output cannot be read by mpz_inp_raw from GMP 1, because of changes necessary
/// for compatibility between 32-bit and 64-bit machines.
/// </para></summary>
/// <param name="stream">Stream to output the number to</param>
void Write(Stream^ stream);
/// <summary>
/// Reads the integer value from the <paramref name="stream"/> in raw binary format, as it would have been written by Write(Stream).
/// <para>The number is read in a portable format, with 4 bytes of size information, and that many bytes of limbs.
/// Both the size and the limbs are written in decreasing significance order (i.e., in big-endian).
/// </para>This routine can read the output from mpz_out_raw also from GMP 1, in spite of changes
/// necessary for compatibility between 32-bit and 64-bit machines.
/// </summary>
/// <param name="stream">Stream to input the number from</param>
void Read(Stream^ stream);
#pragma endregion
#pragma region IO
@ -1777,7 +1756,8 @@ namespace MPIR
/// for compatibility between 32-bit and 64-bit machines.
/// </para></summary>
/// <param name="stream">Stream to output the number to</param>
void Write(Stream^ stream);
/// <returns>the number of bytes written, or 0 if an error occurs.</returns>
size_t Write(Stream^ stream);
/// <summary>
/// Reads the integer value from the <paramref name="stream"/> in raw binary format, as it would have been written by Write(Stream).
@ -1787,8 +1767,79 @@ namespace MPIR
/// necessary for compatibility between 32-bit and 64-bit machines.
/// </summary>
/// <param name="stream">Stream to input the number from</param>
void Read(Stream^ stream);
/// <returns>the number of bytes read, or 0 if an error occurs.</returns>
size_t Read(Stream^ stream);
/// <summary>
/// Output the integer to the <paramref name="writer"/> as a string of digits in decimal.
/// <para>When writing multiple numbers that are to be read back with the Read(TextReader) method,
/// it is useful to separate the numbers with a character that is not a valid decimal digit.
/// </para>This is because the Read method stops reading when it encounters a character that cannot represent a digit.
/// </summary>
/// <param name="writer">Text writer to output the number to</param>
/// <returns>the number of characters written</returns>
size_t Write(TextWriter^ writer) { return Write(writer, 0, false); }
/// <summary>
/// Output the integer to the <paramref name="writer"/> as a string of digits in base <paramref name="base"/>.
/// <para>When writing multiple numbers that are to be read back with the Read(TextReader) method,
/// it is useful to separate the numbers with a character that is not a valid digit in base <paramref name="base"/>.
/// </para>This is because the Read method stops reading when it encounters a character that cannot represent a digit.
/// <para>For hexadecimal, binary, or octal, no leading base indication is written.
/// </para>Therefore, for bases other than 10, use the Read(reader, base) overload rather than Read(reader) to read the number back.
/// </summary>
/// <param name="writer">Text writer to output the number to</param>
/// <param name="base">The base to use for the output.
/// <para>The base can be from 2 to 62; uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</para></param>
/// <returns>the number of characters written</returns>
size_t Write(TextWriter^ writer, int base) { return Write(writer, base, false); }
/// <summary>
/// Output the integer to the <paramref name="writer"/> as a string of digits in base <paramref name="base"/>.
/// <para>When writing multiple numbers that are to be read back with the Read(TextReader) method,
/// it is useful to separate the numbers with a character that is not a valid digit in base <paramref name="base"/>.
/// </para>This is because the Read method stops reading when it encounters a character that cannot represent a digit.
/// <para>For hexadecimal, binary, or octal, no leading base indication is written.
/// </para>Therefore, for bases other than 10, use the Read(reader, base) overload rather than Read(reader) to read the number back.
/// </summary>
/// <param name="writer">Text writer to output the number to</param>
/// <param name="base">The base to use for the output.
/// <para>The base can be from 2 to 62; Bases up to 36 use uppercase or lowercase letters based on the <paramref name="lowercase"/> argument.
/// </para>For bases larger than 36, the <paramref name="lowercase"/> argument is ignored and uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.</param>
/// <param name="lowercase">Indicates if lowercase or uppercase letters should be used for the output.
/// <para>This argument is ignored for bases larger than 36, where both uppercase and lowercase letters are used.</para></param>
/// <returns>the number of characters written</returns>
size_t Write(TextWriter^ writer, int base, bool lowercase);
/// <summary>
/// Input the number as a string, possibly preceded by white space.
/// <para>The base of the number is determined from the leading characters: 0x or 0X for hexadecimal, 0b or 0B for binary, 0 for octal, decimal otherwise.
/// </para>Reading terminates at end-of-stream, or up to but not including a character that is not a valid digit.
/// <para>This method reads the output of a Write(TextWriter) when decimal base is used.
/// </para>For hexadecimal, binary, or octal, because Write(TextWriter) doesn't write leading base indication characters,
/// using this overload of Read will fail to recognize the correct base.</summary>
/// <param name="reader">Text reader to input the number from</param>
/// <returns>the number of characters read</returns>
size_t Read(TextReader^ reader) { return Read(reader, 0); }
/// <summary>
/// Input the number as a string, possibly preceded by white space, in base <paramref name="base"/> from the <paramref name="reader"/>.
/// <para>Reading terminates at end-of-stream, or up to but not including a character that is not a valid digit.
/// </para>This method reads the output of a Write(TextWriter) method.
/// </summary>
/// <param name="reader">Text reader to input the number from</param>
/// <param name="base">The base to use for the input.
/// <para>The base can be from 2 to 62; For bases up to 36 case is ignored.
/// </para>For bases larger than 36, uppercase letters represent digits 10-35 while lowercase letters represent digits 36-61.
/// <para>If 0, the base of the number is determined from the leading characters: 0x or 0X for hexadecimal, 0b or 0B for binary, 0 for octal, decimal otherwise.
/// </para>Note that the leading base characters are not written by the Write method.</param>
/// <returns>the number of characters read</returns>
size_t Read(TextReader^ reader, int base);
internal:
size_t ReadNoWhite(TextReader^ reader, int base, size_t nread);
public:
#pragma endregion
};
};

View File

@ -24,5 +24,6 @@ along with the MPIR Library. If not, see http://www.gnu.org/licenses/.
#pragma once
#include "mpir.h"
#include "gmp-impl.h"
#include "Common.h"
#include "HugeInt.h"

View File

@ -107,7 +107,7 @@ void mpz_inp_raw_p (mpz_ptr x, unsigned char* csize_bytes, mpir_out_ptr out)
/* Get limb boundaries right in the read, for the benefit of the
non-nails case. */
xp[0] = 0;
out->written = (unsigned char *) (xp + abs_xsize) - abs_csize;
out->written = (char *) (xp + abs_xsize) - abs_csize;
}
out->writtenSize = abs_csize;
out->allocatedSize = abs_xsize;