4502e7563b
Contrary to what a comment in wxTextInputStream::GetChar() said, it is actually possible to get more than one wide character from a call to wxMBConv::ToWChar(len+1) even if a previous call to ToWChar(len) failed to decode anything at all. This happens with wxConvAuto because it keeps returning an error while it doesn't have enough data to determine if the input contains a BOM or not, but then returns all the characters examined so far at once if it turns out that there was no BOM, after all. The simplest case in which this created problems was just input starting with a NUL byte, as this could be the start of a UTF-32BE BOM. The fix consists in keeping all the bytes read but not yet decoded in the m_lastBytes buffer and retrying to decode them during the next GetChar() call. This implies keeping track of exactly how much valid data there is in m_lastBytes, as we can't discard the already decoded data immediately, but need to keep it in the buffer too, in order to allow implementing UngetLast(). Incidentally, UngetLast() was totally broken for UTF-16/32 input (containing NUL bytes in the middle of the characters) before and this change fixes this as a side effect. Also add test cases for previously failing inputs.
330 lines
8.7 KiB
C++
330 lines
8.7 KiB
C++
///////////////////////////////////////////////////////////////////////////////
|
|
// Name: tests/streams/textstreamtest.cpp
|
|
// Purpose: wxTextXXXStream unit test
|
|
// Author: Ryan Norton, Vince Harron
|
|
// Created: 2004-08-14
|
|
// Copyright: (c) 2004 Ryan Norton, (c) 2006 Vince Harron
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// headers
|
|
// ----------------------------------------------------------------------------
|
|
|
|
#include "testprec.h"
|
|
|
|
#ifdef __BORLANDC__
|
|
#pragma hdrstop
|
|
#endif
|
|
|
|
#ifndef WX_PRECOMP
|
|
#include "wx/wx.h"
|
|
#endif // WX_PRECOMP
|
|
|
|
#include "wx/txtstrm.h"
|
|
#include "wx/wfstream.h"
|
|
|
|
#if wxUSE_LONGLONG
|
|
#include "wx/longlong.h"
|
|
#endif
|
|
|
|
#if wxUSE_UNICODE
|
|
#include "wx/mstream.h"
|
|
#endif // wxUSE_UNICODE
|
|
|
|
#include "testfile.h"
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// test class
|
|
// ----------------------------------------------------------------------------
|
|
|
|
class TextStreamTestCase : public CppUnit::TestCase
|
|
{
|
|
public:
|
|
TextStreamTestCase();
|
|
|
|
private:
|
|
CPPUNIT_TEST_SUITE( TextStreamTestCase );
|
|
CPPUNIT_TEST( Endline );
|
|
CPPUNIT_TEST( MiscTests );
|
|
|
|
#if wxUSE_LONGLONG
|
|
CPPUNIT_TEST( TestLongLong );
|
|
CPPUNIT_TEST( TestULongLong );
|
|
#endif // wxUSE_LONGLONG
|
|
|
|
#if wxUSE_UNICODE
|
|
CPPUNIT_TEST( TestUTF8Input );
|
|
CPPUNIT_TEST( TestEmbeddedZerosUTF16LEInput );
|
|
CPPUNIT_TEST( TestEmbeddedZerosUTF16BEInput );
|
|
CPPUNIT_TEST( TestEmbeddedZerosUTF32LEInput );
|
|
CPPUNIT_TEST( TestEmbeddedZerosUTF32BEInput );
|
|
#endif // wxUSE_UNICODE
|
|
CPPUNIT_TEST_SUITE_END();
|
|
|
|
void Endline();
|
|
void MiscTests();
|
|
|
|
#if wxUSE_LONGLONG
|
|
void TestLongLong();
|
|
void TestULongLong();
|
|
#endif // wxUSE_LONGLONG
|
|
|
|
#if wxUSE_UNICODE
|
|
void TestUTF8Input();
|
|
void TestEmbeddedZerosUTF16LEInput();
|
|
void TestEmbeddedZerosUTF16BEInput();
|
|
void TestEmbeddedZerosUTF32LEInput();
|
|
void TestEmbeddedZerosUTF32BEInput();
|
|
void TestInput(const wxMBConv& conv,
|
|
const void* encodedText,
|
|
size_t encodedSize );
|
|
#endif // wxUSE_UNICODE
|
|
|
|
|
|
wxDECLARE_NO_COPY_CLASS(TextStreamTestCase);
|
|
};
|
|
|
|
// Register the suite in the unnamed registry so that these tests are run by
// default.
|
|
CPPUNIT_TEST_SUITE_REGISTRATION( TextStreamTestCase );
|
|
|
|
// Also register it under its own name so that these tests can be run alone.
|
|
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( TextStreamTestCase, "TextStreamTestCase" );
|
|
|
|
// The fixture has no state of its own, so there is nothing to initialize.
TextStreamTestCase::TextStreamTestCase() { }
|
|
|
|
// Line terminator which the Endline() test expects to find in the file
// written using wxTextOutputStream on the current platform.
#if defined(__WINDOWS__)
#   define NEWLINE "\r\n"
#elif defined(__WXMAC__) && !defined(__DARWIN__)
#   define NEWLINE "\r"
#else
#   define NEWLINE "\n"
#endif

// Number of bytes in NEWLINE, not counting the terminating NUL. It is derived
// from the literal itself so that the two can never get out of sync.
#define NEWLINELEN (sizeof(NEWLINE) - 1)
|
|
|
|
void TextStreamTestCase::Endline()
|
|
{
|
|
TempFile f("test.txt");
|
|
|
|
{
|
|
wxFileOutputStream pOutFile(f.GetName());
|
|
wxTextOutputStream pOutText(pOutFile);
|
|
pOutText << wxT("Test text") << endl
|
|
<< wxT("More Testing Text (There should be newline before this)");
|
|
}
|
|
|
|
wxFileInputStream pInFile(f.GetName());
|
|
|
|
char szIn[9 + NEWLINELEN];
|
|
|
|
pInFile.Read(szIn, 9 + NEWLINELEN);
|
|
|
|
CPPUNIT_ASSERT( memcmp(&szIn[9], NEWLINE, NEWLINELEN) == 0 );
|
|
}
|
|
|
|
void TextStreamTestCase::MiscTests()
|
|
{
|
|
wxString filename = wxT("testdata.fc");
|
|
wxFileInputStream fsIn(filename);
|
|
if ( !fsIn.IsOk() )
|
|
{
|
|
return;
|
|
}
|
|
|
|
wxTextInputStream tis(fsIn);
|
|
CPPUNIT_ASSERT_EQUAL("# this is the test data file for wxFileConfig tests", tis.ReadLine());
|
|
CPPUNIT_ASSERT_EQUAL("value1=one", tis.ReadLine());
|
|
CPPUNIT_ASSERT_EQUAL("# a comment here", tis.ReadLine());
|
|
CPPUNIT_ASSERT_EQUAL("value2=two", tis.ReadLine());
|
|
CPPUNIT_ASSERT_EQUAL("value\\ with\\ spaces\\ inside\\ it=nothing special", tis.ReadLine());
|
|
CPPUNIT_ASSERT_EQUAL("path=$PATH", tis.ReadLine());
|
|
}
|
|
|
|
#if wxUSE_LONGLONG
|
|
|
|
template <typename T>
|
|
static void DoTestRoundTrip(const T *values, size_t numValues)
|
|
{
|
|
TempFile f("test.txt");
|
|
|
|
{
|
|
wxFileOutputStream fileOut(f.GetName());
|
|
wxTextOutputStream textOut(fileOut);
|
|
|
|
for ( size_t n = 0; n < numValues; n++ )
|
|
{
|
|
textOut << values[n] << endl;
|
|
}
|
|
}
|
|
|
|
{
|
|
wxFileInputStream fileIn(f.GetName());
|
|
wxTextInputStream textIn(fileIn);
|
|
|
|
T value;
|
|
for ( size_t n = 0; n < numValues; n++ )
|
|
{
|
|
textIn >> value;
|
|
|
|
CPPUNIT_ASSERT( value == values[n] );
|
|
}
|
|
}
|
|
}
|
|
|
|
void TextStreamTestCase::TestLongLong()
|
|
{
|
|
static const wxLongLong llvalues[] =
|
|
{
|
|
0,
|
|
1,
|
|
-1,
|
|
0x12345678l,
|
|
-0x12345678l,
|
|
wxLL(0x123456789abcdef0),
|
|
wxLL(-0x123456789abcdef0),
|
|
};
|
|
|
|
DoTestRoundTrip(llvalues, WXSIZEOF(llvalues));
|
|
}
|
|
|
|
void TextStreamTestCase::TestULongLong()
|
|
{
|
|
static const wxULongLong ullvalues[] =
|
|
{
|
|
0,
|
|
1,
|
|
0x12345678l,
|
|
wxULL(0x123456789abcdef0),
|
|
};
|
|
|
|
DoTestRoundTrip(ullvalues, WXSIZEOF(ullvalues));
|
|
}
|
|
|
|
#endif // wxUSE_LONGLONG
|
|
|
|
#if wxUSE_UNICODE
|
|
|
|
// The text which all the encoded buffers below must decode to: U+0041 followed
// by U+0100, repeated twice.
static const wchar_t txtWchar[4] =
{
    0x0041, // LATIN CAPITAL LETTER A
    0x0100, // LATIN CAPITAL LETTER A WITH MACRON
    0x0041, // LATIN CAPITAL LETTER A
    0x0100, // LATIN CAPITAL LETTER A WITH MACRON
};

// The same text in UTF-8: U+0041 is a single byte, U+0100 takes two (C4 80).
static const unsigned char txtUtf8[6] =
{
    0x41, 0xc4, 0x80, 0x41, 0xc4, 0x80,
};

// The same text in UTF-16, least significant byte first.
static const unsigned char txtUtf16le[8] =
{
    0x41, 0x00, 0x00, 0x01, 0x41, 0x00, 0x00, 0x01,
};

// The same text in UTF-16, most significant byte first.
static const unsigned char txtUtf16be[8] =
{
    0x00, 0x41, 0x01, 0x00, 0x00, 0x41, 0x01, 0x00,
};

// The same text in UTF-32, least significant byte first.
static const unsigned char txtUtf32le[16] =
{
    0x41, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00,
    0x41, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00,
};

// The same text in UTF-32, most significant byte first.
static const unsigned char txtUtf32be[16] =
{
    0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x01, 0x00,
    0x00, 0x00, 0x00, 0x41, 0x00, 0x00, 0x01, 0x00,
};
|
|
|
|
void TextStreamTestCase::TestUTF8Input()
|
|
{
|
|
TestInput(wxConvUTF8, txtUtf8, sizeof(txtUtf8));
|
|
TestInput(wxCSConv(wxFONTENCODING_UTF8), txtUtf8, sizeof(txtUtf8));
|
|
}
|
|
|
|
void TextStreamTestCase::TestEmbeddedZerosUTF16LEInput()
|
|
{
|
|
TestInput(wxMBConvUTF16LE(), txtUtf16le, sizeof(txtUtf16le));
|
|
TestInput(wxCSConv(wxFONTENCODING_UTF16LE), txtUtf16le, sizeof(txtUtf16le));
|
|
}
|
|
|
|
void TextStreamTestCase::TestEmbeddedZerosUTF16BEInput()
|
|
{
|
|
TestInput(wxMBConvUTF16BE(), txtUtf16be, sizeof(txtUtf16be));
|
|
TestInput(wxCSConv(wxFONTENCODING_UTF16BE), txtUtf16be, sizeof(txtUtf16be));
|
|
}
|
|
|
|
void TextStreamTestCase::TestEmbeddedZerosUTF32LEInput()
|
|
{
|
|
TestInput(wxMBConvUTF32LE(), txtUtf32le, sizeof(txtUtf32le));
|
|
TestInput(wxCSConv(wxFONTENCODING_UTF32LE), txtUtf32le, sizeof(txtUtf32le));
|
|
}
|
|
|
|
void TextStreamTestCase::TestEmbeddedZerosUTF32BEInput()
|
|
{
|
|
TestInput(wxMBConvUTF32BE(), txtUtf32be, sizeof(txtUtf32be));
|
|
TestInput(wxCSConv(wxFONTENCODING_UTF32BE), txtUtf32be, sizeof(txtUtf32be));
|
|
}
|
|
|
|
void TextStreamTestCase::TestInput(const wxMBConv& conv,
|
|
const void *encodedText,
|
|
size_t encodedSize)
|
|
{
|
|
wxMemoryInputStream byteIn(encodedText, encodedSize);
|
|
wxTextInputStream textIn(byteIn, wxT("\n"), conv);
|
|
|
|
wxString temp;
|
|
while ( wxChar c = textIn.GetChar() )
|
|
{
|
|
temp.Append(c);
|
|
}
|
|
|
|
CPPUNIT_ASSERT_EQUAL( WXSIZEOF(txtWchar), temp.length() );
|
|
|
|
CPPUNIT_ASSERT_EQUAL( 0, memcmp(txtWchar, temp.wc_str(), sizeof(txtWchar)) );
|
|
}
|
|
|
|
TEST_CASE("wxTextInputStream::GetChar", "[text][input][stream][char]")
{
    // Simplest possible input which used to trigger an assertion in
    // wxTextInputStream::GetChar(): it starts with a NUL byte.
    SECTION("starts-with-nul")
    {
        const wxUint8 bytes[] = { 0x00, 0x01 };
        wxMemoryInputStream input(bytes, sizeof(bytes));
        wxTextInputStream text(input);

        REQUIRE( text.GetChar() == 0x00 );
        REQUIRE( text.GetChar() == 0x01 );

        // Once both bytes have been consumed, NUL is returned again and the
        // underlying stream must be at EOF.
        REQUIRE( text.GetChar() == 0x00 );
        CHECK( text.GetInputStream().Eof() );
    }

    // The first 3 bytes of this input look like the beginning of the
    // UTF-32BE BOM, but the 4th one is 0xFE and not 0xFF, so there is no BOM
    // after all and the buffer should be decoded as Latin-1 once the stream
    // gets to that byte. This exercises a problematic path in GetChar().
    SECTION("almost-UTF-32-BOM")
    {
        const wxUint8 bytes[] = { 0x00, 0x00, 0xFE, 0xFE, 0x01 };
        wxMemoryInputStream input(bytes, sizeof(bytes));
        wxTextInputStream text(input);

        REQUIRE( text.GetChar() == 0x00 );
        REQUIRE( text.GetChar() == 0x00 );
        REQUIRE( text.GetChar() == 0xFE );
        REQUIRE( text.GetChar() == 0xFE );
        REQUIRE( text.GetChar() == 0x01 );

        // Nothing is left: NUL is returned and the stream is at EOF.
        REQUIRE( text.GetChar() == 0x00 );
        CHECK( text.GetInputStream().Eof() );
    }
}
|
|
|
|
#endif // wxUSE_UNICODE
|