2004-02-15 10:43:21 -05:00
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
// Name: tests/mbconv/main.cpp
|
|
|
|
// Purpose: wxMBConv unit test
|
2005-03-30 18:03:36 -05:00
|
|
|
// Author: Vadim Zeitlin, Mike Wetherell
|
2004-02-15 10:43:21 -05:00
|
|
|
// Created: 14.02.04
|
|
|
|
// RCS-ID: $Id$
|
2005-03-30 18:03:36 -05:00
|
|
|
// Copyright: (c) 2003 TT-Solutions, (c) 2005 Mike Wetherell
|
2004-02-15 10:43:21 -05:00
|
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// headers
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
|
2004-11-22 00:00:19 -05:00
|
|
|
#include "testprec.h"
|
2004-04-01 02:17:50 -05:00
|
|
|
|
|
|
|
#ifdef __BORLANDC__
|
|
|
|
#pragma hdrstop
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifndef WX_PRECOMP
|
|
|
|
#include "wx/wx.h"
|
|
|
|
#endif // WX_PRECOMP
|
|
|
|
|
2004-02-15 10:43:21 -05:00
|
|
|
#include "wx/strconv.h"
|
|
|
|
#include "wx/string.h"
|
|
|
|
|
2005-03-30 18:03:36 -05:00
|
|
|
#if defined wxHAVE_TCHAR_SUPPORT && !defined HAVE_WCHAR_H
|
|
|
|
#define HAVE_WCHAR_H
|
|
|
|
#endif
|
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// Some wide character constants. "\uXXXX" escapes aren't supported by old
|
|
|
|
// compilers such as VC++ 5 and g++ 2.95.
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
// The tables below are only ever read and are only used in this file, so they
// are declared 'static const': this keeps these very short names out of the
// global namespace of the test program and prevents accidental modification.
// Each table is a NUL-terminated wide string holding a single code point.

// 1 byte in UTF-8
static const wchar_t u41[] = { 0x41, 0 };
static const wchar_t u7f[] = { 0x7f, 0 };

// 2 bytes in UTF-8
static const wchar_t u80[] = { 0x80, 0 };
static const wchar_t u391[] = { 0x391, 0 };
static const wchar_t u7ff[] = { 0x7ff, 0 };

// 3 bytes in UTF-8
static const wchar_t u800[] = { 0x800, 0 };
static const wchar_t u2620[] = { 0x2620, 0 };
static const wchar_t ufffd[] = { 0xfffd, 0 };

// 4 bytes in UTF-8: code points above 0xffff
#if SIZEOF_WCHAR_T == 4
// wchar_t is wide enough to hold the code point directly
static const wchar_t u10000[] = { 0x10000, 0 };
static const wchar_t u1000a5[] = { 0x1000a5, 0 };
static const wchar_t u10fffd[] = { 0x10fffd, 0 };
#else
// otherwise each code point is spelled as a UTF-16 surrogate pair
static const wchar_t u10000[] = { 0xd800, 0xdc00, 0 };
static const wchar_t u1000a5[] = { 0xdbc0, 0xdca5, 0 };
static const wchar_t u10fffd[] = { 0xdbff, 0xdffd, 0 };
#endif
|
|
|
|
|
2004-02-15 10:43:21 -05:00
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// test class
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
class MBConvTestCase : public CppUnit::TestCase
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
MBConvTestCase() { }
|
|
|
|
|
|
|
|
private:
|
|
|
|
CPPUNIT_TEST_SUITE( MBConvTestCase );
|
|
|
|
CPPUNIT_TEST( WC2CP1250 );
|
2005-03-30 18:03:36 -05:00
|
|
|
#ifdef HAVE_WCHAR_H
|
|
|
|
CPPUNIT_TEST( UTF8_41 );
|
|
|
|
CPPUNIT_TEST( UTF8_7f );
|
|
|
|
CPPUNIT_TEST( UTF8_80 );
|
|
|
|
CPPUNIT_TEST( UTF8_c2_7f );
|
|
|
|
CPPUNIT_TEST( UTF8_c2_80 );
|
|
|
|
CPPUNIT_TEST( UTF8_ce_91 );
|
|
|
|
CPPUNIT_TEST( UTF8_df_bf );
|
|
|
|
CPPUNIT_TEST( UTF8_df_c0 );
|
|
|
|
CPPUNIT_TEST( UTF8_e0_a0_7f );
|
|
|
|
CPPUNIT_TEST( UTF8_e0_a0_80 );
|
|
|
|
CPPUNIT_TEST( UTF8_e2_98_a0 );
|
|
|
|
CPPUNIT_TEST( UTF8_ef_bf_bd );
|
|
|
|
CPPUNIT_TEST( UTF8_ef_bf_c0 );
|
|
|
|
CPPUNIT_TEST( UTF8_f0_90_80_7f );
|
|
|
|
CPPUNIT_TEST( UTF8_f0_90_80_80 );
|
|
|
|
CPPUNIT_TEST( UTF8_f4_8f_bf_bd );
|
|
|
|
CPPUNIT_TEST( UTF8PUA_f4_80_82_a5 );
|
|
|
|
CPPUNIT_TEST( UTF8Octal_backslash245 );
|
|
|
|
#endif // HAVE_WCHAR_H
|
2004-02-15 10:43:21 -05:00
|
|
|
CPPUNIT_TEST_SUITE_END();
|
|
|
|
|
|
|
|
void WC2CP1250();
|
|
|
|
|
2005-03-30 18:03:36 -05:00
|
|
|
#ifdef HAVE_WCHAR_H
|
|
|
|
// UTF-8 tests. Test the first, last and one in the middle for sequences
|
|
|
|
// of each length
|
|
|
|
void UTF8_41() { UTF8("\x41", u41); }
|
|
|
|
void UTF8_7f() { UTF8("\x7f", u7f); }
|
|
|
|
void UTF8_80() { UTF8("\x80", NULL); }
|
|
|
|
|
|
|
|
void UTF8_c2_7f() { UTF8("\xc2\x7f", NULL); }
|
|
|
|
void UTF8_c2_80() { UTF8("\xc2\x80", u80); }
|
|
|
|
void UTF8_ce_91() { UTF8("\xce\x91", u391); }
|
|
|
|
void UTF8_df_bf() { UTF8("\xdf\xbf", u7ff); }
|
|
|
|
void UTF8_df_c0() { UTF8("\xdf\xc0", NULL); }
|
|
|
|
|
|
|
|
void UTF8_e0_a0_7f() { UTF8("\xe0\xa0\x7f", NULL); }
|
|
|
|
void UTF8_e0_a0_80() { UTF8("\xe0\xa0\x80", u800); }
|
|
|
|
void UTF8_e2_98_a0() { UTF8("\xe2\x98\xa0", u2620); }
|
|
|
|
void UTF8_ef_bf_bd() { UTF8("\xef\xbf\xbd", ufffd); }
|
|
|
|
void UTF8_ef_bf_c0() { UTF8("\xef\xbf\xc0", NULL); }
|
|
|
|
|
|
|
|
void UTF8_f0_90_80_7f() { UTF8("\xf0\x90\x80\x7f", NULL); }
|
|
|
|
void UTF8_f0_90_80_80() { UTF8("\xf0\x90\x80\x80", u10000); }
|
|
|
|
void UTF8_f4_8f_bf_bd() { UTF8("\xf4\x8f\xbf\xbd", u10fffd); }
|
|
|
|
|
|
|
|
// test 'escaping the escape characters' for the two escaping schemes
|
|
|
|
void UTF8PUA_f4_80_82_a5() { UTF8PUA("\xf4\x80\x82\xa5", u1000a5); }
|
|
|
|
void UTF8Octal_backslash245() { UTF8Octal("\\245", L"\\245"); }
|
|
|
|
|
|
|
|
// implementation for the utf-8 tests (see comments below)
|
|
|
|
void UTF8(const char *charSequence, const wchar_t *wideSequence);
|
|
|
|
void UTF8PUA(const char *charSequence, const wchar_t *wideSequence);
|
|
|
|
void UTF8Octal(const char *charSequence, const wchar_t *wideSequence);
|
|
|
|
void UTF8(const char *charSequence, const wchar_t *wideSequence, int option);
|
|
|
|
#endif // HAVE_WCHAR_H
|
|
|
|
|
2004-04-01 02:17:50 -05:00
|
|
|
DECLARE_NO_COPY_CLASS(MBConvTestCase)
|
2004-02-15 10:43:21 -05:00
|
|
|
};
|
|
|
|
|
2004-03-03 17:53:52 -05:00
|
|
|
// register in the unnamed registry so that these tests are run by default
|
|
|
|
CPPUNIT_TEST_SUITE_REGISTRATION( MBConvTestCase );
|
|
|
|
|
|
|
|
// also include in its own registry so that these tests can be run alone
|
2004-02-15 10:43:21 -05:00
|
|
|
CPPUNIT_TEST_SUITE_NAMED_REGISTRATION( MBConvTestCase, "MBConvTestCase" );
|
|
|
|
|
|
|
|
void MBConvTestCase::WC2CP1250()
|
|
|
|
{
|
|
|
|
static const struct Data
|
|
|
|
{
|
|
|
|
const wchar_t *wc;
|
|
|
|
const char *cp1250;
|
|
|
|
} data[] =
|
|
|
|
{
|
|
|
|
{ L"hello", "hello" }, // test that it works in simplest case
|
2004-06-23 18:18:11 -04:00
|
|
|
{ L"\xBD of \xBD is \xBC", NULL }, // this should fail as cp1250 doesn't have 1/2
|
2004-02-15 10:43:21 -05:00
|
|
|
};
|
|
|
|
|
|
|
|
wxCSConv cs1250(wxFONTENCODING_CP1250);
|
|
|
|
for ( size_t n = 0; n < WXSIZEOF(data); n++ )
|
|
|
|
{
|
|
|
|
const Data& d = data[n];
|
2004-03-03 17:53:52 -05:00
|
|
|
if (d.cp1250)
|
|
|
|
{
|
|
|
|
CPPUNIT_ASSERT( strcmp(cs1250.cWC2MB(d.wc), d.cp1250) == 0 );
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
2004-03-05 18:14:57 -05:00
|
|
|
CPPUNIT_ASSERT( (const char*)cs1250.cWC2MB(d.wc) == NULL );
|
2004-03-03 17:53:52 -05:00
|
|
|
}
|
2004-02-15 10:43:21 -05:00
|
|
|
}
|
|
|
|
}
|
2005-03-30 18:03:36 -05:00
|
|
|
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// UTF-8 tests
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
|
|
|
|
#ifdef HAVE_WCHAR_H
|
|
|
|
|
|
|
|
// Check that 'charSequence' translates to 'wideSequence' and back.
|
2005-04-03 17:15:52 -04:00
|
|
|
// Invalid sequences can be tested by giving NULL for 'wideSequence'. Even
|
2005-03-30 18:03:36 -05:00
|
|
|
// invalid sequences should roundtrip when an option is given and this is
|
|
|
|
// checked.
|
|
|
|
//
|
|
|
|
void MBConvTestCase::UTF8(const char *charSequence,
|
|
|
|
const wchar_t *wideSequence)
|
|
|
|
{
|
|
|
|
UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
|
|
|
|
UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
|
|
|
|
UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use this alternative when 'charSequence' contains a PUA character. Such
|
|
|
|
// sequences should still roundtrip ok, and this is checked.
|
|
|
|
//
|
|
|
|
void MBConvTestCase::UTF8PUA(const char *charSequence,
|
|
|
|
const wchar_t *wideSequence)
|
|
|
|
{
|
|
|
|
UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
|
|
|
|
UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
|
|
|
|
UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Use this alternative when 'charSequence' contains an octal escape sequence.
|
|
|
|
// Such sequences should still roundtrip ok, and this is checked.
|
|
|
|
//
|
|
|
|
void MBConvTestCase::UTF8Octal(const char *charSequence,
|
|
|
|
const wchar_t *wideSequence)
|
|
|
|
{
|
|
|
|
UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_NOT);
|
|
|
|
UTF8(charSequence, wideSequence, wxMBConvUTF8::MAP_INVALID_UTF8_TO_PUA);
|
|
|
|
UTF8(charSequence, NULL, wxMBConvUTF8::MAP_INVALID_UTF8_TO_OCTAL);
|
|
|
|
}
|
|
|
|
|
|
|
|
// include the option in the error messages so it's possible to see which
|
|
|
|
// test failed
|
|
|
|
// NB: the expansion refers to a variable named 'errmsg' which must be in scope
//     wherever the macro is used; it is appended to the stringified condition
//     to form the message passed to CPPUNIT_ASSERT_MESSAGE
#define UTF8ASSERT(expr) CPPUNIT_ASSERT_MESSAGE(#expr + errmsg, expr)
|
|
|
|
|
|
|
|
// The test implementation
|
|
|
|
//
|
|
|
|
void MBConvTestCase::UTF8(const char *charSequence,
|
|
|
|
const wchar_t *wideSequence,
|
|
|
|
int option)
|
|
|
|
{
|
|
|
|
const size_t BUFSIZE = 128;
|
|
|
|
wxASSERT(strlen(charSequence) * 3 + 10 < BUFSIZE);
|
|
|
|
char bytes[BUFSIZE];
|
|
|
|
|
|
|
|
// include the option in the error messages so it's possible to see
|
|
|
|
// which test failed
|
|
|
|
sprintf(bytes, " (with option == %d)", option);
|
|
|
|
std::string errmsg(bytes);
|
|
|
|
|
|
|
|
// put the charSequence at the start, middle and end of a string
|
|
|
|
strcpy(bytes, charSequence);
|
|
|
|
strcat(bytes, "ABC");
|
|
|
|
strcat(bytes, charSequence);
|
|
|
|
strcat(bytes, "XYZ");
|
|
|
|
strcat(bytes, charSequence);
|
|
|
|
|
|
|
|
// translate it into wide characters
|
|
|
|
wxMBConvUTF8 utf8(option);
|
|
|
|
wchar_t widechars[BUFSIZE];
|
2005-04-03 17:15:52 -04:00
|
|
|
size_t lenResult = utf8.MB2WC(NULL, bytes, 0);
|
2005-03-30 18:03:36 -05:00
|
|
|
size_t result = utf8.MB2WC(widechars, bytes, BUFSIZE);
|
2005-04-03 17:15:52 -04:00
|
|
|
UTF8ASSERT(result == lenResult);
|
2005-03-30 18:03:36 -05:00
|
|
|
|
|
|
|
// check we got the expected result
|
|
|
|
if (wideSequence) {
|
|
|
|
UTF8ASSERT(result != (size_t)-1);
|
|
|
|
wxASSERT(result < BUFSIZE);
|
|
|
|
|
|
|
|
wchar_t expected[BUFSIZE];
|
|
|
|
wcscpy(expected, wideSequence);
|
|
|
|
wcscat(expected, L"ABC");
|
|
|
|
wcscat(expected, wideSequence);
|
|
|
|
wcscat(expected, L"XYZ");
|
|
|
|
wcscat(expected, wideSequence);
|
|
|
|
|
|
|
|
UTF8ASSERT(wcscmp(widechars, expected) == 0);
|
2005-04-03 17:15:52 -04:00
|
|
|
UTF8ASSERT(wcslen(widechars) == result);
|
2005-03-30 18:03:36 -05:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
// If 'wideSequence' is NULL, then the result is expected to be
|
|
|
|
// invalid. Normally that is as far as we can go, but if there is an
|
|
|
|
// option then the conversion should succeed anyway, and it should be
|
|
|
|
// possible to translate back to the original
|
|
|
|
if (!option) {
|
|
|
|
UTF8ASSERT(result == (size_t)-1);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
UTF8ASSERT(result != (size_t)-1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// translate it back and check we get the original
|
|
|
|
char bytesAgain[BUFSIZE];
|
2005-04-03 17:15:52 -04:00
|
|
|
size_t lenResultAgain = utf8.WC2MB(NULL, widechars, 0);
|
2005-03-30 18:03:36 -05:00
|
|
|
size_t resultAgain = utf8.WC2MB(bytesAgain, widechars, BUFSIZE);
|
2005-04-03 17:15:52 -04:00
|
|
|
UTF8ASSERT(resultAgain == lenResultAgain);
|
2005-03-30 18:03:36 -05:00
|
|
|
UTF8ASSERT(resultAgain != (size_t)-1);
|
|
|
|
wxASSERT(resultAgain < BUFSIZE);
|
|
|
|
|
|
|
|
UTF8ASSERT(strcmp(bytes, bytesAgain) == 0);
|
2005-04-03 17:15:52 -04:00
|
|
|
UTF8ASSERT(strlen(bytesAgain) == resultAgain);
|
2005-03-30 18:03:36 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
#endif // HAVE_WCHAR_H
|