diff --git a/docs/changes.txt b/docs/changes.txt index 06f7800124..fc8318135e 100644 --- a/docs/changes.txt +++ b/docs/changes.txt @@ -62,6 +62,7 @@ All: - wxLogInfo() now logs messages if the log level is high enough, even without wxLog::SetVerbose() which now only affects wxLogVerbose(). - Add wxFileType::GetExpandedCommand() (troelsk). +- Make it easier to convert to/from UTF-8-encoded std::string (ARATA Mizuki). All (GUI): diff --git a/include/wx/string.h b/include/wx/string.h index c23586e314..90783fbd0b 100644 --- a/include/wx/string.h +++ b/include/wx/string.h @@ -1241,12 +1241,17 @@ public: // wxStringImpl is std::string in the encoding we want #define wxStringToStdStringRetType const std::string& const std::string& ToStdString() const { return m_impl; } + std::string ToStdString(const wxMBConv& conv) const + { + wxScopedCharBuffer buf(mb_str(conv)); + return std::string(buf.data(), buf.length()); + } #else // wxStringImpl is either not std::string or needs conversion #define wxStringToStdStringRetType std::string - std::string ToStdString() const + std::string ToStdString(const wxMBConv& conv = wxConvLibc) const { - wxScopedCharBuffer buf(mb_str()); + wxScopedCharBuffer buf(mb_str(conv)); return std::string(buf.data(), buf.length()); } #endif @@ -1611,6 +1616,24 @@ public: return FromImpl(wxStringImpl(utf8, len)); } +#if wxUSE_STD_STRING + static wxString FromUTF8Unchecked(const std::string& utf8) + { + wxASSERT( wxStringOperations::IsValidUtf8String(utf8.c_str(), utf8.length()) ); + /* + Note that, under wxUSE_UNICODE_UTF8 and wxUSE_STD_STRING, wxStringImpl can be + initialized with a std::string whether wxUSE_STL_BASED_WXSTRING is 1 or not. 
+ */ + return FromImpl(utf8); + } + static wxString FromUTF8(const std::string& utf8) + { + if ( utf8.empty() || !wxStringOperations::IsValidUtf8String(utf8.c_str(), utf8.length()) ) + return wxString(); + return FromImpl(utf8); + } +#endif + const wxScopedCharBuffer utf8_str() const { return wxCharBuffer::CreateNonOwned(m_impl.c_str(), m_impl.length()); } @@ -1627,6 +1650,12 @@ public: "string must be valid UTF-8" ); return s; } +#if wxUSE_STD_STRING + static wxString FromUTF8(const std::string& utf8) + { return FromUTF8(utf8.c_str(), utf8.length()); } + static wxString FromUTF8Unchecked(const std::string& utf8) + { return FromUTF8Unchecked(utf8.c_str(), utf8.length()); } +#endif const wxScopedCharBuffer utf8_str() const { return mb_str(wxMBConvUTF8()); } #else // ANSI static wxString FromUTF8(const char *utf8) @@ -1654,6 +1683,12 @@ public: return wxString(buf.data(), wlen); } +#if wxUSE_STD_STRING + static wxString FromUTF8(const std::string& utf8) + { return FromUTF8(utf8.c_str(), utf8.length()); } + static wxString FromUTF8Unchecked(const std::string& utf8) + { return FromUTF8Unchecked(utf8.c_str(), utf8.length()); } +#endif const wxScopedCharBuffer utf8_str() const { return wxMBConvUTF8().cWC2MB(wc_str()); } #endif diff --git a/interface/wx/string.h b/interface/wx/string.h index 2e2e91e651..c4213100fe 100644 --- a/interface/wx/string.h +++ b/interface/wx/string.h @@ -85,8 +85,8 @@ - String in UTF-8 encoding using wxString::utf8_str(). - String in any given encoding using mb_str() with the appropriate wxMBConv object. This is also a potentially destructive operation. - - Standard @c std::string using wxString::ToStdString(). The contents - of the returned string use the current locale encoding, so this + - Standard @c std::string using wxString::ToStdString(). The encoding + of the returned string is specified with a wxMBConv object, so this conversion is potentially destructive as well. - Wide C string using wxString::wc_str(). 
- Standard @c std::wstring using wxString::ToStdWstring(). @@ -745,10 +745,10 @@ public: const TYPE ToAscii(char replaceWith = '_') const; /** - Return the string as an std::string in current locale encoding. + Return the string as an std::string using @e conv's wxMBConv::cWC2MB method. - Note that if the conversion of (Unicode) string contents to the current - locale fails, the return string will be empty. Be sure to check for + Note that if the conversion of (Unicode) string contents using @e conv + fails, the return string will be empty. Be sure to check for this to avoid silent data loss. Instead of using this function it's also possible to write @@ -760,9 +760,12 @@ public: @endcode but using ToStdString() may make the code more clear. + @param conv + The converter to be used. This parameter is new in wxWidgets 3.1.1. + @since 2.9.1 */ - std::string ToStdString() const; + std::string ToStdString(const wxMBConv& conv = wxConvLibc) const; /** Return the string as an std::wstring. @@ -1813,10 +1816,14 @@ public: alternative to this function called FromUTF8Unchecked() which, unlike this one, doesn't check that the input string is valid. + The overload taking @c std::string is only available starting with + wxWidgets 3.1.1. + @since 2.8.4 */ static wxString FromUTF8(const char* s); static wxString FromUTF8(const char* s, size_t len); + static wxString FromUTF8(const std::string& s); //@} //@{ @@ -1833,10 +1840,14 @@ public: string to this function will result in creating a corrupted wxString and all the subsequent operations on it will be undefined. + The overload taking @c std::string is only available starting with + wxWidgets 3.1.1. 
+ @since 2.8.9 */ static wxString FromUTF8Unchecked(const char* s); static wxString FromUTF8Unchecked(const char* s, size_t len); + static wxString FromUTF8Unchecked(const std::string& s); //@} }; diff --git a/tests/strings/stdstrings.cpp b/tests/strings/stdstrings.cpp index 9c7a56471c..0110e9d0e8 100644 --- a/tests/strings/stdstrings.cpp +++ b/tests/strings/stdstrings.cpp @@ -608,5 +608,12 @@ void StdStringTestCase::StdConversion() wxStdWideString s8(s4); CPPUNIT_ASSERT( s8 == "hello" ); + + std::string s9("\xF0\x9F\x90\xB1\0\xE7\x8C\xAB", 9); /* U+1F431 U+0000 U+732B, followed by the literal's terminating NUL (the length 9 includes it) */ + wxString s10 = wxString::FromUTF8(s9); + CPPUNIT_ASSERT_EQUAL( s9, s10.ToStdString(wxConvUTF8) ); + + std::string s11("xyz\0\xFF", 5); /* an invalid UTF-8 sequence */ + CPPUNIT_ASSERT_EQUAL( wxString::FromUTF8(s11), "" ); } #endif // wxUSE_STD_STRING