Replace wxLocaleIdent ctor from language with FromTag()

This static function parses a subset of the language tags described in
BCP 47 (see https://www.rfc-editor.org/rfc/bcp/bcp47.txt).

Use the tag, as specified to this function, rather than the individual
components of the locale identifier under MSW, as this should allow us to
use even locales that can't be described using just language-script-region.
This commit is contained in:
Vadim Zeitlin 2021-09-04 23:56:40 +02:00
parent a47a885718
commit 6b26deaddc
7 changed files with 178 additions and 37 deletions

View File

@ -33,17 +33,14 @@ enum
class WXDLLIMPEXP_BASE wxLocaleIdent
{
public:
// Create the object from BCP 47-like language tag: the string must contain
// at least the language part (2 or 3 ASCII letters) and may contain script
// and region separated by dashes.
static wxLocaleIdent FromTag(const wxString& tag);
// Default ctor creates an empty, invalid identifier.
wxLocaleIdent() { }
// Construct from language, i.e. a two-letter ISO 639-1 code (or a
// three-letter ISO 639-2 code if there is no ISO 639-1 code for this
// language).
wxLocaleIdent(const char* language)
: m_language(wxString::FromAscii(language))
{
}
// Set language
wxLocaleIdent& Language(const wxString& language);
@ -69,6 +66,11 @@ public:
// Construct platform dependent name
wxString GetName() const;
// Get the language tag: for the objects created with FromTag() returns the
// string passed to it directly, otherwise reconstructs this string from
// the components.
wxString GetTag() const;
// Empty locale identifier is invalid: at least Language() must be called.
bool IsEmpty() const
{
@ -76,6 +78,8 @@ public:
}
private:
wxString m_tag;
wxString m_language;
wxString m_region;
wxString m_script;

View File

@ -195,25 +195,66 @@ public:
wxString wxGetUIDateFormat();
/**
Allows to construct the full locale identifier in a portable way.
Represents a locale in a portable way.
Parts of the locale not supported by the current platform (e.g. modifier under non-Unix platforms) are ignored.
The remaining parts are used to construct a string uniquely identifying the locale in a platform-specific name.
There are two possible ways to construct wxLocaleIdent:
Usage example:
- You can either use FromTag() to create it from a string in the form
@code language ["-" script] ["-" region] @endcode, corresponding to
the subset of BCP 47 (https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
syntax.
- Or you can create it from the different parts of this string by using
the default constructor and then chaining calls to Language(),
Region(), Script() and other methods.
The first method is useful for interoperating with the other software using
BCP 47 language tags, while the second one may result in more readable
code and allows specifying Unix-specific locale description parts such as
charset and modifier that are not part of the BCP 47 strings.
Example of using wxLocaleIdent in the second way:
@code
auto loc = wxLocaleIdent("fr").Region("BE").Modifier("euro");
auto loc = wxLocaleIdent().Language("fr").Region("BE").Modifier("euro");
#if defined(__WINDOWS__) || defined(__WXOSX__)
wxASSERT( loc.GetName() == "fr_BE" );
#elif defined(__UNIX__)
wxASSERT( loc.GetName() == "fr_BE@euro" );
#endif
@endcode
For the first way, it is enough to just write
@code
auto loc = wxLocaleIdent::FromTag("fr-BE"); // Dash, not underscore!
@endcode
@since 3.1.6
*/
class wxLocaleIdent
{
public:
/**
Return the locale identifier corresponding to the given BCP 47-like tag.
The string must contain at least the language part (2 or 3 ASCII
letters) and may contain script and region separated by dashes, i.e.
all of the following are valid:
- "mn"
- "mn-MN"
- "mn-Cyrl-MN"
Note that while BCP 47 extlangs, variants, extensions, private use and
grandfathered tags are currently not directly supported, they may still
work for creating wxUILocale on platforms with native support for BCP
47 strings.
If the input argument uses an unrecognized syntax (e.g. is empty), an
empty wxLocaleIdent is returned. Of course, even if this function
returns a non-empty object, the resulting locale may still be invalid
or unsupported, use wxUILocale::IsSupported() to check for this.
*/
static wxLocaleIdent FromTag(const wxString& tag);
/**
Default constructor creates an empty and invalid locale identifier.
@ -221,18 +262,6 @@ public:
*/
wxLocaleIdent();
/**
Constructor with language.
Note that this constructor is non-explicit, allowing to pass just a
simple string, such as "en", to functions taking wxLocaleIdent.
@param language
ISO 639 language code.
See Language() for more detailed info.
*/
wxLocaleIdent(const char* language);
/**
Set language.

View File

@ -22,6 +22,8 @@
#include "wx/uilocale.h"
#include "wx/arrstr.h"
#ifndef __WINDOWS__
#include "wx/language.h"
#endif
@ -45,6 +47,96 @@ wxUILocale wxUILocale::ms_current;
// wxLocaleIdent
// ----------------------------------------------------------------------------
/* static */
// Parse a BCP 47-like language tag of the form
//
//      language ["-" script] ["-" region]
//
// and return the corresponding identifier.
//
// Returns an empty wxLocaleIdent if the tag is not pure ASCII, has no
// (or an empty) language subtag, or if the subtag following the language
// (after skipping extlang-like subtags) is neither a script nor a region.
// Otherwise the returned object stores the language, whatever script/region
// could be recognized and the full original tag.
wxLocaleIdent wxLocaleIdent::FromTag(const wxString& tag)
{
    // See section 2.1 of https://www.rfc-editor.org/rfc/bcp/bcp47.txt for the
    // full syntax. Here we fully support just the subset we're interested in:
    //
    //  - Normal language tags (not private use or grandfathered ones).
    //  - Only script and region, but not the extensions or extlangs.

    // Language tags must always use ASCII.
    if ( tag != tag.ToAscii() )
        return wxLocaleIdent();

    const wxArrayString& parts = wxSplit(tag, '-', '\0');
    wxArrayString::const_iterator it = parts.begin();

    // Reject not only the tags without any subtags at all, but also those
    // starting with an empty subtag (e.g. "-US"): without the language we
    // can't return a valid object and must not store the tag neither, as
    // GetTag() would otherwise return it for an identifier with no language.
    if ( it == parts.end() || it->empty() )
        return wxLocaleIdent();

    // We have at least the language, so we'll return a valid object.
    wxLocaleIdent locId;
    locId.m_language = *it;

    // Also store the full string, so that the platforms that support BCP 47
    // natively can use it instead of reconstructing the string from our fields.
    locId.m_tag = tag;

    if ( ++it == parts.end() )
        return locId;

    // Advance to the next component we know about.
    switch ( locId.m_language.length() )
    {
        case 2:
        case 3:
            // Looks like an ISO 639 code.
            break;

        default:
            // It may be private use or grandfathered tag or just invalid
            // syntax, but in any case we can't parse it further.
            return locId;
    }

    // Skip extlangs that are 3 letters long, in contrast to 3 digit region
    // codes.
    while ( it->length() == 3 && !isdigit((*it)[0]) )
    {
        if ( ++it == parts.end() )
            return locId;
    }

    switch ( it->length() )
    {
        case 2:
        case 3:
            // Either an ISO 3166-1 or UN M.49 region code.
            locId.m_region = *it;
            break;

        case 4:
            // Must be an ISO 15924 script.
            locId.m_script = *it;
            break;

        default:
            // This looks to be completely invalid.
            return wxLocaleIdent();
    }

    // If we got the language and the region, we can't parse anything else
    // (variants, extensions, private use) anyhow.
    if ( !locId.m_region.empty() )
        return locId;

    // Otherwise we must have got the script above, so check if we have the
    // region too.
    if ( ++it == parts.end() )
        return locId;

    switch ( it->length() )
    {
        case 2:
        case 3:
            locId.m_region = *it;
            break;

        // Anything else is not a region: leave it unparsed, it still
        // remains available as part of the stored tag.
    }

    return locId;
}
wxLocaleIdent& wxLocaleIdent::Language(const wxString& language)
{
m_language = language;
@ -75,6 +167,22 @@ wxLocaleIdent& wxLocaleIdent::Modifier(const wxString& modifier)
return *this;
}
// Return the language tag for this identifier: the string stored by
// FromTag() if there is one, otherwise "language[-script][-region]" built
// from the individual components, omitting the empty ones.
wxString wxLocaleIdent::GetTag() const
{
    if ( m_tag.empty() )
    {
        // No tag was stored, so reconstruct it from the parts we have.
        wxString result(m_language);

        if ( !m_script.empty() )
        {
            result << '-' << m_script;
        }

        if ( !m_region.empty() )
        {
            result << '-' << m_region;
        }

        return result;
    }

    // Use the stored string as is.
    return m_tag;
}
// ----------------------------------------------------------------------------
// wxUILocale
// ----------------------------------------------------------------------------

View File

@ -420,7 +420,7 @@ wxUILocaleImpl* wxUILocaleImpl::CreateForLocale(const wxLocaleIdent& locId)
return NULL;
}
return wxUILocaleImplName::Create(locId.GetName().wc_str());
return wxUILocaleImplName::Create(locId.GetTag().wc_str());
}
#endif // wxUSE_INTL

View File

@ -139,7 +139,7 @@ wxUILocaleImplCF::GetInfo(wxLocaleInfo index, wxLocaleCategory cat) const
/* static */
wxUILocaleImpl* wxUILocaleImpl::CreateStdC()
{
return wxUILocaleImplCF::Create(wxLocaleIdent("C"));
return wxUILocaleImplCF::Create(wxLocaleIdent().Language("C"));
}
/* static */

View File

@ -387,7 +387,7 @@ wxUILocaleImplUnix::CompareStrings(const wxString& lhs, const wxString& rhs,
/* static */
wxUILocaleImpl* wxUILocaleImpl::CreateStdC()
{
return new wxUILocaleImplUnix("C");
return new wxUILocaleImplUnix(wxLocaleIdent().Language("C"));
}
/* static */

View File

@ -260,23 +260,23 @@ static inline bool CheckSupported(const wxUILocale& loc, const char* desc)
TEST_CASE("wxUILocale::IsSupported", "[uilocale]")
{
CheckSupported(wxUILocale("en"), "English");
CheckSupported(wxUILocale(wxLocaleIdent("fr").Region("FR")), "French");
CHECK( !wxUILocale("bloordyblop").IsSupported() );
CheckSupported(wxUILocale(wxLocaleIdent::FromTag("en")), "English");
CheckSupported(wxUILocale(wxLocaleIdent().Language("fr").Region("FR")), "French");
CHECK( !wxUILocale(wxLocaleIdent::FromTag("bloordyblop")).IsSupported() );
}
TEST_CASE("wxUILocale::GetInfo", "[uilocale]")
{
CHECK( wxUILocale("en").GetInfo(wxLOCALE_DECIMAL_POINT) == "." );
CHECK( wxUILocale(wxLocaleIdent::FromTag("en")).GetInfo(wxLOCALE_DECIMAL_POINT) == "." );
const wxUILocale locDE("de");
const wxUILocale locDE(wxLocaleIdent::FromTag("de"));
if ( CheckSupported(locDE, "German") )
CHECK( locDE.GetInfo(wxLOCALE_DECIMAL_POINT) == "," );
// This one shows that "Swiss High German" locale (de_CH) correctly uses
// dot, and not comma, as decimal separator, even under macOS, where POSIX
// APIs use incorrect (identical to "German") definitions for this locale.
const wxUILocale locDE_CH(wxLocaleIdent("de").Region("CH"));
const wxUILocale locDE_CH(wxLocaleIdent().Language("de").Region("CH"));
if ( CheckSupported(locDE_CH, "Swiss German") )
CHECK( locDE_CH.GetInfo(wxLOCALE_DECIMAL_POINT) == "." );
}
@ -288,7 +288,7 @@ TEST_CASE("wxUILocale::CompareStrings", "[uilocale]")
{
SECTION("English")
{
const wxUILocale l("en");
const wxUILocale l(wxLocaleIdent::FromTag("en"));
// This is not very interesting, but check that comparison works at all.
CHECK( l.CompareStrings("x", "x") == 0 );
@ -319,7 +319,7 @@ TEST_CASE("wxUILocale::CompareStrings", "[uilocale]")
#if wxUSE_UNICODE
SECTION("German")
{
const wxUILocale l(wxLocaleIdent("de").Region("DE"));
const wxUILocale l(wxLocaleIdent().Language("de").Region("DE"));
if ( !CheckSupported(l, "German") )
return;
@ -344,7 +344,7 @@ TEST_CASE("wxUILocale::CompareStrings", "[uilocale]")
if ( wxIsRunningUnderWine() )
return;
const wxUILocale l("sv");
const wxUILocale l(wxLocaleIdent::FromTag("sv"));
if ( !CheckSupported(l, "Swedish") )
return;