Fix recognizing locales using UTF-8 charset
Do not assume that the "C" locale uses UTF-8: this is simply not true, and none of the CRT functions handle UTF-8 correctly in this locale. Do recognize locales that explicitly specify the UTF-8 charset as using UTF-8. On most Unix systems (including Linux) the lack of this check didn't really matter, because nl_langinfo() is used there, but it matters a lot with MSVC under MSW: its CRT supports UTF-8 now, yet the UTF-8 functions were not being used there -- do use them now. (cherry picked from commit a1d289fe3ea74aa1c713e0f02f5fd5f83810af58)
This commit is contained in:
parent
759331e839
commit
2c2d9fd909
@ -1065,6 +1065,19 @@ char *strdup(const char *s)
|
||||
bool wxLocaleIsUtf8 = false; // the safer setting if not known
|
||||
#endif
|
||||
|
||||
// Check whether the given charset (codeset) name denotes UTF-8.
//
// The name is accepted if it spells "UTF-8" or "UTF8" in any combination of
// ASCII upper and lower case letters, optionally followed by a locale
// modifier introduced by '@' (as in the "UTF-8@valencia" tail of a locale
// name such as "ca_ES.UTF-8@valencia").
//
// Returns false for a null pointer, an empty string or any other name.
static bool wxIsCharsetUtf8(const char* charset)
{
    if ( !charset )
        return false;

    // Compare the "utf" prefix without using tolower() or strcasecmp():
    // their results depend on the current locale, which is exactly what is
    // being examined here, and strcasecmp() is not available everywhere.
    static const char prefix[] = "utf";
    for ( const char* p = prefix; *p; ++p, ++charset )
    {
        // Setting bit 0x20 maps ASCII upper case letters to lower case ones;
        // no other character can become 'u', 't' or 'f' this way and the
        // terminating NUL becomes a space, so the loop stops safely there.
        if ( (*charset | 0x20) != *p )
            return false;
    }

    // The hyphen between "UTF" and "8" is optional.
    if ( *charset == '-' )
        ++charset;

    if ( *charset != '8' )
        return false;

    ++charset;

    // Nothing but an optional "@modifier" may follow, this rejects names
    // such as "UTF-80" or "UTF-8X".
    return *charset == '\0' || *charset == '@';
}
|
||||
|
||||
static bool wxIsLocaleUtf8()
|
||||
{
|
||||
// NB: we intentionally don't use wxLocale::GetSystemEncodingName(),
|
||||
@ -1075,31 +1088,28 @@ static bool wxIsLocaleUtf8()
|
||||
// GNU libc provides current character set this way (this conforms to
|
||||
// Unix98)
|
||||
const char *charset = nl_langinfo(CODESET);
|
||||
if ( charset )
|
||||
{
|
||||
// "UTF-8" is used by modern glibc versions, but test other variants
|
||||
// as well, just in case:
|
||||
if ( strcmp(charset, "UTF-8") == 0 ||
|
||||
strcmp(charset, "utf-8") == 0 ||
|
||||
strcmp(charset, "UTF8") == 0 ||
|
||||
strcmp(charset, "utf8") == 0 )
|
||||
{
|
||||
if ( charset && wxIsCharsetUtf8(charset) )
|
||||
return true;
|
||||
}
|
||||
}
|
||||
#endif // HAVE_LANGINFO_H
|
||||
|
||||
// check LC_CTYPE string: this also works with (sufficiently recent) MSVC
|
||||
// and on any other system without nl_langinfo()
|
||||
const char *lc_ctype = setlocale(LC_CTYPE, NULL);
|
||||
if ( lc_ctype )
|
||||
{
|
||||
// check if we're running under the "C" locale: it is 7bit subset
|
||||
// of UTF-8, so it can be safely used with the UTF-8 build:
|
||||
const char *lc_ctype = setlocale(LC_CTYPE, NULL);
|
||||
if ( lc_ctype &&
|
||||
(strcmp(lc_ctype, "C") == 0 || strcmp(lc_ctype, "POSIX") == 0) )
|
||||
{
|
||||
if ( (strcmp(lc_ctype, "C") == 0 || strcmp(lc_ctype, "POSIX") == 0) )
|
||||
return true;
|
||||
|
||||
// any other locale can also use UTF-8 encoding if it's explicitly
|
||||
// specified
|
||||
const char* charset = strrchr(lc_ctype, '.');
|
||||
if ( charset && wxIsCharsetUtf8(charset + 1) )
|
||||
return true;
|
||||
}
|
||||
|
||||
// we don't know what charset libc is using, so assume the worst
|
||||
// to be safe:
|
||||
// by default assume that we don't use UTF-8
|
||||
return false;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user