Fix recognizing locales using UTF-8 charset
Do not assume that the C locale uses UTF-8: this is simply not true, and none of the CRT functions handle UTF-8 correctly under this locale. Do recognize locales that explicitly specify the UTF-8 charset as being UTF-8 locales. On most Unix systems (including Linux), not doing this didn't really matter because nl_langinfo() was used there. It matters a lot, however, with MSVC under MSW: its CRT supports UTF-8 now, but the UTF-8 functions were not being used there -- do use them now. (cherry picked from commit a1d289fe3ea74aa1c713e0f02f5fd5f83810af58)
This commit is contained in:
parent
759331e839
commit
2c2d9fd909
@ -1065,6 +1065,19 @@ char *strdup(const char *s)
|
|||||||
bool wxLocaleIsUtf8 = false; // the safer setting if not known
|
bool wxLocaleIsUtf8 = false; // the safer setting if not known
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static bool wxIsCharsetUtf8(const char* charset)
|
||||||
|
{
|
||||||
|
if ( strcmp(charset, "UTF-8") == 0 ||
|
||||||
|
strcmp(charset, "utf-8") == 0 ||
|
||||||
|
strcmp(charset, "UTF8") == 0 ||
|
||||||
|
strcmp(charset, "utf8") == 0 )
|
||||||
|
{
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
static bool wxIsLocaleUtf8()
|
static bool wxIsLocaleUtf8()
|
||||||
{
|
{
|
||||||
// NB: we intentionally don't use wxLocale::GetSystemEncodingName(),
|
// NB: we intentionally don't use wxLocale::GetSystemEncodingName(),
|
||||||
@ -1075,31 +1088,28 @@ static bool wxIsLocaleUtf8()
|
|||||||
// GNU libc provides current character set this way (this conforms to
|
// GNU libc provides current character set this way (this conforms to
|
||||||
// Unix98)
|
// Unix98)
|
||||||
const char *charset = nl_langinfo(CODESET);
|
const char *charset = nl_langinfo(CODESET);
|
||||||
if ( charset )
|
if ( charset && wxIsCharsetUtf8(charset) )
|
||||||
{
|
|
||||||
// "UTF-8" is used by modern glibc versions, but test other variants
|
|
||||||
// as well, just in case:
|
|
||||||
if ( strcmp(charset, "UTF-8") == 0 ||
|
|
||||||
strcmp(charset, "utf-8") == 0 ||
|
|
||||||
strcmp(charset, "UTF8") == 0 ||
|
|
||||||
strcmp(charset, "utf8") == 0 )
|
|
||||||
{
|
|
||||||
return true;
|
return true;
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif // HAVE_LANGINFO_H
|
#endif // HAVE_LANGINFO_H
|
||||||
|
|
||||||
|
// check LC_CTYPE string: this also works with (sufficiently recent) MSVC
|
||||||
|
// and on any other system without nl_langinfo()
|
||||||
|
const char *lc_ctype = setlocale(LC_CTYPE, NULL);
|
||||||
|
if ( lc_ctype )
|
||||||
|
{
|
||||||
// check if we're running under the "C" locale: it is 7bit subset
|
// check if we're running under the "C" locale: it is 7bit subset
|
||||||
// of UTF-8, so it can be safely used with the UTF-8 build:
|
// of UTF-8, so it can be safely used with the UTF-8 build:
|
||||||
const char *lc_ctype = setlocale(LC_CTYPE, NULL);
|
if ( (strcmp(lc_ctype, "C") == 0 || strcmp(lc_ctype, "POSIX") == 0) )
|
||||||
if ( lc_ctype &&
|
return true;
|
||||||
(strcmp(lc_ctype, "C") == 0 || strcmp(lc_ctype, "POSIX") == 0) )
|
|
||||||
{
|
// any other locale can also use UTF-8 encoding if it's explicitly
|
||||||
|
// specified
|
||||||
|
const char* charset = strrchr(lc_ctype, '.');
|
||||||
|
if ( charset && wxIsCharsetUtf8(charset + 1) )
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// we don't know what charset libc is using, so assume the worst
|
// by default assume that we don't use UTF-8
|
||||||
// to be safe:
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user