4cb0e8d05c
wxConvAuto implicitly supposed that the chunk of data passed to it for translation was big enough to allow it to at least detect the BOM from it. However this isn't necessarily the case and never is with wxTextInputStream which reads the bytes one by one. Fix this by waiting until we have enough data to be able to detect the BOM. This still doesn't fix the problem with streams without BOM and the corresponding unit test still fails -- it will need to be fixed at the level of wxTextInputStream itself later but handling correctly the cases when a BOM is present is already better than before. See #11570. git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@63064 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
151 lines
4.4 KiB
C++
151 lines
4.4 KiB
C++
///////////////////////////////////////////////////////////////////////////////
|
|
// Name: wx/convauto.h
|
|
// Purpose: wxConvAuto class declaration
|
|
// Author: Vadim Zeitlin
|
|
// Created: 2006-04-03
|
|
// RCS-ID: $Id$
|
|
// Copyright: (c) 2006 Vadim Zeitlin
|
|
// Licence: wxWindows licence
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
|
|
#ifndef _WX_CONVAUTO_H_
|
|
#define _WX_CONVAUTO_H_
|
|
|
|
#include "wx/strconv.h"
|
|
#include "wx/fontenc.h"
|
|
|
|
#if wxUSE_WCHAR_T
|
|
|
|
// ----------------------------------------------------------------------------
|
|
// wxConvAuto: uses BOM to automatically detect input encoding
|
|
// ----------------------------------------------------------------------------
|
|
|
|
class WXDLLIMPEXP_BASE wxConvAuto : public wxMBConv
{
public:
    // default ctor, the real conversion will be created on demand
    //
    // enc is remembered as the multibyte encoding to use by default for this
    // object if the input isn't Unicode
    wxConvAuto(wxFontEncoding enc = wxFONTENCODING_DEFAULT)
    {
        Init();

        m_encDefault = enc;
    }

    // copy ctor doesn't copy the conversion object either, as the conversion
    // can only be deduced on first use: only the default encoding is copied
    wxConvAuto(const wxConvAuto& other) : wxMBConv()
    {
        Init();

        m_encDefault = other.m_encDefault;
    }

    virtual ~wxConvAuto()
    {
        // m_conv may point to a global conversion object (see InitWithUTF8())
        // which must not be deleted, hence the ownership flag
        if ( m_ownsConv )
            delete m_conv;
    }

    // get/set the fall-back encoding used when the input text doesn't have BOM
    // and isn't UTF-8
    //
    // special values are wxFONTENCODING_MAX meaning not to use any fall back
    // at all (but just fail to convert in this case) and wxFONTENCODING_SYSTEM
    // meaning to use the encoding of the system locale
    static wxFontEncoding GetFallbackEncoding() { return ms_defaultMBEncoding; }
    static void SetFallbackEncoding(wxFontEncoding enc);
    static void DisableFallbackEncoding()
    {
        SetFallbackEncoding(wxFONTENCODING_MAX);
    }


    // override the base class virtual function(s) to use our m_conv
    virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
                           const char *src, size_t srcLen = wxNO_LEN) const;

    virtual size_t FromWChar(char *dst, size_t dstLen,
                             const wchar_t *src, size_t srcLen = wxNO_LEN) const;

    // return the length of the terminating NUL of the underlying conversion
    //
    // m_conv is NULL until the first call to ToWChar() or FromWChar(), so we
    // must not dereference it unconditionally: fall back to the base class
    // implementation while the real conversion is not known yet
    virtual size_t GetMBNulLen() const
    {
        return m_conv ? m_conv->GetMBNulLen() : wxMBConv::GetMBNulLen();
    }

    // the clone is created using the copy ctor, i.e. it only inherits the
    // default encoding and will detect the conversion to use on its own
    virtual wxMBConv *Clone() const { return new wxConvAuto(*this); }

private:
    // all currently recognized BOM values
    enum BOMType
    {
        BOM_Unknown = -1,
        BOM_None,
        BOM_UTF32BE,
        BOM_UTF32LE,
        BOM_UTF16BE,
        BOM_UTF16LE,
        BOM_UTF8
    };

    // return the BOM type of this buffer
    static BOMType DetectBOM(const char *src, size_t srcLen);

    // common part of all ctors
    //
    // notice that m_encDefault is not set here, each ctor does it itself
    void Init()
    {
        m_conv = NULL;
        m_ownsConv = false;

        // these fields only become meaningful once m_conv is created, but
        // give them well-defined values nevertheless so that the object is
        // never left in an indeterminate state
        m_bomType = BOM_Unknown;
        m_consumedBOM = false;
    }

    // initialize m_conv with the UTF-8 conversion
    //
    // the global wxConvUTF8 object is used, so we don't own the conversion
    void InitWithUTF8()
    {
        m_conv = &wxConvUTF8;
        m_ownsConv = false;
    }

    // create the correct conversion object for the given BOM type
    void InitFromBOM(BOMType bomType);

    // create the correct conversion object for the BOM present in the
    // beginning of the buffer; adjust the buffer to skip the BOM if found
    //
    // return false if the buffer is too short to allow us to determine if we
    // have BOM or not
    bool InitFromInput(const char **src, size_t *len);

    // adjust src and len to skip over the BOM (identified by m_bomType) at the
    // start of the buffer
    void SkipBOM(const char **src, size_t *len) const;


    // fall-back multibyte encoding to use, may be wxFONTENCODING_SYSTEM or
    // wxFONTENCODING_MAX but not wxFONTENCODING_DEFAULT
    static wxFontEncoding ms_defaultMBEncoding;

    // conversion object which we really use, NULL until the first call to
    // either ToWChar() or FromWChar()
    wxMBConv *m_conv;

    // the multibyte encoding to use by default if input isn't Unicode
    wxFontEncoding m_encDefault;

    // our BOM type, BOM_Unknown until it is detected
    BOMType m_bomType;

    // true if we allocated m_conv ourselves, false if we just use an existing
    // global conversion
    bool m_ownsConv;

    // true if we already skipped BOM when converting (and not just calculating
    // the size)
    bool m_consumedBOM;


    wxDECLARE_NO_ASSIGN_CLASS(wxConvAuto);
};
|
|
|
|
#endif // wxUSE_WCHAR_T
|
|
|
|
#endif // _WX_CONVAUTO_H_
|
|
|