wxWidgets/include/wx/convauto.h
Vadim Zeitlin 4cb0e8d05c Fix wxConvAuto behaviour when it is used by wxTextInputStream.
wxConvAuto implicitly supposed that the chunk of data passed to it for
translation was big enough to allow it to at least detect the BOM from it.
However this isn't necessarily the case and never is with wxTextInputStream
which reads the bytes one by one.

Fix this by waiting until we have enough data to be able to detect the BOM.
This still doesn't fix the problem with streams without BOM and the
corresponding unit test still fails -- it will need to be fixed at the level
of wxTextInputStream itself later but handling correctly the cases when a BOM
is present is already better than before.

See #11570.

git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@63064 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
2010-01-04 12:22:49 +00:00

151 lines
4.4 KiB
C++

///////////////////////////////////////////////////////////////////////////////
// Name: wx/convauto.h
// Purpose: wxConvAuto class declaration
// Author: Vadim Zeitlin
// Created: 2006-04-03
// RCS-ID: $Id$
// Copyright: (c) 2006 Vadim Zeitlin
// Licence: wxWindows licence
///////////////////////////////////////////////////////////////////////////////
#ifndef _WX_CONVAUTO_H_
#define _WX_CONVAUTO_H_
#include "wx/strconv.h"
#include "wx/fontenc.h"
#if wxUSE_WCHAR_T
// ----------------------------------------------------------------------------
// wxConvAuto: uses BOM to automatically detect input encoding
// ----------------------------------------------------------------------------
class WXDLLIMPEXP_BASE wxConvAuto : public wxMBConv
{
public:
// default ctor, the real conversion will be created on demand
wxConvAuto(wxFontEncoding enc = wxFONTENCODING_DEFAULT)
{
Init();
m_encDefault = enc;
}
// copy ctor doesn't initialize anything neither as conversion can only be
// deduced on first use
wxConvAuto(const wxConvAuto& other) : wxMBConv()
{
Init();
m_encDefault = other.m_encDefault;
}
virtual ~wxConvAuto()
{
if ( m_ownsConv )
delete m_conv;
}
// get/set the fall-back encoding used when the input text doesn't have BOM
// and isn't UTF-8
//
// special values are wxFONTENCODING_MAX meaning not to use any fall back
// at all (but just fail to convert in this case) and wxFONTENCODING_SYSTEM
// meaning to use the encoding of the system locale
static wxFontEncoding GetFallbackEncoding() { return ms_defaultMBEncoding; }
static void SetFallbackEncoding(wxFontEncoding enc);
static void DisableFallbackEncoding()
{
SetFallbackEncoding(wxFONTENCODING_MAX);
}
// override the base class virtual function(s) to use our m_conv
virtual size_t ToWChar(wchar_t *dst, size_t dstLen,
const char *src, size_t srcLen = wxNO_LEN) const;
virtual size_t FromWChar(char *dst, size_t dstLen,
const wchar_t *src, size_t srcLen = wxNO_LEN) const;
virtual size_t GetMBNulLen() const { return m_conv->GetMBNulLen(); }
virtual wxMBConv *Clone() const { return new wxConvAuto(*this); }
private:
// all currently recognized BOM values
enum BOMType
{
BOM_Unknown = -1,
BOM_None,
BOM_UTF32BE,
BOM_UTF32LE,
BOM_UTF16BE,
BOM_UTF16LE,
BOM_UTF8
};
// return the BOM type of this buffer
static BOMType DetectBOM(const char *src, size_t srcLen);
// common part of all ctors
void Init()
{
// no need to initialize m_bomType and m_consumedBOM here, this will be
// done when m_conv is created
m_conv = NULL;
m_ownsConv = false;
}
// initialize m_conv with the UTF-8 conversion
void InitWithUTF8()
{
m_conv = &wxConvUTF8;
m_ownsConv = false;
}
// create the correct conversion object for the given BOM type
void InitFromBOM(BOMType bomType);
// create the correct conversion object for the BOM present in the
// beginning of the buffer; adjust the buffer to skip the BOM if found
//
// return false if the buffer is too short to allow us to determine if we
// have BOM or not
bool InitFromInput(const char **src, size_t *len);
// adjust src and len to skip over the BOM (identified by m_bomType) at the
// start of the buffer
void SkipBOM(const char **src, size_t *len) const;
// fall-back multibyte encoding to use, may be wxFONTENCODING_SYSTEM or
// wxFONTENCODING_MAX but not wxFONTENCODING_DEFAULT
static wxFontEncoding ms_defaultMBEncoding;
// conversion object which we really use, NULL until the first call to
// either ToWChar() or FromWChar()
wxMBConv *m_conv;
// the multibyte encoding to use by default if input isn't Unicode
wxFontEncoding m_encDefault;
// our BOM type
BOMType m_bomType;
// true if we allocated m_conv ourselves, false if we just use an existing
// global conversion
bool m_ownsConv;
// true if we already skipped BOM when converting (and not just calculating
// the size)
bool m_consumedBOM;
wxDECLARE_NO_ASSIGN_CLASS(wxConvAuto);
};
#endif // wxUSE_WCHAR_T
#endif // _WX_CONVAUTO_H_