1999-12-30 19:35:13 -05:00
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
2000-07-15 15:51:35 -04:00
|
|
|
// Name: wx/encconv.h
|
1999-12-30 19:35:13 -05:00
|
|
|
// Purpose: wxEncodingConverter class for converting between different
|
|
|
|
// font encodings
|
|
|
|
// Author: Vaclav Slavik
|
|
|
|
// Copyright: (c) 1999 Vaclav Slavik
|
2004-05-23 16:53:33 -04:00
|
|
|
// Licence: wxWindows licence
|
1999-12-30 19:35:13 -05:00
|
|
|
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
2000-07-15 15:51:35 -04:00
|
|
|
#ifndef _WX_ENCCONV_H_
|
|
|
|
#define _WX_ENCCONV_H_
|
1999-12-30 19:35:13 -05:00
|
|
|
|
|
|
|
#include "wx/defs.h"
|
2001-06-26 16:59:19 -04:00
|
|
|
|
2000-07-15 15:51:35 -04:00
|
|
|
#include "wx/object.h"
|
|
|
|
#include "wx/fontenc.h"
|
1999-12-30 19:35:13 -05:00
|
|
|
#include "wx/dynarray.h"
|
|
|
|
|
2000-07-15 15:51:35 -04:00
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// constants
|
|
|
|
// ----------------------------------------------------------------------------
|
1999-12-30 19:35:13 -05:00
|
|
|
|
2000-07-15 15:51:35 -04:00
|
|
|
// Conversion modes accepted as the `method` argument of
// wxEncodingConverter::Init() (wxCONVERT_STRICT is its default).
enum
|
|
|
|
{
|
1999-12-30 19:35:13 -05:00
|
|
|
// Characters that cannot be converted are copied to the output unchanged.
wxCONVERT_STRICT,
|
|
|
|
// Characters that cannot be converted are replaced by (lossy) substitutes.
wxCONVERT_SUBSTITUTE
|
|
|
|
};
|
|
|
|
|
|
|
|
|
2000-07-15 15:51:35 -04:00
|
|
|
// Platform identifiers accepted as the `platform` argument of
// wxEncodingConverter::GetPlatformEquivalents().
enum
|
|
|
|
{
|
1999-12-30 19:35:13 -05:00
|
|
|
// The platform this binary was compiled for (default of GetPlatformEquivalents()).
wxPLATFORM_CURRENT = -1,
|
2000-01-03 13:22:51 -05:00
|
|
|
|
1999-12-30 19:35:13 -05:00
|
|
|
// Explicit platforms, numbered consecutively starting from 0.
wxPLATFORM_UNIX = 0,
|
|
|
|
wxPLATFORM_WINDOWS,
|
|
|
|
wxPLATFORM_OS2,
|
2000-01-03 13:22:51 -05:00
|
|
|
wxPLATFORM_MAC
|
1999-12-30 19:35:13 -05:00
|
|
|
};
|
|
|
|
|
2000-07-15 15:51:35 -04:00
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
// types
|
|
|
|
// ----------------------------------------------------------------------------
|
1999-12-30 19:35:13 -05:00
|
|
|
|
2002-03-06 01:31:34 -05:00
|
|
|
// Declares wxFontEncodingArray, the container type returned by
// wxEncodingConverter::GetPlatformEquivalents() and GetAllEquivalents().
WX_DEFINE_ARRAY_INT(wxFontEncoding, wxFontEncodingArray);
|
1999-12-30 19:35:13 -05:00
|
|
|
|
|
|
|
//--------------------------------------------------------------------------------
|
|
|
|
// wxEncodingConverter
|
|
|
|
// This class is capable of converting strings between any two
|
|
|
|
// 8bit encodings/charsets. It can also convert from/to Unicode
|
|
|
|
//--------------------------------------------------------------------------------
|
|
|
|
|
2003-07-01 21:59:24 -04:00
|
|
|
class WXDLLIMPEXP_BASE wxEncodingConverter : public wxObject
|
1999-12-30 19:35:13 -05:00
|
|
|
{
|
|
|
|
public:
|
2000-01-03 13:22:51 -05:00
|
|
|
|
1999-12-30 19:35:13 -05:00
|
|
|
wxEncodingConverter();
|
2006-09-05 16:47:48 -04:00
|
|
|
// Frees the m_Table array. No null check is needed: delete[] applied to a
// null pointer is a no-op.
virtual ~wxEncodingConverter() { delete[] m_Table; }
|
2000-01-03 13:22:51 -05:00
|
|
|
|
2002-12-04 09:11:26 -05:00
|
|
|
// Initialize conversion. Both output or input encoding may
|
2000-07-15 15:51:35 -04:00
|
|
|
// be wxFONTENCODING_UNICODE, but only if wxUSE_WCHAR_T is set to 1.
|
1999-12-30 19:35:13 -05:00
|
|
|
//
|
|
|
|
// All subsequent calls to Convert() will interpret its argument
|
|
|
|
// as a string in input_enc encoding and will output string in
|
|
|
|
// output_enc encoding.
|
|
|
|
//
|
2000-01-03 13:22:51 -05:00
|
|
|
// You must call this method before calling Convert. You may call
|
1999-12-30 19:35:13 -05:00
|
|
|
// it more than once in order to switch to another conversion
|
|
|
|
//
|
|
|
|
// Method affects behaviour of Convert() in case input character
|
|
|
|
// cannot be converted because it does not exist in output encoding:
|
2000-01-03 13:22:51 -05:00
|
|
|
// wxCONVERT_STRICT --
|
|
|
|
// follow behaviour of GNU Recode - just copy unconvertible
|
|
|
|
// characters to output and don't change them (their integer
|
1999-12-30 19:35:13 -05:00
|
|
|
// value will stay the same)
|
|
|
|
// wxCONVERT_SUBSTITUTE --
|
2000-01-03 13:22:51 -05:00
|
|
|
// try some (lossy) substitutions - e.g. replace
|
1999-12-30 19:35:13 -05:00
|
|
|
// unconvertible latin capitals with acute by ordinary
|
|
|
|
// capitals, replace en-dash or em-dash by '-' etc.
|
|
|
|
// both modes guarantee that output string will have same length
|
|
|
|
// as input string
|
|
|
|
//
|
2004-09-13 14:03:44 -04:00
|
|
|
// Returns false if given conversion is impossible, true otherwise
|
1999-12-30 19:35:13 -05:00
|
|
|
// (conversion may be impossible either if you try to convert
|
2004-05-23 10:56:36 -04:00
|
|
|
// to Unicode with non-Unicode build of wxWidgets or if input
|
1999-12-30 19:35:13 -05:00
|
|
|
// or output encoding is not supported.)
|
|
|
|
bool Init(wxFontEncoding input_enc, wxFontEncoding output_enc, int method = wxCONVERT_STRICT);
|
2000-01-03 13:22:51 -05:00
|
|
|
|
1999-12-30 19:35:13 -05:00
|
|
|
// Convert input string according to settings passed to Init.
|
|
|
|
// Note that you must call Init before using Convert!
|
2004-11-16 06:58:24 -05:00
|
|
|
bool Convert(const char* input, char* output) const;
|
|
|
|
// In-place variant: forwards to Convert(const char*, char*) with str used
// as both the input and the output buffer, and returns that call's result.
bool Convert(char* str) const { return Convert(str, str); }
|
2003-09-22 16:15:00 -04:00
|
|
|
wxString Convert(const wxString& input) const;
|
2000-07-15 15:51:35 -04:00
|
|
|
|
|
|
|
#if wxUSE_WCHAR_T
|
2004-11-16 06:58:24 -05:00
|
|
|
bool Convert(const char* input, wchar_t* output) const;
|
|
|
|
bool Convert(const wchar_t* input, char* output) const;
|
|
|
|
bool Convert(const wchar_t* input, wchar_t* output) const;
|
|
|
|
// In-place variant: forwards to Convert(const wchar_t*, wchar_t*) with str
// used as both the input and the output buffer, and returns that call's result.
bool Convert(wchar_t* str) const { return Convert(str, str); }
|
2000-01-03 13:22:51 -05:00
|
|
|
#endif
|
1999-12-30 19:35:13 -05:00
|
|
|
// Return equivalent(s) for given font that are used
|
|
|
|
// under given platform. wxPLATFORM_CURRENT means the platform
|
|
|
|
// this binary was compiled for
|
|
|
|
//
|
|
|
|
// Examples:
|
|
|
|
// current platform enc returned value
|
|
|
|
// -----------------------------------------------------
|
|
|
|
// unix CP1250 {ISO8859_2}
|
|
|
|
// unix ISO8859_2 {}
|
|
|
|
// windows ISO8859_2 {CP1250}
|
|
|
|
//
|
|
|
|
// Equivalence is defined in terms of convertibility:
|
|
|
|
// 2 encodings are equivalent if you can convert text between
|
|
|
|
// them without losing information (it may - and will - happen
|
|
|
|
// that you lose special chars like quotation marks or em-dashes
|
|
|
|
// but you shouldn't lose any diacritics and language-specific
|
|
|
|
// characters when converting between equivalent encodings).
|
2000-01-03 13:22:51 -05:00
|
|
|
//
|
|
|
|
// Convert() method is not limited to converting between
|
1999-12-30 19:35:13 -05:00
|
|
|
// equivalent encodings, it can convert between arbitrary
|
|
|
|
// two encodings!
|
|
|
|
//
|
|
|
|
// Remember that this function does _NOT_ check for presence of
|
|
|
|
// fonts in system. It only tells you what are most suitable
|
|
|
|
// encodings. (It usually returns only one encoding)
|
|
|
|
//
|
|
|
|
// Note that argument enc itself may be present in returned array!
|
|
|
|
// (so that you can -- as a side effect -- detect whether the
|
|
|
|
// encoding is native for this platform or not)
|
|
|
|
static wxFontEncodingArray GetPlatformEquivalents(wxFontEncoding enc, int platform = wxPLATFORM_CURRENT);
|
|
|
|
|
2000-01-03 13:22:51 -05:00
|
|
|
// Similar to GetPlatformEquivalents, but this one will return ALL
|
1999-12-30 19:35:13 -05:00
|
|
|
// equivalent encodings, regardless of the platform, including itself.
|
|
|
|
static wxFontEncodingArray GetAllEquivalents(wxFontEncoding enc);
|
|
|
|
|
2003-10-13 19:47:22 -04:00
|
|
|
// Return true if [any text in] one multibyte encoding can be
|
|
|
|
// converted to another one losslessly.
|
|
|
|
//
|
|
|
|
// Do not call this with wxFONTENCODING_UNICODE, it doesn't make
|
|
|
|
// sense (always works in one sense and always depends on the text
|
|
|
|
// to convert in the other)
|
|
|
|
// encIn: source encoding; encOut: target encoding.
// Returns true when encOut is found in GetAllEquivalents(encIn).
static bool CanConvert(wxFontEncoding encIn, wxFontEncoding encOut)
|
|
|
|
{
|
2003-10-13 19:49:18 -04:00
|
|
|
// Look encOut up in the equivalents of encIn; a result of wxNOT_FOUND
// is reported as "cannot convert".
return GetAllEquivalents(encIn).Index(encOut) != wxNOT_FOUND;
|
2003-10-13 19:47:22 -04:00
|
|
|
}
|
|
|
|
|
1999-12-30 19:35:13 -05:00
|
|
|
private:
|
2000-01-03 13:22:51 -05:00
|
|
|
|
2000-07-15 15:51:35 -04:00
|
|
|
#if wxUSE_WCHAR_T
|
|
|
|
wchar_t *m_Table;
|
|
|
|
#else
|
|
|
|
char *m_Table;
|
|
|
|
#endif
|
2000-01-02 14:51:04 -05:00
|
|
|
bool m_UnicodeInput, m_UnicodeOutput;
|
1999-12-30 19:35:13 -05:00
|
|
|
bool m_JustCopy;
|
2000-01-03 13:22:51 -05:00
|
|
|
|
2009-02-08 06:45:59 -05:00
|
|
|
wxDECLARE_NO_COPY_CLASS(wxEncodingConverter);
|
1999-12-30 19:35:13 -05:00
|
|
|
};
|
|
|
|
|
2000-07-15 15:51:35 -04:00
|
|
|
#endif // _WX_ENCCONV_H_
|