new HTML tags parser and entities substitution code
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@10744 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
parent
ec7c3e898a
commit
daa616fca0
@ -21,24 +21,21 @@
|
||||
#include "wx/html/htmltag.h"
|
||||
#include "wx/filesys.h"
|
||||
|
||||
class wxHtmlParser;
|
||||
class wxHtmlTagHandler;
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
// wxHtmlParser
|
||||
// This class handles generic parsing of HTML document : it scans
|
||||
// the document and divide it into blocks of tags (where one block
|
||||
// consists of starting and ending tag and of text between these
|
||||
// 2 tags.
|
||||
//--------------------------------------------------------------------------------
|
||||
class WXDLLEXPORT wxMBConv;
|
||||
class WXDLLEXPORT wxHtmlParser;
|
||||
class WXDLLEXPORT wxHtmlTagHandler;
|
||||
class WXDLLEXPORT wxHtmlEntitiesParser;
|
||||
|
||||
// This class handles generic parsing of HTML document : it scans
|
||||
// the document and divides it into blocks of tags (where one block
|
||||
// consists of starting and ending tag and of text between these
|
||||
// 2 tags).
|
||||
class WXDLLEXPORT wxHtmlParser : public wxObject
|
||||
{
|
||||
DECLARE_ABSTRACT_CLASS(wxHtmlParser)
|
||||
|
||||
public:
|
||||
wxHtmlParser() : wxObject(), m_HandlersHash(wxKEY_STRING)
|
||||
{ m_FS = NULL; m_Cache = NULL; m_HandlersStack = NULL; }
|
||||
wxHtmlParser();
|
||||
virtual ~wxHtmlParser();
|
||||
|
||||
// Sets the class which will be used for opening files
|
||||
@ -106,6 +103,9 @@ protected:
|
||||
// ignored if no handler is found.
|
||||
// Derived class is *responsible* for filling in m_Handlers table.
|
||||
virtual void AddTag(const wxHtmlTag& tag);
|
||||
|
||||
// Returns entity parser object, used to substitute HTML &entities;
|
||||
wxHtmlEntitiesParser *GetEntitiesParser() const { return m_entitiesParser; }
|
||||
|
||||
protected:
|
||||
// source being parsed
|
||||
@ -130,24 +130,20 @@ protected:
|
||||
wxFileSystem *m_FS;
|
||||
// handlers stack used by PushTagHandler and PopTagHandler
|
||||
wxList *m_HandlersStack;
|
||||
|
||||
// entity parser
|
||||
wxHtmlEntitiesParser *m_entitiesParser;
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
//--------------------------------------------------------------------------------
|
||||
// wxHtmlTagHandler
|
||||
// This class (and derived classes) cooperates with wxHtmlParser.
|
||||
// Each recognized tag is passed to handler which is capable
|
||||
// of handling it. Each tag is handled in 3 steps:
|
||||
// 1. Handler will modifies state of parser
|
||||
// (using it's public methods)
|
||||
// 2. Parser parses source between starting and ending tag
|
||||
// 3. Handler restores original state of the parser
|
||||
//--------------------------------------------------------------------------------
|
||||
|
||||
// This class (and derived classes) cooperates with wxHtmlParser.
|
||||
// Each recognized tag is passed to handler which is capable
|
||||
// of handling it. Each tag is handled in 3 steps:
|
||||
// 1. Handler modifies the state of the parser
|
||||
// (using its public methods)
|
||||
// 2. Parser parses source between starting and ending tag
|
||||
// 3. Handler restores original state of the parser
|
||||
class WXDLLEXPORT wxHtmlTagHandler : public wxObject
|
||||
{
|
||||
DECLARE_ABSTRACT_CLASS(wxHtmlTagHandler)
|
||||
@ -184,6 +180,33 @@ protected:
|
||||
};
|
||||
|
||||
|
||||
// This class is used to parse HTML entities in strings. It can handle
|
||||
// both named entities and &#xxxx entries where xxxx is Unicode code.
|
||||
class WXDLLEXPORT wxHtmlEntitiesParser : public wxObject
|
||||
{
|
||||
DECLARE_DYNAMIC_CLASS(wxHtmlEntitiesParser)
|
||||
|
||||
public:
|
||||
wxHtmlEntitiesParser();
|
||||
virtual ~wxHtmlEntitiesParser();
|
||||
|
||||
// Sets encoding of output string.
|
||||
// Has no effect if wxUSE_WCHAR_T==0 or wxUSE_UNICODE==1
|
||||
void SetEncoding(wxFontEncoding encoding);
|
||||
|
||||
// Parses entities in input and replaces them with respective characters
|
||||
// (with respect to output encoding)
|
||||
wxString Parse(const wxString& input);
|
||||
|
||||
protected:
|
||||
wxChar GetEntityChar(const wxString& entity);
|
||||
wxChar GetCharForCode(unsigned code);
|
||||
|
||||
#if wxUSE_WCHAR_T && !wxUSE_UNICODE
|
||||
wxMBConv *m_conv;
|
||||
wxFontEncoding m_encoding;
|
||||
#endif
|
||||
};
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -157,160 +157,12 @@ bool HP_TagHandler::HandleTag(const wxHtmlTag& tag)
|
||||
}
|
||||
else
|
||||
{ // "PARAM"
|
||||
if (m_Name == wxEmptyString && tag.GetParam(wxT("NAME")) == wxT("Name"))
|
||||
{
|
||||
if (m_Name == wxEmptyString && tag.GetParam(wxT("NAME")) == wxT("Name"))
|
||||
m_Name = tag.GetParam(wxT("VALUE"));
|
||||
if (m_Name.Find(wxT('&')) != -1)
|
||||
{
|
||||
#define ESCSEQ(escape, subst) \
|
||||
{ _T("&") _T(escape) _T(";"), _T("&") _T(escape) _T(" "), _T("&") _T(escape), _T(subst) }
|
||||
static wxChar* substitutions[][4] =
|
||||
{
|
||||
ESCSEQ("quot", "\""),
|
||||
ESCSEQ("#34", "\""),
|
||||
ESCSEQ("#8220", "\""),
|
||||
ESCSEQ("#8221", "\""),
|
||||
ESCSEQ("lt", "<"),
|
||||
ESCSEQ("#60", "<"),
|
||||
ESCSEQ("gt", ">"),
|
||||
ESCSEQ("#62", ">"),
|
||||
|
||||
ESCSEQ("#94", "^"), /* ^ */
|
||||
|
||||
ESCSEQ("nbsp", " "),
|
||||
ESCSEQ("#32", " "),
|
||||
ESCSEQ("iexcl", "!"),
|
||||
ESCSEQ("#33", "!"),
|
||||
ESCSEQ("cent", "¢"/* ¢ */),
|
||||
ESCSEQ("#162", "¢"/* ¢ */),
|
||||
|
||||
ESCSEQ("trade", "(TM)"),
|
||||
ESCSEQ("#153", "(TM)"),
|
||||
ESCSEQ("#8482", "(TM)"),
|
||||
|
||||
ESCSEQ("yen", "¥"),
|
||||
ESCSEQ("#165", "¥"),
|
||||
ESCSEQ("brkbar", "¦"),
|
||||
ESCSEQ("#166", "¦"),
|
||||
ESCSEQ("sect", "§"),
|
||||
ESCSEQ("#167", "§"),
|
||||
ESCSEQ("uml", "¨"),
|
||||
ESCSEQ("#168", "¨"),
|
||||
|
||||
ESCSEQ("copy", "©"), /* © */
|
||||
ESCSEQ("#169", "©"),
|
||||
ESCSEQ("ordf", "ª"),
|
||||
ESCSEQ("#170", "ª"),
|
||||
ESCSEQ("laquo", "«"), /* « */
|
||||
ESCSEQ("#171", "«"),
|
||||
ESCSEQ("not", "¬"),
|
||||
ESCSEQ("#172", "¬"),
|
||||
|
||||
ESCSEQ("reg", "®"), /* ® */
|
||||
ESCSEQ("#174", "®"),
|
||||
|
||||
ESCSEQ("deg", "°"), /* ° */
|
||||
ESCSEQ("#176", "°"),
|
||||
ESCSEQ("plusm", "±"), /* ± */
|
||||
ESCSEQ("#177", "±"),
|
||||
|
||||
ESCSEQ("acute", "´"),
|
||||
ESCSEQ("#180", "´"),
|
||||
ESCSEQ("macron", "¯"),
|
||||
ESCSEQ("#175", "¯"),
|
||||
ESCSEQ("micro", "µ"), /* µ */
|
||||
ESCSEQ("#181", "µ"),
|
||||
ESCSEQ("para", "¶"), /* ¶ */
|
||||
ESCSEQ("#182", "¶"),
|
||||
|
||||
ESCSEQ("ordm", "º"), /* º */
|
||||
ESCSEQ("#186", "º"),
|
||||
ESCSEQ("raquo", "»"), /* » */
|
||||
ESCSEQ("#187", "»"),
|
||||
|
||||
ESCSEQ("iquest", "¿"), /* ¿ */
|
||||
ESCSEQ("#191", "¿"),
|
||||
ESCSEQ("Agrave", "\300"/* À */),
|
||||
ESCSEQ("#193", "\300"/* À */),
|
||||
|
||||
ESCSEQ("Acirc", "\302"/* Â */),
|
||||
ESCSEQ("Atilde", "\303"/* Ã */),
|
||||
ESCSEQ("Auml", "\304"/* Ä */),
|
||||
ESCSEQ("Aring", " "),
|
||||
ESCSEQ("AElig", " "),
|
||||
ESCSEQ("Ccedil", "\347"/* ç */),
|
||||
ESCSEQ("Egrave", "\310"/* È */),
|
||||
ESCSEQ("Eacute", "\311"/* É */),
|
||||
ESCSEQ("Ecirc", "\312"/* Ê */),
|
||||
ESCSEQ("Euml", "\313"/* Ë */),
|
||||
ESCSEQ("Igrave", "\314"/* Ì */),
|
||||
|
||||
ESCSEQ("Icirc", "\316"/* Î */),
|
||||
ESCSEQ("Iuml", "\317"/* Ï */),
|
||||
|
||||
ESCSEQ("Ntilde", "\321"/* Ñ */),
|
||||
ESCSEQ("Ograve", "\322"/* Ò */),
|
||||
|
||||
ESCSEQ("Ocirc", "\324"/* Ô */),
|
||||
ESCSEQ("Otilde", "\325"/* Õ */),
|
||||
ESCSEQ("Ouml", "\326"/* Ö */),
|
||||
|
||||
ESCSEQ("Oslash", " "),
|
||||
ESCSEQ("Ugrave", "\331"/* Ù */),
|
||||
|
||||
ESCSEQ("Ucirc", " "),
|
||||
ESCSEQ("Uuml", "\334"/* Ü */),
|
||||
|
||||
ESCSEQ("szlig", "\247"/* § */),
|
||||
ESCSEQ("agrave","\340"/* à */),
|
||||
ESCSEQ("aacute", "\341"/* á */),
|
||||
ESCSEQ("acirc", "\342"/* â */),
|
||||
ESCSEQ("atilde", "\343"/* ã */),
|
||||
ESCSEQ("auml", "\344"/* ä */),
|
||||
ESCSEQ("aring", "a"),
|
||||
ESCSEQ("aelig", "ae"),
|
||||
ESCSEQ("ccedil", "\347"/* ç */),
|
||||
ESCSEQ("egrave", "\350"/* è */),
|
||||
ESCSEQ("eacute", "\351"/* é */),
|
||||
ESCSEQ("ecirc", "\352"/* ê */),
|
||||
ESCSEQ("euml", "\353"/* ë */),
|
||||
ESCSEQ("igrave", "\354"/* ì */),
|
||||
ESCSEQ("iacute", "\355"/* í */),
|
||||
ESCSEQ("icirc", " "),
|
||||
ESCSEQ("iuml", "\357"/* ï */),
|
||||
ESCSEQ("eth", " "),
|
||||
ESCSEQ("ntilde", "\361"/* ñ */),
|
||||
ESCSEQ("ograve", "\362"/* ò */),
|
||||
ESCSEQ("oacute", "\363"/* ó */),
|
||||
ESCSEQ("ocirc", "\364"/* ô */),
|
||||
ESCSEQ("otilde", "\365"/* õ */),
|
||||
ESCSEQ("ouml", "\366"/* ö */),
|
||||
ESCSEQ("divide", " "),
|
||||
ESCSEQ("oslash", " "),
|
||||
ESCSEQ("ugrave", "\371"/* ù */),
|
||||
ESCSEQ("uacute", "\372"/* ú */),
|
||||
ESCSEQ("ucirc", "\373"/* û */),
|
||||
ESCSEQ("uuml", "\374"/* ü */),
|
||||
|
||||
ESCSEQ("yuml", ""),
|
||||
|
||||
/* this one should ALWAYS stay the last one!!! */
|
||||
ESCSEQ("amp", "&"),
|
||||
ESCSEQ("#38", "&"),
|
||||
|
||||
{ NULL, NULL, NULL }
|
||||
};
|
||||
|
||||
for (int i = 0; substitutions[i][0] != NULL; i++)
|
||||
{
|
||||
m_Name.Replace(substitutions[i][0], substitutions[i][3], TRUE);
|
||||
m_Name.Replace(substitutions[i][1], substitutions[i][3], TRUE);
|
||||
m_Name.Replace(substitutions[i][2], substitutions[i][3], TRUE);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (tag.GetParam(wxT("NAME")) == wxT("Local")) m_Page = tag.GetParam(wxT("VALUE"));
|
||||
if (tag.GetParam(wxT("NAME")) == wxT("ID")) tag.ScanParam(wxT("VALUE"), wxT("%i"), &m_ID);
|
||||
if (tag.GetParam(wxT("NAME")) == wxT("Local"))
|
||||
m_Page = tag.GetParam(wxT("VALUE"));
|
||||
if (tag.GetParam(wxT("NAME")) == wxT("ID"))
|
||||
tag.ScanParam(wxT("VALUE"), wxT("%i"), &m_ID);
|
||||
return FALSE;
|
||||
}
|
||||
}
|
||||
|
@ -131,156 +131,6 @@ const wxHtmlCell* wxHtmlCell::Find(int condition, const void* param) const
|
||||
wxHtmlWordCell::wxHtmlWordCell(const wxString& word, wxDC& dc) : wxHtmlCell()
|
||||
{
|
||||
m_Word = word;
|
||||
|
||||
if (m_Word.Find(wxT('&')) != -1)
|
||||
{
|
||||
#define ESCSEQ(escape, subst) \
|
||||
{ _T("&") _T(escape) _T(";"), _T("&") _T(escape) _T(" "), _T("&") _T(escape), _T(subst) }
|
||||
static wxChar* substitutions[][4] =
|
||||
{
|
||||
ESCSEQ("quot", "\""),
|
||||
ESCSEQ("#34", "\""),
|
||||
ESCSEQ("#8220", "\""),
|
||||
ESCSEQ("#8221", "\""),
|
||||
ESCSEQ("lt", "<"),
|
||||
ESCSEQ("#60", "<"),
|
||||
ESCSEQ("gt", ">"),
|
||||
ESCSEQ("#62", ">"),
|
||||
|
||||
ESCSEQ("#94", "^"), /* ^ */
|
||||
|
||||
ESCSEQ("nbsp", " "),
|
||||
ESCSEQ("#32", " "),
|
||||
ESCSEQ("iexcl", "!"),
|
||||
ESCSEQ("#33", "!"),
|
||||
ESCSEQ("cent", "¢"/* ¢ */),
|
||||
ESCSEQ("#162", "¢"/* ¢ */),
|
||||
|
||||
ESCSEQ("trade", "(TM)"),
|
||||
ESCSEQ("#153", "(TM)"),
|
||||
ESCSEQ("#8482", "(TM)"),
|
||||
|
||||
ESCSEQ("yen", "¥"),
|
||||
ESCSEQ("#165", "¥"),
|
||||
ESCSEQ("brkbar", "¦"),
|
||||
ESCSEQ("#166", "¦"),
|
||||
ESCSEQ("sect", "§"),
|
||||
ESCSEQ("#167", "§"),
|
||||
ESCSEQ("uml", "¨"),
|
||||
ESCSEQ("#168", "¨"),
|
||||
|
||||
ESCSEQ("copy", "©"), /* © */
|
||||
ESCSEQ("#169", "©"),
|
||||
ESCSEQ("ordf", "ª"),
|
||||
ESCSEQ("#170", "ª"),
|
||||
ESCSEQ("laquo", "«"), /* « */
|
||||
ESCSEQ("#171", "«"),
|
||||
ESCSEQ("not", "¬"),
|
||||
ESCSEQ("#172", "¬"),
|
||||
|
||||
ESCSEQ("reg", "®"), /* ® */
|
||||
ESCSEQ("#174", "®"),
|
||||
|
||||
ESCSEQ("deg", "°"), /* ° */
|
||||
ESCSEQ("#176", "°"),
|
||||
ESCSEQ("plusm", "±"), /* ± */
|
||||
ESCSEQ("#177", "±"),
|
||||
|
||||
ESCSEQ("acute", "´"),
|
||||
ESCSEQ("#180", "´"),
|
||||
ESCSEQ("macron", "¯"),
|
||||
ESCSEQ("#175", "¯"),
|
||||
ESCSEQ("micro", "µ"), /* µ */
|
||||
ESCSEQ("#181", "µ"),
|
||||
ESCSEQ("para", "¶"), /* ¶ */
|
||||
ESCSEQ("#182", "¶"),
|
||||
|
||||
ESCSEQ("ordm", "º"), /* º */
|
||||
ESCSEQ("#186", "º"),
|
||||
ESCSEQ("raquo", "»"), /* » */
|
||||
ESCSEQ("#187", "»"),
|
||||
|
||||
ESCSEQ("iquest", "¿"), /* ¿ */
|
||||
ESCSEQ("#191", "¿"),
|
||||
ESCSEQ("Agrave", "\300"/* À */),
|
||||
ESCSEQ("#193", "\300"/* À */),
|
||||
|
||||
ESCSEQ("Acirc", "\302"/* Â */),
|
||||
ESCSEQ("Atilde", "\303"/* Ã */),
|
||||
ESCSEQ("Auml", "\304"/* Ä */),
|
||||
ESCSEQ("Aring", " "),
|
||||
ESCSEQ("AElig", " "),
|
||||
ESCSEQ("Ccedil", "\347"/* ç */),
|
||||
ESCSEQ("Egrave", "\310"/* È */),
|
||||
ESCSEQ("Eacute", "\311"/* É */),
|
||||
ESCSEQ("Ecirc", "\312"/* Ê */),
|
||||
ESCSEQ("Euml", "\313"/* Ë */),
|
||||
ESCSEQ("Igrave", "\314"/* Ì */),
|
||||
|
||||
ESCSEQ("Icirc", "\316"/* Î */),
|
||||
ESCSEQ("Iuml", "\317"/* Ï */),
|
||||
|
||||
ESCSEQ("Ntilde", "\321"/* Ñ */),
|
||||
ESCSEQ("Ograve", "\322"/* Ò */),
|
||||
|
||||
ESCSEQ("Ocirc", "\324"/* Ô */),
|
||||
ESCSEQ("Otilde", "\325"/* Õ */),
|
||||
ESCSEQ("Ouml", "\326"/* Ö */),
|
||||
|
||||
ESCSEQ("Oslash", " "),
|
||||
ESCSEQ("Ugrave", "\331"/* Ù */),
|
||||
|
||||
ESCSEQ("Ucirc", " "),
|
||||
ESCSEQ("Uuml", "\334"/* Ü */),
|
||||
|
||||
ESCSEQ("szlig", "\247"/* § */),
|
||||
ESCSEQ("agrave","\340"/* à */),
|
||||
ESCSEQ("aacute", "\341"/* á */),
|
||||
ESCSEQ("acirc", "\342"/* â */),
|
||||
ESCSEQ("atilde", "\343"/* ã */),
|
||||
ESCSEQ("auml", "\344"/* ä */),
|
||||
ESCSEQ("aring", "a"),
|
||||
ESCSEQ("aelig", "ae"),
|
||||
ESCSEQ("ccedil", "\347"/* ç */),
|
||||
ESCSEQ("egrave", "\350"/* è */),
|
||||
ESCSEQ("eacute", "\351"/* é */),
|
||||
ESCSEQ("ecirc", "\352"/* ê */),
|
||||
ESCSEQ("euml", "\353"/* ë */),
|
||||
ESCSEQ("igrave", "\354"/* ì */),
|
||||
ESCSEQ("iacute", "\355"/* í */),
|
||||
ESCSEQ("icirc", " "),
|
||||
ESCSEQ("iuml", "\357"/* ï */),
|
||||
ESCSEQ("eth", " "),
|
||||
ESCSEQ("ntilde", "\361"/* ñ */),
|
||||
ESCSEQ("ograve", "\362"/* ò */),
|
||||
ESCSEQ("oacute", "\363"/* ó */),
|
||||
ESCSEQ("ocirc", "\364"/* ô */),
|
||||
ESCSEQ("otilde", "\365"/* õ */),
|
||||
ESCSEQ("ouml", "\366"/* ö */),
|
||||
ESCSEQ("divide", " "),
|
||||
ESCSEQ("oslash", " "),
|
||||
ESCSEQ("ugrave", "\371"/* ù */),
|
||||
ESCSEQ("uacute", "\372"/* ú */),
|
||||
ESCSEQ("ucirc", "\373"/* û */),
|
||||
ESCSEQ("uuml", "\374"/* ü */),
|
||||
|
||||
ESCSEQ("yuml", ""),
|
||||
|
||||
/* this one should ALWAYS stay the last one!!! */
|
||||
ESCSEQ("amp", "&"),
|
||||
ESCSEQ("#38", "&"),
|
||||
|
||||
{ NULL, NULL, NULL }
|
||||
};
|
||||
|
||||
for (int i = 0; substitutions[i][0] != NULL; i++)
|
||||
{
|
||||
m_Word.Replace(substitutions[i][0], substitutions[i][3], TRUE);
|
||||
m_Word.Replace(substitutions[i][1], substitutions[i][3], TRUE);
|
||||
m_Word.Replace(substitutions[i][2], substitutions[i][3], TRUE);
|
||||
}
|
||||
}
|
||||
|
||||
dc.GetTextExtent(m_Word, &m_Width, &m_Height, &m_Descent);
|
||||
SetCanLiveOnPagebreak(FALSE);
|
||||
}
|
||||
|
@ -28,6 +28,7 @@
|
||||
#include "wx/tokenzr.h"
|
||||
#include "wx/wfstream.h"
|
||||
#include "wx/url.h"
|
||||
#include "wx/fontmap.h"
|
||||
#include "wx/html/htmldefs.h"
|
||||
#include "wx/html/htmlpars.h"
|
||||
|
||||
@ -39,6 +40,21 @@
|
||||
|
||||
IMPLEMENT_ABSTRACT_CLASS(wxHtmlParser,wxObject)
|
||||
|
||||
// Default constructor: the parser starts without a tags cache, file system
// or handlers stack, and creates the entities parser it owns.
wxHtmlParser::wxHtmlParser()
    : wxObject(), m_HandlersHash(wxKEY_STRING)
{
    m_Cache = NULL;
    m_FS = NULL;
    m_HandlersStack = NULL;

    // released in the destructor
    m_entitiesParser = new wxHtmlEntitiesParser;
}
|
||||
|
||||
// Destructor: releases the handlers stack, the registered handlers and the
// entities parser created in the constructor.
wxHtmlParser::~wxHtmlParser()
{
    // deleting a NULL pointer is a no-op, so no check is needed
    delete m_HandlersStack;

    m_HandlersHash.Clear();

    // NOTE(review): DeleteContents(TRUE) presumably makes Clear() destroy the
    // handler objects stored in the list -- confirm against wxList docs
    m_HandlersList.DeleteContents(TRUE);
    m_HandlersList.Clear();

    delete m_entitiesParser;
}
|
||||
|
||||
wxObject* wxHtmlParser::Parse(const wxString& source)
|
||||
{
|
||||
@ -180,18 +196,398 @@ void wxHtmlParser::PopTagHandler()
|
||||
m_HandlersStack->DeleteNode(first);
|
||||
}
|
||||
|
||||
wxHtmlParser::~wxHtmlParser()
|
||||
{
|
||||
if (m_HandlersStack) delete m_HandlersStack;
|
||||
m_HandlersHash.Clear();
|
||||
m_HandlersList.DeleteContents(TRUE);
|
||||
m_HandlersList.Clear();
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// wxHtmlTagHandler
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
IMPLEMENT_ABSTRACT_CLASS(wxHtmlTagHandler,wxObject)
|
||||
#endif
|
||||
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// wxHtmlEntitiesParser
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
IMPLEMENT_DYNAMIC_CLASS(wxHtmlEntitiesParser,wxObject)
|
||||
|
||||
// Default constructor: no converter is allocated and the output encoding is
// wxFONTENCODING_SYSTEM until SetEncoding() is called.
wxHtmlEntitiesParser::wxHtmlEntitiesParser()
{
#if wxUSE_WCHAR_T && !wxUSE_UNICODE
    // these members exist only in ANSI builds with wchar_t support
    m_conv = NULL;
    m_encoding = wxFONTENCODING_SYSTEM;
#endif
}
|
||||
|
||||
// Destructor: frees the converter allocated by SetEncoding(), if any.
wxHtmlEntitiesParser::~wxHtmlEntitiesParser()
{
    // m_conv is declared (and initialized by the constructor) only under
    // this condition; an unguarded delete does not compile in other builds.
#if wxUSE_WCHAR_T && !wxUSE_UNICODE
    delete m_conv;
#endif
}
|
||||
|
||||
// Selects the encoding used for the characters produced from entities.
// Does nothing unless wxUSE_WCHAR_T is set and wxUSE_UNICODE is not.
void wxHtmlEntitiesParser::SetEncoding(wxFontEncoding encoding)
{
#if wxUSE_WCHAR_T && !wxUSE_UNICODE
    if (encoding != m_encoding)
    {
        // drop the converter of the previous encoding; the pointer is reset
        // before anything else so that it never dangles
        delete m_conv;
        m_conv = NULL;

        m_encoding = encoding;

        // the system encoding needs no converter of its own
        if (m_encoding != wxFONTENCODING_SYSTEM)
        {
            const wxString charset = wxFontMapper::GetEncodingName(m_encoding);
            m_conv = new wxCSConv(charset);
        }
    }
#endif
}
|
||||
|
||||
// Returns a copy of input in which every "&name;" or "&#code;" sequence is
// replaced by the character returned by GetEntityChar(). The terminating
// ';' is optional. Text outside of entities is copied unchanged.
wxString wxHtmlEntitiesParser::Parse(const wxString& input)
{
    const wxChar *c, *last;
    const wxChar *in_str = input.c_str();
    wxString output;

    // `last` marks the start of the literal text not yet copied to output
    for (c = in_str, last = in_str; *c != wxT('\0'); c++)
    {
        if (*c == wxT('&'))
        {
            // flush the literal run preceding the '&'
            if (c - last > 0)
                output.append(last, c - last);
            last = c;

            // A lone '&' at the very end of the input: `last` points at it,
            // so the append after the loop copies it verbatim. The character
            // itself must be tested here, not the pointer.
            if (*++c == wxT('\0'))
                break;

            // collect the entity name: [a-zA-Z0-9_#]*
            const wxChar *ent_s = c;
            for (; (*c >= wxT('a') && *c <= wxT('z')) ||
                   (*c >= wxT('A') && *c <= wxT('Z')) ||
                   (*c >= wxT('0') && *c <= wxT('9')) ||
                   *c == wxT('_') || *c == wxT('#'); c++) {}

            wxString entity;
            entity.append(ent_s, c - ent_s);

            // Leave c on the last character consumed by the entity (the ';'
            // when there is one) so that the c++ of the outer loop lands on
            // the first unconsumed character. Otherwise an '&' directly
            // following an entity would be skipped, and an entity ending the
            // string would step the pointer past the terminating NUL.
            if (*c != wxT(';'))
                c--;
            last = c + 1;

            output << GetEntityChar(entity);
        }
    }

    // copy whatever literal text remains after the last entity
    if (*last != wxT('\0'))
        output.append(last);
    return output;
}
|
||||
|
||||
struct wxHtmlEntityInfo
|
||||
{
|
||||
const wxChar *name;
|
||||
unsigned code;
|
||||
};
|
||||
|
||||
static int compar_entity(const void *key, const void *item)
|
||||
{
|
||||
return wxStrcmp((wxChar*)key, ((wxHtmlEntityInfo*)item)->name);
|
||||
}
|
||||
|
||||
// Converts the given Unicode code to a character of the output encoding.
// Returns '?' if the character cannot be represented.
wxChar wxHtmlEntitiesParser::GetCharForCode(unsigned code)
{
#if wxUSE_UNICODE
    return (wxChar)code;
#elif wxUSE_WCHAR_T
    // Pre-filled with '?': if the conversion reports success without
    // storing anything (e.g. the character would need more than the single
    // byte offered), the replacement character is returned instead of an
    // uninitialized byte.
    char buf[2] = { '?', '\0' };
    wchar_t wbuf[2];
    wbuf[0] = (wchar_t)code;
    wbuf[1] = 0;

    // converter chosen by SetEncoding(), otherwise the current locale's one
    wxMBConv *conv = m_conv ? m_conv : &wxConvLocal;
    if (conv->WC2MB(buf, wbuf, 1) == (size_t)-1)
        return '?';
    return buf[0];
#else
    // no wchar_t support: only codes that fit in one byte can be returned
    return (code < 256) ? (wxChar)code : '?';
#endif
}
|
||||
|
||||
// Returns the character for an entity given without the leading '&' and the
// trailing ';'. Numeric references ("#NNN" decimal, "#xHHH" or "#XHHH"
// hexadecimal) are decoded directly; anything else is looked up in the table
// of named entities. Unknown or malformed entities (and code 0) yield '?'.
wxChar wxHtmlEntitiesParser::GetEntityChar(const wxString& entity)
{
    unsigned code = 0;

    if (entity[0] == wxT('#'))
    {
        const wxChar *ent_s = entity.c_str();
        const wxChar *format;

        if (ent_s[1] == wxT('x') || ent_s[1] == wxT('X'))
        {
            format = wxT("%x");
            ent_s++;            // skip the 'x'
        }
        else
            format = wxT("%u");
        ent_s++;                // skip the '#'

        if (wxSscanf(ent_s, format, &code) != 1)
            code = 0;
    }
    else
    {
        // The table is searched with bsearch() and therefore MUST stay
        // sorted by name in wxStrcmp() order (uppercase before lowercase).
        // It is constant and its size is known at compile time, so no
        // run-time counting into a shared static variable is needed.
        static const wxHtmlEntityInfo substitutions[] = {
            { wxT("AElig"),198 },
            { wxT("Aacute"),193 },
            { wxT("Acirc"),194 },
            { wxT("Agrave"),192 },
            { wxT("Alpha"),913 },
            { wxT("Aring"),197 },
            { wxT("Atilde"),195 },
            { wxT("Auml"),196 },
            { wxT("Beta"),914 },
            { wxT("Ccedil"),199 },
            { wxT("Chi"),935 },
            { wxT("Dagger"),8225 },
            { wxT("Delta"),916 },
            { wxT("ETH"),208 },
            { wxT("Eacute"),201 },
            { wxT("Ecirc"),202 },
            { wxT("Egrave"),200 },
            { wxT("Epsilon"),917 },
            { wxT("Eta"),919 },
            { wxT("Euml"),203 },
            { wxT("Gamma"),915 },
            { wxT("Iacute"),205 },
            { wxT("Icirc"),206 },
            { wxT("Igrave"),204 },
            { wxT("Iota"),921 },
            { wxT("Iuml"),207 },
            { wxT("Kappa"),922 },
            { wxT("Lambda"),923 },
            { wxT("Mu"),924 },
            { wxT("Ntilde"),209 },
            { wxT("Nu"),925 },
            { wxT("OElig"),338 },
            { wxT("Oacute"),211 },
            { wxT("Ocirc"),212 },
            { wxT("Ograve"),210 },
            { wxT("Omega"),937 },
            { wxT("Omicron"),927 },
            { wxT("Oslash"),216 },
            { wxT("Otilde"),213 },
            { wxT("Ouml"),214 },
            { wxT("Phi"),934 },
            { wxT("Pi"),928 },
            { wxT("Prime"),8243 },
            { wxT("Psi"),936 },
            { wxT("Rho"),929 },
            { wxT("Scaron"),352 },
            { wxT("Sigma"),931 },
            { wxT("THORN"),222 },
            { wxT("Tau"),932 },
            { wxT("Theta"),920 },
            { wxT("Uacute"),218 },
            { wxT("Ucirc"),219 },
            { wxT("Ugrave"),217 },
            { wxT("Upsilon"),933 },
            { wxT("Uuml"),220 },
            { wxT("Xi"),926 },
            { wxT("Yacute"),221 },
            { wxT("Yuml"),376 },
            { wxT("Zeta"),918 },
            { wxT("aacute"),225 },
            { wxT("acirc"),226 },
            { wxT("acute"),180 },
            { wxT("aelig"),230 },
            { wxT("agrave"),224 },
            { wxT("alefsym"),8501 },
            { wxT("alpha"),945 },
            { wxT("amp"),38 },
            { wxT("and"),8743 },
            { wxT("ang"),8736 },
            { wxT("aring"),229 },
            { wxT("asymp"),8776 },
            { wxT("atilde"),227 },
            { wxT("auml"),228 },
            { wxT("bdquo"),8222 },
            { wxT("beta"),946 },
            { wxT("brvbar"),166 },
            { wxT("bull"),8226 },
            { wxT("cap"),8745 },
            { wxT("ccedil"),231 },
            { wxT("cedil"),184 },
            { wxT("cent"),162 },
            { wxT("chi"),967 },
            { wxT("circ"),710 },
            { wxT("clubs"),9827 },
            { wxT("cong"),8773 },
            { wxT("copy"),169 },
            { wxT("crarr"),8629 },
            { wxT("cup"),8746 },
            { wxT("curren"),164 },
            { wxT("dArr"),8659 },
            { wxT("dagger"),8224 },
            { wxT("darr"),8595 },
            { wxT("deg"),176 },
            { wxT("delta"),948 },
            { wxT("diams"),9830 },
            { wxT("divide"),247 },
            { wxT("eacute"),233 },
            { wxT("ecirc"),234 },
            { wxT("egrave"),232 },
            { wxT("empty"),8709 },
            { wxT("emsp"),8195 },
            { wxT("ensp"),8194 },
            { wxT("epsilon"),949 },
            { wxT("equiv"),8801 },
            { wxT("eta"),951 },
            { wxT("eth"),240 },
            { wxT("euml"),235 },
            { wxT("euro"),8364 },
            { wxT("exist"),8707 },
            { wxT("fnof"),402 },
            { wxT("forall"),8704 },
            { wxT("frac12"),189 },
            { wxT("frac14"),188 },
            { wxT("frac34"),190 },
            { wxT("frasl"),8260 },
            { wxT("gamma"),947 },
            { wxT("ge"),8805 },
            { wxT("gt"),62 },
            { wxT("hArr"),8660 },
            { wxT("harr"),8596 },
            { wxT("hearts"),9829 },
            { wxT("hellip"),8230 },
            { wxT("iacute"),237 },
            { wxT("icirc"),238 },
            { wxT("iexcl"),161 },
            { wxT("igrave"),236 },
            { wxT("image"),8465 },
            { wxT("infin"),8734 },
            { wxT("int"),8747 },
            { wxT("iota"),953 },
            { wxT("iquest"),191 },
            { wxT("isin"),8712 },
            { wxT("iuml"),239 },
            { wxT("kappa"),954 },
            { wxT("lArr"),8656 },
            { wxT("lambda"),955 },
            { wxT("lang"),9001 },
            { wxT("laquo"),171 },
            { wxT("larr"),8592 },
            { wxT("lceil"),8968 },
            { wxT("ldquo"),8220 },
            { wxT("le"),8804 },
            { wxT("lfloor"),8970 },
            { wxT("lowast"),8727 },
            { wxT("loz"),9674 },
            { wxT("lrm"),8206 },
            { wxT("lsaquo"),8249 },
            { wxT("lsquo"),8216 },
            { wxT("lt"),60 },
            { wxT("macr"),175 },
            { wxT("mdash"),8212 },
            { wxT("micro"),181 },
            { wxT("middot"),183 },
            { wxT("minus"),8722 },
            { wxT("mu"),956 },
            { wxT("nabla"),8711 },
            { wxT("nbsp"),160 },
            { wxT("ndash"),8211 },
            { wxT("ne"),8800 },
            { wxT("ni"),8715 },
            { wxT("not"),172 },
            { wxT("notin"),8713 },
            { wxT("nsub"),8836 },
            { wxT("ntilde"),241 },
            { wxT("nu"),957 },
            { wxT("oacute"),243 },
            { wxT("ocirc"),244 },
            { wxT("oelig"),339 },
            { wxT("ograve"),242 },
            { wxT("oline"),8254 },
            { wxT("omega"),969 },
            { wxT("omicron"),959 },
            { wxT("oplus"),8853 },
            { wxT("or"),8744 },
            { wxT("ordf"),170 },
            { wxT("ordm"),186 },
            { wxT("oslash"),248 },
            { wxT("otilde"),245 },
            { wxT("otimes"),8855 },
            { wxT("ouml"),246 },
            { wxT("para"),182 },
            { wxT("part"),8706 },
            { wxT("permil"),8240 },
            { wxT("perp"),8869 },
            { wxT("phi"),966 },
            { wxT("pi"),960 },
            { wxT("piv"),982 },
            { wxT("plusmn"),177 },
            { wxT("pound"),163 },
            { wxT("prime"),8242 },
            { wxT("prod"),8719 },
            { wxT("prop"),8733 },
            { wxT("psi"),968 },
            { wxT("quot"),34 },
            { wxT("rArr"),8658 },
            { wxT("radic"),8730 },
            { wxT("rang"),9002 },
            { wxT("raquo"),187 },
            { wxT("rarr"),8594 },
            { wxT("rceil"),8969 },
            { wxT("rdquo"),8221 },
            { wxT("real"),8476 },
            { wxT("reg"),174 },
            { wxT("rfloor"),8971 },
            { wxT("rho"),961 },
            { wxT("rlm"),8207 },
            { wxT("rsaquo"),8250 },
            { wxT("rsquo"),8217 },
            { wxT("sbquo"),8218 },
            { wxT("scaron"),353 },
            { wxT("sdot"),8901 },
            { wxT("sect"),167 },
            { wxT("shy"),173 },
            { wxT("sigma"),963 },
            { wxT("sigmaf"),962 },
            { wxT("sim"),8764 },
            { wxT("spades"),9824 },
            { wxT("sub"),8834 },
            { wxT("sube"),8838 },
            { wxT("sum"),8721 },
            { wxT("sup"),8835 },
            { wxT("sup1"),185 },
            { wxT("sup2"),178 },
            { wxT("sup3"),179 },
            { wxT("supe"),8839 },
            { wxT("szlig"),223 },
            { wxT("tau"),964 },
            { wxT("there4"),8756 },
            { wxT("theta"),952 },
            { wxT("thetasym"),977 },
            { wxT("thinsp"),8201 },
            { wxT("thorn"),254 },
            { wxT("tilde"),732 },
            { wxT("times"),215 },
            { wxT("trade"),8482 },
            { wxT("uArr"),8657 },
            { wxT("uacute"),250 },
            { wxT("uarr"),8593 },
            { wxT("ucirc"),251 },
            { wxT("ugrave"),249 },
            { wxT("uml"),168 },
            { wxT("upsih"),978 },
            { wxT("upsilon"),965 },
            { wxT("uuml"),252 },
            { wxT("weierp"),8472 },
            { wxT("xi"),958 },
            { wxT("yacute"),253 },
            { wxT("yen"),165 },
            { wxT("yuml"),255 },
            { wxT("zeta"),950 },
            { wxT("zwj"),8205 },
            { wxT("zwnj"),8204 }
        };
        static const size_t substitutions_cnt =
            sizeof(substitutions) / sizeof(substitutions[0]);

        const wxHtmlEntityInfo *info;
        info = (const wxHtmlEntityInfo*) bsearch(entity.c_str(), substitutions,
                                                 substitutions_cnt,
                                                 sizeof(wxHtmlEntityInfo),
                                                 compar_entity);
        if (info)
            code = info->code;
    }

    // code 0 means "not found" or "could not be parsed"
    if (code == 0)
        return wxT('?');
    else
        return GetCharForCode(code);
}
|
||||
|
||||
#endif
|
||||
|
@ -26,6 +26,7 @@
|
||||
#endif
|
||||
|
||||
#include "wx/html/htmltag.h"
|
||||
#include "wx/html/htmlpars.h"
|
||||
#include <stdio.h> // for vsscanf
|
||||
#include <stdarg.h>
|
||||
|
||||
@ -121,15 +122,17 @@ wxHtmlTagsCache::wxHtmlTagsCache(const wxString& source)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
|
||||
{
|
||||
if (m_Cache == NULL) return;
|
||||
if (m_Cache[m_CachePos].Key != at)
|
||||
{
|
||||
int delta = (at < m_Cache[m_CachePos].Key) ? -1 : 1;
|
||||
do {m_CachePos += delta;} while (m_Cache[m_CachePos].Key != at);
|
||||
do
|
||||
{
|
||||
m_CachePos += delta;
|
||||
}
|
||||
while (m_Cache[m_CachePos].Key != at);
|
||||
}
|
||||
*end1 = m_Cache[m_CachePos].End1;
|
||||
*end2 = m_Cache[m_CachePos].End2;
|
||||
@ -144,64 +147,129 @@ void wxHtmlTagsCache::QueryTag(int at, int* end1, int* end2)
|
||||
|
||||
IMPLEMENT_CLASS(wxHtmlTag,wxObject)
|
||||
|
||||
wxHtmlTag::wxHtmlTag(const wxString& source, int pos, int end_pos, wxHtmlTagsCache* cache) : wxObject()
|
||||
wxHtmlTag::wxHtmlTag(const wxString& source, int pos, int end_pos,
|
||||
wxHtmlTagsCache *cache,
|
||||
wxHtmlEntitiesParser *entParser) : wxObject()
|
||||
{
|
||||
int i;
|
||||
char c;
|
||||
wxChar c;
|
||||
|
||||
// fill-in name, params and begin pos:
|
||||
m_Name = m_Params = wxEmptyString;
|
||||
i = pos+1;
|
||||
if (source[i] == wxT('/')) { m_Ending = TRUE; i++; }
|
||||
else m_Ending = FALSE;
|
||||
if (source[i] == wxT('/'))
|
||||
{ m_Ending = TRUE; i++; }
|
||||
else
|
||||
m_Ending = FALSE;
|
||||
|
||||
// find tag's name and convert it to uppercase:
|
||||
while ((i < end_pos) &&
|
||||
((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
|
||||
c != wxT('\n') && c != wxT('\t') &&
|
||||
c != wxT('>')))
|
||||
((c = source[i++]) != wxT(' ') && c != wxT('\r') &&
|
||||
c != wxT('\n') && c != wxT('\t') &&
|
||||
c != wxT('>')))
|
||||
{
|
||||
if ((c >= wxT('a')) && (c <= wxT('z'))) c -= (wxT('a') - wxT('A'));
|
||||
m_Name += c;
|
||||
if ((c >= wxT('a')) && (c <= wxT('z')))
|
||||
c -= (wxT('a') - wxT('A'));
|
||||
m_Name << c;
|
||||
}
|
||||
|
||||
// if the tag has parameters, read them and "normalize" them,
|
||||
// i.e. convert to uppercase, replace whitespaces by spaces and
|
||||
// remove whitespaces around '=':
|
||||
if (source[i-1] != wxT('>'))
|
||||
while ((i < end_pos) && ((c = source[i++]) != wxT('>')))
|
||||
{
|
||||
#define IS_WHITE(c) (c == wxT(' ') || c == wxT('\r') || \
|
||||
c == wxT('\n') || c == wxT('\t'))
|
||||
wxString pname, pvalue;
|
||||
wxChar quote;
|
||||
enum
|
||||
{
|
||||
if ((c >= wxT('a')) && (c <= wxT('z')))
|
||||
c -= (wxT('a') - wxT('A'));
|
||||
if (c == wxT('\r') || c == wxT('\n') || c == wxT('\t'))
|
||||
c = wxT(' '); // make future parsing a bit simpler
|
||||
m_Params += c;
|
||||
if (c == wxT('"'))
|
||||
ST_BEFORE_NAME = 1,
|
||||
ST_NAME,
|
||||
ST_BEFORE_EQ,
|
||||
ST_BEFORE_VALUE,
|
||||
ST_VALUE
|
||||
} state;
|
||||
|
||||
quote = 0;
|
||||
state = ST_BEFORE_NAME;
|
||||
while (i < end_pos)
|
||||
{
|
||||
c = source[i++];
|
||||
|
||||
if (c == wxT('>') && !(state == ST_VALUE && quote != 0))
|
||||
{
|
||||
// remove spaces around the '=' character:
|
||||
if (m_Params.Length() > 1 &&
|
||||
m_Params[m_Params.Length()-2] == wxT(' '))
|
||||
if (state == ST_BEFORE_EQ || state == ST_NAME)
|
||||
{
|
||||
m_Params.RemoveLast();
|
||||
while (m_Params.Length() > 0 && m_Params.Last() == wxT(' '))
|
||||
m_Params.RemoveLast();
|
||||
m_Params += wxT('"');
|
||||
m_ParamNames.Add(pname);
|
||||
m_ParamValues.Add(wxEmptyString);
|
||||
}
|
||||
while ((i < end_pos) && (source[i++] == wxT(' '))) {}
|
||||
if (i < end_pos) i--;
|
||||
|
||||
// ...and copy the value to m_Params:
|
||||
while ((i < end_pos) && ((c = source[i++]) != wxT('"')))
|
||||
m_Params += c;
|
||||
m_Params += c;
|
||||
else if (state == ST_VALUE && quote == 0)
|
||||
{
|
||||
m_ParamNames.Add(pname);
|
||||
m_ParamValues.Add(entParser->Parse(pvalue));
|
||||
}
|
||||
break;
|
||||
}
|
||||
else if (c == wxT('\''))
|
||||
switch (state)
|
||||
{
|
||||
while ((i < end_pos) && ((c = source[i++]) != wxT('\'')))
|
||||
m_Params += c;
|
||||
m_Params += c;
|
||||
case ST_BEFORE_NAME:
|
||||
if (!IS_WHITE(c))
|
||||
{
|
||||
pname = c;
|
||||
state = ST_NAME;
|
||||
}
|
||||
break;
|
||||
case ST_NAME:
|
||||
if (IS_WHITE(c))
|
||||
state = ST_BEFORE_EQ;
|
||||
else if (c == wxT('='))
|
||||
state = ST_BEFORE_VALUE;
|
||||
else
|
||||
pname << c;
|
||||
break;
|
||||
case ST_BEFORE_EQ:
|
||||
if (c == wxT('='))
|
||||
state = ST_BEFORE_VALUE;
|
||||
else if (!IS_WHITE(c))
|
||||
{
|
||||
m_ParamNames.Add(pname);
|
||||
m_ParamValues.Add(wxEmptyString);
|
||||
pname = c;
|
||||
state = ST_NAME;
|
||||
}
|
||||
break;
|
||||
case ST_BEFORE_VALUE:
|
||||
if (!IS_WHITE(c))
|
||||
{
|
||||
if (c == wxT('"') || c == wxT('\''))
|
||||
quote = c, pvalue = wxEmptyString;
|
||||
else
|
||||
quote = 0, pvalue = c;
|
||||
state = ST_VALUE;
|
||||
}
|
||||
break;
|
||||
case ST_VALUE:
|
||||
if ((quote != 0 && c == quote) ||
|
||||
(quote == 0 && IS_WHITE(c)))
|
||||
{
|
||||
m_ParamNames.Add(pname);
|
||||
if (quote == 0)
|
||||
{
|
||||
// VS: backward compatibility, no real reason,
|
||||
// but wxHTML code relies on this... :(
|
||||
pvalue.MakeUpper();
|
||||
}
|
||||
m_ParamValues.Add(entParser->Parse(pvalue));
|
||||
state = ST_BEFORE_NAME;
|
||||
}
|
||||
else
|
||||
pvalue << c;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
#undef IS_WHITE
|
||||
}
|
||||
m_Begin = i;
|
||||
|
||||
cache->QueryTag(pos, &m_End1, &m_End2);
|
||||
@ -209,113 +277,49 @@ wxHtmlTag::wxHtmlTag(const wxString& source, int pos, int end_pos, wxHtmlTagsCac
|
||||
if (m_End2 > end_pos) m_End2 = end_pos;
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool wxHtmlTag::HasParam(const wxString& par) const
|
||||
{
|
||||
const wxChar *st = m_Params, *p = par;
|
||||
const wxChar *st2, *p2;
|
||||
const wxChar invalid = wxT('\1');
|
||||
|
||||
if (*st == 0) return FALSE;
|
||||
if (*p == 0) return FALSE;
|
||||
for (st2 = st, p2 = p; ; st2++)
|
||||
{
|
||||
if (*p2 == 0 && *st2 == wxT('=')) return TRUE;
|
||||
if (*st2 == 0) return FALSE;
|
||||
if (*p2 != *st2) p2 = &invalid;
|
||||
if (*p2 == *st2) p2++;
|
||||
if (*st2 == wxT(' ')) p2 = p;
|
||||
else if (*st2 == wxT('='))
|
||||
{
|
||||
p2 = p;
|
||||
while (*st2 != wxT(' '))
|
||||
{
|
||||
if (*st2 == wxT('"'))
|
||||
{
|
||||
st2++;
|
||||
while (*st2 != wxT('"')) st2++;
|
||||
}
|
||||
st2++;
|
||||
if (*st2 == 0) return FALSE;
|
||||
}
|
||||
}
|
||||
}
|
||||
return (m_ParamNames.Index(par, FALSE) != wxNOT_FOUND);
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Returns the value of the parameter called 'par', or wxEmptyString if the
// tag has no such parameter.
//
// par         - parameter name; looked up in m_ParamNames with a
//               case-insensitive comparison (Index(..., FALSE)).
// with_commas - if TRUE, the value is returned enclosed in double quotes.
//
// Values are taken from m_ParamValues, which is filled in parallel with
// m_ParamNames while the tag is parsed. This replaces the former manual scan
// of the raw m_Params string, whose quote-skipping loops had no
// end-of-string check.
wxString wxHtmlTag::GetParam(const wxString& par, bool with_commas) const
{
    const int index = m_ParamNames.Index(par, FALSE);
    if (index == wxNOT_FOUND)
        return wxEmptyString;

    if (!with_commas)
        return m_ParamValues[index];

    // VS: backward compatibility, seems to be never used by wxHTML...
    wxString s;
    s << wxT('"') << m_ParamValues[index] << wxT('"');
    return s;
}
|
||||
|
||||
|
||||
|
||||
int wxHtmlTag::ScanParam(const wxString& par, wxChar *format, void *param) const
|
||||
{
|
||||
wxString parval = GetParam(par);
|
||||
return wxSscanf(parval, format, param);
|
||||
}
|
||||
|
||||
// Rebuilds a textual list of all parameters of this tag in the form
//     NAME1="value1" NAME2="value2" ...
//
// VS: this function is for backward compatibility only,
//     never used by wxHTML
//
// A value that itself contains a double quote is wrapped in single quotes
// instead, so the quoting stays unambiguous. Consecutive pairs are separated
// by a single space; without it adjacent parameters would run together
// (NAME1="value1"NAME2="value2").
wxString wxHtmlTag::GetAllParams() const
{
    wxString s;
    const size_t cnt = m_ParamNames.GetCount();

    for (size_t i = 0; i < cnt; i++)
    {
        if (i > 0)
            s << wxT(' ');

        const wxString& value = m_ParamValues[i];
        // pick the quote character that does not clash with the value
        const wxChar quote =
            (value.Find(wxT('"')) != wxNOT_FOUND) ? wxT('\'') : wxT('"');

        s << m_ParamNames[i] << wxT('=') << quote << value << quote;
    }

    return s;
}
|
||||
|
||||
#endif
|
||||
|
@ -197,10 +197,12 @@ void wxHtmlWinParser::AddText(const char* txt)
|
||||
char temp[wxHTML_BUFLEN];
|
||||
register char d;
|
||||
int templen = 0;
|
||||
|
||||
|
||||
if (m_tmpLastWasSpace)
|
||||
{
|
||||
while ((i < lng) && ((txt[i] == '\n') || (txt[i] == '\r') || (txt[i] == ' ') || (txt[i] == '\t'))) i++;
|
||||
while ((i < lng) &&
|
||||
((txt[i] == '\n') || (txt[i] == '\r') || (txt[i] == ' ') ||
|
||||
(txt[i] == '\t'))) i++;
|
||||
}
|
||||
|
||||
while (i < lng)
|
||||
@ -210,7 +212,8 @@ void wxHtmlWinParser::AddText(const char* txt)
|
||||
if ((d == '\n') || (d == '\r') || (d == ' ') || (d == '\t'))
|
||||
{
|
||||
i++, x++;
|
||||
while ((i < lng) && ((txt[i] == '\n') || (txt[i] == '\r') || (txt[i] == ' ') || (txt[i] == '\t'))) i++, x++;
|
||||
while ((i < lng) && ((txt[i] == '\n') || (txt[i] == '\r') ||
|
||||
(txt[i] == ' ') || (txt[i] == '\t'))) i++, x++;
|
||||
}
|
||||
else i++;
|
||||
|
||||
@ -219,9 +222,11 @@ void wxHtmlWinParser::AddText(const char* txt)
|
||||
temp[templen-1] = ' ';
|
||||
temp[templen] = 0;
|
||||
templen = 0;
|
||||
if (m_EncConv) m_EncConv->Convert(temp);
|
||||
c = new wxHtmlWordCell(temp, *(GetDC()));
|
||||
if (m_UseLink) c->SetLink(m_Link);
|
||||
if (m_EncConv)
|
||||
m_EncConv->Convert(temp);
|
||||
c = new wxHtmlWordCell(GetEntitiesParser()->Parse(temp), *(GetDC()));
|
||||
if (m_UseLink)
|
||||
c->SetLink(m_Link);
|
||||
m_Container->InsertCell(c);
|
||||
m_tmpLastWasSpace = TRUE;
|
||||
}
|
||||
@ -229,9 +234,11 @@ void wxHtmlWinParser::AddText(const char* txt)
|
||||
if (templen)
|
||||
{
|
||||
temp[templen] = 0;
|
||||
if (m_EncConv) m_EncConv->Convert(temp);
|
||||
c = new wxHtmlWordCell(temp, *(GetDC()));
|
||||
if (m_UseLink) c->SetLink(m_Link);
|
||||
if (m_EncConv)
|
||||
m_EncConv->Convert(temp);
|
||||
c = new wxHtmlWordCell(GetEntitiesParser()->Parse(temp), *(GetDC()));
|
||||
if (m_UseLink)
|
||||
c->SetLink(m_Link);
|
||||
m_Container->InsertCell(c);
|
||||
m_tmpLastWasSpace = FALSE;
|
||||
}
|
||||
@ -333,7 +340,11 @@ void wxHtmlWinParser::SetFontFace(const wxString& face)
|
||||
void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
|
||||
{
|
||||
m_InputEnc = m_OutputEnc = wxFONTENCODING_DEFAULT;
|
||||
if (m_EncConv) {delete m_EncConv; m_EncConv = NULL;}
|
||||
if (m_EncConv)
|
||||
{
|
||||
delete m_EncConv;
|
||||
m_EncConv = NULL;
|
||||
}
|
||||
|
||||
if (enc == wxFONTENCODING_DEFAULT) return;
|
||||
|
||||
@ -363,6 +374,10 @@ void wxHtmlWinParser::SetInputEncoding(wxFontEncoding enc)
|
||||
m_OutputEnc = wxFONTENCODING_DEFAULT;
|
||||
|
||||
m_InputEnc = enc;
|
||||
if (m_OutputEnc == wxFONTENCODING_DEFAULT)
|
||||
GetEntitiesParser()->SetEncoding(wxFONTENCODING_SYSTEM);
|
||||
else
|
||||
GetEntitiesParser()->SetEncoding(m_OutputEnc);
|
||||
|
||||
if (m_InputEnc == m_OutputEnc) return;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user