/////////////////////////////////////////////////////////////////////////////
// Name:        html/htmlpars.h
// Purpose:     interface of wxHtmlTagHandler
// Author:      wxWidgets team
// RCS-ID:      $Id$
// Licence:     wxWindows license
/////////////////////////////////////////////////////////////////////////////
|
|
|
|
|
|
|
|
/**
|
|
|
|
@class wxHtmlTagHandler
|
2008-03-08 09:43:31 -05:00
|
|
|
|
2008-10-25 16:44:52 -04:00
|
|
|
@todo describe me
|
2008-03-08 09:43:31 -05:00
|
|
|
|
2008-03-08 08:52:38 -05:00
|
|
|
@library{wxhtml}
|
|
|
|
@category{html}
|
2008-03-08 09:43:31 -05:00
|
|
|
|
2008-10-25 16:44:52 -04:00
|
|
|
@see @ref overview_html_handlers, wxHtmlTag
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
|
|
|
class wxHtmlTagHandler : public wxObject
|
|
|
|
{
|
|
|
|
public:
|
|
|
|
/**
|
|
|
|
Constructor.
|
|
|
|
*/
|
|
|
|
wxHtmlTagHandler();
|
|
|
|
|
|
|
|
/**
|
2008-10-25 16:44:52 -04:00
|
|
|
Returns list of supported tags.
|
|
|
|
The list is in uppercase and tags are delimited by ','.
|
|
|
|
Example: @c "I,B,FONT,P"
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
2008-10-13 09:24:43 -04:00
|
|
|
virtual wxString GetSupportedTags() = 0;
|
2008-03-08 08:52:38 -05:00
|
|
|
|
|
|
|
/**
|
|
|
|
This is the core method of each handler. It is called each time
|
2008-03-09 08:33:59 -04:00
|
|
|
one of supported tags is detected. @a tag contains all necessary
|
2008-03-08 08:52:38 -05:00
|
|
|
info (see wxHtmlTag for details).
|
2008-10-25 16:44:52 -04:00
|
|
|
|
|
|
|
Example:
|
|
|
|
|
|
|
|
@code
|
|
|
|
bool MyHandler::HandleTag(const wxHtmlTag& tag)
|
|
|
|
{
|
|
|
|
...
|
|
|
|
// change state of parser (e.g. set bold face)
|
|
|
|
ParseInner(tag);
|
|
|
|
...
|
|
|
|
// restore original state of parser
|
|
|
|
}
|
|
|
|
@endcode
|
|
|
|
|
|
|
|
You shouldn't call ParseInner() if the tag is not paired with an ending one.
|
|
|
|
|
|
|
|
@return @true if ParseInner() was called, @false otherwise.
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
2008-10-13 09:24:43 -04:00
|
|
|
virtual bool HandleTag(const wxHtmlTag& tag) = 0;
|
2008-03-08 08:52:38 -05:00
|
|
|
|
|
|
|
/**
|
|
|
|
This method calls parser's wxHtmlParser::DoParsing method
|
|
|
|
for the string between this tag and the paired ending tag:
|
2008-10-25 16:44:52 -04:00
|
|
|
@code
|
|
|
|
...<A HREF="x.htm">Hello, world!</A>...
|
|
|
|
@endcode
|
|
|
|
|
|
|
|
In this example, a call to ParseInner() (with @a tag pointing to A tag)
|
2008-03-08 08:52:38 -05:00
|
|
|
will parse 'Hello, world!'.
|
|
|
|
*/
|
|
|
|
void ParseInner(const wxHtmlTag& tag);
|
|
|
|
|
|
|
|
/**
|
2008-03-09 08:33:59 -04:00
|
|
|
Assigns @a parser to this handler. Each @b instance of handler
|
2008-03-08 08:52:38 -05:00
|
|
|
is guaranteed to be called only from the parser.
|
|
|
|
*/
|
|
|
|
virtual void SetParser(wxHtmlParser parser);
|
|
|
|
|
2008-10-25 16:44:52 -04:00
|
|
|
protected:
|
2008-03-08 08:52:38 -05:00
|
|
|
/**
|
|
|
|
This attribute is used to access parent parser. It is protected so that
|
|
|
|
it can't be accessed by user but can be accessed from derived classes.
|
|
|
|
*/
|
2008-10-25 16:44:52 -04:00
|
|
|
wxHtmlParser* m_Parser;
|
2008-03-08 08:52:38 -05:00
|
|
|
};
|
|
|
|
|
|
|
|
|
2008-03-10 11:24:38 -04:00
|
|
|
|
2008-03-08 08:52:38 -05:00
|
|
|
/**
|
|
|
|
@class wxHtmlParser
|
2008-03-08 09:43:31 -05:00
|
|
|
|
2008-03-08 08:52:38 -05:00
|
|
|
Classes derived from this handle the @b generic parsing of HTML documents: it
|
2008-10-25 16:44:52 -04:00
|
|
|
scans the document and divide it into blocks of tags (where one block consists
|
|
|
|
of beginning and ending tag and of text between these two tags).
|
2008-03-08 09:43:31 -05:00
|
|
|
|
2008-10-25 16:44:52 -04:00
|
|
|
It is independent from wxHtmlWindow and can be used as stand-alone parser.
|
2008-03-08 09:43:31 -05:00
|
|
|
|
2008-03-08 08:52:38 -05:00
|
|
|
It uses system of tag handlers to parse the HTML document. Tag handlers
|
|
|
|
are not statically shared by all instances but are created for each
|
|
|
|
wxHtmlParser instance. The reason is that the handler may contain
|
|
|
|
document-specific temporary data used during parsing (e.g. complicated
|
|
|
|
structures like tables).
|
2008-03-08 09:43:31 -05:00
|
|
|
|
2008-03-08 08:52:38 -05:00
|
|
|
Typically the user calls only the wxHtmlParser::Parse method.
|
2008-03-08 09:43:31 -05:00
|
|
|
|
2008-03-08 08:52:38 -05:00
|
|
|
@library{wxhtml}
|
|
|
|
@category{html}
|
2008-03-08 09:43:31 -05:00
|
|
|
|
2008-10-25 16:44:52 -04:00
|
|
|
@see @ref overview_html_cells, @ref overview_html_handlers, wxHtmlTag
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
2008-03-08 09:43:31 -05:00
|
|
|
class wxHtmlParser
|
2008-03-08 08:52:38 -05:00
|
|
|
{
|
|
|
|
public:
|
|
|
|
/**
|
|
|
|
Constructor.
|
|
|
|
*/
|
|
|
|
wxHtmlParser();
|
|
|
|
|
|
|
|
/**
|
|
|
|
This may (and may not) be overwritten in derived class.
|
2008-10-25 16:44:52 -04:00
|
|
|
|
2008-03-08 09:43:31 -05:00
|
|
|
This method is called each time new tag is about to be added.
|
2008-10-25 16:44:52 -04:00
|
|
|
@a tag contains information about the tag. (See wxHtmlTag for details.)
|
|
|
|
|
|
|
|
Default (wxHtmlParser) behaviour is this: first it finds a handler capable
|
|
|
|
of handling this tag and then it calls handler's HandleTag() method.
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
2008-10-13 07:09:56 -04:00
|
|
|
virtual void AddTag(const wxHtmlTag& tag);
|
2008-03-08 08:52:38 -05:00
|
|
|
|
|
|
|
/**
|
2008-10-25 16:44:52 -04:00
|
|
|
Adds handler to the internal list ( hash table) of handlers.
|
|
|
|
This method should not be called directly by user but rather by derived class'
|
2008-03-08 08:52:38 -05:00
|
|
|
constructor.
|
2008-10-25 16:44:52 -04:00
|
|
|
|
2008-03-08 08:52:38 -05:00
|
|
|
This adds the handler to this @b instance of wxHtmlParser, not to
|
2008-10-25 16:44:52 -04:00
|
|
|
all objects of this class!
|
|
|
|
(Static front-end to AddTagHandler is provided by wxHtmlWinParser).
|
|
|
|
|
2008-03-08 08:52:38 -05:00
|
|
|
All handlers are deleted on object deletion.
|
|
|
|
*/
|
|
|
|
virtual void AddTagHandler(wxHtmlTagHandler handler);
|
|
|
|
|
|
|
|
/**
|
|
|
|
Must be overwritten in derived class.
|
2008-10-25 16:44:52 -04:00
|
|
|
|
|
|
|
This method is called by DoParsing() each time a part of text is parsed.
|
|
|
|
@a txt is NOT only one word, it is substring of input.
|
|
|
|
It is not formatted or preprocessed (so white spaces are unmodified).
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
|
|
|
virtual void AddWord(const wxString& txt);
|
|
|
|
|
|
|
|
/**
|
2008-10-25 16:44:52 -04:00
|
|
|
Parses the m_Source from @a begin_pos to @a end_pos - 1.
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
|
|
|
void DoParsing(int begin_pos, int end_pos);
|
2008-10-25 16:44:52 -04:00
|
|
|
|
|
|
|
/**
|
|
|
|
Parses the whole m_Source.
|
|
|
|
*/
|
2008-03-08 09:43:31 -05:00
|
|
|
void DoParsing();
|
2008-03-08 08:52:38 -05:00
|
|
|
|
|
|
|
/**
|
|
|
|
This must be called after DoParsing().
|
|
|
|
*/
|
|
|
|
virtual void DoneParser();
|
|
|
|
|
|
|
|
/**
|
|
|
|
Returns pointer to the file system. Because each tag handler has
|
|
|
|
reference to it is parent parser it can easily request the file by
|
2008-10-25 16:44:52 -04:00
|
|
|
calling:
|
|
|
|
@code
|
|
|
|
wxFSFile *f = m_Parser -> GetFS() -> OpenFile("image.jpg");
|
|
|
|
@endcode
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
2008-03-09 12:24:26 -04:00
|
|
|
wxFileSystem* GetFS() const;
|
2008-03-08 08:52:38 -05:00
|
|
|
|
|
|
|
/**
|
2008-10-25 16:44:52 -04:00
|
|
|
Returns product of parsing.
|
|
|
|
Returned value is result of parsing of the document.
|
|
|
|
|
|
|
|
The type of this result depends on internal representation in derived
|
|
|
|
parser (but it must be derived from wxObject!).
|
2008-03-08 08:52:38 -05:00
|
|
|
See wxHtmlWinParser for details.
|
|
|
|
*/
|
2008-10-13 09:24:43 -04:00
|
|
|
virtual wxObject* GetProduct() = 0;
|
2008-03-08 08:52:38 -05:00
|
|
|
|
|
|
|
/**
|
|
|
|
Returns pointer to the source being parsed.
|
|
|
|
*/
|
|
|
|
wxString* GetSource();
|
|
|
|
|
|
|
|
/**
|
2008-10-25 16:44:52 -04:00
|
|
|
Setups the parser for parsing the @a source string.
|
|
|
|
(Should be overridden in derived class)
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
|
|
|
virtual void InitParser(const wxString& source);
|
|
|
|
|
|
|
|
/**
|
|
|
|
Opens given URL and returns @c wxFSFile object that can be used to read data
|
|
|
|
from it. This method may return @NULL in one of two cases: either the URL doesn't
|
|
|
|
point to any valid resource or the URL is blocked by overridden implementation
|
|
|
|
of @e OpenURL in derived class.
|
2008-10-25 16:44:52 -04:00
|
|
|
|
2008-03-08 09:43:31 -05:00
|
|
|
@param type
|
2008-03-09 08:33:59 -04:00
|
|
|
Indicates type of the resource. Is one of:
|
2008-10-25 16:44:52 -04:00
|
|
|
- wxHTML_URL_PAGE: Opening a HTML page.
|
|
|
|
- wxHTML_URL_IMAGE: Opening an image.
|
|
|
|
- wxHTML_URL_OTHER: Opening a resource that doesn't fall into
|
|
|
|
any other category.
|
2008-03-08 09:43:31 -05:00
|
|
|
@param url
|
2008-03-09 08:33:59 -04:00
|
|
|
URL being opened.
|
2008-10-25 16:44:52 -04:00
|
|
|
|
|
|
|
@note
|
|
|
|
Always use this method in tag handlers instead of GetFS()->OpenFile()
|
|
|
|
because it can block the URL and is thus more secure.
|
|
|
|
Default behaviour is to call wxHtmlWindow::OnOpeningURL of the associated
|
|
|
|
wxHtmlWindow object (which may decide to block the URL or redirect it to
|
|
|
|
another one),if there's any, and always open the URL if the parser is not
|
|
|
|
used with wxHtmlWindow.
|
|
|
|
Returned wxFSFile object is not guaranteed to point to url, it might have
|
|
|
|
been redirected!
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
|
|
|
virtual wxFSFile* OpenURL(wxHtmlURLType type,
|
|
|
|
const wxString& url);
|
|
|
|
|
|
|
|
/**
|
|
|
|
Proceeds parsing of the document. This is end-user method. You can simply
|
2008-10-25 16:44:52 -04:00
|
|
|
call it when you need to obtain parsed output (which is parser-specific).
|
|
|
|
|
2008-03-08 08:52:38 -05:00
|
|
|
The method does these things:
|
2008-10-25 16:44:52 -04:00
|
|
|
-# calls InitParser(source)
|
|
|
|
-# calls DoParsing()
|
|
|
|
-# calls GetProduct()
|
|
|
|
-# calls DoneParser()
|
|
|
|
-# returns value returned by GetProduct()
|
|
|
|
|
|
|
|
You shouldn't use InitParser(), DoParsing(), GetProduct() or DoneParser() directly.
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
|
|
|
wxObject* Parse(const wxString& source);
|
|
|
|
|
|
|
|
/**
|
2008-10-25 16:44:52 -04:00
|
|
|
Restores parser's state before last call to PushTagHandler().
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
|
|
|
void PopTagHandler();
|
|
|
|
|
|
|
|
/**
|
2008-03-08 09:43:31 -05:00
|
|
|
Forces the handler to handle additional tags
|
|
|
|
(not returned by wxHtmlTagHandler::GetSupportedTags).
|
2008-03-08 08:52:38 -05:00
|
|
|
The handler should already be added to this parser.
|
2008-10-25 16:44:52 -04:00
|
|
|
|
2008-03-08 09:43:31 -05:00
|
|
|
@param handler
|
2008-03-09 08:33:59 -04:00
|
|
|
the handler
|
2008-03-08 09:43:31 -05:00
|
|
|
@param tags
|
2008-10-25 16:44:52 -04:00
|
|
|
List of tags (in same format as GetSupportedTags()'s return value).
|
|
|
|
The parser will redirect these tags to handler (until call to PopTagHandler()).
|
|
|
|
|
|
|
|
Example:
|
|
|
|
|
|
|
|
Imagine you want to parse following pseudo-html structure:
|
|
|
|
@code
|
|
|
|
<myitems>
|
|
|
|
<param name="one" value="1">
|
|
|
|
<param name="two" value="2">
|
|
|
|
</myitems>
|
|
|
|
|
|
|
|
<execute>
|
|
|
|
<param program="text.exe">
|
|
|
|
</execute>
|
|
|
|
@endcode
|
|
|
|
|
|
|
|
It is obvious that you cannot use only one tag handler for \<param\> tag.
|
|
|
|
Instead you must use context-sensitive handlers for \<param\> inside \<myitems\>
|
|
|
|
and \<param\> inside \<execute\>.
|
|
|
|
This is the preferred solution:
|
|
|
|
|
|
|
|
@code
|
|
|
|
TAG_HANDLER_BEGIN(MYITEM, "MYITEMS")
|
|
|
|
TAG_HANDLER_PROC(tag)
|
|
|
|
{
|
|
|
|
// ...something...
|
|
|
|
|
|
|
|
m_Parser -> PushTagHandler(this, "PARAM");
|
|
|
|
ParseInner(tag);
|
|
|
|
m_Parser -> PopTagHandler();
|
|
|
|
|
|
|
|
// ...something...
|
|
|
|
}
|
|
|
|
TAG_HANDLER_END(MYITEM)
|
|
|
|
@endcode
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
|
|
|
void PushTagHandler(wxHtmlTagHandler* handler,
|
|
|
|
const wxString& tags);
|
|
|
|
|
|
|
|
/**
|
2008-10-25 16:44:52 -04:00
|
|
|
Sets the virtual file system that will be used to request additional files.
|
|
|
|
(For example @c IMG tag handler requests wxFSFile with the image data.)
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
2008-03-09 08:33:59 -04:00
|
|
|
void SetFS(wxFileSystem fs);
|
2008-03-08 08:52:38 -05:00
|
|
|
|
|
|
|
/**
|
2008-10-25 16:44:52 -04:00
|
|
|
Call this function to interrupt parsing from a tag handler.
|
|
|
|
No more tags will be parsed afterward. This function may only be called
|
|
|
|
from Parse() or any function called by it (i.e. from tag handlers).
|
2008-03-08 08:52:38 -05:00
|
|
|
*/
|
2008-09-27 07:21:10 -04:00
|
|
|
virtual void StopParsing();
|
2008-03-08 08:52:38 -05:00
|
|
|
};
|
2008-03-10 11:24:38 -04:00
|
|
|
|