2001-07-16 18:54:14 -04:00
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
%% Name: regex.tex
|
|
|
|
%% Purpose: wxRegEx documentation
|
|
|
|
%% Author: Vadim Zeitlin
|
|
|
|
%% Modified by:
|
|
|
|
%% Created: 14.07.01
|
|
|
|
%% RCS-ID: $Id$
|
|
|
|
%% Copyright: (c) 2001 Vadim Zeitlin
|
|
|
|
%% License: wxWindows license
|
|
|
|
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
|
|
|
|
|
|
|
|
\section{\class{wxRegEx}}\label{wxregex}
|
|
|
|
|
|
|
|
wxRegEx represents a regular expression. The regular expression syntax
|
|
|
|
supported is the POSIX one. Both basic and extended regular expressions are
|
|
|
|
supported but, unlike the POSIX C API, the extended ones are used by default.
|
|
|
|
|
|
|
|
This class provides support for regular expressions matching and also
|
|
|
|
replacement. It is built on top of either the system library (if it has support
|
|
|
|
for POSIX regular expressions - which is the case for most modern Unices) or
|
|
|
|
uses the built-in Henry Spencer's library. In the latter case you need to abide
|
|
|
|
by the terms of its copyright:
|
|
|
|
|
|
|
|
\begin{verbatim}
|
|
|
|
Copyright 1992, 1993, 1994, 1997 Henry Spencer. All rights reserved.
|
|
|
|
This software is not subject to any license of the American Telephone
|
|
|
|
and Telegraph Company or of the Regents of the University of California.
|
|
|
|
|
|
|
|
Permission is granted to anyone to use this software for any purpose on
|
|
|
|
any computer system, and to alter it and redistribute it, subject
|
|
|
|
to the following restrictions:
|
|
|
|
|
|
|
|
1. The author is not responsible for the consequences of use of this
|
|
|
|
software, no matter how awful, even if they arise from flaws in it.
|
|
|
|
|
|
|
|
2. The origin of this software must not be misrepresented, either by
|
|
|
|
explicit claim or by omission. Since few users ever read sources,
|
|
|
|
credits must appear in the documentation.
|
|
|
|
|
|
|
|
3. Altered versions must be plainly marked as such, and must not be
|
|
|
|
misrepresented as being the original software. Since few users
|
|
|
|
ever read sources, credits must appear in the documentation.
|
|
|
|
|
|
|
|
4. This notice may not be removed or altered.
|
|
|
|
\end{verbatim}
|
|
|
|
|
|
|
|
\wxheading{Derived from}
|
|
|
|
|
|
|
|
No base class
|
|
|
|
|
|
|
|
\wxheading{Data structures}
|
|
|
|
|
|
|
|
Flags for regex compilation to be used with \helpref{Compile()}{wxregexcompile}:
|
2001-07-18 21:07:56 -04:00
|
|
|
|
2001-07-16 18:54:14 -04:00
|
|
|
\begin{verbatim}
|
|
|
|
enum
|
|
|
|
{
|
|
|
|
// use extended regex syntax (default)
|
2001-07-18 18:00:03 -04:00
|
|
|
wxRE_EXTENDED = 0,
|
2001-07-16 18:54:14 -04:00
|
|
|
|
|
|
|
// use basic RE syntax
|
2001-07-18 18:00:03 -04:00
|
|
|
wxRE_BASIC = 2,
|
2001-07-16 18:54:14 -04:00
|
|
|
|
|
|
|
// ignore case in match
|
2001-07-18 18:00:03 -04:00
|
|
|
wxRE_ICASE = 4,
|
2001-07-16 18:54:14 -04:00
|
|
|
|
|
|
|
// only check match, don't set back references
|
2001-07-18 18:00:03 -04:00
|
|
|
wxRE_NOSUB = 8,
|
2001-07-16 18:54:14 -04:00
|
|
|
|
|
|
|
// if not set, treat '\n' as an ordinary character, otherwise it is
|
|
|
|
// special: it is not matched by '.' and '^' and '$' always match
|
2001-07-18 18:00:03 -04:00
|
|
|
// after/before it regardless of the setting of wxRE_NOT[BE]OL
|
|
|
|
wxRE_NEWLINE = 16,
|
2001-07-16 18:54:14 -04:00
|
|
|
|
|
|
|
// default flags
|
2001-07-18 18:00:03 -04:00
|
|
|
wxRE_DEFAULT = wxRE_EXTENDED
|
2001-07-16 18:54:14 -04:00
|
|
|
}
|
|
|
|
\end{verbatim}
|
|
|
|
|
|
|
|
Flags for regex matching to be used with \helpref{Matches()}{wxregexmatches}.
|
|
|
|
|
|
|
|
These flags are mainly useful when doing several matches in a long string
|
2002-05-05 10:24:07 -04:00
|
|
|
to prevent erroneous matches for {\tt '\textasciicircum'} and {\tt '\$'}:
|
2001-07-18 21:07:56 -04:00
|
|
|
|
2001-07-16 18:54:14 -04:00
|
|
|
\begin{verbatim}
|
|
|
|
enum
|
|
|
|
{
|
|
|
|
// '^' doesn't match at the start of line
|
2001-07-18 18:00:03 -04:00
|
|
|
wxRE_NOTBOL = 32,
|
2001-07-16 18:54:14 -04:00
|
|
|
|
|
|
|
// '$' doesn't match at the end of line
|
2001-07-18 18:00:03 -04:00
|
|
|
wxRE_NOTEOL = 64
|
2001-07-16 18:54:14 -04:00
|
|
|
}
|
|
|
|
\end{verbatim}
|
|
|
|
|
2001-07-18 21:07:56 -04:00
|
|
|
\wxheading{Examples}
|
|
|
|
|
|
|
|
A bad example of processing some text containing email addresses (the example
|
|
|
|
is bad because real email addresses can have a more complicated form than
|
|
|
|
{\tt user@host.net}):
|
|
|
|
|
|
|
|
\begin{verbatim}
|
|
|
|
wxString text;
|
|
|
|
...
|
|
|
|
wxRegEx reEmail = "([^@]+)@([[:alnum:].-_].)+([[:alnum:]]+)";
|
|
|
|
if ( reEmail.Matches(text) )
|
|
|
|
{
|
|
|
|
wxString email = reEmail.GetMatch(text);
|
|
|
|
wxString username = reEmail.GetMatch(text, 1);
|
|
|
|
if ( reEmail.GetMatch(text, 3) == "com" ) // .com TLD?
|
|
|
|
{
|
|
|
|
...
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// or we could do this to hide the email address
|
|
|
|
size_t count = reEmail.ReplaceAll(&text, "HIDDEN@\\2\\3");
|
|
|
|
printf("text now contains %u hidden addresses", count);
|
|
|
|
\end{verbatim}
|
|
|
|
|
2001-07-16 18:54:14 -04:00
|
|
|
\latexignore{\rtfignore{\wxheading{Members}}}
|
|
|
|
|
|
|
|
\membersection{wxRegEx::wxRegEx}\label{wxregexwxregex}
|
|
|
|
|
|
|
|
\func{}{wxRegEx}{\void}
|
|
|
|
|
|
|
|
Default ctor: use \helpref{Compile()}{wxregexcompile} later.
|
|
|
|
|
|
|
|
\func{}{wxRegEx}{\param{const wxString\& }{expr}, \param{int }{flags = wxRE\_DEFAULT}}
|
|
|
|
|
|
|
|
Create and compile the regular expression, use
|
|
|
|
\helpref{IsValid}{wxregexisvalid} to test for compilation errors.
|
|
|
|
|
|
|
|
\membersection{wxRegEx::\destruct{wxRegEx}}\label{wxregexdtor}
|
|
|
|
|
|
|
|
\func{}{\destruct{wxRegEx}}{\void}
|
|
|
|
|
|
|
|
The destructor is not virtual, so do not derive from this class.
|
|
|
|
|
|
|
|
\membersection{wxRegEx::Compile}\label{wxregexcompile}
|
|
|
|
|
|
|
|
\func{bool}{Compile}{\param{const wxString\& }{pattern}, \param{int }{flags = wxRE\_DEFAULT}}
|
|
|
|
|
2001-07-18 21:07:56 -04:00
|
|
|
Compile the string into a regular expression, return {\tt TRUE} if ok or {\tt FALSE}
|
2001-07-16 18:54:14 -04:00
|
|
|
if string has a syntax error.
|
|
|
|
|
|
|
|
\membersection{wxRegEx::IsValid}\label{wxregexisvalid}
|
|
|
|
|
|
|
|
\constfunc{bool}{IsValid}{\void}
|
|
|
|
|
|
|
|
Return {\tt TRUE} if this is a valid compiled regular expression, {\tt FALSE}
|
|
|
|
otherwise.
|
|
|
|
|
|
|
|
\membersection{wxRegEx::GetMatch}\label{wxregexgetmatch}
|
|
|
|
|
|
|
|
\constfunc{bool}{GetMatch}{\param{size\_t* }{start}, \param{size\_t* }{len}, \param{size\_t }{index = 0}}
|
|
|
|
|
|
|
|
Get the start index and the length of the match of the expression
|
|
|
|
(if {\it index} is $0$) or a bracketed subexpression ({\it index} different
|
|
|
|
from $0$).
|
|
|
|
|
|
|
|
May only be called after a successful call to \helpref{Matches()}{wxregexmatches}
|
|
|
|
and only if {\tt wxRE\_NOSUB} was {\bf not} used in
|
|
|
|
\helpref{Compile()}{wxregexcompile}.
|
|
|
|
|
|
|
|
Returns {\tt FALSE} if no match or if an error occurred.
|
|
|
|
|
|
|
|
\constfunc{wxString}{GetMatch}{\param{const wxString\& }{text}, \param{size\_t }{index = 0}}
|
|
|
|
|
|
|
|
Returns the part of the string corresponding to the match where {\it index} is
|
|
|
|
interpreted as above. An empty string is returned if the match failed.
|
|
|
|
|
|
|
|
May only be called after a successful call to \helpref{Matches()}{wxregexmatches}
|
|
|
|
and only if {\tt wxRE\_NOSUB} was {\bf not} used in
|
|
|
|
\helpref{Compile()}{wxregexcompile}.
|
|
|
|
|
|
|
|
\membersection{wxRegEx::Matches}\label{wxregexmatches}
|
|
|
|
|
|
|
|
\constfunc{bool}{Matches}{\param{const wxChar* }{text}, \param{int }{flags = 0}}
|
|
|
|
|
|
|
|
Matches the precompiled regular expression against the string {\it text},
|
|
|
|
returns {\tt TRUE} if matches and {\tt FALSE} otherwise.
|
|
|
|
|
|
|
|
Flags may be a combination of {\tt wxRE\_NOTBOL} and {\tt wxRE\_NOTEOL}.
|
|
|
|
|
|
|
|
May only be called after a successful call to \helpref{Compile()}{wxregexcompile}.
|
|
|
|
|
|
|
|
\membersection{wxRegEx::Replace}\label{wxregexreplace}
|
|
|
|
|
|
|
|
\constfunc{int}{Replace}{\param{wxString* }{text}, \param{const wxString\& }{replacement}, \param{size\_t }{maxMatches = 0}}
|
|
|
|
|
|
|
|
Replaces the current regular expression in the string pointed to by
|
|
|
|
{\it text}, with the text in {\it replacement} and returns the number of matches
|
|
|
|
replaced (may be $0$ if none found) or $-1$ on error.
|
|
|
|
|
2001-07-20 04:47:48 -04:00
|
|
|
The replacement text may contain back references {\tt $\backslash$number} which will be
|
2001-07-16 18:54:14 -04:00
|
|
|
replaced with the value of the corresponding subexpression in the
|
2001-07-20 04:47:48 -04:00
|
|
|
pattern match. {\tt $\backslash$0} corresponds to the entire match and {\tt \&} is a
|
2001-07-16 18:54:14 -04:00
|
|
|
synonym for it. Backslash may be used to quote itself or {\tt \&} character.
|
|
|
|
|
|
|
|
{\it maxMatches} may be used to limit the number of replacements made, setting
|
2002-06-07 16:15:28 -04:00
|
|
|
it to $1$, for example, will only replace the first occurrence (if any) of the
|
2001-07-16 18:54:14 -04:00
|
|
|
pattern in the text while default value of $0$ means replace all.
|
|
|
|
|
|
|
|
\membersection{wxRegEx::ReplaceAll}\label{wxregexreplaceall}
|
|
|
|
|
|
|
|
\constfunc{int}{ReplaceAll}{\param{wxString* }{text}, \param{const wxString\& }{replacement}}
|
|
|
|
|
2002-06-07 16:15:28 -04:00
|
|
|
Replace all occurrences: this is actually a synonym for
|
2001-07-16 18:54:14 -04:00
|
|
|
\helpref{Replace()}{wxregexreplace}.
|
|
|
|
|
|
|
|
\wxheading{See also}
|
|
|
|
|
|
|
|
\helpref{ReplaceFirst}{wxregexreplacefirst}
|
|
|
|
|
|
|
|
\membersection{wxRegEx::ReplaceFirst}\label{wxregexreplacefirst}
|
|
|
|
|
|
|
|
\constfunc{int}{ReplaceFirst}{\param{wxString* }{text}, \param{const wxString\& }{replacement}}
|
|
|
|
|
2002-06-07 16:15:28 -04:00
|
|
|
Replace the first occurrence.
|
2001-07-16 18:54:14 -04:00
|
|
|
|
|
|
|
\wxheading{See also}
|
|
|
|
|
|
|
|
\helpref{Replace}{wxregexreplace}
|
|
|
|
|