applied patch 890642: wxRE_ADVANCED flag and docs
git-svn-id: https://svn.wxwidgets.org/svn/wx/wxWidgets/trunk@25866 c3d73ce0-8a6f-49c7-b76d-6d57e0e08775
This commit is contained in:
parent
3ca4086b22
commit
1701c4d421
@ -31,9 +31,9 @@ ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
**********************************************************************
|
||||
|
||||
PostgreSQL adopted the code out of Tcl 8.4.1. Portions of regc_locale.c
|
||||
and re_syntax.n were developed by Tcl developers other than Henry; these
|
||||
files bear the Tcl copyright and license notice:
|
||||
wxWindows adopted the code out of Tcl 8.4.5. Portions of regc_locale.c
|
||||
and re_syntax.n were developed by Tcl developers other than Henry Spencer;
|
||||
these files bear the Tcl copyright and license notice:
|
||||
|
||||
**********************************************************************
|
||||
|
||||
@ -80,31 +80,7 @@ terms specified in this license.
|
||||
|
||||
**********************************************************************
|
||||
|
||||
Subsequent modifications to the code by the PostgreSQL project follow
|
||||
the same license terms as the rest of PostgreSQL.
|
||||
(License follows)
|
||||
****************************************************************************
|
||||
PostgreSQL Database Management System
|
||||
(formerly known as Postgres, then as Postgres95)
|
||||
|
||||
Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
|
||||
|
||||
Portions Copyright (c) 1994, The Regents of the University of California
|
||||
|
||||
Permission to use, copy, modify, and distribute this software and its
|
||||
documentation for any purpose, without fee, and without a written agreement
|
||||
is hereby granted, provided that the above copyright notice and this
|
||||
paragraph and the following two paragraphs appear in all copies.
|
||||
|
||||
IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
|
||||
DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
|
||||
LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
|
||||
DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
|
||||
INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS
|
||||
ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO
|
||||
PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
|
||||
The wxWindows license applies to further modifications to regcustom.h
|
||||
and regc_locale.c.
|
||||
|
||||
****************************************************************************
|
||||
|
@ -1,69 +1,41 @@
|
||||
wxWindows regex
|
||||
----------------
|
||||
This is a version of Henry Spencer's regex,
|
||||
which was taken from postresql, which was
|
||||
taken from the source of TCL (Toolkit Command Language).
|
||||
---------------
|
||||
This is a version of Henry Spencer's regex, which was taken from the
|
||||
source of TCL (Toolkit Command Language). It implements POSIX regular
|
||||
expressions and also supports Unicode and some Perl5 extensions.
|
||||
|
||||
This is version of Henry Spencer's library is
|
||||
modified by the wxWindows team. The modifications
|
||||
made by the wxWindows team are as follows:
|
||||
regcustom.h (all source code)
|
||||
regex.h (function wrappers, other)
|
||||
regc_locale.c (A function or two).
|
||||
All the rest (Using standard c library routines instead of
|
||||
postresql routines, various cleanup/optimizations)
|
||||
The modifications made by the wxWindows team are as follows:
|
||||
|
||||
The source code that is wxWindows-specific is as follows:
|
||||
regcustom.h (all source code, see comments in source on how to replace)
|
||||
regex.h (where noted with comments in source, compiler workarounds)
|
||||
regcomp.c (a few of the functions prefixed with wx_,
|
||||
these may be replaced by c library routines)
|
||||
regcustom.h
|
||||
-----------
|
||||
Types and constants appropriate for our use of the library are defined
|
||||
here.
|
||||
|
||||
This newer library was chosen over the old one because
|
||||
Henry Spencer's old library did not support unicode and
|
||||
had some other bugs that were fixed by this one, and
|
||||
the license was incompatible with the wxWindows license
|
||||
and the gpl.
|
||||
regex.h
|
||||
-------
|
||||
This is unmodified. Note though, that a portion of it (clearly marked)
|
||||
is copied from regcustom.h as part of the build process.
|
||||
|
||||
Regular Expression syntax documentation is in re_syntax.n.
|
||||
Programming information (from older regex, but with
|
||||
the function wrappers implemented in the wxWindows
|
||||
version, the usage is somewhat the same) is in regex.3.
|
||||
regc_locale.c
|
||||
-------------
|
||||
This module provides character classifications.
|
||||
|
||||
Modifications made by the wxWindows team are not licensed.
|
||||
Contact - Ryan Norton <wxprojects@comcast.net>
|
||||
The current version from Tcl supports only a Unicode build. The
|
||||
original code from Henry Spencer, on the other hand, was ASCII only.
|
||||
Therefore, in order to support both, code from the ASCII version has been
|
||||
incorporated into the Unicode version, conditionally compiled depending
|
||||
on wxUSE_UNICODE.
|
||||
|
||||
The original readme (from the TCL distribution) follows:
|
||||
The only non-trivial dependencies were: Tcl_UniCharToUpper,
|
||||
Tcl_UniCharToLower and Tcl_UniCharToTitle. The implementations of these
|
||||
have also been incorporated (from Tcl). These in turn depend only the data
|
||||
tables in tclUniData.c (which is unmodified). At some point wxWindows
|
||||
may have it's own Unicode character classification code, at which point
|
||||
these should be used instead.
|
||||
|
||||
alpha3.8 release.
|
||||
Tue Aug 10 15:51:48 EDT 1999
|
||||
henry@spsystems.net (formerly henry@zoo.toronto.edu)
|
||||
Other dependencies (on Tcl_DString) have been eliminated using wxWindows
|
||||
wxChar functions.
|
||||
|
||||
See WHATSNEW for change listing.
|
||||
The ASCII version has also been extended to support character
|
||||
classifications based on the current locale rather than ASCII only.
|
||||
|
||||
installation notes:
|
||||
--------
|
||||
Read the comments at the beginning of Makefile before running.
|
||||
|
||||
Utils.h contains some things that just might have to be modified on
|
||||
some systems, as well as a nested include (ugh) of <assert.h>.
|
||||
|
||||
The "fake" directory contains quick-and-dirty fakes for some header
|
||||
files and routines that old systems may not have. Note also that
|
||||
-DUSEBCOPY will make utils.h substitute bcopy() for memmove().
|
||||
|
||||
After that, "make r" will build regcomp.o, regexec.o, regfree.o,
|
||||
and regerror.o (the actual routines), bundle them together into a test
|
||||
program, and run regression tests on them. No output is good output.
|
||||
|
||||
"make lib" builds just the .o files for the actual routines (when
|
||||
you're happy with testing and have adjusted CFLAGS for production),
|
||||
and puts them together into libregex.a. You can pick up either the
|
||||
library or *.o ("make lib" makes sure there are no other .o files left
|
||||
around to confuse things).
|
||||
|
||||
Main.c, debug.c, split.c are used for regression testing but are not part
|
||||
of the RE routines themselves.
|
||||
|
||||
Regex.h goes in /usr/include. All other .h files are internal only.
|
||||
--------
|
||||
|
@ -10,112 +10,120 @@
|
||||
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
|
||||
*
|
||||
* RCS: @(#) $Id$
|
||||
*
|
||||
* wxWindows:
|
||||
* The Scriptics license can be found in the file COPYRIGHT. Modifications
|
||||
* for wxWindows are under the wxWindows licence, see README for details.
|
||||
*/
|
||||
|
||||
#define CONST const
|
||||
|
||||
/* ASCII character-name table */
|
||||
|
||||
static struct cname {
|
||||
char *name;
|
||||
chr *name;
|
||||
char code;
|
||||
} cnames[] = {
|
||||
{"NUL", '\0'},
|
||||
{"SOH", '\001'},
|
||||
{"STX", '\002'},
|
||||
{"ETX", '\003'},
|
||||
{"EOT", '\004'},
|
||||
{"ENQ", '\005'},
|
||||
{"ACK", '\006'},
|
||||
{"BEL", '\007'},
|
||||
{"alert", '\007'},
|
||||
{"BS", '\010'},
|
||||
{"backspace", '\b'},
|
||||
{"HT", '\011'},
|
||||
{"tab", '\t'},
|
||||
{"LF", '\012'},
|
||||
{"newline", '\n'},
|
||||
{"VT", '\013'},
|
||||
{"vertical-tab", '\v'},
|
||||
{"FF", '\014'},
|
||||
{"form-feed", '\f'},
|
||||
{"CR", '\015'},
|
||||
{"carriage-return", '\r'},
|
||||
{"SO", '\016'},
|
||||
{"SI", '\017'},
|
||||
{"DLE", '\020'},
|
||||
{"DC1", '\021'},
|
||||
{"DC2", '\022'},
|
||||
{"DC3", '\023'},
|
||||
{"DC4", '\024'},
|
||||
{"NAK", '\025'},
|
||||
{"SYN", '\026'},
|
||||
{"ETB", '\027'},
|
||||
{"CAN", '\030'},
|
||||
{"EM", '\031'},
|
||||
{"SUB", '\032'},
|
||||
{"ESC", '\033'},
|
||||
{"IS4", '\034'},
|
||||
{"FS", '\034'},
|
||||
{"IS3", '\035'},
|
||||
{"GS", '\035'},
|
||||
{"IS2", '\036'},
|
||||
{"RS", '\036'},
|
||||
{"IS1", '\037'},
|
||||
{"US", '\037'},
|
||||
{"space", ' '},
|
||||
{"exclamation-mark",'!'},
|
||||
{"quotation-mark", '"'},
|
||||
{"number-sign", '#'},
|
||||
{"dollar-sign", '$'},
|
||||
{"percent-sign", '%'},
|
||||
{"ampersand", '&'},
|
||||
{"apostrophe", '\''},
|
||||
{"left-parenthesis",'('},
|
||||
{"right-parenthesis", ')'},
|
||||
{"asterisk", '*'},
|
||||
{"plus-sign", '+'},
|
||||
{"comma", ','},
|
||||
{"hyphen", '-'},
|
||||
{"hyphen-minus", '-'},
|
||||
{"period", '.'},
|
||||
{"full-stop", '.'},
|
||||
{"slash", '/'},
|
||||
{"solidus", '/'},
|
||||
{"zero", '0'},
|
||||
{"one", '1'},
|
||||
{"two", '2'},
|
||||
{"three", '3'},
|
||||
{"four", '4'},
|
||||
{"five", '5'},
|
||||
{"six", '6'},
|
||||
{"seven", '7'},
|
||||
{"eight", '8'},
|
||||
{"nine", '9'},
|
||||
{"colon", ':'},
|
||||
{"semicolon", ';'},
|
||||
{"less-than-sign", '<'},
|
||||
{"equals-sign", '='},
|
||||
{"greater-than-sign", '>'},
|
||||
{"question-mark", '?'},
|
||||
{"commercial-at", '@'},
|
||||
{"left-square-bracket", '['},
|
||||
{"backslash", '\\'},
|
||||
{"reverse-solidus", '\\'},
|
||||
{"right-square-bracket", ']'},
|
||||
{"circumflex", '^'},
|
||||
{"circumflex-accent", '^'},
|
||||
{"underscore", '_'},
|
||||
{"low-line", '_'},
|
||||
{"grave-accent", '`'},
|
||||
{"left-brace", '{'},
|
||||
{"left-curly-bracket", '{'},
|
||||
{"vertical-line", '|'},
|
||||
{"right-brace", '}'},
|
||||
{"right-curly-bracket", '}'},
|
||||
{"tilde", '~'},
|
||||
{"DEL", '\177'},
|
||||
{NULL, 0}
|
||||
{_T("NUL"), '\0'},
|
||||
{_T("SOH"), '\001'},
|
||||
{_T("STX"), '\002'},
|
||||
{_T("ETX"), '\003'},
|
||||
{_T("EOT"), '\004'},
|
||||
{_T("ENQ"), '\005'},
|
||||
{_T("ACK"), '\006'},
|
||||
{_T("BEL"), '\007'},
|
||||
{_T("alert"), '\007'},
|
||||
{_T("BS"), '\010'},
|
||||
{_T("backspace"), '\b'},
|
||||
{_T("HT"), '\011'},
|
||||
{_T("tab"), '\t'},
|
||||
{_T("LF"), '\012'},
|
||||
{_T("newline"), '\n'},
|
||||
{_T("VT"), '\013'},
|
||||
{_T("vertical-tab"), '\v'},
|
||||
{_T("FF"), '\014'},
|
||||
{_T("form-feed"), '\f'},
|
||||
{_T("CR"), '\015'},
|
||||
{_T("carriage-return"), '\r'},
|
||||
{_T("SO"), '\016'},
|
||||
{_T("SI"), '\017'},
|
||||
{_T("DLE"), '\020'},
|
||||
{_T("DC1"), '\021'},
|
||||
{_T("DC2"), '\022'},
|
||||
{_T("DC3"), '\023'},
|
||||
{_T("DC4"), '\024'},
|
||||
{_T("NAK"), '\025'},
|
||||
{_T("SYN"), '\026'},
|
||||
{_T("ETB"), '\027'},
|
||||
{_T("CAN"), '\030'},
|
||||
{_T("EM"), '\031'},
|
||||
{_T("SUB"), '\032'},
|
||||
{_T("ESC"), '\033'},
|
||||
{_T("IS4"), '\034'},
|
||||
{_T("FS"), '\034'},
|
||||
{_T("IS3"), '\035'},
|
||||
{_T("GS"), '\035'},
|
||||
{_T("IS2"), '\036'},
|
||||
{_T("RS"), '\036'},
|
||||
{_T("IS1"), '\037'},
|
||||
{_T("US"), '\037'},
|
||||
{_T("space"), ' '},
|
||||
{_T("exclamation-mark"), '!'},
|
||||
{_T("quotation-mark"), '"'},
|
||||
{_T("number-sign"), '#'},
|
||||
{_T("dollar-sign"), '$'},
|
||||
{_T("percent-sign"), '%'},
|
||||
{_T("ampersand"), '&'},
|
||||
{_T("apostrophe"), '\''},
|
||||
{_T("left-parenthesis"), '('},
|
||||
{_T("right-parenthesis"), ')'},
|
||||
{_T("asterisk"), '*'},
|
||||
{_T("plus-sign"), '+'},
|
||||
{_T("comma"), ','},
|
||||
{_T("hyphen"), '-'},
|
||||
{_T("hyphen-minus"), '-'},
|
||||
{_T("period"), '.'},
|
||||
{_T("full-stop"), '.'},
|
||||
{_T("slash"), '/'},
|
||||
{_T("solidus"), '/'},
|
||||
{_T("zero"), '0'},
|
||||
{_T("one"), '1'},
|
||||
{_T("two"), '2'},
|
||||
{_T("three"), '3'},
|
||||
{_T("four"), '4'},
|
||||
{_T("five"), '5'},
|
||||
{_T("six"), '6'},
|
||||
{_T("seven"), '7'},
|
||||
{_T("eight"), '8'},
|
||||
{_T("nine"), '9'},
|
||||
{_T("colon"), ':'},
|
||||
{_T("semicolon"), ';'},
|
||||
{_T("less-than-sign"), '<'},
|
||||
{_T("equals-sign"), '='},
|
||||
{_T("greater-than-sign"), '>'},
|
||||
{_T("question-mark"), '?'},
|
||||
{_T("commercial-at"), '@'},
|
||||
{_T("left-square-bracket"), '['},
|
||||
{_T("backslash"), '\\'},
|
||||
{_T("reverse-solidus"), '\\'},
|
||||
{_T("right-square-bracket"), ']'},
|
||||
{_T("circumflex"), '^'},
|
||||
{_T("circumflex-accent"), '^'},
|
||||
{_T("underscore"), '_'},
|
||||
{_T("low-line"), '_'},
|
||||
{_T("grave-accent"), '`'},
|
||||
{_T("left-brace"), '{'},
|
||||
{_T("left-curly-bracket"), '{'},
|
||||
{_T("vertical-line"), '|'},
|
||||
{_T("right-brace"), '}'},
|
||||
{_T("right-curly-bracket"), '}'},
|
||||
{_T("tilde"), '~'},
|
||||
{_T("DEL"), '\177'},
|
||||
{NULL, 0}
|
||||
};
|
||||
|
||||
#if wxUSE_UNICODE
|
||||
|
||||
/* Unicode character-class tables */
|
||||
|
||||
typedef struct crange {
|
||||
@ -518,6 +526,120 @@ static chr graphCharTable[] = {
|
||||
* End of auto-generated Unicode character ranges declarations.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Supply implementations for some tcl functions that this module depends on
|
||||
* to make it self contained
|
||||
*/
|
||||
|
||||
#include "tclUniData.c"
|
||||
#define Tcl_UniChar wxChar
|
||||
|
||||
/*
|
||||
* Compute the uppercase equivalent of the given Unicode character.
|
||||
* Taken from tcl.
|
||||
*/
|
||||
|
||||
Tcl_UniChar Tcl_UniCharToUpper(int ch)
|
||||
{
|
||||
int info = GetUniCharInfo(ch);
|
||||
|
||||
if (GetCaseType(info) & 0x04) {
|
||||
return (Tcl_UniChar) (ch - GetDelta(info));
|
||||
} else {
|
||||
return ch;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the lowercase equivalent of the given Unicode character.
|
||||
* Taken from tcl.
|
||||
*/
|
||||
|
||||
Tcl_UniChar Tcl_UniCharToLower(int ch)
|
||||
{
|
||||
int info = GetUniCharInfo(ch);
|
||||
|
||||
if (GetCaseType(info) & 0x02) {
|
||||
return (Tcl_UniChar) (ch + GetDelta(info));
|
||||
} else {
|
||||
return ch;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Compute the titlecase equivalent of the given Unicode character.
|
||||
* Taken from tcl.
|
||||
*/
|
||||
|
||||
Tcl_UniChar Tcl_UniCharToTitle(int ch)
|
||||
{
|
||||
int info = GetUniCharInfo(ch);
|
||||
int mode = GetCaseType(info);
|
||||
|
||||
if (mode & 0x1) {
|
||||
/*
|
||||
* Subtract or add one depending on the original case.
|
||||
*/
|
||||
|
||||
return (Tcl_UniChar) (ch + ((mode & 0x4) ? -1 : 1));
|
||||
} else if (mode == 0x4) {
|
||||
return (Tcl_UniChar) (ch - GetDelta(info));
|
||||
} else {
|
||||
return ch;
|
||||
}
|
||||
}
|
||||
|
||||
#else /* wxUSE_UNICODE */
|
||||
|
||||
#include <locale.h>
|
||||
|
||||
typedef int (*isfunc_t)(int);
|
||||
|
||||
/* ASCII character-class table */
|
||||
static struct cclass {
|
||||
char *name;
|
||||
char *chars;
|
||||
int hasch;
|
||||
isfunc_t isfunc;
|
||||
} cclasses[] = {
|
||||
{"alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||
0123456789", 1, isalnum},
|
||||
{"alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz",
|
||||
1, isalpha},
|
||||
{"blank", " \t", 0, NULL},
|
||||
{"cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\
|
||||
\25\26\27\30\31\32\33\34\35\36\37\177", 0, iscntrl},
|
||||
{"digit", "0123456789", 0, isdigit},
|
||||
{"graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
|
||||
1, isgraph},
|
||||
{"lower", "abcdefghijklmnopqrstuvwxyz",
|
||||
1, islower},
|
||||
{"print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\
|
||||
0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ",
|
||||
1, isprint},
|
||||
{"punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~",
|
||||
0, ispunct},
|
||||
{"space", "\t\n\v\f\r ", 0, isspace},
|
||||
{"upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
||||
0, isupper},
|
||||
{"xdigit", "0123456789ABCDEFabcdef",
|
||||
0, isxdigit},
|
||||
{NULL, 0, 0, NULL}
|
||||
};
|
||||
|
||||
/*
|
||||
* Supply implementations for some tcl functions that this module depends on
|
||||
* to make it self contained
|
||||
*/
|
||||
|
||||
#define Tcl_UniChar wxChar
|
||||
Tcl_UniChar Tcl_UniCharToUpper(int ch) { return wxToupper(ch); }
|
||||
Tcl_UniChar Tcl_UniCharToLower(int ch) { return wxTolower(ch); }
|
||||
Tcl_UniChar Tcl_UniCharToTitle(int ch) { return wxToupper(ch); }
|
||||
|
||||
#endif /* !wxUSE_UNICODE */
|
||||
|
||||
#define CH NOCELT
|
||||
|
||||
/*
|
||||
@ -569,8 +691,6 @@ element(v, startp, endp)
|
||||
{
|
||||
struct cname *cn;
|
||||
size_t len;
|
||||
Tcl_DString ds;
|
||||
CONST char *np;
|
||||
|
||||
/* generic: one-chr names stand for themselves */
|
||||
assert(startp < endp);
|
||||
@ -582,14 +702,11 @@ element(v, startp, endp)
|
||||
NOTE(REG_ULOCALE);
|
||||
|
||||
/* search table */
|
||||
Tcl_DStringInit(&ds);
|
||||
np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
|
||||
for (cn=cnames; cn->name!=NULL; cn++) {
|
||||
if (strlen(cn->name)==len && strncmp(cn->name, np, len)==0) {
|
||||
if (wxStrlen_(cn->name)==len && wxStrncmp(cn->name, startp, len)==0) {
|
||||
break; /* NOTE BREAK OUT */
|
||||
}
|
||||
}
|
||||
Tcl_DStringFree(&ds);
|
||||
if (cn->name != NULL) {
|
||||
return CHR(cn->code);
|
||||
}
|
||||
@ -708,6 +825,8 @@ eclass(v, c, cases)
|
||||
return cv;
|
||||
}
|
||||
|
||||
#if wxUSE_UNICODE
|
||||
|
||||
/*
|
||||
- cclass - supply cvec for a character class
|
||||
* Must include case counterparts on request.
|
||||
@ -722,18 +841,17 @@ cclass(v, startp, endp, cases)
|
||||
{
|
||||
size_t len;
|
||||
struct cvec *cv = NULL;
|
||||
Tcl_DString ds;
|
||||
CONST char *np;
|
||||
char **namePtr;
|
||||
CONST chr *np;
|
||||
chr **namePtr;
|
||||
int i, index;
|
||||
|
||||
/*
|
||||
* The following arrays define the valid character class names.
|
||||
*/
|
||||
|
||||
static char *classNames[] = {
|
||||
"alnum", "alpha", "ascii", "blank", "cntrl", "digit", "graph",
|
||||
"lower", "print", "punct", "space", "upper", "xdigit", NULL
|
||||
static chr *classNames[] = {
|
||||
_T("alnum"), _T("alpha"), _T("ascii"), _T("blank"), _T("cntrl"), _T("digit"), _T("graph"),
|
||||
_T("lower"), _T("print"), _T("punct"), _T("space"), _T("upper"), _T("xdigit"), NULL
|
||||
};
|
||||
|
||||
enum classes {
|
||||
@ -747,16 +865,15 @@ cclass(v, startp, endp, cases)
|
||||
*/
|
||||
|
||||
len = endp - startp;
|
||||
Tcl_DStringInit(&ds);
|
||||
np = Tcl_UniCharToUtfDString(startp, (int)len, &ds);
|
||||
np = startp;
|
||||
|
||||
/*
|
||||
* Remap lower and upper to alpha if the match is case insensitive.
|
||||
*/
|
||||
|
||||
if (cases && len == 5 && (strncmp("lower", np, 5) == 0
|
||||
|| strncmp("upper", np, 5) == 0)) {
|
||||
np = "alpha";
|
||||
if (cases && len == 5 && (wxStrncmp(_T("lower"), np, 5) == 0
|
||||
|| wxStrncmp(_T("upper"), np, 5) == 0)) {
|
||||
np = _T("alpha");
|
||||
}
|
||||
|
||||
/*
|
||||
@ -765,12 +882,11 @@ cclass(v, startp, endp, cases)
|
||||
|
||||
index = -1;
|
||||
for (namePtr=classNames,i=0 ; *namePtr!=NULL ; namePtr++,i++) {
|
||||
if ((strlen(*namePtr) == len) && (strncmp(*namePtr, np, len) == 0)) {
|
||||
if ((wxStrlen_(*namePtr) == len) && (wxStrncmp(*namePtr, np, len) == 0)) {
|
||||
index = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
Tcl_DStringInit(&ds);
|
||||
if (index == -1) {
|
||||
ERR(REG_ECTYPE);
|
||||
return NULL;
|
||||
@ -919,6 +1035,79 @@ cclass(v, startp, endp, cases)
|
||||
return cv;
|
||||
}
|
||||
|
||||
#else /* wxUSE_UNICODE */
|
||||
|
||||
static struct cvec *
|
||||
cclass(v, startp, endp, cases)
|
||||
struct vars *v;
|
||||
chr *startp; /* where the name starts */
|
||||
chr *endp; /* just past the end of the name */
|
||||
int cases; /* case-independent? */
|
||||
{
|
||||
size_t len;
|
||||
char *p;
|
||||
struct cclass *cc;
|
||||
struct cvec *cv;
|
||||
chr *np;
|
||||
int i;
|
||||
int count;
|
||||
char buf[256];
|
||||
const char *loc;
|
||||
|
||||
/* find the name */
|
||||
len = endp - startp;
|
||||
np = startp;
|
||||
if (cases && len == 5 && (wxStrncmp(_T("lower"), np, 5) == 0 ||
|
||||
wxStrncmp(_T("upper"), np, 5) == 0))
|
||||
np = _T("alpha");
|
||||
for (cc = cclasses; cc->name != NULL; cc++)
|
||||
if (wxStrlen_(cc->name) == len && wxStrncmp(cc->name, np, len) == 0)
|
||||
break; /* NOTE BREAK OUT */
|
||||
if (cc->name == NULL) {
|
||||
ERR(REG_ECTYPE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
loc = setlocale(LC_CTYPE, NULL);
|
||||
|
||||
if (!cc->isfunc || loc == NULL || strcmp(loc, "C") == 0)
|
||||
{
|
||||
/* set up vector */
|
||||
cv = getcvec(v, (int)strlen(cc->chars), 0, 0);
|
||||
if (cv == NULL) {
|
||||
ERR(REG_ESPACE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* fill it in */
|
||||
for (p = cc->chars; *p != '\0'; p++)
|
||||
addchr(cv, (chr)*p);
|
||||
}
|
||||
else
|
||||
{
|
||||
count = 0;
|
||||
for (i = 0; i < 256; i++)
|
||||
if (cc->isfunc(i))
|
||||
buf[count++] = i;
|
||||
|
||||
/* set up vector */
|
||||
cv = getcvec(v, count, 0, 0);
|
||||
if (cv == NULL) {
|
||||
ERR(REG_ESPACE);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* fill it in */
|
||||
for (i = 0; i < count; i++)
|
||||
addchr(cv, buf[i]);
|
||||
}
|
||||
|
||||
return cv;
|
||||
}
|
||||
|
||||
#endif /* !wxUSE_UNICODE */
|
||||
|
||||
|
||||
/*
|
||||
- allcases - supply cvec for all case counterparts of a chr (including itself)
|
||||
* This is a shortcut, preferably an efficient one, for simple characters;
|
||||
|
@ -27,15 +27,16 @@
|
||||
*/
|
||||
|
||||
/* headers if any */
|
||||
#include "tclInt.h"
|
||||
#include "wx/wxchar.h"
|
||||
|
||||
/* overrides for regguts.h definitions, if any */
|
||||
#define FUNCPTR(name, args) (*name) _ANSI_ARGS_(args)
|
||||
#define MALLOC(n) ckalloc(n)
|
||||
#define FREE(p) ckfree(VS(p))
|
||||
#define REALLOC(p,n) ckrealloc(VS(p),n)
|
||||
|
||||
|
||||
/* regguts only includes standard headers if NULL is not defined, so do it
|
||||
* ourselves here */
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
/*
|
||||
* Do not insert extras between the "begin" and "end" lines -- this
|
||||
@ -67,53 +68,57 @@
|
||||
#ifdef __REG_NOCHAR
|
||||
#undef __REG_NOCHAR
|
||||
#endif
|
||||
/* interface types */
|
||||
#define __REG_WIDE_T Tcl_UniChar
|
||||
#define __REG_REGOFF_T long /* not really right, but good enough... */
|
||||
#define __REG_VOID_T VOID
|
||||
#define __REG_CONST CONST
|
||||
/* names and declarations */
|
||||
#define __REG_WIDE_COMPILE TclReComp
|
||||
#define __REG_WIDE_EXEC TclReExec
|
||||
#define __REG_NOFRONT /* don't want regcomp() and regexec() */
|
||||
#define __REG_NOCHAR /* or the char versions */
|
||||
#define regfree TclReFree
|
||||
#define regerror TclReError
|
||||
#if wxUSE_UNICODE
|
||||
# define __REG_WIDE_T wxChar
|
||||
# define __REG_WIDE_COMPILE re_comp
|
||||
# define __REG_WIDE_EXEC re_exec
|
||||
# define __REG_NOCHAR /* don't want the char versions */
|
||||
#endif
|
||||
#define __REG_NOFRONT /* don't want regcomp() and regexec() */
|
||||
#define _ANSI_ARGS_(x) x
|
||||
/* --- end --- */
|
||||
|
||||
|
||||
|
||||
/* internal character type and related */
|
||||
typedef Tcl_UniChar chr; /* the type itself */
|
||||
typedef int pchr; /* what it promotes to */
|
||||
typedef unsigned uchr; /* unsigned type that will hold a chr */
|
||||
typedef int celt; /* type to hold chr, MCCE number, or NOCELT */
|
||||
#define NOCELT (-1) /* celt value which is not valid chr or MCCE */
|
||||
#define CHR(c) (UCHAR(c)) /* turn char literal into chr literal */
|
||||
#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
|
||||
#if TCL_UTF_MAX > 3
|
||||
#define CHRBITS 32 /* bits in a chr; must not use sizeof */
|
||||
#define CHR_MIN 0x00000000 /* smallest and largest chr; the value */
|
||||
#define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
|
||||
typedef wxChar chr; /* the type itself */
|
||||
typedef int pchr; /* what it promotes to */
|
||||
typedef unsigned uchr; /* unsigned type that will hold a chr */
|
||||
typedef int celt; /* type to hold chr, MCCE number, or NOCELT */
|
||||
#define NOCELT (-1) /* celt value which is not valid chr or MCCE */
|
||||
#define UCHAR(c) ((unsigned char) (c))
|
||||
#define CHR(c) (UCHAR(c)) /* turn char literal into chr literal */
|
||||
#define DIGITVAL(c) ((c)-'0') /* turn chr digit into its value */
|
||||
#if !wxUSE_UNICODE
|
||||
# define CHRBITS 8 /* bits in a chr; must not use sizeof */
|
||||
# define CHR_MIN 0x00 /* smallest and largest chr; the value */
|
||||
# define CHR_MAX 0xff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
|
||||
#elif SIZEOF_WCHAR_T == 4
|
||||
# define CHRBITS 32 /* bits in a chr; must not use sizeof */
|
||||
# define CHR_MIN 0x00000000 /* smallest and largest chr; the value */
|
||||
# define CHR_MAX 0xffffffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
|
||||
#else
|
||||
#define CHRBITS 16 /* bits in a chr; must not use sizeof */
|
||||
#define CHR_MIN 0x0000 /* smallest and largest chr; the value */
|
||||
#define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
|
||||
# define CHRBITS 16 /* bits in a chr; must not use sizeof */
|
||||
# define CHR_MIN 0x0000 /* smallest and largest chr; the value */
|
||||
# define CHR_MAX 0xffff /* CHR_MAX-CHR_MIN+1 should fit in uchr */
|
||||
#endif
|
||||
|
||||
/* functions operating on chr */
|
||||
#define iscalnum(x) Tcl_UniCharIsAlnum(x)
|
||||
#define iscalpha(x) Tcl_UniCharIsAlpha(x)
|
||||
#define iscdigit(x) Tcl_UniCharIsDigit(x)
|
||||
#define iscspace(x) Tcl_UniCharIsSpace(x)
|
||||
/*
|
||||
* I'm using isalpha et al. instead of wxIsalpha since BCC 5.5's iswalpha
|
||||
* seems not to work on Windows 9x? Note that these are only used by the
|
||||
* lexer, and although they must work for wxChars, they need only return
|
||||
* true for characters within the ascii range.
|
||||
*/
|
||||
#define iscalnum(x) ((wxUChar)(x) < 128 && isalnum(x))
|
||||
#define iscalpha(x) ((wxUChar)(x) < 128 && isalpha(x))
|
||||
#define iscdigit(x) ((wxUChar)(x) < 128 && isdigit(x))
|
||||
#define iscspace(x) ((wxUChar)(x) < 128 && isspace(x))
|
||||
|
||||
/* name the external functions */
|
||||
#define compile TclReComp
|
||||
#define exec TclReExec
|
||||
#define compile re_comp
|
||||
#define exec re_exec
|
||||
|
||||
/* enable/disable debugging code (by whether REG_DEBUG is defined or not) */
|
||||
#if 0 /* no debug unless requested by makefile */
|
||||
#define REG_DEBUG /* */
|
||||
#if 0 /* no debug unless requested by makefile */
|
||||
#define REG_DEBUG /* */
|
||||
#endif
|
||||
|
||||
/* and pick up the standard header */
|
||||
|
@ -107,18 +107,14 @@ extern "C" {
|
||||
#ifdef __REG_NOCHAR
|
||||
#undef __REG_NOCHAR
|
||||
#endif
|
||||
/* interface types */
|
||||
#define __REG_WIDE_T Tcl_UniChar
|
||||
#define __REG_REGOFF_T long /* not really right, but good enough... */
|
||||
#define __REG_VOID_T VOID
|
||||
#define __REG_CONST CONST
|
||||
/* names and declarations */
|
||||
#define __REG_WIDE_COMPILE TclReComp
|
||||
#define __REG_WIDE_EXEC TclReExec
|
||||
#define __REG_NOFRONT /* don't want regcomp() and regexec() */
|
||||
#define __REG_NOCHAR /* or the char versions */
|
||||
#define regfree TclReFree
|
||||
#define regerror TclReError
|
||||
#if wxUSE_UNICODE
|
||||
# define __REG_WIDE_T wxChar
|
||||
# define __REG_WIDE_COMPILE re_comp
|
||||
# define __REG_WIDE_EXEC re_exec
|
||||
# define __REG_NOCHAR /* don't want the char versions */
|
||||
#endif
|
||||
#define __REG_NOFRONT /* don't want regcomp() and regexec() */
|
||||
#define _ANSI_ARGS_(x) x
|
||||
/* --- end --- */
|
||||
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user