Fix bug for (*ACCEPT) inside a capturing group.
This commit is contained in:
parent
2fe64ff926
commit
b3e8a9f1aa
@ -44,4 +44,11 @@ strings matched by the repetition are not all the same length.
|
|||||||
information. This applied to any pattern with a group that matched no
|
information. This applied to any pattern with a group that matched no
|
||||||
characters, for example: /(?:(?=.)|(?<!x))a/.
|
characters, for example: /(?:(?=.)|(?<!x))a/.
|
||||||
|
|
||||||
|
7. When an (*ACCEPT) is triggered inside capturing parentheses, it arranges for
|
||||||
|
those parentheses to be closed with whatever has been captured so far. However,
|
||||||
|
it was failing to mark any other groups between the highest capture so far and
|
||||||
|
the current group as "unset". Thus, the ovector for those groups contained
|
||||||
|
whatever was previously there. An example is the pattern /(x)|((*ACCEPT))/ when
|
||||||
|
matched against "abcd".
|
||||||
|
|
||||||
****
|
****
|
||||||
|
@ -1465,7 +1465,18 @@ for (;;)
|
|||||||
mb->ovector[offset] =
|
mb->ovector[offset] =
|
||||||
mb->ovector[mb->offset_end - number];
|
mb->ovector[mb->offset_end - number];
|
||||||
mb->ovector[offset+1] = eptr - mb->start_subject;
|
mb->ovector[offset+1] = eptr - mb->start_subject;
|
||||||
if (offset_top <= offset) offset_top = offset + 2;
|
|
||||||
|
/* If this group is at or above the current high water mark, ensure that
|
||||||
|
any groups between the current high water mark and this group are marked
|
||||||
|
unset and then update the high water mark. */
|
||||||
|
|
||||||
|
if (offset >= offset_top)
|
||||||
|
{
|
||||||
|
register PCRE2_SIZE *iptr = mb->ovector + offset_top;
|
||||||
|
register PCRE2_SIZE *iend = mb->ovector + offset;
|
||||||
|
while (iptr < iend) *iptr++ = PCRE2_UNSET;
|
||||||
|
offset_top = offset + 2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
ecode += 1 + IMM2_SIZE;
|
ecode += 1 + IMM2_SIZE;
|
||||||
break;
|
break;
|
||||||
@ -6321,18 +6332,18 @@ while (nextframe != NULL)
|
|||||||
* Match a Regular Expression *
|
* Match a Regular Expression *
|
||||||
*************************************************/
|
*************************************************/
|
||||||
|
|
||||||
/* This function applies a compiled re to a subject string and picks out
|
/* This function applies a compiled pattern to a subject string and picks out
|
||||||
portions of the string if it matches. Two elements in the vector are set for
|
portions of the string if it matches. Two elements in the vector are set for
|
||||||
each substring: the offsets to the start and end of the substring.
|
each substring: the offsets to the start and end of the substring.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
context points a PCRE2 context
|
|
||||||
code points to the compiled expression
|
code points to the compiled expression
|
||||||
subject points to the subject string
|
subject points to the subject string
|
||||||
length length of subject string (may contain binary zeros)
|
length length of subject string (may contain binary zeros)
|
||||||
start_offset where to start in the subject string
|
start_offset where to start in the subject string
|
||||||
options option bits
|
options option bits
|
||||||
match_data points to a match_data block
|
match_data points to a match_data block
|
||||||
|
mcontext points to a PCRE2 context
|
||||||
|
|
||||||
Returns: > 0 => success; value is the number of ovector pairs filled
|
Returns: > 0 => success; value is the number of ovector pairs filled
|
||||||
= 0 => success, but ovector is not big enough
|
= 0 => success, but ovector is not big enough
|
||||||
|
@ -163,6 +163,7 @@ void vms_setsymbol( char *, char *, int );
|
|||||||
#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
|
#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
|
||||||
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
|
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
|
||||||
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
|
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
|
||||||
|
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
|
||||||
#define LOOPREPEAT 500000 /* Default loop count for timing */
|
#define LOOPREPEAT 500000 /* Default loop count for timing */
|
||||||
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
|
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
|
||||||
|
|
||||||
@ -4685,12 +4686,18 @@ else
|
|||||||
|
|
||||||
for (gmatched = 0;; gmatched++)
|
for (gmatched = 0;; gmatched++)
|
||||||
{
|
{
|
||||||
|
PCRE2_SIZE j;
|
||||||
int capcount;
|
int capcount;
|
||||||
PCRE2_SIZE *ovector;
|
PCRE2_SIZE *ovector;
|
||||||
PCRE2_SIZE ovecsave[2];
|
PCRE2_SIZE ovecsave[2];
|
||||||
|
|
||||||
ovector = FLD(match_data, ovector);
|
ovector = FLD(match_data, ovector);
|
||||||
|
|
||||||
|
/* Fill the ovector with junk to detect elements that do not get set
|
||||||
|
when they should be. */
|
||||||
|
|
||||||
|
for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
|
||||||
|
|
||||||
/* When matching is via pcre2_match(), we will detect the use of JIT via the
|
/* When matching is via pcre2_match(), we will detect the use of JIT via the
|
||||||
stack callback function. */
|
stack callback function. */
|
||||||
|
|
||||||
@ -4786,7 +4793,7 @@ for (gmatched = 0;; gmatched++)
|
|||||||
{
|
{
|
||||||
PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
|
PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Run a single DFA or NFA match. */
|
/* Run a single DFA or NFA match. */
|
||||||
|
|
||||||
if ((dat_datctl.control & CTL_DFA) != 0)
|
if ((dat_datctl.control & CTL_DFA) != 0)
|
||||||
@ -4887,14 +4894,27 @@ for (gmatched = 0;; gmatched++)
|
|||||||
fprintf(outfile, "Start of matched string is beyond its end - "
|
fprintf(outfile, "Start of matched string is beyond its end - "
|
||||||
"displaying from end to start.\n");
|
"displaying from end to start.\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
fprintf(outfile, "%2d: ", i/2);
|
fprintf(outfile, "%2d: ", i/2);
|
||||||
|
|
||||||
|
/* Check for an unset group */
|
||||||
|
|
||||||
if (start == PCRE2_UNSET)
|
if (start == PCRE2_UNSET)
|
||||||
{
|
{
|
||||||
fprintf(outfile, "<unset>\n");
|
fprintf(outfile, "<unset>\n");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Check for silly offsets, in particular, values that have not been
|
||||||
|
set when they should have been. */
|
||||||
|
|
||||||
|
if (start > ulen || end > ulen)
|
||||||
|
{
|
||||||
|
fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
|
||||||
|
start, end);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
/* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
|
/* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
|
||||||
JIT, it is disabled above, with a comment.) When the match is done by the
|
JIT, it is disabled above, with a comment.) When the match is done by the
|
||||||
interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
|
interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
|
||||||
@ -4918,7 +4938,6 @@ for (gmatched = 0;; gmatched++)
|
|||||||
|
|
||||||
if (showallused)
|
if (showallused)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE j;
|
|
||||||
PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
|
PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
|
||||||
PCHARS(lmiddle, pp, start, end - start, utf, outfile);
|
PCHARS(lmiddle, pp, start, end - start, utf, outfile);
|
||||||
PCHARS(lright, pp, end, rightchar - end, utf, outfile);
|
PCHARS(lright, pp, end, rightchar - end, utf, outfile);
|
||||||
@ -4944,7 +4963,6 @@ for (gmatched = 0;; gmatched++)
|
|||||||
fprintf(outfile, " (JIT)");
|
fprintf(outfile, " (JIT)");
|
||||||
if (startchar != start)
|
if (startchar != start)
|
||||||
{
|
{
|
||||||
PCRE2_SIZE j;
|
|
||||||
fprintf(outfile, "\n ");
|
fprintf(outfile, "\n ");
|
||||||
for (j = 0; j < lleft; j++) fprintf(outfile, "^");
|
for (j = 0; j < lleft; j++) fprintf(outfile, "^");
|
||||||
}
|
}
|
||||||
|
3
testdata/testinput1
vendored
3
testdata/testinput1
vendored
@ -5702,4 +5702,7 @@ name)/mark
|
|||||||
abd
|
abd
|
||||||
xyd
|
xyd
|
||||||
|
|
||||||
|
/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/
|
||||||
|
1234abcd
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
9
testdata/testoutput1
vendored
9
testdata/testoutput1
vendored
@ -9403,4 +9403,13 @@ No match
|
|||||||
xyd
|
xyd
|
||||||
0: d
|
0: d
|
||||||
|
|
||||||
|
/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/
|
||||||
|
1234abcd
|
||||||
|
0:
|
||||||
|
1: <unset>
|
||||||
|
2: <unset>
|
||||||
|
3: <unset>
|
||||||
|
4: <unset>
|
||||||
|
5:
|
||||||
|
|
||||||
# End of testinput1
|
# End of testinput1
|
||||||
|
Loading…
Reference in New Issue
Block a user