Fix bug for (*ACCEPT) inside a capturing group.
This commit is contained in:
parent
2fe64ff926
commit
b3e8a9f1aa
@ -44,4 +44,11 @@ strings matched by the repetition are not all the same length.
|
||||
information. This applied to any pattern with a group that matched no
|
||||
characters, for example: /(?:(?=.)|(?<!x))a/.
|
||||
|
||||
7. When an (*ACCEPT) is triggered inside capturing parentheses, it arranges for
|
||||
those parentheses to be closed with whatever has been captured so far. However,
|
||||
it was failing to mark any other groups between the highest capture so far and
|
||||
the current group as "unset". Thus, the ovector for those groups contained
|
||||
whatever was previously there. An example is the pattern /(x)|((*ACCEPT))/ when
|
||||
matched against "abcd".
|
||||
|
||||
****
|
||||
|
@ -1465,7 +1465,18 @@ for (;;)
|
||||
mb->ovector[offset] =
|
||||
mb->ovector[mb->offset_end - number];
|
||||
mb->ovector[offset+1] = eptr - mb->start_subject;
|
||||
if (offset_top <= offset) offset_top = offset + 2;
|
||||
|
||||
/* If this group is at or above the current high water mark, ensure that
|
||||
any groups between the current high water mark and this group are marked
|
||||
unset and then update the high water mark. */
|
||||
|
||||
if (offset >= offset_top)
|
||||
{
|
||||
register PCRE2_SIZE *iptr = mb->ovector + offset_top;
|
||||
register PCRE2_SIZE *iend = mb->ovector + offset;
|
||||
while (iptr < iend) *iptr++ = PCRE2_UNSET;
|
||||
offset_top = offset + 2;
|
||||
}
|
||||
}
|
||||
ecode += 1 + IMM2_SIZE;
|
||||
break;
|
||||
@ -6321,18 +6332,18 @@ while (nextframe != NULL)
|
||||
* Match a Regular Expression *
|
||||
*************************************************/
|
||||
|
||||
/* This function applies a compiled re to a subject string and picks out
|
||||
/* This function applies a compiled pattern to a subject string and picks out
|
||||
portions of the string if it matches. Two elements in the vector are set for
|
||||
each substring: the offsets to the start and end of the substring.
|
||||
|
||||
Arguments:
|
||||
context points a PCRE2 context
|
||||
code points to the compiled expression
|
||||
subject points to the subject string
|
||||
length length of subject string (may contain binary zeros)
|
||||
start_offset where to start in the subject string
|
||||
options option bits
|
||||
match_data points to a match_data block
|
||||
mcontext points to a PCRE2 context
|
||||
|
||||
Returns: > 0 => success; value is the number of ovector pairs filled
|
||||
= 0 => success, but ovector is not big enough
|
||||
|
@ -163,6 +163,7 @@ void vms_setsymbol( char *, char *, int );
|
||||
#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
|
||||
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
|
||||
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
|
||||
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
|
||||
#define LOOPREPEAT 500000 /* Default loop count for timing */
|
||||
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
|
||||
|
||||
@ -4685,12 +4686,18 @@ else
|
||||
|
||||
for (gmatched = 0;; gmatched++)
|
||||
{
|
||||
PCRE2_SIZE j;
|
||||
int capcount;
|
||||
PCRE2_SIZE *ovector;
|
||||
PCRE2_SIZE ovecsave[2];
|
||||
|
||||
ovector = FLD(match_data, ovector);
|
||||
|
||||
/* Fill the ovector with junk to detect elements that do not get set
|
||||
when they should be. */
|
||||
|
||||
for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
|
||||
|
||||
/* When matching is via pcre2_match(), we will detect the use of JIT via the
|
||||
stack callback function. */
|
||||
|
||||
@ -4889,12 +4896,25 @@ for (gmatched = 0;; gmatched++)
|
||||
}
|
||||
|
||||
fprintf(outfile, "%2d: ", i/2);
|
||||
|
||||
/* Check for an unset group */
|
||||
|
||||
if (start == PCRE2_UNSET)
|
||||
{
|
||||
fprintf(outfile, "<unset>\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Check for silly offsets, in particular, values that have not been
|
||||
set when they should have been. */
|
||||
|
||||
if (start > ulen || end > ulen)
|
||||
{
|
||||
fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
|
||||
start, end);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
|
||||
JIT, it is disabled above, with a comment.) When the match is done by the
|
||||
interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
|
||||
@ -4918,7 +4938,6 @@ for (gmatched = 0;; gmatched++)
|
||||
|
||||
if (showallused)
|
||||
{
|
||||
PCRE2_SIZE j;
|
||||
PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
|
||||
PCHARS(lmiddle, pp, start, end - start, utf, outfile);
|
||||
PCHARS(lright, pp, end, rightchar - end, utf, outfile);
|
||||
@ -4944,7 +4963,6 @@ for (gmatched = 0;; gmatched++)
|
||||
fprintf(outfile, " (JIT)");
|
||||
if (startchar != start)
|
||||
{
|
||||
PCRE2_SIZE j;
|
||||
fprintf(outfile, "\n ");
|
||||
for (j = 0; j < lleft; j++) fprintf(outfile, "^");
|
||||
}
|
||||
|
3
testdata/testinput1
vendored
3
testdata/testinput1
vendored
@ -5702,4 +5702,7 @@ name)/mark
|
||||
abd
|
||||
xyd
|
||||
|
||||
/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/
|
||||
1234abcd
|
||||
|
||||
# End of testinput1
|
||||
|
9
testdata/testoutput1
vendored
9
testdata/testoutput1
vendored
@ -9403,4 +9403,13 @@ No match
|
||||
xyd
|
||||
0: d
|
||||
|
||||
/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/
|
||||
1234abcd
|
||||
0:
|
||||
1: <unset>
|
||||
2: <unset>
|
||||
3: <unset>
|
||||
4: <unset>
|
||||
5:
|
||||
|
||||
# End of testinput1
|
||||
|
Loading…
Reference in New Issue
Block a user