Fix bug for (*ACCEPT) inside a capturing group.

This commit is contained in:
ph10 2014-11-05 16:05:19 +00:00
parent 2fe64ff926
commit b3e8a9f1aa
5 changed files with 55 additions and 7 deletions

View File

@ -44,4 +44,11 @@ strings matched by the repetition are not all the same length.
information. This applied to any pattern with a group that matched no
characters, for example: /(?:(?=.)|(?<!x))a/.
7. When an (*ACCEPT) is triggered inside capturing parentheses, it arranges for
those parentheses to be closed with whatever has been captured so far. However,
it was failing to mark any other groups between the highest capture so far and
the current group as "unset". Thus, the ovector for those groups contained
whatever was previously there. An example is the pattern /(x)|((*ACCEPT))/ when
matched against "abcd".
****

View File

@ -1465,7 +1465,18 @@ for (;;)
mb->ovector[offset] =
mb->ovector[mb->offset_end - number];
mb->ovector[offset+1] = eptr - mb->start_subject;
if (offset_top <= offset) offset_top = offset + 2;
/* If this group is at or above the current high water mark, ensure that
any groups between the current high water mark and this group are marked
unset and then update the high water mark. */
if (offset >= offset_top)
{
register PCRE2_SIZE *iptr = mb->ovector + offset_top;
register PCRE2_SIZE *iend = mb->ovector + offset;
while (iptr < iend) *iptr++ = PCRE2_UNSET;
offset_top = offset + 2;
}
}
ecode += 1 + IMM2_SIZE;
break;
@ -6321,18 +6332,18 @@ while (nextframe != NULL)
* Match a Regular Expression *
*************************************************/
/* This function applies a compiled re to a subject string and picks out
/* This function applies a compiled pattern to a subject string and picks out
portions of the string if it matches. Two elements in the vector are set for
each substring: the offsets to the start and end of the substring.
Arguments:
context points a PCRE2 context
code points to the compiled expression
subject points to the subject string
length length of subject string (may contain binary zeros)
start_offset where to start in the subject string
options option bits
match_data points to a match_data block
mcontext points to a PCRE2 context
Returns: > 0 => success; value is the number of ovector pairs filled
= 0 => success, but ovector is not big enough

View File

@ -163,6 +163,7 @@ void vms_setsymbol( char *, char *, int );
#define CFAIL_UNSET UINT32_MAX /* Unset value for cfail fields */
#define DFA_WS_DIMENSION 1000 /* Size of DFA workspace */
#define DEFAULT_OVECCOUNT 15 /* Default ovector count */
#define JUNK_OFFSET 0xdeadbeef /* For initializing ovector */
#define LOOPREPEAT 500000 /* Default loop count for timing */
#define VERSION_SIZE 64 /* Size of buffer for the version strings */
@ -4685,12 +4686,18 @@ else
for (gmatched = 0;; gmatched++)
{
PCRE2_SIZE j;
int capcount;
PCRE2_SIZE *ovector;
PCRE2_SIZE ovecsave[2];
ovector = FLD(match_data, ovector);
/* Fill the ovector with junk to detect elements that do not get set
when they should be. */
for (j = 0; j < 2*dat_datctl.oveccount; j++) ovector[j] = JUNK_OFFSET;
/* When matching is via pcre2_match(), we will detect the use of JIT via the
stack callback function. */
@ -4786,7 +4793,7 @@ for (gmatched = 0;; gmatched++)
{
PCRE2_SET_CALLOUT(dat_context, NULL, NULL); /* No callout */
}
/* Run a single DFA or NFA match. */
if ((dat_datctl.control & CTL_DFA) != 0)
@ -4887,14 +4894,27 @@ for (gmatched = 0;; gmatched++)
fprintf(outfile, "Start of matched string is beyond its end - "
"displaying from end to start.\n");
}
fprintf(outfile, "%2d: ", i/2);
/* Check for an unset group */
if (start == PCRE2_UNSET)
{
fprintf(outfile, "<unset>\n");
continue;
}
/* Check for silly offsets, in particular, values that have not been
set when they should have been. */
if (start > ulen || end > ulen)
{
fprintf(outfile, "ERROR: bad value(s) for offset(s): 0x%lx 0x%lx\n",
start, end);
continue;
}
/* When JIT is not being used, ALLUSEDTEXT may be set. (If it is set with
JIT, it is disabled above, with a comment.) When the match is done by the
interpreter, leftchar and rightchar are available, and if ALLUSEDTEXT is
@ -4918,7 +4938,6 @@ for (gmatched = 0;; gmatched++)
if (showallused)
{
PCRE2_SIZE j;
PCHARS(lleft, pp, leftchar, start - leftchar, utf, outfile);
PCHARS(lmiddle, pp, start, end - start, utf, outfile);
PCHARS(lright, pp, end, rightchar - end, utf, outfile);
@ -4944,7 +4963,6 @@ for (gmatched = 0;; gmatched++)
fprintf(outfile, " (JIT)");
if (startchar != start)
{
PCRE2_SIZE j;
fprintf(outfile, "\n ");
for (j = 0; j < lleft; j++) fprintf(outfile, "^");
}

3
testdata/testinput1 vendored
View File

@ -5702,4 +5702,7 @@ name)/mark
abd
xyd
/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/
1234abcd
# End of testinput1

View File

@ -9403,4 +9403,13 @@ No match
xyd
0: d
/(?:((abcd))|(((?:(?:(?:(?:abc|(?:abcdef))))b)abcdefghi)abc)|((*ACCEPT)))/
1234abcd
0:
1: <unset>
2: <unset>
3: <unset>
4: <unset>
5:
# End of testinput1