Improve minimum length finder in the presence of back references when there are
multiple groups with the same number.
This commit is contained in:
parent
8dd228d5ba
commit
770fd48acd
@ -31,11 +31,17 @@ minimum is potentially useful.
|
||||
9. Some changes to the way the minimum subject length is handled:
|
||||
|
||||
* When PCRE2_NO_START_OPTIMIZE is set, no minimum length is computed;
|
||||
pcre2test no longer shows a value (of zero).
|
||||
pcre2test omits this item instead of showing a value of zero.
|
||||
|
||||
* When no minimum length is set by the normal scan, but a first and/or last
|
||||
code unit is recorded, set the minimum to 1 or 2 as appropriate.
|
||||
|
||||
* When a pattern contains multiple groups with the same number, a back
|
||||
reference cannot know which one to scan for a minimum length. This used to
|
||||
cause the minimum length finder to give up with no result. Now it treats
|
||||
such references as not adding to the minimum length (which it should have
|
||||
done all along).
|
||||
|
||||
10. A (*MARK) value inside a successful condition was not being returned by the
|
||||
interpretive matcher (it was returned by JIT). This bug has been mended.
|
||||
|
||||
|
@ -92,7 +92,6 @@ Returns: the minimum length
|
||||
-1 \C in UTF-8 mode
|
||||
or (*ACCEPT)
|
||||
or pattern too complicated
|
||||
or back reference to duplicate name/number
|
||||
-2 internal error (missing capturing bracket)
|
||||
-3 internal error (opcode not listed)
|
||||
*/
|
||||
@ -135,7 +134,7 @@ for (;;)
|
||||
int d, min, recno;
|
||||
PCRE2_UCHAR *cs, *ce;
|
||||
PCRE2_UCHAR op = *cc;
|
||||
|
||||
|
||||
if (branchlength >= UINT16_MAX) return UINT16_MAX;
|
||||
|
||||
switch (op)
|
||||
@ -452,12 +451,12 @@ for (;;)
|
||||
that case we must set the minimum length to zero. */
|
||||
|
||||
/* Duplicate named pattern back reference. We cannot reliably find a length
|
||||
for this if duplicate numbers are present in the pattern. */
|
||||
for this if duplicate numbers are present in the pattern, so we set the
|
||||
length to zero here also. */
|
||||
|
||||
case OP_DNREF:
|
||||
case OP_DNREFI:
|
||||
if (dupcapused) return -1;
|
||||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
{
|
||||
int count = GET2(cc, 1+IMM2_SIZE);
|
||||
PCRE2_UCHAR *slot =
|
||||
@ -524,14 +523,13 @@ for (;;)
|
||||
|
||||
case OP_REF:
|
||||
case OP_REFI:
|
||||
if (dupcapused) return -1;
|
||||
recno = GET2(cc, 1);
|
||||
if (recno <= backref_cache[0] && backref_cache[recno] >= 0)
|
||||
d = backref_cache[recno];
|
||||
else
|
||||
{
|
||||
int i;
|
||||
if ((re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
if (!dupcapused && (re->overall_options & PCRE2_MATCH_UNSET_BACKREF) == 0)
|
||||
{
|
||||
ce = cs = (PCRE2_UCHAR *)PRIV(find_bracket)(startcode, utf, recno);
|
||||
if (cs == NULL) return -2;
|
||||
|
6
testdata/testoutput2
vendored
6
testdata/testoutput2
vendored
@ -14607,7 +14607,7 @@ Subject length lower bound = 65535
|
||||
Capture group count = 1
|
||||
Max back reference = 1
|
||||
Starting code units: a b
|
||||
Subject length lower bound = 0
|
||||
Subject length lower bound = 1
|
||||
|
||||
/(?|(aaa)|(b))(?1)/I
|
||||
Capture group count = 1
|
||||
@ -14625,7 +14625,7 @@ Max back reference = 1
|
||||
Named capture groups:
|
||||
a 1
|
||||
Starting code units: a b
|
||||
Subject length lower bound = 0
|
||||
Subject length lower bound = 1
|
||||
|
||||
/(?|(?'a'aaa)|(?'a'b))(?'a'cccc)\k'a'/I,dupnames
|
||||
Capture group count = 2
|
||||
@ -14636,7 +14636,7 @@ Named capture groups:
|
||||
Options: dupnames
|
||||
Starting code units: a b
|
||||
Last code unit = 'c'
|
||||
Subject length lower bound = 1
|
||||
Subject length lower bound = 5
|
||||
|
||||
/ab{3cd/
|
||||
ab{3cd
|
||||
|
Loading…
Reference in New Issue
Block a user