Fix \Q\E quantification bug.

This commit is contained in:
ph10 2016-12-11 16:42:29 +00:00
parent a8b0c38cfc
commit 1d535a96d9
4 changed files with 42 additions and 12 deletions

View File

@ -218,6 +218,10 @@ followed by a caseful back reference, could lose the caselessness of the first
repeated back reference (example: /(Z)(a)\2{1,2}?(?-i)\1X/i should match ZaAAZX
but didn't).
+35. If \Q was preceded by a quantified item, and the following \E was followed
+by '?' or '+', and there was at least one literal character between them, an
+internal error "unexpected repeat" occurred (example: /.+\QX\E+/).
Version 10.22 29-July-2016
--------------------------

View File

@ -1352,7 +1352,7 @@ entry, ptr is pointing at the character after \. On exit, it points after the
final code unit of the escape sequence.
This function is also called from pcre2_substitute() to handle escape sequences
-in replacement strings. In this case, the cb argument is NULL, and in the case
+in replacement strings. In this case, the cb argument is NULL, and in the case
of escapes that have further processing, only sequences that define a data
character are recognised. The isclass argument is not relevant; the options
argument is the final value of the compiled pattern's options.
@ -2327,6 +2327,7 @@ while (ptr < ptrend)
parsed_pattern = manage_callouts(thisptr, &previous_callout, options,
parsed_pattern, cb);
PARSED_LITERAL(c, parsed_pattern);
+meta_quantifier = 0;
}
continue; /* Next character */
}
@ -2362,7 +2363,7 @@ while (ptr < ptrend)
case CHAR_RIGHT_PARENTHESIS:
inverbname = FALSE;
-okquantifier = FALSE; /* Was probably set by literals */
+okquantifier = FALSE; /* Was probably set by literals */
/* This is the length in characters */
verbnamelength = (PCRE2_SIZE)(parsed_pattern - verblengthptr - 1);
/* But the limit on the length is in code units */
@ -2405,10 +2406,10 @@ while (ptr < ptrend)
continue; /* Next character in pattern */
}
-/* At the point we must process everything that must not change the
-qualification state. This is mainly comments, but we handle \Q and \E here as
-well, so that an item such as A\Q\E+ is treated as A+, as in Perl. An
-isolated \E is ignored. */
+/* Not a verb name character. At this point we must process everything that
+must not change the quantification state. This is mainly comments, but we
+handle \Q and \E here as well, so that an item such as A\Q\E+ is treated as
+A+, as in Perl. An isolated \E is ignored. */
if (c == CHAR_BACKSLASH && ptr < ptrend)
{

10
testdata/testinput2 vendored
View File

@ -4923,12 +4923,16 @@ a)"xI
%(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout
/./newline=crlf
-\=ph
+\=ph
/(\x0e00\000000\xc)/replace=\P,substitute_extended
-\x0e00\000000\xc
+\x0e00\000000\xc
//replace=0
-\=offset=7
+\=offset=7
+".+\QX\E+"B,no_auto_possess
+".+\QX\E+"B,auto_callout,no_auto_possess
# End of testinput2

27
testdata/testoutput2 vendored
View File

@ -15375,17 +15375,38 @@ Failed: error 109 at offset 6: quantifier does not follow a repeatable item
%(*:(:(svvvvvvvvvv:]*[ Z!*;[]*[^[]*!^[+.+{{2,7}' _\\\\\\\\\\\\\)?.:.. *w////\\\Q\\\\\\\\\\\\\\\T\\\\\+/?/////'+\\\EEE?/////'+/*+/[^K]?]//(w)%never_backslash_c,alt_verbnames,auto_callout
/./newline=crlf
-\=ph
+\=ph
No match
/(\x0e00\000000\xc)/replace=\P,substitute_extended
-\x0e00\000000\xc
+\x0e00\000000\xc
Failed: error -57 at offset 2 in replacement: bad escape sequence in replacement string
//replace=0
-\=offset=7
+\=offset=7
Failed: error -33: bad offset value
+".+\QX\E+"B,no_auto_possess
+------------------------------------------------------------------
+Bra
+Any+
+X+
+Ket
+End
+------------------------------------------------------------------
+".+\QX\E+"B,auto_callout,no_auto_possess
+------------------------------------------------------------------
+Bra
+Callout 255 0 4
+Any+
+Callout 255 4 4
+X+
+Callout 255 8 0
+Ket
+End
+------------------------------------------------------------------
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data