Fix negated POSIX class bug.
This commit is contained in:
parent
865ab5cd5d
commit
4423a9e968
@ -337,6 +337,9 @@ misbehaved. This bug was found by the LLVM fuzzer.
|
||||
100. The error for an invalid UTF pattern string always gave the code unit
|
||||
offset as zero instead of where the invalidity was found.
|
||||
|
||||
101. Further to 97 above, negated classes such as [^[:^ascii:]\d] were also not
|
||||
working correctly in UCP mode.
|
||||
|
||||
|
||||
Version 10.20 30-June-2015
|
||||
--------------------------
|
||||
|
@ -3857,7 +3857,7 @@ for (;; ptr++)
|
||||
{
|
||||
BOOL negate_class;
|
||||
BOOL should_flip_negation;
|
||||
BOOL match_all_wide_chars;
|
||||
BOOL match_all_or_no_wide_chars;
|
||||
BOOL possessive_quantifier;
|
||||
BOOL is_quantifier;
|
||||
BOOL is_recurse;
|
||||
@ -4207,9 +4207,10 @@ for (;; ptr++)
|
||||
/* If a non-extended class contains a negative special such as \S, we need
|
||||
to flip the negation flag at the end, so that support for characters > 255
|
||||
works correctly (they are all included in the class). An extended class may
|
||||
need to insert specific matching code for wide characters. */
|
||||
need to insert specific matching or non-matching code for wide characters.
|
||||
*/
|
||||
|
||||
should_flip_negation = match_all_wide_chars = FALSE;
|
||||
should_flip_negation = match_all_or_no_wide_chars = FALSE;
|
||||
|
||||
/* Extended class (xclass) will be used when characters > 255
|
||||
might match. */
|
||||
@ -4365,21 +4366,20 @@ for (;; ptr++)
|
||||
|
||||
/* For the other POSIX classes (ascii, xdigit) we are going to fall
|
||||
through to the non-UCP case and build a bit map for characters with
|
||||
code points less than 256. If we are in a negated POSIX class
|
||||
within a non-negated overall class, characters with code points
|
||||
greater than 255 must all match. In the special case where we have
|
||||
not yet generated any xclass data, and this is the final item in
|
||||
the overall class, we need do nothing: later on, the opcode
|
||||
OP_NCLASS will be used to indicate that characters greater than 255
|
||||
are acceptable. If we have already seen an xclass item or one may
|
||||
follow (we have to assume that it might if this is not the end of
|
||||
the class), set a flag to cause the generation of an explicit range
|
||||
for all wide codepoints. */
|
||||
code points less than 256. However, if we are in a negated POSIX
|
||||
class, characters with code points greater than 255 must either all
|
||||
match or all not match, depending on whether the whole class is not
|
||||
or is negated. For example, for [[:^ascii:]... they must all match,
|
||||
whereas for [^[:^xdigit:]... they must not.
|
||||
|
||||
In the special case where there are no xclass items, this is
|
||||
automatically handled by the use of OP_CLASS or OP_NCLASS, but an
|
||||
explicit range is needed for OP_XCLASS. Setting a flag here causes
|
||||
the range to be generated later when it is known that OP_XCLASS is
|
||||
required. */
|
||||
|
||||
default:
|
||||
if (!negate_class && local_negate &&
|
||||
(xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
|
||||
match_all_wide_chars = TRUE;
|
||||
match_all_or_no_wide_chars |= local_negate;
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -4878,13 +4878,14 @@ for (;; ptr++)
|
||||
(\p or \P), we have to compile an extended class, with its own opcode,
|
||||
unless there were no property settings and there was a negated special such
|
||||
as \S in the class, and PCRE2_UCP is not set, because in that case all
|
||||
characters > 255 are in the class, so any that were explicitly given as
|
||||
well can be ignored.
|
||||
characters > 255 are in or not in the class, so any that were explicitly
|
||||
given as well can be ignored.
|
||||
|
||||
In the UCP case, if certain negated POSIX classes ([:^ascii:] or
|
||||
[:^xdigit:]) were present in a non-negated class, we again have to match
|
||||
all wide characters, indicated by match_all_wide_chars being true. We do
|
||||
this by including an explicit range.
|
||||
[:^xdigit:]) were present in a class, we either have to match or not match
|
||||
all wide characters (depending on whether the whole class is not or is
|
||||
negated). This requirement is indicated by match_all_or_no_wide_chars being
|
||||
true. We do this by including an explicit range, which works in both cases.
|
||||
|
||||
If, when generating an xclass, there are no characters < 256, we can omit
|
||||
the bitmap in the actual compiled code. */
|
||||
@ -4897,12 +4898,11 @@ for (;; ptr++)
|
||||
if (xclass && (xclass_has_prop || !should_flip_negation))
|
||||
#endif
|
||||
{
|
||||
if (match_all_wide_chars)
|
||||
if (match_all_or_no_wide_chars)
|
||||
{
|
||||
*class_uchardata++ = XCL_RANGE;
|
||||
class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
|
||||
class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT,
|
||||
class_uchardata);
|
||||
class_uchardata += PRIV(ord2utf)(MAX_UTF_CODE_POINT, class_uchardata);
|
||||
}
|
||||
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
|
||||
*code++ = OP_XCLASS;
|
||||
|
Loading…
Reference in New Issue
Block a user