Fix bad memory computation for "(*UTF)[\S\V\H]" (a pattern with a negated

class (\S) and explicit wide characters).
This commit is contained in:
ph10 2015-03-25 19:26:27 +00:00
parent 698f0a04ee
commit 625fd31e3e
4 changed files with 31 additions and 26 deletions

View File

@ -30,6 +30,13 @@ possessification code could take exponential time to complete. A recursion
depth limit of 10000 has been imposed to limit the resources used by this
optimization. This infelicity was discovered by the LLVM fuzzer.
9. For a pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class
such as \S in non-UCP mode, explicit wide characters (> 255) can be ignored
because \S ensures they are all in the class. The code for doing this was
interacting badly with the code for computing the amount of space needed to
compile the pattern, leading to a buffer overflow. This bug was discovered by
the LLVM fuzzer.
Version 10.10 06-March-2015
---------------------------

View File

@ -3556,20 +3556,6 @@ for (;; ptr++)
}
#endif
#ifdef SUPPORT_WIDE_CHARS
/* In the pre-compile phase, accumulate the length of any wide characters
and reset the pointer. This is so that very large classes that contain a
zillion wide characters no longer overwrite the work space (which is on
the stack). We have to remember that there was XCLASS data, however. */
if (lengthptr != NULL && class_uchardata > class_uchardata_base)
{
xclass = TRUE;
*lengthptr += class_uchardata - class_uchardata_base;
class_uchardata = class_uchardata_base;
}
#endif
/* Inside \Q...\E everything is literal except \E */
if (inescq)
@ -4074,20 +4060,28 @@ for (;; ptr++)
nestptr = NULL;
c = *(++ptr);
}
if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break;
} /* End of main class-processing loop */
/* We will need an XCLASS if data has been placed in class_uchardata. In
the second phase this is a sufficient test. However, in the pre-compile
phase, class_uchardata gets emptied to prevent workspace overflow, so
only if the very last character in the class needs XCLASS will it contain
anything at this point. For this reason, xclass gets set TRUE above when
class_uchardata is emptied, and that's why this code is the way it is here
instead of just doing a test on class_uchardata below. */
#ifdef SUPPORT_WIDE_CHARS
if (class_uchardata > class_uchardata_base) xclass = TRUE;
/* If any wide characters have been encountered, set xclass = TRUE. Then,
in the pre-compile phase, accumulate the length of the wide characters
and reset the pointer. This is so that very large classes that contain a
zillion wide characters do not overwrite the work space (which is on the
stack). */
if (class_uchardata > class_uchardata_base)
{
xclass = TRUE;
if (lengthptr != NULL)
{
*lengthptr += class_uchardata - class_uchardata_base;
class_uchardata = class_uchardata_base;
}
}
#endif
/* An unescaped ] ends the class */
if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break;
} /* End of main class-processing loop */
/* If this is the first thing in the branch, there can be no first char
setting, whatever the repeat count. Any reqcu setting must remain
@ -4107,12 +4101,12 @@ for (;; ptr++)
be listed) there are no characters < 256, we can omit the bitmap in the
actual compiled code. */
#ifdef SUPPORT_WIDE_CHARS
#ifdef SUPPORT_UNICODE
if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0))
#elif PCRE2_CODE_UNIT_WIDTH != 8
if (xclass && !should_flip_negation)
#endif
#ifdef SUPPORT_WIDE_CHARS
{
*class_uchardata++ = XCL_END; /* Marks the end of extra data */
*code++ = OP_XCLASS;

2
testdata/testinput4 vendored
View File

@ -2219,4 +2219,6 @@
/[A-`]/i,utf
abcdefghijklmno
"[\S\V\H]"utf
# End of testinput4

View File

@ -3739,4 +3739,6 @@ No match
abcdefghijklmno
0: a
"[\S\V\H]"utf
# End of testinput4