From 625fd31e3e7d6ecb7b786cd41f1e5a280ee1dd22 Mon Sep 17 00:00:00 2001 From: ph10 Date: Wed, 25 Mar 2015 19:26:27 +0000 Subject: [PATCH] Fix bad memory computation for "(*UTF)[\S\V\H]" (a pattern with a negative class (\S) and explicit wide characters). --- ChangeLog | 7 +++++++ src/pcre2_compile.c | 46 +++++++++++++++++++------------------------- testdata/testinput4 | 2 ++ testdata/testoutput4 | 2 ++ 4 files changed, 31 insertions(+), 26 deletions(-) diff --git a/ChangeLog b/ChangeLog index 2d42176..53422ed 100644 --- a/ChangeLog +++ b/ChangeLog @@ -30,6 +30,13 @@ possessification code could take exponential time to complete. A recursion depth limit of 10000 has been imposed to limit the resources used by this optimization. This infelicity was discovered by the LLVM fuzzer. +9. In a pattern such as /(*UTF)[\S\V\H]/, which contains a negated special class +such as \S in non-UCP mode, explicit wide characters (> 255) can be ignored +because \S ensures they are all in the class. The code for doing this was +interacting badly with the code for computing the amount of space needed to +compile the pattern, leading to a buffer overflow. This bug was discovered by +the LLVM fuzzer. + Version 10.10 06-March-2015 --------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index dee00c0..1b8c939 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -3556,20 +3556,6 @@ for (;; ptr++) } #endif -#ifdef SUPPORT_WIDE_CHARS - /* In the pre-compile phase, accumulate the length of any wide characters - and reset the pointer. This is so that very large classes that contain a - zillion wide characters no longer overwrite the work space (which is on - the stack). We have to remember that there was XCLASS data, however.
*/ - - if (lengthptr != NULL && class_uchardata > class_uchardata_base) - { - xclass = TRUE; - *lengthptr += class_uchardata - class_uchardata_base; - class_uchardata = class_uchardata_base; - } -#endif - /* Inside \Q...\E everything is literal except \E */ if (inescq) @@ -4074,20 +4060,28 @@ for (;; ptr++) nestptr = NULL; c = *(++ptr); } - if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break; - } /* End of main class-processing loop */ - - /* We will need an XCLASS if data has been placed in class_uchardata. In - the second phase this is a sufficient test. However, in the pre-compile - phase, class_uchardata gets emptied to prevent workspace overflow, so it - only if the very last character in the class needs XCLASS will it contain - anything at this point. For this reason, xclass gets set TRUE above when - class_uchardata is emptied, and that's why this code is the way it is here - instead of just doing a test on class_uchardata below. */ #ifdef SUPPORT_WIDE_CHARS - if (class_uchardata > class_uchardata_base) xclass = TRUE; + /* If any wide characters have been encountered, set xclass = TRUE. Then, + in the pre-compile phase, accumulate the length of the wide characters + and reset the pointer. This is so that very large classes that contain a + zillion wide characters do not overwrite the work space (which is on the + stack). */ + + if (class_uchardata > class_uchardata_base) + { + xclass = TRUE; + if (lengthptr != NULL) + { + *lengthptr += class_uchardata - class_uchardata_base; + class_uchardata = class_uchardata_base; + } + } #endif + /* An unescaped ] ends the class */ + + if (c == CHAR_RIGHT_SQUARE_BRACKET && !inescq) break; + } /* End of main class-processing loop */ /* If this is the first thing in the branch, there can be no first char setting, whatever the repeat count. Any reqcu setting must remain @@ -4107,12 +4101,12 @@ for (;; ptr++) be listed) there are no characters < 256, we can omit the bitmap in the actual compiled code. 
*/ +#ifdef SUPPORT_WIDE_CHARS #ifdef SUPPORT_UNICODE if (xclass && (!should_flip_negation || (options & PCRE2_UCP) != 0)) #elif PCRE2_CODE_UNIT_WIDTH != 8 if (xclass && !should_flip_negation) #endif -#ifdef SUPPORT_WIDE_CHARS { *class_uchardata++ = XCL_END; /* Marks the end of extra data */ *code++ = OP_XCLASS; diff --git a/testdata/testinput4 b/testdata/testinput4 index 7f7d4ec..c50169d 100644 --- a/testdata/testinput4 +++ b/testdata/testinput4 @@ -2219,4 +2219,6 @@ /[A-`]/i,utf abcdefghijklmno +"[\S\V\H]"utf + # End of testinput4 diff --git a/testdata/testoutput4 b/testdata/testoutput4 index 80b14c6..8364515 100644 --- a/testdata/testoutput4 +++ b/testdata/testoutput4 @@ -3739,4 +3739,6 @@ No match abcdefghijklmno 0: a +"[\S\V\H]"utf + # End of testinput4