From 35849de261f454a3037b1de56353c2b3c2015af2 Mon Sep 17 00:00:00 2001 From: ph10 Date: Mon, 18 May 2015 17:31:29 +0000 Subject: [PATCH] Fix buffer overflow for lookbehind with mutually recursive groups. --- ChangeLog | 3 +++ src/pcre2_compile.c | 12 +++++++++--- testdata/testinput2 | 2 ++ testdata/testoutput2 | 3 +++ 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6665ac0..57c8999 100644 --- a/ChangeLog +++ b/ChangeLog @@ -129,6 +129,9 @@ as an int; fixed by writing it as 1u). 32. Fix pcre2grep compile when -std=c99 is used with gcc, though it still gives a warning for "fileno" unless -std=gnu99 us used. +33. A lookbehind assertion within a set of mutually recursive subpatterns could +provoke a buffer overflow. This bug was discovered by the LLVM fuzzer. + Version 10.10 06-March-2015 --------------------------- diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 32c2aa2..07b23c8 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -886,14 +886,18 @@ for (;;) cc += 1 + LINK_SIZE; break; - /* Skip over assertive subpatterns */ + /* Skip over assertive subpatterns. Note that we must increment cc by + 1 + LINK_SIZE at the end, not by PRIV(OP_lengths)[*cc] because in a recursive + situation this assertion may be the one that is ultimately being checked + for having a fixed length, in which case its terminating OP_KET will have + been temporarily replaced by OP_END. */ case OP_ASSERT: case OP_ASSERT_NOT: case OP_ASSERTBACK: case OP_ASSERTBACK_NOT: do cc += GET(cc, 1); while (*cc == OP_ALT); - cc += PRIV(OP_lengths)[*cc]; + cc += 1 + LINK_SIZE; break; /* Skip over things that don't match chars */ @@ -8143,7 +8147,9 @@ if (errorcode == 0 && cb.check_lookbehind) /* Loop, searching for OP_REVERSE items, and process those that do not have their length set. (Actually, it will also re-process any that have a length of zero, but that is a pathological case, and it does no harm.) 
When we find - one, we temporarily terminate the branch it is in while we scan it. */ + one, we temporarily terminate the branch it is in while we scan it. Note that + calling find_bracket() with a negative group number returns a pointer to the + OP_REVERSE item, not the actual lookbehind. */ for (cc = (PCRE2_UCHAR *)PRIV(find_bracket)(codestart, utf, -1); cc != NULL; diff --git a/testdata/testinput2 b/testdata/testinput2 index de81888..a00a3d8 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -4306,4 +4306,6 @@ a random value. /Ix "(?J)(?'d'(?'d'\g{d}))" +"(?=!((?2)(?))({8(?<=(?1){29}8bbbb\x16\xd\xc6^($(\xa9H4){4}h}?1)B))\x15')" + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 1113bae..51a04a1 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -14405,4 +14405,7 @@ Failed: error 115 at offset 26: reference to non-existent subpattern "(?J)(?'d'(?'d'\g{d}))" +"(?=!((?2)(?))({8(?<=(?1){29}8bbbb\x16\xd\xc6^($(\xa9H4){4}h}?1)B))\x15')" +Failed: error 125 at offset 72: lookbehind assertion is not fixed length + # End of testinput2