Fix overrun bug caused by conditional with assertion using too much memory.

This commit is contained in:
ph10 2016-11-18 18:59:37 +00:00
parent e86513c76f
commit 12220c8b72
6 changed files with 79 additions and 41 deletions

View File

@ -82,6 +82,13 @@ copied).
(i) An insufficient memory size was being computed for compiling with
PCRE2_AUTO_CALLOUT.
(j) A conditional group with an assertion condition used more memory than was
allowed for it during parsing, so too many of them could therefore
overrun a buffer.
(k) If parsing a pattern exactly filled the buffer, the internal test for
overrun did not check when the final META_END item was added.
4. Back references are now permitted in lookbehind assertions when there are
no duplicated group numbers (that is, (?| has not been used), and, if the

View File

@ -186,6 +186,7 @@ META_CLASS_EMPTY_NOT [^] negative empty class - ditto
META_CLASS_END ] end of non-empty class
META_CLASS_NOT [^ start non-empty negative class
META_COMMIT (*COMMIT)
META_COND_ASSERT (?(?assertion)
META_DOLLAR $ metacharacter
META_DOT . metacharacter
META_END End of pattern (this value is 0x80000000)
@ -274,9 +275,8 @@ This one is followed by an offset, for use in error messages, then a number:
META_COND_NUMBER (?([+-]digits)
The following are followed just by an offset, for use in error messages:
The following is followed just by an offset, for use in error messages:
META_COND_ASSERT (?(?assertion)
META_COND_DEFINE (?(DEFINE)
In fact, META_COND_ASSERT is used for any group starting (?( that does not

View File

@ -856,6 +856,7 @@ for (;;)
case META_BIGVALUE: fprintf(stderr, "META_BIGVALUE %.8x", *pptr++); break;
case META_CIRCUMFLEX: fprintf(stderr, "META_CIRCUMFLEX"); break;
case META_COND_ASSERT: fprintf(stderr, "META_COND_ASSERT"); break;
case META_DOLLAR: fprintf(stderr, "META_DOLLAR"); break;
case META_DOT: fprintf(stderr, "META_DOT"); break;
case META_ASTERISK: fprintf(stderr, "META *"); break;
@ -949,12 +950,6 @@ for (;;)
fprintf(stderr, "%zd", offset);
break;
case META_COND_ASSERT:
fprintf(stderr, "META_COND_ASSERT offset=");
GETOFFSET(offset, pptr);
fprintf(stderr, "%zd", offset);
break;
case META_COND_VERSION:
fprintf(stderr, "META (?(VERSION%s", (*pptr++ == 0)? "=" : ">=");
fprintf(stderr, "%d.", *pptr++);
@ -2189,12 +2184,12 @@ while (ptr < ptrend)
PCRE2_SPTR tempptr;
PCRE2_SPTR thisptr;
PCRE2_SIZE offset;
if (parsed_pattern >= parsed_pattern_end)
{
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
goto FAILED;
}
goto FAILED;
}
if (nest_depth > cb->cx->parens_nest_limit)
{
@ -2368,6 +2363,42 @@ while (ptr < ptrend)
parsed_pattern, cb);
}
/* If expect_cond_assert is 2, we have just passed (?( and are expecting an
assertion, possibly preceded by a callout. If the value is 1, we have just
had the callout and expect an assertion. There must be at least 3 more
characters in all cases. We know that the current character is an opening
parenthesis, as otherwise we wouldn't be here. Note that expect_cond_assert
may be negative, since all callouts just decrement it. */
if (expect_cond_assert > 0)
{
BOOL ok = ptrend - ptr >= 3 && ptr[0] == CHAR_QUESTION_MARK;
if (ok) switch(ptr[1])
{
case CHAR_C:
ok = expect_cond_assert == 2;
break;
case CHAR_EQUALS_SIGN:
case CHAR_EXCLAMATION_MARK:
break;
case CHAR_LESS_THAN_SIGN:
ok = ptr[2] == CHAR_EQUALS_SIGN || ptr[2] == CHAR_EXCLAMATION_MARK;
break;
default:
ok = FALSE;
}
if (!ok)
{
ptr--; /* Adjust error offset */
errorcode = ERR28;
goto FAILED;
}
}
/* Remember whether we are expecting a conditional assertion, and set the
default for this item. */
@ -3443,7 +3474,7 @@ while (ptr < ptrend)
ptr = startptr; /* To give a more useful message */
goto FAILED;
}
if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter))
if (*ptr == delimiter && (++ptr >= ptrend || *ptr != delimiter))
break;
}
@ -3519,18 +3550,16 @@ while (ptr < ptrend)
nest_depth++;
/* If the next character is ? there must be an assertion next (optionally
preceded by a callout). We do not check this here, but instead we just
preserve the offset so that the later check can give a sensible error
message. Pull back the pointer to the start of the assertion (or
callout), and set expect_cond_assert to 2. If this is still greater than
zero (callouts decrement it) when the next assertion is read, it will be
marked as a condition that must not be repeated. */
preceded by a callout). We do not check this here, but instead we set
expect_cond_assert to 2. If this is still greater than zero (callouts
decrement it) when the next assertion is read, it will be marked as a
condition that must not be repeated. A value greater than zero also
causes checking that an assertion (possibly with callout) follows. */
if (*ptr == CHAR_QUESTION_MARK)
{
*parsed_pattern++ = META_COND_ASSERT;
offset = (PCRE2_SIZE)(--ptr - cb->start_pattern - 2);
PUTOFFSET(offset, parsed_pattern);
ptr--; /* Pull pointer back to the opening parenthesis. */
expect_cond_assert = 2;
break; /* End of conditional */
}
@ -3902,6 +3931,11 @@ parsed_pattern = manage_callouts(ptr, &previous_callout, options,
/* Terminate the parsed pattern, then return success if all groups are closed.
Otherwise we have unclosed parentheses. */
if (parsed_pattern >= parsed_pattern_end)
{
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
goto FAILED;
}
*parsed_pattern = META_END;
if (nest_depth == 0) return 0;
@ -5806,23 +5840,10 @@ for (;; pptr++)
pptr += 3;
goto GROUP_PROCESS;
/* The condition is alleged to be an assertion, possibly preceded by a
callout, because it's not one of the others, and began with (?(?. This is
where the check for the next thing being an assertion (with optional
callout) is done. */
/* The condition is an assertion, possibly preceded by a callout. */
case META_COND_ASSERT:
bravalue = OP_COND;
GETPLUSOFFSET(offset, pptr);
i = (pptr[1] == META_CALLOUT_NUMBER)? 5 :
(pptr[1] == META_CALLOUT_STRING)? (5 + SIZEOFFSET) : 1;
if (META_CODE(pptr[i]) < META_LOOKAHEAD ||
META_CODE(pptr[i]) > META_LOOKBEHINDNOT)
{
*errorcodeptr = ERR28; /* Assertion expected */
cb->erroroffset = offset + 2; /* Point after initial (? */
return FALSE;
}
goto GROUP_PROCESS;
@ -8563,7 +8584,7 @@ for (;; pptr++)
goto CHECK_GROUP;
case META_COND_ASSERT:
pptr += 1 + SIZEOFFSET;
pptr += 1;
goto CHECK_GROUP;
case META_COND_VERSION:
@ -8733,6 +8754,7 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
case META_CLASS_END:
case META_CLASS_NOT:
case META_COMMIT:
case META_COND_ASSERT:
case META_DOLLAR:
case META_DOT:
case META_FAIL:
@ -8753,7 +8775,6 @@ for (pptr = cb->parsed_pattern; *pptr != META_END; pptr++)
case META_THEN:
break;
case META_COND_ASSERT:
case META_RECURSE:
pptr += SIZEOFFSET;
break;
@ -9178,8 +9199,8 @@ if (parsed_size_needed >= PARSED_PATTERN_DEFAULT_SIZE)
}
cb.parsed_pattern = heap_parsed_pattern;
}
cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed + 1;
cb.parsed_pattern_end = cb.parsed_pattern + parsed_size_needed + 1;
/* Do the parsing scan. */
errorcode = parse_regex(ptr, cb.external_options, &has_lookbehind, &cb);
@ -9569,7 +9590,7 @@ if ((re->overall_options & PCRE2_NO_START_OPTIMIZE) == 0 &&
goto HAD_CB_ERROR;
}
/* Control ends up here in all cases. When running under valgrind, make a
/* Control ends up here in all cases. When running under valgrind, make a
pattern's terminating zero defined again. If memory was obtained for the parsed
version of the pattern, free it before returning. Also free the list of named
groups if a larger one had to be obtained, and likewise the group information

4
testdata/testinput2 vendored
View File

@ -4912,4 +4912,8 @@ a)"xI
\=get=i00000000000000000000000000000000
\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012
"(?(?C))"
/(?(?(?(?(?(?))))))/
# End of testinput2

View File

@ -145,7 +145,7 @@ Failed: POSIX code 9: bad escape sequence at offset 4
Failed: POSIX code 11: unbalanced () at offset 6
"(?(?C))"
Failed: POSIX code 3: pattern error at offset 2
Failed: POSIX code 3: pattern error at offset 6
/abcd/substitute_extended
** Ignored with POSIX interface: substitute_extended

View File

@ -556,7 +556,7 @@ Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
Failed: error 115 at offset 3: reference to non-existent subpattern
/(?(?<ab))/
Failed: error 142 at offset 7: syntax error in subpattern name (missing terminator)
Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
/((?s)blah)\s+\1/I
Capturing subpattern count = 1
@ -15361,6 +15361,12 @@ Failed: error 157 at offset 6: \g is not followed by a braced, angle-bracketed,
\=get=i2345678901234567890123456789012,get=i1245678901234567890123456789012
** Too many characters in named 'get' modifiers
"(?(?C))"
Failed: error 128 at offset 6: assertion expected after (?( or (?(?C)
/(?(?(?(?(?(?))))))/
Failed: error 128 at offset 2: assertion expected after (?( or (?(?C)
# End of testinput2
Error -63: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data