From bcf460622d869c36f13214a52757433f338eb39a Mon Sep 17 00:00:00 2001 From: ph10 Date: Fri, 7 Apr 2017 08:46:29 +0000 Subject: [PATCH] Extend auto-anchoring to ignore "never-obeyed" groups at the start. --- ChangeLog | 5 +++++ src/pcre2_compile.c | 12 ++++++++++++ testdata/testinput2 | 16 ++++++++++++++++ testdata/testoutput2 | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+) diff --git a/ChangeLog b/ChangeLog index fe2a2d8..486ee88 100644 --- a/ChangeLog +++ b/ChangeLog @@ -116,6 +116,11 @@ a message, and abandon the run (this would have detected #13 above). 20. Applied Jason Hood's patches (slightly modified) to pcre2grep, to implement the --output=text (-O) option and the inbuilt callout echo. +21. Extend auto-anchoring etc. to ignore groups with a zero quantifier and +single-branch conditions with a false condition (e.g. DEFINE) at the start of a +branch. For example, /(?(DEFINE)...)^A/ and /(...){0}^B/ are now flagged as +anchored. + Version 10.23 14-February-2017 ------------------------------ diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c index 0b6175b..bab1494 100644 --- a/src/pcre2_compile.c +++ b/src/pcre2_compile.c @@ -4164,6 +4164,18 @@ for (;;) case OP_CALLOUT_STR: code += GET(code, 1 + 2*LINK_SIZE); break; + + case OP_SKIPZERO: + code += 2 + GET(code, 2) + LINK_SIZE; + break; + + case OP_COND: + case OP_SCOND: + if (code[1+LINK_SIZE] != OP_FALSE || /* Not DEFINE */ + code[GET(code, 1)] != OP_KET) /* More than one branch */ + return code; + code += GET(code, 1) + 1 + LINK_SIZE; + break; default: return code; diff --git a/testdata/testinput2 b/testdata/testinput2 index cd1cbfa..c49c26b 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5040,4 +5040,20 @@ a)"xI #subject -no_jit +# Check auto-anchoring when there is a group that is never obeyed at +# the start of a branch. + +/(?(DEFINE)(a))^bc/I + +/(a){0}.*bc/sI + +# This should be anchored, as the condition is always false and there is +# no alternative branch. 
+ +/(?(VERSION>=999)yes)^bc/I + +# This should not be anchored. + +/(?(VERSION>=999)yes|no)^bc/I + # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index 5251a0e..539f853 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -15576,6 +15576,38 @@ No match #subject -no_jit +# Check auto-anchoring when there is a group that is never obeyed at +# the start of a branch. + +/(?(DEFINE)(a))^bc/I +Capturing subpattern count = 1 +Compile options: +Overall options: anchored +Subject length lower bound = 2 + +/(a){0}.*bc/sI +Capturing subpattern count = 1 +Compile options: dotall +Overall options: anchored dotall +Last code unit = 'c' +Subject length lower bound = 2 + +# This should be anchored, as the condition is always false and there is +# no alternative branch. + +/(?(VERSION>=999)yes)^bc/I +Capturing subpattern count = 0 +Compile options: +Overall options: anchored +Subject length lower bound = 2 + +# This should not be anchored. + +/(?(VERSION>=999)yes|no)^bc/I +Capturing subpattern count = 0 +Last code unit = 'c' +Subject length lower bound = 4 + # End of testinput2 Error -63: PCRE2_ERROR_BADDATA (unknown error number) Error -62: bad serialized data