From b1fb07001de491d7612bf66be73c569cc21e4e53 Mon Sep 17 00:00:00 2001 From: zherczeg Date: Tue, 23 Jul 2019 12:34:58 +0000 Subject: [PATCH] Follow the partial matching changes in JIT. --- src/pcre2_jit_compile.c | 19 ++++++++++++++----- src/sljit/sljitConfigInternal.h | 4 ++++ src/sljit/sljitNativeX86_64.c | 2 +- testdata/testinput2 | 24 ++++++++++++------------ testdata/testoutput2 | 24 ++++++++++++------------ 5 files changed, 43 insertions(+), 30 deletions(-) diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c index 77751b1..ebc1779 100644 --- a/src/pcre2_jit_compile.c +++ b/src/pcre2_jit_compile.c @@ -413,6 +413,8 @@ typedef struct compiler_common { sljit_sw lcc; /* Mode can be PCRE2_JIT_COMPLETE and others. */ int mode; + /* TRUE, when empty match is accepted for partial matching. */ + BOOL allow_empty_partial; /* TRUE, when minlength is greater than 0. */ BOOL might_be_empty; /* \K is found in the pattern. */ @@ -3303,7 +3305,7 @@ SLJIT_ASSERT(!force || common->mode != PCRE2_JIT_COMPLETE); if (common->mode == PCRE2_JIT_COMPLETE) return; -if (!force) +if (!force && !common->allow_empty_partial) jump = CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0); else if (common->mode == PCRE2_JIT_PARTIAL_SOFT) jump = CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1); @@ -3365,7 +3367,11 @@ if (common->mode == PCRE2_JIT_COMPLETE) /* Partial matching mode. */ jump = CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0); -add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); +if (!common->allow_empty_partial) + add_jump(compiler, backtracks, CMP(SLJIT_GREATER_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, STR_PTR, 0)); +else if (common->mode == PCRE2_JIT_PARTIAL_SOFT) + add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_MEM1(SLJIT_SP), common->start_used_ptr, SLJIT_IMM, -1)); + if (common->mode == PCRE2_JIT_PARTIAL_SOFT) { OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_SP), common->hit_start, SLJIT_IMM, 0); @@ -8332,12 +8338,14 @@ switch(type) JUMPHERE(jump[3]); } JUMPHERE(jump[0]); - check_partial(common, FALSE); + if (common->mode != PCRE2_JIT_COMPLETE) + check_partial(common, TRUE); return cc; case OP_EOD: add_jump(compiler, backtracks, CMP(SLJIT_LESS, STR_PTR, 0, STR_END, 0)); - check_partial(common, FALSE); + if (common->mode != PCRE2_JIT_COMPLETE) + check_partial(common, TRUE); return cc; case OP_DOLL: @@ -12642,7 +12650,7 @@ struct sljit_jump *once = NULL; struct sljit_jump *cond = NULL; struct sljit_label *rmin_label = NULL; struct sljit_label *exact_label = NULL; -struct sljit_put_label *put_label; +struct sljit_put_label *put_label = NULL; if (*cc == OP_BRAZERO || *cc == OP_BRAMINZERO) { @@ -13696,6 +13704,7 @@ common->fcc = tables + fcc_offset; common->lcc = (sljit_sw)(tables + lcc_offset); common->mode = mode; common->might_be_empty = re->minlength == 0; +common->allow_empty_partial = (re->max_lookbehind > 0) || (re->flags & PCRE2_MATCH_EMPTY) != 0; common->nltype = NLTYPE_FIXED; switch(re->newline_convention) { diff --git a/src/sljit/sljitConfigInternal.h b/src/sljit/sljitConfigInternal.h index ba60311..acba9da 100644 --- a/src/sljit/sljitConfigInternal.h +++ b/src/sljit/sljitConfigInternal.h @@ -214,6 +214,10 @@ #define SLJIT_MEMCPY(dest, src, len) memcpy(dest, src, len) #endif +#ifndef SLJIT_MEMMOVE +#define SLJIT_MEMMOVE(dest, src, len) memmove(dest, src, len) +#endif + #ifndef SLJIT_ZEROMEM #define SLJIT_ZEROMEM(dest, len) memset(dest, 0, len) #endif diff --git a/src/sljit/sljitNativeX86_64.c b/src/sljit/sljitNativeX86_64.c index 8ebbd1c..5758711 100644 --- a/src/sljit/sljitNativeX86_64.c +++ b/src/sljit/sljitNativeX86_64.c @@ -103,7 +103,7 @@ static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, slji } code_ptr -= put_label->flags + (2 + sizeof(sljit_uw)); - SLJIT_MEMCPY(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags); + SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags); SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W); diff --git a/testdata/testinput2 b/testdata/testinput2 index aad9f1d..16c03c9 100644 --- a/testdata/testinput2 +++ b/testdata/testinput2 @@ -5694,35 +5694,35 @@ a)"xI /(\2)((?=(?<=\1)))/ /c*+(?<=[bc])/ - abc\=ph,no_jit - ab\=ph,no_jit - abc\=ps,no_jit - ab\=ps,no_jit + abc\=ph + ab\=ph + abc\=ps + ab\=ps /c++(?<=[bc])/ - abc\=ph,no_jit - ab\=ph,no_jit + abc\=ph + ab\=ph /(?<=(?=.(?<=x)))/ abx - ab\=ph,no_jit + ab\=ph bxyz xyz /\z/ - abc\=ph,no_jit + abc\=ph abc\=ps /\Z/ - abc\=ph,no_jit + abc\=ph abc\=ps - abc\n\=ph,no_jit + abc\n\=ph abc\n\=ps /(?![ab]).*/ - ab\=ph,no_jit + ab\=ph /c*+/ - ab\=ph,offset=2,no_jit + ab\=ph,offset=2 # End of testinput2 diff --git a/testdata/testoutput2 b/testdata/testoutput2 index b23b7d9..2fcdac6 100644 --- a/testdata/testoutput2 +++ b/testdata/testoutput2 @@ -17190,25 +17190,25 @@ Subject length lower bound = 1 Failed: error 125 at offset 8: lookbehind assertion is not fixed length /c*+(?<=[bc])/ - abc\=ph,no_jit + abc\=ph Partial match: c - ab\=ph,no_jit + ab\=ph Partial match: - abc\=ps,no_jit + abc\=ps 0: c - ab\=ps,no_jit + ab\=ps 0: /c++(?<=[bc])/ - abc\=ph,no_jit + abc\=ph Partial match: c - ab\=ph,no_jit + ab\=ph Partial match: /(?<=(?=.(?<=x)))/ abx 0: - ab\=ph,no_jit + ab\=ph Partial match: bxyz 0: @@ -17216,27 +17216,27 @@ Partial match: 0: /\z/ - abc\=ph,no_jit + abc\=ph Partial match: abc\=ps 0: /\Z/ - abc\=ph,no_jit + abc\=ph Partial match: abc\=ps 0: - abc\n\=ph,no_jit + abc\n\=ph Partial match: \x0a abc\n\=ps 0: /(?![ab]).*/ - ab\=ph,no_jit + ab\=ph Partial match: /c*+/ - ab\=ph,offset=2,no_jit + ab\=ph,offset=2 Partial match: # End of testinput2