pcre/testdata/testinput5
2020-03-25 17:18:33 +00:00

2192 lines
40 KiB
Plaintext
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# This set of tests checks the API, internals, and non-Perl stuff for UTF
# support, including Unicode properties. However, tests that give different
# results in 8-bit, 16-bit, and 32-bit modes are excluded (see tests 10 and
# 12).
#newline_default lf any anycrlf
# PCRE2 and Perl disagree about the characteristics of certain Unicode
# characters. For example, 061C was considered by Perl to be Arabic, though
# it was not listed as such in the Unicode Scripts.txt file for Unicode 8.
# However, it *is* in that file for Unicode 10, but when I came to re-check,
# Perl had changed in the meantime, with 5.026 not recognizing it as Arabic.
# 2066-2069 are graphic and printable according to Perl, though they are
# actually "isolate" control characters. That is why the following tests are
# here rather than in test 4.
/^[\p{Arabic}]/utf
\x{061c}
/^[[:graph:]]+$/utf,ucp
\= Expect no match
\x{61c}
\x{2066}
\x{2067}
\x{2068}
\x{2069}
/^[[:print:]]+$/utf,ucp
\= Expect no match
\x{61c}
\x{2066}
\x{2067}
\x{2068}
\x{2069}
/^[[:^graph:]]+$/utf,ucp
\x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}
\x{2028}\x{2029}\x{202f}\x{2065}\x{2066}\x{2067}\x{2068}\x{2069}
/^[[:^print:]]+$/utf,ucp
\x{09}\x{1D}\x{85}\x{61c}\x{2028}\x{2029}\x{2065}\x{2066}\x{2067}
\x{2068}\x{2069}
# Perl does not consider U+180e to be a space character. It is true that it
# does not appear in the Unicode PropList.txt file as such, but in many other
# sources it is listed as a space, and has been treated as such in PCRE for
# a long time.
/^>[[:blank:]]*/utf,ucp
>\x{20}\x{a0}\x{1680}\x{180e}\x{2000}\x{202f}\x{9}\x{b}\x{2028}
/^A\s+Z/utf,ucp
A\x{85}\x{180e}\x{2005}Z
/^A[\s]+Z/utf,ucp
A\x{2005}Z
A\x{85}\x{2005}Z
/^[[:graph:]]+$/utf,ucp
\= Expect no match
\x{180e}
/^[[:print:]]+$/utf,ucp
\x{180e}
/^[[:^graph:]]+$/utf,ucp
\x{09}\x{0a}\x{1D}\x{20}\x{85}\x{a0}\x{61c}\x{1680}\x{180e}
/^[[:^print:]]+$/utf,ucp
\= Expect no match
\x{180e}
# End of U+180E tests.
# ---------------------------------------------------------------------
/\x{110000}/IB,utf
/\o{4200000}/IB,utf
/\x{ffffffff}/utf
/\o{37777777777}/utf
/\x{100000000}/utf
/\o{77777777777}/utf
/\x{d800}/utf
/\o{154000}/utf
/\x{dfff}/utf
/\o{157777}/utf
/\x{d7ff}/utf
/\o{153777}/utf
/\x{e000}/utf
/\o{170000}/utf
/^\x{100}a\x{1234}/utf
\x{100}a\x{1234}bcd
/\x{0041}\x{2262}\x{0391}\x{002e}/IB,utf
\x{0041}\x{2262}\x{0391}\x{002e}
/.{3,5}X/IB,utf
\x{212ab}\x{212ab}\x{212ab}\x{861}X
/.{3,5}?/IB,utf
\x{212ab}\x{212ab}\x{212ab}\x{861}
/^[ab]/IB,utf
bar
\= Expect no match
c
\x{ff}
\x{100}
/\x{100}*(\d+|"(?1)")/utf
1234
"1234"
\x{100}1234
"\x{100}1234"
\x{100}\x{100}12ab
\x{100}\x{100}"12"
\= Expect no match
\x{100}\x{100}abcd
/\x{100}*/IB,utf
/a\x{100}*/IB,utf
/ab\x{100}*/IB,utf
/[\x{200}-\x{100}]/utf
/[Ā-Ą]/utf
\x{100}
\x{104}
\= Expect no match
\x{105}
\x{ff}
/[\xFF]/IB
>\xff<
/[^\xFF]/IB
/[Ä-Ü]/utf
Ö # Matches without Study
\x{d6}
/[Ä-Ü]/utf
Ö <-- Same with Study
\x{d6}
/[\x{c4}-\x{dc}]/utf
Ö # Matches without Study
\x{d6}
/[\x{c4}-\x{dc}]/utf
Ö <-- Same with Study
\x{d6}
/[^\x{100}]abc(xyz(?1))/IB,utf
/(\x{100}(b(?2)c))?/IB,utf
/(\x{100}(b(?2)c)){0,2}/IB,utf
/(\x{100}(b(?1)c))?/IB,utf
/(\x{100}(b(?1)c)){0,2}/IB,utf
/\W/utf
A.B
A\x{100}B
/\w/utf
\x{100}X
# Use no_start_optimize because the first code unit is different in 8-bit from
# the wider modes.
/^\ሴ/IB,utf,no_start_optimize
/()()()()()()()()()()
()()()()()()()()()()
()()()()()()()()()()
()()()()()()()()()()
A (x) (?41) B/x,utf
AxxB
/^[\x{100}\E-\Q\E\x{150}]/B,utf
/^[\QĀ\E-\QŐ\E]/B,utf
/^abc./gmx,newline=any,utf
abc1 \x0aabc2 \x0babc3xx \x0cabc4 \x0dabc5xx \x0d\x0aabc6 \x{0085}abc7 \x{2028}abc8 \x{2029}abc9 JUNK
/abc.$/gmx,newline=any,utf
abc1\x0a abc2\x0b abc3\x0c abc4\x0d abc5\x0d\x0a abc6\x{0085} abc7\x{2028} abc8\x{2029} abc9
/^a\Rb/bsr=unicode,utf
a\nb
a\rb
a\r\nb
a\x0bb
a\x0cb
a\x{85}b
a\x{2028}b
a\x{2029}b
\= Expect no match
a\n\rb
/^a\R*b/bsr=unicode,utf
ab
a\nb
a\rb
a\r\nb
a\x0bb
a\x0c\x{2028}\x{2029}b
a\x{85}b
a\n\rb
a\n\r\x{85}\x0cb
/^a\R+b/bsr=unicode,utf
a\nb
a\rb
a\r\nb
a\x0bb
a\x0c\x{2028}\x{2029}b
a\x{85}b
a\n\rb
a\n\r\x{85}\x0cb
\= Expect no match
ab
/^a\R{1,3}b/bsr=unicode,utf
a\nb
a\n\rb
a\n\r\x{85}b
a\r\n\r\nb
a\r\n\r\n\r\nb
a\n\r\n\rb
a\n\n\r\nb
\= Expect no match
a\n\n\n\rb
a\r
/\H\h\V\v/utf
X X\x0a
X\x09X\x0b
\= Expect no match
\x{a0} X\x0a
/\H*\h+\V?\v{3,4}/utf
\x09\x20\x{a0}X\x0a\x0b\x0c\x0d\x0a
\x09\x20\x{a0}\x0a\x0b\x0c\x0d\x0a
\x09\x20\x{a0}\x0a\x0b\x0c
\= Expect no match
\x09\x20\x{a0}\x0a\x0b
/\H\h\V\v/utf
\x{3001}\x{3000}\x{2030}\x{2028}
X\x{180e}X\x{85}
\= Expect no match
\x{2009} X\x0a
/\H*\h+\V?\v{3,4}/utf
\x{1680}\x{180e}\x{2007}X\x{2028}\x{2029}\x0c\x0d\x0a
\x09\x{205f}\x{a0}\x0a\x{2029}\x0c\x{2028}\x0a
\x09\x20\x{202f}\x0a\x0b\x0c
\= Expect no match
\x09\x{200a}\x{a0}\x{2028}\x0b
/[\h]/B,utf
>\x{1680}
/[\h]{3,}/B,utf
>\x{1680}\x{180e}\x{2000}\x{2003}\x{200a}\x{202f}\x{205f}\x{3000}<
/[\v]/B,utf
/[\H]/B,utf
/[\V]/B,utf
/.*$/newline=any,utf
\x{1ec5}
/a\Rb/I,bsr=anycrlf,utf
a\rb
a\nb
a\r\nb
\= Expect no match
a\x{85}b
a\x0bb
/a\Rb/I,bsr=unicode,utf
a\rb
a\nb
a\r\nb
a\x{85}b
a\x0bb
/a\R?b/I,bsr=anycrlf,utf
a\rb
a\nb
a\r\nb
\= Expect no match
a\x{85}b
a\x0bb
/a\R?b/I,bsr=unicode,utf
a\rb
a\nb
a\r\nb
a\x{85}b
a\x0bb
/.*a.*=.b.*/utf,newline=any
QQQ\x{2029}ABCaXYZ=!bPQR
\= Expect no match
a\x{2029}b
\x61\xe2\x80\xa9\x62
/[[:a\x{100}b:]]/utf
/a[^]b/utf,allow_empty_class,match_unset_backref
a\x{1234}b
a\nb
\= Expect no match
ab
/a[^]+b/utf,allow_empty_class,match_unset_backref
aXb
a\nX\nX\x{1234}b
\= Expect no match
ab
/(\x{de})\1/
\x{de}\x{de}
/X/newline=any,utf,firstline
A\x{1ec5}ABCXYZ
/Xa{2,4}b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
/Xa{2,4}?b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
/Xa{2,4}+b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
/X\x{123}{2,4}b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/X\x{123}{2,4}?b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/X\x{123}{2,4}+b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/X\x{123}{2,4}b/utf
\= Expect no match
Xx\=ps
X\x{123}x\=ps
X\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}\x{123}x\=ps
/X\x{123}{2,4}?b/utf
\= Expect no match
Xx\=ps
X\x{123}x\=ps
X\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}\x{123}x\=ps
/X\x{123}{2,4}+b/utf
\= Expect no match
Xx\=ps
X\x{123}x\=ps
X\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}x\=ps
X\x{123}\x{123}\x{123}\x{123}x\=ps
/X\d{2,4}b/utf
X\=ps
X3\=ps
X33\=ps
X333\=ps
X3333\=ps
/X\d{2,4}?b/utf
X\=ps
X3\=ps
X33\=ps
X333\=ps
X3333\=ps
/X\d{2,4}+b/utf
X\=ps
X3\=ps
X33\=ps
X333\=ps
X3333\=ps
/X\D{2,4}b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
/X\D{2,4}?b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
/X\D{2,4}+b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
/X\D{2,4}b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/X\D{2,4}?b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/X\D{2,4}+b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/X[abc]{2,4}b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
/X[abc]{2,4}?b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
/X[abc]{2,4}+b/utf
X\=ps
Xa\=ps
Xaa\=ps
Xaaa\=ps
Xaaaa\=ps
/X[abc\x{123}]{2,4}b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/X[abc\x{123}]{2,4}?b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/X[abc\x{123}]{2,4}+b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/X[^a]{2,4}b/utf
X\=ps
Xz\=ps
Xzz\=ps
Xzzz\=ps
Xzzzz\=ps
/X[^a]{2,4}?b/utf
X\=ps
Xz\=ps
Xzz\=ps
Xzzz\=ps
Xzzzz\=ps
/X[^a]{2,4}+b/utf
X\=ps
Xz\=ps
Xzz\=ps
Xzzz\=ps
Xzzzz\=ps
/X[^a]{2,4}b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/X[^a]{2,4}?b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/X[^a]{2,4}+b/utf
X\=ps
X\x{123}\=ps
X\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\=ps
X\x{123}\x{123}\x{123}\x{123}\=ps
/(Y)X\1{2,4}b/utf
YX\=ps
YXY\=ps
YXYY\=ps
YXYYY\=ps
YXYYYY\=ps
/(Y)X\1{2,4}?b/utf
YX\=ps
YXY\=ps
YXYY\=ps
YXYYY\=ps
YXYYYY\=ps
/(Y)X\1{2,4}+b/utf
YX\=ps
YXY\=ps
YXYY\=ps
YXYYY\=ps
YXYYYY\=ps
/(\x{123})X\1{2,4}b/utf
\x{123}X\=ps
\x{123}X\x{123}\=ps
\x{123}X\x{123}\x{123}\=ps
\x{123}X\x{123}\x{123}\x{123}\=ps
\x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
/(\x{123})X\1{2,4}?b/utf
\x{123}X\=ps
\x{123}X\x{123}\=ps
\x{123}X\x{123}\x{123}\=ps
\x{123}X\x{123}\x{123}\x{123}\=ps
\x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
/(\x{123})X\1{2,4}+b/utf
\x{123}X\=ps
\x{123}X\x{123}\=ps
\x{123}X\x{123}\x{123}\=ps
\x{123}X\x{123}\x{123}\x{123}\=ps
\x{123}X\x{123}\x{123}\x{123}\x{123}\=ps
/\bthe cat\b/utf
the cat\=ps
the cat\=ph
/abcd*/utf
xxxxabcd\=ps
xxxxabcd\=ph
/abcd*/i,utf
xxxxabcd\=ps
xxxxabcd\=ph
XXXXABCD\=ps
XXXXABCD\=ph
/abc\d*/utf
xxxxabc1\=ps
xxxxabc1\=ph
/(a)bc\1*/utf
xxxxabca\=ps
xxxxabca\=ph
/abc[de]*/utf
xxxxabcde\=ps
xxxxabcde\=ph
/X\W{3}X/utf
X\=ps
/\sxxx\s/utf,tables=2
AB\x{85}xxx\x{a0}XYZ
AB\x{a0}xxx\x{85}XYZ
/\S \S/utf,tables=2
\x{a2} \x{84}
'A#хц'Bx,newline=any,utf
'A#хц
PQ'Bx,newline=any,utf
/a+#хaa
z#XX?/Bx,newline=any,utf
/a+#хaa
z#х?/Bx,newline=any,utf
/\g{A}xxx#bXX(?'A'123)
(?'A'456)/Bx,newline=any,utf
/\g{A}xxx#bх(?'A'123)
(?'A'456)/Bx,newline=any,utf
/^\cģ/utf
/(\R*)(.)/s,utf
\r\n
\r\r\n\n\r
\r\r\n\n\r\n
/(\R)*(.)/s,utf
\r\n
\r\r\n\n\r
\r\r\n\n\r\n
/[^\x{1234}]+/Ii,utf
/[^\x{1234}]+?/Ii,utf
/[^\x{1234}]++/Ii,utf
/[^\x{1234}]{2}/Ii,utf
/f.*/
for\=ph
/f.*/s
for\=ph
/f.*/utf
for\=ph
/f.*/s,utf
for\=ph
/\x{d7ff}\x{e000}/utf
/\x{d800}/utf
/\x{dfff}/utf
/\h+/utf
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
/[\h\x{e000}]+/B,utf
\x{1681}\x{200b}\x{1680}\x{2000}\x{202f}\x{3000}
\x{3001}\x{2fff}\x{200a}\x{a0}\x{2000}
/\H+/utf
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
\x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
/[\H\x{d7ff}]+/B,utf
\x{1680}\x{180e}\x{167f}\x{1681}\x{180d}\x{180f}
\x{2000}\x{200a}\x{1fff}\x{200b}
\x{202f}\x{205f}\x{202e}\x{2030}\x{205e}\x{2060}
\x{a0}\x{3000}\x{9f}\x{a1}\x{2fff}\x{3001}
/\v+/utf
\x{2027}\x{2030}\x{2028}\x{2029}
\x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
/[\v\x{e000}]+/B,utf
\x{2027}\x{2030}\x{2028}\x{2029}
\x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
/\V+/utf
\x{2028}\x{2029}\x{2027}\x{2030}
\x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
/[\V\x{d7ff}]+/B,utf
\x{2028}\x{2029}\x{2027}\x{2030}
\x{85}\x0a\x0b\x0c\x0d\x09\x0e\x{84}\x{86}
/\R+/bsr=unicode,utf
\x{2027}\x{2030}\x{2028}\x{2029}
\x09\x0e\x{84}\x{86}\x{85}\x0a\x0b\x0c\x0d
/(..)\1/utf
ab\=ps
aba\=ps
abab\=ps
/(..)\1/i,utf
ab\=ps
abA\=ps
aBAb\=ps
/(..)\1{2,}/utf
ab\=ps
aba\=ps
abab\=ps
ababa\=ps
ababab\=ps
ababab\=ph
abababa\=ps
abababa\=ph
/(..)\1{2,}/i,utf
ab\=ps
aBa\=ps
aBAb\=ps
AbaBA\=ps
abABAb\=ps
aBAbaB\=ph
abABabA\=ps
abaBABa\=ph
/(..)\1{2,}?x/i,utf
ab\=ps
abA\=ps
aBAb\=ps
abaBA\=ps
abAbaB\=ps
abaBabA\=ps
abAbABaBx\=ps
/./utf,newline=crlf
\r\=ps
\r\=ph
/.{2,3}/utf,newline=crlf
\r\=ps
\r\=ph
\r\r\=ps
\r\r\=ph
\r\r\r\=ps
\r\r\r\=ph
/.{2,3}?/utf,newline=crlf
\r\=ps
\r\=ph
\r\r\=ps
\r\r\=ph
\r\r\r\=ps
\r\r\r\=ph
/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/B,utf
/[^\x{100}][^\x{1234}][^\x{ffff}][^\x{10000}][^\x{10ffff}]/Bi,utf
/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/B,utf
/[^\x{100}]*[^\x{10000}]+[^\x{10ffff}]??[^\x{8000}]{4,}[^\x{7fff}]{2,9}?[^\x{fffff}]{5,6}+/Bi,utf
/(?<=\x{1234}\x{1234})\bxy/I,utf
/(?<!^)ETA/utf
\= Expect no match
ETA
/\u0100/B,utf,alt_bsux,allow_empty_class,match_unset_backref
/[\u0100-\u0200]/B,utf,alt_bsux,allow_empty_class,match_unset_backref
/\ud800/utf,alt_bsux,allow_empty_class,match_unset_backref
/^\u{0000000000010ffff}/utf,extra_alt_bsux
\x{10ffff}
/\u/utf,alt_bsux
\\u
/^a+[a\x{200}]/B,utf
aa
/[b-d\x{200}-\x{250}]*[ae-h]?#[\x{200}-\x{250}]{0,8}[\x00-\xff]*#[\x{200}-\x{250}]+[a-z]/B,utf
/[\p{L}]/IB
/[\p{^L}]/IB
/[\P{L}]/IB
/[\P{^L}]/IB
/[abc\p{L}\x{0660}]/IB,utf
/[\p{Nd}]/IB,utf
1234
/[\p{Nd}+-]+/IB,utf
1234
12-34
12+\x{661}-34
\= Expect no match
abcd
/(?:[\PPa*]*){8,}/
/[\P{Any}]/B
/[\P{Any}\E]/B
/(\P{Yi}+\277)/
/(\P{Yi}+\277)?/
/(?<=\P{Yi}{3}A)X/
/\p{Yi}+(\P{Yi}+)(?1)/
/(\P{Yi}{2}\277)?/
/[\P{Yi}A]/
/[\P{Yi}\P{Yi}\P{Yi}A]/
/[^\P{Yi}A]/
/[^\P{Yi}\P{Yi}\P{Yi}A]/
/(\P{Yi}*\277)*/
/(\P{Yi}*?\277)*/
/(\p{Yi}*+\277)*/
/(\P{Yi}?\277)*/
/(\P{Yi}??\277)*/
/(\p{Yi}?+\277)*/
/(\P{Yi}{0,3}\277)*/
/(\P{Yi}{0,3}?\277)*/
/(\p{Yi}{0,3}+\277)*/
/\p{Zl}{2,3}+/B,utf
\x{2028}\x{2028}\x{2028}
/\p{Zl}/B,utf
/\p{Lu}{3}+/B,utf
/\pL{2}+/B,utf
/\p{Cc}{2}+/B,utf
/^\p{Cf}/utf
\x{180e}
\x{061c}
\x{2066}
\x{2067}
\x{2068}
\x{2069}
/^\p{Cs}/utf
\x{dfff}\=no_utf_check
\= Expect no match
\x{09f}
/^\p{Mn}/utf
\x{1a1b}
/^\p{Pe}/utf
\x{2309}
\x{230b}
/^\p{Ps}/utf
\x{2308}
\x{230a}
/^\p{Sc}+/utf
$\x{a2}\x{a3}\x{a4}\x{a5}\x{a6}
\x{9f2}
\= Expect no match
X
\x{2c2}
/^\p{Zs}/utf
\ \
\x{a0}
\x{1680}
\x{2000}
\x{2001}
\= Expect no match
\x{2028}
\x{200d}
# These are here because Perl has problems with the negative versions of the
# properties and has changed how it behaves for caseless matching.
/\p{^Lu}/i,utf
1234
\= Expect no match
ABC
/\P{Lu}/i,utf
1234
\= Expect no match
ABC
/\p{Ll}/i,utf
a
Az
\= Expect no match
ABC
/\p{Lu}/i,utf
A
a\x{10a0}B
\= Expect no match
a
\x{1d00}
/\p{Lu}/i,utf
A
aZ
\= Expect no match
abc
/[\x{c0}\x{391}]/i,utf
\x{c0}
\x{e0}
# The next two are special cases where the lengths of the different cases of
# the same character differ. The first went wrong with heap frame storage; the
# second was broken in all cases.
/^\x{023a}+?(\x{0130}+)/i,utf
\x{023a}\x{2c65}\x{0130}
/^\x{023a}+([^X])/i,utf
\x{023a}\x{2c65}X
/\x{c0}+\x{116}+/i,utf
\x{c0}\x{e0}\x{116}\x{117}
/[\x{c0}\x{116}]+/i,utf
\x{c0}\x{e0}\x{116}\x{117}
/(\x{de})\1/i,utf
\x{de}\x{de}
\x{de}\x{fe}
\x{fe}\x{fe}
\x{fe}\x{de}
/^\x{c0}$/i,utf
\x{c0}
\x{e0}
/^\x{e0}$/i,utf
\x{c0}
\x{e0}
# The next two should be Perl-compatible, but Perl fails to match \x{e0}. PCRE
# will match it only with UCP support, because without that it has no notion
# of case for anything other than the ASCII letters.
/((?i)[\x{c0}])/utf
\x{c0}
\x{e0}
/(?i:[\x{c0}])/utf
\x{c0}
\x{e0}
# These are PCRE's extra properties to help with Unicodizing \d etc.
/^\p{Xan}/utf
ABCD
1234
\x{6ca}
\x{a6c}
\x{10a7}
\= Expect no match
_ABC
/^\p{Xan}+/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
\= Expect no match
_ABC
/^\p{Xan}+?/utf
\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xan}*/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xan}{2,9}/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xan}{2,9}?/utf
\x{6ca}\x{a6c}\x{10a7}_
/^[\p{Xan}]/utf
ABCD1234_
1234abcd_
\x{6ca}
\x{a6c}
\x{10a7}
\= Expect no match
_ABC
/^[\p{Xan}]+/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
\= Expect no match
_ABC
/^>\p{Xsp}/utf
>\x{1680}\x{2028}\x{0b}
>\x{a0}
\= Expect no match
\x{0b}
/^>\p{Xsp}+/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xsp}+?/utf
>\x{1680}\x{2028}\x{0b}
/^>\p{Xsp}*/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xsp}{2,9}/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xsp}{2,9}?/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>[\p{Xsp}]/utf
>\x{2028}\x{0b}
/^>[\p{Xsp}]+/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}/utf
>\x{1680}\x{2028}\x{0b}
>\x{a0}
\= Expect no match
\x{0b}
/^>\p{Xps}+/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}+?/utf
>\x{1680}\x{2028}\x{0b}
/^>\p{Xps}*/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}{2,9}/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>\p{Xps}{2,9}?/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^>[\p{Xps}]/utf
>\x{2028}\x{0b}
/^>[\p{Xps}]+/utf
> \x{09}\x{0a}\x{0c}\x{0d}\x{a0}\x{1680}\x{2028}\x{0b}
/^\p{Xwd}/utf
ABCD
1234
\x{6ca}
\x{a6c}
\x{10a7}
_ABC
\= Expect no match
[]
/^\p{Xwd}+/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xwd}+?/utf
\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xwd}*/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
/^\p{Xwd}{2,9}/utf
A_B12\x{6ca}\x{a6c}\x{10a7}
/^\p{Xwd}{2,9}?/utf
\x{6ca}\x{a6c}\x{10a7}_
/^[\p{Xwd}]/utf
ABCD1234_
1234abcd_
\x{6ca}
\x{a6c}
\x{10a7}
_ABC
\= Expect no match
[]
/^[\p{Xwd}]+/utf
ABCD1234\x{6ca}\x{a6c}\x{10a7}_
# A check not in UTF-8 mode
/^[\p{Xwd}]+/
ABCD1234_
# Some negative checks
/^[\P{Xwd}]+/utf
!.+\x{019}\x{35a}AB
/^[\p{^Xwd}]+/utf
!.+\x{019}\x{35a}AB
/[\D]/B,utf,ucp
1\x{3c8}2
/[\d]/B,utf,ucp
>\x{6f4}<
/[\S]/B,utf,ucp
\x{1680}\x{6f4}\x{1680}
/[\s]/B,utf,ucp
>\x{1680}<
/[\W]/B,utf,ucp
A\x{1712}B
/[\w]/B,utf,ucp
>\x{1723}<
/\D/B,utf,ucp
1\x{3c8}2
/\d/B,utf,ucp
>\x{6f4}<
/\S/B,utf,ucp
\x{1680}\x{6f4}\x{1680}
/\s/B,utf,ucp
>\x{1680}>
/\W/B,utf,ucp
A\x{1712}B
/\w/B,utf,ucp
>\x{1723}<
/[[:alpha:]]/B,ucp
/[[:lower:]]/B,ucp
/[[:upper:]]/B,ucp
/[[:alnum:]]/B,ucp
/[[:ascii:]]/B,ucp
/[[:cntrl:]]/B,ucp
/[[:digit:]]/B,ucp
/[[:graph:]]/B,ucp
/[[:print:]]/B,ucp
/[[:punct:]]/B,ucp
/[[:space:]]/B,ucp
/[[:word:]]/B,ucp
/[[:xdigit:]]/B,ucp
# Unicode properties for \b and \B
/\b...\B/utf,ucp
abc_
\x{37e}abc\x{376}
\x{37e}\x{376}\x{371}\x{393}\x{394}
!\x{c0}++\x{c1}\x{c2}
!\x{c0}+++++
# Without PCRE_UCP, non-ASCII always fail, even if < 256
/\b...\B/utf
abc_
\= Expect no match
\x{37e}abc\x{376}
\x{37e}\x{376}\x{371}\x{393}\x{394}
!\x{c0}++\x{c1}\x{c2}
!\x{c0}+++++
# With PCRE_UCP, non-UTF8 chars that are < 256 still check properties
/\b...\B/ucp
abc_
!\x{c0}++\x{c1}\x{c2}
!\x{c0}+++++
# Some of these are silly, but they check various combinations
/[[:^alpha:][:^cntrl:]]+/B,utf,ucp
123
abc
/[[:^cntrl:][:^alpha:]]+/B,utf,ucp
123
abc
/[[:alpha:]]+/B,utf,ucp
abc
/[[:^alpha:]\S]+/B,utf,ucp
123
abc
/[^\d]+/B,utf,ucp
abc123
abc\x{123}
\x{660}abc
/\p{Lu}+9\p{Lu}+B\p{Lu}+b/B
/\p{^Lu}+9\p{^Lu}+B\p{^Lu}+b/B
/\P{Lu}+9\P{Lu}+B\P{Lu}+b/B
/\p{Han}+X\p{Greek}+\x{370}/B,utf
/\p{Xan}+!\p{Xan}+A/B
/\p{Xsp}+!\p{Xsp}\t/B
/\p{Xps}+!\p{Xps}\t/B
/\p{Xwd}+!\p{Xwd}_/B
/A+\p{N}A+\dB+\p{N}*B+\d*/B,ucp
# These behaved oddly in Perl, so they are kept in this test
/(\x{23a}\x{23a}\x{23a})?\1/i,utf
\= Expect no match
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
/(ȺȺȺ)?\1/i,utf
\= Expect no match
ȺȺȺⱥⱥ
/(\x{23a}\x{23a}\x{23a})?\1/i,utf
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
/(ȺȺȺ)?\1/i,utf
ȺȺȺⱥⱥⱥ
/(\x{23a}\x{23a}\x{23a})\1/i,utf
\= Expect no match
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}
/(ȺȺȺ)\1/i,utf
\= Expect no match
ȺȺȺⱥⱥ
/(\x{23a}\x{23a}\x{23a})\1/i,utf
\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}
/(ȺȺȺ)\1/i,utf
ȺȺȺⱥⱥⱥ
/(\x{2c65}\x{2c65})\1/i,utf
\x{2c65}\x{2c65}\x{23a}\x{23a}
/(ⱥⱥ)\1/i,utf
ⱥⱥȺȺ
/(\x{23a}\x{23a}\x{23a})\1Y/i,utf
X\x{23a}\x{23a}\x{23a}\x{2c65}\x{2c65}\x{2c65}YZ
/(\x{2c65}\x{2c65})\1Y/i,utf
X\x{2c65}\x{2c65}\x{23a}\x{23a}YZ
# These scripts weren't yet in Perl when I added Unicode 6.0.0 to PCRE
/^[\p{Batak}]/utf
\x{1bc0}
\x{1bff}
\= Expect no match
\x{1bf4}
/^[\p{Brahmi}]/utf
\x{11000}
\x{1106f}
\= Expect no match
\x{1104e}
/^[\p{Mandaic}]/utf
\x{840}
\x{85e}
\= Expect no match
\x{85c}
\x{85d}
/(\X*)(.)/s,utf
A\x{300}
/^S(\X*)e(\X*)$/utf
Stéréo
/^\X/utf
́réo
/^a\X41z/alt_bsux,allow_empty_class,match_unset_backref,dupnames
aX41z
\= Expect no match
aAz
/\X/
a\=ps
a\=ph
/\Xa/
aa\=ps
aa\=ph
/\X{2}/
aa\=ps
aa\=ph
/\X+a/
a\=ps
aa\=ps
aa\=ph
/\X+?a/
a\=ps
ab\=ps
aa\=ps
aa\=ph
aba\=ps
# These Unicode 6.1.0 scripts are not known to Perl.
/\p{Chakma}\d/utf,ucp
\x{11100}\x{1113c}
/\p{Takri}\d/utf,ucp
\x{11680}\x{116c0}
/^\X/utf
A\=ps
A\=ph
A\x{300}\x{301}\=ps
A\x{300}\x{301}\=ph
A\x{301}\=ps
A\x{301}\=ph
/^\X{2,3}/utf
A\=ps
A\=ph
AA\=ps
AA\=ph
A\x{300}\x{301}\=ps
A\x{300}\x{301}\=ph
A\x{300}\x{301}A\x{300}\x{301}\=ps
A\x{300}\x{301}A\x{300}\x{301}\=ph
/^\X{2}/utf
AA\=ps
AA\=ph
A\x{300}\x{301}A\x{300}\x{301}\=ps
A\x{300}\x{301}A\x{300}\x{301}\=ph
/^\X+/utf
AA\=ps
AA\=ph
/^\X+?Z/utf
AA\=ps
AA\=ph
/A\x{3a3}B/IBi,utf
/[\x{3a3}]/Bi,utf
/[^\x{3a3}]/Bi,utf
/[\x{3a3}]+/Bi,utf
/[^\x{3a3}]+/Bi,utf
/a*\x{3a3}/Bi,utf
/\x{3a3}+a/Bi,utf
/\x{3a3}*\x{3c2}/Bi,utf
/\x{3a3}{3}/i,utf,aftertext
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}{2,4}/i,utf,aftertext
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}{2,4}?/i,utf,aftertext
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}+./i,utf,aftertext
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}++./i,utf,aftertext
\= Expect no match
\x{3a3}\x{3c3}\x{3c2}\x{3a3}\x{3c3}\x{3c2}
/\x{3a3}*\x{3c2}/Bi,utf
/[^\x{3a3}]*\x{3c2}/Bi,utf
/[^a]*\x{3c2}/Bi,utf
/ist/Bi,utf
\= Expect no match
ikt
/is+t/i,utf
iSs\x{17f}t
\= Expect no match
ikt
/is+?t/i,utf
\= Expect no match
ikt
/is?t/i,utf
\= Expect no match
ikt
/is{2}t/i,utf
\= Expect no match
iskt
# This property is a PCRE special
/^\p{Xuc}/utf
$abc
@abc
`abc
\x{1234}abc
\= Expect no match
abc
/^\p{Xuc}+/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\p{Xuc}+?/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\p{Xuc}+?\*/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\p{Xuc}++/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\p{Xuc}{3,5}/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\p{Xuc}{3,5}?/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^[\p{Xuc}]/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^[\p{Xuc}]+/utf
$@`\x{a0}\x{1234}\x{e000}**
\= Expect no match
\x{9f}
/^\P{Xuc}/utf
abc
\= Expect no match
$abc
@abc
`abc
\x{1234}abc
/^[\P{Xuc}]/utf
abc
\= Expect no match
$abc
@abc
`abc
\x{1234}abc
# Some auto-possessification tests
/\pN+\z/B
/\PN+\z/B
/\pN+/B
/\PN+/B
/\p{Any}+\p{Any} \p{Any}+\P{Any} \p{Any}+\p{L&} \p{Any}+\p{L} \p{Any}+\p{Lu} \p{Any}+\p{Han} \p{Any}+\p{Xan} \p{Any}+\p{Xsp} \p{Any}+\p{Xps} \p{Xwd}+\p{Any} \p{Any}+\p{Xuc}/Bx,ucp
/\p{L&}+\p{Any} \p{L&}+\p{L&} \P{L&}+\p{L&} \p{L&}+\p{L} \p{L&}+\p{Lu} \p{L&}+\p{Han} \p{L&}+\p{Xan} \p{L&}+\P{Xan} \p{L&}+\p{Xsp} \p{L&}+\p{Xps} \p{Xwd}+\p{L&} \p{L&}+\p{Xuc}/Bx,ucp
/\p{N}+\p{Any} \p{N}+\p{L&} \p{N}+\p{L} \p{N}+\P{L} \p{N}+\P{N} \p{N}+\p{Lu} \p{N}+\p{Han} \p{N}+\p{Xan} \p{N}+\p{Xsp} \p{N}+\p{Xps} \p{Xwd}+\p{N} \p{N}+\p{Xuc}/Bx,ucp
/\p{Lu}+\p{Any} \p{Lu}+\p{L&} \p{Lu}+\p{L} \p{Lu}+\p{Lu} \P{Lu}+\p{Lu} \p{Lu}+\p{Nd} \p{Lu}+\P{Nd} \p{Lu}+\p{Han} \p{Lu}+\p{Xan} \p{Lu}+\p{Xsp} \p{Lu}+\p{Xps} \p{Xwd}+\p{Lu} \p{Lu}+\p{Xuc}/Bx,ucp
/\p{Han}+\p{Lu} \p{Han}+\p{L&} \p{Han}+\p{L} \p{Han}+\p{Lu} \p{Han}+\p{Arabic} \p{Arabic}+\p{Arabic} \p{Han}+\p{Xan} \p{Han}+\p{Xsp} \p{Han}+\p{Xps} \p{Xwd}+\p{Han} \p{Han}+\p{Xuc}/Bx,ucp
/\p{Xan}+\p{Any} \p{Xan}+\p{L&} \P{Xan}+\p{L&} \p{Xan}+\p{L} \p{Xan}+\p{Lu} \p{Xan}+\p{Han} \p{Xan}+\p{Xan} \p{Xan}+\P{Xan} \p{Xan}+\p{Xsp} \p{Xan}+\p{Xps} \p{Xwd}+\p{Xan} \p{Xan}+\p{Xuc}/Bx,ucp
/\p{Xsp}+\p{Any} \p{Xsp}+\p{L&} \p{Xsp}+\p{L} \p{Xsp}+\p{Lu} \p{Xsp}+\p{Han} \p{Xsp}+\p{Xan} \p{Xsp}+\p{Xsp} \P{Xsp}+\p{Xsp} \p{Xsp}+\p{Xps} \p{Xwd}+\p{Xsp} \p{Xsp}+\p{Xuc}/Bx,ucp
/\p{Xwd}+\p{Any} \p{Xwd}+\p{L&} \p{Xwd}+\p{L} \p{Xwd}+\p{Lu} \p{Xwd}+\p{Han} \p{Xwd}+\p{Xan} \p{Xwd}+\p{Xsp} \p{Xwd}+\p{Xps} \p{Xwd}+\p{Xwd} \p{Xwd}+\P{Xwd} \p{Xwd}+\p{Xuc}/Bx,ucp
/\p{Xuc}+\p{Any} \p{Xuc}+\p{L&} \p{Xuc}+\p{L} \p{Xuc}+\p{Lu} \p{Xuc}+\p{Han} \p{Xuc}+\p{Xan} \p{Xuc}+\p{Xsp} \p{Xuc}+\p{Xps} \p{Xwd}+\p{Xuc} \p{Xuc}+\p{Xuc} \p{Xuc}+\P{Xuc}/Bx,ucp
/\p{N}+\p{Ll} \p{N}+\p{Nd} \p{N}+\P{Nd}/Bx,ucp
/\p{Xan}+\p{L} \p{Xan}+\p{N} \p{Xan}+\p{C} \p{Xan}+\P{L} \P{Xan}+\p{N} \p{Xan}+\P{C}/Bx,ucp
/\p{L}+\p{Xan} \p{N}+\p{Xan} \p{C}+\p{Xan} \P{L}+\p{Xan} \p{N}+\p{Xan} \P{C}+\p{Xan} \p{L}+\P{Xan}/Bx,ucp
/\p{Xan}+\p{Lu} \p{Xan}+\p{Nd} \p{Xan}+\p{Cc} \p{Xan}+\P{Ll} \P{Xan}+\p{No} \p{Xan}+\P{Cf}/Bx,ucp
/\p{Lu}+\p{Xan} \p{Nd}+\p{Xan} \p{Cs}+\p{Xan} \P{Lt}+\p{Xan} \p{Nl}+\p{Xan} \P{Cc}+\p{Xan} \p{Lt}+\P{Xan}/Bx,ucp
/\w+\p{P} \w+\p{Po} \w+\s \p{Xan}+\s \s+\p{Xan} \s+\w/Bx,ucp
/\w+\P{P} \W+\p{Po} \w+\S \P{Xan}+\s \s+\P{Xan} \s+\W/Bx,ucp
/\w+\p{Po} \w+\p{Pc} \W+\p{Po} \W+\p{Pc} \w+\P{Po} \w+\P{Pc}/Bx,ucp
/\p{Nl}+\p{Xan} \P{Nl}+\p{Xan} \p{Nl}+\P{Xan} \P{Nl}+\P{Xan}/Bx,ucp
/\p{Xan}+\p{Nl} \P{Xan}+\p{Nl} \p{Xan}+\P{Nl} \P{Xan}+\P{Nl}/Bx,ucp
/\p{Xan}+\p{Nd} \P{Xan}+\p{Nd} \p{Xan}+\P{Nd} \P{Xan}+\P{Nd}/Bx,ucp
# End auto-possessification tests
/\w+/B,utf,ucp,auto_callout
abcd
/[\p{N}]?+/B,no_auto_possess
/[\p{L}ab]{2,3}+/B,no_auto_possess
/\D+\X \d+\X \S+\X \s+\X \W+\X \w+\X \R+\X \H+\X \h+\X \V+\X \v+\X a+\X \n+\X .+\X/Bx
/.+\X/Bsx
/\X+$/Bmx
/\X+\D \X+\d \X+\S \X+\s \X+\W \X+\w \X+. \X+\R \X+\H \X+\h \X+\V \X+\v \X+\X \X+\Z \X+\z \X+$/Bx
/\d+\s{0,5}=\s*\S?=\w{0,4}\W*/B,utf,ucp
/[RST]+/Bi,utf,ucp
/[R-T]+/Bi,utf,ucp
/[Q-U]+/Bi,utf,ucp
/^s?c/Iim,utf
scat
/\X?abc/utf,no_start_optimize
\xff\x7f\x00\x00\x03\x00\x41\xcc\x80\x41\x{300}\x61\x62\x63\x00\=no_utf_check,offset=06
/\x{100}\x{200}\K\x{300}/utf,startchar
\x{100}\x{200}\x{300}
# Test UTF characters in a substitution
/ábc/utf,replace=XሴZ
123ábc123
/(?<=abc)(|def)/g,utf,replace=<$0>
123abcáyzabcdef789abcሴqr
/[A-`]/iB,utf
abcdefghijklmno
/(?<=\K\x{17f})/g,utf,aftertext
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
/(?<=\K\x{17f})/altglobal,utf,aftertext
\x{17f}\x{17f}\x{17f}\x{17f}\x{17f}
"\xa\xf<(.\pZ*\P{Xwd}+^\xa8\3'3yq.::?(?J:()\xd1+!~:3'(8?:)':(?'d'(?'d'^u]!.+.+\\A\Ah(n+?9){7}+\K;(?'X'u'(?'c'(?'z'(?<y>\xb::\xf0'|\xd3(\xae?'w(z\x8?P>l)\x8?P>a)'\H\R\xd1+!!~:3'(?:h$N{26875}\W+?\\=D{2}\x89(?i:Uy0\N({2\xa(\v\x85*){y*\A(()\p{L}+?\P{^Xan}'+?\xff\+pS\?|).{;y*\A(()\p{L}+?\8}\d?1(|)(/1){7}.+[Lp{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(\xbf(R))\x8a\X*?\x8a\xb\xd1^9\3*+(\xc1,\k'R'\xb4)\xcc(z\z(?J)(?'X'\x1b(\xb\xd1^9\?'3*+P{^Xan}+?\xff\+(\xc1.]k+\xb'Pm'\xb4)\xcc4f\xa7'\xd1V(?i:U,{2,2})'(?'X'))?-%--\x95$9*\4'|\xd1(\x9c''%\x94$9)#(?'R')3\x7?('P\xed7'\xa8\xb1^u\xeaw\1\0\0\(|(?1){7}.+[\p{Me}].\s\xdcC*^\x14?(?(<y>))(?<!^)$C((;*?(R*?))+(?(R)\x8a\X*?\x8a\xb\xd1^9\3*+|(\xc1,\k'R'\xb4)\xcc! z)\z(?JJ)(?'X';(\xb\xd1^9\?'3*+(\xc1.]k+\xb'Pm'\xb4))':(?'d')(?'RD'(d')|)|$)'|(?<x>\g{d});\g{x}\x11\g{d}\x81\|$((?'X'\'X'(?'W''\x92()'9'\x83*))\xba*\!?^ <){)':;\xcc4'\xd1'(?'X'28))?-%--\x95$9*\4'|\xd1((''e\x94*$9:)*#(?'R')3)\x7?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+0!~:(?)'d'E:yD!\s(?'R'\x1e;\x10:U))|'\x9g!\xb0*){)\\x16:;()\x1e\x10\x87*:(?<y>)\xd1+!~:(?)'}'\d'E:yD!\s(?'R'\x1e;\x10:U))|'))|)g!\xb0*R+9{29+)#(?'P'})*?pS\{3,}\x85,{0,}l{*UTF)(\xe{7}){3722,{9,}d{2,?|))|{)\(A?&d}}{\xa,}2}){3,}7,l{)22}(,}l:7{2,4}}29\x19+)#?'P'})*v?))\x5"
/$(&.+[\p{Me}].\s\xdcC*?(?(<y>))(?<!^)$C((;*?(R))+(?(R)){0,6}?|){12\x8a\X*?\x8a\x0b\xd1^9\3*+(\xc1,\k'P'\xb4)\xcc(z\z(?JJ)(?'X'8};(\x0b\xd1^9\?'3*+(\xc1.]k+\x0b'Pm'\xb4\xcc4'\xd1'(?'X'))?-%--\x95$9*\4'|\xd1(''%\x95*$9)#(?'R')3\x07?('P\xed')\\x16:;()\x1e\x10*:(?<y>)\xd1+!~:(?)''(d'E:yD!\s(?'R'\x1e;\x10:U))|')g!\xb0*){29+))#(?'P'})*?/
"(*UTF)(*UCP)(.UTF).+X(\V+;\^(\D|)!999}(?(?C{7(?C')\H*\S*/^\x5\xa\\xd3\x85n?(;\D*(?m).[^mH+((*UCP)(*U:F)})(?!^)(?'"
/[\pS#moq]/
=
/(*:a\x{12345}b\t(d\)c)xxx/utf,alt_verbnames,mark
cxxxz
/abcd/utf,replace=x\x{824}y\o{3333}z(\Q12\$34$$\x34\E5$$),substitute_extended
abcd
/a(\x{e0}\x{101})(\x{c0}\x{102})/utf,replace=a\u$1\U$1\E$1\l$2\L$2\Eab\U\x{e0}\x{101}\L\x{d0}\x{160}\EDone,substitute_extended
a\x{e0}\x{101}\x{c0}\x{102}
/((?<digit>\d)|(?<letter>\p{L}))/g,substitute_extended,replace=<${digit:+digit; :not digit; }${letter:+letter:not a letter}>
ab12cde
/(*UCP)(*UTF)[[:>:]]X/B
/abc/utf,replace=xyz
abc\=zero_terminate
/a[[:punct:]b]/ucp,bincode
/a[[:punct:]b]/utf,ucp,bincode
/a[b[:punct:]]/utf,ucp,bincode
/[[:^ascii:]]/utf,ucp,bincode
/[[:^ascii:]\w]/utf,ucp,bincode
/[\w[:^ascii:]]/utf,ucp,bincode
/[^[:ascii:]\W]/utf,ucp,bincode
\x{de}
\x{200}
\= Expect no match
\x{300}
\x{37e}
/[[:^ascii:]a]/utf,ucp,bincode
/L(?#(|++<!(2)?/B,utf,no_auto_possess,auto_callout
/L(?#(|++<!(2)?/B,utf,ucp,auto_callout
/(*UTF)C\x09((?<!'(?x)!*H? #\xcc\x9a[^$]/
/[\D]/utf
\x{1d7cf}
/[\D\P{Nd}]/utf
\x{1d7cf}
/[^\D]/utf
a9b
\= Expect no match
\x{1d7cf}
/[^\D\P{Nd}]/utf
a9b
\x{1d7cf}
\= Expect no match
\x{10000}
# Hex uses pattern length, not zero-terminated. This tests for overrunning
# the given length of a pattern.
/'(*UTF)'/hex
/'#('/hex,extended,utf
/a(?<=A\XB)/utf
/ab(?<=A\RB)/utf
/../utf,auto_callout
\n\x{123}\x{123}\x{123}\x{123}
# This tests processing wide characters in extended mode.
/XȀ/x,utf
# These three test a bug fix that was not clearing up after a locale setting
# when the test or a subsequent one matched a wide character.
//locale=C
/[\P{Yi}]/utf
\x{2f000}
/[\P{Yi}]/utf,locale=C
\x{2f000}
/^(?<!(?=􃡜))/B,utf
# Horizontal and vertical space lists ignore caseless
/[\HH]/Bi,utf
/[^\HH]/Bi,utf
//g,utf
\=zero_terminate
/^(?1)\p{Nd}{3}(a)/
a123a
/\p{Nd}{0,3}[\pL](*:abc)(?C1)xxx/callout_info
# ---------------------------------------------------------------------------
# A bunch of tests that hit lines of code that others do not (at least when
# these were created).
/^[^a]{3,}?x/i,utf,no_start_optimize,no_auto_possess
\= Expect no match
bbb
cc
/^[ac]{3,}?x/i,utf,no_start_optimize,no_auto_possess
\= Expect no match
aaa\x{100}
/^X\X/no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\p{L&}+?/no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\p{L}+?/no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\p{Lu}+?/no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\p{Arabic}+?/no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\p{Xan}+?/ucp,no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\s+?/ucp,no_start_optimize,no_auto_possess
\= Expect no match
X
XX
/^X\S+?/ucp,no_start_optimize,no_auto_possess
XX
\= Expect no match
X
/^X\w+?/ucp,no_start_optimize,no_auto_possess
\= Expect no match
X
/^X[^\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
\= Expect no match
X
/^X[\x{b5}]+?/i,utf,no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\p{Xuc}+?/utf,no_start_optimize,no_auto_possess
\= Expect no match
X
/^X.+?Z/s,utf,no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\R+?/utf,no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\H+?/utf,no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\V+?/utf,no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\s+?/utf,no_start_optimize,no_auto_possess
\= Expect no match
X
XX
/^X\S+?/utf,no_start_optimize,no_auto_possess
\= Expect no match
X
/^X\p{Any}{1,3}?Z/s,no_start_optimize,no_auto_possess
XYYYZ
\= Expect no match
XY
XYY
XYYY
XYYYYZ
/^X\p{L&}{1,3}?Z/s,no_start_optimize,no_auto_possess
\= Expect no match
XY
XY!
/^X\p{L}{1,3}?Z/s,no_start_optimize,no_auto_possess
\= Expect no match
XY
XY!
/^X\p{Lu}{1,3}?Z/s,no_start_optimize,no_auto_possess
\= Expect no match
XY
XY!
/^X\P{Han}{1,3}?Z/s,utf,no_start_optimize,no_auto_possess
\= Expect no match
XY
XY!
XY\x{2f00}!
/^X\p{Xan}{1,3}?Z/s,no_start_optimize,no_auto_possess
\= Expect no match
XY
XY!
/^X\p{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
\= Expect no match
X\n
X\n!
X\n\n!
/^X\P{Xsp}{1,3}?Z/s,no_start_optimize,no_auto_possess
\= Expect no match
XYY\n
/^X\p{Xwd}{1,3}?Z/s,no_start_optimize,no_auto_possess
\= Expect no match
XY
XY!
XYY!
/^X\x{b5}+?Z/i,utf,no_start_optimize,no_auto_possess
\= Expect no match
X
X\x{b5}
X\x{b5}\x{b5}Y
/^X\p{Xuc}+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
X
X$
X@@Y
/(*CRLF)^X.+?Z/utf,no_start_optimize,no_auto_possess
\= Expect partial match
XYY\r\=ph
\= Expect no match
X
/^X.+?Z/s,utf,no_start_optimize,no_auto_possess
\= Expect no match
X
XYY
/^X\R+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
X\nX
X\n\rX
X\n\r\nX
X\n\n
X\n\x{0c}
/(*BSR_ANYCRLF)^X\R+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
X\nX
X\n\rX
X\n\r\nX
X\n\n
X\n\x{0c}
/^X\H+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
XY\t
XYY
/^X\h+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
X\t\t
X\tY
/^X\V+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
XY\n
XYY
/^X\v+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
X\n\n
X\nY
/^X\D+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
XY9
XYY
/^X\d+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
X99
X9Y
/^X\S+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
XY\n
XYY
/^X\s+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
X\n\n
X\nY
/^X\W+?Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
X.A
X++
/^X\p{L&}{1,3}Z/no_start_optimize,no_auto_possess
\= Expect no match
XY
XY!
/^X\p{L}{1,3}Z/no_start_optimize,no_auto_possess
\= Expect no match
XY
/^X\p{Xan}{1,3}Z/no_start_optimize,no_auto_possess
\= Expect no match
XY
/^X\P{Xsp}{1,3}Z/no_start_optimize,no_auto_possess
\= Expect no match
XYY
/^X\p{Xuc}+Z/utf,no_start_optimize,no_auto_possess
\= Expect no match
X$
# ----------------------------------------------------------------------
# These test the dangerous PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL option.
/\x{d800}/B,utf,bad_escape_is_literal
/\ud800/B,utf,alt_bsux,bad_escape_is_literal
# ----------------------------------------------------------------------
/Aሴ+B/literal,utf,no_utf_check
Aሴ+B
# These are here because I upgraded to Unicode 10.0.0 before Perl did, so Perl
# doesn't recognize all these scripts. In time these three tests can be moved
# to test 4.
/^(\p{Adlam}+)(\p{Bhaiksuki}+)(\p{Marchen}+)(\p{Newa}+)(\p{Osage}+)
(\p{Tangut}+)(\p{Masaram_Gondi}+)(\p{Nushu}+)(\p{Soyombo}+)
(\p{Zanabazar_Square}+)/x,utf
\x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}
/^\x{1E900}\x{104B0}/i,utf
\x{1E900}\x{104B0}
\x{1E922}\x{104D8}
/^(?:(\X)(?C))+$/utf
\x{1E900}\x{1E924}\x{1E953}\x{11C00}\x{11C2D}\x{11C3E}\x{11C70}\x{11C77}\x{11CAB}\x{11400}\x{1142F}\x{11455}\x{104B0}\x{104D8}\x{104FB}\x{16FE0}\x{18800}\x{18AF2}\x{11D00}\x{11D3A}\x{11D59}\x{16FE1}\x{1B170}\x{1B2FB}\x{11A50}\x{11A58}\x{11AA2}\x{11A00}\x{11A07}\x{11A47}\=callout_capture,callout_no_where
# Similarly for Unicode 11.0.0
/^(\p{Dogra}+)(\p{Gunjala_Gondi}+)(\p{Hanifi_Rohingya}+)(\p{Makasar}+)
(\p{Medefaidrin}+)(\p{Old_Sogdian}+)(\p{Sogdian}+)/x,utf
\x{11800}\x{11da9}\x{10d27}\x{11ee0}\x{16e48}\x{10f27}\x{10f30}
# These two are here because of differences from Perl.
/^\X/utf
A\x{200d}B A ZWJ
\x{261d}\x{261d}B Extended_Pictographic Extended_Pictographic
\x{261D}\x{1F3FB}B Extended_Pictographic Extend
\x{1F1E6}\x{1F1E7}B RegionalIndicator RegionalIndicator
\x{261D}\x{1F3FB}\x{261d}B Extended_Pictographic Extend E-P
\x{261D}\x{1F3FB}\x{200d}\x{261d}B Extended_Pictographic Extend ZWJ E-P
# Regional indicators
/^(\X)(\X)/utf,aftertext
\x{1F1E6}\x{1F1E7}\x{1F1E7}B
\x{1F1E6}\x{1F1E7}\x{1F1E7}\x{1F1E6}B
# More differences from Perl
/^[\p{Arabic}]/utf
\= Expect no match
\x{650}
\x{651}
\x{652}
\x{653}
\x{654}
\x{655}
/^\p{Common}/utf
\x{60c}
\x{61f}
\x{964}
\x{965}
/^\p{Inherited}/utf
\x{64b}
\x{654}
\x{655}
\x{1D1AA}
/\N{U+}/
/\N{U+}/utf
/\N{U}/
# This tests the non-UTF Unicode NEL pattern whitespace character, only
# recognized by PCRE2 with /x when there is Unicode support.
/A
<EFBFBD>B/x
AB
# This tests Unicode Pattern White Space characters in verb names when they
# are being processed with PCRE2_EXTENDED. Note: there are UTF-8 characters
# with code points greater than 255 between A, B, and C in the pattern.
/(*: ABC)abc/x,utf,mark,alt_verbnames
abc
# Script run tests: auto-possessification
/^(*sr:.*)/B,utf
paypаl.com A classic example of why script run checks are a good thing
/^(*sr:.*(*ACCEPT))/utf
paypаl.com But *ACCEPT breaks things
/^(*sr:\x{2e80}*)/B,utf
/^(*sr:\x{2e80}*)\x{2e80}/B,utf
/(?<!)(*sr:)/B
/(?<=abc(?=X(*sr:BXY)CCC)XBXYCCC)./B
abcXBXYCCC!
# Some script run patterns are broken in Perl 5.28.0. These can be moved into
# test 4 when a mended version of Perl is released.
/^(*sr:.{4})/utf
\x{0980}12\x{0993} Bengali Common-digits Bengali
\x{0780}12\x{07b1} Thaana Common-digits Thaana
\x{0e01}12\x{0e5b} Thai Common-digits Thai
\x{1780}12\x{19ff} Khmer Common-digits Khmer
\x{0904}12\x{0939} Devanagari Common-digits Devanagari
A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
# These ones involve non-ASCII but nevertheless Common digits. As of October
# 2018 even blead Perl wasn't handling all of these - but is going to.
/^(*sr:.{4})/utf
A\x{ff10}\x{ff19}B Latin Common-notascii-digits Latin
\x{ff10}\x{ff19}.. Common-notascii-digits Common Common
A\x{ff10}BC Latin Common-notascii-digit Latin Latin
A\x{1d7ce}\x{1d7cf}B Latin fancy-common-digits Latin
\x{1d7ce}\x{1d7cf},, fancy-common-digits Common Common
A\x{1d7ce}BC Latin fancy-common-digit Latin Latin
# Some Unicode 12.1.0 new script characters
/\p{Elymaic}\p{Nandinagari}\p{Nyiakeng_Puachue_Hmong}\p{Wancho}/utf
\x{10fe5}\x{119AC}\x{1E10E}\x{1E2D1}
# Some Unicode 13.0.0 new script characters
/\p{Chorasmian}\p{Dives_Akuru}\p{Khitan_Small_Script}\p{Yezidi}/utf
\x{10FB0}\x{11900}\x{18B00}\x{10E80}
# -------
# Test reference and errors in non-ASCII characters in group names
/(?'𑠅ABC'...)/I,utf
abcde\=copy=𑠅ABC
# Bad ones
/(?'AB၌C'...)\g{AB၌C}/utf
/(?'٠ABC'...)/utf
/(?'²ABC'...)/utf
/(?'X²ABC'...)/utf
# -------
/\p{Any}*xyz/I
/(|<7C>)7/caseless,ucp
/(\xc1)\1/i,ucp
\xc1\xe1\=no_jit
# End of testinput5