pcre/testdata/testoutput22-16

# Tests of \C when Unicode support is available. Note that \C is not supported
# for DFA matching in UTF mode, so this test is not run with -dfa. The output
# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match
# in some widths and not in others.

/ab\Cde/utf,info
Capture group count = 0
Contains \C
Options: utf
First code unit = 'a'
Last code unit = 'e'
Subject length lower bound = 2
    abXde
 0: abXde

# This should produce an error diagnostic (\C in UTF lookbehind) in 8-bit and
# 16-bit modes, but not in 32-bit mode.

/(?<=ab\Cde)X/utf
Failed: error 136 at offset 0: \C is not allowed in a lookbehind assertion in UTF-16 mode
    ab!deXYZ

# Autopossessification tests

/\C+\X \X+\C/Bx
------------------------------------------------------------------
        Bra
        AllAny+
        extuni
        extuni+
        AllAny
        Ket
        End
------------------------------------------------------------------

/\C+\X \X+\C/Bx,utf
------------------------------------------------------------------
        Bra
        Anybyte+
        extuni
        extuni+
        Anybyte
        Ket
        End
------------------------------------------------------------------

/\C\X*TӅ;
{0,6}\v+
F
/utf
\= Expect no match
    Ӆ\x0a
No match

/\C(\W?ſ)'?{{/utf
\= Expect no match
    \\C(\\W?ſ)'?{{
No match

/X(\C{3})/utf
    X\x{1234}
No match
    X\x{11234}Y
 0: X\x{11234}Y
 1: \x{11234}Y
    X\x{11234}YZ
 0: X\x{11234}Y
 1: \x{11234}Y

/X(\C{4})/utf
    X\x{1234}YZ
No match
    X\x{11234}YZ
 0: X\x{11234}YZ
 1: \x{11234}YZ
    X\x{11234}YZW
 0: X\x{11234}YZ
 1: \x{11234}YZ

/X\C*/utf
    XYZabcdce
 0: XYZabcdce

/X\C*?/utf
    XYZabcde
 0: X

/X\C{3,5}/utf
    Xabcdefg
 0: Xabcde
    X\x{1234}
No match
    X\x{1234}YZ
 0: X\x{1234}YZ
    X\x{1234}\x{512}
No match
    X\x{1234}\x{512}YZ
 0: X\x{1234}\x{512}YZ
    X\x{11234}Y
 0: X\x{11234}Y
    X\x{11234}YZ
 0: X\x{11234}YZ
    X\x{11234}\x{512}
 0: X\x{11234}\x{512}
    X\x{11234}\x{512}YZ
 0: X\x{11234}\x{512}YZ
    X\x{11234}\x{512}\x{11234}Z
 0: X\x{11234}\x{512}\x{11234}

/X\C{3,5}?/utf
    Xabcdefg
 0: Xabc
    X\x{1234}
No match
    X\x{1234}YZ
 0: X\x{1234}YZ
    X\x{1234}\x{512}
No match
    X\x{11234}Y
 0: X\x{11234}Y
    X\x{11234}YZ
 0: X\x{11234}Y
    X\x{11234}\x{512}YZ
 0: X\x{11234}\x{512}
    X\x{11234}
No match

/a\Cb/utf
    aXb
 0: aXb
    a\nb
 0: a\x{0a}b
    a\x{100}b
 0: a\x{100}b

/a\C\Cb/utf
    a\x{100}b
No match
    a\x{12257}b
 0: a\x{12257}b
    a\x{12257}\x{11234}b
No match

/ab\Cde/utf
    abXde
 0: abXde

# This one is here not because it's different to Perl, but because the way
# the captured single code unit is displayed. (In Perl it becomes a character,
# and you can't tell the difference.)

/X(\C)(.*)/utf
    X\x{1234}
 0: X\x{1234}
 1: \x{1234}
 2: 
    X\nabc
 0: X\x{0a}abc
 1: \x{0a}
 2: abc

# This one is here because Perl gives out a grumbly error message (quite
# correctly, but that messes up comparisons).

/a\Cb/utf
\= Expect no match in 8-bit mode
    a\x{100}b
 0: a\x{100}b

/^ab\C/utf,no_start_optimize
\= Expect no match - tests \C at end of subject
    ab
No match

/\C[^\v]+\x80/utf
    [AΏBŀC]
No match

/\C[^\d]+\x80/utf
    [AΏBŀC]
No match

# End of testinput22
-												Implement --never-backslash-C


											
										
										
											2015-10-17 09:50:56 -04:00
+								# Tests of \C when Unicode support is available. Note that \C is not supported
 								# for DFA matching in UTF mode, so this test is not run with -dfa. The output
 								# of this test is different in 8-, 16-, and 32-bit modes. Some tests may match
 								# in some widths and not in others.
-												Some tests were in the wrong files.


											
										
										
											2015-12-15 13:21:49 -05:00
+								/ab\Cde/utf,info
-												Allow non-ASCII in group names when UTF is set; revise group naming terminology 
in documentation to use "capture group", as Perl does.


											
										
										
											2019-02-06 13:11:36 -05:00
+								Capture group count = 0
-												Some tests were in the wrong files.


											
										
										
											2015-12-15 13:21:49 -05:00
+								Contains \C
 								Options: utf
 								First code unit = 'a'
 								Last code unit = 'e'
-												Minor improvement to minimum length calculation.


											
										
										
											2019-06-13 12:00:11 -04:00
+								Subject length lower bound = 2
-												Implement --never-backslash-C


											
										
										
											2015-10-17 09:50:56 -04:00
+								    abXde
 0: abXde
-												Support \C in lookbehinds and DFA matching in UTF-32 mode.


											
										
										
											2016-06-20 14:14:51 -04:00
+								# This should produce an error diagnostic (\C in UTF lookbehind) in 8-bit and
 								# 16-bit modes, but not in 32-bit mode.
-												Implement --never-backslash-C


											
										
										
											2015-10-17 09:50:56 -04:00
 								/(?<=ab\Cde)X/utf
-												Major refactoring of pcre2_compile.c; see ChangeLog and HACKING.


											
										
										
											2016-10-02 12:01:01 -04:00
+								Failed: error 136 at offset 0: \C is not allowed in a lookbehind assertion in UTF-16 mode
-												Support \C in lookbehinds and DFA matching in UTF-32 mode.


											
										
										
											2016-06-20 14:14:51 -04:00
+								    ab!deXYZ
-												Implement --never-backslash-C


											
										
										
											2015-10-17 09:50:56 -04:00
 								# Autopossessification tests
 								/\C+\X \X+\C/Bx
 								------------------------------------------------------------------
 								        Bra
 								        AllAny+
 								        extuni
 								        extuni+
 								        AllAny
 								        Ket
 								        End
 								------------------------------------------------------------------
 								/\C+\X \X+\C/Bx,utf
 								------------------------------------------------------------------
 								        Bra
 								        Anybyte+
 								        extuni
 								        extuni+
 								        Anybyte
 								        Ket
 								        End
 								------------------------------------------------------------------
 								/\C\X*TӅ;
 								{0,6}\v+
F
 								/utf
 								\= Expect no match
 								    Ӆ\x0a
 								No match
 								/\C(\W?ſ)'?{{/utf
 								\= Expect no match
 								    \\C(\\W?ſ)'?{{
 								No match
 								/X(\C{3})/utf
 								    X\x{1234}
 								No match
 								    X\x{11234}Y
 0: X\x{11234}Y
 1: \x{11234}Y
 								    X\x{11234}YZ
 0: X\x{11234}Y
 1: \x{11234}Y
 								/X(\C{4})/utf
 								    X\x{1234}YZ
 								No match
 								    X\x{11234}YZ
 0: X\x{11234}YZ
 1: \x{11234}YZ
 								    X\x{11234}YZW
 0: X\x{11234}YZ
 1: \x{11234}YZ
 								/X\C*/utf
 								    XYZabcdce
 0: XYZabcdce
 								/X\C*?/utf
 								    XYZabcde
 0: X
 								/X\C{3,5}/utf
 								    Xabcdefg
 0: Xabcde
 								    X\x{1234}
 								No match
 								    X\x{1234}YZ
 0: X\x{1234}YZ
 								    X\x{1234}\x{512}
 								No match
 								    X\x{1234}\x{512}YZ
 0: X\x{1234}\x{512}YZ
 								    X\x{11234}Y
 0: X\x{11234}Y
 								    X\x{11234}YZ
 0: X\x{11234}YZ
 								    X\x{11234}\x{512}
 0: X\x{11234}\x{512}
 								    X\x{11234}\x{512}YZ
 0: X\x{11234}\x{512}YZ
 								    X\x{11234}\x{512}\x{11234}Z
 0: X\x{11234}\x{512}\x{11234}
 								/X\C{3,5}?/utf
 								    Xabcdefg
 0: Xabc
 								    X\x{1234}
 								No match
 								    X\x{1234}YZ
 0: X\x{1234}YZ
 								    X\x{1234}\x{512}
 								No match
 								    X\x{11234}Y
 0: X\x{11234}Y
 								    X\x{11234}YZ
 0: X\x{11234}Y
 								    X\x{11234}\x{512}YZ
 0: X\x{11234}\x{512}
 								    X\x{11234}
 								No match
 								/a\Cb/utf
 								    aXb
 0: aXb
 								    a\nb
 0: a\x{0a}b
 								    a\x{100}b
 0: a\x{100}b
 								/a\C\Cb/utf
 								    a\x{100}b
 								No match
 								    a\x{12257}b
 0: a\x{12257}b
 								    a\x{12257}\x{11234}b
 								No match
 								/ab\Cde/utf
 								    abXde
 0: abXde
 								# This one is here not because it's different to Perl, but because the way
 								# the captured single code unit is displayed. (In Perl it becomes a character,
 								# and you can't tell the difference.)
 								/X(\C)(.*)/utf
 								    X\x{1234}
 0: X\x{1234}
 1: \x{1234}
 2: 
 								    X\nabc
 0: X\x{0a}abc
 1: \x{0a}
 2: abc
 								# This one is here because Perl gives out a grumbly error message (quite
 								# correctly, but that messes up comparisons).
 								/a\Cb/utf
 								\= Expect no match in 8-bit mode
 								    a\x{100}b
 0: a\x{100}b
-												Update 16-bit and 32-bit tests.


											
										
										
											2017-04-16 09:04:57 -04:00
+								/^ab\C/utf,no_start_optimize
 								\= Expect no match - tests \C at end of subject
 								    ab
 								No match
-												Fix \C bug with repeated character classes in UTF-8 mode.


											
										
										
											2018-02-19 12:26:33 -05:00
+								/\C[^\v]+\x80/utf
 								    [AΏBŀC]
 								No match
-												Add another test.


											
										
										
											2018-02-20 10:37:49 -05:00
+								/\C[^\d]+\x80/utf
 								    [AΏBŀC]
 								No match
-												Implement --never-backslash-C


											
										
										
											2015-10-17 09:50:56 -04:00
+								# End of testinput22