7638ce507c
named groups with their numbers before the rest of the compiling code is run. This has simplified the main compiling code and removed some sources of error.
1458 lines
38 KiB
Plaintext
1458 lines
38 KiB
Plaintext
# This set of tests is for UTF-16 and UTF-32 support, including Unicode
|
||
# properties. It is relevant only to the 16-bit and 32-bit libraries. The
|
||
# output is different for each library, so there are separate output files.
|
||
|
||
/ÃÃÃxxx/IB,utf,no_utf_check
|
||
** Failed: invalid UTF-8 string cannot be converted to 16-bit string
|
||
|
||
/abc/utf
|
||
Ã]
|
||
** Failed: invalid UTF-8 string cannot be used as input in UTF mode
|
||
|
||
/X(\C{3})/utf
|
||
X\x{11234}Y
|
||
0: X\x{11234}Y
|
||
1: \x{11234}Y
|
||
X\x{11234}YZ
|
||
0: X\x{11234}Y
|
||
1: \x{11234}Y
|
||
|
||
/X(\C{4})/utf
|
||
X\x{11234}YZ
|
||
0: X\x{11234}YZ
|
||
1: \x{11234}YZ
|
||
X\x{11234}YZW
|
||
0: X\x{11234}YZ
|
||
1: \x{11234}YZ
|
||
|
||
/X\C*/utf
|
||
XYZabcdce
|
||
0: XYZabcdce
|
||
|
||
/X\C*?/utf
|
||
XYZabcde
|
||
0: X
|
||
|
||
/X\C{3,5}/utf
|
||
Xabcdefg
|
||
0: Xabcde
|
||
X\x{11234}Y
|
||
0: X\x{11234}Y
|
||
X\x{11234}YZ
|
||
0: X\x{11234}YZ
|
||
X\x{11234}\x{512}
|
||
0: X\x{11234}\x{512}
|
||
X\x{11234}\x{512}YZ
|
||
0: X\x{11234}\x{512}YZ
|
||
X\x{11234}\x{512}\x{11234}Z
|
||
0: X\x{11234}\x{512}\x{11234}
|
||
|
||
/X\C{3,5}?/utf
|
||
Xabcdefg
|
||
0: Xabc
|
||
X\x{11234}Y
|
||
0: X\x{11234}Y
|
||
X\x{11234}YZ
|
||
0: X\x{11234}Y
|
||
X\x{11234}\x{512}YZ
|
||
0: X\x{11234}\x{512}
|
||
*** Failers
|
||
No match
|
||
X\x{11234}
|
||
No match
|
||
|
||
/a\Cb/utf
|
||
aXb
|
||
0: aXb
|
||
a\nb
|
||
0: a\x{0a}b
|
||
|
||
/a\C\Cb/utf
|
||
a\x{12257}b
|
||
0: a\x{12257}b
|
||
a\x{12257}\x{11234}b
|
||
No match
|
||
** Failers
|
||
No match
|
||
a\x{100}b
|
||
No match
|
||
|
||
/ab\Cde/utf
|
||
abXde
|
||
0: abXde
|
||
|
||
# Check maximum character size
|
||
|
||
/\x{ffff}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{ffff}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{ffff}
|
||
Subject length lower bound = 1
|
||
|
||
/\x{10000}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{10000}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{d800}
|
||
Last code unit = \x{dc00}
|
||
Subject length lower bound = 1
|
||
|
||
/\x{100}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{100}
|
||
Subject length lower bound = 1
|
||
|
||
/\x{1000}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{1000}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{1000}
|
||
Subject length lower bound = 1
|
||
|
||
/\x{10000}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{10000}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{d800}
|
||
Last code unit = \x{dc00}
|
||
Subject length lower bound = 1
|
||
|
||
/\x{100000}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100000}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{dbc0}
|
||
Last code unit = \x{dc00}
|
||
Subject length lower bound = 1
|
||
|
||
/\x{10ffff}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{10ffff}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{dbff}
|
||
Last code unit = \x{dfff}
|
||
Subject length lower bound = 1
|
||
|
||
/[\x{ff}]/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{ff}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \xff
|
||
Subject length lower bound = 1
|
||
|
||
/[\x{100}]/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{100}
|
||
Subject length lower bound = 1
|
||
|
||
/\x80/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{80}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x80
|
||
Subject length lower bound = 1
|
||
|
||
/\xff/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{ff}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \xff
|
||
Subject length lower bound = 1
|
||
|
||
/\x{D55c}\x{ad6d}\x{C5B4}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{d55c}\x{ad6d}\x{c5b4}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{d55c}
|
||
Last code unit = \x{c5b4}
|
||
Subject length lower bound = 3
|
||
\x{D55c}\x{ad6d}\x{C5B4}
|
||
0: \x{d55c}\x{ad6d}\x{c5b4}
|
||
|
||
/\x{65e5}\x{672c}\x{8a9e}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{65e5}\x{672c}\x{8a9e}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{65e5}
|
||
Last code unit = \x{8a9e}
|
||
Subject length lower bound = 3
|
||
\x{65e5}\x{672c}\x{8a9e}
|
||
0: \x{65e5}\x{672c}\x{8a9e}
|
||
|
||
/\x{80}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{80}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x80
|
||
Subject length lower bound = 1
|
||
|
||
/\x{084}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{84}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x84
|
||
Subject length lower bound = 1
|
||
|
||
/\x{104}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{104}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{104}
|
||
Subject length lower bound = 1
|
||
|
||
/\x{861}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{861}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{861}
|
||
Subject length lower bound = 1
|
||
|
||
/\x{212ab}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{212ab}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{d844}
|
||
Last code unit = \x{deab}
|
||
Subject length lower bound = 1
|
||
|
||
# This one is here not because it's different to Perl, but because the way
|
||
# the captured single-byte is displayed. (In Perl it becomes a character, and you
|
||
# can't tell the difference.)
|
||
|
||
/X(\C)(.*)/utf
|
||
X\x{1234}
|
||
0: X\x{1234}
|
||
1: \x{1234}
|
||
2:
|
||
X\nabc
|
||
0: X\x{0a}abc
|
||
1: \x{0a}
|
||
2: abc
|
||
|
||
# This one is here because Perl gives out a grumbly error message (quite
|
||
# correctly, but that messes up comparisons).
|
||
|
||
/a\Cb/utf
|
||
*** Failers
|
||
No match
|
||
a\x{100}b
|
||
0: a\x{100}b
|
||
|
||
/[^ab\xC0-\xF0]/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[\x00-`c-\xbf\xf1-\xff] (neg)
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
||
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
||
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4
|
||
5 6 7 8 9 : ; < = > ? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y
|
||
Z [ \ ] ^ _ ` c d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f
|
||
\x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e
|
||
\x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d
|
||
\x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac
|
||
\xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb
|
||
\xbc \xbd \xbe \xbf \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb
|
||
\xfc \xfd \xfe \xff
|
||
Subject length lower bound = 1
|
||
\x{f1}
|
||
0: \x{f1}
|
||
\x{bf}
|
||
0: \x{bf}
|
||
\x{100}
|
||
0: \x{100}
|
||
\x{1000}
|
||
0: \x{1000}
|
||
*** Failers
|
||
0: *
|
||
\x{c0}
|
||
No match
|
||
\x{f0}
|
||
No match
|
||
|
||
/Ä€{3,4}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}{3}
|
||
\x{100}?+
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{100}
|
||
Last code unit = \x{100}
|
||
Subject length lower bound = 3
|
||
\x{100}\x{100}\x{100}\x{100\x{100}
|
||
0: \x{100}\x{100}\x{100}
|
||
|
||
/(\x{100}+|x)/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
CBra 1
|
||
\x{100}++
|
||
Alt
|
||
x
|
||
Ket
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 1
|
||
Options: utf
|
||
Starting code units: x \xff
|
||
Subject length lower bound = 1
|
||
|
||
/(\x{100}*a|x)/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
CBra 1
|
||
\x{100}*+
|
||
a
|
||
Alt
|
||
x
|
||
Ket
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 1
|
||
Options: utf
|
||
Starting code units: a x \xff
|
||
Subject length lower bound = 1
|
||
|
||
/(\x{100}{0,2}a|x)/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
CBra 1
|
||
\x{100}{0,2}+
|
||
a
|
||
Alt
|
||
x
|
||
Ket
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 1
|
||
Options: utf
|
||
Starting code units: a x \xff
|
||
Subject length lower bound = 1
|
||
|
||
/(\x{100}{1,2}a|x)/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
CBra 1
|
||
\x{100}
|
||
\x{100}{0,1}+
|
||
a
|
||
Alt
|
||
x
|
||
Ket
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 1
|
||
Options: utf
|
||
Starting code units: x \xff
|
||
Subject length lower bound = 1
|
||
|
||
/\x{100}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{100}
|
||
Subject length lower bound = 1
|
||
|
||
/a\x{100}\x{101}*/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
a\x{100}
|
||
\x{101}*+
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = 'a'
|
||
Last code unit = \x{100}
|
||
Subject length lower bound = 2
|
||
|
||
/a\x{100}\x{101}+/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
a\x{100}
|
||
\x{101}++
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = 'a'
|
||
Last code unit = \x{101}
|
||
Subject length lower bound = 3
|
||
|
||
/[^\x{c4}]/IB
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[^\x{c4}]
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Subject length lower bound = 1
|
||
|
||
/[\x{100}]/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{100}
|
||
Subject length lower bound = 1
|
||
\x{100}
|
||
0: \x{100}
|
||
Z\x{100}
|
||
0: \x{100}
|
||
\x{100}Z
|
||
0: \x{100}
|
||
*** Failers
|
||
No match
|
||
|
||
/[\xff]/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{ff}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \xff
|
||
Subject length lower bound = 1
|
||
>\x{ff}<
|
||
0: \x{ff}
|
||
|
||
/[^\xff]/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[^\x{ff}]
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Subject length lower bound = 1
|
||
|
||
/\x{100}abc(xyz(?1))/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}abc
|
||
CBra 1
|
||
xyz
|
||
Recurse
|
||
Ket
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 1
|
||
Options: utf
|
||
First code unit = \x{100}
|
||
Last code unit = 'z'
|
||
Subject length lower bound = 7
|
||
|
||
/\777/I,utf
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{1ff}
|
||
Subject length lower bound = 1
|
||
\x{1ff}
|
||
0: \x{1ff}
|
||
\777
|
||
0: \x{1ff}
|
||
|
||
/\x{100}+\x{200}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}++
|
||
\x{200}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{100}
|
||
Last code unit = \x{200}
|
||
Subject length lower bound = 2
|
||
|
||
/\x{100}+X/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}++
|
||
X
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = \x{100}
|
||
Last code unit = 'X'
|
||
Subject length lower bound = 2
|
||
|
||
/^[\QÄ€\E-\QÅ<51>\E/B,utf
|
||
Failed: error 106 at offset 13: missing terminating ] for character class
|
||
|
||
/X/utf
|
||
XX\x{d800}
|
||
Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
|
||
XX\x{d800}\=no_utf_check
|
||
0: X
|
||
XX\x{da00}
|
||
Failed: error -24: UTF-16 error: missing low surrogate at end at offset 2
|
||
XX\x{da00}\=no_utf_check
|
||
0: X
|
||
XX\x{dc00}
|
||
Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
|
||
XX\x{dc00}\=no_utf_check
|
||
0: X
|
||
XX\x{de00}
|
||
Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
|
||
XX\x{de00}\=no_utf_check
|
||
0: X
|
||
XX\x{dfff}
|
||
Failed: error -26: UTF-16 error: isolated low surrogate at offset 2
|
||
XX\x{dfff}\=no_utf_check
|
||
0: X
|
||
XX\x{110000}
|
||
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
|
||
XX\x{d800}\x{1234}
|
||
Failed: error -25: UTF-16 error: invalid low surrogate at offset 3
|
||
|
||
/(*UTF16)\x{11234}/
|
||
abcd\x{11234}pqr
|
||
0: \x{11234}
|
||
|
||
/(*UTF)\x{11234}/I
|
||
Capturing subpattern count = 0
|
||
Compile options: <none>
|
||
Overall options: utf
|
||
First code unit = \x{d804}
|
||
Last code unit = \x{de34}
|
||
Subject length lower bound = 1
|
||
abcd\x{11234}pqr
|
||
0: \x{11234}
|
||
|
||
/(*UTF-32)\x{11234}/
|
||
Failed: error 134 at offset 17: character code point value in \x{} or \o{} is too large
|
||
abcd\x{11234}pqr
|
||
|
||
/(*UTF-32)\x{112}/
|
||
Failed: error 160 at offset 5: (*VERB) not recognized or malformed
|
||
abcd\x{11234}pqr
|
||
|
||
/(*CRLF)(*UTF16)(*BSR_UNICODE)a\Rb/I
|
||
Capturing subpattern count = 0
|
||
Compile options: <none>
|
||
Overall options: utf
|
||
\R matches any Unicode newline
|
||
Forced newline is CRLF
|
||
First code unit = 'a'
|
||
Last code unit = 'b'
|
||
Subject length lower bound = 3
|
||
|
||
/(*CRLF)(*UTF32)(*BSR_UNICODE)a\Rb/I
|
||
Failed: error 160 at offset 12: (*VERB) not recognized or malformed
|
||
|
||
/\h/I,utf
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x09 \x20 \xa0 \xff
|
||
Subject length lower bound = 1
|
||
ABC\x{09}
|
||
0: \x{09}
|
||
ABC\x{20}
|
||
0:
|
||
ABC\x{a0}
|
||
0: \x{a0}
|
||
ABC\x{1680}
|
||
0: \x{1680}
|
||
ABC\x{180e}
|
||
0: \x{180e}
|
||
ABC\x{2000}
|
||
0: \x{2000}
|
||
ABC\x{202f}
|
||
0: \x{202f}
|
||
ABC\x{205f}
|
||
0: \x{205f}
|
||
ABC\x{3000}
|
||
0: \x{3000}
|
||
|
||
/\v/I,utf
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||
Subject length lower bound = 1
|
||
ABC\x{0a}
|
||
0: \x{0a}
|
||
ABC\x{0b}
|
||
0: \x{0b}
|
||
ABC\x{0c}
|
||
0: \x{0c}
|
||
ABC\x{0d}
|
||
0: \x{0d}
|
||
ABC\x{85}
|
||
0: \x{85}
|
||
ABC\x{2028}
|
||
0: \x{2028}
|
||
|
||
/\h*A/I,utf
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x09 \x20 A \xa0 \xff
|
||
Last code unit = 'A'
|
||
Subject length lower bound = 1
|
||
CDBABC
|
||
0: A
|
||
\x{2000}ABC
|
||
0: \x{2000}A
|
||
|
||
/\R*A/I,bsr=unicode,utf
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
\R matches any Unicode newline
|
||
Starting code units: \x0a \x0b \x0c \x0d A \x85 \xff
|
||
Last code unit = 'A'
|
||
Subject length lower bound = 1
|
||
CDBABC
|
||
0: A
|
||
\x{2028}A
|
||
0: \x{2028}A
|
||
|
||
/\v+A/I,utf
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||
Last code unit = 'A'
|
||
Subject length lower bound = 2
|
||
|
||
/\s?xxx\s/I,utf
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 x
|
||
Last code unit = 'x'
|
||
Subject length lower bound = 4
|
||
|
||
/\sxxx\s/I,utf,tables=2
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \x85 \xa0
|
||
Last code unit = 'x'
|
||
Subject length lower bound = 5
|
||
AB\x{85}xxx\x{a0}XYZ
|
||
0: \x{85}xxx\x{a0}
|
||
AB\x{a0}xxx\x{85}XYZ
|
||
0: \x{a0}xxx\x{85}
|
||
|
||
/\S \S/I,utf,tables=2
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
|
||
\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
|
||
\x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
|
||
D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
|
||
i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
|
||
\x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94
|
||
\x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa1 \xa2 \xa3 \xa4
|
||
\xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
|
||
\xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
|
||
\xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
|
||
\xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
|
||
\xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
|
||
\xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
|
||
\xff
|
||
Last code unit = ' '
|
||
Subject length lower bound = 3
|
||
\x{a2} \x{84}
|
||
0: \x{a2} \x{84}
|
||
A Z
|
||
0: A Z
|
||
|
||
/a+/utf
|
||
a\x{123}aa\=offset=1
|
||
0: aa
|
||
a\x{123}aa\=offset=2
|
||
0: aa
|
||
a\x{123}aa\=offset=3
|
||
0: a
|
||
a\x{123}aa\=offset=4
|
||
No match
|
||
a\x{123}aa\=offset=5
|
||
Failed: error -33: bad offset value
|
||
a\x{123}aa\=offset=6
|
||
Failed: error -33: bad offset value
|
||
|
||
/\x{1234}+/Ii,utf
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
First code unit = \x{1234}
|
||
Subject length lower bound = 1
|
||
|
||
/\x{1234}+?/Ii,utf
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
First code unit = \x{1234}
|
||
Subject length lower bound = 1
|
||
|
||
/\x{1234}++/Ii,utf
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
First code unit = \x{1234}
|
||
Subject length lower bound = 1
|
||
|
||
/\x{1234}{2}/Ii,utf
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
First code unit = \x{1234}
|
||
Last code unit = \x{1234}
|
||
Subject length lower bound = 2
|
||
|
||
/[^\x{c4}]/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[^\x{c4}]
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Subject length lower bound = 1
|
||
|
||
/X+\x{200}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
X++
|
||
\x{200}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = 'X'
|
||
Last code unit = \x{200}
|
||
Subject length lower bound = 2
|
||
|
||
/\R/I,utf
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x0a \x0b \x0c \x0d \x85 \xff
|
||
Subject length lower bound = 1
|
||
|
||
# Check bad offset
|
||
|
||
/a/utf
|
||
\x{10000}\=offset=1
|
||
Error -36 (bad UTF-16 offset)
|
||
\x{10000}ab\=offset=1
|
||
Error -36 (bad UTF-16 offset)
|
||
\x{10000}ab\=offset=2
|
||
0: a
|
||
\x{10000}ab\=offset=3
|
||
No match
|
||
\x{10000}ab\=offset=4
|
||
No match
|
||
\x{10000}ab\=offset=5
|
||
Failed: error -33: bad offset value
|
||
|
||
/í¼€/utf
|
||
Failed: error -26 at offset 0: UTF-16 error: isolated low surrogate
|
||
|
||
/\w+\x{C4}/B,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\w++
|
||
\x{c4}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
a\x{C4}\x{C4}
|
||
0: a\x{c4}
|
||
|
||
/\w+\x{C4}/B,utf,tables=2
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\w+
|
||
\x{c4}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
a\x{C4}\x{C4}
|
||
0: a\x{c4}\x{c4}
|
||
|
||
/\W+\x{C4}/B,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\W+
|
||
\x{c4}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
!\x{C4}
|
||
0: !\x{c4}
|
||
|
||
/\W+\x{C4}/B,utf,tables=2
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\W++
|
||
\x{c4}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
!\x{C4}
|
||
0: !\x{c4}
|
||
|
||
/\W+\x{A1}/B,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\W+
|
||
\x{a1}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
!\x{A1}
|
||
0: !\x{a1}
|
||
|
||
/\W+\x{A1}/B,utf,tables=2
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\W+
|
||
\x{a1}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
!\x{A1}
|
||
0: !\x{a1}
|
||
|
||
/X\s+\x{A0}/B,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
X
|
||
\s++
|
||
\x{a0}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
X\x20\x{A0}\x{A0}
|
||
0: X \x{a0}
|
||
|
||
/X\s+\x{A0}/B,utf,tables=2
|
||
------------------------------------------------------------------
|
||
Bra
|
||
X
|
||
\s+
|
||
\x{a0}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
X\x20\x{A0}\x{A0}
|
||
0: X \x{a0}\x{a0}
|
||
|
||
/\S+\x{A0}/B,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\S+
|
||
\x{a0}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
X\x{A0}\x{A0}
|
||
0: X\x{a0}\x{a0}
|
||
|
||
/\S+\x{A0}/B,utf,tables=2
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\S++
|
||
\x{a0}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
X\x{A0}\x{A0}
|
||
0: X\x{a0}
|
||
|
||
/\x{a0}+\s!/B,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{a0}++
|
||
\s
|
||
!
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
\x{a0}\x20!
|
||
0: \x{a0} !
|
||
|
||
/\x{a0}+\s!/B,utf,tables=2
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{a0}+
|
||
\s
|
||
!
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
\x{a0}\x20!
|
||
0: \x{a0} !
|
||
|
||
/(*UTF)abc/never_utf
|
||
Failed: error 174 at offset 6: using UTF is disabled by the application
|
||
|
||
/abc/utf,never_utf
|
||
Failed: error 174 at offset 0: using UTF is disabled by the application
|
||
|
||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IBi,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
/i A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
First code unit = 'A' (caseless)
|
||
Last code unit = \x{1fb0} (caseless)
|
||
Subject length lower bound = 5
|
||
|
||
/A\x{391}\x{10427}\x{ff3a}\x{1fb0}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
A\x{391}\x{10427}\x{ff3a}\x{1fb0}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = 'A'
|
||
Last code unit = \x{1fb0}
|
||
Subject length lower bound = 5
|
||
|
||
/AB\x{1fb0}/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
AB\x{1fb0}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
First code unit = 'A'
|
||
Last code unit = \x{1fb0}
|
||
Subject length lower bound = 3
|
||
|
||
/AB\x{1fb0}/IBi,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
/i AB\x{1fb0}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
First code unit = 'A' (caseless)
|
||
Last code unit = \x{1fb0} (caseless)
|
||
Subject length lower bound = 3
|
||
|
||
/\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}/Ii,utf
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
First code unit = \x{401} (caseless)
|
||
Last code unit = \x{42f} (caseless)
|
||
Subject length lower bound = 17
|
||
\x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
|
||
0: \x{401}\x{420}\x{421}\x{422}\x{423}\x{424}\x{425}\x{426}\x{427}\x{428}\x{429}\x{42a}\x{42b}\x{42c}\x{42d}\x{42e}\x{42f}
|
||
\x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
|
||
0: \x{451}\x{440}\x{441}\x{442}\x{443}\x{444}\x{445}\x{446}\x{447}\x{448}\x{449}\x{44a}\x{44b}\x{44c}\x{44d}\x{44e}\x{44f}
|
||
|
||
/[â±¥]/Bi,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
/i \x{2c65}
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
|
||
/[^â±¥]/Bi,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
/i [^\x{2c65}]
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
|
||
/[[:blank:]]/B,ucp
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[\x09 \xa0\x{1680}\x{180e}\x{2000}-\x{200a}\x{202f}\x{205f}\x{3000}]
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
|
||
/\x{212a}+/Ii,utf
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
Starting code units: K k \xff
|
||
Subject length lower bound = 1
|
||
KKkk\x{212a}
|
||
0: KKkk\x{212a}
|
||
|
||
/s+/Ii,utf
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
Starting code units: S s \xff
|
||
Subject length lower bound = 1
|
||
SSss\x{17f}
|
||
0: SSss\x{17f}
|
||
|
||
# Non-UTF characters should give errors in both 16-bit and 32-bit modes.
|
||
|
||
/\x{110000}/utf
|
||
Failed: error 134 at offset 9: character code point value in \x{} or \o{} is too large
|
||
|
||
/\o{4200000}/utf
|
||
Failed: error 134 at offset 10: character code point value in \x{} or \o{} is too large
|
||
|
||
/\C/utf
|
||
\x{110000}
|
||
** Failed: character \x{110000} is greater than 0x10ffff and so cannot be converted to UTF-16
|
||
|
||
/\x{100}*A/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}*+
|
||
A
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: A \xff
|
||
Last code unit = 'A'
|
||
Subject length lower bound = 1
|
||
A
|
||
0: A
|
||
|
||
/\x{100}*\d(?R)/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}*+
|
||
\d
|
||
Recurse
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
|
||
Subject length lower bound = 1
|
||
|
||
/[Z\x{100}]/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[Z\x{100}]
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: Z \xff
|
||
Subject length lower bound = 1
|
||
Z\x{100}
|
||
0: Z
|
||
\x{100}
|
||
0: \x{100}
|
||
\x{100}Z
|
||
0: \x{100}
|
||
*** Failers
|
||
No match
|
||
|
||
/[z-\x{100}]/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[z-\xff\x{100}]
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87
|
||
\x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96
|
||
\x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5
|
||
\xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4
|
||
\xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3
|
||
\xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2
|
||
\xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1
|
||
\xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0
|
||
\xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
|
||
Subject length lower bound = 1
|
||
|
||
/[z\Qa-d]Ā\E]/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[\-\]adz\x{100}]
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: - ] a d z \xff
|
||
Subject length lower bound = 1
|
||
\x{100}
|
||
0: \x{100}
|
||
    Ā
|
||
0: \x{100}
|
||
|
||
/[ab\x{100}]abc(xyz(?1))/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[ab\x{100}]
|
||
abc
|
||
CBra 1
|
||
xyz
|
||
Recurse
|
||
Ket
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 1
|
||
Options: utf
|
||
Starting code units: a b \xff
|
||
Last code unit = 'z'
|
||
Subject length lower bound = 7
|
||
|
||
/\x{100}*\s/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}*+
|
||
\s
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x09 \x0a \x0b \x0c \x0d \x20 \xff
|
||
Subject length lower bound = 1
|
||
|
||
/\x{100}*\d/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}*+
|
||
\d
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: 0 1 2 3 4 5 6 7 8 9 \xff
|
||
Subject length lower bound = 1
|
||
|
||
/\x{100}*\w/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}*+
|
||
\w
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: 0 1 2 3 4 5 6 7 8 9 A B C D E F G H I J K L M N O P
|
||
Q R S T U V W X Y Z _ a b c d e f g h i j k l m n o p q r s t u v w x y z
|
||
\xff
|
||
Subject length lower bound = 1
|
||
|
||
/\x{100}*\D/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}*
|
||
\D
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
||
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
||
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
|
||
? @ A B C D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c
|
||
d e f g h i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82
|
||
\x83 \x84 \x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91
|
||
\x92 \x93 \x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0
|
||
\xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf
|
||
\xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe
|
||
\xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd
|
||
\xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc
|
||
\xdd \xde \xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb
|
||
\xec \xed \xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa
|
||
\xfb \xfc \xfd \xfe \xff
|
||
Subject length lower bound = 1
|
||
|
||
/\x{100}*\S/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}*
|
||
\S
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x0e \x0f
|
||
\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1a \x1b \x1c \x1d \x1e
|
||
\x1f ! " # $ % & ' ( ) * + , - . / 0 1 2 3 4 5 6 7 8 9 : ; < = > ? @ A B C
|
||
D E F G H I J K L M N O P Q R S T U V W X Y Z [ \ ] ^ _ ` a b c d e f g h
|
||
i j k l m n o p q r s t u v w x y z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84
|
||
\x85 \x86 \x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93
|
||
\x94 \x95 \x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2
|
||
\xa3 \xa4 \xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1
|
||
\xb2 \xb3 \xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0
|
||
\xc1 \xc2 \xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf
|
||
\xd0 \xd1 \xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde
|
||
\xdf \xe0 \xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed
|
||
\xee \xef \xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc
|
||
\xfd \xfe \xff
|
||
Subject length lower bound = 1
|
||
|
||
/\x{100}*\W/IB,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
\x{100}*
|
||
\W
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: utf
|
||
Starting code units: \x00 \x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0a
|
||
\x0b \x0c \x0d \x0e \x0f \x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19
|
||
\x1a \x1b \x1c \x1d \x1e \x1f \x20 ! " # $ % & ' ( ) * + , - . / : ; < = >
|
||
? @ [ \ ] ^ ` { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86 \x87 \x88 \x89
|
||
\x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95 \x96 \x97 \x98
|
||
\x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4 \xa5 \xa6 \xa7
|
||
\xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3 \xb4 \xb5 \xb6
|
||
\xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2 \xc3 \xc4 \xc5
|
||
\xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1 \xd2 \xd3 \xd4
|
||
\xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0 \xe1 \xe2 \xe3
|
||
\xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef \xf0 \xf1 \xf2
|
||
\xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe \xff
|
||
Subject length lower bound = 1
|
||
|
||
/[\x{105}-\x{109}]/IBi,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[\x{104}-\x{109}]
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
Starting code units: \xff
|
||
Subject length lower bound = 1
|
||
\x{104}
|
||
0: \x{104}
|
||
\x{105}
|
||
0: \x{105}
|
||
\x{109}
|
||
0: \x{109}
|
||
** Failers
|
||
No match
|
||
\x{100}
|
||
No match
|
||
\x{10a}
|
||
No match
|
||
|
||
/[z-\x{100}]/IBi,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
|
||
\x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
|
||
\x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
|
||
\xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
|
||
\xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
|
||
\xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
|
||
\xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
|
||
\xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
|
||
\xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
|
||
\xff
|
||
Subject length lower bound = 1
|
||
Z
|
||
0: Z
|
||
z
|
||
0: z
|
||
\x{39c}
|
||
0: \x{39c}
|
||
\x{178}
|
||
0: \x{178}
|
||
|
|
||
0: |
|
||
\x{80}
|
||
0: \x{80}
|
||
\x{ff}
|
||
0: \x{ff}
|
||
\x{100}
|
||
0: \x{100}
|
||
\x{101}
|
||
0: \x{101}
|
||
** Failers
|
||
No match
|
||
\x{102}
|
||
No match
|
||
Y
|
||
No match
|
||
y
|
||
No match
|
||
|
||
/[z-\x{100}]/IBi,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
[Zz-\xff\x{39c}\x{3bc}\x{212b}\x{1e9e}\x{212b}\x{178}\x{100}-\x{101}]
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
Starting code units: Z z { | } ~ \x7f \x80 \x81 \x82 \x83 \x84 \x85 \x86
|
||
\x87 \x88 \x89 \x8a \x8b \x8c \x8d \x8e \x8f \x90 \x91 \x92 \x93 \x94 \x95
|
||
\x96 \x97 \x98 \x99 \x9a \x9b \x9c \x9d \x9e \x9f \xa0 \xa1 \xa2 \xa3 \xa4
|
||
\xa5 \xa6 \xa7 \xa8 \xa9 \xaa \xab \xac \xad \xae \xaf \xb0 \xb1 \xb2 \xb3
|
||
\xb4 \xb5 \xb6 \xb7 \xb8 \xb9 \xba \xbb \xbc \xbd \xbe \xbf \xc0 \xc1 \xc2
|
||
\xc3 \xc4 \xc5 \xc6 \xc7 \xc8 \xc9 \xca \xcb \xcc \xcd \xce \xcf \xd0 \xd1
|
||
\xd2 \xd3 \xd4 \xd5 \xd6 \xd7 \xd8 \xd9 \xda \xdb \xdc \xdd \xde \xdf \xe0
|
||
\xe1 \xe2 \xe3 \xe4 \xe5 \xe6 \xe7 \xe8 \xe9 \xea \xeb \xec \xed \xee \xef
|
||
\xf0 \xf1 \xf2 \xf3 \xf4 \xf5 \xf6 \xf7 \xf8 \xf9 \xfa \xfb \xfc \xfd \xfe
|
||
\xff
|
||
Subject length lower bound = 1
|
||
|
||
/\x{3a3}B/IBi,utf
|
||
------------------------------------------------------------------
|
||
Bra
|
||
clist 03a3 03c2 03c3
|
||
/i B
|
||
Ket
|
||
End
|
||
------------------------------------------------------------------
|
||
Capturing subpattern count = 0
|
||
Options: caseless utf
|
||
Starting code units: \xff
|
||
Last code unit = 'B' (caseless)
|
||
Subject length lower bound = 2
|
||
|
||
# End of testinput12
|