2020-02-23 11:40:05 -05:00
|
|
|
# These test special UTF and UCP features of DFA matching. The output is
|
|
|
|
# different for the different widths.
|
2015-08-18 06:34:05 -04:00
|
|
|
|
|
|
|
#subject dfa
|
|
|
|
|
2020-02-23 11:40:05 -05:00
|
|
|
# ----------------------------------------------------
|
|
|
|
# These are a selection of the more comprehensive tests that are run for
|
|
|
|
# non-DFA matching.
|
|
|
|
|
2015-08-18 06:34:05 -04:00
|
|
|
/X/utf
|
|
|
|
XX\x{d800}
|
|
|
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
|
|
|
XX\x{d800}\=offset=3
|
|
|
|
Error -36 (bad UTF-8 offset)
|
|
|
|
XX\x{d800}\=no_utf_check
|
|
|
|
0: X
|
|
|
|
XX\x{da00}
|
|
|
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
|
|
|
XX\x{da00}\=no_utf_check
|
|
|
|
0: X
|
|
|
|
XX\x{dc00}
|
|
|
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
|
|
|
XX\x{dc00}\=no_utf_check
|
|
|
|
0: X
|
|
|
|
XX\x{de00}
|
|
|
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
|
|
|
XX\x{de00}\=no_utf_check
|
|
|
|
0: X
|
|
|
|
XX\x{dfff}
|
|
|
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
|
|
|
XX\x{dfff}\=no_utf_check
|
|
|
|
0: X
|
|
|
|
XX\x{110000}
|
|
|
|
Failed: error -15: UTF-8 error: code points greater than 0x10ffff are not defined at offset 2
|
|
|
|
XX\x{d800}\x{1234}
|
|
|
|
Failed: error -16: UTF-8 error: code points 0xd800-0xdfff are not defined at offset 2
|
|
|
|
|
|
|
|
/badutf/utf
|
|
|
|
X\xdf
|
|
|
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 1
|
|
|
|
XX\xef
|
|
|
|
Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
|
|
|
|
XXX\xef\x80
|
|
|
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
|
|
|
|
X\xf7
|
|
|
|
Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 1
|
|
|
|
XX\xf7\x80
|
|
|
|
Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
|
|
|
|
XXX\xf7\x80\x80
|
|
|
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 3
|
|
|
|
|
|
|
|
/shortutf/utf
|
|
|
|
XX\xdf\=ph
|
|
|
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
|
|
|
|
XX\xef\=ph
|
|
|
|
Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 2
|
|
|
|
XX\xef\x80\=ph
|
|
|
|
Failed: error -3: UTF-8 error: 1 byte missing at end at offset 2
|
|
|
|
\xf7\=ph
|
|
|
|
Failed: error -5: UTF-8 error: 3 bytes missing at end at offset 0
|
|
|
|
\xf7\x80\=ph
|
|
|
|
Failed: error -4: UTF-8 error: 2 bytes missing at end at offset 0
|
2020-02-23 11:40:05 -05:00
|
|
|
|
|
|
|
# ----------------------------------------------------
|
|
|
|
# UCP and casing tests - except for the first two, these will all fail in 8-bit
|
|
|
|
# mode because they are testing UCP without UTF and use characters > 255.
|
|
|
|
|
|
|
|
/\x{c1}/i,no_start_optimize
|
|
|
|
\= Expect no match
|
|
|
|
\x{e1}
|
|
|
|
No match
|
|
|
|
|
|
|
|
/\x{c1}+\x{e1}/iB,ucp
|
|
|
|
------------------------------------------------------------------
|
|
|
|
Bra
|
|
|
|
/i \x{c1}+
|
|
|
|
/i \x{e1}
|
|
|
|
Ket
|
|
|
|
End
|
|
|
|
------------------------------------------------------------------
|
|
|
|
\x{c1}\x{c1}\x{c1}
|
|
|
|
0: \xc1\xc1\xc1
|
|
|
|
1: \xc1\xc1
|
|
|
|
\x{e1}\x{e1}\x{e1}
|
|
|
|
0: \xe1\xe1\xe1
|
|
|
|
1: \xe1\xe1
|
|
|
|
|
|
|
|
/\x{120}\x{c1}/i,ucp,no_start_optimize
|
|
|
|
Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
|
|
|
|
\x{121}\x{e1}
|
|
|
|
|
|
|
|
/\x{120}\x{c1}/i,ucp
|
|
|
|
Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
|
|
|
|
\x{121}\x{e1}
|
|
|
|
|
|
|
|
/[^\x{120}]/i,no_start_optimize
|
|
|
|
Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
|
|
|
|
\x{121}
|
|
|
|
|
|
|
|
/[^\x{120}]/i,ucp,no_start_optimize
|
|
|
|
Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
|
|
|
|
\= Expect no match
|
|
|
|
\x{121}
|
|
|
|
|
|
|
|
/[^\x{120}]/i
|
|
|
|
Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
|
|
|
|
\x{121}
|
|
|
|
|
|
|
|
/[^\x{120}]/i,ucp
|
|
|
|
Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
|
|
|
|
\= Expect no match
|
|
|
|
\x{121}
|
|
|
|
|
|
|
|
/\x{120}{2}/i,ucp
|
|
|
|
Failed: error 134 at offset 6: character code point value in \x{} or \o{} is too large
|
|
|
|
\x{121}\x{121}
|
|
|
|
|
|
|
|
/[^\x{120}]{2}/i,ucp
|
|
|
|
Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too large
|
|
|
|
\= Expect no match
|
|
|
|
\x{121}\x{121}
|
|
|
|
|
|
|
|
# ----------------------------------------------------
|
2015-08-18 06:34:05 -04:00
|
|
|
|
|
|
|
# End of testinput14
|