wxWidgets/tests/regex/reg.test

# reg.test --
#
# This file contains a collection of tests for one or more of the Tcl
# built-in commands.  Sourcing this file into Tcl runs the tests and
# generates output for errors.  No output means no errors were found.
# (Don't panic if you are seeing this as part of the reg distribution
# and aren't using Tcl -- reg's own regression tester also knows how
# to read this file, ignoring the Tcl-isms.)
#
# Copyright (c) 1998, 1999 Henry Spencer.  All rights reserved.
#

if {[lsearch [namespace children] ::tcltest] == -1} {
    package require tcltest 2
    namespace import -force ::tcltest::*
}

# All tests require the testregexp command, return if this
# command doesn't exist

::tcltest::testConstraint testregexp \
	[expr {[info commands testregexp] != {}}]
::tcltest::testConstraint localeRegexp 0

# This file uses some custom procedures, defined below, for regexp regression
# testing.  The name of the procedure indicates the general nature of the
# test:
#	e	compile error expected
#	f	match failure expected
#	m	successful match
#	i	successful match with -indices (used in checking things like
#		nonparticipating subexpressions)
#	p	unsuccessful match with -indices (!!) (used in checking
#		partial-match reporting)
# There is also "doing" which sets up title and major test number for each
# block of tests.

# The first 3 arguments are constant:  a minor number (which often gets
# a letter or two suffixed to it internally), some flags, and the RE itself.
# For e, the remaining argument is the name of the compile error expected,
# less the leading "REG_".  For the rest, the next argument is the string
# to try the match against.  Remaining arguments are the substring expected
# to be matched, and any substrings expected to be matched by subexpressions.
# (For f, these arguments are optional, and if present are ignored except
# that they indicate how many subexpressions should be present in the RE.)
# It is an error for the number of subexpression arguments to be wrong.
# Cases involving nonparticipating subexpressions, checking where empty
# substrings are located, etc. should be done using i and p.

# The flag characters are complex and a bit eclectic.  Generally speaking,
# lowercase letters are compile options, uppercase are expected re_info
# bits, and nonalphabetics are match options, controls for how the test is
# run, or testing options.  The one small surprise is that AREs are the
# default, and you must explicitly request lesser flavors of RE.  The flags
# are as follows.  It is admitted that some are not very mnemonic.
# There are some others which are purely debugging tools and are not
# useful in this file.
#
#	-	no-op (placeholder)
#	+	provide fake xy equivalence class and ch collating element
#	%	force small state-set cache in matcher (to test cache replace)
#	^	beginning of string is not beginning of line
#	$	end of string is not end of line
#	*	test is Unicode-specific, needs big character set
#
#	&	test as both ARE and BRE
#	b	BRE
#	e	ERE
#	a	turn advanced-features bit on (error unless ERE already)
#	q	literal string, no metacharacters at all
#
#	i	case-independent matching
#	o	("opaque") no subexpression capture
#	p	newlines are half-magic, excluded from . and [^ only
#	w	newlines are half-magic, significant to ^ and $ only
#	n	newlines are fully magic, both effects
#	x	expanded RE syntax
#	t	incomplete-match reporting
#
#	A	backslash-_a_lphanumeric seen
#	B	ERE/ARE literal-_b_race heuristic used
#	E	backslash (_e_scape) seen within []
#	H	looka_h_ead constraint seen
#	I	_i_mpossible to match
#	L	_l_ocale-specific construct seen
#	M	unportable (_m_achine-specific) construct seen
#	N	RE can match empty (_n_ull) string
#	P	non-_P_OSIX construct seen
#	Q	{} _q_uantifier seen
#	R	back _r_eference seen
#	S	POSIX-un_s_pecified syntax seen
#	T	prefers shortest (_t_iny)
#	U	saw original-POSIX botch:  unmatched right paren in ERE (_u_gh)

# The one area we can't easily test is memory-allocation failures (which
# are hard to provoke on command).  Embedded NULs also are not tested at
# the moment, but this is a historical accident which should be fixed.


# test procedures and related

set ask "about"
set xflags "xflags"
set testbypassed 0

# re_info abbreviation mapping table
set infonames(A) "REG_UBSALNUM"
set infonames(B) "REG_UBRACES"
set infonames(E) "REG_UBBS"
set infonames(H) "REG_ULOOKAHEAD"
set infonames(I) "REG_UIMPOSSIBLE"
set infonames(L) "REG_ULOCALE"
set infonames(M) "REG_UUNPORT"
set infonames(N) "REG_UEMPTYMATCH"
set infonames(P) "REG_UNONPOSIX"
set infonames(Q) "REG_UBOUNDS"
set infonames(R) "REG_UBACKREF"
set infonames(S) "REG_UUNSPEC"
set infonames(T) "REG_USHORTEST"
set infonames(U) "REG_UPBOTCH"
set infonameorder "RHQBAUEPSMLNIT"	;# must match bit order, lsb first

# set major test number and description
proc doing {major desc} {
	global prefix description testbypassed

	if {$testbypassed != 0} {
		puts stdout "!!! bypassed $testbypassed tests in\
					 $prefix, `$description'"
	}

	set prefix reg-$major
	set description "reg $desc"
	set testbypassed 0
}

# build test number (internal)
proc tno {testid} {
	return [join $testid .]
}

# build description, with possible modifiers (internal)
proc desc {testid} {
	global description

	set d $description
	if {[llength $testid] > 1} {
		set d "([lreplace $testid 0 0]) $d"
	}
	return $d
}

# build trailing options and flags argument from a flags string (internal)
proc flags {fl} {
	global xflags

	set args [list]
	set flags ""
	foreach f [split $fl ""] {
		switch -exact -- $f {
		"i" { lappend args "-nocase" }
		"x" { lappend args "-expanded" }
		"n" { lappend args "-line" }
		"p" { lappend args "-linestop" }
		"w" { lappend args "-lineanchor" }
		"-" { }
		default { append flags $f }
		}
	}
	if {[string compare $flags ""] != 0} {
		lappend args -$xflags $flags
	}
	return $args
}

# build info-flags list from a flags string (internal)
proc infoflags {fl} {
	global infonames infonameorder

	set ret [list]
	foreach f [split $infonameorder ""] {
		if {[string first $f $fl] >= 0} {
			lappend ret $infonames($f)
		}
	}
	return $ret
}

# compilation error expected
proc e {testid flags re err} {
	global prefix ask errorCode

	# Tcl locale stuff doesn't do the ch/xy test fakery yet
	if {[string first "+" $flags] >= 0} {
	    # This will register as a skipped test
	    test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
	    return
	}

	# if &, test as both ARE and BRE
	set amp [string first "&" $flags]
	if {$amp >= 0} {
		set f [string range $flags 0 [expr $amp - 1]]
		append f [string range $flags [expr $amp + 1] end]
		e [linsert $testid end ARE] ${f} $re $err
		e [linsert $testid end BRE] ${f}b $re $err
		return
	}

	set cmd [concat [list testregexp -$ask] [flags $flags] [list $re]]
	set run "list \[catch \{$cmd\}\] \[lindex \$errorCode 1\]"
	test $prefix.[tno $testid] [desc $testid] \
		{testregexp} $run [list 1 REG_$err]
}

# match failure expected
proc f {testid flags re target args} {
	global prefix description ask

	# Tcl locale stuff doesn't do the ch/xy test fakery yet
	if {[string first "+" $flags] >= 0} {
	    # This will register as a skipped test
	    test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
	    return
	}

	# if &, test as both ARE and BRE
	set amp [string first "&" $flags]
	if {$amp >= 0} {
		set f [string range $flags 0 [expr $amp - 1]]
		append f [string range $flags [expr $amp + 1] end]
		eval [linsert $args 0 f [linsert $testid end ARE] ${f} $re \
								$target]
		eval [linsert $args 0 f [linsert $testid end BRE] ${f}b $re \
								$target]
		return
	}

	set f [flags $flags]
	set infoflags [infoflags $flags]
	set ccmd [concat [list testregexp -$ask] $f [list $re]]
	set nsub [expr [llength $args] - 1]
	if {$nsub == -1} {
		# didn't tell us number of subexps
		set ccmd "lreplace \[$ccmd\] 0 0"
		set info [list $infoflags]
	} else {
		set info [list $nsub $infoflags]
	}
	lappend testid "compile"
	test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info

	set testid [lreplace $testid end end "execute"]
	set ecmd [concat [list testregexp] $f [list $re $target]]
	test $prefix.[tno $testid] [desc $testid] {testregexp} $ecmd 0
}

# match expected, internal routine that does the work
# parameters like the "real" routines except they don't have "opts",
#  which is a possibly-empty list of switches for the regexp match attempt
# The ! flag is used to indicate expected match failure (for REG_EXPECT,
#  which wants argument testing even in the event of failure).
proc matchexpected {opts testid flags re target args} {
	global prefix description ask regBug

    if {[info exists regBug] && $regBug} {
	# This will register as a skipped test
	test $prefix.[tno $testid] [desc $testid] knownBug {format 0} {1}
	return
    }

	# Tcl locale stuff doesn't do the ch/xy test fakery yet
	if {[string first "+" $flags] >= 0} {
	    # This will register as a skipped test
	    test $prefix.[tno $testid] [desc $testid] localeRegexp {} {}
	    return
	}

	# if &, test as both BRE and ARE
	set amp [string first "&" $flags]
	if {$amp >= 0} {
		set f [string range $flags 0 [expr $amp - 1]]
		append f [string range $flags [expr $amp + 1] end]
		eval [concat [list matchexpected $opts \
			[linsert $testid end ARE] ${f} $re $target] $args]
		eval [concat [list matchexpected $opts \
			[linsert $testid end BRE] ${f}b $re $target] $args]
		return
	}

	set f [flags $flags]
	set infoflags [infoflags $flags]
	set ccmd [concat [list testregexp -$ask] $f [list $re]]
	set ecmd [concat [list testregexp] $opts $f [list $re $target]]

	set nsub [expr [llength $args] - 1]
	set names [list]
	set refs ""
	for {set i 0} {$i <= $nsub} {incr i} {
		if {$i == 0} {
			set name match
		} else {
			set name sub$i
		}
		lappend names $name
		append refs " \$$name"
		set $name ""
	}
	if {[string first "o" $flags] >= 0} {	;# REG_NOSUB kludge
		set nsub 0		;# unsigned value cannot be -1
	}
	if {[string first "t" $flags] >= 0} {	;# REG_EXPECT
		incr nsub -1		;# the extra does not count
	}
	set ecmd [concat $ecmd $names]
	set erun "list \[$ecmd\] $refs"
	set retcode [list 1]
	if {[string first "!" $flags] >= 0} {
		set retcode [list 0]
	}
	set result [concat $retcode $args]

	set info [list $nsub $infoflags]
	lappend testid "compile"
	test $prefix.[tno $testid] [desc $testid] {testregexp} $ccmd $info
	set testid [lreplace $testid end end "execute"]
	test $prefix.[tno $testid] [desc $testid] {testregexp} $erun $result
}

# match expected (no missing, empty, or ambiguous submatches)
# m testno flags re target mat submat ...
proc m {args} {
	eval matchexpected [linsert $args 0 [list]]
}

# match expected (full fanciness)
# i testno flags re target mat submat ...
proc i {args} {
	eval matchexpected [linsert $args 0 [list "-indices"]]
}

# partial match expected
# p testno flags re target mat "" ...
# Quirk:  number of ""s must be one more than number of subREs.
proc p {args} {
	set f [lindex $args 1]			;# add ! flag
	set args [lreplace $args 1 1 "!$f"]
	eval matchexpected [linsert $args 0 [list "-indices"]]
}

# test is a knownBug
proc knownBug {args} {
    set ::regBug 1
    uplevel #0 $args
    set ::regBug 0
}


# the tests themselves


# support functions and preliminary misc.
# This is sensitive to changes in message wording, but we really have to
# test the code->message expansion at least once.
test reg-0.1 "regexp error reporting" {
	list [catch {regexp (*) ign} msg] $msg
} {1 {couldn't compile regular expression pattern: quantifier operand invalid}}


doing 1 "basic sanity checks"
m  1	&	abc		abc	abc
f  2	&	abc		def
m  3	&	abc		xyabxabce	abc


doing 2 "invalid option combinations"
e  1	qe	a		INVARG
e  2	qa	a		INVARG
e  3	qx	a		INVARG
e  4	qn	a		INVARG
e  5	ba	a		INVARG


doing 3 "basic syntax"
i  1	&NS	""		a	{0 -1}
m  2	NS	a|		a	a
m  3	-	a|b		a	a
m  4	-	a|b		b	b
m  5	NS	a||b		b	b
m  6	&	ab		ab	ab


doing 4 "parentheses"
m  1	-	(a)e		ae	ae	a
m  2	o	(a)e		ae
m  3	b	{\(a\)b}	ab	ab	a
m  4	-	a((b)c)		abc	abc	bc	b
m  5	-	a(b)(c)		abc	abc	b	c
e  6	-	a(b		EPAREN
e  7	b	{a\(b}		EPAREN
# sigh, we blew it on the specs here... someday this will be fixed in POSIX,
#  but meanwhile, it's fixed in AREs
m  8	eU	a)b		a)b	a)b
e  9	-	a)b		EPAREN
e 10	b	{a\)b}		EPAREN
m 11	P	a(?:b)c		abc	abc
e 12	e	a(?:b)c		BADRPT
i 13	S	a()b		ab	{0 1}	{1 0}
m 14	SP	a(?:)b		ab	ab
i 15	S	a(|b)c		ac	{0 1}	{1 0}
m 16	S	a(b|)c		abc	abc	b


doing 5 "simple one-char matching"
# general case of brackets done later
m  1	&	a.b		axb	axb
f  2	&n	"a.b"		"a\nb"
m  3	&	{a[bc]d}	abd	abd
m  4	&	{a[bc]d}	acd	acd
f  5	&	{a[bc]d}	aed
f  6	&	{a[^bc]d}	abd
m  7	&	{a[^bc]d}	aed	aed
f  8	&p	"a\[^bc]d"	"a\nd"


doing 6 "context-dependent syntax"
# plus odds and ends
e  1	-	*		BADRPT
m  2	b	*		*	*
m  3	b	{\(*\)}		*	*	*
e  4	-	(*)		BADRPT
m  5	b	^*		*	*
e  6	-	^*		BADRPT
f  7	&	^b		^b
m  8	b	x^		x^	x^
f  9	I	x^		x
m 10	n	"\n^"		"x\nb"	"\n"
f 11	bS	{\(^b\)}	^b
m 12	-	(^b)		b	b	b
m 13	&	{x$}		x	x
m 14	bS	{\(x$\)}	x	x	x
m 15	-	{(x$)}		x	x	x
m 16	b	{x$y}		"x\$y"	"x\$y"
f 17	I	{x$y}		xy
m 18	n	"x\$\n"		"x\n"	"x\n"
e 19	-	+		BADRPT
e 20	-	?		BADRPT


doing 7 "simple quantifiers"
m  1	&N	a*		aa	aa
i  2	&N	a*		b	{0 -1}
m  3	-	a+		aa	aa
m  4	-	a?b		ab	ab
m  5	-	a?b		b	b
e  6	-	**		BADRPT
m  7	bN	**		***	***
e  8	&	a**		BADRPT
e  9	&	a**b		BADRPT
e 10	&	***		BADRPT
e 11	-	a++		BADRPT
e 12	-	a?+		BADRPT
e 13	-	a?*		BADRPT
e 14	-	a+*		BADRPT
e 15	-	a*+		BADRPT


doing 8 "braces"
m  1	NQ	"a{0,1}"	""	""
m  2	NQ	"a{0,1}"	ac	a
e  3	-	"a{1,0}"	BADBR
e  4	-	"a{1,2,3}"	BADBR
e  5	-	"a{257}"	BADBR
e  6	-	"a{1000}"	BADBR
e  7	-	"a{1"		EBRACE
e  8	-	"a{1n}"		BADBR
m  9	BS	"a{b"		"a\{b"	"a\{b"
m 10	BS	"a{"		"a\{"	"a\{"
m 11	bQ	"a\\{0,1\\}b"	cb	b
e 12	b	"a\\{0,1"	EBRACE
e 13	-	"a{0,1\\"	BADBR
m 14	Q	"a{0}b"		ab	b
m 15	Q	"a{0,0}b"	ab	b
m 16	Q	"a{0,1}b"	ab	ab
m 17	Q	"a{0,2}b"	b	b
m 18	Q	"a{0,2}b"	aab	aab
m 19	Q	"a{0,}b"	aab	aab
m 20	Q	"a{1,1}b"	aab	ab
m 21	Q	"a{1,3}b"	aaaab	aaab
f 22	Q	"a{1,3}b"	b
m 23	Q	"a{1,}b"	aab	aab
f 24	Q	"a{2,3}b"	ab
m 25	Q	"a{2,3}b"	aaaab	aaab
f 26	Q	"a{2,}b"	ab
m 27	Q	"a{2,}b"	aaaab	aaaab


doing 9 "brackets"
m  1	&	{a[bc]}		ac	ac
m  2	&	{a[-]}		a-	a-
m  3	&	{a[[.-.]]}	a-	a-
m  4	&L	{a[[.zero.]]}	a0	a0
m  5	&LM	{a[[.zero.]-9]}	a2	a2
m  6	&M	{a[0-[.9.]]}	a2	a2
m  7	&+L	{a[[=x=]]}	ax	ax
m  8	&+L	{a[[=x=]]}	ay	ay
f  9	&+L	{a[[=x=]]}	az
e 10	&	{a[0-[=x=]]}	ERANGE
m 11	&L	{a[[:digit:]]}	a0	a0
e 12	&	{a[[:woopsie:]]}	ECTYPE
f 13	&L	{a[[:digit:]]}	ab
e 14	&	{a[0-[:digit:]]}	ERANGE
m 15	&LP	{[[:<:]]a}	a	a
m 16	&LP	{a[[:>:]]}	a	a
e 17	&	{a[[..]]b}	ECOLLATE
e 18	&	{a[[==]]b}	ECOLLATE
e 19	&	{a[[::]]b}	ECTYPE
e 20	&	{a[[.a}		EBRACK
e 21	&	{a[[=a}		EBRACK
e 22	&	{a[[:a}		EBRACK
e 23	&	{a[}		EBRACK
e 24	&	{a[b}		EBRACK
e 25	&	{a[b-}		EBRACK
e 26	&	{a[b-c}		EBRACK
m 27	&M	{a[b-c]}	ab	ab
m 28	&	{a[b-b]}	ab	ab
m 29	&M	{a[1-2]}	a2	a2
e 30	&	{a[c-b]}	ERANGE
e 31	&	{a[a-b-c]}	ERANGE
m 32	&M	{a[--?]b}	a?b	a?b
m 33	&	{a[---]b}	a-b	a-b
m 34	&	{a[]b]c}	a]c	a]c
m 35	EP	{a[\]]b}	a]b	a]b
f 36	bE	{a[\]]b}	a]b
m 37	bE	{a[\]]b}	"a\\]b"	"a\\]b"
m 38	eE	{a[\]]b}	"a\\]b"	"a\\]b"
m 39	EP	{a[\\]b}	"a\\b"	"a\\b"
m 40	eE	{a[\\]b}	"a\\b"	"a\\b"
m 41	bE	{a[\\]b}	"a\\b"	"a\\b"
e 42	-	{a[\Z]b}	EESCAPE
m 43	&	{a[[b]c}	"a\[c"	"a\[c"
m 44	EMP*	{a[\u00fe-\u0507][\u00ff-\u0300]b} \
			"a\u0102\u02ffb"	"a\u0102\u02ffb"


doing 10 "anchors and newlines"
m  1	&	^a		a	a
f  2	&^	^a		a
i  3	&N	^		a	{0 -1}
i  4	&	{a$}		aba	{2 2}
f  5	{&$}	{a$}		a
i  6	&N	{$}		ab	{2 1}
m  7	&n	^a		a	a
m  8	&n	"^a"		"b\na"	"a"
i  9	&w	"^a"		"a\na"	{0 0}
i 10	&n^	"^a"		"a\na"	{2 2}
m 11	&n	{a$}		a	a
m 12	&n	"a\$"		"a\nb"	"a"
i 13	&n	"a\$"		"a\na"	{0 0}
i 14	N	^^		a	{0 -1}
m 15	b	^^		^	^
i 16	N	{$$}		a	{1 0}
m 17	b	{$$}		"\$"	"\$"
m 18	&N	{^$}		""	""
f 19	&N	{^$}		a
i 20	&nN	"^\$"		"a\n\nb"	{2 1}
m 21	N	{$^}		""	""
m 22	b	{$^}		"\$^"	"\$^"
m 23	P	{\Aa}		a	a
m 24	^P	{\Aa}		a	a
f 25	^nP	{\Aa}		"b\na"
m 26	P	{a\Z}		a	a
m 27	{$P}	{a\Z}		a	a
f 28	{$nP}	{a\Z}		"a\nb"
e 29	-	^*		BADRPT
e 30	-	{$*}		BADRPT
e 31	-	{\A*}		BADRPT
e 32	-	{\Z*}		BADRPT


doing 11 "boundary constraints"
m  1	&LP	{[[:<:]]a}	a	a
m  2	&LP	{[[:<:]]a}	-a	a
f  3	&LP	{[[:<:]]a}	ba
m  4	&LP	{a[[:>:]]}	a	a
m  5	&LP	{a[[:>:]]}	a-	a
f  6	&LP	{a[[:>:]]}	ab
m  7	bLP	{\<a}		a	a
f  8	bLP	{\<a}		ba
m  9	bLP	{a\>}		a	a
f 10	bLP	{a\>}		ab
m 11	LP	{\ya}		a	a
f 12	LP	{\ya}		ba
m 13	LP	{a\y}		a	a
f 14	LP	{a\y}		ab
m 15	LP	{a\Y}		ab	a
f 16	LP	{a\Y}		a-
f 17	LP	{a\Y}		a
f 18	LP	{-\Y}		-a
m 19	LP	{-\Y}		-%	-
f 20	LP	{\Y-}		a-
e 21	-	{[[:<:]]*}	BADRPT
e 22	-	{[[:>:]]*}	BADRPT
e 23	b	{\<*}		BADRPT
e 24	b	{\>*}		BADRPT
e 25	-	{\y*}		BADRPT
e 26	-	{\Y*}		BADRPT
m 27	LP	{\ma}		a	a
f 28	LP	{\ma}		ba
m 29	LP	{a\M}		a	a
f 30	LP	{a\M}		ab
f 31	ILP	{\Ma}		a
f 32	ILP	{a\m}		a


doing 12 "character classes"
m  1	LP	{a\db}		a0b	a0b
f  2	LP	{a\db}		axb
f  3	LP	{a\Db}		a0b
m  4	LP	{a\Db}		axb	axb
m  5	LP	"a\\sb"		"a b"	"a b"
m  6	LP	"a\\sb"		"a\tb"	"a\tb"
m  7	LP	"a\\sb"		"a\nb"	"a\nb"
f  8	LP	{a\sb}		axb
m  9	LP	{a\Sb}		axb	axb
f 10	LP	"a\\Sb"		"a b"
m 11	LP	{a\wb}		axb	axb
f 12	LP	{a\wb}		a-b
f 13	LP	{a\Wb}		axb
m 14	LP	{a\Wb}		a-b	a-b
m 15	LP	{\y\w+z\y}	adze-guz	guz
m 16	LPE	{a[\d]b}	a1b	a1b
m 17	LPE	"a\[\\s]b"	"a b"	"a b"
m 18	LPE	{a[\w]b}	axb	axb


doing 13 "escapes"
e  1	&	"a\\"		EESCAPE
m  2	-	{a\<b}		a<b	a<b
m  3	e	{a\<b}		a<b	a<b
m  4	bAS	{a\wb}		awb	awb
m  5	eAS	{a\wb}		awb	awb
m  6	PL	"a\\ab"		"a\007b"	"a\007b"
m  7	P	"a\\bb"		"a\bb"	"a\bb"
m  8	P	{a\Bb}		"a\\b"	"a\\b"
m  9	MP	"a\\chb"	"a\bb"	"a\bb"
m 10	MP	"a\\cHb"	"a\bb"	"a\bb"
m 11	LMP	"a\\e"		"a\033"	"a\033"
m 12	P	"a\\fb"		"a\fb"	"a\fb"
m 13	P	"a\\nb"		"a\nb"	"a\nb"
m 14	P	"a\\rb"		"a\rb"	"a\rb"
m 15	P	"a\\tb"		"a\tb"	"a\tb"
m 16	P	"a\\u0008x"	"a\bx"	"a\bx"
e 17	-	{a\u008x}	EESCAPE
m 18	P	"a\\u00088x"	"a\b8x"	"a\b8x"
m 19	P	"a\\U00000008x"	"a\bx"	"a\bx"
e 20	-	{a\U0000008x}	EESCAPE
m 21	P	"a\\vb"		"a\vb"	"a\vb"
m 22	MP	"a\\x08x"	"a\bx"	"a\bx"
e 23	-	{a\xq}		EESCAPE
m 24	MP	"a\\x0008x"	"a\bx"	"a\bx"
e 25	-	{a\z}		EESCAPE
m 26	MP	"a\\010b"	"a\bb"	"a\bb"


doing 14 "back references"
# ugh
m  1	RP	{a(b*)c\1}	abbcbb	abbcbb	bb
m  2	RP	{a(b*)c\1}	ac	ac	""
f  3	RP	{a(b*)c\1}	abbcb
m  4	RP	{a(b*)\1}	abbcbb	abb	b
m  5	RP	{a(b|bb)\1}	abbcbb	abb	b
m  6	RP	{a([bc])\1}	abb	abb	b
f  7	RP	{a([bc])\1}	abc
m  8	RP	{a([bc])\1}	abcabb	abb	b
f  9	RP	{a([bc])*\1}	abc
f 10	RP	{a([bc])\1}	abB
m 11	iRP	{a([bc])\1}	abB	abB	b
m 12	RP	{a([bc])\1+}	abbb	abbb	b
m 13	QRP	"a(\[bc])\\1{3,4}"	abbbb	abbbb	b
f 14	QRP	"a(\[bc])\\1{3,4}"	abbb
m 15	RP	{a([bc])\1*}	abbb	abbb	b
m 16	RP	{a([bc])\1*}	ab	ab	b
m 17	RP	{a([bc])(\1*)}	ab	ab	b	""
e 18	-	{a((b)\1)}	ESUBREG
e 19	-	{a(b)c\2}	ESUBREG
m 20	bR	{a\(b*\)c\1}	abbcbb	abbcbb	bb


doing 15 "octal escapes vs back references"
# initial zero is always octal
m  1	MP	"a\\010b"	"a\bb"	"a\bb"
m  2	MP	"a\\0070b"	"a\0070b"	"a\0070b"
m  3	MP	"a\\07b"	"a\007b"	"a\007b"
m  4	MP	"a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\\07c"	"abbbbbbbbbb\007c" \
	"abbbbbbbbbb\007c"	"b"	"b"	"b"	"b"	"b"	"b" \
	"b"	"b"	"b"	"b"
# a single digit is always a backref
e  5	-	{a\7b}		ESUBREG
# otherwise it's a backref only if within range (barf!)
m  6	MP	"a\\10b"	"a\bb"	"a\bb"
m  7	MP	{a\101b}	aAb	aAb
m  8	RP	{a(b)(b)(b)(b)(b)(b)(b)(b)(b)(b)\10c}	abbbbbbbbbbbc \
	abbbbbbbbbbbc	b	b	b	b	b	b	b \
	b	b	b
# but we're fussy about border cases -- guys who want octal should use the zero
e  9	-	{a((((((((((b\10))))))))))c}	ESUBREG
# BREs don't have octal, EREs don't have backrefs
m 10	MP	"a\\12b"	"a\nb"	"a\nb"
e 11	b	{a\12b}		ESUBREG
m 12	eAS	{a\12b}		a12b	a12b


doing 16 "expanded syntax"
m  1	xP	"a b c"		"abc"	"abc"
m  2	xP	"a b #oops\nc\td"	"abcd"	"abcd"
m  3	x	"a\\ b\\\tc"	"a b\tc"	"a b\tc"
m  4	xP	"a b\\#c"	"ab#c"	"ab#c"
m  5	xP	"a b\[c d]e"	"ab e"	"ab e"
m  6	xP	"a b\[c#d]e"	"ab#e"	"ab#e"
m  7	xP	"a b\[c#d]e"	"abde"	"abde"
m  8	xSPB	"ab{ d"		"ab\{d"	"ab\{d"
m  9	xPQ	"ab{ 1 , 2 }c"	"abc"	"abc"


doing 17 "misc syntax"
m  1	P	a(?#comment)b	ab	ab


doing 18 "unmatchable REs"
f  1	I	a^b		ab


doing 19 "case independence"
m  1	&i	ab		Ab	Ab
m  2	&i	{a[bc]}		aC	aC
f  3	&i	{a[^bc]}	aB
m  4	&iM	{a[b-d]}	aC	aC
f  5	&iM	{a[^b-d]}	aC


doing 20 "directors and embedded options"
e  1	&	***?		BADPAT
m  2	q	***?		***?	***?
m  3	&P	***=a*b		a*b	a*b
m  4	q	***=a*b		***=a*b	***=a*b
m  5	bLP	{***:\w+}	ab	ab
m  6	eLP	{***:\w+}	ab	ab
e  7	&	***:***=a*b	BADRPT
m  8	&P	***:(?b)a+b	a+b	a+b
m  9	P	(?b)a+b		a+b	a+b
e 10	e	{(?b)\w+}	BADRPT
m 11	bAS	{(?b)\w+}	(?b)w+	(?b)w+
m 12	iP	(?c)a		a	a
f 13	iP	(?c)a		A
m 14	APS	{(?e)\W+}	WW	WW
m 15	P	(?i)a+		Aa	Aa
f 16	P	"(?m)a.b"	"a\nb"
m 17	P	"(?m)^b"	"a\nb"	"b"
f 18	P	"(?n)a.b"	"a\nb"
m 19	P	"(?n)^b"	"a\nb"	"b"
f 20	P	"(?p)a.b"	"a\nb"
f 21	P	"(?p)^b"	"a\nb"
m 22	P	(?q)a+b		a+b	a+b
m 23	nP	"(?s)a.b"	"a\nb"	"a\nb"
m 24	xP	"(?t)a b"	"a b"	"a b"
m 25	P	"(?w)a.b"	"a\nb"	"a\nb"
m 26	P	"(?w)^b"	"a\nb"	"b"
m 27	P	"(?x)a b"	"ab"	"ab"
e 28	-	(?z)ab		BADOPT
m 29	P	(?ici)a+	Aa	Aa
e 30	P	(?i)(?q)a+	BADRPT
m 31	P	(?q)(?i)a+	(?i)a+	(?i)a+
m 32	P	(?qe)a+		a	a
m 33	xP	"(?q)a b"	"a b"	"a b"
m 34	P	"(?qx)a b"	"a b"	"a b"
m 35	P	(?qi)ab		Ab	Ab


doing 21 "capturing"
m  1	-	a(b)c		abc	abc	b
m  2	P	a(?:b)c		xabc	abc
m  3	-	a((b))c		xabcy	abc	b	b
m  4	P	a(?:(b))c	abcy	abc	b
m  5	P	a((?:b))c	abc	abc	b
m  6	P	a(?:(?:b))c	abc	abc
i  7	Q	"a(b){0}c"	ac	{0 1}	{-1 -1}
m  8	-	a(b)c(d)e	abcde	abcde	b	d
m  9	-	(b)c(d)e	bcde	bcde	b	d
m 10	-	a(b)(d)e	abde	abde	b	d
m 11	-	a(b)c(d)	abcd	abcd	b	d
m 12	-	(ab)(cd)	xabcdy	abcd	ab	cd
m 13	-	a(b)?c		xabcy	abc	b
i 14	-	a(b)?c		xacy	{1 2}	{-1 -1}
m 15	-	a(b)?c(d)?e	xabcdey	abcde	b	d
i 16	-	a(b)?c(d)?e	xacdey	{1 4}	{-1 -1}	{3 3}
i 17	-	a(b)?c(d)?e	xabcey	{1 4}	{2 2}	{-1 -1}
i 18	-	a(b)?c(d)?e	xacey	{1 3}	{-1 -1}	{-1 -1}
m 19	-	a(b)*c		xabcy	abc	b
i 20	-	a(b)*c		xabbbcy	{1 5}	{4 4}
i 21	-	a(b)*c		xacy	{1 2}	{-1 -1}
m 22	-	a(b*)c		xabbbcy	abbbc	bbb
m 23	-	a(b*)c		xacy	ac	""
f 24	-	a(b)+c		xacy
m 25	-	a(b)+c		xabcy	abc	b
i 26	-	a(b)+c		xabbbcy	{1 5}	{4 4}
m 27	-	a(b+)c		xabbbcy	abbbc	bbb
i 28	Q	"a(b){2,3}c"	xabbbcy	{1 5}	{4 4}
i 29	Q	"a(b){2,3}c"	xabbcy	{1 4}	{3 3}
f 30	Q	"a(b){2,3}c"	xabcy
m 31	LP	"\\y(\\w+)\\y"	"-- abc-"	"abc"	"abc"
m 32	-	a((b|c)d+)+	abacdbd	acdbd	bd	b
m 33	N	(.*).*		abc	abc	abc
m 34	N	(a*)*		bc	""	""


doing 22 "multicharacter collating elements"
# again ugh
m  1	&+L	{a[c]e}		ace	ace
f  2	&+IL	{a[c]h}		ach
m  3	&+L	{a[[.ch.]]}	ach	ach
f  4	&+L	{a[[.ch.]]}	ace
m  5	&+L	{a[c[.ch.]]}	ac	ac
m  6	&+L	{a[c[.ch.]]}	ace	ac
m  7	&+L	{a[c[.ch.]]}	ache	ach
f  8	&+L	{a[^c]e}	ace
m  9	&+L	{a[^c]e}	abe	abe
m 10	&+L	{a[^c]e}	ache	ache
f 11	&+L	{a[^[.ch.]]}	ach
m 12	&+L	{a[^[.ch.]]}	ace	ac
m 13	&+L	{a[^[.ch.]]}	ac	ac
m 14	&+L	{a[^[.ch.]]}	abe	ab
f 15	&+L	{a[^c[.ch.]]}	ach
f 16	&+L	{a[^c[.ch.]]}	ace
f 17	&+L	{a[^c[.ch.]]}	ac
m 18	&+L	{a[^c[.ch.]]}	abe	ab
m 19	&+L	{a[^b]}		ac	ac
m 20	&+L	{a[^b]}		ace	ac
m 21	&+L	{a[^b]}		ach	ach
f 22	&+L	{a[^b]}		abe


doing 23 "lookahead constraints"
m  1	HP	a(?=b)b*	ab	ab
f  2	HP	a(?=b)b*	a
m  3	HP	a(?=b)b*(?=c)c*	abc	abc
f  4	HP	a(?=b)b*(?=c)c*	ab
f  5	HP	a(?!b)b*	ab
m  6	HP	a(?!b)b*	a	a
m  7	HP	(?=b)b		b	b
f  8	HP	(?=b)b		a


doing 24 "non-greedy quantifiers"
m  1	PT	ab+?		abb	ab
m  2	PT	ab+?c		abbc	abbc
m  3	PT	ab*?		abb	a
m  4	PT	ab*?c		abbc	abbc
m  5	PT	ab??		ab	a
m  6	PT	ab??c		abc	abc
m  7	PQT	"ab{2,4}?"	abbbb	abb
m  8	PQT	"ab{2,4}?c"	abbbbc	abbbbc
m  9	-	3z*		123zzzz456	3zzzz
m 10	PT	3z*?		123zzzz456	3
m 11	-	z*4		123zzzz456	zzzz4
m 12	PT	z*?4		123zzzz456	zzzz4


doing 25 "mixed quantifiers"
# this is very incomplete as yet
# should include |
m  1	PNT	{^(.*?)(a*)$}	xyza	xyza	xyz	a
m  2	PNT	{^(.*?)(a*)$}	xyzaa	xyzaa	xyz	aa
m  3	PNT	{^(.*?)(a*)$}	xyz	xyz	xyz	""


doing 26 "tricky cases"
# attempts to trick the matcher into accepting a short match
m  1	-	(week|wee)(night|knights)	weeknights	weeknights \
	wee	knights
m  2	RP	{a(bc*).*\1}	abccbccb	abccbccb	b
m  3	-	{a(b.[bc]*)+}	abcbd	abcbd	bd


doing 27 "implementation misc."
# duplicate arcs are suppressed
m  1	P	a(?:b|b)c	abc	abc
# make color/subcolor relationship go back and forth
m  2	&	{[ab][ab][ab]}	aba	aba
m  3	&	{[ab][ab][ab][ab][ab][ab][ab]}	abababa	abababa


doing 28 "boundary busters etc."
# color-descriptor allocation changes at 10
m  1	&	abcdefghijkl	abcdefghijkl	abcdefghijkl
# so does arc allocation
m  2	P	a(?:b|c|d|e|f|g|h|i|j|k|l|m)n	agn	agn
# subexpression tracking also at 10
m  3	-	a(((((((((((((b)))))))))))))c	abc	abc	b	b	b	b	b	b	b	b	b	b	b	b	b
# state-set handling changes slightly at unsigned size (might be 64...)
# (also stresses arc allocation)
m  4	Q	"ab{1,100}c"	abbc	abbc
m  5	Q	"ab{1,100}c"	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
m  6	Q	"ab{1,100}c" \
	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc \
	abbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbc
# force small cache and bust it, several ways
m  7	LP	{\w+abcdefgh}	xyzabcdefgh	xyzabcdefgh
m  8	%LP	{\w+abcdefgh}	xyzabcdefgh	xyzabcdefgh
m  9	%LP	{\w+abcdefghijklmnopqrst}	xyzabcdefghijklmnopqrst \
	xyzabcdefghijklmnopqrst
i 10	%LP	{\w+(abcdefgh)?}	xyz	{0 2}	{-1 -1}
i 11	%LP	{\w+(abcdefgh)?}	xyzabcdefg	{0 9}	{-1 -1}
i 12	%LP	{\w+(abcdefghijklmnopqrst)?}	xyzabcdefghijklmnopqrs \
	{0 21}	{-1 -1}


doing 29 "incomplete matches"
p  1	t	def		abc	{3 2}	""
p  2	t	bcd		abc	{1 2}	""
p  3	t	abc		abab	{0 3}	""
p  4	t	abc		abdab	{3 4}	""
i  5	t	abc		abc	{0 2}	{0 2}
i  6	t	abc		xyabc	{2 4}	{2 4}
p  7	t	abc+		xyab	{2 3}	""
i  8	t	abc+		xyabc	{2 4}	{2 4}
knownBug i  9	t	abc+		xyabcd	{2 4}	{6 5}
i  10	t	abc+		xyabcdd	{2 4}	{7 6}
p  11	tPT	abc+?		xyab	{2 3}	""
# the retain numbers in these two may look wrong, but they aren't
i  12	tPT	abc+?		xyabc	{2 4}	{5 4}
i  13	tPT	abc+?		xyabcc	{2 4}	{6 5}
i  14	tPT	abc+?		xyabcd	{2 4}	{6 5}
i  15	tPT	abc+?		xyabcdd	{2 4}	{7 6}
i  16	t	abcd|bc		xyabc	{3 4}	{2 4}
p  17	tn	.*k		"xx\nyyy"	{3 5}	""


doing 30 "misc. oddities and old bugs"
e  1	&	***		BADRPT
m  2	N	a?b*		abb	abb
m  3	N	a?b*		bb	bb
m  4	&	a*b		aab	aab
m  5	&	^a*b		aaaab	aaaab
m  6	&M	{[0-6][1-2][0-3][0-6][1-6][0-6]}	010010	010010
# temporary REG_BOSONLY kludge
m  7	s	abc		abcd	abc
f  8	s	abc		xabcd
# back to normal stuff
m  9	HLP	{(?n)^(?![t#])\S+}	"tk\n\n#\n#\nit0"	it0


# flush any leftover complaints
doing 0 "flush"

# Tests resulting from bugs reported by users
test reg-31.1 {[[:xdigit:]] behaves correctly when followed by [[:space:]]} {
    set str {2:::DebugWin32}
    set re {([[:xdigit:]])([[:space:]]*)}
    list [regexp $re $str match xdigit spaces] $match $xdigit $spaces
    # Code used to produce {1 2:::DebugWin32 2 :::DebugWin32} !!!
} {1 2 2 {}}

test reg-32.1 {canmatch functionality -- at end} {
    set pat {blah}
    set line "asd asd"
    # can match at the final d, if '%' follows
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 7}

test reg-32.2 {canmatch functionality -- at end} {
    set pat {s%$}
    set line "asd asd"
    # can only match after the end of the string
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 7}

test reg-32.3 {canmatch functionality -- not last char} {
    set pat {[^d]%$}
    set line "asd asd"
    # can only match after the end of the string
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 7}

test reg-32.3.1 {canmatch functionality -- no match} {
    set pat {\Zx}
    set line "asd asd"
    # can match the last char, if followed by x
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 -1}

test reg-32.4 {canmatch functionality -- last char} {knownBug} {
    set pat {.x}
    set line "asd asd"
    # can match the last char, if followed by x
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.4.1 {canmatch functionality -- last char} {knownBug} {
    set pat {.x$}
    set line "asd asd"
    # can match the last char, if followed by x
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.5 {canmatch functionality -- last char} {knownBug} {
    set pat {.[^d]x$}
    set line "asd asd"
    # can match the last char, if followed by not-d and x.
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.6 {canmatch functionality -- last char} {knownBug} {
    set pat {[^a]%[^\r\n]*$}
    set line "asd asd"
    # can match at the final d, if '%' follows
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.7 {canmatch functionality -- last char} {knownBug} {
    set pat {[^a]%$}
    set line "asd asd"
    # can match at the final d, if '%' follows
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.8 {canmatch functionality -- last char} {knownBug} {
    set pat {[^x]%$}
    set line "asd asd"
    # can match at the final d, if '%' follows
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

test reg-32.9 {canmatch functionality -- more complex case} {knownBug} {
    set pat {((\B\B|\Bh+line)[ \t]*|[^\B]%[^\r\n]*)$}
    set line "asd asd"
    # can match at the final d, if '%' follows
    set res [testregexp -xflags -- c $pat $line resvar]
    lappend res $resvar
} {0 6}

# Tests reg-33.*: Checks for bug fixes

test reg-33.1 {Bug 230589} {
    regexp {[ ]*(^|[^%])%V} "*%V2" m s
} 1

test reg-33.2 {Bug 504785} {
    regexp -inline {([^_.]*)([^.]*)\.(..)(.).*} bbcos_001_c01.q1la
} {bbcos_001_c01.q1la bbcos _001_c01 q1 l}

test reg-33.3 {Bug 505048} {
    regexp {\A\s*[^<]*\s*<([^>]+)>} a<a>
} 1

test reg-33.4 {Bug 505048} {
    regexp {\A\s*([^b]*)b} ab
} 1

test reg-33.5 {Bug 505048} {
    regexp {\A\s*[^b]*(b)} ab
} 1

test reg-33.6 {Bug 505048} {
    regexp {\A(\s*)[^b]*(b)} ab
} 1

test reg-33.7 {Bug 505048} {
    regexp {\A\s*[^b]*b} ab
} 1

test reg-33.8 {Bug 505048} {
    regexp -inline {\A\s*[^b]*b} ab
} ab

test reg-33.9 {Bug 505048} {
    regexp -indices -inline {\A\s*[^b]*b} ab
} {{0 1}}

test reg-33.10 {Bug 840258} {
    regsub {(^|\n)+\.*b} \n.b {} tmp
} 1

test reg-33.11 {Bug 840258} {
    regsub {(^|[\n\r]+)\.*\?<.*?(\n|\r)+} \
            "TQ\r\n.?<5000267>Test already stopped\r\n" {} tmp
} 1

# cleanup
::tcltest::cleanupTests
return