Merge branch 'natural-sort'
Add natural sort functions. See https://github.com/wxWidgets/wxWidgets/pull/1923
This commit is contained in:
commit
2289f8be55
@ -42,12 +42,30 @@ wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2)
|
||||
return cmp ? cmp : s1.Cmp(s2);
|
||||
}
|
||||
|
||||
|
||||
inline int wxCMPFUNC_CONV
|
||||
wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2)
|
||||
{
|
||||
return wxDictionaryStringSortAscending(s2, s1);
|
||||
}
|
||||
|
||||
WXDLLIMPEXP_BASE
|
||||
int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2);
|
||||
|
||||
WXDLLIMPEXP_BASE
|
||||
int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2);
|
||||
|
||||
// Comparison function sorting strings in natural order.
//
// This is a thin wrapper forwarding both strings, unchanged, to
// wxCmpNatural() and returning its result.
inline int wxCMPFUNC_CONV
wxNaturalStringSortAscending(const wxString& s1, const wxString& s2)
{
    return wxCmpNatural(s1, s2);
}
|
||||
|
||||
// Comparison function sorting strings in reverse natural order.
//
// The result is obtained by calling wxCmpNatural() with the operands
// exchanged.
inline int wxCMPFUNC_CONV
wxNaturalStringSortDescending(const wxString& s1, const wxString& s2)
{
    // Swapping the arguments inverts the sign of the result.
    return wxCmpNatural(s2, s1);
}
|
||||
|
||||
|
||||
#if wxUSE_STD_CONTAINERS
|
||||
|
||||
typedef int (wxCMPFUNC_CONV *CMPFUNCwxString)(wxString*, wxString*);
|
||||
|
@ -363,7 +363,8 @@ public:
|
||||
This function can be used with wxSortedArrayString::Sort() or passed as an
|
||||
argument to wxSortedArrayString constructor.
|
||||
|
||||
@see wxStringSortDescending(), wxDictionaryStringSortAscending()
|
||||
@see wxStringSortDescending(), wxDictionaryStringSortAscending(),
|
||||
wxNaturalStringSortAscending()
|
||||
|
||||
@since 3.1.0
|
||||
*/
|
||||
@ -375,7 +376,8 @@ int wxStringSortAscending(const wxString& s1, const wxString& s2);
|
||||
This function can be used with wxSortedArrayString::Sort() or passed as an
|
||||
argument to wxSortedArrayString constructor.
|
||||
|
||||
@see wxStringSortAscending(), wxDictionaryStringSortAscending()
|
||||
@see wxStringSortAscending(), wxDictionaryStringSortDescending(),
|
||||
wxNaturalStringSortDescending()
|
||||
|
||||
@since 3.1.0
|
||||
*/
|
||||
@ -392,7 +394,9 @@ int wxStringSortDescending(const wxString& s1, const wxString& s2);
|
||||
This function can be used with wxSortedArrayString::Sort() or passed as an
|
||||
argument to wxSortedArrayString constructor.
|
||||
|
||||
@see wxStringSortAscending(), wxDictionaryStringSortDescending()
|
||||
@see wxDictionaryStringSortDescending(),
|
||||
wxStringSortAscending(),
|
||||
wxNaturalStringSortAscending()
|
||||
|
||||
@since 3.1.0
|
||||
*/
|
||||
@ -403,11 +407,94 @@ int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2);
|
||||
|
||||
See wxDictionaryStringSortAscending() for the dictionary sort description.
|
||||
|
||||
@see wxStringSortDescending()
|
||||
@see wxDictionaryStringSortAscending(),
|
||||
wxStringSortDescending(),
|
||||
wxNaturalStringSortDescending()
|
||||
|
||||
@since 3.1.0
|
||||
*/
|
||||
int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2);
|
||||
int wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2);
|
||||
|
||||
|
||||
/**
|
||||
Comparison function comparing strings in natural order.
|
||||
|
||||
This function can be used with wxSortedArrayString::Sort()
|
||||
or passed as an argument to wxSortedArrayString constructor.
|
||||
|
||||
See wxCmpNatural() for more information about how natural
|
||||
sort order is implemented.
|
||||
|
||||
@see wxNaturalStringSortDescending(),
|
||||
wxStringSortAscending(), wxDictionaryStringSortAscending()
|
||||
|
||||
@since 3.1.4
|
||||
*/
|
||||
int wxNaturalStringSortAscending(const wxString& s1, const wxString& s2);
|
||||
|
||||
/**
|
||||
Comparison function comparing strings in reverse natural order.
|
||||
|
||||
This function can be used with wxSortedArrayString::Sort()
|
||||
or passed as an argument to wxSortedArrayString constructor.
|
||||
|
||||
See wxCmpNatural() for more information about how natural
|
||||
sort order is implemented.
|
||||
|
||||
@see wxNaturalStringSortAscending(),
|
||||
wxStringSortDescending(), wxDictionaryStringSortDescending()
|
||||
|
||||
@since 3.1.4
|
||||
*/
|
||||
int wxNaturalStringSortDescending(const wxString& s1, const wxString& s2);
|
||||
|
||||
/**
|
||||
This function compares strings using case-insensitive collation and
|
||||
additionally, numbers within strings are recognised and compared
|
||||
numerically, rather than alphabetically. When used for sorting,
|
||||
the result is that e.g. file names containing numbers are sorted
|
||||
in a natural way.
|
||||
|
||||
For example, sorting with a simple string comparison results in:
|
||||
- file1.txt
|
||||
- file10.txt
|
||||
- file100.txt
|
||||
- file2.txt
|
||||
- file20.txt
|
||||
- file3.txt
|
||||
|
||||
But sorting the same strings in natural sort order results in:
|
||||
- file1.txt
|
||||
- file2.txt
|
||||
- file3.txt
|
||||
- file10.txt
|
||||
- file20.txt
|
||||
- file100.txt
|
||||
|
||||
wxCmpNatural() uses an OS native natural sort function when available
|
||||
(currently only under Microsoft Windows), wxCmpNaturalGeneric() otherwise.
|
||||
|
||||
Be aware that OS native implementations might differ from each other,
|
||||
and might change behaviour from release to release.
|
||||
|
||||
@see wxNaturalStringSortAscending(), wxNaturalStringSortDescending()
|
||||
|
||||
@since 3.1.4
|
||||
*/
|
||||
int wxCmpNatural(const wxString& s1, const wxString& s2);
|
||||
|
||||
/**
|
||||
This is wxWidgets' own implementation of the natural sort comparison function.
|
||||
|
||||
Requires wxRegEx; if it is unavailable, numbers within strings are not
|
||||
recognised and only case-insensitive collation is performed.
|
||||
|
||||
@see wxCmpNatural()
|
||||
|
||||
@since 3.1.4
|
||||
*/
|
||||
int wxCmpNaturalGeneric(const wxString& s1, const wxString& s2);
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// Global functions/macros
|
||||
|
@ -20,13 +20,20 @@
|
||||
#endif
|
||||
|
||||
#include "wx/arrstr.h"
|
||||
#include "wx/regex.h"
|
||||
#include "wx/scopedarray.h"
|
||||
#include "wx/wxcrt.h"
|
||||
|
||||
#include "wx/beforestd.h"
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include "wx/afterstd.h"
|
||||
|
||||
#if defined( __WINDOWS__ )
|
||||
#include <shlwapi.h>
|
||||
#endif
|
||||
|
||||
|
||||
// ============================================================================
|
||||
// ArrayString
|
||||
// ============================================================================
|
||||
@ -721,3 +728,199 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#if wxUSE_REGEX
|
||||
|
||||
namespace // helpers needed by wxCmpNaturalGeneric()
|
||||
{
|
||||
// Used for comparison of string parts
|
||||
struct wxStringFragment
|
||||
{
|
||||
// Fragment types are generally sorted like this:
|
||||
// Empty < SpaceOrPunct < Digit < LetterOrSymbol
|
||||
// Fragments of the same type are compared as follows:
|
||||
// SpaceOrPunct - collated, Digit - as numbers using value
|
||||
// LetterOrSymbol - lower-cased and then collated
|
||||
enum Type
|
||||
{
|
||||
Empty,
|
||||
SpaceOrPunct, // whitespace or punctuation
|
||||
Digit, // a sequence of decimal digits
|
||||
LetterOrSymbol // letters and symbols, i.e., anything not covered by the above types
|
||||
};
|
||||
|
||||
wxStringFragment() : type(Empty), value(0) {}
|
||||
|
||||
Type type;
|
||||
wxString text;
|
||||
wxUint64 value; // used only for Digit type
|
||||
};
|
||||
|
||||
|
||||
// Extracts the leading fragment of the given string.
//
// The fragment is the prefix of text matched by one of the regular
// expressions below: a run of letters/symbols, a run of at most 19 decimal
// digits or a run of whitespace/punctuation characters. Digit runs longer
// than 19 characters are therefore split into several Digit fragments.
//
// The extracted characters are removed from text, i.e. the argument is
// modified and contains only the not yet processed remainder on return.
//
// Returns an Empty fragment if text is empty. For any non-empty text at least
// one character is always consumed: the callers loop until the string becomes
// empty and would never terminate otherwise.
wxStringFragment GetFragment(wxString& text)
{
    static const wxRegEx reSpaceOrPunct(wxS("^([[:space:]]|[[:punct:]])+"));
    // Limit the length to make sure the value will fit into a wxUint64
    static const wxRegEx reDigit(wxS("^[[:digit:]]{1,19}"));
    static const wxRegEx reLetterOrSymbol(wxS("^[^[:space:]|[:punct:]|[:digit:]]+"));

    if ( text.empty() )
        return wxStringFragment();

    wxStringFragment fragment;
    size_t length = 0;

    // In attempt to minimize the number of wxRegEx.Matches() calls,
    // try to do them from the most expected to the least expected
    // string fragment type.
    if ( reLetterOrSymbol.Matches(text) )
    {
        if ( reLetterOrSymbol.GetMatch(NULL, &length) )
        {
            fragment.type = wxStringFragment::LetterOrSymbol;
            fragment.text = text.Left(length);
        }
    }
    else if ( reDigit.Matches(text) )
    {
        if ( reDigit.GetMatch(NULL, &length) )
        {
            fragment.type = wxStringFragment::Digit;
            fragment.text = text.Left(length);
            fragment.text.ToULongLong(&fragment.value);
        }
    }
    else if ( reSpaceOrPunct.Matches(text) )
    {
        if ( reSpaceOrPunct.GetMatch(NULL, &length) )
        {
            fragment.type = wxStringFragment::SpaceOrPunct;
            fragment.text = text.Left(length);
        }
    }

    if ( length == 0 )
    {
        // Nothing could be extracted, i.e. none of the expressions matched
        // or the match could not be retrieved. Returning an Empty fragment
        // without consuming anything would leave text unchanged and the
        // calling loop would spin forever, so treat the first character as
        // "anything not covered by the other types".
        length = 1;
        fragment.type = wxStringFragment::LetterOrSymbol;
        fragment.text = text.Left(length);
    }

    text.erase(0, length);
    return fragment;
}
|
||||
|
||||
int CompareFragmentNatural(const wxStringFragment& lhs, const wxStringFragment& rhs)
|
||||
{
|
||||
switch ( lhs.type )
|
||||
{
|
||||
case wxStringFragment::Empty:
|
||||
switch ( rhs.type )
|
||||
{
|
||||
case wxStringFragment::Empty:
|
||||
return 0;
|
||||
case wxStringFragment::SpaceOrPunct:
|
||||
case wxStringFragment::Digit:
|
||||
case wxStringFragment::LetterOrSymbol:
|
||||
return -1;
|
||||
}
|
||||
|
||||
case wxStringFragment::SpaceOrPunct:
|
||||
switch ( rhs.type )
|
||||
{
|
||||
case wxStringFragment::Empty:
|
||||
return 1;
|
||||
case wxStringFragment::SpaceOrPunct:
|
||||
return wxStrcoll_String(lhs.text, rhs.text);
|
||||
case wxStringFragment::Digit:
|
||||
case wxStringFragment::LetterOrSymbol:
|
||||
return -1;
|
||||
}
|
||||
|
||||
case wxStringFragment::Digit:
|
||||
switch ( rhs.type )
|
||||
{
|
||||
case wxStringFragment::Empty:
|
||||
case wxStringFragment::SpaceOrPunct:
|
||||
return 1;
|
||||
case wxStringFragment::Digit:
|
||||
if ( lhs.value > rhs.value )
|
||||
return 1;
|
||||
else if ( lhs.value < rhs.value )
|
||||
return -1;
|
||||
else
|
||||
return 0;
|
||||
case wxStringFragment::LetterOrSymbol:
|
||||
return -1;
|
||||
}
|
||||
|
||||
case wxStringFragment::LetterOrSymbol:
|
||||
switch ( rhs.type )
|
||||
{
|
||||
case wxStringFragment::Empty:
|
||||
case wxStringFragment::SpaceOrPunct:
|
||||
case wxStringFragment::Digit:
|
||||
return 1;
|
||||
case wxStringFragment::LetterOrSymbol:
|
||||
return wxStrcoll_String(lhs.text.Lower(), rhs.text.Lower());
|
||||
}
|
||||
}
|
||||
|
||||
// all possible cases should be covered by the switch above
|
||||
// but return also from here to prevent the compiler warning
|
||||
return 1;
|
||||
}
|
||||
|
||||
} // unnamed namespace
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// wxCmpNaturalGeneric
|
||||
// ----------------------------------------------------------------------------
|
||||
//
|
||||
// Compares the strings fragment by fragment: the leading fragments of both
// strings are extracted and compared until either a difference is found or
// both strings are exhausted.
//
// Returns the result of the first fragment comparison which is not zero, or
// zero if there is no such comparison.
int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2)
{
    // GetFragment() removes the extracted fragment from its argument,
    // so work with copies of the strings being compared.
    wxString restLHS(s1);
    wxString restRHS(s2);

    while ( !(restLHS.empty() && restRHS.empty()) )
    {
        const wxStringFragment fragLHS = GetFragment(restLHS);
        const wxStringFragment fragRHS = GetFragment(restRHS);

        const int result = CompareFragmentNatural(fragLHS, fragRHS);
        if ( result != 0 )
            return result;
    }

    // no differing fragments found
    return 0;
}
|
||||
|
||||
#else
|
||||
|
||||
// Fallback used when wxRegEx is not available: numbers inside the strings
// are not recognised, the strings are just lower-cased and then collated.
int wxCMPFUNC_CONV wxCmpNaturalGeneric(const wxString& s1, const wxString& s2)
{
    const wxString lower1 = s1.Lower();
    const wxString lower2 = s2.Lower();

    return wxStrcoll_String(lower1, lower2);
}
|
||||
|
||||
#endif // #if wxUSE_REGEX
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// Declaration of StrCmpLogicalW()
|
||||
// ----------------------------------------------------------------------------
|
||||
//
|
||||
// In some distributions of MinGW32, this function is exported in the library,
|
||||
// but not declared in shlwapi.h. Therefore we declare it here.
|
||||
#if defined( __MINGW32_TOOLCHAIN__ )
|
||||
extern "C" __declspec(dllimport) int WINAPI StrCmpLogicalW(LPCWSTR psz1, LPCWSTR psz2);
|
||||
#endif
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
// wxCmpNatural
|
||||
// ----------------------------------------------------------------------------
|
||||
//
|
||||
// If a native version of Natural sort is available, then use that, otherwise
|
||||
// use the generic version.
|
||||
// Compares two strings in natural sort order.
//
// Under Microsoft Windows the native StrCmpLogicalW() function is used,
// everywhere else the comparison is done by wxCmpNaturalGeneric().
//
// Returns the value returned by the comparison function used.
//
// Notice that this function must not be defined as "inline": it is declared
// as a normal exported function in the header and is called from the inline
// functions defined there, i.e. from other translation units, so its
// out-of-line definition must be emitted here. With "inline" the compiler is
// free to not generate it at all, which results in undefined references
// when linking.
int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2)
{
#if defined( __WINDOWS__ )
    // StrCmpLogicalW() takes wide character strings
    return StrCmpLogicalW(s1.wc_str(), s2.wc_str());
#else
    return wxCmpNaturalGeneric(s1, s2);
#endif // #if defined( __WINDOWS__ )
}
|
||||
|
||||
|
@ -780,3 +780,84 @@ void ArraysTestCase::IndexFromEnd()
|
||||
CPPUNIT_ASSERT_EQUAL( 1, a.Index(1, /*bFromEnd=*/true) );
|
||||
CPPUNIT_ASSERT_EQUAL( 2, a.Index(42, /*bFromEnd=*/true) );
|
||||
}
|
||||
|
||||
|
||||
TEST_CASE("wxNaturalStringComparisonGeneric()", "[wxString][compare]")
|
||||
{
|
||||
#if !wxUSE_REGEX
|
||||
WARN("Skipping wxCmpNaturalGeneric() tests: wxRegEx not available");
|
||||
#else
|
||||
// simple string comparison
|
||||
CHECK(wxCmpNaturalGeneric("a", "a") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("a", "z") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("z", "a") > 0);
|
||||
|
||||
// case insensitivity
|
||||
CHECK(wxCmpNaturalGeneric("a", "A") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("A", "a") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("AB", "a") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("a", "AB") < 0);
|
||||
|
||||
// empty strings sort before whitespace and punctuation
|
||||
CHECK(wxCmpNaturalGeneric("", " ") < 0);
|
||||
CHECK(wxCmpNaturalGeneric(" ", "") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("", ",") < 0);
|
||||
CHECK(wxCmpNaturalGeneric(",", "") > 0);
|
||||
|
||||
// empty strings sort before numbers
|
||||
CHECK(wxCmpNaturalGeneric("", "0") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("0", "") > 0);
|
||||
|
||||
// empty strings sort before letters and symbols
|
||||
CHECK(wxCmpNaturalGeneric("", "abc") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("abc", "") > 0);
|
||||
|
||||
// whitespace and punctuation sort before numbers
|
||||
CHECK(wxCmpNaturalGeneric(" ", "1") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("1", " ") > 0);
|
||||
CHECK(wxCmpNaturalGeneric(",", "1") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("1", ",") > 0);
|
||||
|
||||
// strings containing numbers sort before letters and symbols
|
||||
CHECK(wxCmpNaturalGeneric("00", "a") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("a", "00") > 0);
|
||||
|
||||
// strings containing numbers are compared by their value
|
||||
CHECK(wxCmpNaturalGeneric("01", "1") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("1", "01") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("1", "05") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("05", "1") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("10", "5") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("5", "10") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("1", "9999999999999999999") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("9999999999999999999", "1") > 0);
|
||||
|
||||
// comparing strings composed from whitespace,
|
||||
// punctuation, numbers, letters, and symbols
|
||||
CHECK(wxCmpNaturalGeneric("1st", " 1st") > 0);
|
||||
CHECK(wxCmpNaturalGeneric(" 1st", "1st") < 0);
|
||||
|
||||
CHECK(wxCmpNaturalGeneric("1st", ",1st") > 0);
|
||||
CHECK(wxCmpNaturalGeneric(",1st", "1st") < 0);
|
||||
|
||||
CHECK(wxCmpNaturalGeneric("1st", "01st") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("01st", "1st") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("10th", "5th") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("5th", "10th") < 0);
|
||||
|
||||
CHECK(wxCmpNaturalGeneric("a1st", "a01st") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("a01st", "a1st") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("a10th", "a5th") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("a5th", "a10th") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("a 10th", "a5th") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("a5th", "a 10th") > 0);
|
||||
|
||||
CHECK(wxCmpNaturalGeneric("a1st1", "a01st01") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("a01st01", "a1st1") == 0);
|
||||
CHECK(wxCmpNaturalGeneric("a10th10", "a5th5") > 0);
|
||||
CHECK(wxCmpNaturalGeneric("a5th5", "a10th10") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("a 10th 10", "a5th 5") < 0);
|
||||
CHECK(wxCmpNaturalGeneric("a5th 5", "a 10th 10") > 0);
|
||||
#endif // #if !wxUSE_REGEX
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user