Add functions for sorting strings in natural sort order

Use StrCmpLogicalW() under MSW and a generic implementation on all
other platforms.

See https://github.com/wxWidgets/wxWidgets/pull/780
This commit is contained in:
Hugo Elias 2018-04-09 22:35:00 +01:00 committed by Vadim Zeitlin
parent a2e4e6ebcf
commit 371c4b1366
4 changed files with 350 additions and 6 deletions

View File

@ -42,12 +42,30 @@ wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2)
return cmp ? cmp : s1.Cmp(s2);
}
// Comparison function sorting strings in reverse dictionary order: it yields
// the result of wxDictionaryStringSortAscending() with the operands exchanged.
inline int wxCMPFUNC_CONV
wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2)
{
    const int reversed = wxDictionaryStringSortAscending(s2, s1);

    return reversed;
}
// Compare two strings in natural sort order using wxWidgets' own generic
// implementation, i.e. without calling any OS-provided comparison function.
//
// Returns a negative value, 0 or a positive value if s1 sorts before, the
// same as or after s2, respectively.
WXDLLIMPEXP_BASE
int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2);
// Compare two strings in natural sort order using the native comparison
// function when there is one (StrCmpLogicalW() under MSW) and falling back to
// wxCmpNatural() on the other platforms.
WXDLLIMPEXP_BASE
int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2);
// Comparison function sorting strings in ascending natural order.
//
// As documented, this uses the native natural comparison when the platform
// provides one, so that the resulting order is the same as the one used by
// the OS itself: wxCmpNaturalNative() takes care of falling back to the
// generic wxCmpNatural() elsewhere.
inline int wxCMPFUNC_CONV wxNaturalStringSortAscending(const wxString& s1, const wxString& s2)
{
    return wxCmpNaturalNative(s1, s2);
}

// Comparison function sorting strings in descending natural order.
//
// This is the ascending comparison with the operands exchanged, which
// guarantees that both functions always use the same underlying comparison.
inline int wxCMPFUNC_CONV wxNaturalStringSortDescending(const wxString& s1, const wxString& s2)
{
    return wxCmpNaturalNative(s2, s1);
}
#if wxUSE_STD_CONTAINERS
typedef int (wxCMPFUNC_CONV *CMPFUNCwxString)(wxString*, wxString*);

View File

@ -363,7 +363,8 @@ public:
This function can be used with wxSortedArrayString::Sort() or passed as an
argument to wxSortedArrayString constructor.
@see wxStringSortDescending(), wxDictionaryStringSortAscending()
@see wxStringSortDescending(), wxDictionaryStringSortAscending(),
wxNaturalStringSortAscending()
@since 3.1.0
*/
@ -375,7 +376,8 @@ int wxStringSortAscending(const wxString& s1, const wxString& s2);
This function can be used with wxSortedArrayString::Sort() or passed as an
argument to wxSortedArrayString constructor.
@see wxStringSortAscending(), wxDictionaryStringSortAscending()
@see wxStringSortAscending(), wxDictionaryStringSortDescending(),
wxNaturalStringSortDescending()
@since 3.1.0
*/
@ -392,8 +394,10 @@ int wxStringSortDescending(const wxString& s1, const wxString& s2);
This function can be used with wxSortedArrayString::Sort() or passed as an
argument to wxSortedArrayString constructor.
@see wxStringSortAscending(), wxDictionaryStringSortDescending()
@see wxDictionaryStringSortDescending(),
wxStringSortAscending(),
wxNaturalStringSortAscending()
@since 3.1.0
*/
int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2);
@ -403,11 +407,98 @@ int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2);
See wxDictionaryStringSortAscending() for the dictionary sort description.
@see wxStringSortDescending()
@see wxDictionaryStringSortAscending(),
wxStringSortDescending(),
wxNaturalStringSortDescending()
@since 3.1.0
*/
int wxDictionaryStringSortAscending(const wxString& s1, const wxString& s2);
int wxDictionaryStringSortDescending(const wxString& s1, const wxString& s2);
/**
    Comparison function used for Natural Sort.

    Functions in the same way as wxDictionaryStringSortAscending(), with
    the exception that numbers within the string are recognised, and
    compared numerically, rather than alphabetically. When used for
    sorting, the result is that e.g. file names containing numbers are
    sorted in a natural way.

    This function will use an OS native function if one is available,
    to ensure that the sort order is the same as the OS uses.

    Comparison is case insensitive.

    e.g. Sorting using wxDictionaryStringSortAscending() results in:
    - file1.txt
    - file10.txt
    - file100.txt
    - file2.txt
    - file20.txt
    - file3.txt

    e.g. Sorting the same names using wxNaturalStringSortAscending()
    results in:
    - file1.txt
    - file2.txt
    - file3.txt
    - file10.txt
    - file20.txt
    - file100.txt

    @see wxNaturalStringSortDescending(),
         wxStringSortAscending(),
         wxDictionaryStringSortAscending()

    @since 3.1.2
*/
int wxNaturalStringSortAscending(const wxString& s1, const wxString& s2);
/**
    Comparison function comparing strings in reverse natural order.

    This is the same comparison as wxNaturalStringSortAscending() performed
    with its two arguments exchanged.

    See wxNaturalStringSortAscending() for the natural sort description.

    @see wxNaturalStringSortAscending(),
         wxStringSortDescending(),
         wxDictionaryStringSortDescending()

    @since 3.1.2
*/
int wxNaturalStringSortDescending(const wxString& s1, const wxString& s2);
/**
    This is wxWidgets' own implementation of the natural sort comparison
    function.

    It is used by wxCmpNaturalNative() whenever an OS native function is not
    available.

    Since OS native implementations might differ from each other, the user
    might wish to use this function, which behaves in the same way across all
    platforms.

    @see wxCmpNaturalNative()

    @since 3.1.2
*/
int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2);
/**
    Comparison function, identical to wxNaturalStringSortAscending().

    In fact, wxNaturalStringSortAscending() and wxNaturalStringSortDescending()
    are both implemented using this function.

    When an OS native natural sort function is available, that will be used
    (currently this is the case under MSW, where StrCmpLogicalW() is called),
    otherwise wxCmpNatural() will be used.

    Be aware that OS native implementations might differ from each other, and
    might change behaviour from release to release.

    @see wxNaturalStringSortAscending(),
         wxNaturalStringSortDescending(),
         wxCmpNatural()

    @since 3.1.2
*/
int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2);
// ============================================================================
// Global functions/macros

View File

@ -26,6 +26,12 @@
#include <algorithm>
#include <functional>
#include "wx/afterstd.h"
#include "wx/regex.h"
#if defined( __WINDOWS__ )
#include <shlwapi.h>
#endif
// ============================================================================
// ArrayString
@ -721,3 +727,186 @@ wxArrayString wxSplit(const wxString& str, const wxChar sep, const wxChar escape
return ret;
}
namespace // enum, class and functions needed by wxCmpNatural().
{
// Kind of the piece of text extracted from the string being compared.
//
// The enumerators rely on the default numbering (0, 1, 2).
enum wxStringFragmentType
{
    wxFRAGMENT_TYPE_EMPTY,  // nothing was extracted
    wxFRAGMENT_TYPE_ALPHA,  // text without any digits
    wxFRAGMENT_TYPE_DIGIT   // a number
};
// ----------------------------------------------------------------------------
// wxStringFragment
// ----------------------------------------------------------------------------
//
// Lightweight object returned by GetFragment().
// Represents either a number, or a string which contains no numerical digits.
class wxStringFragment
{
public:
    // Creates an empty fragment. All the fields are initialized so that a
    // fragment can always be safely copied and compared.
    wxStringFragment()
        : value(0),
          type(wxFRAGMENT_TYPE_EMPTY)
    {}

    wxString text;              // the characters making up the fragment
    long value;                 // numeric value, only used for digit fragments
    wxStringFragmentType type;  // what kind of fragment this is
};
wxStringFragment GetFragment(wxString& text)
{
static const wxRegEx naturalNumeric(wxS("[0-9]+"));
static const wxRegEx naturalAlpha(wxS("[^0-9]+"));
size_t digitStart = 0;
size_t digitLength = 0;
size_t alphaStart = 0;
size_t alphaLength = 0;
wxStringFragment fragment;
if ( text.empty() )
return fragment;
if ( naturalNumeric.Matches(text) )
{
naturalNumeric.GetMatch(&digitStart, &digitLength, 0);
}
if ( naturalAlpha.Matches(text) )
{
naturalAlpha.GetMatch(&alphaStart, &alphaLength, 0);
}
if ( alphaStart == 0 )
{
fragment.text = text.Mid(0, alphaLength);
fragment.value = 0;
fragment.type = wxFRAGMENT_TYPE_ALPHA;
text.erase(0, alphaLength);
}
if ( digitStart == 0 )
{
fragment.text = text.Mid(0, digitLength);
fragment.text.ToLong(&fragment.value);
fragment.type = wxFRAGMENT_TYPE_DIGIT;
text.erase(0, digitLength);
}
return fragment;
}
// Compares two fragments, returning a negative value, 0 or a positive value
// if lhs sorts before, the same as or after rhs, respectively.
//
// Fragments of the same kind are compared by their contents: text fragments
// are compared case-insensitively and numbers by their numeric value. Two
// empty fragments are equal.
//
// Fragments of different kinds are ordered by kind: an empty fragment comes
// before anything else and a number comes before text.
int CompareFragmentNatural(const wxStringFragment& lhs, const wxStringFragment& rhs)
{
    if ( lhs.type == rhs.type )
    {
        switch ( lhs.type )
        {
            case wxFRAGMENT_TYPE_ALPHA:
                return lhs.text.CmpNoCase(rhs.text);

            case wxFRAGMENT_TYPE_DIGIT:
                if ( lhs.value < rhs.value )
                    return -1;
                if ( lhs.value > rhs.value )
                    return 1;
                return 0;

            case wxFRAGMENT_TYPE_EMPTY:
                return 0;
        }

        return 0;
    }

    // From now on the fragments are of different kinds.
    if ( lhs.type == wxFRAGMENT_TYPE_EMPTY )
        return -1;

    if ( rhs.type == wxFRAGMENT_TYPE_EMPTY )
        return 1;

    // One of the fragments is a number and the other one is text.
    return lhs.type == wxFRAGMENT_TYPE_DIGIT ? -1 : 1;
}
} // unnamed namespace
// ----------------------------------------------------------------------------
// wxCmpNatural
// ----------------------------------------------------------------------------
//
// Generic implementation of natural comparison: both strings are consumed
// fragment by fragment and the first pair of fragments which differ determines
// the result. The strings are equal if they run out at the same time without
// any difference having been found.
int wxCMPFUNC_CONV wxCmpNatural(const wxString& s1, const wxString& s2)
{
    // Work on copies as extracting a fragment removes it from the string.
    wxString remainingL(s1);
    wxString remainingR(s2);

    for ( ;; )
    {
        if ( remainingL.empty() && remainingR.empty() )
            return 0;

        const wxStringFragment partL = GetFragment(remainingL);
        const wxStringFragment partR = GetFragment(remainingR);

        const int result = CompareFragmentNatural(partL, partR);
        if ( result != 0 )
            return result;
    }
}
// ----------------------------------------------------------------------------
// Declaration of StrCmpLogicalW()
// ----------------------------------------------------------------------------
//
// In some distributions of MinGW32, this function is exported in the library,
// but not declared in shlwapi.h. Therefore we declare it here ourselves, as it
// is called by wxCmpNaturalNative() in Windows builds.
#if defined( __MINGW32_TOOLCHAIN__ )
extern "C" __declspec(dllimport) int WINAPI StrCmpLogicalW(LPCWSTR psz1, LPCWSTR psz2);
#endif
// ----------------------------------------------------------------------------
// wxCmpNaturalNative
// ----------------------------------------------------------------------------
//
// Natural comparison using the function provided by the OS when there is one:
// currently this is only the case under Windows, all the other platforms use
// wxCmpNatural(), i.e. our own implementation.
int wxCMPFUNC_CONV wxCmpNaturalNative(const wxString& s1, const wxString& s2)
{
#ifndef __WINDOWS__
    return wxCmpNatural( s1, s2 );
#else
    // Keep the conversions inside the call expression so that any temporary
    // buffers they might create live until the function returns.
    return StrCmpLogicalW( s1.wc_str(), s2.wc_str() );
#endif
}

View File

@ -780,3 +780,49 @@ void ArraysTestCase::IndexFromEnd()
CPPUNIT_ASSERT_EQUAL( 1, a.Index(1, /*bFromEnd=*/true) );
CPPUNIT_ASSERT_EQUAL( 2, a.Index(42, /*bFromEnd=*/true) );
}
TEST_CASE("wxNaturalStringSortAscending()", "[array][sort][string]")
{
    // The strings are listed in groups, each of them in ascending natural
    // order. Entries 2 and 3 differ only by case and so must compare equal.
    const wxString samples[] =
    {
        wxString("3String"),        //  0
        wxString("21String"),       //  1
        wxString("100string"),      //  2
        wxString("100String"),      //  3

        wxString("10String"),       //  4
        wxString("Str3ing"),        //  5
        wxString("Str20ing"),       //  6
        wxString("Str200ing"),      //  7
        wxString("String8"),        //  8
        wxString("String90"),       //  9

        wxString("7String3"),       // 10
        wxString("07String20"),     // 11
        wxString("007String100")    // 12
    };
    const size_t sampleCount = sizeof(samples) / sizeof(samples[0]);

    size_t n;

    // First group: numbers at the start of the string.
    for ( n = 0; n < 2; ++n )
    {
        CHECK(wxCmpNatural(samples[n], samples[n + 1]) < 0);
    }

    // Check that case is ignored
    CHECK(wxCmpNatural(samples[2], samples[3]) == 0);

    // Second group: numbers in the middle and at the end of the string.
    for ( n = 4; n < 9; ++n )
    {
        CHECK(wxCmpNatural(samples[n], samples[n + 1]) < 0);
    }

    // Third group: numbers with leading zeros.
    for ( n = 10; n < 12; ++n )
    {
        CHECK(wxCmpNatural(samples[n], samples[n + 1]) < 0);
    }

    // Check that equality works in all cases
    for ( n = 0; n < sampleCount; ++n )
    {
        CHECK(wxCmpNatural(samples[n], samples[n]) == 0);
    }
}