The branch main has been updated by kib:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=05c9a0158f6837bb3a3781e4ed75f66115f6415a

commit 05c9a0158f6837bb3a3781e4ed75f66115f6415a
Author:     Aymeric Wibo <obi...@gmail.com>
AuthorDate: 2022-08-24 23:20:13 +0000
Commit:     Konstantin Belousov <k...@freebsd.org>
CommitDate: 2022-08-25 00:29:03 +0000

    libc: Add strverscmp(3) and versionsort(3)
    
    Add a strverscmp(3) function to libc, a GNU extension I implemented by
    reading its glibc manual page. It orders strings following a much more
    natural ordering (e.g. "ent1 < ent2 < ent10" as opposed to
    "ent1 < ent10 < ent2" with strcmp(3)'s lexicographic ordering).
    
    Also add versionsort(3) for use as scandir(3)'s compar argument.
    
    Update manual page for scandir(3) and add one for strverscmp(3).
    
    Reviewed by:    pstef, gbe, kib
    MFC after:      1 week
    Differential Revision: https://reviews.freebsd.org/D35807
---
 include/dirent.h                        |  1 +
 include/string.h                        |  1 +
 lib/libc/gen/Makefile.inc               |  3 +-
 lib/libc/gen/Symbol.map                 |  1 +
 lib/libc/gen/scandir.3                  | 22 +++++++-
 lib/libc/gen/scandir.c                  |  7 +++
 lib/libc/string/Makefile.inc            |  4 +-
 lib/libc/string/Symbol.map              |  1 +
 lib/libc/string/strverscmp.3            | 56 ++++++++++++++++++++
 lib/libc/string/strverscmp.c            | 91 ++++++++++++++++++++++++++++++++
 lib/libc/tests/string/Makefile          |  5 +-
 lib/libc/tests/string/strverscmp_test.c | 93 +++++++++++++++++++++++++++++++++
 12 files changed, 278 insertions(+), 7 deletions(-)

diff --git a/include/dirent.h b/include/dirent.h
index 047206258471..751a016838c0 100644
--- a/include/dirent.h
+++ b/include/dirent.h
@@ -108,6 +108,7 @@ int  alphasort(const struct dirent **, const struct dirent 
**);
 int     dirfd(DIR *);
 #endif
 #if __BSD_VISIBLE
+int     versionsort(const struct dirent **, const struct dirent **);
 DIR    *__opendir2(const char *, int);
 int     fdclosedir(DIR *);
 ssize_t         getdents(int, char *, size_t);
diff --git a/include/string.h b/include/string.h
index 15d4dc7e9701..dc5756830208 100644
--- a/include/string.h
+++ b/include/string.h
@@ -81,6 +81,7 @@ char  *strcat(char * __restrict, const char * __restrict);
 char   *strchr(const char *, int) __pure;
 #if __BSD_VISIBLE
 char   *strchrnul(const char*, int) __pure;
+int     strverscmp(const char *, const char *) __pure;
 #endif
 int     strcmp(const char *, const char *) __pure;
 int     strcoll(const char *, const char *);
diff --git a/lib/libc/gen/Makefile.inc b/lib/libc/gen/Makefile.inc
index 6d0e542a1f7b..45977b6b9005 100644
--- a/lib/libc/gen/Makefile.inc
+++ b/lib/libc/gen/Makefile.inc
@@ -495,7 +495,8 @@ MLINKS+=rand48.3 _rand48.3 \
 MLINKS+=recv.2 recvmmsg.2
 MLINKS+=scandir.3 alphasort.3 \
        scandir.3 scandirat.3 \
-       scandir.3 scandir_b.3
+       scandir.3 scandir_b.3 \
+       scandir.3 versionsort.3
 MLINKS+=sem_open.3 sem_close.3 \
        sem_open.3 sem_unlink.3
 MLINKS+=sem_wait.3 sem_trywait.3
diff --git a/lib/libc/gen/Symbol.map b/lib/libc/gen/Symbol.map
index 74676ffe2270..0a20a41d0e20 100644
--- a/lib/libc/gen/Symbol.map
+++ b/lib/libc/gen/Symbol.map
@@ -443,6 +443,7 @@ FBSD_1.7 {
         sched_getaffinity;
         sched_setaffinity;
         sched_getcpu;
+        versionsort;
         __cpuset_alloc;
         __cpuset_free;
 };
diff --git a/lib/libc/gen/scandir.3 b/lib/libc/gen/scandir.3
index b533b33ce7a7..07d8074ae592 100644
--- a/lib/libc/gen/scandir.3
+++ b/lib/libc/gen/scandir.3
@@ -35,7 +35,8 @@
 .Nm scandir ,
 .Nm scandirat ,
 .Nm scandir_b ,
-.Nm alphasort
+.Nm alphasort ,
+.Nm versionsort
 .Nd scan a directory
 .Sh LIBRARY
 .Lb libc
@@ -65,6 +66,8 @@
 .Fc
 .Ft int
 .Fn alphasort "const struct dirent **d1" "const struct dirent **d2"
+.Ft int
+.Fn versionsort "const struct dirent **d1" "const struct dirent **d2"
 .Sh DESCRIPTION
 The
 .Fn scandir
@@ -106,6 +109,13 @@ is a routine which can be used for the
 argument to sort the array alphabetically using
 .Xr strcoll 3 .
 .Pp
+The
+.Fn versionsort
+function is a routine which can be used for the
+.Fa compar
+argument to sort the array naturally using
+.Xr strverscmp 3 .
+.Pp
 The memory allocated for the array can be deallocated with
 .Xr free 3 ,
 by freeing each pointer in the array and then the array itself.
@@ -161,7 +171,12 @@ cannot allocate enough memory to hold all the data 
structures.
 .Xr malloc 3 ,
 .Xr qsort 3 ,
 .Xr strcoll 3 ,
+.Xr strverscmp 3 ,
 .Xr dir 5
+.Sh STANDARDS
+The
+.Fn versionsort
+function is a GNU extension and conforms to no standard.
 .Sh HISTORY
 The
 .Fn scandir
@@ -171,5 +186,8 @@ functions appeared in
 .Bx 4.2 .
 The
 .Fn scandirat
-function was added in
+and
+.Fn
+versionsort
+functions were added in
 .Fx 14.0 .
diff --git a/lib/libc/gen/scandir.c b/lib/libc/gen/scandir.c
index 3a891b0ad3f2..8a260adcd2f3 100644
--- a/lib/libc/gen/scandir.c
+++ b/lib/libc/gen/scandir.c
@@ -191,6 +191,13 @@ alphasort(const struct dirent **d1, const struct dirent 
**d2)
        return (strcoll((*d1)->d_name, (*d2)->d_name));
 }
 
+int
+versionsort(const struct dirent **d1, const struct dirent **d2)
+{
+
+       return (strverscmp((*d1)->d_name, (*d2)->d_name));
+}
+
 static int
 alphasort_thunk(void *thunk, const void *p1, const void *p2)
 {
diff --git a/lib/libc/string/Makefile.inc b/lib/libc/string/Makefile.inc
index 1df3d40e329f..afc113eeb867 100644
--- a/lib/libc/string/Makefile.inc
+++ b/lib/libc/string/Makefile.inc
@@ -16,7 +16,7 @@ MISRCS+=bcmp.c bcopy.c bzero.c explicit_bzero.c \
        strcspn.c strdup.c strerror.c strlcat.c strlcpy.c strlen.c strmode.c \
        strncat.c strncmp.c strncpy.c strndup.c strnlen.c strnstr.c \
        strpbrk.c strrchr.c strsep.c strsignal.c strspn.c strstr.c strtok.c \
-       strxfrm.c swab.c \
+       strverscmp.c strxfrm.c swab.c \
        timingsafe_bcmp.c \
        timingsafe_memcmp.c \
        wcpcpy.c wcpncpy.c wcscasecmp.c wcscat.c \
@@ -46,7 +46,7 @@ MAN+= bcmp.3 bcopy.3 bstring.3 bzero.3 ffs.3 index.3 
memccpy.3 memchr.3 \
        memcmp.3 memcpy.3 memmem.3 memmove.3 memset.3 strcasecmp.3 strcat.3 \
        strchr.3 strcmp.3 strcoll.3 strcpy.3 strdup.3 strerror.3 \
        string.3 strlcpy.3 strlen.3 strmode.3 strpbrk.3 strsep.3 \
-       strspn.3 strstr.3 strtok.3 strxfrm.3 swab.3 \
+       strspn.3 strstr.3 strtok.3 strverscmp.3 strxfrm.3 swab.3 \
        timingsafe_bcmp.3 \
        wcscoll.3 wcstok.3 \
        wcswidth.3 wcsxfrm.3 wmemchr.3
diff --git a/lib/libc/string/Symbol.map b/lib/libc/string/Symbol.map
index ec45d7fd7ddb..4fcd194bafd8 100644
--- a/lib/libc/string/Symbol.map
+++ b/lib/libc/string/Symbol.map
@@ -116,6 +116,7 @@ FBSD_1.6 {
 
 FBSD_1.7 {
        mempcpy;
+       strverscmp;
        wmempcpy;
 };
 
diff --git a/lib/libc/string/strverscmp.3 b/lib/libc/string/strverscmp.3
new file mode 100644
index 000000000000..e4413fb96e36
--- /dev/null
+++ b/lib/libc/string/strverscmp.3
@@ -0,0 +1,56 @@
+.\" SPDX-License-Identifier: BSD-2-Clause
+.\" Copyright (c) 2022 Aymeric Wibo <obi...@gmail.com>
+.Dd July 11, 2022
+.Dt STRVERSCMP 3
+.Os
+.Sh NAME
+.Nm strverscmp
+.Nd compare strings according to natural order
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In string.h
+.Ft int
+.Fn strverscmp "const char *s1" "const char *s2"
+.Sh DESCRIPTION
+The
+.Fn strverscmp
+function
+compares the null-terminated strings
+.Fa s1
+and
+.Fa s2
+according to their natural order
+and returns an integer greater than, equal to, or less than 0,
+depending on whether
+.Fa s1
+is greater than, equal to, or less than
+.Fa s2 .
+.Pp
+More specifically, this natural order is found by iterating over both
+strings until a difference is found.
+If the difference is between non-decimal characters,
+.Fn strverscmp
+acts like
+.Xr strcmp 3
+(thus, the ordering would be "a", "b", "train").
+If a decimal digit is found, the whole number is read and compared
+(thus, the ordering would be "9", "10", "420" which is different to 
lexicographic order,
+what
+.Xr strcmp 3
+would have done).
+Numbers with leading zeroes are interpreted as fractional parts (even without 
a decimal point),
+and numbers with more leading zeroes are placed before numbers with fewer 
leading zeroes
+(thus, the ordering would be "000", "00", "01", "010", "09", "0", "1", "9", 
"10").
+.Sh SEE ALSO
+.Xr strcmp 3 ,
+.Xr versionsort 3
+.Sh STANDARDS
+The
+.Fn strverscmp
+function is a GNU extension and conforms to no standard.
+.Sh HISTORY
+The
+.Fn strverscmp
+function was added in
+.Fx 14.0 .
diff --git a/lib/libc/string/strverscmp.c b/lib/libc/string/strverscmp.c
new file mode 100644
index 000000000000..6051adb35499
--- /dev/null
+++ b/lib/libc/string/strverscmp.c
@@ -0,0 +1,91 @@
+/*-
+* SPDX-License-Identifier: BSD-2-Clause
+* Copyright (c) 2022 Aymeric Wibo <obi...@gmail.com>
+*/
+
+#include <ctype.h>
+#include <stddef.h>
+
+int
+strverscmp(const char *s1, const char *s2)
+{
+       size_t digit_count_1, digit_count_2;
+       size_t zeros_count_1, zeros_count_2;
+       const unsigned char *num_1, *num_2;
+       const unsigned char *u1 = __DECONST(const unsigned char *, s1);
+       const unsigned char *u2 = __DECONST(const unsigned char *, s2);
+
+       /*
+        * If pointers are the same, no need to go through to process of
+        * comparing them.
+        */
+       if (s1 == s2)
+               return (0);
+
+       while (*u1 != '\0' && *u2 != '\0') {
+               /* If either character is not a digit, act like strcmp(3). */
+
+               if (!isdigit(*u1) || !isdigit(*u2)) {
+                       if (*u1 != *u2)
+                               return (*u1 - *u2);
+                       u1++;
+                       u2++;
+                       continue;
+               }
+               if (*u1 == '0' || *u2 == '0') {
+                       /*
+                        * Treat leading zeros as if they were the fractional
+                        * part of a number, i.e. as if they had a decimal point
+                        * in front. First, count the leading zeros (more zeros
+                        * == smaller number).
+                        */
+                       zeros_count_1 = 0;
+                       zeros_count_2 = 0;
+                       for (; *u1 == '0'; u1++)
+                               zeros_count_1++;
+                       for (; *u2 == '0'; u2++)
+                               zeros_count_2++;
+                       if (zeros_count_1 != zeros_count_2)
+                               return (zeros_count_2 - zeros_count_1);
+
+                       /* Handle the case where 0 < 09. */
+                       if (!isdigit(*u1) && isdigit(*u2))
+                               return (1);
+                       if (!isdigit(*u2) && isdigit(*u1))
+                               return (-1);
+               } else {
+                       /*
+                        * No leading zeros; we're simply comparing two numbers.
+                        * It is necessary to first count how many digits there
+                        * are before going back to compare each digit, so that
+                        * e.g. 7 is not considered larger than 60.
+                        */
+                       num_1 = u1;
+                       num_2 = u2;
+
+                       /* Count digits (more digits == larger number). */
+                       for (; isdigit(*u1); u1++)
+                               ;
+                       for (; isdigit(*u2); u2++)
+                               ;
+                       digit_count_1 = u1 - num_1;
+                       digit_count_2 = u2 - num_2;
+                       if (digit_count_1 != digit_count_2)
+                               return (digit_count_1 - digit_count_2);
+
+                       /*
+                        * If there are the same number of digits, go back to
+                        * the start of the number.
+                        */
+                       u1 = num_1;
+                       u2 = num_2;
+               }
+
+               /* Compare each digit until there are none left. */
+               for (; isdigit(*u1) && isdigit(*u2); u1++, u2++) {
+                       if (*u1 != *u2)
+                               return (*u1 - *u2);
+               }
+       }
+       return (*u1 - *u2);
+}
diff --git a/lib/libc/tests/string/Makefile b/lib/libc/tests/string/Makefile
index c6a98572564d..eacf7e15c27c 100644
--- a/lib/libc/tests/string/Makefile
+++ b/lib/libc/tests/string/Makefile
@@ -4,10 +4,11 @@ ATF_TESTS_C+=         memcmp_test
 ATF_TESTS_C+=          memset_s_test
 ATF_TESTS_C+=          stpncpy_test
 ATF_TESTS_C+=          strerror2_test
-ATF_TESTS_C+=          wcscasecmp_test
-ATF_TESTS_C+=          wcsnlen_test
+ATF_TESTS_C+=          strverscmp_test
 ATF_TESTS_C+=          strxfrm_test
+ATF_TESTS_C+=          wcscasecmp_test
 ATF_TESTS_C+=          wcscoll_test
+ATF_TESTS_C+=          wcsnlen_test
 
 # TODO: popcount, stresep
 
diff --git a/lib/libc/tests/string/strverscmp_test.c 
b/lib/libc/tests/string/strverscmp_test.c
new file mode 100644
index 000000000000..fd6a2620cb48
--- /dev/null
+++ b/lib/libc/tests/string/strverscmp_test.c
@@ -0,0 +1,93 @@
+/*-
+* SPDX-License-Identifier: BSD-2-Clause
+* Copyright (c) 2022 Aymeric Wibo <obi...@gmail.com>
+*/
+
+#include <atf-c.h>
+#include <string.h>
+
+static void
+check_all(size_t len, const char *ordered[len])
+{
+       const char *a, *b;
+
+       for (size_t i = 0; i < len; i++) {
+               for (size_t j = 0; j < len; j++) {
+                       a = ordered[i];
+                       b = ordered[j];
+
+                       if (i == j)
+                               ATF_CHECK_MSG(
+                                   strverscmp(a, b) == 0,
+                                   "strverscmp(\"%s\", \"%s\") == 0",
+                                   a, b
+                               );
+                       else if (i < j)
+                               ATF_CHECK_MSG(
+                                   strverscmp(a, b) < 0,
+                                   "strverscmp(\"%s\", \"%s\") < 0",
+                                   a, b
+                               );
+                       else if (i > j)
+                               ATF_CHECK_MSG(
+                                   strverscmp(a, b) > 0,
+                                   "strverscmp(\"%s\", \"%s\") > 0",
+                                   a, b
+                               );
+               }
+       }
+}
+
+#define        CHECK_ALL(...) do {                                     \
+       const char *ordered[] = { __VA_ARGS__ };                \
+       check_all(sizeof(ordered) / sizeof(*ordered), ordered); \
+} while (0)
+
+ATF_TC_WITHOUT_HEAD(strcmp_functionality);
+ATF_TC_BODY(strcmp_functionality, tc)
+{
+       CHECK_ALL("", "a", "b");
+}
+
+/* from Linux man page strverscmp(3) */
+
+ATF_TC_WITHOUT_HEAD(vers_ordering);
+ATF_TC_BODY(vers_ordering, tc)
+{
+       CHECK_ALL("000", "00", "01", "010", "09", "0", "1", "9", "10");
+}
+
+ATF_TC_WITHOUT_HEAD(natural_ordering);
+ATF_TC_BODY(natural_ordering, tc)
+{
+       CHECK_ALL("jan1", "jan2", "jan9", "jan10", "jan11", "jan19", "jan20");
+}
+
+/* https://sourceware.org/bugzilla/show_bug.cgi?id=9913 */
+
+ATF_TC_WITHOUT_HEAD(glibc_bug_9913);
+ATF_TC_BODY(glibc_bug_9913, tc)
+{
+       CHECK_ALL(
+           "B0075022800016.gbp.corp.com",
+           "B007502280067.gbp.corp.com",
+           "B007502357019.GBP.CORP.COM"
+       );
+}
+
+ATF_TC_WITHOUT_HEAD(semver_ordering);
+ATF_TC_BODY(semver_ordering, tc)
+{
+       CHECK_ALL("2.6.20", "2.6.21");
+}
+
+ATF_TP_ADD_TCS(tp)
+{
+       ATF_TP_ADD_TC(tp, strcmp_functionality);
+       ATF_TP_ADD_TC(tp, vers_ordering);
+       ATF_TP_ADD_TC(tp, natural_ordering);
+       ATF_TP_ADD_TC(tp, glibc_bug_9913);
+       ATF_TP_ADD_TC(tp, semver_ordering);
+
+       return (atf_no_error());
+}

Reply via email to