For quite some time Djamel Belazzougui has been suggesting speed
improvements to glibc's qsort function.  I'm not sure yet that this is
a good idea, but it does seem to me that there is use for a function
that can sort a vector of pointers to data (as opposed to qsort, which
sorts an array of data).  Such a function can lead to faster execution
in some important cases, as I have verified using GNU 'ls' as a guinea
pig.  So I'm adding this module to gnulib, and plan to submit a patch
to coreutils 'ls' shortly.

2007-01-28  Paul Eggert  <[EMAIL PROTECTED]>

        * MODULES.html.sh: New module mpsort.
        * lib/mpsort.c, lib/mpsort.h, m4/mpsort.m4, modules/mpsort: New files.

        * lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
        a circularity problem with HP-UX ia64 reported by Bob Proulx in
        <http://lists.gnu.org/archive/html/bug-gnulib/2007-01/msg00394.html>.
        All uses changed.
        (_Restrict_arr_): Renamed from __restrict_arr, for similar reasons.
        All uses changed.
        * lib/regcomp.c, lib/regexec.c: Change all uses from __restrict
        to _Restrict_.
        * lib/regexec.c (regexec): Declare pmatch with _Restrict_arr_, so that
        the parameter matches the prototype.

Index: MODULES.html.sh
===================================================================
RCS file: /cvsroot/gnulib/gnulib/MODULES.html.sh,v
retrieving revision 1.177
diff -u -p -r1.177 MODULES.html.sh
--- MODULES.html.sh     27 Jan 2007 01:05:04 -0000      1.177
+++ MODULES.html.sh     29 Jan 2007 07:23:18 -0000
@@ -1540,6 +1540,16 @@ func_all_modules ()
   func_module pagealign_alloc
   func_end_table

+  element="Sorting functions <stdlib.h>"
+  element=`printf "%s" "$element" | sed -e "$sed_lt" -e "$sed_gt"`
+  func_section_wrap ansic_enh_stdlib_sorting
+  func_wrap H3
+  func_echo "$element"
+
+  func_begin_table
+  func_module mpsort
+  func_end_table
+
   element="Date and time <time.h>"
   element=`printf "%s" "$element" | sed -e "$sed_lt" -e "$sed_gt"`
   func_section_wrap ansic_enh_time_datetime
Index: lib/mpsort.c
===================================================================
RCS file: lib/mpsort.c
diff -N lib/mpsort.c
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ lib/mpsort.c        29 Jan 2007 07:23:18 -0000
@@ -0,0 +1,157 @@
+/* Sort a vector of pointers to data.
+
+   Copyright (C) 2007 Free Software Foundation, Inc.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 2, or (at your option)
+   any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License along
+   with this program; if not, write to the Free Software Foundation,
+   Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
+
+/* Written by Paul Eggert.  */
+
+#include <config.h>
+
+#include "mpsort.h"
+
+#include <string.h>
+
+/* The type of qsort-style comparison functions.  */
+
+typedef int (*comparison_function) (void const *, void const *);
+
+static void mpsort_with_tmp (void const **restrict, size_t,
+                            void const **restrict, comparison_function);
+
+/* Sort a vector BASE containing N pointers, placing the sorted array
+   into TMP.  Compare pointers with CMP.  N must be at least 2.  */
+
+static void
+mpsort_into_tmp (void const **restrict base, size_t n,
+                void const **restrict tmp,
+                comparison_function cmp)
+{
+  size_t n1 = n / 2;
+  size_t n2 = n - n1;
+  size_t a = 0;
+  size_t alim = n1;
+  size_t b = n1;
+  size_t blim = n;
+  void const *ba;
+  void const *bb;
+
+  mpsort_with_tmp (base + n1, n2, tmp, cmp);
+  mpsort_with_tmp (base, n1, tmp, cmp);
+
+  ba = base[a];
+  bb = base[b];
+
+  for (;;)
+    if (cmp (ba, bb) <= 0)
+      {
+       *tmp++ = ba;
+       a++;
+       if (a == alim)
+         {
+           a = b;
+           alim = blim;
+           break;
+         }
+       ba = base[a];
+      }
+    else
+      {
+       *tmp++ = bb;
+       b++;
+       if (b == blim)
+         break;
+       bb = base[b];
+      }
+
+  memcpy (tmp, base + a, (alim - a) * sizeof *base);
+}
+
+/* Sort a vector BASE containing N pointers, in place.  Use TMP
+   (containing N / 2 pointers) for temporary storage.  Compare
+   pointers with CMP.  */
+
+static void
+mpsort_with_tmp (void const **restrict base, size_t n,
+                void const **restrict tmp,
+                comparison_function cmp)
+{
+  if (n <= 2)
+    {
+      if (n == 2)
+       {
+         void const *p0 = base[0];
+         void const *p1 = base[1];
+         if (! (cmp (p0, p1) <= 0))
+           {
+             base[0] = p1;
+             base[1] = p0;
+           }
+       }
+    }
+  else
+    {
+      size_t n1 = n / 2;
+      size_t n2 = n - n1;
+      size_t i;
+      size_t t = 0;
+      size_t tlim = n1;
+      size_t b = n1;
+      size_t blim = n;
+      void const *bb;
+      void const *tt;
+
+      mpsort_with_tmp (base + n1, n2, tmp, cmp);
+
+      if (n1 < 2)
+       tmp[0] = base[0];
+      else
+       mpsort_into_tmp (base, n1, tmp, cmp);
+
+      tt = tmp[t];
+      bb = base[b];
+
+      for (i = 0; ; )
+       if (cmp (tt, bb) <= 0)
+         {
+           base[i++] = tt;
+           t++;
+           if (t == tlim)
+             break;
+           tt = tmp[t];
+         }
+       else
+         {
+           base[i++] = bb;
+           b++;
+           if (b == blim)
+             {
+               memcpy (base + i, tmp + t, (tlim - t) * sizeof *base);
+               break;
+             }
+           bb = base[b];
+         }
+    }
+}
+
+/* Sort a vector BASE containing N pointers, in place.  BASE must
+   contain enough storage to hold N + N / 2 vectors; the trailing
+   vectors are used for temporaries.  Compare pointers with CMP.  */
+
+void
+mpsort (void const **base, size_t n, comparison_function cmp)
+{
+  return mpsort_with_tmp (base, n, base + n, cmp);
+}
Index: lib/mpsort.h
===================================================================
RCS file: lib/mpsort.h
diff -N lib/mpsort.h
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ lib/mpsort.h        29 Jan 2007 07:23:18 -0000
@@ -0,0 +1,2 @@
+#include <stddef.h>
+void mpsort (void const **, size_t, int (*) (void const *, void const *));
Index: m4/mpsort.m4
===================================================================
RCS file: m4/mpsort.m4
diff -N m4/mpsort.m4
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ m4/mpsort.m4        29 Jan 2007 07:23:18 -0000
@@ -0,0 +1,13 @@
+# Sort a vector of pointers to data.
+
+# Copyright (C) 2007 Free Software Foundation, Inc.
+
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+AC_DEFUN([gl_MPSORT],
+[
+  AC_REQUIRE([AC_C_RESTRICT])
+  AC_LIBOBJ([mpsort])
+])
Index: modules/mpsort
===================================================================
RCS file: modules/mpsort
diff -N modules/mpsort
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ modules/mpsort      29 Jan 2007 07:23:18 -0000
@@ -0,0 +1,23 @@
+Description:
+Sort a vector of pointers to data.
+
+Files:
+lib/mpsort.h
+lib/mpsort.c
+m4/mpsort.m4
+
+Depends-on:
+
+configure.ac:
+gl_MPSORT
+
+Makefile.am:
+
+Include:
+"mpsort.h"
+
+License:
+GPL
+
+Maintainer:
+Paul Eggert


Reply via email to