On 2023-10-09 06:48, Pádraig Brady wrote:

An incremental patch attached to use xxhash128 (0.8.2)
shows a good improvement (note avx2 being used on this cpu):

xxhash128 is not a cryptographic hash function, so it doesn't attempt to be random. Of course most people won't care - it's random "enough" - but it would be a functionality change.

blake2 is cryptographic and would be random, but would bloat the 'sort' executable with code that's hardly ever used.

To attack the problem in a more conservative way, I installed the attached patch into coreutils. With it, 'sort -R' continues to use MD5 but on GNUish platforms 'sort' links libcrypto dynamically only if -R is used (Bruno's suggestion). This doesn't significantly affect 'sort -R' performance, and reduces the startup overhead of plain 'sort' to be what it was before we started passing -lcrypto to gcc by default (in coreutils 8.32).

I also toyed with changing MD5 to SHA512, but that hurt performance. For what it's worth, although I tested with an Intel Xeon W-1350, which supports SHA-NI as well as various AVX-512 options, I didn't see where libcrypto (at least on Ubuntu 23.10, which has OpenSSL 3.0.10) takes advantage of these special-purpose instructions.
From 7f57ac2d20c144242953a8dc7d95b02df0244751 Mon Sep 17 00:00:00 2001
From: Paul Eggert <egg...@cs.ucla.edu>
Date: Sun, 25 Feb 2024 17:13:12 -0800
Subject: [PATCH] sort: dynamically link -lcrypto if -R

This saves time in the usual case, which does not need -lcrypto.
* configure.ac (DLOPEN_LIBCRYPTO): New macro.
* src/sort.c [DLOPEN_LIBCRYPTO && HAVE_OPENSSL_MD5]: New macros
MD5_Init, MD5_Update, MD5_Final.  Include "md5.h" after defining
them.  Include <dlfcn.h>, and define new functions link_failure
and symbol_address.
(link_libcrypto): New function.
(random_md5_state_init): Call it before using crypto functions.
---
 configure.ac | 29 +++++++++++++++++++++++++++++
 src/sort.c   | 52 +++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index c7eca1b8d..043081b90 100644
--- a/configure.ac
+++ b/configure.ac
@@ -351,6 +351,35 @@ if test $utils_cv_localtime_cache = yes; then
   AC_DEFINE([LOCALTIME_CACHE], [1], [FIXME])
 fi
 
+# Should 'sort' link libcrypto dynamically?
+AS_CASE([$LIB_CRYPTO],
+  [-lcrypto],
+    [# Check for dlopen and libcrypto dynamic linking in one program,
+     # as there's little point to checking them separately.
+     AC_CACHE_CHECK([for dlopen and whether libcrypto is linked dynamically],
+       [utils_cv_dlopen_libcrypto],
+       [utils_cv_dlopen_libcrypto=no
+        saved_LIBS=$LIBS
+        LIBS="$LIBS $LIB_CRYPTO"
+        AC_LINK_IFELSE(
+          [AC_LANG_PROGRAM(
+             [[#include <dlfcn.h>
+               #include <openssl/sha.h>
+             ]],
+             [[return !(dlopen ("libcrypto.so", RTLD_LAZY | RTLD_GLOBAL)
+                        && SHA512 (0, 0, 0));]])],
+          [# readelf works with cross-builds; ldd works on more platforms.
+           AS_CASE([`(readelf -d conftest$EXEEXT || ldd conftest$EXEEXT
+                     ) 2>/dev/null`],
+             [*libcrypto*],
+               [utils_cv_dlopen_libcrypto=yes])])
+        LIBS=$saved_LIBS])
+     AS_CASE([$utils_cv_dlopen_libcrypto],
+       [yes],
+         [AC_DEFINE([DLOPEN_LIBCRYPTO], [1],
+                    [Define to 1 if dlopen exists and libcrypto is
+                     linked dynamically.])])])
+
 # macOS >= 10.12
 AC_CHECK_FUNCS([fclonefileat])
 
diff --git a/src/sort.c b/src/sort.c
index dea7be45d..cefe381bf 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -39,7 +39,6 @@
 #include "hash.h"
 #include "heap.h"
 #include "ignore-value.h"
-#include "md5.h"
 #include "mbswidth.h"
 #include "nproc.h"
 #include "physmem.h"
@@ -2085,6 +2084,56 @@ getmonth (char const *month, char **ea)
   return 0;
 }
 
+/* When using the OpenSSL implementation, dynamically link only if -R.
+   This saves startup time in the usual (sans -R) case.  */
+
+#if DLOPEN_LIBCRYPTO && HAVE_OPENSSL_MD5
+/* In the typical case where md5.h does not #undef HAVE_OPENSSL_MD5,
+   trick md5.h into declaring and using pointers to functions not functions.
+   This causes the compiler's -lcrypto option to have no effect,
+   as sort.o no longer uses any crypto symbols statically.  */
+# define MD5_Init (*ptr_MD5_Init)
+# define MD5_Update (*ptr_MD5_Update)
+# define MD5_Final (*ptr_MD5_Final)
+#endif
+
+#include "md5.h"
+
+#if DLOPEN_LIBCRYPTO && HAVE_OPENSSL_MD5
+# include <dlfcn.h>
+
+/* Diagnose a dynamic linking failure.  */
+static void
+link_failure (void)
+{
+  error (SORT_FAILURE, 0, "%s", dlerror ());
+}
+
+/* Return a function pointer in HANDLE for SYMBOL.  */
+static void *
+symbol_address (void *handle, char const *symbol)
+{
+  void *address = dlsym (handle, symbol);
+  if (!address)
+    link_failure ();
+  return address;
+}
+#endif
+
+/* Dynamically link the crypto library, if it needs linking.  */
+static void
+link_libcrypto (void)
+{
+#if DLOPEN_LIBCRYPTO && HAVE_OPENSSL_MD5
+  void *handle = dlopen ("libcrypto.so", RTLD_LAZY | RTLD_GLOBAL);
+  if (!handle)
+    link_failure ();
+  ptr_MD5_Init = symbol_address (handle, "MD5_Init");
+  ptr_MD5_Update = symbol_address (handle, "MD5_Update");
+  ptr_MD5_Final = symbol_address (handle, "MD5_Final");
+#endif
+}
+
 /* A randomly chosen MD5 state, used for random comparison.  */
 static struct md5_ctx random_md5_state;
 
@@ -2100,6 +2149,7 @@ random_md5_state_init (char const *random_source)
   randread (r, buf, sizeof buf);
   if (randread_free (r) != 0)
     sort_die (_("close failed"), random_source);
+  link_libcrypto ();
   md5_init_ctx (&random_md5_state);
   md5_process_bytes (buf, sizeof buf, &random_md5_state);
 }
-- 
2.40.1

Reply via email to