This commit adds a new command line option (effective-url=true).  The
first time nbdkit-curl-plugin fetches a URL, this option causes it to
update its internal 'url' variable with the CURLINFO_EFFECTIVE_URL,
that is, the URL after all redirects have been followed.  Further
connections will be made using this post-redirect URL, ensuring that
those connections are stable.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2013000
---
 plugins/curl/nbdkit-curl-plugin.pod | 35 ++++++++++++++++++++++++
 plugins/curl/curl.c                 | 42 ++++++++++++++++++++++++++---
 2 files changed, 73 insertions(+), 4 deletions(-)

diff --git a/plugins/curl/nbdkit-curl-plugin.pod b/plugins/curl/nbdkit-curl-plugin.pod
index c7acf6225..673f1d327 100644
--- a/plugins/curl/nbdkit-curl-plugin.pod
+++ b/plugins/curl/nbdkit-curl-plugin.pod
@@ -126,6 +126,40 @@ Run C<SCRIPT> (a command or shell script fragment) to generate the
 HTTP/HTTPS cookies.  C<cookie-script> cannot be used with C<cookie>.
 See L</HEADER AND COOKIE SCRIPTS> below.
 
+=item B<effective-url=true>
+
+(nbdkit E<ge> 1.30)
+
+Replace the URL supplied on the command line with the effective URL
+(from L<CURLINFO_EFFECTIVE_URL(3)>, the final URL fetched after server
+redirects).  This can be used with mirror services that redirect to a
+geographical region — for example file download sites — to ensure the
+URL will always be the same.
+
+Note that using this feature in long-lived nbdkit instances can cause
+subtle problems:
+
+=over 4
+
+=item *
+
+The effective URL persists across connections for the lifetime of the
+nbdkit instance.  If nbdkit is used for a long time then it is
+possible for the redirected URL to become stale.
+
+=item *
+
+It will defeat some mirror load-balancing techniques.
+
+=item *
+
+If the mirror service sometimes redirects to a broken URL, and the
+first URL you fetch happens to be broken, then nbdkit will not
+recover on subsequent connections (instead you will need to restart
+nbdkit).
+
+=back
+
 =item B<followlocation=false>
 
 (nbdkit E<ge> 1.26)
@@ -481,6 +515,7 @@ C<nbdkit-curl-plugin> first appeared in nbdkit 1.2.
 
 L<curl(1)>,
 L<libcurl(3)>,
+L<CURLINFO_EFFECTIVE_URL(3)>,
 L<CURLOPT_CAINFO(3)>,
 L<CURLOPT_CAPATH(3)>,
 L<CURLOPT_COOKIE(3)>,
diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c
index a1b0afba7..8c016509f 100644
--- a/plugins/curl/curl.c
+++ b/plugins/curl/curl.c
@@ -44,6 +44,8 @@
 #include <errno.h>
 #include <assert.h>
 
+#include <pthread.h>
+
 #include <curl/curl.h>
 
 #include <nbdkit-plugin.h>
@@ -73,6 +75,7 @@ const char *cookiefile = NULL;
 const char *cookiejar = NULL;
 const char *cookie_script = NULL;
 unsigned cookie_script_renew = 0;
+bool effectiveurl = false;
 bool followlocation = true;
 struct curl_slist *headers = NULL;
 const char *header_script = NULL;
@@ -93,6 +96,12 @@ const char *unix_socket_path = NULL;
 const char *user = NULL;
 const char *user_agent = NULL;
 
+/* For handling the effective-url flag, we save the first effective
+ * URL we visit in this variable.
+ */
+char *url_effective = NULL;
+pthread_mutex_t url_effective_lock = PTHREAD_MUTEX_INITIALIZER;
+
 /* Use '-D curl.verbose=1' to set. */
 NBDKIT_DLL_PUBLIC int curl_debug_verbose = 0;
 
@@ -118,6 +127,7 @@ curl_unload (void)
   free (proxy_password);
   scripts_unload ();
   curl_global_cleanup ();
+  free (url_effective);
 }
 
 /* See <curl/curl.h> */
@@ -248,6 +258,13 @@ curl_config (const char *key, const char *value)
       return -1;
   }
 
+  else if (strcmp (key, "effective-url") == 0) {
+    r = nbdkit_parse_bool (value);
+    if (r == -1)
+      return -1;
+    effectiveurl = r;
+  }
+
   else if (strcmp (key, "followlocation") == 0) {
     r = nbdkit_parse_bool (value);
     if (r == -1)
@@ -404,6 +421,7 @@ curl_config_complete (void)
   "cookiejar=<FILENAME>       Read and write cookies to jar.\n" \
   "cookie-script=<SCRIPT>     Script to set HTTP/HTTPS cookies.\n" \
   "cookie-script-renew=<SECS> Time to renew HTTP/HTTPS cookies.\n" \
+  "effective-url=true         Always use redirected URL.\n" \
   "followlocation=false       Do not follow redirects.\n" \
   "header=<HEADER>            Set HTTP/HTTPS header.\n" \
   "header-script=<SCRIPT>     Script to set HTTP/HTTPS headers.\n" \
@@ -486,10 +504,14 @@ curl_open (int readonly)
   }
 
   /* Set the URL. */
-  r = curl_easy_setopt (h->c, CURLOPT_URL, url);
-  if (r != CURLE_OK) {
-    display_curl_error (h, r, "curl_easy_setopt: CURLOPT_URL [%s]", url);
-    goto err;
+  {
+    ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&url_effective_lock);
+
+    r = curl_easy_setopt (h->c, CURLOPT_URL, url_effective ? : url);
+    if (r != CURLE_OK) {
+      display_curl_error (h, r, "curl_easy_setopt: CURLOPT_URL [%s]", url);
+      goto err;
+    }
   }
 
   /* Various options we always set.
@@ -594,6 +616,18 @@ curl_open (int readonly)
     goto err;
   }
 
+  if (effectiveurl) {
+    ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&url_effective_lock);
+
+    if (url_effective == NULL) {
+      r = curl_easy_getinfo (h->c, CURLINFO_EFFECTIVE_URL, &url_effective);
+      if (r != CURLE_OK) {
+        display_curl_error (h, r, "could not get effective URL");
+        goto err;
+      }
+    }
+  }
+
 #ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T
   r = curl_easy_getinfo (h->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o);
   if (r != CURLE_OK) {
-- 
2.32.0

_______________________________________________
Libguestfs mailing list
[email protected]
https://listman.redhat.com/mailman/listinfo/libguestfs

Reply via email to