This commit adds a new command line option (effective-url=true) to nbdkit-curl-plugin. When it is set, the first time the plugin fetches a URL it saves the CURLINFO_EFFECTIVE_URL, that is, the URL after all redirects have been followed. Further connections are made using this post-redirect URL instead of the URL supplied on the command line, ensuring that those connections are stable.
Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=2013000 --- plugins/curl/nbdkit-curl-plugin.pod | 35 ++++++++++++++++++++++++ plugins/curl/curl.c | 42 ++++++++++++++++++++++++++--- 2 files changed, 73 insertions(+), 4 deletions(-) diff --git a/plugins/curl/nbdkit-curl-plugin.pod b/plugins/curl/nbdkit-curl-plugin.pod index c7acf6225..673f1d327 100644 --- a/plugins/curl/nbdkit-curl-plugin.pod +++ b/plugins/curl/nbdkit-curl-plugin.pod @@ -126,6 +126,40 @@ Run C<SCRIPT> (a command or shell script fragment) to generate the HTTP/HTTPS cookies. C<cookie-script> cannot be used with C<cookie>. See L</HEADER AND COOKIE SCRIPTS> below. +=item B<effective-url=true> + +(nbdkit E<ge> 1.30) + +Replace the URL supplied on the command line with the effective URL +(from L<CURLINFO_EFFECTIVE_URL(3)>, the final URL fetched after server +redirects). This can be used with mirror services that redirect to a +geographical region — for example file download sites — to ensure the +URL will always be the same. + +Note use of this feature in long-lived nbdkit instances can cause +subtle problems: + +=over 4 + +=item * + +The effective URL persists across connections for the lifetime of the +nbdkit instance. If nbdkit is used for a long time then it is +possible for the redirected URL to become stale. + +=item * + +It will defeat some mirror load-balancing techniques. + +=item * + +If the mirror service sometimes redirects to a broken URL and it +happens that the URL you fetch first is broken then nbdkit will no +longer recover on subsequent connections (instead you will need to +restart nbdkit). + +=back + =item B<followlocation=false> (nbdkit E<ge> 1.26) @@ -481,6 +515,7 @@ C<nbdkit-curl-plugin> first appeared in nbdkit 1.2. 
L<curl(1)>, L<libcurl(3)>, +L<CURLINFO_EFFECTIVE_URL(3)>, L<CURLOPT_CAINFO(3)>, L<CURLOPT_CAPATH(3)>, L<CURLOPT_COOKIE(3)>, diff --git a/plugins/curl/curl.c b/plugins/curl/curl.c index a1b0afba7..8c016509f 100644 --- a/plugins/curl/curl.c +++ b/plugins/curl/curl.c @@ -44,6 +44,8 @@ #include <errno.h> #include <assert.h> +#include <pthread.h> + #include <curl/curl.h> #include <nbdkit-plugin.h> @@ -73,6 +75,7 @@ const char *cookiefile = NULL; const char *cookiejar = NULL; const char *cookie_script = NULL; unsigned cookie_script_renew = 0; +bool effectiveurl = false; bool followlocation = true; struct curl_slist *headers = NULL; const char *header_script = NULL; @@ -93,6 +96,12 @@ const char *unix_socket_path = NULL; const char *user = NULL; const char *user_agent = NULL; +/* For handling the effective-url flag, we save the first effective + * URL we visit in this variable. + */ +char *url_effective = NULL; +pthread_mutex_t url_effective_lock = PTHREAD_MUTEX_INITIALIZER; + /* Use '-D curl.verbose=1' to set. 
*/ NBDKIT_DLL_PUBLIC int curl_debug_verbose = 0; @@ -118,6 +127,7 @@ curl_unload (void) free (proxy_password); scripts_unload (); curl_global_cleanup (); + free (url_effective); } /* See <curl/curl.h> */ @@ -248,6 +258,13 @@ curl_config (const char *key, const char *value) return -1; } + else if (strcmp (key, "effective-url") == 0) { + r = nbdkit_parse_bool (value); + if (r == -1) + return -1; + effectiveurl = r; + } + else if (strcmp (key, "followlocation") == 0) { r = nbdkit_parse_bool (value); if (r == -1) @@ -404,6 +421,7 @@ curl_config_complete (void) "cookiejar=<FILENAME> Read and write cookies to jar.\n" \ "cookie-script=<SCRIPT> Script to set HTTP/HTTPS cookies.\n" \ "cookie-script-renew=<SECS> Time to renew HTTP/HTTPS cookies.\n" \ + "effective-url=true Always use redirected URL.\n" \ "followlocation=false Do not follow redirects.\n" \ "header=<HEADER> Set HTTP/HTTPS header.\n" \ "header-script=<SCRIPT> Script to set HTTP/HTTPS headers.\n" \ @@ -486,10 +504,14 @@ curl_open (int readonly) } /* Set the URL. */ - r = curl_easy_setopt (h->c, CURLOPT_URL, url); - if (r != CURLE_OK) { - display_curl_error (h, r, "curl_easy_setopt: CURLOPT_URL [%s]", url); - goto err; + { + ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&url_effective_lock); + + r = curl_easy_setopt (h->c, CURLOPT_URL, url_effective ? : url); + if (r != CURLE_OK) { + display_curl_error (h, r, "curl_easy_setopt: CURLOPT_URL [%s]", url); + goto err; + } } /* Various options we always set. 
@@ -594,6 +616,18 @@ curl_open (int readonly) goto err; } + if (effectiveurl) { + ACQUIRE_LOCK_FOR_CURRENT_SCOPE (&url_effective_lock); + + if (url_effective == NULL) { + r = curl_easy_getinfo (h->c, CURLINFO_EFFECTIVE_URL, &url_effective); + if (r != CURLE_OK) { + display_curl_error (h, r, "could not get effective URL"); + goto err; + } + } + } + #ifdef HAVE_CURLINFO_CONTENT_LENGTH_DOWNLOAD_T r = curl_easy_getinfo (h->c, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, &o); if (r != CURLE_OK) { -- 2.32.0 _______________________________________________ Libguestfs mailing list [email protected] https://listman.redhat.com/mailman/listinfo/libguestfs
