Hi Frank,

On Wed, Apr 12, 2023 at 04:31:49PM -0400, Frank Ch. Eigler via Elfutils-devel wrote:
> index f13c28d5c6f7..ac3fd6aa862f 100644
> --- a/debuginfod/ChangeLog
> +++ b/debuginfod/ChangeLog
> @@ -1,3 +1,26 @@
> +2023-04-12  Ryan Golberg <rgoldb...@redhat.com>, Frank Ch. Eigler <f...@redhat.com>
> +
> +	PR29472: debuginfod metadata query
> +	* Makefile.am: Add json-c usage.
> +	* debuginfod-client.c (debuginfod_find_metadata): New function.
> +	(handle_data): New fields to hold metadata being received.
> +	(debuginfod_clean_cache): Clean metadata too.
> +	(header_callback): Simplify to realloc only.
> +	(metadata_callback): New function.
> +	(init_server_urls, init_handle, perform_queries, make_cache_path):
> +	New refactored functions.
> +	(debuginfod_query_server_by_buildid): Renamed, refactored. Update
> +	callers.
> +	* debuginfod-find.c (main): Handle metadata queries.
> +	* debuginfod.cxx (DEBUGINFOD_SQLITE_DDL): Add an index or two.
> +	(metadata_maxtime_s, parse_opt): New parameter for load control.
> +	(add_client_federation_headers): New refactored function.
> +	(handle_metadata): New function.
> +	(handler_cb): Call it for /metadata URL. Trace it.
> +	(groom): Tweak sqlite_ps object lifetimes.
> +	* debuginfod.h.in (debuginfod_find_metadata): New decl.
> +	* libdebuginfod.map: Export it under ELFUTILS_0.190.
> +
> [...]
> +/*
> + * This function initializes a CURL handle. It takes optional callbacks for the write
> + * function and the header function, which if defined will use userdata of type struct handle_data*.
> + * Specifically the data[i] within an array of struct handle_data's.
> + * Returns 0 on success and -Posix error on failure.
> + */
> +int
> +init_handle(debuginfod_client *client,
> +            size_t (*w_callback)(char *buffer, size_t size, size_t nitems, void *userdata),
> +            size_t (*h_callback)(char *buffer, size_t size, size_t nitems, void *userdata),
> +            struct handle_data *data, int i, long timeout,
> +            int vfd)
> +{
> +  data->handle = curl_easy_init();
> +  if (data->handle == NULL)
> +    {
> +      return -ENETUNREACH;
> +    }
> +
> +  if (vfd >= 0)
> +    dprintf (vfd, "url %d %s\n", i, data->url);
> +
> +  /* Only allow http:// + https:// + file:// so we aren't being
> +     redirected to some unsupported protocol.  */
> +#if CURL_AT_LEAST_VERSION(7, 85, 0)
> +  curl_easy_setopt_ck(data->handle, CURLOPT_PROTOCOLS_STR, "http,https,file");
> +#else
> +  curl_easy_setopt_ck(data->handle, CURLOPT_PROTOCOLS,
> +                      (CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_FILE));
> +#endif
> +  curl_easy_setopt_ck(data->handle, CURLOPT_URL, data->url);
> +  if (vfd >= 0)
> +    curl_easy_setopt_ck(data->handle, CURLOPT_ERRORBUFFER,
> +                        data->errbuf);
> +  if(w_callback) {
> +    curl_easy_setopt_ck(data->handle,
> +                        CURLOPT_WRITEFUNCTION, w_callback);
> +    curl_easy_setopt_ck(data->handle, CURLOPT_WRITEDATA, data);
> +  }
> +  if (timeout > 0)
> +    {
> +      /* Make sure there is at least some progress,
> +         try to get at least 100K per timeout seconds.  */
> +      curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_TIME,
> +                           timeout);
> +      curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_LIMIT,
> +                           100 * 1024L);
> +    }
> +  curl_easy_setopt_ck(data->handle, CURLOPT_FILETIME, (long) 1);
> +  curl_easy_setopt_ck(data->handle, CURLOPT_FOLLOWLOCATION, (long) 1);
> +  curl_easy_setopt_ck(data->handle, CURLOPT_FAILONERROR, (long) 1);
> +  curl_easy_setopt_ck(data->handle, CURLOPT_NOSIGNAL, (long) 1);
> +  if(h_callback){
> +    curl_easy_setopt_ck(data->handle,
> +                        CURLOPT_HEADERFUNCTION, h_callback);
> +    curl_easy_setopt_ck(data->handle, CURLOPT_HEADERDATA, data);
> +  }
> +  #if LIBCURL_VERSION_NUM >= 0x072a00 /* 7.42.0 */
> +  curl_easy_setopt_ck(data->handle, CURLOPT_PATH_AS_IS, (long) 1);
> +  #else
> +  /* On old curl; no big deal, canonicalization here is almost the
> +     same, except perhaps for ? # type decorations at the tail.  */
> +  #endif
> +  curl_easy_setopt_ck(data->handle, CURLOPT_AUTOREFERER, (long) 1);
> +  curl_easy_setopt_ck(data->handle, CURLOPT_ACCEPT_ENCODING, "");
> +  curl_easy_setopt_ck(data->handle, CURLOPT_HTTPHEADER, client->headers);
> +
> +  return 0;
> +}

OK, extracted from debuginfod_query_server, with parameterized header and
writer callback functions. That also explains why curl_easy_setopt_ck can
just return now; cleanup is done in the caller on failure.

> +/*
> + * This function busy-waits on one or more curl queries to complete. This can
> + * be controlled via only_one, which, if true, will find the first winner and exit
> + * once found. If positive, maxtime and maxsize dictate the maximum allowed wait times
> + * and download sizes respectively. Returns 0 on success and -Posix error on failure.
> + */
> +int
> +perform_queries(CURLM *curlm, CURL **target_handle, struct handle_data *data, debuginfod_client *c,
> +                int num_urls, long maxtime, long maxsize, bool only_one, int vfd)
> +{
> +  int still_running = -1;
> +  long loops = 0;
> +  int committed_to = -1;
> +  bool verbose_reported = false;
> +  struct timespec start_time, cur_time;
> +  if (c->winning_headers != NULL)
> +    {
> +      free (c->winning_headers);
> +      c->winning_headers = NULL;
> +    }
> +  if ( maxtime > 0 && clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1)
> +    {
> +      return errno;
> +    }
> +  long delta = 0;
> +  do
> +    {
> +      /* Check to see how long querying is taking. */
> +      if (maxtime > 0)
> +        {
> +          if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1)
> +            {
> +              return errno;
> +            }
> +          delta = cur_time.tv_sec - start_time.tv_sec;
> +          if ( delta > maxtime)
> +            {
> +              dprintf(vfd, "Timeout with max time=%lds and transfer time=%lds\n", maxtime, delta );
> +              return -ETIME;
> +            }
> +        }
> +      /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */
> +      curl_multi_wait(curlm, NULL, 0, 1000, NULL);
> +      CURLMcode curlm_res = curl_multi_perform(curlm, &still_running);
> +
> +      if(only_one){
> +        /* If the target file has been found, abort the other queries.  */
> +        if (target_handle && *target_handle != NULL)
> +          {
> +            for (int i = 0; i < num_urls; i++)
> +              if (data[i].handle != *target_handle)
> +                curl_multi_remove_handle(curlm, data[i].handle);
> +              else
> +                {
> +                  committed_to = i;
> +                  if (c->winning_headers == NULL)
> +                    {
> +                      c->winning_headers = data[committed_to].response_data;
> +                      if (vfd >= 0 && c->winning_headers != NULL)
> +                        dprintf(vfd, "\n%s", c->winning_headers);
> +                      data[committed_to].response_data = NULL;
> +                      data[committed_to].response_data_size = 0;
> +                    }
> +                }
> +          }
> +
> +        if (vfd >= 0 && !verbose_reported && committed_to >= 0)
> +          {
> +            bool pnl = (c->default_progressfn_printed_p && vfd == STDERR_FILENO);
> +            dprintf (vfd, "%scommitted to url %d\n", pnl ? "\n" : "",
> +                     committed_to);
> +            if (pnl)
> +              c->default_progressfn_printed_p = 0;
> +            verbose_reported = true;
> +          }
> +      }
> +
> +      if (curlm_res != CURLM_OK)
> +        {
> +          switch (curlm_res)
> +            {
> +            case CURLM_CALL_MULTI_PERFORM: continue;
> +            case CURLM_OUT_OF_MEMORY: return -ENOMEM;
> +            default: return -ENETUNREACH;
> +            }
> +        }
> +
> +      long dl_size = -1;
> +      if(only_one && target_handle){ // Only bother with progress functions if we're retrieving exactly 1 file
> +        if (*target_handle && (c->progressfn || maxsize > 0))
> +          {
> +            /* Get size of file being downloaded. NB: If going through
> +               deflate-compressing proxies, this number is likely to be
> +               unavailable, so -1 may show. */
> +            CURLcode curl_res;
> +#if CURL_AT_LEAST_VERSION(7, 55, 0)
> +            curl_off_t cl;
> +            curl_res = curl_easy_getinfo(*target_handle,
> +                                         CURLINFO_CONTENT_LENGTH_DOWNLOAD_T,
> +                                         &cl);
> +            if (curl_res == CURLE_OK && cl >= 0)
> +              dl_size = (cl > LONG_MAX ? LONG_MAX : (long)cl);
> +#else
> +            double cl;
> +            curl_res = curl_easy_getinfo(*target_handle,
> +                                         CURLINFO_CONTENT_LENGTH_DOWNLOAD,
> +                                         &cl);
> +            if (curl_res == CURLE_OK && cl >= 0)
> +              dl_size = (cl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)cl);
> +#endif
> +            /* If Content-Length is -1, try to get the size from
> +               X-Debuginfod-Size */
> +            if (dl_size == -1 && c->winning_headers != NULL)
> +              {
> +                long xdl;
> +                char *hdr = strcasestr(c->winning_headers, "x-debuginfod-size");
> +                size_t off = strlen("x-debuginfod-size:");
> +
> +                if (hdr != NULL && sscanf(hdr + off, "%ld", &xdl) == 1)
> +                  dl_size = xdl;
> +              }
> +          }
> +
> +        if (c->progressfn) /* inform/check progress callback */
> +          {
> +            loops ++;
> +            long pa = loops; /* default param for progress callback */
> +            if (*target_handle) /* we've committed to a server; report its download progress */
> +              {
> +                CURLcode curl_res;
> +#if CURL_AT_LEAST_VERSION(7, 55, 0)
> +                curl_off_t dl;
> +                curl_res = curl_easy_getinfo(*target_handle,
> +                                             CURLINFO_SIZE_DOWNLOAD_T,
> +                                             &dl);
> +                if (curl_res == 0 && dl >= 0)
> +                  pa = (dl > LONG_MAX ? LONG_MAX : (long)dl);
> +#else
> +                double dl;
> +                curl_res = curl_easy_getinfo(*target_handle,
> +                                             CURLINFO_SIZE_DOWNLOAD,
> +                                             &dl);
> +                if (curl_res == 0)
> +                  pa = (dl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)dl);
> +#endif
> +              }
> +
> +            if ((*c->progressfn) (c, pa, dl_size == -1 ? 0 : dl_size))
> +              break;
> +          }
> +      }
> +      /* Check to see if we are downloading something which exceeds maxsize, if set.*/
> +      if (target_handle && *target_handle && dl_size > maxsize && maxsize > 0)
> +        {
> +          if (vfd >=0)
> +            dprintf(vfd, "Content-Length too large.\n");
> +          return -EFBIG;
> +        }
> +    } while (still_running);
> +  return 0;
> +}

I find the indentation really confusing; it isn't really GNU style, which
makes it hard to see the blocks (and the blocks are pretty large).
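For reference, purely as a sketch of the convention (not a full reindent
of the patch), GNU style for the only_one block above would look
something like:

  if (only_one)
    {
      /* If the target file has been found, abort the other queries.  */
      if (target_handle && *target_handle != NULL)
        {
          for (int i = 0; i < num_urls; i++)
            if (data[i].handle != *target_handle)
              curl_multi_remove_handle (curlm, data[i].handle);
            else
              {
                committed_to = i;
                /* ... */
              }
        }
    }

That is, braces on their own lines, indented two spaces, with the body
two more. Then the nesting is much easier to follow.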
OK, another part of debuginfod_query_server extracted, now with a new
only_one boolean. I still don't have a good enough grasp of the metadata
queries to fully understand why you might want replies from multiple
servers.

It is somewhat unfortunate that if only_one is false no progress is
reported at all. It would be nice to keep calling progressfn if any target
made progress.

Is target_handle ever set when only_one is false? If it isn't set, then
maxsize is never checked.

In a lot of cases the user will have just one DEBUGINFOD_URLS server. It
might be nice if things then just worked as before. The multi-server
options confuse me a little :)

> +
> +/* Helper function to create client cache directory.
> +   $XDG_CACHE_HOME takes priority over $HOME/.cache.
> +   $DEBUGINFOD_CACHE_PATH takes priority over $HOME/.cache and $XDG_CACHE_HOME.
> +
> +   Return resulting path name or NULL on error.  Caller must free resulting string.
> + */
> +static char *
> +make_cache_path(void)
> +{
> +  char* cache_path = NULL;
> +  int rc = 0;
> +  /* Determine location of the cache. The path specified by the debuginfod
> +     cache environment variable takes priority.  */
> +  char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR);
> +  if (cache_var != NULL && strlen (cache_var) > 0)
> +    xalloc_str (cache_path, "%s", cache_var);
> +  else
> +    {
> +      /* If a cache already exists in $HOME ('/' if $HOME isn't set), then use
> +         that. Otherwise use the XDG cache directory naming format.  */
> +      xalloc_str (cache_path, "%s/%s", getenv ("HOME") ?: "/", cache_default_name);
> +
> +      struct stat st;
> +      if (stat (cache_path, &st) < 0)
> +        {
> +          char cachedir[PATH_MAX];
> +          char *xdg = getenv ("XDG_CACHE_HOME");
> +
> +          if (xdg != NULL && strlen (xdg) > 0)
> +            snprintf (cachedir, PATH_MAX, "%s", xdg);
> +          else
> +            snprintf (cachedir, PATH_MAX, "%s/.cache", getenv ("HOME") ?: "/");
> +
> +          /* Create XDG cache directory if it doesn't exist.  */
> +          if (stat (cachedir, &st) == 0)
> +            {
> +              if (! S_ISDIR (st.st_mode))
> +                {
> +                  rc = -EEXIST;
> +                  goto out1;
> +                }
> +            }
> +          else
> +            {
> +              rc = mkdir (cachedir, 0700);
> +
> +              /* Also check for EEXIST and S_ISDIR in case another client just
> +                 happened to create the cache.  */
> +              if (rc < 0
> +                  && (errno != EEXIST
> +                      || stat (cachedir, &st) != 0
> +                      || ! S_ISDIR (st.st_mode)))
> +                {
> +                  rc = -errno;
> +                  goto out1;
> +                }
> +            }
> +
> +          free (cache_path);
> +          xalloc_str (cache_path, "%s/%s", cachedir, cache_xdg_name);
> +        }
> +    }
> +
> +  goto out;
> +
> + out1:
> +  (void) rc;
> +  free (cache_path);
> +  cache_path = NULL;
> +
> + out:
> +  if (cache_path != NULL)
> +    (void) mkdir (cache_path, 0700);
> +  return cache_path;
> +}

OK, another extraction from debuginfod_query_server. No changes I can see,
except for the error path goto and the mkdir at the end. Shouldn't that
mkdir also check for errors (other than EEXIST)?
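Something like this at the end maybe (completely untested sketch):

   out:
    if (cache_path != NULL
        && mkdir (cache_path, 0700) != 0 && errno != EEXIST)
      {
        /* Directory creation failed for a real reason,
           report failure to the caller.  */
        free (cache_path);
        cache_path = NULL;
      }
    return cache_path;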
> /* Query each of the server URLs found in $DEBUGINFOD_URLS for the file
>    with the specified build-id and type (debuginfo, executable, source or
>    section). If type is source, then type_arg should be a filename. If
> @@ -849,7 +1271,7 @@ cache_find_section (const char *scn_name, const char *target_cache_dir,
>     for the target if successful, otherwise return an error code.
>  */
>  static int
> -debuginfod_query_server (debuginfod_client *c,
> +debuginfod_query_server_by_buildid (debuginfod_client *c,
>                           const unsigned char *build_id,
>                           int build_id_len,
>                           const char *type,
> @@ -870,7 +1292,7 @@ debuginfod_query_server (debuginfod_client *c,
>    char suffix[PATH_MAX + 1]; /* +1 for zero terminator. */
>    char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1];
>    int vfd = c->verbose_fd;
> -  int rc;
> +  int rc, r;
>  
>    c->progressfn_cancel = false;
>  
> @@ -995,70 +1417,22 @@ debuginfod_query_server (debuginfod_client *c,
>    dprintf (vfd, "suffix %s\n", suffix);
>  
>    /* set paths needed to perform the query
> -
> -     example format
> +     example format:
>       cache_path:        $HOME/.cache
>       target_cache_dir:  $HOME/.cache/0123abcd
>       target_cache_path: $HOME/.cache/0123abcd/debuginfo
>       target_cache_path: $HOME/.cache/0123abcd/source#PATH#TO#SOURCE ?
> -
> -     $XDG_CACHE_HOME takes priority over $HOME/.cache.
> -     $DEBUGINFOD_CACHE_PATH takes priority over $HOME/.cache and $XDG_CACHE_HOME.
>    */
>  
> -  /* Determine location of the cache. The path specified by the debuginfod
> -     cache environment variable takes priority.  */
> -  char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR);
> -  if (cache_var != NULL && strlen (cache_var) > 0)
> -    xalloc_str (cache_path, "%s", cache_var);
> -  else
> +  cache_path = make_cache_path();
> +  if (!cache_path)
>      {
> -      /* If a cache already exists in $HOME ('/' if $HOME isn't set), then use
> -         that. Otherwise use the XDG cache directory naming format.  */
> -      xalloc_str (cache_path, "%s/%s", getenv ("HOME") ?: "/", cache_default_name);
> -
> -      struct stat st;
> -      if (stat (cache_path, &st) < 0)
> -        {
> -          char cachedir[PATH_MAX];
> -          char *xdg = getenv ("XDG_CACHE_HOME");
> -
> -          if (xdg != NULL && strlen (xdg) > 0)
> -            snprintf (cachedir, PATH_MAX, "%s", xdg);
> -          else
> -            snprintf (cachedir, PATH_MAX, "%s/.cache", getenv ("HOME") ?: "/");
> -
> -          /* Create XDG cache directory if it doesn't exist.  */
> -          if (stat (cachedir, &st) == 0)
> -            {
> -              if (! S_ISDIR (st.st_mode))
> -                {
> -                  rc = -EEXIST;
> -                  goto out;
> -                }
> -            }
> -          else
> -            {
> -              rc = mkdir (cachedir, 0700);
> -
> -              /* Also check for EEXIST and S_ISDIR in case another client just
> -                 happened to create the cache.  */
> -              if (rc < 0
> -                  && (errno != EEXIST
> -                      || stat (cachedir, &st) != 0
> -                      || ! S_ISDIR (st.st_mode)))
> -                {
> -                  rc = -errno;
> +      rc = -ENOMEM;
>        goto out;
>      }
> -            }
> -
> -          free (cache_path);
> -          xalloc_str (cache_path, "%s/%s", cachedir, cache_xdg_name);
> -        }
> -    }
> -
>    xalloc_str (target_cache_dir, "%s/%s", cache_path, build_id_bytes);
> +  (void) mkdir (target_cache_dir, 0700); // failures with this mkdir would be caught later too
> +

If the same reason holds for make_cache_path then maybe add a similar
comment there.

>    if (section != NULL)
>      xalloc_str (target_cache_path, "%s/%s-%s", target_cache_dir, type, suffix);
>    else
> @@ -1193,60 +1567,14 @@ debuginfod_query_server (debuginfod_client *c,
>        goto out0;
>      }
>  
> -  /* Initialize the memory to zero */
> -  char *strtok_saveptr;
>    char **server_url_list = NULL;
> -  char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
> -  /* Count number of URLs.  */
> -  int num_urls = 0;
> -
> -  while (server_url != NULL)
> -    {
> -      /* PR 27983: If the url is already set to be used, skip it */
> -      char *slashbuildid;
> -      if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/')
> -        slashbuildid = "buildid";
> -      else
> -        slashbuildid = "/buildid";
> -
> -      char *tmp_url;
> -      if (asprintf(&tmp_url, "%s%s", server_url, slashbuildid) == -1)
> -        {
> -          rc = -ENOMEM;
> -          goto out1;
> -        }
> -      int url_index;
> -      for (url_index = 0; url_index < num_urls; ++url_index)
> -        {
> -          if(strcmp(tmp_url, server_url_list[url_index]) == 0)
> -            {
> -              url_index = -1;
> -              break;
> -            }
> -        }
> -      if (url_index == -1)
> -        {
> -          if (vfd >= 0)
> -            dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url);
> -          free(tmp_url);
> -        }
> -      else
> -        {
> -          num_urls++;
> -          char ** realloc_ptr;
> -          realloc_ptr = reallocarray(server_url_list, num_urls,
> -                                     sizeof(char*));
> -          if (realloc_ptr == NULL)
> -            {
> -              free (tmp_url);
> -              rc = -ENOMEM;
> +  char *server_url;
> +  int num_urls;
> +  r = init_server_urls("buildid", server_urls, &server_url_list, &num_urls, vfd);
> +  if(0 != r){
> +    rc = r;
>      goto out1;
>    }
> -          server_url_list = realloc_ptr;
> -          server_url_list[num_urls-1] = tmp_url;
> -        }
> -      server_url = strtok_r(NULL, url_delim, &strtok_saveptr);
> -    }
>  
>    int retry_limit = default_retry_limit;
>    const char* retry_limit_envvar = getenv(DEBUGINFOD_RETRY_LIMIT_ENV_VAR);
> @@ -1318,13 +1646,6 @@ debuginfod_query_server (debuginfod_client *c,
>  
>        data[i].fd = fd;
>        data[i].target_handle = &target_handle;
> -      data[i].handle = curl_easy_init();
> -      if (data[i].handle == NULL)
> -        {
> -          if (filename) curl_free (escaped_string);
> -          rc = -ENETUNREACH;
> -          goto out2;
> -        }
>        data[i].client = c;
>  
>        if (filename) /* must start with / */
> @@ -1338,226 +1659,29 @@ debuginfod_query_server (debuginfod_client *c,
>                   build_id_bytes, type, section);
>        else
>          snprintf(data[i].url, PATH_MAX, "%s/%s/%s", server_url, build_id_bytes, type);
> [...lots of removed lines...]
> 
> +      r = init_handle(c, debuginfod_write_callback, header_callback, &data[i], i, timeout, vfd);
> +      if(0 != r){
> +        rc = r;
> +        if(filename) curl_free (escaped_string);
> +        goto out2;
>        }

OK, error handling seems similar.

> -
> -          if ((*c->progressfn) (c, pa, dl_size == -1 ? 0 : dl_size))
> -            {
> -              c->progressfn_cancel = true;
> -              break;
> -            }
> +
> +      curl_multi_add_handle(curlm, data[i].handle);
>      }
>  
> -  /* Check to see if we are downloading something which exceeds maxsize, if set.*/
> -  if (target_handle && dl_size > maxsize && maxsize > 0)
> -    {
> +  if (filename) curl_free(escaped_string);
> +
> +  /* Query servers in parallel.  */
>        if (vfd >= 0)
> -        dprintf(vfd, "Content-Length too large.\n");
> -      rc = -EFBIG;
> +    dprintf (vfd, "query %d urls in parallel\n", num_urls);
> +
> +  r = perform_queries(curlm, &target_handle, data, c, num_urls, maxtime, maxsize, true, vfd);
> +  if (0 != r)
> +    {
> +      rc = r;
>        goto out2;
>      }
> -    } while (still_running);
>  
>    /* Check whether a query was successful. If so, assign its handle
>       to verified_handle. */
> @@ -1709,6 +1833,7 @@ debuginfod_query_server (debuginfod_client *c,
>            curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */
>            curl_easy_cleanup (data[i].handle);
>            free(data[i].response_data);
> +          data[i].response_data = NULL;
>          }
>        free(c->winning_headers);
>        c->winning_headers = NULL;

OK, rewritten with the new extracted helper functions.
> @@ -1918,7 +2043,7 @@ debuginfod_find_debuginfo (debuginfod_client *client,
>                             const unsigned char *build_id, int build_id_len,
>                             char **path)
>  {
> -  return debuginfod_query_server(client, build_id, build_id_len,
> +  return debuginfod_query_server_by_buildid(client, build_id, build_id_len,
>                                   "debuginfo", NULL, path);
>  }
>  
> @@ -1929,7 +2054,7 @@ debuginfod_find_executable(debuginfod_client *client,
>                             const unsigned char *build_id, int build_id_len,
>                             char **path)
>  {
> -  return debuginfod_query_server(client, build_id, build_id_len,
> +  return debuginfod_query_server_by_buildid(client, build_id, build_id_len,
>                                   "executable", NULL, path);
>  }
>  
> @@ -1938,7 +2063,7 @@ int debuginfod_find_source(debuginfod_client *client,
>                             const unsigned char *build_id, int build_id_len,
>                             const char *filename, char **path)
>  {
> -  return debuginfod_query_server(client, build_id, build_id_len,
> +  return debuginfod_query_server_by_buildid(client, build_id, build_id_len,
>                                   "source", filename, path);
>  }
>  
> @@ -1947,7 +2072,7 @@ debuginfod_find_section (debuginfod_client *client,
>                            const unsigned char *build_id, int build_id_len,
>                            const char *section, char **path)
>  {
> -  int rc = debuginfod_query_server(client, build_id, build_id_len,
> +  int rc = debuginfod_query_server_by_buildid(client, build_id, build_id_len,
>                                     "section", section, path);
>    if (rc != -EINVAL)
>      return rc;
> @@ -1997,6 +2122,364 @@ debuginfod_find_section (debuginfod_client *client,
>    return rc;
>  }

OK, calling debuginfod_query_server_by_buildid instead of
debuginfod_query_server now.

> +
> +int debuginfod_find_metadata (debuginfod_client *client,
> +                              const char* key, const char* value, char **path)
> +{
> +  (void) client;
> +  (void) key;
> +  (void) value;
> +  (void) path;
> +
> +#ifdef HAVE_JSON_C
> +  char *server_urls = NULL;
> +  char *urls_envvar = NULL;
> +  char *cache_path = NULL;
> +  char *target_cache_dir = NULL;
> +  char *target_cache_path = NULL;
> +  char *target_cache_tmppath = NULL;
> +  char *key_and_value = NULL;
> +  int rc = 0, r;
> +  int vfd = client->verbose_fd;
> +  struct handle_data *data = NULL;
> +
> +  json_object *json_metadata = json_object_new_array();
> +  if(NULL == json_metadata) {
> +    rc = -ENOMEM;
> +    goto out;
> +  }
> +
> +  if(NULL == value || NULL == key){
> +    rc = -EINVAL;
> +    goto out;
> +  }
> +
> +  if (vfd >= 0)
> +    dprintf (vfd, "debuginfod_find_metadata %s %s\n", key, value);
> +
> +  /* Without query-able URL, we can stop here*/
> +  urls_envvar = getenv(DEBUGINFOD_URLS_ENV_VAR);
> +  if (vfd >= 0)
> +    dprintf (vfd, "server urls \"%s\"\n",
> +             urls_envvar != NULL ? urls_envvar : "");
> +  if (urls_envvar == NULL || urls_envvar[0] == '\0')
> +    {
> +      rc = -ENOSYS;
> +      goto out;
> +    }
> +
> +  /* set paths needed to perform the query
> +     example format:
> +     cache_path:        $HOME/.cache
> +     target_cache_dir:  $HOME/.cache/metadata
> +     target_cache_path: $HOME/.cache/metadata/key=ENCODED&value=ENCODED
> +     target_cache_path: $HOME/.cache/metadata/key=ENCODED&value=ENCODED.XXXXXX
> +  */
> +
> +  // libcurl > 7.62ish has curl_url_set()/etc. to construct these things more properly.

How is it more proper than what we do below? Should we just use
curl_url_set if we have it?
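For comparison, a sketch of what the curl_url_set family could look like
here (untested; key_pair is a hypothetical "key=..." string, and with
CURLU_URLENCODE only the part after the first '=' gets encoded, so one
CURLU_APPENDQUERY call per key=... and value=... pair):

  CURLU *url = curl_url ();
  if (url != NULL)
    {
      char *full_url = NULL;
      if (curl_url_set (url, CURLUPART_URL, server_url, 0) == CURLUE_OK
          && curl_url_set (url, CURLUPART_QUERY, key_pair,
                           CURLU_APPENDQUERY | CURLU_URLENCODE) == CURLUE_OK
          /* ... repeat for the "value=..." pair ... */
          && curl_url_get (url, CURLUPART_URL, &full_url, 0) == CURLUE_OK)
        {
          /* use full_url for data[i].url ... */
          curl_free (full_url);
        }
      curl_url_cleanup (url);
    }

That would handle the encoding and joining for us, but it is a hard
requirement on libcurl >= 7.62.0, so it would probably have to sit behind
a version check anyway.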
> +  // curl_easy_escape() is older
> +  {
> +    CURL *c = curl_easy_init();
> +    if (!c) {
> +      rc = -ENOMEM;
> +      goto out;
> +    }
> +    char *key_escaped = curl_easy_escape(c, key, 0);
> +    char *value_escaped = curl_easy_escape(c, value, 0);
> +
> +    // fallback to unescaped values in unlikely case of error
> +    xalloc_str (key_and_value, "key=%s&value=%s", key_escaped ?: key, value_escaped ?: value);
> +    curl_free(value_escaped);
> +    curl_free(key_escaped);
> +    curl_easy_cleanup(c);
> +  }
> +
> +  /* Check if we have a recent result already in the cache. */
> +  cache_path = make_cache_path();
> +  if (! cache_path)
> +    goto out;
> +  xalloc_str (target_cache_dir, "%s/metadata", cache_path);
> +  (void) mkdir (target_cache_dir, 0700);
> +  xalloc_str (target_cache_path, "%s/%s", target_cache_dir, key_and_value);
> +  xalloc_str (target_cache_tmppath, "%s/%s.XXXXXX", target_cache_dir, key_and_value);

There is trailing whitespace at the end of a line here. Also this function
is a mix of indentation styles. Could it just use GNU style throughout the
whole function?

> +
> +  int fd = open(target_cache_path, O_RDONLY);
> +  if (fd >= 0)
> +    {
> +      struct stat st;
> +      int metadata_retention = 0;
> +      time_t now = time(NULL);
> +      char *metadata_retention_path = NULL;
> +
> +      xalloc_str (metadata_retention_path, "%s/%s", cache_path, metadata_retention_filename);
> +      if (metadata_retention_path)
> +        {
> +          rc = debuginfod_config_cache(metadata_retention_path,
> +                                       metadata_retention_default_s, &st);
> +          free (metadata_retention_path);
> +          if (rc < 0)
> +            rc = 0;
> +        }
> +      else
> +        rc = 0;
> +      metadata_retention = rc;
> +
> +      if (fstat(fd, &st) != 0)
> +        {
> +          rc = -errno;
> +          close (fd);
> +          goto out;
> +        }
> +
> +      if (metadata_retention > 0 && (now - st.st_mtime <= metadata_retention))
> +        {
> +          if (client && client->verbose_fd >= 0)
> +            dprintf (client->verbose_fd, "cached metadata %s", key_and_value);
> +
> +          if (path != NULL)
> +            {
> +              *path = target_cache_path; // pass over the pointer
> +              target_cache_path = NULL; // prevent free() in our own cleanup
> +            }
> +
> +          /* Success!!!! */
> +          rc = fd;
> +          goto out;
> +        }
> +
> +      /* We don't have to clear the likely-expired cached object here
> +         by unlinking.  We will shortly make a new request and save
> +         results right on top.  Erasing here could trigger a TOCTOU
> +         race with another thread just finishing a query and passing
> +         its results back.
> +      */
> +      // (void) unlink (target_cache_path);

This seems to be a different choice from what is done in
debuginfod_query_server_by_buildid. Why?

> +      close (fd);
> +    }
> +
> +  /* No valid cached metadata found: time to make the queries. */
> +
> +  /* Clear the client of previous urls*/
> +  free (client->url);
> +  client->url = NULL;
> +
> +  long maxtime = 0;
> +  const char *maxtime_envvar;
> +  maxtime_envvar = getenv(DEBUGINFOD_MAXTIME_ENV_VAR);
> +  if (maxtime_envvar != NULL)
> +    maxtime = atol (maxtime_envvar);
> +  if (maxtime && vfd >= 0)
> +    dprintf(vfd, "using max time %lds\n", maxtime);
> +
> +  long timeout = default_timeout;
> +  const char* timeout_envvar = getenv(DEBUGINFOD_TIMEOUT_ENV_VAR);
> +  if (timeout_envvar != NULL)
> +    timeout = atoi (timeout_envvar);
> +  if (vfd >= 0)
> +    dprintf (vfd, "using timeout %ld\n", timeout);
> +
> +  add_default_headers(client);
> +
> +  /* make a copy of the envvar so it can be safely modified.  */
> +  server_urls = strdup(urls_envvar);
> +  if (server_urls == NULL)
> +    {
> +      rc = -ENOMEM;
> +      goto out;
> +    }
> +
> +  /* thereafter, goto out1 on error*/
> +
> +  char **server_url_list = NULL;
> +  char *server_url;
> +  int num_urls = 0;
> +  r = init_server_urls("metadata", server_urls, &server_url_list, &num_urls, vfd);
> +  if(0 != r){
> +    rc = r;
> +    goto out1;
> +  }
> +
> +  CURLM *curlm = client->server_mhandle;
> +  assert (curlm != NULL);
> +
> +  CURL *target_handle = NULL;
> +  data = malloc(sizeof(struct handle_data) * num_urls);
> +  if (data == NULL)
> +    {
> +      rc = -ENOMEM;
> +      goto out1;
> +    }
> +
> +  /* thereafter, goto out2 on error.  */
> +
> +  /* Initialize handle_data */
> +  for (int i = 0; i < num_urls; i++)
> +    {
> +      if ((server_url = server_url_list[i]) == NULL)
> +        break;
> +      if (vfd >= 0)
> +        dprintf (vfd, "init server %d %s\n", i, server_url);
> +
> +      data[i].errbuf[0] = '\0';
> +      data[i].target_handle = &target_handle;
> +      data[i].client = client;
> +      data[i].metadata = NULL;
> +      data[i].metadata_size = 0;
> +      data[i].response_data = NULL;
> +      data[i].response_data_size = 0;
> +
> +      snprintf(data[i].url, PATH_MAX, "%s?%s", server_url, key_and_value);
> +
> +      r = init_handle(client, metadata_callback, header_callback, &data[i], i, timeout, vfd);
> +      if(0 != r){
> +        rc = r;
> +        goto out2;
> +      }
> +      curl_multi_add_handle(curlm, data[i].handle);
> +    }
> +
> +  /* Query servers */
> +  if (vfd >= 0)
> +    dprintf (vfd, "Starting %d queries\n", num_urls);
> +  r = perform_queries(curlm, NULL, data, client, num_urls, maxtime, 0, false, vfd);
> +  if (0 != r) {
> +    rc = r;
> +    goto out2;
> +  }
> +
> +  /* NOTE: We don't check the return codes of the curl messages since
> +     a metadata query failing silently is just fine. We want to know what's
> +     available from servers which can be connected with no issues.
> +     If running with additional verbosity, the failure will be noted in stderr */
> +
> +  /* Building the new json array from all the upstream data and
> +     cleanup while at it.
> +  */
> +  for (int i = 0; i < num_urls; i++)
> +    {
> +      curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */
> +      curl_easy_cleanup (data[i].handle);
> +      free (data[i].response_data);
> +
> +      if (NULL == data[i].metadata)
> +        {
> +          if (vfd >= 0)
> +            dprintf (vfd, "Query to %s failed with error message:\n\t\"%s\"\n",
> +                     data[i].url, data[i].errbuf);
> +          continue;
> +        }
> +
> +      json_object *upstream_metadata = json_tokener_parse(data[i].metadata);
> +      if(NULL == upstream_metadata) continue;
> +      // Combine the upstream metadata into the json array
> +      for (int j = 0, n = json_object_array_length(upstream_metadata); j < n; j++) {
> +        json_object *entry = json_object_array_get_idx(upstream_metadata, j);
> +        json_object_get(entry); // increment reference count
> +        json_object_array_add(json_metadata, entry);
> +      }
> +      json_object_put(upstream_metadata);
> +
> +      free (data[i].metadata);
> +    }

Do we really need libjson-c functions to combine the results? Can't we
simply return multiple json arrays/objects if we get results from multiple
servers?
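I mean something like this (untested sketch, using the metadata and
metadata_size fields from handle_data, and assuming the cache tmpfile fd
is already open at that point): just write each server's reply out back
to back, without parsing it at all:

  for (int i = 0; i < num_urls; i++)
    if (data[i].metadata != NULL)
      {
        /* One json array per server, newline separated.  */
        if (write_retry (fd, data[i].metadata, data[i].metadata_size) < 0
            || write_retry (fd, "\n", 1) < 0)
          rc = -EIO;
        free (data[i].metadata);
      }

Although I guess a single combined array is easier for consumers to
parse, so maybe that is the point.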
> +  /* Because of race with cache cleanup / rmdir, try to mkdir/mkstemp up to twice. */
> +  for (int i=0; i<2; i++) {
> +    /* (re)create target directory in cache */
> +    (void) mkdir(target_cache_dir, 0700); /* files will be 0400 later */
> +
> +    /* NB: write to a temporary file first, to avoid race condition of
> +       multiple clients checking the cache, while a partially-written or empty
> +       file is in there, being written from libcurl.  */
> +    fd = mkstemp (target_cache_tmppath);
> +    if (fd >= 0) break;
> +  }
> +  if (fd < 0) /* Still failed after two iterations. */
> +    {
> +      rc = -errno;
> +      goto out1;
> +    }
> +
> +
> +  /* Plop the complete json_metadata object into the cache. */
> +  const char* json_string = json_object_to_json_string_ext(json_metadata, JSON_C_TO_STRING_PRETTY);
> +  if (json_string == NULL)
> +    {
> +      rc = -ENOMEM;
> +      goto out1;
> +    }
> +  ssize_t res = write_retry (fd, json_string, strlen(json_string));
> +  (void) lseek(fd, 0, SEEK_SET); // rewind file so client can read it from the top
> +
> +  /* NB: json_string is auto deleted when json_metadata object is nuked */
> +  if (res < 0 || (size_t) res != strlen(json_string))
> +    {
> +      rc = -EIO;
> +      goto out1;
> +    }
> +  /* PR27571: make cache files casually unwriteable; dirs are already 0700 */
> +  (void) fchmod(fd, 0400);
> +
> +  /* rename tmp->real */
> +  rc = rename (target_cache_tmppath, target_cache_path);
> +  if (rc < 0)
> +    {
> +      rc = -errno;
> +      goto out1;
> +      /* Perhaps we need not give up right away; could retry or something ... */
> +    }
> +
> +  /* don't close fd - we're returning it */
> +  /* don't unlink the tmppath; it's already been renamed. */
> +  if (path != NULL)
> +    *path = strdup(target_cache_path);
> +
> +  rc = fd;
> +  goto out1;
> +
> +/* error exits */
> +out2:
> +  /* remove all handles from multi */
> +  for (int i = 0; i < num_urls; i++)
> +    {
> +      if (data[i].handle != NULL)
> +        {
> +          curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */
> +          curl_easy_cleanup (data[i].handle);
> +          free (data[i].response_data);
> +          free (data[i].metadata);
> +        }
> +    }
> +
> +out1:
> +  free(data);
> +
> +  for (int i = 0; i < num_urls; ++i)
> +    free(server_url_list[i]);
> +  free(server_url_list);
> +
> +out:
> +  free (server_urls);
> +  json_object_put(json_metadata);
> +  /* Reset sent headers */
> +  curl_slist_free_all (client->headers);
> +  client->headers = NULL;
> +  client->user_agent_set_p = 0;
> +
> +  free (target_cache_dir);
> +  free (target_cache_path);
> +  free (target_cache_tmppath);
> +  free (key_and_value);
> +  free (cache_path);
> +
> +  return rc;
> +
> +#else /* ! HAVE_JSON_C */
> +  return -ENOSYS;
> +#endif
> +}

OK. That is it for debuginfod-client.c. Will go over the rest tomorrow.

Cheers,

Mark