On Sun, Jul 03, 2011 at 02:13:30PM -0500, Dan McGee wrote: > On Fri, Jul 1, 2011 at 7:59 AM, Dave Reisner <[email protected]> wrote: > > From: Dave Reisner <[email protected]> > > > > This means creating a new struct which can pass more descriptive data > > from the back end sync functions to the downloader. In particular, we're > > interested in the download size read from the sync DB. When the remote > > server reports a size larger than this (via a content-length header), > > abort the transfer. > > > > In cases where the size is unknown, we set a hard upper limit of: > > > > * 25MiB for a sync DB > > * 16KiB for a signature > > > > For reference, 25mb is more than twice the size of all of the current > > binary repos (with files) combined, and 16k is a truly gargantuan > > signature. > MiB/KiB here to not confuse people, please. (Or MB/KB, but definitely > not lowercase). Same with any usages in the code comments. >
Fixed. > > > > Signed-off-by: Dave Reisner <[email protected]> > > --- > > lib/libalpm/be_sync.c | 22 +++++++++++++------ > > lib/libalpm/dload.c | 55 > > +++++++++++++++++++++++++++++++++--------------- > > lib/libalpm/dload.h | 13 +++++++++- > > lib/libalpm/sync.c | 30 +++++++++++++++++--------- > > 4 files changed, 84 insertions(+), 36 deletions(-) > > > > diff --git a/lib/libalpm/be_sync.c b/lib/libalpm/be_sync.c > > index 16351f8..65420cf 100644 > > --- a/lib/libalpm/be_sync.c > > +++ b/lib/libalpm/be_sync.c > > @@ -175,16 +175,21 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) > > > > for(i = db->servers; i; i = i->next) { > > const char *server = i->data; > > - char *fileurl; > > + struct dload_payload *payload; > > size_t len; > > int sig_ret = 0; > > > > + CALLOC(payload, 1, sizeof(*payload), RET_ERR(handle, > > PM_ERR_MEMORY, -1)); > Any reason not to just allocate it on the stack and not use a pointer? > You will want to memset(&payload, 0, sizeof(payload); then. > > Edit: after reading the rest of the patch, we need heap allocated > payloads elsewhere, so you can probably ignore me here. > Yep, I considered this, but opted for heap allocations throughout since it wasn't always possible to stack allocate. > > + > > + /* set hard upper limit of 25MiB */ > > + payload->max_size = 25 * 1024 * 1024; > > + > > /* print server + filename into a buffer (leave space for > > .sig) */ > > len = strlen(server) + strlen(db->treename) + 9; > > - CALLOC(fileurl, len, sizeof(char), RET_ERR(handle, > > PM_ERR_MEMORY, -1)); > > - snprintf(fileurl, len, "%s/%s.db", server, db->treename); > > + CALLOC(payload->fileurl, len, sizeof(char), RET_ERR(handle, > > PM_ERR_MEMORY, -1)); > > + snprintf(payload->fileurl, len, "%s/%s.db", server, > > db->treename); > > > > - ret = _alpm_download(handle, fileurl, syncpath, NULL, > > force, 0, 0); > > + ret = _alpm_download(handle, payload, syncpath, NULL, > > force, 0, 0); > > > > if(ret == 0 && (check_sig == PM_PGP_VERIFY_ALWAYS || > > check_sig == > > PM_PGP_VERIFY_OPTIONAL)) { > > @@ -199,14 +204,17 @@ int SYMEXPORT alpm_db_update(int force, alpm_db_t *db) > > > > int errors_ok = (check_sig == > > PM_PGP_VERIFY_OPTIONAL); > > /* if we downloaded a DB, we want the .sig from the > > same server */ > > - snprintf(fileurl, len, "%s/%s.db.sig", server, > > db->treename); > > + snprintf(payload->fileurl, len, "%s/%s.db.sig", > > server, db->treename); > > + > > + /* set hard upper limited of 16KiB */ > "limit" Fixed. > > + payload->max_size = 16 * 1024; > > > > - sig_ret = _alpm_download(handle, fileurl, syncpath, > > NULL, 1, 0, errors_ok); > > + sig_ret = _alpm_download(handle, payload, syncpath, > > NULL, 1, 0, errors_ok); > > /* errors_ok suppresses error messages, but not the > > return code */ > > sig_ret = errors_ok ? 0 : sig_ret; > > } > > > > - FREE(fileurl); > > + _alpm_dload_payload_free(payload); > If you do stack-based, this will have to change to not free &payload > itself, but would still be useful as a helper to free all the attached > strings. > Ignoring this. > > if(ret != -1 && sig_ret != -1) { > > break; > > } > > diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c > > index eefa285..1758744 100644 > > --- a/lib/libalpm/dload.c > > +++ b/lib/libalpm/dload.c > > @@ -179,7 +179,7 @@ static size_t parse_headers(void *ptr, size_t size, > > size_t nmemb, void *user) > > return realsize; > > } > > > > -static int curl_download_internal(alpm_handle_t *handle, const char *url, > > +static int curl_download_internal(alpm_handle_t *handle, struct > > dload_payload *payload, > > const char *localpath, char **final_file, int force, int > > allow_resume, > > int errors_ok) > > { > > @@ -199,10 +199,10 @@ static int curl_download_internal(alpm_handle_t > > *handle, const char *url, > > > > dlfile.handle = handle; > > dlfile.initial_size = 0.0; > > - dlfile.filename = get_filename(url); > > + dlfile.filename = get_filename(payload->fileurl); > > dlfile.cd_filename = NULL; > > - if(!dlfile.filename || curl_gethost(url, hostname) != 0) { > > - _alpm_log(handle, PM_LOG_ERROR, _("url '%s' is invalid\n"), > > url); > > + if(!dlfile.filename || curl_gethost(payload->fileurl, hostname) != > > 0) { > > + _alpm_log(handle, PM_LOG_ERROR, _("url '%s' is invalid\n"), > > payload->fileurl); > > RET_ERR(handle, PM_ERR_SERVER_BAD_URL, -1); > > } > > > > @@ -240,7 +240,7 @@ static int curl_download_internal(alpm_handle_t > > *handle, const char *url, > > /* the curl_easy handle is initialized with the alpm handle, so we > > only need > > * to reset the curl handle set parameters for each time it's used. > > */ > > curl_easy_reset(handle->curl); > > - curl_easy_setopt(handle->curl, CURLOPT_URL, url); > > + curl_easy_setopt(handle->curl, CURLOPT_URL, payload->fileurl); > > curl_easy_setopt(handle->curl, CURLOPT_FAILONERROR, 1L); > > curl_easy_setopt(handle->curl, CURLOPT_ERRORBUFFER, error_buffer); > > curl_easy_setopt(handle->curl, CURLOPT_CONNECTTIMEOUT, 10L); > > @@ -254,6 +254,10 @@ static int curl_download_internal(alpm_handle_t > > *handle, const char *url, > > curl_easy_setopt(handle->curl, CURLOPT_HEADERFUNCTION, > > parse_headers); > > curl_easy_setopt(handle->curl, CURLOPT_WRITEHEADER, &dlfile); > > > > + if(payload->max_size) { > > + curl_easy_setopt(handle->curl, CURLOPT_MAXFILESIZE, > > payload->max_size); > > So I hate to rain on the parade here, but: > CURLOPT_MAXFILESIZE > Pass a long as parameter. This allows you to specify the maximum > size (in bytes) of a file to download. If the file requested is larger > than this value, the transfer will not start and > CURLE_FILESIZE_EXCEEDED will be returned. > The file size is not always known prior to download, and for such > files this option has no effect even if the file transfer ends up > being larger than this given limit. This concerns both FTP and HTTP > transfers. > > Which seems to indicate it is rather easy to bypass said restrictions > if you simply put up a server that forgets the Content-Length header. > Do we need to do something in the progress callback as well to check > up on how much we've downloaded? > In theory, yeah -- we should probably add this. That said, I did a survey of all our current known mirrors, and failed to find any which did _not_ report a content-length header. > > + } > > + > > useragent = getenv("HTTP_USER_AGENT"); > > if(useragent != NULL) { > > curl_easy_setopt(handle->curl, CURLOPT_USERAGENT, useragent); > > @@ -409,18 +413,19 @@ cleanup: > > * @param errors_ok do not log errors (but still return them) > > * @return 0 on success, -1 on error (pm_errno is set accordingly if > > errors_ok == 0) > > */ > > -int _alpm_download(alpm_handle_t *handle, const char *url, const char > > *localpath, > > - char **final_file, int force, int allow_resume, int > > errors_ok) > > +int _alpm_download(alpm_handle_t *handle, struct dload_payload *payload, > > + const char *localpath, char **final_file, int force, int > > allow_resume, > > + int errors_ok) > > { > > if(handle->fetchcb == NULL) { > > #ifdef HAVE_LIBCURL > > - return curl_download_internal(handle, url, localpath, > > final_file, force, > > + return curl_download_internal(handle, payload, localpath, > > final_file, force, > > allow_resume, errors_ok); > > #else > > RET_ERR(handle, PM_ERR_EXTERNAL_DOWNLOAD, -1); > > #endif > > } else { > > - int ret = handle->fetchcb(url, localpath, force); > > + int ret = handle->fetchcb(payload->fileurl, localpath, > > force); > > if(ret == -1 && !errors_ok) { > > RET_ERR(handle, PM_ERR_EXTERNAL_DOWNLOAD, -1); > > } > > @@ -434,6 +439,7 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t > > *handle, const char *url) > > char *filepath; > > const char *cachedir; > > char *final_file = NULL; > > + struct dload_payload *payload; > > int ret; > > > > CHECK_HANDLE(handle, return NULL); > > @@ -441,8 +447,11 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t > > *handle, const char *url) > > /* find a valid cache dir to download to */ > > cachedir = _alpm_filecache_setup(handle); > > > > + CALLOC(payload, 1, sizeof(*payload), RET_ERR(handle, PM_ERR_MEMORY, > > NULL)); > > + payload->filename = strdup(url); > > + > > /* download the file */ > > - ret = _alpm_download(handle, url, cachedir, &final_file, 0, 1, 0); > > + ret = _alpm_download(handle, payload, cachedir, &final_file, 0, 1, > > 0); > > if(ret == -1) { > > _alpm_log(handle, PM_LOG_WARNING, _("failed to download > > %s\n"), url); > > return NULL; > > @@ -452,30 +461,42 @@ char SYMEXPORT *alpm_fetch_pkgurl(alpm_handle_t > > *handle, const char *url) > > /* attempt to download the signature */ > > if(ret == 0 && (handle->sigverify == PM_PGP_VERIFY_ALWAYS || > > handle->sigverify == > > PM_PGP_VERIFY_OPTIONAL)) { > > - char *sig_url; > > size_t len; > > int errors_ok = (handle->sigverify == > > PM_PGP_VERIFY_OPTIONAL); > > + struct dload_payload *spayload; > Is the load getting spayed? sig_payload would be a bit more readable. > It also looks like you never set the 16 KiB limit here. > Just trying to make Bob Barker happy. > > > > + CALLOC(spayload, 1, sizeof(*spayload), RET_ERR(handle, > > PM_ERR_MEMORY, NULL)); > > len = strlen(url) + 5; > > - CALLOC(sig_url, len, sizeof(char), RET_ERR(handle, > > PM_ERR_MEMORY, NULL)); > > - snprintf(sig_url, len, "%s.sig", url); > > + CALLOC(spayload->fileurl, len, sizeof(char), > > RET_ERR(handle, PM_ERR_MEMORY, NULL)); > > + snprintf(spayload->fileurl, len, "%s.sig", url); > > > > - ret = _alpm_download(handle, sig_url, cachedir, > > &final_file, 1, 0, errors_ok); > > + ret = _alpm_download(handle, spayload, cachedir, > > &final_file, 1, 0, errors_ok); > > if(ret == -1 && !errors_ok) { > > - _alpm_log(handle, PM_LOG_WARNING, _("failed to > > download %s\n"), sig_url); > > + _alpm_log(handle, PM_LOG_WARNING, _("failed to > > download %s\n"), spayload->fileurl); > > /* Warn now, but don't return NULL. We will fail > > later during package > > * load time. */ > > } else if(ret == 0) { > > - _alpm_log(handle, PM_LOG_DEBUG, "successfully > > downloaded %s\n", sig_url); > > + _alpm_log(handle, PM_LOG_DEBUG, "successfully > > downloaded %s\n", spayload->fileurl); > > } > > - FREE(sig_url); > > + _alpm_dload_payload_free(spayload); > > } > > > > /* we should be able to find the file the second time around */ > > filepath = _alpm_filecache_find(handle, final_file); > > FREE(final_file); > > + _alpm_dload_payload_free(payload); > > > > return filepath; > > } > > > > +void _alpm_dload_payload_free(void *payload) { > > + struct dload_payload *load = (struct dload_payload*)payload; > > + > > + ASSERT(load, return); > > + > > + FREE(load->filename); > > + FREE(load->fileurl); > > + FREE(load); > > +} > > + > > /* vim: set ts=2 sw=2 noet: */ > > diff --git a/lib/libalpm/dload.h b/lib/libalpm/dload.h > > index 351b2b3..8aaed0c 100644 > > --- a/lib/libalpm/dload.h > > +++ b/lib/libalpm/dload.h > > @@ -33,8 +33,17 @@ struct fileinfo { > > double initial_size; > > }; > > > > -int _alpm_download(alpm_handle_t *handle, const char *url, const char > > *localpath, > > - char **final_file, int force, int allow_resume, int > > errors_ok); > > +struct dload_payload { > > + char *filename; > > + char *fileurl; > > + long max_size; > > +}; > > + > > +void _alpm_dload_payload_free(void *payload); > > + > > +int _alpm_download(alpm_handle_t *handle, struct dload_payload *payload, > > + const char *localpath, char **final_file, int force, int > > allow_resume, > > + int errors_ok); > > > > #endif /* _ALPM_DLOAD_H */ > > > > diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c > > index 8df8a17..4277283 100644 > > --- a/lib/libalpm/sync.c > > +++ b/lib/libalpm/sync.c > > @@ -757,10 +757,12 @@ static int download_files(alpm_handle_t *handle, > > alpm_list_t **deltas) > > alpm_pkg_t *spkg = j->data; > > > > if(spkg->origin != PKG_FROM_FILE && current == > > spkg->origin_data.db) { > > - const char *fname = NULL; > > + struct dload_payload *payload; > > + > > + CALLOC(payload, 1, sizeof(*payload), > > RET_ERR(handle, PM_ERR_MEMORY, -1)); > > + STRDUP(payload->filename, spkg->filename, > > RET_ERR(handle, PM_ERR_MEMORY, -1)); > > + payload->max_size = spkg->download_size; > This is actually wrong. Take a look at where and when we set this > field (compute_download_size). You never want a possible delta chain > total size, so you really want to use alpm_pkg_get_size() in all cases > here. > Fixed. Good to know. > Sidenote: I also have a mental TODO in the future of allowing > compute_download_size() to take partial downloads into account, so if > you say had half of libreoffice downloaded, it would report the proper > size at the prompt. > Seems reasonable. > > > > - fname = alpm_pkg_get_filename(spkg); > > - ASSERT(fname != NULL, RET_ERR(handle, > > PM_ERR_PKG_INVALID_NAME, -1)); > > alpm_list_t *delta_path = spkg->delta_path; > > if(delta_path) { > > /* using deltas */ > > @@ -768,14 +770,20 @@ static int download_files(alpm_handle_t *handle, > > alpm_list_t **deltas) > > for(dlts = delta_path; dlts; dlts = > > dlts->next) { > > alpm_delta_t *delta = > > dlts->data; > > if(delta->download_size != > > 0) { > > - files = > > alpm_list_add(files, strdup(delta->delta)); > > + struct > > dload_payload *dpayload; > > + > > + CALLOC(dpayload, 1, > > sizeof(*payload), RET_ERR(handle, PM_ERR_MEMORY, -1)); > > + > > STRDUP(dpayload->filename, delta->delta, RET_ERR(handle, PM_ERR_MEMORY, > > -1)); > > + dpayload->max_size > > = delta->download_size; > > + > > + files = > > alpm_list_add(files, dpayload); > > } > > /* keep a list of all the > > delta files for md5sums */ > > *deltas = > > alpm_list_add(*deltas, delta); > > } > > > > } else if(spkg->download_size != 0) { > > - files = alpm_list_add(files, > > strdup(fname)); > > + files = alpm_list_add(files, > > payload); > > } > > > > } > > @@ -784,7 +792,7 @@ static int download_files(alpm_handle_t *handle, > > alpm_list_t **deltas) > > if(files) { > > EVENT(handle->trans, PM_TRANS_EVT_RETRIEVE_START, > > current->treename, NULL); > > for(j = files; j; j = j->next) { > > - const char *filename = j->data; > > + struct dload_payload *payload = j->data; > > alpm_list_t *server; > > int ret = -1; > > for(server = current->servers; server; > > server = server->next) { > > @@ -793,11 +801,11 @@ static int download_files(alpm_handle_t *handle, > > alpm_list_t **deltas) > > size_t len; > > > > /* print server + filename into a > > buffer */ > > - len = strlen(server_url) + > > strlen(filename) + 2; > > - CALLOC(fileurl, len, sizeof(char), > > RET_ERR(handle, PM_ERR_MEMORY, -1)); > > - snprintf(fileurl, len, "%s/%s", > > server_url, filename); > > + len = strlen(server_url) + > > strlen(payload->filename) + 2; > > + CALLOC(payload->fileurl, len, > > sizeof(char), RET_ERR(handle, PM_ERR_MEMORY, -1)); > > + snprintf(payload->fileurl, len, > > "%s/%s", server_url, payload->filename); > > > > - ret = _alpm_download(handle, > > fileurl, cachedir, NULL, 0, 1, 0); > > + ret = _alpm_download(handle, > > payload, cachedir, NULL, 0, 1, 0); > > FREE(fileurl); > > if(ret != -1) { > > break; > > @@ -809,6 +817,8 @@ static int download_files(alpm_handle_t *handle, > > alpm_list_t **deltas) > > } > > > > FREELIST(files); > > + alpm_list_free_inner(files, > > _alpm_dload_payload_free); > This is going to segfault, you left the FREELIST() call in; you want > to remove that completely. I'm surprised I haven't seen a double free yet because of this. > > + alpm_list_free(files); > > if(errors) { > > _alpm_log(handle, PM_LOG_WARNING, _("failed > > to retrieve some files from %s\n"), > > current->treename); > > -- > > 1.7.6 >
