On 02/02/14 04:12, Florian Pritz wrote:
> This greatly speeds up file list generation times by avoiding
> uncompressing the whole package.
> 
> pacman -S base with a deliberate file conflict:
> before: 9.1 seconds
> after:  2.2 seconds
> 
> Signed-off-by: Florian Pritz <[email protected]>
> ---
> 
> v4:
> 
> - fix mem leak when cleaning up pkg->files
> - make mtree_*size names a little more descriptive
> - switch comments to c style
> - break some long lines, there are still a few left..
> - files starting with . are handled by handle_simple_path now
> - pass the path directly to add_entry_to_files_list
> - consolidate early breaking in main loop
> 
> NOT changed:
> 
> - ret for archive_read_next_header: copied from original code in
>   _alpm_pkg_load_internal, if anyone cares feel free to change it later
> - I didn't break all lines > 80 chars, that should probably be done in a
>   line break cleanup only commit
> 
>  lib/libalpm/be_package.c | 150 
> +++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 146 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c
> index 393f436..ac52ae1 100644
> --- a/lib/libalpm/be_package.c
> +++ b/lib/libalpm/be_package.c
> @@ -386,13 +386,36 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, 
> size_t *files_size,
>  {
>       const size_t files_count = pkg->files.count;
>       alpm_file_t *current_file;
> +     mode_t type;
> +     size_t pathlen;
>  
>       if(!_alpm_realloc((void **)&pkg->files.files, files_size, (files_count 
> + 1) * sizeof(alpm_file_t))) {
>               return -1;
>       }
>  
> +     type = archive_entry_filetype(entry);
> +
> +     pathlen = strlen(path);
> +
>       current_file = pkg->files.files + files_count;
> -     STRDUP(current_file->name, path, return -1);
> +
> +     /* mtree paths don't contain a tailing slash, those we get from
> +      * the archive directly do (expensive way)
> +      * Other code relies on it to detect directories so add it here.*/
> +     if(type == AE_IFDIR && path[pathlen - 1] != '/') {
> +             /* 2 = 1 for / + 1 for \0 */
> +             char *newpath = malloc(pathlen + 2);
> +             if (!newpath) {
> +                     _alpm_alloc_fail(pathlen + 2);
> +                     return -1;
> +             }
> +             strcpy(newpath, path);
> +             newpath[pathlen] = '/';
> +             newpath[pathlen + 1] = '\0';
> +             current_file->name = newpath;
> +     } else {
> +             STRDUP(current_file->name, path, return -1);
> +     }
>       current_file->size = archive_entry_size(entry);
>       current_file->mode = archive_entry_mode(entry);
>       pkg->files.count++;
> @@ -400,6 +423,115 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, 
> size_t *files_size,
>  }
>  
>  /**
> + * Generate a new file list from an mtree file and add it to the package.
> + * An existing file list will be free()d first.
> + *
> + * archive should point to an archive struct which is already at the
> + * position of the mtree's header.
> + *
> + * @param handle
> + * @param pkg package to add the file list to
> + * @param archive archive containing the mtree
> + * @return 0 on success, <0 on error
> + */
> +static int build_filelist_from_mtree(alpm_handle_t *handle, alpm_pkg_t *pkg, 
> struct archive *archive)
> +{
> +     int ret = 0;
> +     size_t mtree_maxsize = 0;
> +     size_t mtree_cursize = 0;
> +     size_t files_size = 0; /* we clean up the existing array so this is 
> fine */
> +     char *mtree_data = NULL;
> +     struct archive *mtree;
> +     struct archive_entry *mtree_entry = NULL;
> +
> +     _alpm_log(handle, ALPM_LOG_DEBUG,
> +                     "found mtree for package %s, getting file list\n", 
> pkg->filename);
> +
> +     /* throw away any files we might have already found */
> +     for (size_t i = 0; i < pkg->files.count; i++) {
> +             free(pkg->files.files[i].name);
> +     }
> +     free(pkg->files.files);
> +     pkg->files.files = NULL;
> +     pkg->files.count = 0;
> +
> +     /* create a new archive to parse the mtree and load it from archive 
> into memory */
> +     /* TODO: split this into a function */
> +     if((mtree = archive_read_new()) == NULL) {
> +             handle->pm_errno = ALPM_ERR_LIBARCHIVE;
> +             goto error;
> +     }
> +
> +     _alpm_archive_read_support_filter_all(mtree);
> +     archive_read_support_format_mtree(mtree);
> +
> +     /* TODO: split this into a function */
> +     while(1) {
> +             ssize_t size;
> +
> +             if(!_alpm_realloc((void **)&mtree_data, &mtree_maxsize, 
> mtree_cursize + ALPM_BUFFER_SIZE)) {
> +                     goto error;
> +             }
> +
> +             size = archive_read_data(archive, mtree_data + mtree_cursize, 
> ALPM_BUFFER_SIZE);
> +
> +             if(size < 0) {
> +                     _alpm_log(handle, ALPM_LOG_ERROR, _("error while 
> reading package %s: %s\n"),
> +                                     pkg->filename, 
> archive_error_string(archive));
> +                     handle->pm_errno = ALPM_ERR_LIBARCHIVE;
> +                     goto error;
> +             }
> +             if(size == 0) {
> +                     break;
> +             }
> +
> +             mtree_cursize += size;
> +     }
> +
> +     /* TODO: need wrapper function for archive_read_open_memory for old 
> libarchive versions? */

When did support for that function get added?

> +     if(archive_read_open_memory(mtree, mtree_data, mtree_cursize)) {
> +             _alpm_log(handle, ALPM_LOG_ERROR,
> +                             _("error while reading mtree of package %s: 
> %s\n"),
> +                             pkg->filename, archive_error_string(mtree));
> +             handle->pm_errno = ALPM_ERR_LIBARCHIVE;
> +             goto error;
> +     }
> +
> +     while((ret = archive_read_next_header(mtree, &mtree_entry)) == 
> ARCHIVE_OK) {
> +             const char *path = archive_entry_pathname(mtree_entry);
> +
> +             /* strip leading "./" from path entries */
> +             if(path[0] == '.' && path[1] == '/') {
> +                     path += 2;
> +             }
> +
> +             if(handle_simple_path(pkg, path)) {
> +                     continue;
> +             }
> +
> +             if(add_entry_to_files_list(pkg, &files_size, mtree_entry, path) 
> < 0) {
> +                     goto error;
> +             }
> +     }
> +
> +     if(ret != ARCHIVE_EOF && ret != ARCHIVE_OK) { /* An error occurred */
> +             _alpm_log(handle, ALPM_LOG_ERROR, _("error while reading mtree 
> of package %s: %s\n"),
> +                             pkg->filename, archive_error_string(mtree));
> +             handle->pm_errno = ALPM_ERR_LIBARCHIVE;
> +             goto error;
> +     }
> +
> +     free(mtree_data);
> +     _alpm_archive_read_free(mtree);
> +     _alpm_log(handle, ALPM_LOG_DEBUG, "finished mtree reading for %s\n", 
> pkg->filename);
> +     return 0;
> +error:
> +     free(mtree_data);
> +     _alpm_archive_read_free(mtree);
> +     return -1;
> +}
> +
> +/**
>   * Load a package and create the corresponding alpm_pkg_t struct.
>   * @param handle the context handle
>   * @param pkgfile path to the package file
> @@ -409,7 +541,7 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, 
> size_t *files_size,
>  alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle,
>               const char *pkgfile, int full)
>  {
> -     int ret, fd, config = 0;
> +     int ret, fd, config, hit_mtree = 0;
>       struct archive *archive;
>       struct archive_entry *entry;
>       alpm_pkg_t *newpkg;
> @@ -472,7 +604,17 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t 
> *handle,
>                       continue;
>               } else if(handle_simple_path(newpkg, entry_name)) {
>                       continue;

handle_simple_path will return 1 if the file starts with a ".".
".MTREE" starts with a ".", so....

> -             } else if(full) {
> +             } else if(full && strcmp(entry_name, ".MTREE") == 0) {

... this can never happen.

> +                     /* building the file list: cheap way
> +                      * get the filelist from the mtree file rather than 
> scanning
> +                      * the whole archive  */
> +                     if(build_filelist_from_mtree(handle, newpkg, archive) < 
> 0) {
> +                             goto error;
> +                     }
> +                     hit_mtree = 1;
> +                     continue;
> +             } else if(full && !hit_mtree) {
> +                     /* building the file list: expensive way */
>                       if(add_entry_to_files_list(newpkg, &files_size, entry, 
> entry_name) < 0) {
>                               goto error;
>                       }
> @@ -486,7 +628,7 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle,
>               }
>  
>               /* if we are not doing a full read, see if we have all we need 
> */
> -             if(!full && config) {
> +             if((!full || hit_mtree) && config) {

Why does hit_mtree matter here?  if(!full) we should never read the
mtree file.

>                       break;
>               }
>       }
> 


Reply via email to