On 02.02.2014 12:13, Allan McRae wrote:
> On 02/02/14 04:12, Florian Pritz wrote:
>> This greatly speeds up file list generation times by avoiding
>> uncompressing the whole package.
>> 
>> pacman -S base with a deliberate file conflict:
>> before: 9.1 seconds
>> after:  2.2 seconds
>> 
>> Signed-off-by: Florian Pritz <[email protected]>
>> ---
>> 
>> v4:
>> 
>> - fix mem leak when cleaning up pkg->files
>> - make mtree_*size names a little more descriptive
>> - switch comments to c style
>> - break some long lines, there are still a few left..
>> - files starting with . are handled by handle_simple_path now
>> - pass the path directly to add_entry_to_files_list
>> - consolidate early breaking in main loop
>> 
>> NOT changed:
>> 
>> - ret for archive_read_next_header: copied from original code in
>>   _alpm_pkg_load_internal, if anyone cares feel free to change it later
>> - I didn't break all lines > 80 chars, that should probably be done in a
>>   line break cleanup only commit
>> 
>>  lib/libalpm/be_package.c | 150 
>> +++++++++++++++++++++++++++++++++++++++++++++--
>>  1 file changed, 146 insertions(+), 4 deletions(-)
>> 
>> diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c
>> index 393f436..ac52ae1 100644
>> --- a/lib/libalpm/be_package.c
>> +++ b/lib/libalpm/be_package.c
>> @@ -386,13 +386,36 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, 
>> size_t *files_size,
>>  {
>>      const size_t files_count = pkg->files.count;
>>      alpm_file_t *current_file;
>> +    mode_t type;
>> +    size_t pathlen;
>>  
>>      if(!_alpm_realloc((void **)&pkg->files.files, files_size, (files_count 
>> + 1) * sizeof(alpm_file_t))) {
>>              return -1;
>>      }
>>  
>> +    type = archive_entry_filetype(entry);
>> +
>> +    pathlen = strlen(path);
>> +
>>      current_file = pkg->files.files + files_count;
>> -    STRDUP(current_file->name, path, return -1);
>> +
>> +    /* mtree paths don't contain a tailing slash, those we get from
>> +     * the archive directly do (expensive way)
>> +     * Other code relies on it to detect directories so add it here.*/
>> +    if(type == AE_IFDIR && path[pathlen - 1] != '/') {
>> +            /* 2 = 1 for / + 1 for \0 */
>> +            char *newpath = malloc(pathlen + 2);
>> +            if (!newpath) {
>> +                    _alpm_alloc_fail(pathlen + 2);
>> +                    return -1;
>> +            }
>> +            strcpy(newpath, path);
>> +            newpath[pathlen] = '/';
>> +            newpath[pathlen + 1] = '\0';
>> +            current_file->name = newpath;
>> +    } else {
>> +            STRDUP(current_file->name, path, return -1);
>> +    }
>>      current_file->size = archive_entry_size(entry);
>>      current_file->mode = archive_entry_mode(entry);
>>      pkg->files.count++;
>> @@ -400,6 +423,115 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, 
>> size_t *files_size,
>>  }
>>  
>>  /**
>> + * Generate a new file list from an mtree file and add it to the package.
>> + * An existing file list will be free()d first.
>> + *
>> + * archive should point to an archive struct which is already at the
>> + * position of the mtree's header.
>> + *
>> + * @param handle
>> + * @param pkg package to add the file list to
>> + * @param archive archive containing the mtree
>> + * @return 0 on success, <0 on error
>> + */
>> +static int build_filelist_from_mtree(alpm_handle_t *handle, alpm_pkg_t 
>> *pkg, struct archive *archive)
>> +{
>> +    int ret = 0;
>> +    size_t mtree_maxsize = 0;
>> +    size_t mtree_cursize = 0;
>> +    size_t files_size = 0; /* we clean up the existing array so this is 
>> fine */
>> +    char *mtree_data = NULL;
>> +    struct archive *mtree;
>> +    struct archive_entry *mtree_entry = NULL;
>> +
>> +    _alpm_log(handle, ALPM_LOG_DEBUG,
>> +                    "found mtree for package %s, getting file list\n", 
>> pkg->filename);
>> +
>> +    /* throw away any files we might have already found */
>> +    for (size_t i = 0; i < pkg->files.count; i++) {
>> +            free(pkg->files.files[i].name);
>> +    }
>> +    free(pkg->files.files);
>> +    pkg->files.files = NULL;
>> +    pkg->files.count = 0;
>> +
>> +    /* create a new archive to parse the mtree and load it from archive 
>> into memory */
>> +    /* TODO: split this into a function */
>> +    if((mtree = archive_read_new()) == NULL) {
>> +            handle->pm_errno = ALPM_ERR_LIBARCHIVE;
>> +            goto error;
>> +    }
>> +
>> +    _alpm_archive_read_support_filter_all(mtree);
>> +    archive_read_support_format_mtree(mtree);
>> +
>> +    /* TODO: split this into a function */
>> +    while(1) {
>> +            ssize_t size;
>> +
>> +            if(!_alpm_realloc((void **)&mtree_data, &mtree_maxsize, 
>> mtree_cursize + ALPM_BUFFER_SIZE)) {
>> +                    goto error;
>> +            }
>> +
>> +            size = archive_read_data(archive, mtree_data + mtree_cursize, 
>> ALPM_BUFFER_SIZE);
>> +
>> +            if(size < 0) {
>> +                    _alpm_log(handle, ALPM_LOG_ERROR, _("error while 
>> reading package %s: %s\n"),
>> +                                    pkg->filename, 
>> archive_error_string(archive));
>> +                    handle->pm_errno = ALPM_ERR_LIBARCHIVE;
>> +                    goto error;
>> +            }
>> +            if(size == 0) {
>> +                    break;
>> +            }
>> +
>> +            mtree_cursize += size;
>> +    }
>> +
>> +    /* TODO: need wrapper function for archive_read_open_memory for old 
>> libarchive versions? */
> 
> When did support for that function get added?

Seems like it's been there from the beginning of the git repo (2007) so
I guess I can remove that comment.

>> +    if(archive_read_open_memory(mtree, mtree_data, mtree_cursize)) {
>> +            _alpm_log(handle, ALPM_LOG_ERROR,
>> +                            _("error while reading mtree of package %s: 
>> %s\n"),
>> +                            pkg->filename, archive_error_string(mtree));
>> +            handle->pm_errno = ALPM_ERR_LIBARCHIVE;
>> +            goto error;
>> +    }
>> +
>> +    while((ret = archive_read_next_header(mtree, &mtree_entry)) == 
>> ARCHIVE_OK) {
>> +            const char *path = archive_entry_pathname(mtree_entry);
>> +
>> +            /* strip leading "./" from path entries */
>> +            if(path[0] == '.' && path[1] == '/') {
>> +                    path += 2;
>> +            }
>> +
>> +            if(handle_simple_path(pkg, path)) {
>> +                    continue;
>> +            }
>> +
>> +            if(add_entry_to_files_list(pkg, &files_size, mtree_entry, path) 
>> < 0) {
>> +                    goto error;
>> +            }
>> +    }
>> +
>> +    if(ret != ARCHIVE_EOF && ret != ARCHIVE_OK) { /* An error occurred */
>> +            _alpm_log(handle, ALPM_LOG_ERROR, _("error while reading mtree 
>> of package %s: %s\n"),
>> +                            pkg->filename, archive_error_string(mtree));
>> +            handle->pm_errno = ALPM_ERR_LIBARCHIVE;
>> +            goto error;
>> +    }
>> +
>> +    free(mtree_data);
>> +    _alpm_archive_read_free(mtree);
>> +    _alpm_log(handle, ALPM_LOG_DEBUG, "finished mtree reading for %s\n", 
>> pkg->filename);
>> +    return 0;
>> +error:
>> +    free(mtree_data);
>> +    _alpm_archive_read_free(mtree);
>> +    return -1;
>> +}
>> +
>> +/**
>>   * Load a package and create the corresponding alpm_pkg_t struct.
>>   * @param handle the context handle
>>   * @param pkgfile path to the package file
>> @@ -409,7 +541,7 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, 
>> size_t *files_size,
>>  alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle,
>>              const char *pkgfile, int full)
>>  {
>> -    int ret, fd, config = 0;
>> +    int ret, fd, config, hit_mtree = 0;
>>      struct archive *archive;
>>      struct archive_entry *entry;
>>      alpm_pkg_t *newpkg;
>> @@ -472,7 +604,17 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t 
>> *handle,
>>                      continue;
>>              } else if(handle_simple_path(newpkg, entry_name)) {
>>                      continue;
> 
> handle_simple_path will return 1 if the file starts with a ".".
> ".MTREE" starts with a ".", so....
> 
>> -            } else if(full) {
>> +            } else if(full && strcmp(entry_name, ".MTREE") == 0) {
> 
> ... this can never happen.

Thanks, messed that up when rebasing some other changes.

>> +                    /* building the file list: cheap way
>> +                     * get the filelist from the mtree file rather than 
>> scanning
>> +                     * the whole archive  */
>> +                    if(build_filelist_from_mtree(handle, newpkg, archive) < 
>> 0) {
>> +                            goto error;
>> +                    }
>> +                    hit_mtree = 1;
>> +                    continue;
>> +            } else if(full && !hit_mtree) {
>> +                    /* building the file list: expensive way */
>>                      if(add_entry_to_files_list(newpkg, &files_size, entry, 
>> entry_name) < 0) {
>>                              goto error;
>>                      }
>> @@ -486,7 +628,7 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t 
>> *handle,
>>              }
>>  
>>              /* if we are not doing a full read, see if we have all we need 
>> */
>> -            if(!full && config) {
>> +            if((!full || hit_mtree) && config) {
> 
> Why does hit_mtree matter here?  if(!full) we should never read the
> mtree file.

hit_mtree matters because we want to break the loop once we have a file
list (if we need one), but only if we also have read .PKGINFO.

Since .MTREE could be before .PKGINFO in the tarball I can't break just
after reading the mtree.

Does that make sense?

> 
>>                      break;
>>              }
>>      }
>> 
> 


Attachment: signature.asc
Description: OpenPGP digital signature



Reply via email to