On 03/02/14 01:32, Florian Pritz wrote:
> On 02.02.2014 12:13, Allan McRae wrote:
>> On 02/02/14 04:12, Florian Pritz wrote:
>>> This greatly speeds up file list generation times by avoiding
>>> uncompressing the whole package.
>>>
>>> pacman -S base with a deliberate file conflict:
>>> before: 9.1 seconds
>>> after:  2.2 seconds
>>>
>>> Signed-off-by: Florian Pritz <[email protected]>
>>> ---
>>>
>>> v4:
>>>
>>> - fix mem leak when cleaning up pkg->files
>>> - make mtree_*size names a little more descriptive
>>> - switch comments to c style
>>> - break some long lines, there are still a few left..
>>> - files starting with . are handled by handle_simple_path now
>>> - pass the path directly to add_entry_to_files_list
>>> - consolidate early breaking in main loop
>>>
>>> NOT changed:
>>>
>>> - ret for archive_read_next_header: copied from original code in
>>>   _alpm_pkg_load_internal, if anyone cares feel free to change it later
>>> - I didn't break all lines > 80 chars, that should probably be done in a
>>>   line break cleanup only commit
>>>
>>>  lib/libalpm/be_package.c | 150 
>>> +++++++++++++++++++++++++++++++++++++++++++++--
>>>  1 file changed, 146 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c
>>> index 393f436..ac52ae1 100644
>>> --- a/lib/libalpm/be_package.c
>>> +++ b/lib/libalpm/be_package.c
>>> @@ -386,13 +386,36 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, 
>>> size_t *files_size,
>>>  {
>>>     const size_t files_count = pkg->files.count;
>>>     alpm_file_t *current_file;
>>> +   mode_t type;
>>> +   size_t pathlen;
>>>  
>>>     if(!_alpm_realloc((void **)&pkg->files.files, files_size, (files_count 
>>> + 1) * sizeof(alpm_file_t))) {
>>>             return -1;
>>>     }
>>>  
>>> +   type = archive_entry_filetype(entry);
>>> +
>>> +   pathlen = strlen(path);
>>> +
>>>     current_file = pkg->files.files + files_count;
>>> -   STRDUP(current_file->name, path, return -1);
>>> +
>>> +   /* mtree paths don't contain a tailing slash, those we get from
>>> +    * the archive directly do (expensive way)
>>> +    * Other code relies on it to detect directories so add it here.*/
>>> +   if(type == AE_IFDIR && path[pathlen - 1] != '/') {
>>> +           /* 2 = 1 for / + 1 for \0 */
>>> +           char *newpath = malloc(pathlen + 2);
>>> +           if (!newpath) {
>>> +                   _alpm_alloc_fail(pathlen + 2);
>>> +                   return -1;
>>> +           }
>>> +           strcpy(newpath, path);
>>> +           newpath[pathlen] = '/';
>>> +           newpath[pathlen + 1] = '\0';
>>> +           current_file->name = newpath;
>>> +   } else {
>>> +           STRDUP(current_file->name, path, return -1);
>>> +   }
>>>     current_file->size = archive_entry_size(entry);
>>>     current_file->mode = archive_entry_mode(entry);
>>>     pkg->files.count++;
>>> @@ -400,6 +423,115 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, 
>>> size_t *files_size,
>>>  }
>>>  
>>>  /**
>>> + * Generate a new file list from an mtree file and add it to the package.
>>> + * An existing file list will be free()d first.
>>> + *
>>> + * archive should point to an archive struct which is already at the
>>> + * position of the mtree's header.
>>> + *
>>> + * @param handle
>>> + * @param pkg package to add the file list to
>>> + * @param archive archive containing the mtree
>>> + * @return 0 on success, <0 on error
>>> + */
>>> +static int build_filelist_from_mtree(alpm_handle_t *handle, alpm_pkg_t 
>>> *pkg, struct archive *archive)
>>> +{
>>> +   int ret = 0;
>>> +   size_t mtree_maxsize = 0;
>>> +   size_t mtree_cursize = 0;
>>> +   size_t files_size = 0; /* we clean up the existing array so this is 
>>> fine */
>>> +   char *mtree_data = NULL;
>>> +   struct archive *mtree;
>>> +   struct archive_entry *mtree_entry = NULL;
>>> +
>>> +   _alpm_log(handle, ALPM_LOG_DEBUG,
>>> +                   "found mtree for package %s, getting file list\n", 
>>> pkg->filename);
>>> +
>>> +   /* throw away any files we might have already found */
>>> +   for (size_t i = 0; i < pkg->files.count; i++) {
>>> +           free(pkg->files.files[i].name);
>>> +   }
>>> +   free(pkg->files.files);
>>> +   pkg->files.files = NULL;
>>> +   pkg->files.count = 0;
>>> +
>>> +   /* create a new archive to parse the mtree and load it from archive 
>>> into memory */
>>> +   /* TODO: split this into a function */
>>> +   if((mtree = archive_read_new()) == NULL) {
>>> +           handle->pm_errno = ALPM_ERR_LIBARCHIVE;
>>> +           goto error;
>>> +   }
>>> +
>>> +   _alpm_archive_read_support_filter_all(mtree);
>>> +   archive_read_support_format_mtree(mtree);
>>> +
>>> +   /* TODO: split this into a function */
>>> +   while(1) {
>>> +           ssize_t size;
>>> +
>>> +           if(!_alpm_realloc((void **)&mtree_data, &mtree_maxsize, 
>>> mtree_cursize + ALPM_BUFFER_SIZE)) {
>>> +                   goto error;
>>> +           }
>>> +
>>> +           size = archive_read_data(archive, mtree_data + mtree_cursize, 
>>> ALPM_BUFFER_SIZE);
>>> +
>>> +           if(size < 0) {
>>> +                   _alpm_log(handle, ALPM_LOG_ERROR, _("error while 
>>> reading package %s: %s\n"),
>>> +                                   pkg->filename, 
>>> archive_error_string(archive));
>>> +                   handle->pm_errno = ALPM_ERR_LIBARCHIVE;
>>> +                   goto error;
>>> +           }
>>> +           if(size == 0) {
>>> +                   break;
>>> +           }
>>> +
>>> +           mtree_cursize += size;
>>> +   }
>>> +
>>> +   /* TODO: need wrapper function for archive_read_open_memory for old 
>>> libarchive versions? */
>>
>> When did support for that function get added?
> 
> Seems like it's been there from the beginning of the git repo (2007) so
> I guess I can remove that comment.
> 
>>> +   if(archive_read_open_memory(mtree, mtree_data, mtree_cursize)) {
>>> +           _alpm_log(handle, ALPM_LOG_ERROR,
>>> +                           _("error while reading mtree of package %s: 
>>> %s\n"),
>>> +                           pkg->filename, archive_error_string(mtree));
>>> +           handle->pm_errno = ALPM_ERR_LIBARCHIVE;
>>> +           goto error;
>>> +   }
>>> +
>>> +   while((ret = archive_read_next_header(mtree, &mtree_entry)) == 
>>> ARCHIVE_OK) {
>>> +           const char *path = archive_entry_pathname(mtree_entry);
>>> +
>>> +           /* strip leading "./" from path entries */
>>> +           if(path[0] == '.' && path[1] == '/') {
>>> +                   path += 2;
>>> +           }
>>> +
>>> +           if(handle_simple_path(pkg, path)) {
>>> +                   continue;
>>> +           }
>>> +
>>> +           if(add_entry_to_files_list(pkg, &files_size, mtree_entry, path) 
>>> < 0) {
>>> +                   goto error;
>>> +           }
>>> +   }
>>> +
>>> +   if(ret != ARCHIVE_EOF && ret != ARCHIVE_OK) { /* An error occurred */
>>> +           _alpm_log(handle, ALPM_LOG_ERROR, _("error while reading mtree 
>>> of package %s: %s\n"),
>>> +                           pkg->filename, archive_error_string(mtree));
>>> +           handle->pm_errno = ALPM_ERR_LIBARCHIVE;
>>> +           goto error;
>>> +   }
>>> +
>>> +   free(mtree_data);
>>> +   _alpm_archive_read_free(mtree);
>>> +   _alpm_log(handle, ALPM_LOG_DEBUG, "finished mtree reading for %s\n", 
>>> pkg->filename);
>>> +   return 0;
>>> +error:
>>> +   free(mtree_data);
>>> +   _alpm_archive_read_free(mtree);
>>> +   return -1;
>>> +}
>>> +
>>> +/**
>>>   * Load a package and create the corresponding alpm_pkg_t struct.
>>>   * @param handle the context handle
>>>   * @param pkgfile path to the package file
>>> @@ -409,7 +541,7 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, 
>>> size_t *files_size,
>>>  alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle,
>>>             const char *pkgfile, int full)
>>>  {
>>> -   int ret, fd, config = 0;
>>> +   int ret, fd, config, hit_mtree = 0;
>>>     struct archive *archive;
>>>     struct archive_entry *entry;
>>>     alpm_pkg_t *newpkg;
>>> @@ -472,7 +604,17 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t 
>>> *handle,
>>>                     continue;
>>>             } else if(handle_simple_path(newpkg, entry_name)) {
>>>                     continue;
>>
>> handle_simple_path will return 1 if the file starts with a ".".
>> ".MTREE" starts with a ".", so....
>>
>>> -           } else if(full) {
>>> +           } else if(full && strcmp(entry_name, ".MTREE") == 0) {
>>
>> ... this can never happen.
> 
> Thanks, messed that up when rebasing some other changes.
> 
>>> +                   /* building the file list: cheap way
>>> +                    * get the filelist from the mtree file rather than 
>>> scanning
>>> +                    * the whole archive  */
>>> +                   if(build_filelist_from_mtree(handle, newpkg, archive) < 
>>> 0) {
>>> +                           goto error;
>>> +                   }
>>> +                   hit_mtree = 1;
>>> +                   continue;
>>> +           } else if(full && !hit_mtree) {
>>> +                   /* building the file list: expensive way */
>>>                     if(add_entry_to_files_list(newpkg, &files_size, entry, 
>>> entry_name) < 0) {
>>>                             goto error;
>>>                     }
>>> @@ -486,7 +628,7 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t 
>>> *handle,
>>>             }
>>>  
>>>             /* if we are not doing a full read, see if we have all we need 
>>> */
>>> -           if(!full && config) {
>>> +           if((!full || hit_mtree) && config) {
>>
>> Why does hit_mtree matter here?  if(!full) we should never read the
>> mtree file.
> 
> hit_mtree matters because we want to break the loop once we have a file
> list (if we need one), but only if we also have read .PKGINFO.
> 
> Since .MTREE could be before .PKGINFO in the tarball I can't break just
> after reading the mtree.
> 
> Does that make sense?
> 

Sure - I was making assumptions given the order of the files in the tarball.

Allan


Reply via email to