On 02.02.2014 12:13, Allan McRae wrote: > On 02/02/14 04:12, Florian Pritz wrote: >> This greatly speeds up file list generation times by avoiding >> uncompressing the whole package. >> >> pacman -S base with a deliberate file conflict: >> before: 9.1 seconds >> after: 2.2 seconds >> >> Signed-off-by: Florian Pritz <[email protected]> >> --- >> >> v4: >> >> - fix mem leak when cleaning up pkg->files >> - make mtree_*size names a little more descriptive >> - switch comments to c style >> - break some long lines, there are still a few left.. >> - files starting with . are handled by handle_simple_path now >> - pass the path directly to add_entry_to_files_list >> - consolidate early breaking in main loop >> >> NOT changed: >> >> - ret for archive_read_next_header: copied from original code in >> _alpm_pkg_load_internal, if anyone cares feel free to change it later >> - I didn't break all lines > 80 chars, that should probably be done in a >> line break cleanup only commit >> >> lib/libalpm/be_package.c | 150 >> +++++++++++++++++++++++++++++++++++++++++++++-- >> 1 file changed, 146 insertions(+), 4 deletions(-) >> >> diff --git a/lib/libalpm/be_package.c b/lib/libalpm/be_package.c >> index 393f436..ac52ae1 100644 >> --- a/lib/libalpm/be_package.c >> +++ b/lib/libalpm/be_package.c >> @@ -386,13 +386,36 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, >> size_t *files_size, >> { >> const size_t files_count = pkg->files.count; >> alpm_file_t *current_file; >> + mode_t type; >> + size_t pathlen; >> >> if(!_alpm_realloc((void **)&pkg->files.files, files_size, (files_count >> + 1) * sizeof(alpm_file_t))) { >> return -1; >> } >> >> + type = archive_entry_filetype(entry); >> + >> + pathlen = strlen(path); >> + >> current_file = pkg->files.files + files_count; >> - STRDUP(current_file->name, path, return -1); >> + >> + /* mtree paths don't contain a tailing slash, those we get from >> + * the archive directly do (expensive way) >> + * Other code relies on it to detect directories so add it here.*/ >> + if(type == AE_IFDIR && path[pathlen - 1] != '/') { >> + /* 2 = 1 for / + 1 for \0 */ >> + char *newpath = malloc(pathlen + 2); >> + if (!newpath) { >> + _alpm_alloc_fail(pathlen + 2); >> + return -1; >> + } >> + strcpy(newpath, path); >> + newpath[pathlen] = '/'; >> + newpath[pathlen + 1] = '\0'; >> + current_file->name = newpath; >> + } else { >> + STRDUP(current_file->name, path, return -1); >> + } >> current_file->size = archive_entry_size(entry); >> current_file->mode = archive_entry_mode(entry); >> pkg->files.count++; >> @@ -400,6 +423,115 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg, >> size_t *files_size, >> } >> >> /** >> + * Generate a new file list from an mtree file and add it to the package. >> + * An existing file list will be free()d first. >> + * >> + * archive should point to an archive struct which is already at the >> + * position of the mtree's header. >> + * >> + * @param handle >> + * @param pkg package to add the file list to >> + * @param archive archive containing the mtree >> + * @return 0 on success, <0 on error >> + */ >> +static int build_filelist_from_mtree(alpm_handle_t *handle, alpm_pkg_t >> *pkg, struct archive *archive) >> +{ >> + int ret = 0; >> + size_t mtree_maxsize = 0; >> + size_t mtree_cursize = 0; >> + size_t files_size = 0; /* we clean up the existing array so this is >> fine */ >> + char *mtree_data = NULL; >> + struct archive *mtree; >> + struct archive_entry *mtree_entry = NULL; >> + >> + _alpm_log(handle, ALPM_LOG_DEBUG, >> + "found mtree for package %s, getting file list\n", >> pkg->filename); >> + >> + /* throw away any files we might have already found */ >> + for (size_t i = 0; i < pkg->files.count; i++) { >> + free(pkg->files.files[i].name); >> + } >> + free(pkg->files.files); >> + pkg->files.files = NULL; >> + pkg->files.count = 0; >> + >> + /* create a new archive to parse the mtree and load it from archive >> into memory */ >> + /* TODO: split this into a function */ >> + if((mtree = archive_read_new()) == NULL) { >> + handle->pm_errno = ALPM_ERR_LIBARCHIVE; >> + goto error; >> + } >> + >> + _alpm_archive_read_support_filter_all(mtree); >> + archive_read_support_format_mtree(mtree); >> + >> + /* TODO: split this into a function */ >> + while(1) { >> + ssize_t size; >> + >> + if(!_alpm_realloc((void **)&mtree_data, &mtree_maxsize, >> mtree_cursize + ALPM_BUFFER_SIZE)) { >> + goto error; >> + } >> + >> + size = archive_read_data(archive, mtree_data + mtree_cursize, >> ALPM_BUFFER_SIZE); >> + >> + if(size < 0) { >> + _alpm_log(handle, ALPM_LOG_ERROR, _("error while >> reading package %s: %s\n"), >> + pkg->filename, >> archive_error_string(archive)); >> + handle->pm_errno = ALPM_ERR_LIBARCHIVE; >> + goto error; >> + } >> + if(size == 0) { >> + break; >> + } >> + >> + mtree_cursize += size; >> + } >> + >> + /* TODO: need wrapper function for archive_read_open_memory for old >> libarchive versions? */ > > When did support for that function get added?
Seems like it's been there from the beginning of the git repo (2007) so
I guess I can remove that comment.
>> + if(archive_read_open_memory(mtree, mtree_data, mtree_cursize)) {
>> + _alpm_log(handle, ALPM_LOG_ERROR,
>> + _("error while reading mtree of package %s:
>> %s\n"),
>> + pkg->filename, archive_error_string(mtree));
>> + handle->pm_errno = ALPM_ERR_LIBARCHIVE;
>> + goto error;
>> + }
>> +
>> + while((ret = archive_read_next_header(mtree, &mtree_entry)) ==
>> ARCHIVE_OK) {
>> + const char *path = archive_entry_pathname(mtree_entry);
>> +
>> + /* strip leading "./" from path entries */
>> + if(path[0] == '.' && path[1] == '/') {
>> + path += 2;
>> + }
>> +
>> + if(handle_simple_path(pkg, path)) {
>> + continue;
>> + }
>> +
>> + if(add_entry_to_files_list(pkg, &files_size, mtree_entry, path)
>> < 0) {
>> + goto error;
>> + }
>> + }
>> +
>> + if(ret != ARCHIVE_EOF && ret != ARCHIVE_OK) { /* An error occurred */
>> + _alpm_log(handle, ALPM_LOG_ERROR, _("error while reading mtree
>> of package %s: %s\n"),
>> + pkg->filename, archive_error_string(mtree));
>> + handle->pm_errno = ALPM_ERR_LIBARCHIVE;
>> + goto error;
>> + }
>> +
>> + free(mtree_data);
>> + _alpm_archive_read_free(mtree);
>> + _alpm_log(handle, ALPM_LOG_DEBUG, "finished mtree reading for %s\n",
>> pkg->filename);
>> + return 0;
>> +error:
>> + free(mtree_data);
>> + _alpm_archive_read_free(mtree);
>> + return -1;
>> +}
>> +
>> +/**
>> * Load a package and create the corresponding alpm_pkg_t struct.
>> * @param handle the context handle
>> * @param pkgfile path to the package file
>> @@ -409,7 +541,7 @@ static int add_entry_to_files_list(alpm_pkg_t *pkg,
>> size_t *files_size,
>> alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t *handle,
>> const char *pkgfile, int full)
>> {
>> - int ret, fd, config = 0;
>> + int ret, fd, config, hit_mtree = 0;
>> struct archive *archive;
>> struct archive_entry *entry;
>> alpm_pkg_t *newpkg;
>> @@ -472,7 +604,17 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t
>> *handle,
>> continue;
>> } else if(handle_simple_path(newpkg, entry_name)) {
>> continue;
>
> handle_simple_path will return 1 if the file starts with a ".".
> ".MTREE" starts with a ".", so....
>
>> - } else if(full) {
>> + } else if(full && strcmp(entry_name, ".MTREE") == 0) {
>
> ... this can never happen.
Thanks, messed that up when rebasing some other changes.
>> + /* building the file list: cheap way
>> + * get the filelist from the mtree file rather than
>> scanning
>> + * the whole archive */
>> + if(build_filelist_from_mtree(handle, newpkg, archive) <
>> 0) {
>> + goto error;
>> + }
>> + hit_mtree = 1;
>> + continue;
>> + } else if(full && !hit_mtree) {
>> + /* building the file list: expensive way */
>> if(add_entry_to_files_list(newpkg, &files_size, entry,
>> entry_name) < 0) {
>> goto error;
>> }
>> @@ -486,7 +628,7 @@ alpm_pkg_t *_alpm_pkg_load_internal(alpm_handle_t
>> *handle,
>> }
>>
>> /* if we are not doing a full read, see if we have all we need
>> */
>> - if(!full && config) {
>> + if((!full || hit_mtree) && config) {
>
> Why does hit_mtree matter here? if(!full) we should never read the
> mtree file.
hit_mtree matters because we want to break the loop once we have a file
list (if we need one), but only if we also have read .PKGINFO.
Since .MTREE could be before .PKGINFO in the tarball I can't break just
after reading the mtree.
Does that make sense?
>
>> break;
>> }
>> }
>>
>
signature.asc
Description: OpenPGP digital signature
