Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package createrepo_c for openSUSE:Factory checked in at 2022-07-29 16:47:06
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/createrepo_c (Old)
 and      /work/SRC/openSUSE:Factory/.createrepo_c.new.1533 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "createrepo_c" Fri Jul 29 16:47:06 2022 rev:17 rq:991447 version:0.20.1 Changes: -------- --- /work/SRC/openSUSE:Factory/createrepo_c/createrepo_c.changes 2022-04-02 18:20:05.778574300 +0200 +++ /work/SRC/openSUSE:Factory/.createrepo_c.new.1533/createrepo_c.changes 2022-07-29 16:47:17.874552459 +0200 @@ -1,0 +2,7 @@ +Wed Jul 27 19:34:32 UTC 2022 - Andreas Stieger <[email protected]> + +- update to 0.20.1: + * fix performance problems with large repositories related to + tasks queue management + +------------------------------------------------------------------- Old: ---- createrepo_c-0.20.0.tar.gz New: ---- createrepo_c-0.20.1.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ createrepo_c.spec ++++++ --- /var/tmp/diff_new_pack.gZCUeG/_old 2022-07-29 16:47:18.342553760 +0200 +++ /var/tmp/diff_new_pack.gZCUeG/_new 2022-07-29 16:47:18.342553760 +0200 @@ -1,7 +1,7 @@ # # spec file for package createrepo_c # -# Copyright (c) 2021 SUSE LLC +# Copyright (c) 2022 SUSE LLC # Copyright (c) 2022 Neal Gompa <[email protected]>. 
# # All modifications and additions to the file contributed by third parties @@ -54,7 +54,7 @@ %define devname lib%{name}-devel Name: createrepo_c -Version: 0.20.0 +Version: 0.20.1 Release: 0 Summary: RPM repository metadata generation utility License: GPL-2.0-or-later ++++++ createrepo_c-0.20.0.tar.gz -> createrepo_c-0.20.1.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/createrepo_c-0.20.0/VERSION.cmake new/createrepo_c-0.20.1/VERSION.cmake --- old/createrepo_c-0.20.0/VERSION.cmake 2022-03-29 06:59:01.000000000 +0200 +++ new/createrepo_c-0.20.1/VERSION.cmake 2022-06-28 09:05:56.000000000 +0200 @@ -1,3 +1,3 @@ SET(CR_MAJOR "0") SET(CR_MINOR "20") -SET(CR_PATCH "0") +SET(CR_PATCH "1") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/createrepo_c-0.20.0/createrepo_c.spec new/createrepo_c-0.20.1/createrepo_c.spec --- old/createrepo_c-0.20.0/createrepo_c.spec 2022-03-29 06:59:01.000000000 +0200 +++ new/createrepo_c-0.20.1/createrepo_c.spec 2022-06-28 09:05:56.000000000 +0200 @@ -30,7 +30,7 @@ Summary: Creates a common metadata repository Name: createrepo_c -Version: 0.20.0 +Version: 0.20.1 Release: 1%{?dist} License: GPLv2+ URL: https://github.com/rpm-software-management/createrepo_c diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/createrepo_c-0.20.0/src/CMakeLists.txt new/createrepo_c-0.20.1/src/CMakeLists.txt --- old/createrepo_c-0.20.0/src/CMakeLists.txt 2022-03-29 06:59:01.000000000 +0200 +++ new/createrepo_c-0.20.1/src/CMakeLists.txt 2022-06-28 09:05:56.000000000 +0200 @@ -40,6 +40,7 @@ constants.h mergerepo_c.h createrepo_c.h + createrepo_shared.h deltarpms.h error.h helpers.h diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/createrepo_c-0.20.0/src/createrepo_c.c new/createrepo_c-0.20.1/src/createrepo_c.c --- old/createrepo_c-0.20.0/src/createrepo_c.c 2022-03-29 
06:59:01.000000000 +0200 +++ new/createrepo_c-0.20.1/src/createrepo_c.c 2022-06-28 09:05:56.000000000 +0200 @@ -106,14 +106,13 @@ * * @param a_p Pointer to first struct PoolTask * @param b_p Pointer to second struct PoolTask - * @param user_data Unused (user data) */ static int -task_cmp(gconstpointer a_p, gconstpointer b_p, G_GNUC_UNUSED gpointer user_data) +task_cmp(gconstpointer a_p, gconstpointer b_p) { int ret; - const struct PoolTask *a = a_p; - const struct PoolTask *b = b_p; + const struct PoolTask *a = *(struct PoolTask **) a_p; + const struct PoolTask *b = *(struct PoolTask **) b_p; ret = g_strcmp0(a->filename, b->filename); if (ret) return ret; return g_strcmp0(a->path, b->path); @@ -143,7 +142,7 @@ long *task_count, int media_id) { - GQueue queue = G_QUEUE_INIT; + GArray *package_tasks = g_array_new(FALSE, FALSE, sizeof(struct PoolTask *)); struct PoolTask *task; if ( ! cmd_options->split ) { @@ -243,7 +242,7 @@ task->path = g_strdup(dirname); *current_pkglist = g_slist_prepend(*current_pkglist, task->filename); // TODO: One common path for all tasks with the same path? 
- g_queue_insert_sorted(&queue, task, task_cmp, NULL); + g_array_append_val(package_tasks, task); } else { g_free(full_path); } @@ -299,19 +298,24 @@ task->filename = g_strdup(filename); // foobar.rpm task->path = strndup(relative_path, x); // packages/i386/ *current_pkglist = g_slist_prepend(*current_pkglist, task->filename); - g_queue_insert_sorted(&queue, task, task_cmp, NULL); + g_array_append_val(package_tasks, task); } } } + g_array_sort(package_tasks, task_cmp); + // Push sorted tasks into the thread pool - while ((task = g_queue_pop_head(&queue)) != NULL) { + for (int i=0; i<package_tasks->len; i++) { + task = g_array_index(package_tasks, struct PoolTask *, i); task->id = *task_count; task->media_id = media_id; g_thread_pool_push(pool, task, NULL); ++*task_count; } + g_array_free(package_tasks, TRUE); + return *task_count; } @@ -427,10 +431,10 @@ } /** Creates list of cr_RepomdRecords from list - * of additional metadata (cr_Metadatum) + * of additional metadata (cr_Metadatum) * * @param additional_metadata List of cr_Metadatum - * @param repomd_checksum_type + * @param repomd_checksum_type * * @return New GSList of cr_RepomdRecords */ @@ -439,7 +443,7 @@ cr_ChecksumType repomd_checksum_type) { GError *tmp_err = NULL; - GSList *additional_metadata_rec = NULL; + GSList *additional_metadata_rec = NULL; GSList *element = additional_metadata; for (; element; element=g_slist_next(element)) { additional_metadata_rec = g_slist_prepend(additional_metadata_rec, @@ -468,9 +472,9 @@ * use content stats of the new file * * @param task Rewrite pkg count task - * @param filename Name of file with wrong package count + * @param filename Name of file with wrong package count * @param exit_val If errors occured set createrepo_c exit value - * @param content_stat Content stats for filename + * @param content_stat Content stats for filename * */ static void @@ -549,6 +553,31 @@ g_hash_table_size(cr_metadata_hashtable(*md))); } +// Sorting function for location_href strings, 
by length. +// Compatible with g_array_sort() +static int strlensort(gconstpointer a, gconstpointer b) +{ + // Function is supposed to take a double-pointer so unfortunately you cannot pass a + // string-comparison function directly. + gchar **a_ptr = (gchar **)a; + gchar **b_ptr = (gchar **)b; + + int a_len = strnlen(*a_ptr, 4096); + int b_len = strnlen(*b_ptr, 4096); + if (a_len > b_len) + { + return 1; + } + else if (b_len > a_len) + { + return -1; + } + else + { + return 0; + } +} + int main(int argc, char **argv) { @@ -794,7 +823,7 @@ cr_Metadatum *new_groupfile_metadatum = NULL; - // Groupfile specified as argument + // Groupfile specified as argument if (cmd_options->groupfile_fullpath) { new_groupfile_metadatum = g_malloc0(sizeof(cr_Metadatum)); new_groupfile_metadatum->name = cr_copy_metadatum(cmd_options->groupfile_fullpath, tmp_out_repo, &tmp_err); @@ -1272,6 +1301,7 @@ user_data.repodir_name_len = strlen(in_dir); user_data.task_count = task_count; user_data.package_count = 0; + user_data.nevra_table = g_hash_table_new(g_str_hash, g_str_equal); user_data.skip_stat = cmd_options->skip_stat; user_data.old_metadata = old_metadata; user_data.id_pri = 0; @@ -1286,6 +1316,7 @@ user_data.had_errors = 0; user_data.output_pkg_list = output_pkg_list; + g_mutex_init(&(user_data.mutex_nevra_table)); g_mutex_init(&(user_data.mutex_output_pkg_list)); g_mutex_init(&(user_data.mutex_pri)); g_mutex_init(&(user_data.mutex_fil)); @@ -1306,9 +1337,36 @@ // Wait until pool is finished g_thread_pool_free(pool, FALSE, TRUE); + GHashTableIter iter; + gpointer key, value; + + g_hash_table_iter_init(&iter, user_data.nevra_table); + while (g_hash_table_iter_next(&iter, &key, &value)) + { + gchar *nevra = (gchar *) key; + GArray *locations = (GArray *) value; + if (locations->len > 1) { + g_warning("Package '%s' has duplicate metadata entries, only one should exist", nevra); + + g_array_sort(locations, strlensort); + + for (int i=0; i<locations->len; i++) { + g_warning(" Sourced 
from location: \'%s\'", g_array_index(locations, gchar *, i)); + } + } + + g_hash_table_iter_steal(&iter); + g_free(nevra); + for (int i = 0; i < locations->len; i++) { + g_free(g_array_index(locations, gchar *, i)); + } + g_array_free(locations, TRUE); + } + g_hash_table_destroy(user_data.nevra_table); + // if there were any errors, exit nonzero if ( user_data.had_errors ) { - exit_val = 2; + exit_val = 2; } g_message("Pool finished%s", (user_data.had_errors ? " with errors" : "")); @@ -1449,6 +1507,7 @@ } g_queue_free(user_data.buffer); + g_mutex_clear(&(user_data.mutex_nevra_table)); g_mutex_clear(&(user_data.mutex_output_pkg_list)); g_mutex_clear(&(user_data.mutex_pri)); g_mutex_clear(&(user_data.mutex_fil)); @@ -1478,7 +1537,7 @@ cr_RepomdRecord *prestodelta_zck_rec = NULL; // List of cr_RepomdRecords - GSList *additional_metadata_rec = NULL; + GSList *additional_metadata_rec = NULL; // XML cr_repomd_record_load_contentstat(pri_xml_rec, pri_stat); @@ -1729,9 +1788,9 @@ g_clear_error(&tmp_err); exit(EXIT_FAILURE); } - /* Only create additional_metadata_zck if additional_metadata isn't already zchunk + /* Only create additional_metadata_zck if additional_metadata isn't already zchunk * and its zck version doesn't yet exists */ - if (com_type != CR_CW_ZCK_COMPRESSION && + if (com_type != CR_CW_ZCK_COMPRESSION && !g_slist_find_custom(additional_metadata_rec, additional_metadatum_rec_zck_type, cr_cmp_repomd_record_type)) { GSList *additional_metadatum_rec_elem = g_slist_find_custom(additional_metadata_rec, ((cr_Metadatum *) element->data)->type, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/createrepo_c-0.20.0/src/createrepo_shared.c new/createrepo_c-0.20.1/src/createrepo_shared.c --- old/createrepo_c-0.20.0/src/createrepo_shared.c 2022-03-29 06:59:01.000000000 +0200 +++ new/createrepo_c-0.20.1/src/createrepo_shared.c 2022-06-28 09:05:56.000000000 +0200 @@ -230,11 +230,12 @@ // Try to create own - just as a lock if 
(g_mkdir(lock_dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH)) { + const gchar * mkdir_error = g_strerror(errno); g_critical("(--ignore-lock enabled) Cannot create %s: %s", - lock_dir, g_strerror(errno)); + lock_dir, mkdir_error); g_set_error(err, CREATEREPO_C_ERROR, CRE_IO, "Cannot create: %s (--ignore-lock enabled): %s", - lock_dir, g_strerror(errno)); + lock_dir, mkdir_error); return FALSE; } else { g_debug("(--ignore-lock enabled) Own and empty %s created " @@ -249,11 +250,12 @@ tmp_repodata_dir = cr_append_pid_and_datetime(tmp, "/"); if (g_mkdir(tmp_repodata_dir, S_IRWXU | S_IRWXG | S_IROTH | S_IXOTH)) { + const gchar * mkdir_error = g_strerror(errno); g_critical("(--ignore-lock enabled) Cannot create %s: %s", - tmp_repodata_dir, g_strerror(errno)); + tmp_repodata_dir, mkdir_error); g_set_error(err, CREATEREPO_C_ERROR, CRE_IO, "Cannot create: %s (--ignore-lock enabled): %s", - tmp_repodata_dir, g_strerror(errno)); + tmp_repodata_dir, mkdir_error); return FALSE; } else { g_debug("(--ignore-lock enabled) For data generation is used: %s", diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/createrepo_c-0.20.0/src/createrepo_shared.h new/createrepo_c-0.20.1/src/createrepo_shared.h --- old/createrepo_c-0.20.0/src/createrepo_shared.h 2022-03-29 06:59:01.000000000 +0200 +++ new/createrepo_c-0.20.1/src/createrepo_shared.h 2022-06-28 09:05:56.000000000 +0200 @@ -120,13 +120,6 @@ void cr_setup_logging(gboolean quiet, gboolean verbose); -/** - * Set global pointer to exit value that is used in function set by atexit - * @param exit_val Pointer to exit_value int - */ -void -cr_set_global_exit_value(int *exit_val); - /** @} */ #ifdef __cplusplus diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/createrepo_c-0.20.0/src/dumper_thread.c new/createrepo_c-0.20.1/src/dumper_thread.c --- old/createrepo_c-0.20.0/src/dumper_thread.c 2022-03-29 06:59:01.000000000 +0200 +++ 
new/createrepo_c-0.20.1/src/dumper_thread.c 2022-06-28 09:05:56.000000000 +0200 @@ -343,10 +343,11 @@ if (!stat_buf) { struct stat stat_buf_own; if (stat(fullpath, &stat_buf_own) == -1) { + const gchar * stat_error = g_strerror(errno); g_warning("%s: stat(%s) error (%s)", __func__, - fullpath, g_strerror(errno)); + fullpath, stat_error); g_set_error(err, CREATEREPO_C_ERROR, CRE_IO, "stat(%s) failed: %s", - fullpath, g_strerror(errno)); + fullpath, stat_error); goto errexit; } pkg->time_file = stat_buf_own.st_mtime; @@ -550,6 +551,19 @@ } #endif + // Allow checking that the same package (NEVRA) isn't present multiple times in the metadata + // Keep a hashtable of NEVRA mapped to an array-list of location_href values + g_mutex_lock(&(udata->mutex_nevra_table)); + gchar *nevra = cr_package_nevra(pkg); + GArray *pkg_locations = g_hash_table_lookup(udata->nevra_table, nevra); + if (!pkg_locations) { + pkg_locations = g_array_new(FALSE, TRUE, sizeof(gchar *)); + g_hash_table_insert(udata->nevra_table, nevra, pkg_locations); + } + gchar *location = g_strdup(pkg->location_href); + g_array_append_val(pkg_locations, location); + g_mutex_unlock(&(udata->mutex_nevra_table)); + // Buffering stuff g_mutex_lock(&(udata->mutex_buffer)); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/createrepo_c-0.20.0/src/dumper_thread.h new/createrepo_c-0.20.1/src/dumper_thread.h --- old/createrepo_c-0.20.0/src/dumper_thread.h 2022-03-29 06:59:01.000000000 +0200 +++ new/createrepo_c-0.20.1/src/dumper_thread.h 2022-06-28 09:05:56.000000000 +0200 @@ -69,6 +69,10 @@ long task_count; // Total number of task to process long package_count; // Total number of packages processed + // Duplicate package error checking + GMutex mutex_nevra_table; // Mutex for the table of NEVRAs + GHashTable *nevra_table; // Table of NEVRAs, with a list of location_href as key + // Update stuff gboolean skip_stat; // Skip stat() while updating cr_Metadata *old_metadata; // 
Loaded metadata diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/createrepo_c-0.20.0/src/misc.c new/createrepo_c-0.20.1/src/misc.c --- old/createrepo_c-0.20.0/src/misc.c 2022-03-29 06:59:01.000000000 +0200 +++ new/createrepo_c-0.20.1/src/misc.c 2022-06-28 09:05:56.000000000 +0200 @@ -261,10 +261,11 @@ FILE *fp = fopen(filename, "rb"); if (!fp) { + const gchar * fopen_error = g_strerror(errno); g_debug("%s: Cannot open file %s (%s)", __func__, filename, - g_strerror(errno)); + fopen_error); g_set_error(err, ERR_DOMAIN, CRE_IO, - "Cannot open %s: %s", filename, g_strerror(errno)); + "Cannot open %s: %s", filename, fopen_error); return results; } @@ -272,10 +273,10 @@ // Get header range if (fseek(fp, 104, SEEK_SET) != 0) { - g_debug("%s: fseek fail on %s (%s)", __func__, filename, - g_strerror(errno)); + const gchar * fseek_error = g_strerror(errno); + g_debug("%s: fseek fail on %s (%s)", __func__, filename, fseek_error); g_set_error(err, ERR_DOMAIN, CRE_IO, - "Cannot seek over %s: %s", filename, g_strerror(errno)); + "Cannot seek over %s: %s", filename, fseek_error); fclose(fp); return results; } @@ -406,19 +407,21 @@ // Open src file if ((orig = fopen(src, "rb")) == NULL) { + const gchar * fopen_error = g_strerror(errno); g_debug("%s: Cannot open source file %s (%s)", __func__, src, - g_strerror(errno)); + fopen_error); g_set_error(err, ERR_DOMAIN, CRE_IO, - "Cannot open file %s: %s", src, g_strerror(errno)); + "Cannot open file %s: %s", src, fopen_error); return FALSE; } // Open dst file if ((new = fopen(dst, "wb")) == NULL) { + const gchar * fopen_error = g_strerror(errno); g_debug("%s: Cannot open destination file %s (%s)", __func__, dst, - g_strerror(errno)); + fopen_error); g_set_error(err, ERR_DOMAIN, CRE_IO, - "Cannot open file %s: %s", dst, g_strerror(errno)); + "Cannot open file %s: %s", dst, fopen_error); return FALSE; } @@ -431,10 +434,11 @@ } if (fwrite(buf, 1, readed, new) != readed) { + const gchar * 
fwrite_error = g_strerror(errno); g_debug("%s: Error while copy %s -> %s (%s)", __func__, src, - dst, g_strerror(errno)); + dst, fwrite_error); g_set_error(err, ERR_DOMAIN, CRE_IO, - "Error while write %s: %s", dst, g_strerror(errno)); + "Error while write %s: %s", dst, fwrite_error); return FALSE; } } @@ -658,10 +662,11 @@ new = fopen(dst, "wb"); if (!new) { + const gchar * fopen_error = g_strerror(errno); g_debug("%s: Cannot open destination file %s (%s)", - __func__, dst, g_strerror(errno)); + __func__, dst, fopen_error); g_set_error(err, ERR_DOMAIN, CRE_IO, - "Cannot open %s: %s", src, g_strerror(errno)); + "Cannot open %s: %s", src, fopen_error); ret = CRE_IO; goto compress_file_cleanup; } @@ -677,10 +682,11 @@ } if (fwrite(buf, 1, readed, new) != (size_t) readed) { + const gchar * fwrite_error = g_strerror(errno); g_debug("%s: Error while copy %s -> %s (%s)", - __func__, src, dst, g_strerror(errno)); + __func__, src, dst, fwrite_error); g_set_error(err, ERR_DOMAIN, CRE_IO, - "Error while write %s: %s", dst, g_strerror(errno)); + "Error while write %s: %s", dst, fwrite_error); ret = CRE_IO; goto compress_file_cleanup; } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/createrepo_c-0.20.0/src/parsepkg.c new/createrepo_c-0.20.1/src/parsepkg.c --- old/createrepo_c-0.20.0/src/parsepkg.c 2022-03-29 06:59:01.000000000 +0200 +++ new/createrepo_c-0.20.1/src/parsepkg.c 2022-06-28 09:05:56.000000000 +0200 @@ -92,10 +92,11 @@ FD_t fd = Fopen(filename, "r.ufdio"); if (!fd) { + int fopen_error = errno; g_warning("%s: Fopen of %s failed %s", - __func__, filename, g_strerror(errno)); + __func__, filename, g_strerror(fopen_error)); g_set_error(err, ERR_DOMAIN, CRE_IO, - "Fopen failed: %s", g_strerror(errno)); + "Fopen failed: %s", g_strerror(fopen_error)); return FALSE; } @@ -176,10 +177,11 @@ if (!stat_buf) { struct stat stat_buf_own; if (stat(filename, &stat_buf_own) == -1) { + int stat_error = errno; g_warning("%s: stat(%s) error 
(%s)", __func__, - filename, g_strerror(errno)); + filename, g_strerror(stat_error)); g_set_error(err, ERR_DOMAIN, CRE_IO, "stat(%s) failed: %s", - filename, g_strerror(errno)); + filename, g_strerror(stat_error)); goto errexit; } pkg->time_file = stat_buf_own.st_mtime; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/createrepo_c-0.20.0/src/xml_parser.c new/createrepo_c-0.20.1/src/xml_parser.c --- old/createrepo_c-0.20.0/src/xml_parser.c 2022-03-29 06:59:01.000000000 +0200 +++ new/createrepo_c-0.20.1/src/xml_parser.c 2022-06-28 09:05:56.000000000 +0200 @@ -309,8 +309,8 @@ // we know which keys we want (they don't contain &) so // we don't have to check those. size_t nattr; - for (nattr = 1; attr[nattr]; nattr+=2) { - if (strchr((char *)attr[nattr], '&')) { + for (nattr = 0; attr[nattr]; nattr+=2) { + if (strchr((char *)attr[nattr+1], '&')) { *allocation_needed = TRUE; } } @@ -319,7 +319,7 @@ return attr; } - char **attr_copy = g_malloc0(sizeof(char *) * (nattr - 1)); + char **attr_copy = g_malloc0(sizeof(char *) * (nattr + 1)); if (attr_copy) { for (nattr = 0; attr[nattr]; nattr++) { if (strchr((char *)attr[nattr], '&')) {
