Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package mcelog for openSUSE:Factory checked in at 2025-04-15 16:45:07
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/mcelog (Old)
 and /work/SRC/openSUSE:Factory/.mcelog.new.1907 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "mcelog" Tue Apr 15 16:45:07 2025 rev:71 rq:1269206 version:204 Changes: -------- --- /work/SRC/openSUSE:Factory/mcelog/mcelog.changes 2025-01-29 16:09:34.725475361 +0100 +++ /work/SRC/openSUSE:Factory/.mcelog.new.1907/mcelog.changes 2025-04-15 16:48:10.480803822 +0200 @@ -1,0 +2,8 @@ +Fri Apr 11 12:34:24 UTC 2025 - tr...@suse.de + +- Update to version 204: + * Enable offline retries by default + * Add ability to retry failed page offlines with an exponential backoff + * Fix misspelling in variable name + +------------------------------------------------------------------- Old: ---- mcelog-202.obscpio New: ---- mcelog-204.obscpio ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ mcelog.spec ++++++ --- /var/tmp/diff_new_pack.1EDHAv/_old 2025-04-15 16:48:11.476845529 +0200 +++ /var/tmp/diff_new_pack.1EDHAv/_new 2025-04-15 16:48:11.476845529 +0200 @@ -21,7 +21,7 @@ %define _fillupdir %{_localstatedir}/adm/fillup-templates %endif Name: mcelog -Version: 202 +Version: 204 Release: 0 Summary: Log Machine Check Events License: GPL-2.0-only ++++++ _servicedata ++++++ --- /var/tmp/diff_new_pack.1EDHAv/_old 2025-04-15 16:48:11.532847874 +0200 +++ /var/tmp/diff_new_pack.1EDHAv/_new 2025-04-15 16:48:11.536848042 +0200 @@ -7,6 +7,6 @@ <param name="url">https://github.com/andikleen/mcelog.git</param> <param name="changesrevision">1f3a769c8fb736815a56ea104b7b751c5565cb88</param></service><service name="tar_scm"> <param name="url">https://git.kernel.org/pub/scm/utils/cpu/mce/mcelog.git</param> - <param name="changesrevision">6b3fe165f35ef9897a6573042851922b3a3e4e4d</param></service></servicedata> + <param name="changesrevision">ad244c6b60fcbd08a5d73497bfb3487983801598</param></service></servicedata> (No newline at EOF) ++++++ mcelog-202.obscpio -> mcelog-204.obscpio ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-202/leaky-bucket.c 
new/mcelog-204/leaky-bucket.c --- old/mcelog-202/leaky-bucket.c 2024-12-02 19:00:44.000000000 +0100 +++ new/mcelog-204/leaky-bucket.c 2025-02-15 20:29:39.000000000 +0100 @@ -27,12 +27,12 @@ } void bucket_age(const struct bucket_conf *c, struct leaky_bucket *b, - time_t now) + time_t now, unsigned char capacity_multiplier) { long diff; diff = now - b->tstamp; if (diff >= c->agetime) { - unsigned age = (diff / (double)c->agetime) * c->capacity; + unsigned age = (diff / (double)c->agetime) * c->capacity * capacity_multiplier; b->tstamp = now; if (age > b->count) b->count = 0; @@ -44,13 +44,13 @@ /* Account increase in leaky bucket. Return 1 if bucket overflowed. */ int __bucket_account(const struct bucket_conf *c, struct leaky_bucket *b, - unsigned inc, time_t t) + unsigned inc, time_t t, unsigned char capacity_multiplier) { if (c->capacity == 0) return 0; - bucket_age(c, b, t); + bucket_age(c, b, t, capacity_multiplier); b->count += inc; - if (b->count >= c->capacity) { + if (b->count >= c->capacity * capacity_multiplier) { b->excess += b->count; /* should disable overflow completely in the same time unit */ b->count = 0; @@ -62,7 +62,7 @@ int bucket_account(const struct bucket_conf *c, struct leaky_bucket *b, unsigned inc) { - return __bucket_account(c, b, inc, bucket_time()); + return __bucket_account(c, b, inc, bucket_time(), 1); } static int timeconv(char unit, int *out) @@ -89,7 +89,6 @@ xasprintf(&buf, "not enabled"); } else { int unit = 0; - //bucket_age(c, b, bucket_time()); timeconv(c->tunit, &unit); xasprintf(&buf, "%u in %u%c", b->count + b->excess, c->agetime/unit, c->tunit); @@ -198,7 +197,7 @@ for (i = 1; i <= TOTAL_EVENTS; i++) { event_time = start_time + i * SECONDS_PER_EVENT; - ret = __bucket_account(&c, &b, 1, event_time); + ret = __bucket_account(&c, &b, 1, event_time, 1); #ifdef TEST_LEAKY_BUCKET_DEBUG if (ret) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-202/leaky-bucket.h 
new/mcelog-204/leaky-bucket.h --- old/mcelog-202/leaky-bucket.h 2024-12-02 19:00:44.000000000 +0100 +++ new/mcelog-204/leaky-bucket.h 2025-02-15 20:29:39.000000000 +0100 @@ -23,12 +23,12 @@ int bucket_account(const struct bucket_conf *c, struct leaky_bucket *b, unsigned inc); int __bucket_account(const struct bucket_conf *c, struct leaky_bucket *b, - unsigned inc, time_t time); + unsigned inc, time_t time, unsigned char capacity_multiplier); char *bucket_output(const struct bucket_conf *c, struct leaky_bucket *b); int bucket_conf_init(struct bucket_conf *c, const char *rate); void bucket_init(struct leaky_bucket *b); time_t bucket_time(void); void bucket_age(const struct bucket_conf *c, struct leaky_bucket *b, - time_t now); + time_t now, unsigned char capacity_multiplier); #endif diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-202/mcelog.conf new/mcelog-204/mcelog.conf --- old/mcelog-202/mcelog.conf 2024-12-02 19:00:44.000000000 +0100 +++ new/mcelog-204/mcelog.conf 2025-02-15 20:29:39.000000000 +0100 @@ -159,6 +159,9 @@ # Threshold for the correct memory errors trigger script. memory-ce-threshold = 2 / 24h +# Retry failed offlines with exponential backoff +memory-ce-offline-retry = yes + # Trigger script for corrected errors. 
# memory-ce-trigger = page-error-trigger diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-202/memdb.c new/mcelog-204/memdb.c --- old/mcelog-202/memdb.c 2024-12-02 19:00:44.000000000 +0100 +++ new/mcelog-204/memdb.c 2025-02-15 20:29:39.000000000 +0100 @@ -196,7 +196,7 @@ { if (corr_err_cnt && --corr_err_cnt > 0) { md->ce.count += corr_err_cnt; - if (__bucket_account(&t->ce_bucket_conf, &md->ce.bucket, corr_err_cnt, m->time)) { + if (__bucket_account(&t->ce_bucket_conf, &md->ce.bucket, corr_err_cnt, m->time, 1)) { char *msg; xasprintf(&msg, "Fallback %s memory error count %d exceeded threshold", t->type, corr_err_cnt); @@ -217,11 +217,11 @@ if (m->status & MCI_STATUS_UC) { md->uc.count++; - if (__bucket_account(&t->uc_bucket_conf, &md->uc.bucket, 1, m->time)) + if (__bucket_account(&t->uc_bucket_conf, &md->uc.bucket, 1, m->time, 1)) memdb_trigger(msg, md, m->time, &md->uc, &t->uc_bucket_conf, NULL, false, reporter); } else { md->ce.count++; - if (__bucket_account(&t->ce_bucket_conf, &md->ce.bucket, 1, m->time)) + if (__bucket_account(&t->ce_bucket_conf, &md->ce.bucket, 1, m->time, 1)) memdb_trigger(msg, md, m->time, &md->ce, &t->ce_bucket_conf, NULL, false, reporter); } free(msg); @@ -278,7 +278,7 @@ int all = (flags & DUMP_ALL); char *s; - bucket_age(bc, &e->bucket, bucket_time()); + bucket_age(bc, &e->bucket, bucket_time(), 1); if (e->count || e->bucket.count || all) fprintf(f, "%s:\n", name); if (e->count || all) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/mcelog-202/page.c new/mcelog-204/page.c --- old/mcelog-202/page.c 2024-12-02 19:00:44.000000000 +0100 +++ new/mcelog-204/page.c 2025-02-15 20:29:39.000000000 +0100 @@ -41,14 +41,16 @@ #define PAGE_SHIFT 12 #define PAGE_SIZE (1UL << PAGE_SHIFT) +#define OFFLINE_RETRY_EXP_BACKOFF 2 +#define NO_OFFLINE_RETRY 1 enum { PAGE_ONLINE = 0, PAGE_OFFLINE = 1, PAGE_OFFLINE_FAILED = 2 }; struct mempage { struct rb_node nd; - 
/* one char used by rb_node */ char offlined; char triggered; + unsigned char offline_threshold_multiplier; // 1(32bit)-5(64bit) bytes of padding to play with here u64 addr; struct err_type ce; @@ -74,12 +76,13 @@ static int corr_err_counters; static struct mempage_cluster *mp_cluster; -static struct mempage_replacement mp_repalcement; +static struct mempage_replacement mp_replacement; static struct rb_root mempage_root; static LIST_HEAD(mempage_cluster_lru_list); static struct bucket_conf page_trigger_conf; static struct bucket_conf mp_replacement_trigger_conf; static char *page_error_pre_soft_trigger, *page_error_post_soft_trigger; +static unsigned offline_retry_backoff_base = NO_OFFLINE_RETRY; static const char *page_state[] = { [PAGE_ONLINE] = "online", @@ -111,6 +114,7 @@ mp = &mp_cluster->mp[mp_cluster->mp_used++]; mp->offlined = PAGE_ONLINE; mp->triggered = 0; + mp->offline_threshold_multiplier = NO_OFFLINE_RETRY; mp->ce.count = 0; return mp; @@ -241,6 +245,7 @@ if (memory_offline(addr) < 0) { Lprintf("Offlining page %llx failed: %s\n", addr, strerror(errno)); mp->offlined = PAGE_OFFLINE_FAILED; + mp->offline_threshold_multiplier *= offline_retry_backoff_base; } else mp->offlined = PAGE_OFFLINE; } @@ -323,6 +328,7 @@ bucket_init(&mp->ce.bucket); mempage_insert(addr, mp); mempage_cluster_lru_list_insert(to_cluster(mp)); + mp->offline_threshold_multiplier = NO_OFFLINE_RETRY; corr_err_counters++; } else if (!mp) { mp = mempage_replace(); @@ -331,14 +337,14 @@ mempage_cluster_lru_list_update(to_cluster(mp)); /* Report how often the replacement of counter 'mp' happened */ - ++mp_repalcement.count; - if (__bucket_account(&mp_replacement_trigger_conf, &mp_repalcement.bucket, 1, t)) { - thresh = bucket_output(&mp_replacement_trigger_conf, &mp_repalcement.bucket); + ++mp_replacement.count; + if (__bucket_account(&mp_replacement_trigger_conf, &mp_replacement.bucket, 1, t, 1)) { + thresh = bucket_output(&mp_replacement_trigger_conf, &mp_replacement.bucket); 
xasprintf(&msg, "Replacements of page correctable error counter exceed threshold %s", thresh); free(thresh); thresh = NULL; - counter_trigger(msg, t, &mp_repalcement, &mp_replacement_trigger_conf, false); + counter_trigger(msg, t, &mp_replacement, &mp_replacement_trigger_conf, false); free(msg); msg = NULL; } @@ -346,10 +352,11 @@ mempage_cluster_lru_list_update(to_cluster(mp)); } ++mp->ce.count; - if (__bucket_account(&page_trigger_conf, &mp->ce.bucket, 1, t)) { + if (__bucket_account(&page_trigger_conf, &mp->ce.bucket, 1, t, mp->offline_threshold_multiplier)) { struct memdimm *md; - if (mp->offlined != PAGE_ONLINE) + if ((offline_retry_backoff_base == OFFLINE_RETRY_EXP_BACKOFF && mp->offlined == PAGE_OFFLINE) || + (offline_retry_backoff_base == NO_OFFLINE_RETRY && mp->offlined != PAGE_ONLINE)) return; /* Only do triggers and messages for online pages */ thresh = bucket_output(&page_trigger_conf, &mp->ce.bucket); @@ -432,6 +439,8 @@ config_trigger("page", "memory-ce", &page_trigger_conf); config_trigger("page", "memory-ce-counter-replacement", &mp_replacement_trigger_conf); + if (config_bool("page", "memory-ce-offline-retry") == 1) + offline_retry_backoff_base = OFFLINE_RETRY_EXP_BACKOFF; n = config_choice("page", "memory-ce-action", offline_choice); if (n >= 0) offline = n; @@ -461,5 +470,5 @@ if (n != max_corr_err_counters) Lprintf("Round up max-corr-err-counters from %d to %d\n", n, max_corr_err_counters); - bucket_init(&mp_repalcement.bucket); + bucket_init(&mp_replacement.bucket); } ++++++ mcelog.obsinfo ++++++ --- /var/tmp/diff_new_pack.1EDHAv/_old 2025-04-15 16:48:11.708855244 +0200 +++ /var/tmp/diff_new_pack.1EDHAv/_new 2025-04-15 16:48:11.712855412 +0200 @@ -1,5 +1,5 @@ name: mcelog -version: 202 -mtime: 1733162444 -commit: 6b3fe165f35ef9897a6573042851922b3a3e4e4d +version: 204 +mtime: 1739647779 +commit: ad244c6b60fcbd08a5d73497bfb3487983801598