Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package openQA for openSUSE:Factory checked in at 2025-03-28 09:37:03 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/openQA (Old) and /work/SRC/openSUSE:Factory/.openQA.new.2696 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "openQA" Fri Mar 28 09:37:03 2025 rev:685 rq:1264792 version:5.1743092811.fb25b01c Changes: -------- --- /work/SRC/openSUSE:Factory/openQA/openQA.changes 2025-03-27 22:32:43.149643994 +0100 +++ /work/SRC/openSUSE:Factory/.openQA.new.2696/openQA.changes 2025-03-28 09:37:31.021340889 +0100 @@ -1,0 +2,7 @@ +Thu Mar 27 22:15:30 UTC 2025 - ok...@suse.com + +- Update to version 5.1743092811.fb25b01c: + * Avoid interrupting/skipping directly chained jobs due to high load + * Dependency cron 2025-03-27 + +------------------------------------------------------------------- Old: ---- openQA-5.1743011892.5068f7f4.obscpio New: ---- openQA-5.1743092811.fb25b01c.obscpio ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ openQA-client-test.spec ++++++ --- /var/tmp/diff_new_pack.vFdwtR/_old 2025-03-28 09:37:31.921378166 +0100 +++ /var/tmp/diff_new_pack.vFdwtR/_new 2025-03-28 09:37:31.925378333 +0100 @@ -18,7 +18,7 @@ %define short_name openQA-client Name: %{short_name}-test -Version: 5.1743011892.5068f7f4 +Version: 5.1743092811.fb25b01c Release: 0 Summary: Test package for %{short_name} License: GPL-2.0-or-later ++++++ openQA-devel-test.spec ++++++ --- /var/tmp/diff_new_pack.vFdwtR/_old 2025-03-28 09:37:31.953379492 +0100 +++ /var/tmp/diff_new_pack.vFdwtR/_new 2025-03-28 09:37:31.953379492 +0100 @@ -18,7 +18,7 @@ %define short_name openQA-devel Name: %{short_name}-test -Version: 5.1743011892.5068f7f4 +Version: 5.1743092811.fb25b01c Release: 0 Summary: Test package for %{short_name} License: GPL-2.0-or-later ++++++ openQA-test.spec ++++++ --- /var/tmp/diff_new_pack.vFdwtR/_old 2025-03-28 09:37:31.989380983 +0100 +++ /var/tmp/diff_new_pack.vFdwtR/_new 2025-03-28 09:37:31.989380983 +0100 @@ -18,7 +18,7 @@ %define short_name openQA Name: %{short_name}-test -Version: 5.1743011892.5068f7f4 +Version: 5.1743092811.fb25b01c Release: 0 Summary: Test package for openQA License: GPL-2.0-or-later ++++++ openQA-worker-test.spec ++++++ --- /var/tmp/diff_new_pack.vFdwtR/_old 2025-03-28 09:37:32.021382308 +0100 +++ /var/tmp/diff_new_pack.vFdwtR/_new 2025-03-28 09:37:32.025382474 +0100 @@ -18,7 +18,7 @@ %define short_name openQA-worker Name: %{short_name}-test -Version: 5.1743011892.5068f7f4 +Version: 5.1743092811.fb25b01c Release: 0 Summary: Test package for %{short_name} License: GPL-2.0-or-later ++++++ openQA.spec ++++++ --- /var/tmp/diff_new_pack.vFdwtR/_old 2025-03-28 09:37:32.057383799 +0100 +++ /var/tmp/diff_new_pack.vFdwtR/_new 2025-03-28 09:37:32.057383799 +0100 @@ -90,7 +90,7 @@ %define devel_requires %devel_no_selenium_requires chromedriver Name: openQA -Version: 5.1743011892.5068f7f4 +Version: 5.1743092811.fb25b01c Release: 0 Summary: The openQA web-frontend, scheduler and tools License: GPL-2.0-or-later ++++++ openQA-5.1743011892.5068f7f4.obscpio -> openQA-5.1743092811.fb25b01c.obscpio ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/openQA-5.1743011892.5068f7f4/cpanfile new/openQA-5.1743092811.fb25b01c/cpanfile --- old/openQA-5.1743011892.5068f7f4/cpanfile 2025-03-26 18:58:12.000000000 +0100 +++ new/openQA-5.1743092811.fb25b01c/cpanfile 2025-03-27 17:26:51.000000000 +0100 @@ -117,7 +117,7 @@ requires 'Code::TidyAll'; requires 'Perl::Critic'; requires 'Perl::Critic::Community'; - requires 'Perl::Tidy', '== 20250214.0.0'; + requires 'Perl::Tidy', '== 20250311.0.0'; }; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/openQA-5.1743011892.5068f7f4/dependencies.yaml new/openQA-5.1743092811.fb25b01c/dependencies.yaml --- old/openQA-5.1743011892.5068f7f4/dependencies.yaml 2025-03-26 18:58:12.000000000 +0100 +++ new/openQA-5.1743092811.fb25b01c/dependencies.yaml 2025-03-27 17:26:51.000000000 +0100 @@ -83,7 +83,7 @@ sudo: tar: xorg-x11-fonts: - perl(Perl::Tidy): '== 20250214.0.0' + perl(Perl::Tidy): '== 20250311.0.0' devel_requires: '%devel_no_selenium_requires': diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/openQA-5.1743011892.5068f7f4/lib/OpenQA/Worker.pm new/openQA-5.1743092811.fb25b01c/lib/OpenQA/Worker.pm --- old/openQA-5.1743011892.5068f7f4/lib/OpenQA/Worker.pm 2025-03-26 18:58:12.000000000 +0100 +++ new/openQA-5.1743092811.fb25b01c/lib/OpenQA/Worker.pm 2025-03-27 17:26:51.000000000 +0100 @@ -59,6 +59,7 @@ has 'current_webui_host'; has 'current_job'; has 'current_error'; +has 'current_error_is_fatal'; has 'worker_hostname'; has 'isotovideo_interface_version'; @@ -225,13 +226,12 @@ $status{current_webui_host} = $self->current_webui_host; $status{job} = $current_job->info; } - elsif (my $availability_error = $self->check_availability) { + elsif (my $availability_reason = $self->set_current_error_based_on_availability) { $status{status} = 'broken'; - $self->current_error($status{reason} = $availability_error); + $status{reason} = $availability_reason; } else { $status{status} = 'free'; - $self->current_error(undef); } if (my $queue = $self->{_queue}) { $status{pending_job_ids} = $queue->{pending_job_ids} if keys %{$queue->{pending_job_ids}}; @@ -280,7 +280,7 @@ # note: This assigns $self->current_error if there's an error and therefore prevents us from grabbing # a job while broken. The error is propagated to the web UIs. $self->configure_cache_client; - $self->current_error($self->check_availability); + $self->set_current_error_based_on_availability; log_error 'Unavailable: ' . $self->current_error if $self->current_error; # register error handler to stop the current job when a critical/unhandled error occurs @@ -484,9 +484,14 @@ log_info("Skipping job $next_job_id from queue (web UI sent command $skip_reason)"); return $self->_prepare_and_skip_job($next_job, $skip_reason); } - if (my $current_error = $self->current_error) { - log_info("Skipping job $next_job_id from queue because worker is broken ($current_error)"); - return $self->_prepare_and_skip_job($next_job); + if (my $e = $self->current_error) { + if ($self->current_error_is_fatal) { + log_info "Skipping job $next_job_id from queue because worker is broken ($e)"; + return $self->_prepare_and_skip_job($next_job); + } + else { + log_info "Continuing with job $next_job_id as it is already enqueued despite current error ($e)"; + } } my $parent_chain = $queue_info->{parent_chain}; my $last_parent = $parent_chain->[-1]; @@ -611,36 +616,46 @@ catch ($e) { return 0 } } -# checks whether the worker is available +# returns whether the worker is available and a reason # note: This is used to check certain error conditions *before* starting a job to prevent incompletes and # being able to propagate the brokenness to the web UIs. +# note: High load will yield a corresponding error message as reason so the worker becomes broken and +# thus will not pick up any new jobs. However, a worker under high load is still considered being +# able to work on jobs that are already enqueued. Hence this function returns a "1" for the +# availability in this case. sub check_availability ($self) { # check whether the cache service is available if caching enabled if (my $cache_service_client = $self->{_cache_service_client}) { my $error = $cache_service_client->info->availability_error; my $host = $cache_service_client->host // '?'; - return "Worker cache not available via $host: $error" if $error; + return (0, "Worker cache not available via $host: $error") if $error; } # check whether qemu is still running if (my $qemu_pid = $self->is_qemu_running) { - return "A QEMU instance using the current pool directory is still running (PID: $qemu_pid)"; + return (0, "A QEMU instance using the current pool directory is still running (PID: $qemu_pid)"); } - # avoid running jobs if system utilization is critical and ensure pool directory is locked - if (my $error = $self->_check_system_utilization || $self->_setup_pool_directory) { - return $error; - } + # ensure pool directory is locked + if (my $error = $self->_setup_pool_directory) { return (0, $error) } # auto-detect worker address if not specified explicitly my $settings = $self->settings; - return 'Unable to determine worker address (WORKER_HOSTNAME)' unless $settings->auto_detect_worker_address; + return (0, 'Unable to determine worker address (WORKER_HOSTNAME)') unless $settings->auto_detect_worker_address; # check org.opensuse.os_autoinst.switch if it is a MM-capable worker slot - return "D-Bus service '$self->{_ovs_dbus_service_name}' is not running" + return (0, "D-Bus service '$self->{_ovs_dbus_service_name}' is not running") if $settings->has_class('tap') && !$self->is_ovs_dbus_service_running; - return undef; + # continue with enqueued jobs in any case but avoid picking up new jobs if system utilization is critical + return (1, $self->_check_system_utilization); +} + +sub set_current_error_based_on_availability ($self) { + my ($is_available, $reason) = $self->check_availability; + $self->current_error($reason); + $self->current_error_is_fatal(!$is_available); + return $reason; } sub _handle_client_status_changed ($self, $client, $event_data) { @@ -741,8 +756,8 @@ # hasn't been terminated yet) # incomplete subsequent jobs in the queue if it turns out the worker is generally broken # continue with the next job in the queue (this just returns if there are no further jobs) - $self->current_error(my $availability_error = $self->check_availability); - log_warning $availability_error if $availability_error; + my $availability_reason = $self->set_current_error_based_on_availability; + log_warning $availability_reason if $availability_reason; if (!$self->_accept_or_skip_next_job_in_queue) { # stop if we can not accept/skip the next job (e.g. because there's no further job) if that's configured diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/openQA-5.1743011892.5068f7f4/t/24-worker-overall.t new/openQA-5.1743092811.fb25b01c/t/24-worker-overall.t --- old/openQA-5.1743011892.5068f7f4/t/24-worker-overall.t 2025-03-26 18:58:12.000000000 +0100 +++ new/openQA-5.1743092811.fb25b01c/t/24-worker-overall.t 2025-03-27 17:26:51.000000000 +0100 @@ -851,6 +851,7 @@ 'A QEMU instance using the current pool directory is still running (PID: 17377)', 'error status recomputed' ); + ok $worker->current_error_is_fatal, 'leftover QEMU process considered fatal'; is $pending_job->status, 'skipped: ?', 'pending job is supposed to be skipped due to the error'; combined_like { $worker->_handle_job_status_changed($pending_job, {status => 'stopped', reason => 'skipped'}); @@ -858,11 +859,22 @@ qr/Job 769 from some-host finished - reason: skipped/s, 'assume skipping of job 769 is complete'; is $worker->status->{status}, 'broken', 'worker still considered broken'; + # set back the job/queue for another test + $pending_job->{_status} = 'new'; + $worker->current_job($fake_job); + $worker->_init_queue([$pending_job]); + # assume the average load exceeds configured threshold $worker_mock->unmock('is_qemu_running'); $worker->settings->global_settings->{CRITICAL_LOAD_AVG_THRESHOLD} = '10'; - is $worker->status->{status}, 'broken', 'worker considered broken when average load exceeds threshold'; + combined_like { $worker->_handle_job_status_changed($fake_job, {status => 'stopped', reason => 'done'}) } + qr/Job 42 from some-host finished - reason: done.*Continuing.*769.*despite.*load.*exceeding/s, + 'continuation despite error logged'; like $worker->current_error, qr/load \(.*10\.25.*exceeding.*10/, 'error shows current load and threshold'; + ok !$worker->current_error_is_fatal, 'exceeding the load is not considered fatal'; + is $pending_job->status, 'accepted', 'pending job is not supposed to be skipped due load'; + $worker->current_job(undef); + is $worker->status->{status}, 'broken', 'worker is considered broken when after the job is done'; # assume the error is gone $load_avg_file->remove; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/openQA-5.1743011892.5068f7f4/t/lib/OpenQA/Test/Utils.pm new/openQA-5.1743092811.fb25b01c/t/lib/OpenQA/Test/Utils.pm --- old/openQA-5.1743011892.5068f7f4/t/lib/OpenQA/Test/Utils.pm 2025-03-26 18:58:12.000000000 +0100 +++ new/openQA-5.1743092811.fb25b01c/t/lib/OpenQA/Test/Utils.pm 2025-03-27 17:26:51.000000000 +0100 @@ -485,9 +485,9 @@ } my $error = $options{error}; # uncoverable statement my $worker_mock = Test::MockModule->new('OpenQA::Worker'); # uncoverable statement - $worker_mock->redefine(check_availability => $error) if defined $error; # uncoverable statement + $worker_mock->redefine(check_availability => sub { (0, $error) }) if defined $error; # uncoverable statement my $worker = OpenQA::Worker->new(\%worker_params); # uncoverable statement - $worker->current_error($error) if defined $error; # uncoverable statement + $worker->set_current_error_based_on_availability; # uncoverable statement setup_worker($worker, $host); # uncoverable statement $worker->exec(); # uncoverable statement diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/openQA-5.1743011892.5068f7f4/tools/ci/autoinst.sha new/openQA-5.1743092811.fb25b01c/tools/ci/autoinst.sha --- old/openQA-5.1743011892.5068f7f4/tools/ci/autoinst.sha 2025-03-26 18:58:12.000000000 +0100 +++ new/openQA-5.1743092811.fb25b01c/tools/ci/autoinst.sha 2025-03-27 17:26:51.000000000 +0100 @@ -1 +1 @@ -4ad1bd8ce5aabe01198d40f2a0dc0c97d046927d +95d87acbdb1fdbea785066fa90a210ff5ac8c67c diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/openQA-5.1743011892.5068f7f4/tools/ci/ci-packages.txt new/openQA-5.1743092811.fb25b01c/tools/ci/ci-packages.txt --- old/openQA-5.1743011892.5068f7f4/tools/ci/ci-packages.txt 2025-03-26 18:58:12.000000000 +0100 +++ new/openQA-5.1743092811.fb25b01c/tools/ci/ci-packages.txt 2025-03-27 17:26:51.000000000 +0100 @@ -174,7 +174,7 @@ perl-Perl-Critic-Policy-Variables-ProhibitLoopOnHash-0.008 perl-Perl-Critic-Pulp-99 perl-PerlIO-utf8_strict-0.008 -perl-Perl-Tidy-20250214.0.0 +perl-Perl-Tidy-20250311.0.0 perl-Pod-MinimumVersion-50 perl-Pod-POM-2.01 perl-Pod-Spell-1.26 ++++++ openQA.obsinfo ++++++ --- /var/tmp/diff_new_pack.vFdwtR/_old 2025-03-28 09:37:53.330264857 +0100 +++ /var/tmp/diff_new_pack.vFdwtR/_new 2025-03-28 09:37:53.334265023 +0100 @@ -1,5 +1,5 @@ name: openQA -version: 5.1743011892.5068f7f4 -mtime: 1743011892 -commit: 5068f7f4c91c697d4820b3d18d11122362a39af4 +version: 5.1743092811.fb25b01c +mtime: 1743092811 +commit: fb25b01c367fc8504fe213ab9f4369ac08551667