Hello community, here is the log from the commit of package salt for openSUSE:Factory checked in at 2019-10-14 12:30:50 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/salt (Old) and /work/SRC/openSUSE:Factory/.salt.new.2352 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "salt" Mon Oct 14 12:30:50 2019 rev:93 rq:733437 version:2019.2.0 Changes: -------- --- /work/SRC/openSUSE:Factory/salt/salt.changes 2019-09-11 10:23:31.571487439 +0200 +++ /work/SRC/openSUSE:Factory/.salt.new.2352/salt.changes 2019-10-14 12:30:51.836368560 +0200 @@ -1,0 +2,11 @@ +Thu Sep 26 10:23:39 UTC 2019 - Pablo Suárez Hernández <[email protected]> + +- Improve batch_async to release consumed memory (bsc#1140912) +- Fix memory leak produced by batch async find_jobs mechanism (bsc#1140912) +- Grant read and execute permission to others (bsc#1150447) + +- Added: + * improve-batch_async-to-release-consumed-memory-bsc-1.patch + * fix-memory-leak-produced-by-batch-async-find_jobs-me.patch + +------------------------------------------------------------------- New: ---- fix-memory-leak-produced-by-batch-async-find_jobs-me.patch improve-batch_async-to-release-consumed-memory-bsc-1.patch ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ salt.spec ++++++ --- /var/tmp/diff_new_pack.0XkWfz/_old 2019-10-14 12:30:54.476361681 +0200 +++ /var/tmp/diff_new_pack.0XkWfz/_new 2019-10-14 12:30:54.488361650 +0200 @@ -220,25 +220,29 @@ # https://github.com/saltstack/salt/pull/54022 # https://github.com/saltstack/salt/pull/54024 Patch73: accumulated-changes-required-for-yomi-165.patch +# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/177 +Patch74: restore-default-behaviour-of-pkg-list-return.patch +# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/commit/6af07030a502c427781991fc9a2b994fa04ef32e +Patch75: fix-memory-leak-produced-by-batch-async-find_jobs-me.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/159 -Patch74: move-server_id-deprecation-warning-to-reduce-log-spa.patch +Patch76: move-server_id-deprecation-warning-to-reduce-log-spa.patch # PATCH_FIX_UPSTREAM: https://github.com/saltstack/salt/pull/54077 # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/166 -Patch75: fix-aptpkg-systemd-call-bsc-1143301.patch +Patch77: fix-aptpkg-systemd-call-bsc-1143301.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/170 -Patch76: strip-trailing-from-repo.uri-when-comparing-repos-in.patch -# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/177 -Patch77: restore-default-behaviour-of-pkg-list-return.patch +Patch78: strip-trailing-from-repo.uri-when-comparing-repos-in.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/172 -Patch78: implement-network.fqdns-module-function-bsc-1134860-.patch +Patch79: implement-network.fqdns-module-function-bsc-1134860-.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/173 -Patch79: 2019.2.0-pr-54196-backport-173.patch +Patch80: 2019.2.0-pr-54196-backport-173.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/174 -Patch80: virt.volume_infos-needs-to-ignore-inactive-pools-174.patch +Patch81: virt.volume_infos-needs-to-ignore-inactive-pools-174.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/175 -Patch81: virt.volume_infos-silence-libvirt-error-message-175.patch +Patch82: virt.volume_infos-silence-libvirt-error-message-175.patch # PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/pull/176 -Patch82: fix-virt.full_info-176.patch +Patch83: fix-virt.full_info-176.patch +# PATCH-FIX_OPENSUSE: https://github.com/openSUSE/salt/commit/002543df392f65d95dbc127dc058ac897f2035ed +Patch84: improve-batch_async-to-release-consumed-memory-bsc-1.patch # BuildRoot: %{_tmppath}/%{name}-%{version}-build BuildRoot: %{_tmppath}/%{name}-%{version}-build @@ -799,6 +803,8 @@ %patch80 -p1 %patch81 -p1 %patch82 -p1 +%patch83 -p1 +%patch84 -p1 %build %if 0%{?build_py2} @@ -1519,8 +1525,8 @@ %files standalone-formulas-configuration %defattr(-,root,root) %config(noreplace) %attr(0640, root, salt) %{_sysconfdir}/salt/master.d/standalone-formulas-configuration.conf -%dir %attr(0750, root, salt) %{_prefix}/share/salt-formulas/ -%dir %attr(0750, root, salt) %{_prefix}/share/salt-formulas/states/ -%dir %attr(0750, root, salt) %{_prefix}/share/salt-formulas/metadata/ +%dir %attr(0755, root, salt) %{_prefix}/share/salt-formulas/ +%dir %attr(0755, root, salt) %{_prefix}/share/salt-formulas/states/ +%dir %attr(0755, root, salt) %{_prefix}/share/salt-formulas/metadata/ %changelog ++++++ _lastrevision ++++++ --- /var/tmp/diff_new_pack.0XkWfz/_old 2019-10-14 12:30:54.564361452 +0200 +++ /var/tmp/diff_new_pack.0XkWfz/_new 2019-10-14 12:30:54.564361452 +0200 @@ -1 +1 @@ -24e59963d380d183a48f6ddd4d66dbf6a8fa4210 \ No newline at end of file +6fdab5ecba96a2695eadd798abc2dcfe6ff2b99b \ No newline at end of file ++++++ fix-memory-leak-produced-by-batch-async-find_jobs-me.patch ++++++ >From 8941de5a64b6330c6a814059e6e337f7ad3aa6cd Mon Sep 17 00:00:00 2001 From: Mihai Dinca <[email protected]> Date: Mon, 16 Sep 2019 11:27:30 +0200 Subject: [PATCH] Fix memory leak produced by batch async find_jobs mechanism (bsc#1140912) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multiple fixes: - use different JIDs per find_job - fix bug in detection of find_job returns - fix timeout passed from request payload - better cleanup at the end of batching Co-authored-by: Pablo Suárez Hernández <[email protected]> --- salt/cli/batch_async.py | 60 +++++++++++++++++++++++++++-------------- salt/client/__init__.py | 1 + salt/master.py | 1 - 3 files changed, 41 insertions(+), 21 deletions(-) diff --git a/salt/cli/batch_async.py b/salt/cli/batch_async.py index 8c8f481e34..8a67331102 100644 --- a/salt/cli/batch_async.py +++ b/salt/cli/batch_async.py @@ -72,6 +72,7 @@ class BatchAsync(object): self.done_minions = set() self.active = set() self.initialized = False + self.jid_gen = jid_gen self.ping_jid = jid_gen() self.batch_jid = jid_gen() self.find_job_jid = jid_gen() @@ -89,14 +90,11 @@ class BatchAsync(object): def __set_event_handler(self): ping_return_pattern = 'salt/job/{0}/ret/*'.format(self.ping_jid) batch_return_pattern = 'salt/job/{0}/ret/*'.format(self.batch_jid) - find_job_return_pattern = 'salt/job/{0}/ret/*'.format(self.find_job_jid) self.event.subscribe(ping_return_pattern, match_type='glob') self.event.subscribe(batch_return_pattern, match_type='glob') - self.event.subscribe(find_job_return_pattern, match_type='glob') - self.event.patterns = { + self.patterns = { (ping_return_pattern, 'ping_return'), (batch_return_pattern, 'batch_run'), - (find_job_return_pattern, 'find_job_return') } self.event.set_event_handler(self.__event_handler) @@ -104,7 +102,7 @@ class BatchAsync(object): if not self.event: return mtag, data = self.event.unpack(raw, self.event.serial) - for (pattern, op) in self.event.patterns: + for (pattern, op) in self.patterns: if fnmatch.fnmatch(mtag, pattern): minion = data['id'] if op == 'ping_return': @@ -112,7 +110,8 @@ class BatchAsync(object): if self.targeted_minions == self.minions: self.event.io_loop.spawn_callback(self.start_batch) elif op == 'find_job_return': - self.find_job_returned.add(minion) + if data.get("return", None): + self.find_job_returned.add(minion) elif op == 'batch_run': if minion in self.active: self.active.remove(minion) @@ -131,31 +130,46 @@ class BatchAsync(object): return set(list(to_run)[:next_batch_size]) @tornado.gen.coroutine - def check_find_job(self, batch_minions): + def check_find_job(self, batch_minions, jid): + find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid) + self.event.unsubscribe(find_job_return_pattern, match_type='glob') + self.patterns.remove((find_job_return_pattern, "find_job_return")) + timedout_minions = batch_minions.difference(self.find_job_returned).difference(self.done_minions) self.timedout_minions = self.timedout_minions.union(timedout_minions) self.active = self.active.difference(self.timedout_minions) running = batch_minions.difference(self.done_minions).difference(self.timedout_minions) + if timedout_minions: self.schedule_next() + if running: + self.find_job_returned = self.find_job_returned.difference(running) self.event.io_loop.add_callback(self.find_job, running) @tornado.gen.coroutine def find_job(self, minions): - not_done = minions.difference(self.done_minions) - ping_return = yield self.local.run_job_async( - not_done, - 'saltutil.find_job', - [self.batch_jid], - 'list', - gather_job_timeout=self.opts['gather_job_timeout'], - jid=self.find_job_jid, - **self.eauth) - self.event.io_loop.call_later( - self.opts['gather_job_timeout'], - self.check_find_job, - not_done) + not_done = minions.difference(self.done_minions).difference(self.timedout_minions) + + if not_done: + jid = self.jid_gen() + find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid) + self.patterns.add((find_job_return_pattern, "find_job_return")) + self.event.subscribe(find_job_return_pattern, match_type='glob') + + ret = yield self.local.run_job_async( + not_done, + 'saltutil.find_job', + [self.batch_jid], + 'list', + gather_job_timeout=self.opts['gather_job_timeout'], + jid=jid, + **self.eauth) + self.event.io_loop.call_later( + self.opts['gather_job_timeout'], + self.check_find_job, + not_done, + jid) @tornado.gen.coroutine def start(self): @@ -203,6 +217,9 @@ class BatchAsync(object): } self.event.fire_event(data, "salt/batch/{0}/done".format(self.batch_jid)) self.event.remove_event_handler(self.__event_handler) + for (pattern, label) in self.patterns: + if label in ["ping_return", "batch_run"]: + self.event.unsubscribe(pattern, match_type='glob') def schedule_next(self): if not self.scheduled: @@ -226,9 +243,12 @@ class BatchAsync(object): gather_job_timeout=self.opts['gather_job_timeout'], jid=self.batch_jid, metadata=self.metadata) + self.event.io_loop.call_later(self.opts['timeout'], self.find_job, set(next_batch)) except Exception as ex: + log.error("Error in scheduling next batch: %s", ex) self.active = self.active.difference(next_batch) else: self.end_batch() self.scheduled = False + yield diff --git a/salt/client/__init__.py b/salt/client/__init__.py index aff354a021..0bb6d2b111 100644 --- a/salt/client/__init__.py +++ b/salt/client/__init__.py @@ -1624,6 +1624,7 @@ class LocalClient(object): 'key': self.key, 'tgt_type': tgt_type, 'ret': ret, + 'timeout': timeout, 'jid': jid} # if kwargs are passed, pack them. diff --git a/salt/master.py b/salt/master.py index f08c126280..0e4bba0505 100644 --- a/salt/master.py +++ b/salt/master.py @@ -2043,7 +2043,6 @@ class ClearFuncs(object): def publish_batch(self, clear_load, minions, missing): batch_load = {} batch_load.update(clear_load) - import salt.cli.batch_async batch = salt.cli.batch_async.BatchAsync( self.local.opts, functools.partial(self._prep_jid, clear_load, {}), -- 2.23.0 ++++++ improve-batch_async-to-release-consumed-memory-bsc-1.patch ++++++ >From 002543df392f65d95dbc127dc058ac897f2035ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pablo=20Su=C3=A1rez=20Hern=C3=A1ndez?= <[email protected]> Date: Thu, 26 Sep 2019 10:41:06 +0100 Subject: [PATCH] Improve batch_async to release consumed memory (bsc#1140912) --- salt/cli/batch_async.py | 73 +++++++++++++++++++++++++---------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/salt/cli/batch_async.py b/salt/cli/batch_async.py index 8a67331102..2bb50459c8 100644 --- a/salt/cli/batch_async.py +++ b/salt/cli/batch_async.py @@ -5,6 +5,7 @@ Execute a job on the targeted minions by using a moving window of fixed size `ba # Import python libs from __future__ import absolute_import, print_function, unicode_literals +import gc import tornado # Import salt libs @@ -77,6 +78,7 @@ class BatchAsync(object): self.batch_jid = jid_gen() self.find_job_jid = jid_gen() self.find_job_returned = set() + self.ended = False self.event = salt.utils.event.get_event( 'master', self.opts['sock_dir'], @@ -86,6 +88,7 @@ class BatchAsync(object): io_loop=ioloop, keep_loop=True) self.scheduled = False + self.patterns = {} def __set_event_handler(self): ping_return_pattern = 'salt/job/{0}/ret/*'.format(self.ping_jid) @@ -116,7 +119,7 @@ class BatchAsync(object): if minion in self.active: self.active.remove(minion) self.done_minions.add(minion) - self.schedule_next() + self.event.io_loop.spawn_callback(self.schedule_next) def _get_next(self): to_run = self.minions.difference( @@ -129,23 +132,23 @@ class BatchAsync(object): ) return set(list(to_run)[:next_batch_size]) - @tornado.gen.coroutine def check_find_job(self, batch_minions, jid): - find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid) - self.event.unsubscribe(find_job_return_pattern, match_type='glob') - self.patterns.remove((find_job_return_pattern, "find_job_return")) + if self.event: + find_job_return_pattern = 'salt/job/{0}/ret/*'.format(jid) + self.event.unsubscribe(find_job_return_pattern, match_type='glob') + self.patterns.remove((find_job_return_pattern, "find_job_return")) - timedout_minions = batch_minions.difference(self.find_job_returned).difference(self.done_minions) - self.timedout_minions = self.timedout_minions.union(timedout_minions) - self.active = self.active.difference(self.timedout_minions) - running = batch_minions.difference(self.done_minions).difference(self.timedout_minions) + timedout_minions = batch_minions.difference(self.find_job_returned).difference(self.done_minions) + self.timedout_minions = self.timedout_minions.union(timedout_minions) + self.active = self.active.difference(self.timedout_minions) + running = batch_minions.difference(self.done_minions).difference(self.timedout_minions) - if timedout_minions: - self.schedule_next() + if timedout_minions: + self.schedule_next() - if running: - self.find_job_returned = self.find_job_returned.difference(running) - self.event.io_loop.add_callback(self.find_job, running) + if running: + self.find_job_returned = self.find_job_returned.difference(running) + self.event.io_loop.spawn_callback(self.find_job, running) @tornado.gen.coroutine def find_job(self, minions): @@ -165,8 +168,8 @@ class BatchAsync(object): gather_job_timeout=self.opts['gather_job_timeout'], jid=jid, **self.eauth) - self.event.io_loop.call_later( - self.opts['gather_job_timeout'], + yield tornado.gen.sleep(self.opts['gather_job_timeout']) + self.event.io_loop.spawn_callback( self.check_find_job, not_done, jid) @@ -174,10 +177,6 @@ class BatchAsync(object): @tornado.gen.coroutine def start(self): self.__set_event_handler() - #start batching even if not all minions respond to ping - self.event.io_loop.call_later( - self.batch_presence_ping_timeout or self.opts['gather_job_timeout'], - self.start_batch) ping_return = yield self.local.run_job_async( self.opts['tgt'], 'test.ping', @@ -191,6 +190,10 @@ class BatchAsync(object): metadata=self.metadata, **self.eauth) self.targeted_minions = set(ping_return['minions']) + #start batching even if not all minions respond to ping + yield tornado.gen.sleep(self.batch_presence_ping_timeout or self.opts['gather_job_timeout']) + self.event.io_loop.spawn_callback(self.start_batch) + @tornado.gen.coroutine def start_batch(self): @@ -202,12 +205,14 @@ class BatchAsync(object): "down_minions": self.targeted_minions.difference(self.minions), "metadata": self.metadata } - self.event.fire_event(data, "salt/batch/{0}/start".format(self.batch_jid)) - yield self.run_next() + ret = self.event.fire_event(data, "salt/batch/{0}/start".format(self.batch_jid)) + self.event.io_loop.spawn_callback(self.run_next) + @tornado.gen.coroutine def end_batch(self): left = self.minions.symmetric_difference(self.done_minions.union(self.timedout_minions)) - if not left: + if not left and not self.ended: + self.ended = True data = { "available_minions": self.minions, "down_minions": self.targeted_minions.difference(self.minions), @@ -220,20 +225,26 @@ class BatchAsync(object): for (pattern, label) in self.patterns: if label in ["ping_return", "batch_run"]: self.event.unsubscribe(pattern, match_type='glob') + del self + gc.collect() + yield + @tornado.gen.coroutine def schedule_next(self): if not self.scheduled: self.scheduled = True # call later so that we maybe gather more returns - self.event.io_loop.call_later(self.batch_delay, self.run_next) + yield tornado.gen.sleep(self.batch_delay) + self.event.io_loop.spawn_callback(self.run_next) @tornado.gen.coroutine def run_next(self): + self.scheduled = False next_batch = self._get_next() if next_batch: self.active = self.active.union(next_batch) try: - yield self.local.run_job_async( + ret = yield self.local.run_job_async( next_batch, self.opts['fun'], self.opts['arg'], @@ -244,11 +255,17 @@ class BatchAsync(object): jid=self.batch_jid, metadata=self.metadata) - self.event.io_loop.call_later(self.opts['timeout'], self.find_job, set(next_batch)) + yield tornado.gen.sleep(self.opts['timeout']) + self.event.io_loop.spawn_callback(self.find_job, set(next_batch)) except Exception as ex: log.error("Error in scheduling next batch: %s", ex) self.active = self.active.difference(next_batch) else: - self.end_batch() - self.scheduled = False + yield self.end_batch() + gc.collect() yield + + def __del__(self): + self.event = None + self.ioloop = None + gc.collect() -- 2.22.0
