Repository: incubator-impala Updated Branches: refs/heads/master 476f687b4 -> baf8fe202
IMPALA-3839: Fix race condition in impala_cluster.py Accesses to properties of Process objects from psutil hit the operating system, at least under Linux. Therefore any access can throw NoSuchProcess if the underlying process has already terminated. The fix extends the try/except block to all such accesses. To ensure that this change does not introduce a regression I ran test_breakpad.py, which exercises this code. I also ran test_breakpad.py:test_sigusr1_writes_minidump() in a loop for about 40 iterations with the changes from https://gerrit.cloudera.org/#/c/3312/6, which made me discover this issue in the first place. Change-Id: I3c0d5e43f8c58b4922ab5db78236915fcc8b588d Reviewed-on: http://gerrit.cloudera.org:8080/3592 Tested-by: Internal Jenkins Reviewed-by: Lars Volker <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/db4294c7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/db4294c7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/db4294c7 Branch: refs/heads/master Commit: db4294c76139209b663f880d7aa7d03f6bcd1b3d Parents: c6ce32b Author: Lars Volker <[email protected]> Authored: Fri Jul 8 00:32:40 2016 +0200 Committer: Taras Bobrovytsky <[email protected]> Committed: Thu Jul 14 19:04:43 2016 +0000 ---------------------------------------------------------------------- tests/common/impala_cluster.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/db4294c7/tests/common/impala_cluster.py ---------------------------------------------------------------------- diff --git a/tests/common/impala_cluster.py b/tests/common/impala_cluster.py index 4a762c0..2a8b56c 100644 --- a/tests/common/impala_cluster.py +++ b/tests/common/impala_cluster.py @@ -96,23 +96,23 @@ class ImpalaCluster(object): for pid in psutil.get_pid_list(): try: process = psutil.Process(pid) + try: + if process.username != getuser(): + continue + except KeyError, e: + if "uid not found" in str(e): + continue + raise + if process.name == 'impalad' and len(process.cmdline) >= 1: + impalads.append(ImpaladProcess(process.cmdline)) + elif process.name == 'statestored' and len(process.cmdline) >= 1: + statestored.append(StateStoreProcess(process.cmdline)) + elif process.name == 'catalogd' and len(process.cmdline) >= 1: + catalogd = CatalogdProcess(process.cmdline) except psutil.NoSuchProcess, e: # A process from get_pid_list() no longer exists, continue. LOG.info(e) continue - try: - if process.username != getuser(): - continue - except KeyError, e: - if "uid not found" in str(e): - continue - raise - if process.name == 'impalad' and len(process.cmdline) >= 1: - impalads.append(ImpaladProcess(process.cmdline)) - elif process.name == 'statestored' and len(process.cmdline) >= 1: - statestored.append(StateStoreProcess(process.cmdline)) - elif process.name == 'catalogd' and len(process.cmdline) >=1: - catalogd = CatalogdProcess(process.cmdline) return impalads, statestored, catalogd # Represents a process running on a machine and common actions that can be performed
