This is an automated email from the ASF dual-hosted git repository. joemcdonnell pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 41a3f4d4ca43092d0ef48eeaa765626b720e986c
Author: Joe McDonnell <joemcdonn...@cloudera.com>
AuthorDate: Tue Jan 23 10:02:53 2024 -0800

    IMPALA-12745: Skip parallel symbol dumping with RPM/DEB packages

    When using bin/dump_breakpad_symbols.py to dump symbols for RPM/DEB packages, the script extracts the packages to a temporary directory and relies on keeping that directory around until the processing is finished. The parallel processing added in IMPALA-11511 breaks the logic that keeps the temporary directory around, so the script generates errors like:

      Found debugging info in /tmp/tmpqfZ9MZ/usr/lib/debug/usr/lib/impala/sbin-retail/impalad.debug
      Failed to open ELF file '/tmp/tmpqfZ9MZ/usr/lib/debug/usr/lib/impala/sbin-retail/impalad.debug': No such file or directory
      Failed to write symbol file.

    This turns off parallelism for bin/dump_breakpad_symbols.py when processing RPM/DEB packages (i.e. -r/--pkg). This also avoids using a ThreadPool when num_processes <= 1.

    Testing:
     - Hand tested with Redhat 7 RPMs

    Change-Id: If2885a9cfb36a4f616b539599e7f744bd23552c3
    Reviewed-on: http://gerrit.cloudera.org:8080/20943
    Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Tested-by: Joe McDonnell <joemcdonn...@cloudera.com>
---
 bin/dump_breakpad_symbols.py | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/bin/dump_breakpad_symbols.py b/bin/dump_breakpad_symbols.py
index e28422e2e..ce10daa91 100755
--- a/bin/dump_breakpad_symbols.py
+++ b/bin/dump_breakpad_symbols.py
@@ -353,20 +353,30 @@ def main(): assert objcopy status = 0 ensure_dir_exists(args.dest_dir) - # Use a thread pool to go parallel - thread_pool = ThreadPool(processes=args.num_processes) - - def processing_fn(binary): - return process_binary(dump_syms, objcopy, binary, args.dest_dir) - - for result in thread_pool.imap_unordered(processing_fn, enumerate_binaries(args)): - if not result: - thread_pool.terminate() - status = 1 - break - - 
thread_pool.close() - thread_pool.join() + # The logic for handling DEB/RPM packages does not currently work with + # parallelism, so disable parallelism if using the -r/--pkg option. + if args.num_processes > 1 and not bool(args.pkg): + # Use a thread pool to go parallel + thread_pool = ThreadPool(processes=args.num_processes) + + def processing_fn(binary): + return process_binary(dump_syms, objcopy, binary, args.dest_dir) + + for result in thread_pool.imap_unordered(processing_fn, enumerate_binaries(args)): + if not result: + thread_pool.terminate() + status = 1 + break + + thread_pool.close() + thread_pool.join() + else: + # For serial cases, simply avoid the ThreadPool altogether, as that makes it + # easy to reason about. + for binary in enumerate_binaries(args): + if not process_binary(dump_syms, objcopy, binary, args.dest_dir): + status = 1 + break sys.exit(status)