This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
commit 3bcd770dfc51ed5148cf85a96f5644594f953319
Author: Joe McDonnell <[email protected]>
AuthorDate: Sun Jun 25 21:20:25 2023 -0700

    IMPALA-10048: Go parallel for dump_breakpad_symbols.py

    This modifies dump_breakpad_symbols.py to use a ThreadPool to go parallel when there are multiple binaries or libraries to process. This is common for Jenkins jobs that dump symbols for all backend tests. The different binaries write out to different directories, so the threads don't interfere with each other.

    Testing:
     - Ran locally dumping the symbols for all backend tests
     - Ran a Jenkins job that generates a minidump and triggers the minidump symbol processing. It went parallel and worked fine.

    Change-Id: I93427bb07f1d9718bd6df90acfd247210b54294d
    Reviewed-on: http://gerrit.cloudera.org:8080/20802
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Michael Smith <[email protected]>
---
 bin/dump_breakpad_symbols.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/bin/dump_breakpad_symbols.py b/bin/dump_breakpad_symbols.py
index 81bf00d54..da8485bd4 100755
--- a/bin/dump_breakpad_symbols.py
+++ b/bin/dump_breakpad_symbols.py
@@ -56,8 +56,8 @@ from __future__ import absolute_import, division, print_function import errno import logging -import glob import magic +import multiprocessing import os import shutil import subprocess @@ -66,6 +66,7 @@ import tempfile from argparse import ArgumentParser from collections import namedtuple +from multiprocessing.pool import ThreadPool BinarySymbolInfo = namedtuple('BinarySymbolInfo', 'path, debug_path') @@ -137,6 +138,8 @@ def parse_args(): parser.add_argument('-s', '--symbol_pkg', '--debuginfo_rpm', help="""RPM/DEB file containing the debug symbols matching the binaries in -r""") parser.add_argument('--objcopy', help='Path to the objcopy binary from Binutils') + parser.add_argument('--num_processes', type=int, default=multiprocessing.cpu_count(), + help="Number of parallel processes to use.") 
args = parser.parse_args() # Post processing checks @@ -341,9 +344,20 @@ def main(): assert objcopy status = 0 ensure_dir_exists(args.dest_dir) - for binary in enumerate_binaries(args): - if not process_binary(dump_syms, objcopy, binary, args.dest_dir): + # Use a thread pool to go parallel + thread_pool = ThreadPool(processes=args.num_processes) + + def processing_fn(binary): + return process_binary(dump_syms, objcopy, binary, args.dest_dir) + + for result in thread_pool.imap_unordered(processing_fn, enumerate_binaries(args)): + if not result: + thread_pool.terminate() status = 1 + break + + thread_pool.close() + thread_pool.join() sys.exit(status)
