This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 3bcd770dfc51ed5148cf85a96f5644594f953319
Author: Joe McDonnell <[email protected]>
AuthorDate: Sun Jun 25 21:20:25 2023 -0700

    IMPALA-10048: Go parallel for dump_breakpad_symbols.py
    
    This modifies dump_breakpad_symbols.py to use a ThreadPool
    to go parallel when there are multiple binaries or
    libraries to process. This is common for Jenkins jobs that
    dump symbols for all backend tests. The different binaries
    write out to different directories, so the threads don't
    interfere with each other.
    
    Testing:
     - Ran locally dumping the symbols for all backend tests
     - Ran a Jenkins job that generates a minidump and triggers
       the minidump symbol processing. It went parallel and
       worked fine.
    
    Change-Id: I93427bb07f1d9718bd6df90acfd247210b54294d
    Reviewed-on: http://gerrit.cloudera.org:8080/20802
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Michael Smith <[email protected]>
---
 bin/dump_breakpad_symbols.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/bin/dump_breakpad_symbols.py b/bin/dump_breakpad_symbols.py
index 81bf00d54..da8485bd4 100755
--- a/bin/dump_breakpad_symbols.py
+++ b/bin/dump_breakpad_symbols.py
@@ -56,8 +56,8 @@
 from __future__ import absolute_import, division, print_function
 import errno
 import logging
-import glob
 import magic
+import multiprocessing
 import os
 import shutil
 import subprocess
@@ -66,6 +66,7 @@ import tempfile
 
 from argparse import ArgumentParser
 from collections import namedtuple
+from multiprocessing.pool import ThreadPool
 
 BinarySymbolInfo = namedtuple('BinarySymbolInfo', 'path, debug_path')
 
@@ -137,6 +138,8 @@ def parse_args():
   parser.add_argument('-s', '--symbol_pkg', '--debuginfo_rpm', help="""RPM/DEB file
       containing the debug symbols matching the binaries in -r""")
   parser.add_argument('--objcopy', help='Path to the objcopy binary from Binutils')
+  parser.add_argument('--num_processes', type=int, default=multiprocessing.cpu_count(),
+      help="Number of parallel processes to use.")
   args = parser.parse_args()
 
   # Post processing checks
@@ -341,9 +344,20 @@ def main():
   assert objcopy
   status = 0
   ensure_dir_exists(args.dest_dir)
-  for binary in enumerate_binaries(args):
-    if not process_binary(dump_syms, objcopy, binary, args.dest_dir):
+  # Use a thread pool to go parallel
+  thread_pool = ThreadPool(processes=args.num_processes)
+
+  def processing_fn(binary):
+    return process_binary(dump_syms, objcopy, binary, args.dest_dir)
+
+  for result in thread_pool.imap_unordered(processing_fn, enumerate_binaries(args)):
+    if not result:
+      thread_pool.terminate()
       status = 1
+      break
+
+  thread_pool.close()
+  thread_pool.join()
   sys.exit(status)
 
 

Reply via email to