https://github.com/aeubanks created https://github.com/llvm/llvm-project/pull/199453
First, try to get the output of `clang -S -emit-llvm` and run that through llc. If that crashes, run llvm-reduce on the IR. Second, try to get the output of `clang -S -emit-llvm -Xclang=-disable-llvm-passes` and run that through opt. If that crashes, run reduce_pipeline.py and then llvm-reduce. Assisted-by: Gemini >From 07055df690fbb95d09e2a4f598c3dcfa1dd63f60 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks <[email protected]> Date: Sun, 24 May 2026 15:12:06 -0700 Subject: [PATCH] [clang] Make reduce-clang-crash.py also reduce middleend/backend crashes First, try to get the output of `clang -S -emit-llvm` and run that through llc. If that crashes, run llvm-reduce on the IR. Second, try to get the output of `clang -S -emit-llvm -Xclang=-disable-llvm-passes` and run that through opt. If that crashes, run reduce_pipeline.py and then llvm-reduce. Assisted-by: Gemini --- clang/utils/reduce-clang-crash.py | 371 +++++++++++++++++++++++++++++- 1 file changed, 361 insertions(+), 10 deletions(-) diff --git a/clang/utils/reduce-clang-crash.py b/clang/utils/reduce-clang-crash.py index 075ff91af2f5a..0c380b8bb5e75 100755 --- a/clang/utils/reduce-clang-crash.py +++ b/clang/utils/reduce-clang-crash.py @@ -1,12 +1,18 @@ #!/usr/bin/env python3 """Calls reduction tools to create minimal reproducers for clang crashes. -Unknown arguments are treated at cvise/creduce options. +For frontend crashes, runs C-Vise/C-Reduce on the source file. +For middle-end/backend crashes, runs llvm-reduce on emitted LLVM IR: + - If `clang -emit-llvm` succeeds and `llc` on the IR crashes, it's a + backend crash and llvm-reduce is run with llc as the test tool. + - Otherwise, if `clang -emit-llvm -Xclang -disable-llvm-passes` succeeds + and `opt` on the IR crashes, it's a middle-end crash and llvm-reduce + is run with opt as the test tool. 
Output files: *.reduced.sh -- crash reproducer with minimal arguments - *.reduced.cpp -- the reduced file - *.test.sh -- interestingness test for C-Vise + *.reduced.cpp or *.reduced.ll -- the reduced file + *.test.sh -- interestingness test for C-Vise or llvm-reduce """ from argparse import ArgumentParser, RawTextHelpFormatter @@ -24,6 +30,10 @@ verbose = False creduce_cmd = None clang_cmd = None +llc_cmd = None +opt_cmd = None +llvm_reduce_cmd = None +reduce_pipeline_cmd = None def verbose_print(*args, **kwargs): @@ -74,24 +84,30 @@ def write_to_script(text, filename): class Reduce(object): - def __init__(self, crash_script, file_to_reduce, creduce_flags): + def __init__(self, crash_script, file_to_reduce, creduce_flags, llvm_reduce_flags): crash_script_name, crash_script_ext = os.path.splitext(crash_script) file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce) self.testfile = file_reduce_name + ".test.sh" self.crash_script = crash_script_name + ".reduced" + crash_script_ext - self.file_to_reduce = file_reduce_name + ".reduced" + file_reduce_ext - shutil.copy(file_to_reduce, self.file_to_reduce) + self.reduced_source_file = file_reduce_name + ".reduced" + file_reduce_ext + self.file_to_reduce = file_to_reduce self.clang = clang_cmd self.clang_args = [] self.expected_output = [] self.needs_stack_trace = False self.creduce_flags = ["--tidy"] + creduce_flags + if "--n" not in self.creduce_flags: + self.creduce_flags += ["--n", str(max(4, multiprocessing.cpu_count() // 2))] + self.llvm_reduce_flags = llvm_reduce_flags self.read_clang_args(crash_script, file_to_reduce) self.read_expected_output() + def prepare_source_reduction(self): + shutil.copy(self.file_to_reduce, self.reduced_source_file) + def get_crash_cmd(self, cmd=None, args=None, filename=None): if not cmd: cmd = self.clang @@ -121,9 +137,56 @@ def read_clang_args(self, crash_script, filename): if cmd[i] == filename: del cmd[i] break + + if "-cc1" not in cmd: + cmd = self.driver_to_cc1(cmd, 
filename) + self.clang_args = cmd verbose_print("Clang arguments:", quote_cmd(self.clang_args)) + def driver_to_cc1(self, driver_args, filename): + """Convert a driver-mode invocation to its cc1 form via `clang -###`.""" + print("Driver command detected; using `clang -###` to get cc1 invocation...") + invocation = [self.clang, "-###"] + driver_args + [filename] + verbose_print("Running:", quote_cmd(invocation)) + p = subprocess.Popen( + invocation, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE + ) + _, output = p.communicate() + if p.returncode != 0: + sys.exit( + "ERROR: `clang -###` failed with exit code %d:\n%s" + % (p.returncode, output.decode("utf-8", errors="replace")) + ) + + cc1_cmd = None + for line in output.decode("utf-8", errors="replace").splitlines(): + line = line.strip() + if not line: + continue + try: + parsed = shlex.split(line) + except ValueError: + continue + if "-cc1" in parsed: + cc1_cmd = parsed + break + if cc1_cmd is None: + sys.exit("ERROR: could not extract a cc1 invocation from `clang -###`") + + # Drop the executable + del cc1_cmd[0] + # Drop the last arg that matches the input filename, skipping + # `-main-file-name <basename>` since that takes the basename as a value. + target_base = os.path.basename(filename) + for i in range(len(cc1_cmd) - 1, -1, -1): + if cc1_cmd[i] == filename or os.path.basename(cc1_cmd[i]) == target_base: + if i > 0 and cc1_cmd[i - 1] == "-main-file-name": + continue + del cc1_cmd[i] + break + return cc1_cmd + def read_expected_output(self): print("\nGetting expected crash output...") p = subprocess.Popen( @@ -265,6 +328,200 @@ def clang_preprocess(self): except subprocess.CalledProcessError: print("Preprocessing failed") + def emit_llvm_ir(self, output_file, disable_passes=False): + """Try to emit textual LLVM IR with `clang -cc1 -emit-llvm -S`. + + Returns True if clang exited successfully (no crash, IR was written). 
+ """ + args = [] + skip_next = False + for arg in self.clang_args: + if skip_next: + skip_next = False + continue + if arg in { + "-fsyntax-only", + "-emit-llvm", + "-emit-llvm-bc", + "-emit-llvm-only", + "-emit-obj", + "-S", + }: + continue + if arg == "-o": + skip_next = True + continue + args.append(arg) + + extra = ["-emit-llvm", "-o", output_file] + if disable_passes: + extra.append("-disable-llvm-passes") + cmd = [self.clang] + args + extra + [self.file_to_reduce] + verbose_print("Emitting LLVM IR:", quote_cmd(cmd)) + p = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + p.communicate() + return p.returncode == 0 + + def get_opt_llc_args(self): + """Extract args from clang_args that should be forwarded to llc/opt.""" + opt_level = "-O2" + for a in self.clang_args: + if re.match(r"^-O[0-3sz]$", a): + opt_level = a + return [opt_level] + + def check_tool_crash(self, tool_cmd): + """Run tool_cmd and check whether the expected crash output appears.""" + p = subprocess.Popen(tool_cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) + out, _ = p.communicate() + return all(msg in out.decode("utf-8") for msg in self.expected_output) + + def try_llvm_ir_crash(self, ir_file): + """Try to reproduce the crash with llc or opt on emitted LLVM IR. + + Writes the IR to `ir_file` if successful. + Returns (tool_path, tool_args) on success, or None. 
+ """ + if llc_cmd: + print("\nTrying to reproduce crash with llc on optimized LLVM IR...") + if self.emit_llvm_ir(ir_file, disable_passes=False): + llc_args = self.get_opt_llc_args() + if self.check_tool_crash([llc_cmd] + llc_args + [ir_file]): + print("Crash reproduces with llc -- treating as backend crash") + return (llc_cmd, llc_args) + print("Crash does not reproduce with llc") + else: + print("clang -emit-llvm did not complete") + + if opt_cmd: + print("\nTrying to reproduce crash with opt on unoptimized LLVM IR...") + if self.emit_llvm_ir(ir_file, disable_passes=True): + opt_args = self.get_opt_llc_args() + if self.check_tool_crash([opt_cmd] + opt_args + [ir_file]): + print("Crash reproduces with opt -- treating as middle-end crash") + reduced = self.run_reduce_pipeline(ir_file, opt_args) + if reduced is not None and self.check_tool_crash( + [opt_cmd] + reduced + [ir_file] + ): + opt_args = reduced + return (opt_cmd, opt_args) + print("Crash does not reproduce with opt") + else: + print("clang -emit-llvm -disable-llvm-passes did not complete") + + return None + + def run_reduce_pipeline(self, ir_file, opt_args): + """Run reduce_pipeline.py to narrow down the failing opt pipeline. + + Returns a new list of opt args (with -passes=...) on success, replacing + the -O level, or None if reduce_pipeline is unavailable or failed. + Overwrites `ir_file` with the (possibly reduced) intermediate IR. 
+ """ + if not reduce_pipeline_cmd: + return None + + passes = "default<O2>" + extra = [] + for a in opt_args: + m = re.match(r"^-O([0-3sz])$", a) + if m: + passes = "default<O%s>" % m.group(1) + else: + extra.append(a) + + output_file = os.path.splitext(ir_file)[0] + ".pipeline.ll" + print("\nRunning reduce_pipeline.py to reduce the opt pipeline...") + cmd = [ + sys.executable, + reduce_pipeline_cmd, + "--opt-binary=" + opt_cmd, + "--input=" + ir_file, + "--output=" + output_file, + "--passes=" + passes, + ] + extra + verbose_print(quote_cmd(cmd)) + result = subprocess.run(cmd, capture_output=True, text=True) + if verbose: + sys.stdout.write(result.stdout) + sys.stderr.write(result.stderr) + if result.returncode != 0: + print("reduce_pipeline.py failed; keeping original pipeline") + return None + + reduced_passes = passes + for line in reversed(result.stdout.splitlines()): + m = re.match(r'^-passes="(.*)"$', line) + if m: + reduced_passes = m.group(1) + break + + if os.path.isfile(output_file): + shutil.move(output_file, ir_file) + + print("Reduced opt pipeline:", reduced_passes) + return ["-passes=" + reduced_passes] + + def write_llvm_reduce_test(self, tool_cmd, tool_args): + """Write an interestingness test for llvm-reduce. + + The test receives the candidate IR file as $1. 
+ """ + print("\nCreating llvm-reduce interestingness test...") + + disable_symbolization = "" + if not self.needs_stack_trace: + disable_symbolization = "export LLVM_DISABLE_SYMBOLIZATION=1" + + invocation = quote_cmd([tool_cmd] + tool_args) + ' "$1"' + + output = """#!/bin/bash +%s +if %s >& t.log ; then + exit 1 +fi +""" % ( + disable_symbolization, + invocation, + ) + + for msg in self.expected_output: + output += "grep -F %s t.log || exit 1\n" % shlex.quote(msg) + + write_to_script(output, self.testfile) + + def run_llvm_reduce(self, tool_cmd, tool_args, ir_file): + self.write_llvm_reduce_test(tool_cmd, tool_args) + + testfile_abs = os.path.abspath(self.testfile) + returncode = subprocess.call([testfile_abs, ir_file], stdout=subprocess.DEVNULL) + if returncode: + sys.exit("The interestingness test does not pass for the original IR file.") + + print("\nRunning llvm-reduce...") + full_cmd = ( + [ + llvm_reduce_cmd, + "--test=" + testfile_abs, + "-o", + ir_file, + ] + + self.llvm_reduce_flags + + [ir_file] + ) + verbose_print(quote_cmd(full_cmd)) + try: + subprocess.check_call(full_cmd) + except KeyboardInterrupt: + print("\n\nctrl-c detected, killed llvm-reduce") + except subprocess.CalledProcessError as e: + print("llvm-reduce failed:", e) + return + + reduced_cmd = quote_cmd([tool_cmd] + tool_args + [ir_file]) + write_to_script(reduced_cmd, self.crash_script) + print("Reduced command:", reduced_cmd) + @staticmethod def filter_args( args, opts_equal=[], opts_startswith=[], opts_one_arg_startswith=[] @@ -432,6 +689,10 @@ def main(): global verbose global creduce_cmd global clang_cmd + global llc_cmd + global opt_cmd + global llvm_reduce_cmd + global reduce_pipeline_cmd parser = ArgumentParser(description=__doc__, formatter_class=RawTextHelpFormatter) parser.add_argument( @@ -453,6 +714,35 @@ def main(): help="The path to the `clang` executable. 
" "By default uses the llvm-bin directory.", ) + parser.add_argument( + "--llc", + dest="llc", + type=str, + help="The path to the `llc` executable. " + "By default uses the llvm-bin directory.", + ) + parser.add_argument( + "--opt", + dest="opt", + type=str, + help="The path to the `opt` executable. " + "By default uses the llvm-bin directory.", + ) + parser.add_argument( + "--llvm-reduce", + dest="llvm_reduce", + type=str, + help="The path to the `llvm-reduce` executable. " + "By default uses the llvm-bin directory. Required to reduce IR-level crashes.", + ) + parser.add_argument( + "--reduce-pipeline", + dest="reduce_pipeline", + type=str, + help="The path to `reduce_pipeline.py`. " + "Default: llvm/utils/reduce_pipeline.py relative to this script. " + "Used to reduce the opt pass pipeline for middle-end crashes.", + ) parser.add_argument( "--creduce", dest="creduce", @@ -460,23 +750,84 @@ def main(): help="The path to the `creduce` or `cvise` executable. " "Required if neither `creduce` nor `cvise` are on PATH.", ) + parser.add_argument( + "--no-llvm-reduce", + dest="no_llvm_reduce", + action="store_true", + help="Skip IR-level reduction with llvm-reduce, always run C-Vise/creduce.", + ) + parser.add_argument( + "--creduce-flag", + dest="extra_creduce_flags", + action="append", + default=[], + help="Extra flags to pass to creduce/cvise. Can be specified multiple times.", + ) + parser.add_argument( + "--llvm-reduce-flag", + dest="llvm_reduce_flags", + action="append", + default=[], + help="Extra flags to pass to llvm-reduce. 
Can be specified multiple times.", + ) parser.add_argument("-v", "--verbose", action="store_true") - args, creduce_flags = parser.parse_known_args() + args = parser.parse_args() verbose = args.verbose llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None creduce_cmd = check_cmd("cvise", None, args.creduce, return_none_if_not_found=True) if not creduce_cmd: creduce_cmd = check_cmd("creduce", None, args.creduce) clang_cmd = check_cmd("clang", llvm_bin, args.clang) + llc_cmd = check_cmd("llc", llvm_bin, args.llc, return_none_if_not_found=True) + opt_cmd = check_cmd("opt", llvm_bin, args.opt, return_none_if_not_found=True) + llvm_reduce_cmd = check_cmd( + "llvm-reduce", llvm_bin, args.llvm_reduce, return_none_if_not_found=True + ) + if args.reduce_pipeline: + reduce_pipeline_cmd = os.path.abspath(args.reduce_pipeline) + if not os.path.isfile(reduce_pipeline_cmd): + sys.exit("ERROR: %s does not exist" % reduce_pipeline_cmd) + else: + default_rp = os.path.normpath( + os.path.join( + os.path.dirname(os.path.abspath(__file__)), + "..", + "..", + "llvm", + "utils", + "reduce_pipeline.py", + ) + ) + reduce_pipeline_cmd = default_rp if os.path.isfile(default_rp) else None crash_script = check_file(args.crash_script[0]) file_to_reduce = check_file(args.file_to_reduce[0]) - if "--n" not in creduce_flags: - creduce_flags += ["--n", str(max(4, multiprocessing.cpu_count() // 2))] + r = Reduce( + crash_script, + file_to_reduce, + args.extra_creduce_flags, + args.llvm_reduce_flags, + ) - r = Reduce(crash_script, file_to_reduce, creduce_flags) + if not args.no_llvm_reduce and llvm_reduce_cmd and (llc_cmd or opt_cmd): + ir_file = os.path.splitext(file_to_reduce)[0] + ".reduced.ll" + ir_crash = r.try_llvm_ir_crash(ir_file) + if ir_crash: + tool_cmd, tool_args = ir_crash + r.run_llvm_reduce(tool_cmd, tool_args, ir_file) + return + print( + "\nCould not reproduce crash at the IR level, " + "falling back to source-level reduction." 
+ ) + elif not args.no_llvm_reduce and not llvm_reduce_cmd: + verbose_print( + "llvm-reduce not found; skipping IR-level reduction. " + "Pass --llvm-reduce or --llvm-bin to enable it." + ) + r.prepare_source_reduction() r.simplify_clang_args() r.write_interestingness_test() r.clang_preprocess() _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
