llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Arthur Eubanks (aeubanks)

<details>
<summary>Changes</summary>

First, try to get the output of `clang -S -emit-llvm` and run that through llc.
If that crashes, run llvm-reduce on the emitted IR.

Second, try to get the output of `clang -S -emit-llvm
-Xclang=-disable-llvm-passes` and run that through opt. If that crashes, run
reduce_pipeline.py and then llvm-reduce.

Assisted-by: Gemini

---
Full diff: https://github.com/llvm/llvm-project/pull/199453.diff


1 File Affected:

- (modified) clang/utils/reduce-clang-crash.py (+361-10) 


``````````diff
diff --git a/clang/utils/reduce-clang-crash.py 
b/clang/utils/reduce-clang-crash.py
index 075ff91af2f5a..0c380b8bb5e75 100755
--- a/clang/utils/reduce-clang-crash.py
+++ b/clang/utils/reduce-clang-crash.py
@@ -1,12 +1,18 @@
 #!/usr/bin/env python3
 """Calls reduction tools to create minimal reproducers for clang crashes.
 
-Unknown arguments are treated at cvise/creduce options.
+For frontend crashes, runs C-Vise/C-Reduce on the source file.
+For middle-end/backend crashes, runs llvm-reduce on emitted LLVM IR:
+  - If `clang -emit-llvm` succeeds and `llc` on the IR crashes, it's a
+    backend crash and llvm-reduce is run with llc as the test tool.
+  - Otherwise, if `clang -emit-llvm -Xclang -disable-llvm-passes` succeeds
+    and `opt` on the IR crashes, it's a middle-end crash and llvm-reduce
+    is run with opt as the test tool.
 
 Output files:
   *.reduced.sh -- crash reproducer with minimal arguments
-  *.reduced.cpp -- the reduced file
-  *.test.sh -- interestingness test for C-Vise
+  *.reduced.cpp or *.reduced.ll -- the reduced file
+  *.test.sh -- interestingness test for C-Vise or llvm-reduce
 """
 
 from argparse import ArgumentParser, RawTextHelpFormatter
@@ -24,6 +30,10 @@
 verbose = False
 creduce_cmd = None
 clang_cmd = None
+llc_cmd = None
+opt_cmd = None
+llvm_reduce_cmd = None
+reduce_pipeline_cmd = None
 
 
 def verbose_print(*args, **kwargs):
@@ -74,24 +84,30 @@ def write_to_script(text, filename):
 
 
 class Reduce(object):
-    def __init__(self, crash_script, file_to_reduce, creduce_flags):
+    def __init__(self, crash_script, file_to_reduce, creduce_flags, 
llvm_reduce_flags):
         crash_script_name, crash_script_ext = os.path.splitext(crash_script)
         file_reduce_name, file_reduce_ext = os.path.splitext(file_to_reduce)
 
         self.testfile = file_reduce_name + ".test.sh"
         self.crash_script = crash_script_name + ".reduced" + crash_script_ext
-        self.file_to_reduce = file_reduce_name + ".reduced" + file_reduce_ext
-        shutil.copy(file_to_reduce, self.file_to_reduce)
+        self.reduced_source_file = file_reduce_name + ".reduced" + 
file_reduce_ext
+        self.file_to_reduce = file_to_reduce
 
         self.clang = clang_cmd
         self.clang_args = []
         self.expected_output = []
         self.needs_stack_trace = False
         self.creduce_flags = ["--tidy"] + creduce_flags
+        if "--n" not in self.creduce_flags:
+            self.creduce_flags += ["--n", str(max(4, 
multiprocessing.cpu_count() // 2))]
+        self.llvm_reduce_flags = llvm_reduce_flags
 
         self.read_clang_args(crash_script, file_to_reduce)
         self.read_expected_output()
 
+    def prepare_source_reduction(self):
+        shutil.copy(self.file_to_reduce, self.reduced_source_file)
+
     def get_crash_cmd(self, cmd=None, args=None, filename=None):
         if not cmd:
             cmd = self.clang
@@ -121,9 +137,56 @@ def read_clang_args(self, crash_script, filename):
             if cmd[i] == filename:
                 del cmd[i]
                 break
+
+        if "-cc1" not in cmd:
+            cmd = self.driver_to_cc1(cmd, filename)
+
         self.clang_args = cmd
         verbose_print("Clang arguments:", quote_cmd(self.clang_args))
 
+    def driver_to_cc1(self, driver_args, filename):
+        """Convert a driver-mode invocation to its cc1 form via `clang 
-###`."""
+        print("Driver command detected; using `clang -###` to get cc1 
invocation...")
+        invocation = [self.clang, "-###"] + driver_args + [filename]
+        verbose_print("Running:", quote_cmd(invocation))
+        p = subprocess.Popen(
+            invocation, stdout=subprocess.DEVNULL, stderr=subprocess.PIPE
+        )
+        _, output = p.communicate()
+        if p.returncode != 0:
+            sys.exit(
+                "ERROR: `clang -###` failed with exit code %d:\n%s"
+                % (p.returncode, output.decode("utf-8", errors="replace"))
+            )
+
+        cc1_cmd = None
+        for line in output.decode("utf-8", errors="replace").splitlines():
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                parsed = shlex.split(line)
+            except ValueError:
+                continue
+            if "-cc1" in parsed:
+                cc1_cmd = parsed
+                break
+        if cc1_cmd is None:
+            sys.exit("ERROR: could not extract a cc1 invocation from `clang 
-###`")
+
+        # Drop the executable
+        del cc1_cmd[0]
+        # Drop the last arg that matches the input filename, skipping
+        # `-main-file-name <basename>` since that takes the basename as a 
value.
+        target_base = os.path.basename(filename)
+        for i in range(len(cc1_cmd) - 1, -1, -1):
+            if cc1_cmd[i] == filename or os.path.basename(cc1_cmd[i]) == 
target_base:
+                if i > 0 and cc1_cmd[i - 1] == "-main-file-name":
+                    continue
+                del cc1_cmd[i]
+                break
+        return cc1_cmd
+
     def read_expected_output(self):
         print("\nGetting expected crash output...")
         p = subprocess.Popen(
@@ -265,6 +328,200 @@ def clang_preprocess(self):
             except subprocess.CalledProcessError:
                 print("Preprocessing failed")
 
+    def emit_llvm_ir(self, output_file, disable_passes=False):
+        """Try to emit textual LLVM IR with `clang -cc1 -emit-llvm -S`.
+
+        Returns True if clang exited successfully (no crash, IR was written).
+        """
+        args = []
+        skip_next = False
+        for arg in self.clang_args:
+            if skip_next:
+                skip_next = False
+                continue
+            if arg in {
+                "-fsyntax-only",
+                "-emit-llvm",
+                "-emit-llvm-bc",
+                "-emit-llvm-only",
+                "-emit-obj",
+                "-S",
+            }:
+                continue
+            if arg == "-o":
+                skip_next = True
+                continue
+            args.append(arg)
+
+        extra = ["-emit-llvm", "-o", output_file]
+        if disable_passes:
+            extra.append("-disable-llvm-passes")
+        cmd = [self.clang] + args + extra + [self.file_to_reduce]
+        verbose_print("Emitting LLVM IR:", quote_cmd(cmd))
+        p = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, 
stderr=subprocess.DEVNULL)
+        p.communicate()
+        return p.returncode == 0
+
+    def get_opt_llc_args(self):
+        """Extract args from clang_args that should be forwarded to llc/opt."""
+        opt_level = "-O2"
+        for a in self.clang_args:
+            if re.match(r"^-O[0-3sz]$", a):
+                opt_level = a
+        return [opt_level]
+
+    def check_tool_crash(self, tool_cmd):
+        """Run tool_cmd and check whether the expected crash output appears."""
+        p = subprocess.Popen(tool_cmd, stdout=subprocess.PIPE, 
stderr=subprocess.STDOUT)
+        out, _ = p.communicate()
+        return all(msg in out.decode("utf-8") for msg in self.expected_output)
+
+    def try_llvm_ir_crash(self, ir_file):
+        """Try to reproduce the crash with llc or opt on emitted LLVM IR.
+
+        Writes the IR to `ir_file` if successful.
+        Returns (tool_path, tool_args) on success, or None.
+        """
+        if llc_cmd:
+            print("\nTrying to reproduce crash with llc on optimized LLVM 
IR...")
+            if self.emit_llvm_ir(ir_file, disable_passes=False):
+                llc_args = self.get_opt_llc_args()
+                if self.check_tool_crash([llc_cmd] + llc_args + [ir_file]):
+                    print("Crash reproduces with llc -- treating as backend 
crash")
+                    return (llc_cmd, llc_args)
+                print("Crash does not reproduce with llc")
+            else:
+                print("clang -emit-llvm did not complete")
+
+        if opt_cmd:
+            print("\nTrying to reproduce crash with opt on unoptimized LLVM 
IR...")
+            if self.emit_llvm_ir(ir_file, disable_passes=True):
+                opt_args = self.get_opt_llc_args()
+                if self.check_tool_crash([opt_cmd] + opt_args + [ir_file]):
+                    print("Crash reproduces with opt -- treating as middle-end 
crash")
+                    reduced = self.run_reduce_pipeline(ir_file, opt_args)
+                    if reduced is not None and self.check_tool_crash(
+                        [opt_cmd] + reduced + [ir_file]
+                    ):
+                        opt_args = reduced
+                    return (opt_cmd, opt_args)
+                print("Crash does not reproduce with opt")
+            else:
+                print("clang -emit-llvm -disable-llvm-passes did not complete")
+
+        return None
+
+    def run_reduce_pipeline(self, ir_file, opt_args):
+        """Run reduce_pipeline.py to narrow down the failing opt pipeline.
+
+        Returns a new list of opt args (with -passes=...) on success, replacing
+        the -O level, or None if reduce_pipeline is unavailable or failed.
+        Overwrites `ir_file` with the (possibly reduced) intermediate IR.
+        """
+        if not reduce_pipeline_cmd:
+            return None
+
+        passes = "default<O2>"
+        extra = []
+        for a in opt_args:
+            m = re.match(r"^-O([0-3sz])$", a)
+            if m:
+                passes = "default<O%s>" % m.group(1)
+            else:
+                extra.append(a)
+
+        output_file = os.path.splitext(ir_file)[0] + ".pipeline.ll"
+        print("\nRunning reduce_pipeline.py to reduce the opt pipeline...")
+        cmd = [
+            sys.executable,
+            reduce_pipeline_cmd,
+            "--opt-binary=" + opt_cmd,
+            "--input=" + ir_file,
+            "--output=" + output_file,
+            "--passes=" + passes,
+        ] + extra
+        verbose_print(quote_cmd(cmd))
+        result = subprocess.run(cmd, capture_output=True, text=True)
+        if verbose:
+            sys.stdout.write(result.stdout)
+            sys.stderr.write(result.stderr)
+        if result.returncode != 0:
+            print("reduce_pipeline.py failed; keeping original pipeline")
+            return None
+
+        reduced_passes = passes
+        for line in reversed(result.stdout.splitlines()):
+            m = re.match(r'^-passes="(.*)"$', line)
+            if m:
+                reduced_passes = m.group(1)
+                break
+
+        if os.path.isfile(output_file):
+            shutil.move(output_file, ir_file)
+
+        print("Reduced opt pipeline:", reduced_passes)
+        return ["-passes=" + reduced_passes]
+
+    def write_llvm_reduce_test(self, tool_cmd, tool_args):
+        """Write an interestingness test for llvm-reduce.
+
+        The test receives the candidate IR file as $1.
+        """
+        print("\nCreating llvm-reduce interestingness test...")
+
+        disable_symbolization = ""
+        if not self.needs_stack_trace:
+            disable_symbolization = "export LLVM_DISABLE_SYMBOLIZATION=1"
+
+        invocation = quote_cmd([tool_cmd] + tool_args) + ' "$1"'
+
+        output = """#!/bin/bash
+%s
+if %s >& t.log ; then
+  exit 1
+fi
+""" % (
+            disable_symbolization,
+            invocation,
+        )
+
+        for msg in self.expected_output:
+            output += "grep -F %s t.log || exit 1\n" % shlex.quote(msg)
+
+        write_to_script(output, self.testfile)
+
+    def run_llvm_reduce(self, tool_cmd, tool_args, ir_file):
+        self.write_llvm_reduce_test(tool_cmd, tool_args)
+
+        testfile_abs = os.path.abspath(self.testfile)
+        returncode = subprocess.call([testfile_abs, ir_file], 
stdout=subprocess.DEVNULL)
+        if returncode:
+            sys.exit("The interestingness test does not pass for the original 
IR file.")
+
+        print("\nRunning llvm-reduce...")
+        full_cmd = (
+            [
+                llvm_reduce_cmd,
+                "--test=" + testfile_abs,
+                "-o",
+                ir_file,
+            ]
+            + self.llvm_reduce_flags
+            + [ir_file]
+        )
+        verbose_print(quote_cmd(full_cmd))
+        try:
+            subprocess.check_call(full_cmd)
+        except KeyboardInterrupt:
+            print("\n\nctrl-c detected, killed llvm-reduce")
+        except subprocess.CalledProcessError as e:
+            print("llvm-reduce failed:", e)
+            return
+
+        reduced_cmd = quote_cmd([tool_cmd] + tool_args + [ir_file])
+        write_to_script(reduced_cmd, self.crash_script)
+        print("Reduced command:", reduced_cmd)
+
     @staticmethod
     def filter_args(
         args, opts_equal=[], opts_startswith=[], opts_one_arg_startswith=[]
@@ -432,6 +689,10 @@ def main():
     global verbose
     global creduce_cmd
     global clang_cmd
+    global llc_cmd
+    global opt_cmd
+    global llvm_reduce_cmd
+    global reduce_pipeline_cmd
 
     parser = ArgumentParser(description=__doc__, 
formatter_class=RawTextHelpFormatter)
     parser.add_argument(
@@ -453,6 +714,35 @@ def main():
         help="The path to the `clang` executable. "
         "By default uses the llvm-bin directory.",
     )
+    parser.add_argument(
+        "--llc",
+        dest="llc",
+        type=str,
+        help="The path to the `llc` executable. "
+        "By default uses the llvm-bin directory.",
+    )
+    parser.add_argument(
+        "--opt",
+        dest="opt",
+        type=str,
+        help="The path to the `opt` executable. "
+        "By default uses the llvm-bin directory.",
+    )
+    parser.add_argument(
+        "--llvm-reduce",
+        dest="llvm_reduce",
+        type=str,
+        help="The path to the `llvm-reduce` executable. "
+        "By default uses the llvm-bin directory. Required to reduce IR-level 
crashes.",
+    )
+    parser.add_argument(
+        "--reduce-pipeline",
+        dest="reduce_pipeline",
+        type=str,
+        help="The path to `reduce_pipeline.py`. "
+        "Default: llvm/utils/reduce_pipeline.py relative to this script. "
+        "Used to reduce the opt pass pipeline for middle-end crashes.",
+    )
     parser.add_argument(
         "--creduce",
         dest="creduce",
@@ -460,23 +750,84 @@ def main():
         help="The path to the `creduce` or `cvise` executable. "
         "Required if neither `creduce` nor `cvise` are on PATH.",
     )
+    parser.add_argument(
+        "--no-llvm-reduce",
+        dest="no_llvm_reduce",
+        action="store_true",
+        help="Skip IR-level reduction with llvm-reduce, always run 
C-Vise/creduce.",
+    )
+    parser.add_argument(
+        "--creduce-flag",
+        dest="extra_creduce_flags",
+        action="append",
+        default=[],
+        help="Extra flags to pass to creduce/cvise. Can be specified multiple 
times.",
+    )
+    parser.add_argument(
+        "--llvm-reduce-flag",
+        dest="llvm_reduce_flags",
+        action="append",
+        default=[],
+        help="Extra flags to pass to llvm-reduce. Can be specified multiple 
times.",
+    )
     parser.add_argument("-v", "--verbose", action="store_true")
-    args, creduce_flags = parser.parse_known_args()
+    args = parser.parse_args()
     verbose = args.verbose
     llvm_bin = os.path.abspath(args.llvm_bin) if args.llvm_bin else None
     creduce_cmd = check_cmd("cvise", None, args.creduce, 
return_none_if_not_found=True)
     if not creduce_cmd:
         creduce_cmd = check_cmd("creduce", None, args.creduce)
     clang_cmd = check_cmd("clang", llvm_bin, args.clang)
+    llc_cmd = check_cmd("llc", llvm_bin, args.llc, 
return_none_if_not_found=True)
+    opt_cmd = check_cmd("opt", llvm_bin, args.opt, 
return_none_if_not_found=True)
+    llvm_reduce_cmd = check_cmd(
+        "llvm-reduce", llvm_bin, args.llvm_reduce, 
return_none_if_not_found=True
+    )
+    if args.reduce_pipeline:
+        reduce_pipeline_cmd = os.path.abspath(args.reduce_pipeline)
+        if not os.path.isfile(reduce_pipeline_cmd):
+            sys.exit("ERROR: %s does not exist" % reduce_pipeline_cmd)
+    else:
+        default_rp = os.path.normpath(
+            os.path.join(
+                os.path.dirname(os.path.abspath(__file__)),
+                "..",
+                "..",
+                "llvm",
+                "utils",
+                "reduce_pipeline.py",
+            )
+        )
+        reduce_pipeline_cmd = default_rp if os.path.isfile(default_rp) else 
None
 
     crash_script = check_file(args.crash_script[0])
     file_to_reduce = check_file(args.file_to_reduce[0])
 
-    if "--n" not in creduce_flags:
-        creduce_flags += ["--n", str(max(4, multiprocessing.cpu_count() // 2))]
+    r = Reduce(
+        crash_script,
+        file_to_reduce,
+        args.extra_creduce_flags,
+        args.llvm_reduce_flags,
+    )
 
-    r = Reduce(crash_script, file_to_reduce, creduce_flags)
+    if not args.no_llvm_reduce and llvm_reduce_cmd and (llc_cmd or opt_cmd):
+        ir_file = os.path.splitext(file_to_reduce)[0] + ".reduced.ll"
+        ir_crash = r.try_llvm_ir_crash(ir_file)
+        if ir_crash:
+            tool_cmd, tool_args = ir_crash
+            r.run_llvm_reduce(tool_cmd, tool_args, ir_file)
+            return
+        print(
+            "\nCould not reproduce crash at the IR level, "
+            "falling back to source-level reduction."
+        )
+    elif not args.no_llvm_reduce and not llvm_reduce_cmd:
+        verbose_print(
+            "llvm-reduce not found; skipping IR-level reduction. "
+            "Pass --llvm-reduce or --llvm-bin to enable it."
+        )
 
+    r.prepare_source_reduction()
     r.simplify_clang_args()
     r.write_interestingness_test()
     r.clang_preprocess()

``````````

</details>


https://github.com/llvm/llvm-project/pull/199453
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to