https://github.com/MrSidims created 
https://github.com/llvm/llvm-project/pull/189383

We should be able to allow most optimizations before SPIR-V generation, as long 
as these optimizations are more or less target-agnostic.

This patch sets -O3 as the default for HIP compilation through SPIR-V and 
respects a user-provided optimization level. Regardless of the optimization 
level, target-specific optimizations such as vectorization and unrolling are 
disabled.

>From 0240c8254b0d1d6950a312f0f6cc661779494d8c Mon Sep 17 00:00:00 2001
From: Dmitry Sidorov <[email protected]>
Date: Mon, 30 Mar 2026 08:40:39 -0500
Subject: [PATCH] [HIP][SPIR-V] Enable optimizations for amdgcnspirv target

We should be able to allow most optimizations before SPIR-V generation,
as long as these optimizations are more or less target-agnostic.

This patch sets -O3 as the default for HIP compilation through SPIR-V
and respects a user-provided optimization level. Regardless of the
optimization level, target-specific optimizations such as vectorization
and unrolling are disabled.
---
 clang/lib/Driver/ToolChains/AMDGPU.cpp      | 20 +++++------
 clang/lib/Driver/ToolChains/HIPAMD.cpp      | 30 +++++++++++++----
 clang/test/Driver/hip-spirv-backend-opt.c   |  2 +-
 clang/test/Driver/hip-spirv-optimizations.c | 37 +++++++++++++++++++++
 clang/test/Driver/hip-toolchain-no-rdc.hip  |  2 +-
 clang/test/Driver/spirv-amd-toolchain.c     |  4 +--
 6 files changed, 72 insertions(+), 23 deletions(-)
 create mode 100644 clang/test/Driver/hip-spirv-optimizations.c

diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index 03bd88f0d4f475..a3e10c848eef4c 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -849,19 +849,15 @@ void AMDGPUToolChain::addClangTargetOptions(
     CC1Args.push_back("-fapply-global-visibility-to-externs");
   }
 
-  // For SPIR-V we want to retain the pristine output of Clang CodeGen, since
-  // optimizations might lose structure / information that is necessary for
-  // generating optimal concrete AMDGPU code.
-  // TODO: using the below option is a temporary placeholder until Clang
-  //       provides the required functionality, which essentially boils down to
-  //       -O0 being refactored / reworked to not imply optnone / remove TBAA.
-  //       Once that is added, we should pivot to that functionality, being
-  //       mindful to not corrupt the user provided and subsequently embedded
-  //       command-line (i.e. if the user asks for -O3 this is what the
-  //       finalisation should use).
+  // For SPIR-V, enable basic optimizations but disable target-specific
+  // transformations that could harm JIT performance.
   if (getTriple().isSPIRV() &&
-      !DriverArgs.hasArg(options::OPT_disable_llvm_optzns))
-    CC1Args.push_back("-disable-llvm-optzns");
+      !DriverArgs.hasArg(options::OPT_disable_llvm_optzns)) {
+    CC1Args.append({"-mllvm", "-vectorize-loops=false"});
+    CC1Args.append({"-mllvm", "-vectorize-slp=false"});
+    CC1Args.push_back("-fno-unroll-loops");
+    CC1Args.append({"-mllvm", "-interleave-loops=false"});
+  }
 
   if (DeviceOffloadingKind == Action::OFK_None)
     addOpenCLBuiltinsLib(getDriver(), getTriple(), DriverArgs, CC1Args);
diff --git a/clang/lib/Driver/ToolChains/HIPAMD.cpp 
b/clang/lib/Driver/ToolChains/HIPAMD.cpp
index b08f610b21a162..c4e473dc193248 100644
--- a/clang/lib/Driver/ToolChains/HIPAMD.cpp
+++ b/clang/lib/Driver/ToolChains/HIPAMD.cpp
@@ -177,7 +177,9 @@ void AMDGCN::Linker::constructLinkAndEmitSpirvCommand(
     const char *Triple =
         C.getArgs().MakeArgString("-triple=spirv64-amd-amdhsa");
 
-    CmdArgs.append({"-cc1", Triple, "-emit-obj", "-disable-llvm-optzns",
+    CmdArgs.append({"-cc1", Triple, "-emit-obj", "-mllvm",
+                    "-vectorize-loops=false", "-mllvm", "-vectorize-slp=false",
+                    "-fno-unroll-loops", "-mllvm", "-interleave-loops=false",
                     LinkedBCFile.getFilename(), "-o", Output.getFilename()});
 
     const Driver &Driver = getToolChain().getDriver();
@@ -272,12 +274,18 @@ void HIPAMDToolChain::addClangTargetOptions(
     // with options that match the user-supplied ones.
     if (!DriverArgs.hasArg(options::OPT_fembed_bitcode_marker))
       CC1Args.push_back("-fembed-bitcode=marker");
-    // For SPIR-V we want to retain the pristine output of Clang CodeGen, since
-    // optimizations might lose structure / information that is necessary for
-    // generating optimal concrete AMDGPU code. We duplicate this because the
-    // HIP TC doesn't invoke the base AMDGPU TC addClangTargetOptions.
-    if (!DriverArgs.hasArg(options::OPT_disable_llvm_passes))
-      CC1Args.push_back("-disable-llvm-passes");
+    // Enable basic optimizations but disable target-specific transformations
+    // that could harm JIT performance. The JIT will make target-specific
+    // decisions. Users can pass -disable-llvm-passes to disable all opts.
+    if (!DriverArgs.hasArg(options::OPT_disable_llvm_passes)) {
+      // Disable vectorization (problematic with SPIR-V, let JIT decide)
+      CC1Args.append({"-mllvm", "-vectorize-loops=false"});
+      CC1Args.append({"-mllvm", "-vectorize-slp=false"});
+      // Disable loop unrolling (let JIT decide based on target)
+      CC1Args.push_back("-fno-unroll-loops");
+      // Disable loop interleaving
+      CC1Args.append({"-mllvm", "-interleave-loops=false"});
+    }
     return; // No DeviceLibs for SPIR-V.
   }
 
@@ -315,6 +323,14 @@ HIPAMDToolChain::TranslateArgs(const 
llvm::opt::DerivedArgList &Args,
     DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_flto_partitions_EQ),
                       "8");
 
+  // For SPIR-V, apply the default -O3 optimization level if no optimization
+  // level is specified, matching the behavior of OpenCL compilation.
+  if (getTriple().isSPIRV() &&
+      !Args.hasArg(options::OPT_O, options::OPT_O0, options::OPT_O4,
+                   options::OPT_Ofast))
+    DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_O),
+                      getOptionDefault(options::OPT_O));
+
   return DAL;
 }
 
diff --git a/clang/test/Driver/hip-spirv-backend-opt.c 
b/clang/test/Driver/hip-spirv-backend-opt.c
index 10d9a0b01caf32..475a262ece8563 100644
--- a/clang/test/Driver/hip-spirv-backend-opt.c
+++ b/clang/test/Driver/hip-spirv-backend-opt.c
@@ -56,6 +56,6 @@
 // CHECK-SPIRV-BACKEND-BINARY: "{{.*clang(\.exe)?}}" "-cc1" "-triple" 
"spirv64-amd-amdhsa" {{.*}} "-emit-obj"
 // CHECK-SPIRV-BACKEND-BC: "{{.*clang(\.exe)?}}" "-cc1" "-triple" 
"spirv64-amd-amdhsa" {{.*}} "-emit-llvm-bc"
 // CHECK-SPIRV-BACKEND-LL: "{{.*clang(\.exe)?}}" "-cc1" "-triple" 
"spirv64-amd-amdhsa" {{.*}} "-emit-llvm"
-// CHECK-SPIRV-BACKEND-BINARY-EQ-TRIPLE: "{{.*clang(\.exe)?}}" "-cc1" 
{{.*}}"-triple=spirv64-amd-amdhsa" {{.*}}"-emit-obj"
+// CHECK-SPIRV-BACKEND-BINARY-EQ-TRIPLE: "{{.*clang(\.exe)?}}" "-cc1" 
"-triple=spirv64-amd-amdhsa" "-emit-obj" "-mllvm" "-vectorize-loops=false"
 // CHECK-FGPU-RDC-SAME: {{.*}} "-fgpu-rdc"
 // CHECK-CLANG-LINKER-WRAPPER: "{{.*}}clang-linker-wrapper" 
"--should-extract=amdgcnspirv" {{.*}} 
"--device-compiler=spirv64-amd-amdhsa=-use-spirv-backend"
diff --git a/clang/test/Driver/hip-spirv-optimizations.c 
b/clang/test/Driver/hip-spirv-optimizations.c
new file mode 100644
index 00000000000000..92b02d905b7c46
--- /dev/null
+++ b/clang/test/Driver/hip-spirv-optimizations.c
@@ -0,0 +1,37 @@
+// Verify SPIR-V compilation enables basic optimizations with targeted disables
+// and defaults to -O3
+// RUN: %clang --target=x86_64-linux-gnu --offload-arch=amdgcnspirv \
+// RUN:         -nogpuinc -nogpulib -### -x hip %s 2>&1 | FileCheck %s
+
+// CHECK-NOT: -disable-llvm-passes
+// CHECK-NOT: -disable-llvm-optzns
+// CHECK-DAG: "-O3"
+// CHECK-DAG: "-mllvm" "-vectorize-loops=false"
+// CHECK-DAG: "-mllvm" "-vectorize-slp=false"
+// CHECK-DAG: "-fno-unroll-loops"
+// CHECK-DAG: "-mllvm" "-interleave-loops=false"
+
+// Verify user-specified optimization level is respected and targeted disables
+// are still present
+// RUN: %clang --target=x86_64-linux-gnu --offload-arch=amdgcnspirv \
+// RUN:         -nogpuinc -nogpulib -O2 -### -x hip %s 2>&1 \
+// RUN:         | FileCheck %s --check-prefix=CHECK-O2
+// CHECK-O2: "-O2"
+// CHECK-O2-NOT: "-O3"
+// CHECK-O2-DAG: "-mllvm" "-vectorize-loops=false"
+// CHECK-O2-DAG: "-mllvm" "-vectorize-slp=false"
+// CHECK-O2-DAG: "-fno-unroll-loops"
+// CHECK-O2-DAG: "-mllvm" "-interleave-loops=false"
+
+// Verify -O0 is respected
+// RUN: %clang --target=x86_64-linux-gnu --offload-arch=amdgcnspirv \
+// RUN:         -nogpuinc -nogpulib -O0 -### -x hip %s 2>&1 \
+// RUN:         | FileCheck %s --check-prefix=CHECK-O0
+// CHECK-O0: "-O0"
+// CHECK-O0-NOT: "-O3"
+
+// Verify user can still disable all optimizations
+// RUN: %clang --target=x86_64-linux-gnu --offload-arch=amdgcnspirv \
+// RUN:         -nogpuinc -nogpulib -disable-llvm-passes -### -x hip %s 2>&1 \
+// RUN:         | FileCheck %s --check-prefix=CHECK-DISABLED
+// CHECK-DISABLED: -disable-llvm-passes
diff --git a/clang/test/Driver/hip-toolchain-no-rdc.hip 
b/clang/test/Driver/hip-toolchain-no-rdc.hip
index 024129e4435ff0..b8058468cca044 100644
--- a/clang/test/Driver/hip-toolchain-no-rdc.hip
+++ b/clang/test/Driver/hip-toolchain-no-rdc.hip
@@ -212,7 +212,7 @@
 // Check mixed AMDGCNSPIRV and concrete GPU arch.
 //
 
-// AMDGCNSPIRV: "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}}"-emit-llvm-bc" 
{{.*}}"-fembed-bitcode=marker" "-disable-llvm-passes" {{.*}} "-o" 
"[[AMDGCNSPV_BC:.*bc]]"
+// AMDGCNSPIRV: "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}}"-emit-llvm-bc" 
{{.*}}"-fembed-bitcode=marker"{{.*}}"-mllvm" "-vectorize-loops=false"{{.*}} 
"-o" "[[AMDGCNSPV_BC:.*bc]]"
 // AMDGCNSPIRV: {{".*llvm-link.*"}} "-o" "[[AMDGCNSPV_TMP:.*bc]]" 
"[[AMDGCNSPV_BC]]"
 // AMDGCNSPIRV: {{".*llvm-spirv.*"}} "--spirv-max-version=1.6" 
"--spirv-ext=+all,-SPV_KHR_untyped_pointers" {{.*}} "[[AMDGCNSPV_TMP]]" 
{{.*}}"-o" "[[AMDGCNSPV_CO:.*out]]"
 // AMDGCNSPIRV: "-cc1" "-triple" "amdgcn-amd-amdhsa" {{.*}}"-emit-obj" 
{{.*}}"-target-cpu" "gfx900"{{.*}} "-o" "[[GFX900_OBJ:.*o]]"
diff --git a/clang/test/Driver/spirv-amd-toolchain.c 
b/clang/test/Driver/spirv-amd-toolchain.c
index 4c7a673ef85fe7..3808de7f0911c2 100644
--- a/clang/test/Driver/spirv-amd-toolchain.c
+++ b/clang/test/Driver/spirv-amd-toolchain.c
@@ -19,12 +19,12 @@
 
 // RUN: %clang -### --target=spirv64-amd-amdhsa %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=INVOCATION
-// INVOCATION: "-cc1" "-triple" "spirv64-amd-amdhsa" 
{{.*}}"-disable-llvm-optzns" {{.*}} "-o" "[[OUTPUT:.+]]" "-x" "c"
+// INVOCATION: "-cc1" "-triple" "spirv64-amd-amdhsa" {{.*}}"-mllvm" 
"-vectorize-loops=false"{{.*}} "-o" "[[OUTPUT:.+]]" "-x" "c"
 // INVOCATION: "{{.*}}llvm-link" "-o" "[[LINKED_OUTPUT:.+]]" "[[OUTPUT]]"
 // INVOCATION: "{{.*}}llvm-spirv" "--spirv-max-version=1.6" 
"--spirv-ext=+all,-SPV_KHR_untyped_pointers" "--spirv-allow-unknown-intrinsics" 
"--spirv-lower-const-expr" "--spirv-preserve-auxdata" 
"--spirv-debug-info-version=nonsemantic-shader-200" "[[LINKED_OUTPUT]]" "-o" 
"a.out"
 
 // RUN: %clang -### -use-spirv-backend --target=spirv64-amd-amdhsa %s 2>&1 \
 // RUN:   | FileCheck %s --check-prefix=INVOCATION-SPIRV-BACKEND
-// INVOCATION-SPIRV-BACKEND: "-cc1" "-triple" "spirv64-amd-amdhsa" 
{{.*}}"-disable-llvm-optzns" {{.*}} "-o" "[[OUTPUT:.+]]" "-x" "c"
+// INVOCATION-SPIRV-BACKEND: "-cc1" "-triple" "spirv64-amd-amdhsa" 
{{.*}}"-mllvm" "-vectorize-loops=false"{{.*}} "-o" "[[OUTPUT:.+]]" "-x" "c"
 // INVOCATION-SPIRV-BACKEND: "{{.*}}llvm-link" "-o" "[[LINKED_OUTPUT:.+]]" 
"[[OUTPUT]]"
 // INVOCATION-SPIRV-BACKEND: "-cc1" "-triple=spirv64-amd-amdhsa" "-emit-obj" 
{{.*}} "[[LINKED_OUTPUT]]" "-o" "a.out"

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to