hliao created this revision.
hliao added reviewers: tra, yaxunl.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

- The device compilation needs to have a consistent source code compared to the 
corresponding host compilation. If macros based on the host-specific target 
processor is not properly populated, the device compilation may fail due to the 
inconsistent source after the preprocessor. So far, only the host triple is 
used to build the macros. If a detailed host CPU target or certain features are 
specified, macros derived from them won't be populated properly, e.g. 
`__SSE3__` won't be added unless `+sse3` feature is present. On Windows 
compilation compatible with MSVC, that missing macros result in that intrinsics 
are not included and cause device compilation failure on the host-side source.

- This patch addresses this issue by introducing two `cc1` options, i.e., 
`-aux-target-cpu` and `-aux-target-feature`. If a specific host CPU target or 
certain features are specified, the compiler driver will append them during the 
construction of the offline compilation actions. Then, the toolchain in `cc1` 
phase will populate macros accordingly.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D73942

Files:
  clang/include/clang/Driver/CC1Options.td
  clang/include/clang/Frontend/FrontendOptions.h
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/lib/Frontend/CompilerInstance.cpp
  clang/lib/Frontend/CompilerInvocation.cpp
  clang/test/Driver/hip-host-cpu-features.hip
  clang/test/Preprocessor/hip-host-cpu-macros.cu

Index: clang/test/Preprocessor/hip-host-cpu-macros.cu
===================================================================
--- /dev/null
+++ clang/test/Preprocessor/hip-host-cpu-macros.cu
@@ -0,0 +1,13 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+#ifdef __HIP_DEVICE_COMPILE__
+DEVICE __SSE3__
+#else
+HOST __SSE3__
+#endif
+
+// RUN: %clang -x hip -E -target x86_64-linux-gnu -msse3 --cuda-gpu-arch=gfx803 -nogpulib -o - %s 2>&1 | FileCheck %s
+
+// CHECK-NOT: SSE3
Index: clang/test/Driver/hip-host-cpu-features.hip
===================================================================
--- /dev/null
+++ clang/test/Driver/hip-host-cpu-features.hip
@@ -0,0 +1,14 @@
+// REQUIRES: clang-driver
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+
+// RUN: %clang -### -c -target x86_64-linux-gnu -march=znver2 -x hip --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTCPU
+// RUN: %clang -### -c -target x86_64-linux-gnu -msse3 -x hip --cuda-gpu-arch=gfx803 -nogpulib %s 2>&1 | FileCheck %s -check-prefix=HOSTSSE3
+
+// HOSTCPU: "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// HOSTCPU-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// HOSTCPU-SAME: "-aux-target-cpu" "znver2"
+
+// HOSTSSE3: "-cc1" "-triple" "amdgcn-amd-amdhsa"
+// HOSTSSE3-SAME: "-aux-triple" "x86_64-unknown-linux-gnu"
+// HOSTSSE3-SAME: "-aux-target-feature" "+sse3"
Index: clang/lib/Frontend/CompilerInvocation.cpp
===================================================================
--- clang/lib/Frontend/CompilerInvocation.cpp
+++ clang/lib/Frontend/CompilerInvocation.cpp
@@ -1931,6 +1931,10 @@
   Opts.OverrideRecordLayoutsFile =
       std::string(Args.getLastArgValue(OPT_foverride_record_layout_EQ));
   Opts.AuxTriple = std::string(Args.getLastArgValue(OPT_aux_triple));
+  if (Args.hasArg(OPT_aux_target_cpu))
+    Opts.AuxTargetCPU = std::string(Args.getLastArgValue(OPT_aux_target_cpu));
+  if (Args.hasArg(OPT_aux_target_feature))
+    Opts.AuxTargetFeatures = Args.getAllArgValues(OPT_aux_target_feature);
   Opts.StatsFile = std::string(Args.getLastArgValue(OPT_stats_file));
 
   if (const Arg *A = Args.getLastArg(OPT_arcmt_check,
Index: clang/lib/Frontend/CompilerInstance.cpp
===================================================================
--- clang/lib/Frontend/CompilerInstance.cpp
+++ clang/lib/Frontend/CompilerInstance.cpp
@@ -923,6 +923,10 @@
       !getFrontendOpts().AuxTriple.empty()) {
     auto TO = std::make_shared<TargetOptions>();
     TO->Triple = llvm::Triple::normalize(getFrontendOpts().AuxTriple);
+    if (getFrontendOpts().AuxTargetCPU)
+      TO->CPU = getFrontendOpts().AuxTargetCPU.getValue();
+    if (getFrontendOpts().AuxTargetFeatures)
+      TO->FeaturesAsWritten = getFrontendOpts().AuxTargetFeatures.getValue();
     TO->HostTriple = getTarget().getTriple().str();
     setAuxTarget(TargetInfo::CreateTargetInfo(getDiagnostics(), TO));
   }
Index: clang/lib/Driver/ToolChains/Clang.cpp
===================================================================
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -309,7 +309,7 @@
 
 static void getTargetFeatures(const ToolChain &TC, const llvm::Triple &Triple,
                               const ArgList &Args, ArgStringList &CmdArgs,
-                              bool ForAS) {
+                              bool ForAS, bool IsAux = false) {
   const Driver &D = TC.getDriver();
   std::vector<StringRef> Features;
   switch (Triple.getArch()) {
@@ -387,7 +387,7 @@
     if (Last != I)
       continue;
 
-    CmdArgs.push_back("-target-feature");
+    CmdArgs.push_back(IsAux ? "-aux-target-feature" : "-target-feature");
     CmdArgs.push_back(Name.data());
   }
 }
@@ -4613,6 +4613,18 @@
                    AsynchronousUnwindTables))
     CmdArgs.push_back("-munwind-tables");
 
+  // Prepare `-aux-target-cpu` and `-aux-target-feature`.
+  if ((IsCuda && JA.isDeviceOffloading(Action::OFK_Cuda)) ||
+      (IsHIP && JA.isDeviceOffloading(Action::OFK_HIP))) {
+    const ArgList &HostArgs = C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None);
+    std::string HostCPU = getCPUName(HostArgs, *TC.getAuxTriple(), /*FromAs*/false);
+    if (!HostCPU.empty()) {
+      CmdArgs.push_back("-aux-target-cpu");
+      CmdArgs.push_back(Args.MakeArgString(HostCPU));
+    }
+    getTargetFeatures(TC, *TC.getAuxTriple(), HostArgs, CmdArgs, /*ForAS*/false, /*IsAux*/true);
+  }
+
   TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());
 
   // FIXME: Handle -mtune=.
Index: clang/include/clang/Frontend/FrontendOptions.h
===================================================================
--- clang/include/clang/Frontend/FrontendOptions.h
+++ clang/include/clang/Frontend/FrontendOptions.h
@@ -426,9 +426,15 @@
   /// (in the format produced by -fdump-record-layouts).
   std::string OverrideRecordLayoutsFile;
 
-  /// Auxiliary triple for CUDA compilation.
+  /// Auxiliary triple for CUDA/HIP compilation.
   std::string AuxTriple;
 
+  /// Auxiliary target CPU for CUDA/HIP compilation.
+  Optional<std::string> AuxTargetCPU;
+
+  /// Auxiliary target features for CUDA/HIP compilation.
+  Optional<std::vector<std::string>> AuxTargetFeatures;
+
   /// Filename to write statistics to.
   std::string StatsFile;
 
Index: clang/include/clang/Driver/CC1Options.td
===================================================================
--- clang/include/clang/Driver/CC1Options.td
+++ clang/include/clang/Driver/CC1Options.td
@@ -482,6 +482,10 @@
 def ast_merge : Separate<["-"], "ast-merge">,
   MetaVarName<"<ast file>">,
   HelpText<"Merge the given AST file into the translation unit being compiled.">;
+def aux_target_cpu : Separate<["-"], "aux-target-cpu">,
+  HelpText<"Target a specific auxiliary cpu type">;
+def aux_target_feature : Separate<["-"], "aux-target-feature">,
+  HelpText<"Target specific auxiliary attributes">;
 def aux_triple : Separate<["-"], "aux-triple">,
   HelpText<"Auxiliary target triple.">;
 def code_completion_at : Separate<["-"], "code-completion-at">,
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to