[PATCH] D91054: [Clang][OpenMP] Frontend work for sections - D89671

2021-04-15 Thread Chirag Khandelwal via Phabricator via cfe-commits
AMDChirag added a comment.

In D91054#2693514 , @fghanim wrote:

> You can update the tests as long as the output is correct, for
> example when the difference is only in names or in an ordering of basic
> blocks and instructions that doesn't affect correctness, etc.

Yes, the correctness is not affected with the changes to the test case.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91054/new/

https://reviews.llvm.org/D91054

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99503: [clang-format] Inconsistent behavior regarding line break before access modifier

2021-04-15 Thread Max Sagebaum via Phabricator via cfe-commits
Max_S added a comment.

Sorry forgot about that: Max Sagebaum 


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99503/new/

https://reviews.llvm.org/D99503

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100567: BPF: emit debuginfo for Function of DeclRefExpr if requested

2021-04-15 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 337988.
yonghong-song added a comment.

Check FD->isDefined() as well before emitting debuginfo for the declaration. It
is okay to emit a declaration subprogram and later refine it into a definition,
but it is not okay to refine a definition into a declaration, so add the check here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100567/new/

https://reviews.llvm.org/D100567

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/BPF.h
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/test/CodeGen/debug-info-extern-callback.c


Index: clang/test/CodeGen/debug-info-extern-callback.c
===
--- /dev/null
+++ clang/test/CodeGen/debug-info-extern-callback.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -x c -debug-info-kind=limited -triple bpf-linux-gnu 
-emit-llvm %s -o - | FileCheck %s
+
+extern int do_work(int);
+long bpf_helper(void *callback_fn);
+long prog() {
+   return bpf_helper(_work);
+}
+
+// CHECK: declare !dbg ![[FUNC:[0-9]+]] i32 @do_work(i32)
+// CHECK: ![[FUNC]] = !DISubprogram(name: "do_work"
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -12667,7 +12667,7 @@
 Diag(Var->getLocation(), diag::note_private_extern);
   }
 
-  if (Context.getTargetInfo().allowDebugInfoForExternalVar() &&
+  if (Context.getTargetInfo().allowDebugInfoForExternalRef() &&
   !Var->isInvalidDecl() && !getLangOpts().CPlusPlus)
 ExternalDeclarations.push_back(Var);
 
Index: clang/lib/CodeGen/CGExpr.cpp
===
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -2834,8 +2834,20 @@
 return LV;
   }
 
-  if (const auto *FD = dyn_cast(ND))
-return EmitFunctionDeclLValue(*this, E, FD);
+  if (const auto *FD = dyn_cast(ND)) {
+LValue LV = EmitFunctionDeclLValue(*this, E, FD);
+
+// Emit debuginfo for the function declaration if the target wants to.
+if (!FD->isDefined() &&
+getContext().getTargetInfo().allowDebugInfoForExternalRef()) {
+  CGDebugInfo *DI = CGM.getModuleDebugInfo();
+  auto *Fn = dyn_cast(LV.getPointer(*this));
+  if (DI && Fn)
+DI->EmitFunctionDecl(FD, FD->getLocation(), T, Fn);
+}
+
+return LV;
+  }
 
   // FIXME: While we're emitting a binding from an enclosing scope, all other
   // DeclRefExprs we see should be implicitly treated as if they also refer to
Index: clang/lib/Basic/Targets/BPF.h
===
--- clang/lib/Basic/Targets/BPF.h
+++ clang/lib/Basic/Targets/BPF.h
@@ -76,7 +76,7 @@
 return None;
   }
 
-  bool allowDebugInfoForExternalVar() const override { return true; }
+  bool allowDebugInfoForExternalRef() const override { return true; }
 
   CallingConvCheckResult checkCallingConvention(CallingConv CC) const override 
{
 switch (CC) {
Index: clang/include/clang/Basic/TargetInfo.h
===
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -1538,8 +1538,8 @@
 
   virtual void setAuxTarget(const TargetInfo *Aux) {}
 
-  /// Whether target allows debuginfo types for decl only variables.
-  virtual bool allowDebugInfoForExternalVar() const { return false; }
+  /// Whether target allows debuginfo types for decl only variables/functions.
+  virtual bool allowDebugInfoForExternalRef() const { return false; }
 
 protected:
   /// Copy type and layout related info.


Index: clang/test/CodeGen/debug-info-extern-callback.c
===
--- /dev/null
+++ clang/test/CodeGen/debug-info-extern-callback.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -x c -debug-info-kind=limited -triple bpf-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+extern int do_work(int);
+long bpf_helper(void *callback_fn);
+long prog() {
+	return bpf_helper(_work);
+}
+
+// CHECK: declare !dbg ![[FUNC:[0-9]+]] i32 @do_work(i32)
+// CHECK: ![[FUNC]] = !DISubprogram(name: "do_work"
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -12667,7 +12667,7 @@
 Diag(Var->getLocation(), diag::note_private_extern);
   }
 
-  if (Context.getTargetInfo().allowDebugInfoForExternalVar() &&
+  if (Context.getTargetInfo().allowDebugInfoForExternalRef() &&
   !Var->isInvalidDecl() && !getLangOpts().CPlusPlus)
 ExternalDeclarations.push_back(Var);
 
Index: clang/lib/CodeGen/CGExpr.cpp
===
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -2834,8 

[PATCH] D45639: [Driver] Support default libc++ library location on Darwin

2021-04-15 Thread Petr Hosek via Phabricator via cfe-commits
phosek added a comment.

In D45639#2692142 , @ldionne wrote:

> In D45639#2383754 , @smeenai wrote:
>
>> Just following up on this, cos I'm curious :) I have 12.1 now, and I still 
>> only see the C++ headers in the toolchain and not in any of the SDKs.
>
> Look in Xcode 12.5 beta 3, you should see libc++ headers in the SDK. You'll 
> also see headers alongside Clang, however those are not being used. They are 
> just there for some internal reasons but eventually we'll have only one copy 
> of the headers, and they'll be in the SDK.
>
> As I explained in https://reviews.llvm.org/D45639#2360267, I think this is 
> the right way forward. We want LLVM Clang to prefer the libc++.dylib (and 
> headers) shipped in the toolchain if those are present, since that's the most 
> consistent approach.
>
> Just one question: with this patch, do we prefer the library in the SDK or 
> the one in the toolchain if both are present? Can we get into trouble if we 
> have both paths on the `-L` list? I'm trying to think of subtle issues like:
>
>   /lib/libc++.a
>   /lib/libc++.dylib
>
> Which one would we pick here?

It depends on the order: whichever comes first wins. The default order of 
paths that the driver uses is (1) toolchain library paths, (2) library paths 
specified explicitly using `-L`, (3) sysroot library paths. So if 
`/lib/libc++.a` exists, it'd be picked up, otherwise 
`/lib/libc++.dylib` would be used.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D45639/new/

https://reviews.llvm.org/D45639

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100620: [OpenMP] Make sure classes work on the device as they do on the host

2021-04-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added reviewers: JonChesterfield, ABataev, grokos.
Herald added subscribers: guansong, yaxunl.
Herald added a reviewer: bollu.
jdoerfert requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1.
Herald added a project: clang.

We do provide `operator delete(void*)` in `<new>` but it should be
available by default. This is mostly boilerplate to test it and the
unconditional include of `<new>` in the header we always include
on the device.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100620

Files:
  clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
  clang/lib/Headers/openmp_wrappers/new
  clang/test/Headers/Inputs/include/cstdlib
  clang/test/Headers/Inputs/include/new
  clang/test/Headers/target_include_new.cpp


Index: clang/test/Headers/target_include_new.cpp
===
--- /dev/null
+++ clang/test/Headers/target_include_new.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers 
-include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include 
-verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown 
-fopenmp-targets=nvptx64 -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers 
-include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include 
-verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown 
-fopenmp-targets=nvptx64 -emit-llvm %s -fopenmp-is-device 
-fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
+// Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without 
the need to `include `.
+
+// CHECK: define {{.*}}_ZdlPv
+
+#ifndef HEADER
+#define HEADER
+
+class Base {
+  public:
+virtual ~Base() = default;
+};
+
+class Derived : public Base {
+  public:
+#pragma omp declare target
+Derived();
+#pragma omp end declare target
+};
+
+Derived::Derived() { }
+
+int main(void) {
+  #pragma omp target
+  {
+  }
+  return 0;
+}
+#endif
Index: clang/test/Headers/Inputs/include/new
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/new
@@ -0,0 +1,7 @@
+
+namespace std
+{
+
+struct nothrow_t { explicit nothrow_t() = default; };
+
+}
Index: clang/test/Headers/Inputs/include/cstdlib
===
--- clang/test/Headers/Inputs/include/cstdlib
+++ clang/test/Headers/Inputs/include/cstdlib
@@ -2,6 +2,9 @@
 
 #include 
 
+void *malloc(size_t);
+void free(void*);
+
 #if __cplusplus >= 201703L
 extern int abs (int __x) throw()  __attribute__ ((__const__)) ;
 extern long int labs (long int __x) throw() __attribute__ ((__const__)) ;
Index: clang/lib/Headers/openmp_wrappers/new
===
--- clang/lib/Headers/openmp_wrappers/new
+++ clang/lib/Headers/openmp_wrappers/new
@@ -11,7 +11,7 @@
 
 #include_next 
 
-#if defined(__NVPTX__) && defined(_OPENMP)
+#if defined(_OPENMP)
 
 #include 
 
Index: clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
===
--- clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
+++ clang/lib/Headers/openmp_wrappers/__clang_openmp_device_functions.h
@@ -14,6 +14,12 @@
 #error "This file is for OpenMP compilation only."
 #endif
 
+#ifdef __cplusplus
+// Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the
+// need to `include `.
+#include 
+#endif
+
 #pragma omp begin declare variant match(   
\
 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
 


Index: clang/test/Headers/target_include_new.cpp
===
--- /dev/null
+++ clang/test/Headers/target_include_new.cpp
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64 -emit-llvm-bc %s -o %t-ppc-host.bc
+// RUN: %clang_cc1 -internal-isystem %S/../../lib/Headers/openmp_wrappers -include __clang_openmp_device_functions.h -internal-isystem %S/Inputs/include -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s
+// expected-no-diagnostics
+
+// Ensure we make `_ZdlPv`, aka. `operator delete(void*)` available without the need to `include `.
+
+// CHECK: define {{.*}}_ZdlPv
+
+#ifndef HEADER
+#define HEADER
+
+class Base {
+  public:
+virtual ~Base() = default;
+};
+
+class Derived : public Base {
+  public:
+#pragma omp declare target
+

[PATCH] D99949: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-15 Thread Pushpinder Singh via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG7029cffc4e78: [AMDGPU][OpenMP] Add amdgpu-arch tool to list 
AMD GPUs installed (authored by pdhaliwal).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99949/new/

https://reviews.llvm.org/D99949

Files:
  clang/include/clang/Basic/DiagnosticDriverKinds.td
  clang/include/clang/Driver/Options.td
  clang/lib/Driver/ToolChains/AMDGPU.cpp
  clang/lib/Driver/ToolChains/AMDGPU.h
  clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
  clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
  clang/test/Driver/amdgpu-openmp-system-arch-fail.c
  clang/test/Driver/amdgpu-openmp-system-arch.c
  clang/tools/CMakeLists.txt
  clang/tools/amdgpu-arch/AMDGPUArch.cpp
  clang/tools/amdgpu-arch/CMakeLists.txt

Index: clang/tools/amdgpu-arch/CMakeLists.txt
===
--- /dev/null
+++ clang/tools/amdgpu-arch/CMakeLists.txt
@@ -0,0 +1,17 @@
+# //===--===//
+# //
+# // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+# // See https://llvm.org/LICENSE.txt for details.
+# // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+# //
+# //===--===//
+
+find_package(hsa-runtime64 QUIET 1.2.0 HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if (NOT ${hsa-runtime64_FOUND})
+  message(STATUS "Not building amdgpu-arch: hsa-runtime64 not found")
+  return()
+endif()
+
+add_clang_tool(amdgpu-arch AMDGPUArch.cpp)
+
+clang_target_link_libraries(amdgpu-arch PRIVATE hsa-runtime64::hsa-runtime64)
Index: clang/tools/amdgpu-arch/AMDGPUArch.cpp
===
--- /dev/null
+++ clang/tools/amdgpu-arch/AMDGPUArch.cpp
@@ -0,0 +1,59 @@
+//===- AMDGPUArch.cpp - list AMDGPU installed --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===--===//
+//
+// This file implements a tool for detecting name of AMDGPU installed in system
+// using HSA. This tool is used by AMDGPU OpenMP driver.
+//
+//===--===//
+
+#include 
+#include 
+#include 
+
+static hsa_status_t iterateAgentsCallback(hsa_agent_t Agent, void *Data) {
+  hsa_device_type_t DeviceType;
+  hsa_status_t Status =
+  hsa_agent_get_info(Agent, HSA_AGENT_INFO_DEVICE, );
+
+  // continue only if device type if GPU
+  if (Status != HSA_STATUS_SUCCESS || DeviceType != HSA_DEVICE_TYPE_GPU) {
+return Status;
+  }
+
+  std::vector *GPUs =
+  static_cast *>(Data);
+  char GPUName[64];
+  Status = hsa_agent_get_info(Agent, HSA_AGENT_INFO_NAME, GPUName);
+  if (Status != HSA_STATUS_SUCCESS) {
+return Status;
+  }
+  GPUs->push_back(GPUName);
+  return HSA_STATUS_SUCCESS;
+}
+
+int main() {
+  hsa_status_t Status = hsa_init();
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  std::vector GPUs;
+  Status = hsa_iterate_agents(iterateAgentsCallback, );
+  if (Status != HSA_STATUS_SUCCESS) {
+return 1;
+  }
+
+  for (const auto  : GPUs)
+printf("%s\n", GPU.c_str());
+
+  if (GPUs.size() < 1)
+return 1;
+
+  hsa_shut_down();
+  return 0;
+}
Index: clang/tools/CMakeLists.txt
===
--- clang/tools/CMakeLists.txt
+++ clang/tools/CMakeLists.txt
@@ -43,3 +43,5 @@
 
 # libclang may require clang-tidy in clang-tools-extra.
 add_clang_subdirectory(libclang)
+
+add_clang_subdirectory(amdgpu-arch)
Index: clang/test/Driver/amdgpu-openmp-system-arch.c
===
--- /dev/null
+++ clang/test/Driver/amdgpu-openmp-system-arch.c
@@ -0,0 +1,15 @@
+// REQUIRES: system-linux
+// REQUIRES: x86-registered-target
+// REQUIRES: amdgpu-registered-target
+// RUN:   %clang -### --target=x86_64-unknown-linux-gnu -fopenmp -fopenmp-targets=amdgcn-amd-amdhsa -nogpulib --amdgpu-arch-tool=%S/Inputs/amdgpu-arch/amdgpu_arch_gfx906 %s 2>&1 \
+// RUN:   | FileCheck %s
+// CHECK: clang{{.*}}"-cc1"{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu" "[[GFX:gfx906]]"
+// CHECK: llvm-link{{.*}}"-o" "{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc"
+// CHECK: llc{{.*}}amdgpu-openmp-system-arch-{{.*}}-[[GFX]]-linked-{{.*}}.bc" "-mtriple=amdgcn-amd-amdhsa" "-mcpu=[[GFX]]" "-filetype=obj" 

[clang] 7029cff - [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

2021-04-15 Thread Pushpinder Singh via cfe-commits

Author: Pushpinder Singh
Date: 2021-04-16T05:26:20Z
New Revision: 7029cffc4e78556cfe820791c612968bb15b2ffb

URL: 
https://github.com/llvm/llvm-project/commit/7029cffc4e78556cfe820791c612968bb15b2ffb
DIFF: 
https://github.com/llvm/llvm-project/commit/7029cffc4e78556cfe820791c612968bb15b2ffb.diff

LOG: [AMDGPU][OpenMP] Add amdgpu-arch tool to list AMD GPUs installed

This patch adds new clang tool named amdgpu-arch which uses
HSA to detect installed AMDGPU and report back latter's march.
This tool is built only if system has HSA installed.

The value printed by amdgpu-arch is used to fill -march when
latter is not explicitly provided in -Xopenmp-target.

Reviewed By: JonChesterfield, gregrodgers

Differential Revision: https://reviews.llvm.org/D99949

Added: 
clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_different
clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_fail
clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx906
clang/test/Driver/Inputs/amdgpu-arch/amdgpu_arch_gfx908_gfx908
clang/test/Driver/amdgpu-openmp-system-arch-fail.c
clang/test/Driver/amdgpu-openmp-system-arch.c
clang/tools/amdgpu-arch/AMDGPUArch.cpp
clang/tools/amdgpu-arch/CMakeLists.txt

Modified: 
clang/include/clang/Basic/DiagnosticDriverKinds.td
clang/include/clang/Driver/Options.td
clang/lib/Driver/ToolChains/AMDGPU.cpp
clang/lib/Driver/ToolChains/AMDGPU.h
clang/lib/Driver/ToolChains/AMDGPUOpenMP.cpp
clang/tools/CMakeLists.txt

Removed: 




diff  --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td 
b/clang/include/clang/Basic/DiagnosticDriverKinds.td
index 5e580cc4fbb7a..aa3b00c231cbe 100644
--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td
+++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td
@@ -67,6 +67,8 @@ def err_drv_no_hip_runtime : Error<
   "cannot find HIP runtime. Provide its path via --rocm-path, or pass "
   "-nogpuinc to build without HIP runtime.">;
 
+def err_drv_undetermined_amdgpu_arch : Error<
+  "Cannot determine AMDGPU architecture. Consider passing it via -march">;
 def err_drv_cuda_version_unsupported : Error<
   "GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), "
   "but installation at %3 is %4. Use --cuda-path to specify a 
diff erent CUDA "

diff  --git a/clang/include/clang/Driver/Options.td 
b/clang/include/clang/Driver/Options.td
index 9e15712eb2d51..5fbcd64b69376 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -918,6 +918,8 @@ def rocm_path_EQ : Joined<["--"], "rocm-path=">, 
Group,
   HelpText<"ROCm installation path, used for finding and automatically linking 
required bitcode libraries.">;
 def hip_path_EQ : Joined<["--"], "hip-path=">, Group,
   HelpText<"HIP runtime installation path, used for finding HIP version and 
adding HIP include path.">;
+def amdgpu_arch_tool_EQ : Joined<["--"], "amdgpu-arch-tool=">, Group,
+  HelpText<"Tool used for detecting AMD GPU arch in the system.">;
 def rocm_device_lib_path_EQ : Joined<["--"], "rocm-device-lib-path=">, 
Group,
   HelpText<"ROCm device library path. Alternative to rocm-path.">;
 def : Joined<["--"], "hip-device-lib-path=">, Alias;

diff  --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp 
b/clang/lib/Driver/ToolChains/AMDGPU.cpp
index dc9c9751c851d..37da2c05dcf67 100644
--- a/clang/lib/Driver/ToolChains/AMDGPU.cpp
+++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp
@@ -12,10 +12,15 @@
 #include "clang/Basic/TargetID.h"
 #include "clang/Driver/Compilation.h"
 #include "clang/Driver/DriverDiagnostic.h"
+#include "clang/Driver/Options.h"
 #include "llvm/Option/ArgList.h"
+#include "llvm/Support/FileUtilities.h"
+#include "llvm/Support/LineIterator.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/VirtualFileSystem.h"
 
+#define AMDGPU_ARCH_PROGRAM_NAME "amdgpu-arch"
+
 using namespace clang::driver;
 using namespace clang::driver::tools;
 using namespace clang::driver::toolchains;
@@ -715,6 +720,57 @@ void AMDGPUToolChain::checkTargetID(
   }
 }
 
+llvm::SmallVector, 1>
+AMDGPUToolChain::detectSystemGPUs(const ArgList ) const {
+  std::string Program;
+  if (Arg *A = Args.getLastArg(options::OPT_amdgpu_arch_tool_EQ))
+Program = A->getValue();
+  else
+Program = GetProgramPath(AMDGPU_ARCH_PROGRAM_NAME);
+  llvm::SmallString<64> OutputFile;
+  llvm::sys::fs::createTemporaryFile("print-system-gpus", "" /* No Suffix */,
+ OutputFile);
+  llvm::FileRemover OutputRemover(OutputFile.c_str());
+  llvm::Optional Redirects[] = {
+  {""},
+  StringRef(OutputFile),
+  {""},
+  };
+
+  if (llvm::sys::ExecuteAndWait(Program.c_str(), {}, {}, Redirects)) {
+return {};
+  }
+
+  llvm::ErrorOr> OutputBuf =
+  llvm::MemoryBuffer::getFile(OutputFile.c_str());
+  if (!OutputBuf)
+return {};
+
+  llvm::SmallVector, 1> GPUArchs;
+  for (llvm::line_iterator LineIt(**OutputBuf); 

[PATCH] D100590: DeclContext: Fix iterator category

2021-04-15 Thread Björn Schäpers via Phabricator via cfe-commits
HazardyKnusperkeks updated this revision to Diff 337985.
HazardyKnusperkeks added a comment.

Trigger build.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100590/new/

https://reviews.llvm.org/D100590

Files:
  clang/include/clang/AST/DeclBase.h


Index: clang/include/clang/AST/DeclBase.h
===
--- clang/include/clang/AST/DeclBase.h
+++ clang/include/clang/AST/DeclBase.h
@@ -2378,7 +2378,7 @@
 
   using udir_iterator_base =
   llvm::iterator_adaptor_base;
 
   struct udir_iterator : udir_iterator_base {


Index: clang/include/clang/AST/DeclBase.h
===
--- clang/include/clang/AST/DeclBase.h
+++ clang/include/clang/AST/DeclBase.h
@@ -2378,7 +2378,7 @@
 
   using udir_iterator_base =
   llvm::iterator_adaptor_base;
 
   struct udir_iterator : udir_iterator_base {
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100611: [RISCV] Add new attribute __clang_riscv_builtin_alias for intrinsics.

2021-04-15 Thread Kito Cheng via Phabricator via cfe-commits
kito-cheng added a comment.

Could you also check the compiler diagnostic messages? Will it report 
`__builtin_rvv_vadd_vv_i8m1` or `vadd_generic` if the argument types mismatch, 
and which one do you expect? I assume that without `__clang_riscv_builtin_alias` 
clang will report `vadd_generic`?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100611/new/

https://reviews.llvm.org/D100611

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100615: [RISCV][Driver] Make the ordering of CmdArgs consistent between RISCV::Linker and baremetal::Linker

2021-04-15 Thread Kito Cheng via Phabricator via cfe-commits
kito-cheng added a comment.

So I think it's more than a consistency issue; it's a bug fix.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100615/new/

https://reviews.llvm.org/D100615

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100619: [ASTReader] Only mark module out of date if not already compiled

2021-04-15 Thread Ben Barham via Phabricator via cfe-commits
bnbarham created this revision.
bnbarham added a reviewer: akyrtzi.
bnbarham requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

If a module contains errors (i.e. it was built with
-fallow-pcm-with-compiler-errors and had errors) and was from the module
cache, it is marked as out of date - see
a2c1054c303f20be006e9ef20739dbb88bd9ae02.

When a module is imported multiple times in the one compile, this caused
it to be recompiled each time - removing the existing buffer from the
module cache and replacing it. This results in various errors further
down the line.

Instead, only mark the module as out of date if it isn't already
finalized in the module cache.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100619

Files:
  clang/lib/Serialization/ASTReader.cpp
  clang/test/Modules/Inputs/error/error.h
  clang/test/Modules/Inputs/error/module.modulemap
  clang/test/Modules/Inputs/error/use_error_a.h
  clang/test/Modules/Inputs/error/use_error_b.h
  clang/test/Modules/load-module-with-errors.m

Index: clang/test/Modules/load-module-with-errors.m
===
--- clang/test/Modules/load-module-with-errors.m
+++ clang/test/Modules/load-module-with-errors.m
@@ -2,10 +2,13 @@
 // matter in this test.
 
 // pcherror-error@* {{PCH file contains compiler errors}}
-@import error; // notallowerror-error {{could not build module 'error'}}
+@import use_error_a; // notallowerror-error {{could not build module 'use_error_a'}}
+@import use_error_b;
 // expected-no-diagnostics
 
 void test(Error *x) {
+  funca(x);
+  funcb(x);
   [x method];
 }
 
@@ -16,7 +19,16 @@
 // RUN: %clang_cc1 -fmodules -fallow-pcm-with-compiler-errors \
 // RUN:   -fmodule-name=error -o %t/prebuilt/error.pcm \
 // RUN:   -x objective-c -emit-module %S/Inputs/error/module.modulemap
+// RUN: %clang_cc1 -fmodules -fallow-pcm-with-compiler-errors \
+// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-name=use_error_a -o %t/prebuilt/use_error_a.pcm \
+// RUN:   -x objective-c -emit-module %S/Inputs/error/module.modulemap
+// RUN: %clang_cc1 -fmodules -fallow-pcm-with-compiler-errors \
+// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-name=use_error_b -o %t/prebuilt/use_error_b.pcm \
+// RUN:   -x objective-c -emit-module %S/Inputs/error/module.modulemap
 
+// Prebuilt modules
 // RUN: %clang_cc1 -fsyntax-only -fmodules -fallow-pcm-with-compiler-errors \
 // RUN:   -fprebuilt-module-path=%t/prebuilt -fmodules-cache-path=%t \
 // RUN:   -ast-print %s | FileCheck %s
@@ -24,33 +36,49 @@
 // RUN:   -fprebuilt-module-path=%t/prebuilt -fmodules-cache-path=%t \
 // RUN:   -verify=pcherror %s
 
+// Explicit prebuilt modules (loaded when needed)
 // RUN: %clang_cc1 -fsyntax-only -fmodules -fallow-pcm-with-compiler-errors \
-// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm -fmodules-cache-path=%t \
-// RUN:   -ast-print %s | FileCheck %s
+// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-file=use_error_a=%t/prebuilt/use_error_a.pcm \
+// RUN:   -fmodule-file=use_error_b=%t/prebuilt/use_error_b.pcm \
+// RUN:   -fmodules-cache-path=%t -ast-print %s | FileCheck %s
 // RUN: %clang_cc1 -fsyntax-only -fmodules \
-// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm -fmodules-cache-path=%t \
-// RUN:   -verify=pcherror %s
+// RUN:   -fmodule-file=error=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-file=use_error_a=%t/prebuilt/use_error_a.pcm \
+// RUN:   -fmodule-file=use_error_b=%t/prebuilt/use_error_b.pcm \
+// RUN:   -fmodules-cache-path=%t -verify=pcherror %s
 
+// Explicit prebuilt modules without name (always loaded)
 // RUN: %clang_cc1 -fsyntax-only -fmodules -fallow-pcm-with-compiler-errors \
-// RUN:   -fmodule-file=%t/prebuilt/error.pcm -fmodules-cache-path=%t \
-// RUN:   -ast-print %s | FileCheck %s
+// RUN:   -fmodule-file=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-file=%t/prebuilt/use_error_a.pcm \
+// RUN:   -fmodule-file=%t/prebuilt/use_error_b.pcm \
+// RUN:   -fmodules-cache-path=%t -ast-print %s | FileCheck %s
+// As the modules are always loaded, compiling will fail before even parsing
+// this file - this means that -verify can't be used, so do a grep instead.
 // RUN: not %clang_cc1 -fsyntax-only -fmodules \
-// RUN:   -fmodule-file=%t/prebuilt/error.pcm -fmodules-cache-path=%t \
-// RUN:   -verify=pcherror %s
+// RUN:   -fmodule-file=%t/prebuilt/error.pcm \
+// RUN:   -fmodule-file=%t/prebuilt/use_error_a.pcm \
+// RUN:   -fmodule-file=%t/prebuilt/use_error_b.pcm \
+// RUN:   -fmodules-cache-path=%t 2>&1 | \
+// RUN: grep "PCH file contains compiler errors"
 
-// Shouldn't build the cached module (that has errors) when not allowing errors
+// Shouldn't build the cached modules (that have errors) when not allowing
+// errors
 // RUN: not %clang_cc1 -fsyntax-only -fmodules \
 // RUN:   

[PATCH] D100616: [clang] Fix a potential assert failure

2021-04-15 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added inline comments.



Comment at: clang/include/clang/Basic/TargetBuiltins.h:334
   static constexpr uint64_t LargestBuiltinID = std::max(
   {NEON::FirstTSBuiltin, ARM::LastTSBuiltin, SVE::FirstTSBuiltin,
AArch64::LastTSBuiltin, BPF::LastTSBuiltin, PPC::LastTSBuiltin,

SVE::FirstTSBuiltin can be dropped.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100616/new/

https://reviews.llvm.org/D100616

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95976: [OpenMP] Simplify offloading parallel call codegen

2021-04-15 Thread Michael Kruse via Phabricator via cfe-commits
Meinersbur added a comment.

The transposition problem arises from:

  


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95976/new/

https://reviews.llvm.org/D95976

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100616: [clang] Fix a potential assert failure

2021-04-15 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added a comment.

In D100616#2693607 , @MaskRay wrote:

> In D100616#2693603 , @craig.topper 
> wrote:
>
>> In D100616#2693595 , @MaskRay 
>> wrote:
>>
>>> SVE::FirstTSBuiltin is 8148, the largest.
>>
>> Isn't SVE::FirstTSBuiltin used to start AArch64's builtins list. So 
>> shouldn't AArch64::LastTSBuiltin be larger?
>
> You are right. I misspoke. AArch64::LastTSBuiltin is 8328.

That raises the question of why NEON and SVE are in this std::max list at 
all. They're only helpers for generating the correct information for ARM and 
AArch64, which will always be larger.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100616/new/

https://reviews.llvm.org/D100616

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100616: [clang] Fix a potential assert failure

2021-04-15 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay added a comment.

In D100616#2693603 , @craig.topper 
wrote:

> In D100616#2693595 , @MaskRay wrote:
>
>> SVE::FirstTSBuiltin is 8148, the largest.
>
> Isn't SVE::FirstTSBuiltin used to start AArch64's builtins list. So shouldn't 
> AArch64::LastTSBuiltin be larger?

You are right. I misspoke. AArch64::LastTSBuiltin is 8328.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100616/new/

https://reviews.llvm.org/D100616

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100615: [RISCV][Driver] Make the ordering of CmdArgs consistent between RISCV::Linker and baremetal::Linker

2021-04-15 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay accepted this revision.
MaskRay added a comment.
This revision is now accepted and ready to land.

LG.

A better test should cover a few more things; see Xlinker-args.c.
Instead of saying --defsym takes precedence over T_Group options, you can say 
that -T comes last.

> A few more words on this issue: the option order matters to the linker, both 
> for GNU ld and lld. In the test @arcbbb provided, ABC will be treated as 
> undefined in a.lds if the order is wrong.

While GNU people may not like it, I think relying on the order between --defsym 
and -T is highly problematic: 
https://sourceware.org/pipermail/libc-alpha/2020-March/111920.html


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100615/new/

https://reviews.llvm.org/D100615

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100616: [clang] Fix a potential assert failure

2021-04-15 Thread Craig Topper via Phabricator via cfe-commits
craig.topper added a comment.

In D100616#2693595 , @MaskRay wrote:

> SVE::FirstTSBuiltin is 8148, the largest.

Isn't SVE::FirstTSBuiltin used to start AArch64's builtins list. So shouldn't 
AArch64::LastTSBuiltin be larger?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100616/new/

https://reviews.llvm.org/D100616

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100514: [OpenMP] Added codegen for masked directive

2021-04-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

Also, don't forget to mark it as done in 
https://clang.llvm.org/docs/OpenMPSupport.html :)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100514/new/

https://reviews.llvm.org/D100514

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100514: [OpenMP] Added codegen for masked directive

2021-04-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

Any reason we should not unconditionally use the OMPIRBuilder impl? (btw, many 
thanks for providing one!)
We have an OMPIRBuilder always around in clang's codegen, so there is little 
reason not to use it if it is feature complete.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100514/new/

https://reviews.llvm.org/D100514

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100616: [clang] Fix a potential assert failure

2021-04-15 Thread Fangrui Song via Phabricator via cfe-commits
MaskRay accepted this revision.
MaskRay added a comment.
This revision is now accepted and ready to land.

SVE::FirstTSBuiltin is 8148, the largest.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100616/new/

https://reviews.llvm.org/D100616

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100615: [RISCV][Driver] Make the ordering of CmdArgs consistent between RISCV::Linker and baremetal::Linker

2021-04-15 Thread Kito Cheng via Phabricator via cfe-commits
kito-cheng added a comment.

A few more words on this issue: the option order matters to the linker, both 
for GNU `ld` and `lld`. In the test @arcbbb provided, `ABC` will be treated as 
undefined in `a.lds` if the order is wrong.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100615/new/

https://reviews.llvm.org/D100615

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100617: [RISCV][Clang] Drop the assembly tests for RVV intrinsics.

2021-04-15 Thread Zakk Chen via Phabricator via cfe-commits
khchen created this revision.
khchen added reviewers: craig.topper, rogfer01, HsiangKai, evandro, liaolucy, 
jrtc27.
Herald added subscribers: vkmr, frasercrmck, luismarques, apazos, 
sameer.abuasal, s.egerton, Jim, benna, psnobl, jocewei, PkmX, the_o, 
brucehoult, MartinMosbeck, edward-jones, zzheng, shiva0217, kito-cheng, niosHD, 
sabuasal, simoncook, johnrusso, rbar, asb.
khchen requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay.
Herald added a project: clang.

We have verified the correctness of all intrinsics downstream, so this
patch drops the assembly tests to decrease the check-clang time.
It removes 1/3 of the RUN lines.

https://reviews.llvm.org/D99151#2654154 explains why we needed to have
the ASM tests before.

Note: I only updated one file in this diff to avoid having
a large diff in the review system.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100617

Files:
  clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c


Index: clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c
===
--- clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c
+++ clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c
@@ -4,10 +4,7 @@
 // RUN:   -target-feature +experimental-zfh -disable-O0-optnone -emit-llvm %s 
-o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d 
-target-feature +experimental-v \
 // RUN:   -target-feature +experimental-zfh -disable-O0-optnone -emit-llvm %s 
-o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
-// RUN: %clang_cc1 -triple riscv64 -target-feature +m -target-feature 
+experimental-v \
-// RUN:   -Werror -Wall -o - %s -S >/dev/null 2>&1 | FileCheck 
--check-prefix=ASM --allow-empty %s
 
-// ASM-NOT: warning
 #include 
 
 // CHECK-RV32-LABEL: @test_vadd_vv_i8mf8(


Index: clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c
===
--- clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c
+++ clang/test/CodeGen/RISCV/rvv-intrinsics/vadd.c
@@ -4,10 +4,7 @@
 // RUN:   -target-feature +experimental-zfh -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV32 %s
 // RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \
 // RUN:   -target-feature +experimental-zfh -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s
-// RUN: %clang_cc1 -triple riscv64 -target-feature +m -target-feature +experimental-v \
-// RUN:   -Werror -Wall -o - %s -S >/dev/null 2>&1 | FileCheck --check-prefix=ASM --allow-empty %s
 
-// ASM-NOT: warning
 #include 
 
 // CHECK-RV32-LABEL: @test_vadd_vv_i8mf8(
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100616: [clang] Fix a potential assert failure

2021-04-15 Thread Ben Shi via Phabricator via cfe-commits
benshi001 created this revision.
benshi001 added a reviewer: MaskRay.
Herald added subscribers: s.egerton, simoncook, dschuff.
benshi001 requested review of this revision.
Herald added subscribers: cfe-commits, aheejin.
Herald added a project: clang.

The calculation of LargestBuiltinID needs information from all targets.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100616

Files:
  clang/include/clang/Basic/TargetBuiltins.h


Index: clang/include/clang/Basic/TargetBuiltins.h
===
--- clang/include/clang/Basic/TargetBuiltins.h
+++ clang/include/clang/Basic/TargetBuiltins.h
@@ -334,9 +334,9 @@
   {NEON::FirstTSBuiltin, ARM::LastTSBuiltin, SVE::FirstTSBuiltin,
AArch64::LastTSBuiltin, BPF::LastTSBuiltin, PPC::LastTSBuiltin,
NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, X86::LastTSBuiltin,
-   Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin,
-   Le64::LastTSBuiltin, SystemZ::LastTSBuiltin,
-   WebAssembly::LastTSBuiltin});
+   VE::LastTSBuiltin, RISCV::LastTSBuiltin, Hexagon::LastTSBuiltin,
+   Mips::LastTSBuiltin, XCore::LastTSBuiltin, Le64::LastTSBuiltin,
+   SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin});
 
 } // end namespace clang.
 


Index: clang/include/clang/Basic/TargetBuiltins.h
===
--- clang/include/clang/Basic/TargetBuiltins.h
+++ clang/include/clang/Basic/TargetBuiltins.h
@@ -334,9 +334,9 @@
   {NEON::FirstTSBuiltin, ARM::LastTSBuiltin, SVE::FirstTSBuiltin,
AArch64::LastTSBuiltin, BPF::LastTSBuiltin, PPC::LastTSBuiltin,
NVPTX::LastTSBuiltin, AMDGPU::LastTSBuiltin, X86::LastTSBuiltin,
-   Hexagon::LastTSBuiltin, Mips::LastTSBuiltin, XCore::LastTSBuiltin,
-   Le64::LastTSBuiltin, SystemZ::LastTSBuiltin,
-   WebAssembly::LastTSBuiltin});
+   VE::LastTSBuiltin, RISCV::LastTSBuiltin, Hexagon::LastTSBuiltin,
+   Mips::LastTSBuiltin, XCore::LastTSBuiltin, Le64::LastTSBuiltin,
+   SystemZ::LastTSBuiltin, WebAssembly::LastTSBuiltin});
 
 } // end namespace clang.
 
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100615: [RISCV][Driver] Make the ordering of CmdArgs consistent between RISCV::Linker and baremetal::Linker

2021-04-15 Thread ShihPo Hung via Phabricator via cfe-commits
arcbbb created this revision.
arcbbb added reviewers: asb, craig.topper, frasercrmck, rogfer01, jrtc27, 
mgrang.
Herald added subscribers: vkmr, evandro, luismarques, apazos, sameer.abuasal, 
s.egerton, Jim, benna, psnobl, abidh, jocewei, PkmX, the_o, brucehoult, 
MartinMosbeck, edward-jones, zzheng, shiva0217, kito-cheng, niosHD, sabuasal, 
simoncook, johnrusso, rbar.
arcbbb requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay.
Herald added a project: clang.

In baremetal::Linker::ConstructJob, LinkerInput is handled prior to T_Group 
options,
but in RISCV::Linker::ConstructJob the order is the opposite.

  

We want it to be consistent whether users are using RISCV::Linker or 
baremetal::Linker.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100615

Files:
  clang/lib/Driver/ToolChains/RISCVToolchain.cpp
  clang/test/Driver/riscv-args.c


Index: clang/test/Driver/riscv-args.c
===
--- /dev/null
+++ clang/test/Driver/riscv-args.c
@@ -0,0 +1,5 @@
+// Make sure --defsym takes precedence over T_Group options
+// RUN: %clang -### -target riscv32 \
+// RUN:   --gcc-toolchain= -Xlinker --defsym=ABC=10 -T a.lds %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-LD %s
+// CHECK-LD: {{.*}} "--defsym=ABC=10" {{.*}} "-T" "a.lds"
Index: clang/lib/Driver/ToolChains/RISCVToolchain.cpp
===
--- clang/lib/Driver/ToolChains/RISCVToolchain.cpp
+++ clang/lib/Driver/ToolChains/RISCVToolchain.cpp
@@ -181,14 +181,14 @@
 CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
   }
 
+  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
+
   Args.AddAllArgs(CmdArgs, options::OPT_L);
   ToolChain.AddFilePathLibArgs(Args, CmdArgs);
   Args.AddAllArgs(CmdArgs,
   {options::OPT_T_Group, options::OPT_e, options::OPT_s,
options::OPT_t, options::OPT_Z_Flag, options::OPT_r});
 
-  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
-
   // TODO: add C++ includes and libs if compiling C++.
 
   if (!Args.hasArg(options::OPT_nostdlib) &&


Index: clang/test/Driver/riscv-args.c
===
--- /dev/null
+++ clang/test/Driver/riscv-args.c
@@ -0,0 +1,5 @@
+// Make sure --defsym takes precedence over T_Group options
+// RUN: %clang -### -target riscv32 \
+// RUN:   --gcc-toolchain= -Xlinker --defsym=ABC=10 -T a.lds %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=CHECK-LD %s
+// CHECK-LD: {{.*}} "--defsym=ABC=10" {{.*}} "-T" "a.lds"
Index: clang/lib/Driver/ToolChains/RISCVToolchain.cpp
===
--- clang/lib/Driver/ToolChains/RISCVToolchain.cpp
+++ clang/lib/Driver/ToolChains/RISCVToolchain.cpp
@@ -181,14 +181,14 @@
 CmdArgs.push_back(Args.MakeArgString(ToolChain.GetFilePath(crtbegin)));
   }
 
+  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
+
   Args.AddAllArgs(CmdArgs, options::OPT_L);
   ToolChain.AddFilePathLibArgs(Args, CmdArgs);
   Args.AddAllArgs(CmdArgs,
   {options::OPT_T_Group, options::OPT_e, options::OPT_s,
options::OPT_t, options::OPT_Z_Flag, options::OPT_r});
 
-  AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA);
-
   // TODO: add C++ includes and libs if compiling C++.
 
   if (!Args.hasArg(options::OPT_nostdlib) &&
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100611: [RISCV] Add new attribute __clang_riscv_builtin_alias for intrinsics.

2021-04-15 Thread Kito Cheng via Phabricator via cfe-commits
kito-cheng added inline comments.



Comment at: clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c:9
+#define __rvv_generic \
+static inline __attribute__((__always_inline__, __nodebug__, __overloadable__))
+

I guess this is not needed anymore? or at least could be reduced?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100611/new/

https://reviews.llvm.org/D100611

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D91054: [Clang][OpenMP] Frontend work for sections - D89671

2021-04-15 Thread Fady Ghanim via Phabricator via cfe-commits
fghanim added a comment.

You can update the tests as long as the output is correct. For example, 
the difference is only in names, ordering of basic blocks and instructions that 
doesn't affect correctness, etc.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D91054/new/

https://reviews.llvm.org/D91054

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100415: [Coroutines] Split coroutine during CoroEarly into an init and ramp function

2021-04-15 Thread Chuanqi Xu via Phabricator via cfe-commits
ChuanqiXu added a comment.

In D100415#2691666 , @lxfind wrote:

> @ChuanqiXu Thank you for the detailed review! Really appreciate it.
> I agree we should create a coroutine benchmark at some point, ideally some 
> realistic production-code driven benchmark. We can work on that in the 
> future. For this patch, it's probably not worth it to hide it behind an 
> option, for three reasons: 1) it would be extremely complicated, 2) most 
> parameters would end up on the frame anyway 3) this patch actually doesn't 
> force parameters to be put on the frame. Before frame creation, all the 
> parameters are put back to allocas, the current alloca analysis and 
> optimization still applies to them. So some parameters may actually end up 
> not put on the frame. So I wouldn't expect this to increase frame size in 
> most cases.
>
> I will add documentation later once we all agree on the high-level 
> idea/direction of this patch.

Thanks for the disclaimer. Although I am not familiar with many details in this 
patch, the high-level idea looks good to me.




Comment at: llvm/lib/Transforms/Coroutines/CoroSplit.cpp:2231
 // coroutine.
 struct CoroSplitLegacy : public CallGraphSCCPass {
   static char ID; // Pass identification, replacement for typeid

I am not familiar with LLVM's policy on how we should treat LegacyPass 
in trunk. I mean, are we responsible for updating the LegacyPassManager?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100415/new/

https://reviews.llvm.org/D100415

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100591: [Clang][AArch64] Disable rounding of return values for AArch64

2021-04-15 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In D100591#2692978 , @asavonic wrote:

> In D100591#2692599 , @rjmccall 
> wrote:
>
>> I think the right thing to do here is to recognize generally that we're 
>> emitting a mandatory tail call, and so suppress *all* the normal 
>> transformations on the return value.
>
> I assume it can be tricky to detect such call. The final decision (tail call 
> vs normal call) is made before instruction selection, after all LLVM IR 
> optimization passes. So we can miss tail calls that are not obvious on 
> non-optimized code, or get false-positive results for calls that a backend 
> decides to emit as normal calls.

Well, I mean in the frontend.  I certainly wouldn't expect the backend to 
recognize the pattern I described and somehow turn it into a tail call!

> In any case, this patch can be useful not only for tail calls: `trunc + zext` 
> sequence generated to round a return value can be problematic for other cases 
> as well.

Sure, I can imagine that it's hard to eliminate the extra zext in the backend.  
Maybe we should have an undef_extend?

You should get backend sign-off before making Swift generate non-target-legal 
return types.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100591/new/

https://reviews.llvm.org/D100591

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100611: [RISCV] Add new attribute __clang_riscv_builtin_alias for intrinsics.

2021-04-15 Thread Hsiangkai Wang via Phabricator via cfe-commits
HsiangKai created this revision.
HsiangKai added reviewers: craig.topper, rogfer01, khchen, evandro, frasercrmck.
Herald added subscribers: StephenFan, vkmr, jdoerfert, luismarques, apazos, 
sameer.abuasal, s.egerton, Jim, benna, psnobl, jocewei, PkmX, the_o, 
brucehoult, MartinMosbeck, edward-jones, zzheng, jrtc27, shiva0217, kito-cheng, 
niosHD, sabuasal, simoncook, johnrusso, rbar, asb.
Herald added a reviewer: aaron.ballman.
HsiangKai requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay.
Herald added a project: clang.

There are overloaded versions of the vector intrinsics. Currently, we use
function wrappers and the __overloadable__ attribute for them, which
increases the build/test time a lot.

In this patch, we define __clang_riscv_builtin_alias for RISC-V
intrinsics and use this attribute to map the overloaded version to the
corresponding builtins.

In our downstream testing, it could decrease the testing time from 6.3
seconds to 3.7 seconds for vloxei.c test.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100611

Files:
  clang/include/clang/Basic/Attr.td
  clang/include/clang/Basic/AttrDocs.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/AST/Decl.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
  clang/test/Misc/pragma-attribute-supported-attributes-list.test

Index: clang/test/Misc/pragma-attribute-supported-attributes-list.test
===
--- clang/test/Misc/pragma-attribute-supported-attributes-list.test
+++ clang/test/Misc/pragma-attribute-supported-attributes-list.test
@@ -142,6 +142,7 @@
 // CHECK-NEXT: PassObjectSize (SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: PatchableFunctionEntry (SubjectMatchRule_function, SubjectMatchRule_objc_method)
 // CHECK-NEXT: Pointer (SubjectMatchRule_record_not_is_union)
+// CHECK-NEXT: RISCVBuiltinAlias (SubjectMatchRule_function)
 // CHECK-NEXT: ReleaseHandle (SubjectMatchRule_variable_is_parameter)
 // CHECK-NEXT: RenderScriptKernel (SubjectMatchRule_function)
 // CHECK-NEXT: ReqdWorkGroupSize (SubjectMatchRule_function)
Index: clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
===
--- /dev/null
+++ clang/test/CodeGen/RISCV/riscv-attr-builtin-alias.c
@@ -0,0 +1,35 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64 -emit-llvm -target-feature +experimental-v \
+// RUN:   %s -o - \
+// RUN:   | FileCheck %s
+
+#include 
+
+#define __rvv_generic \
+static inline __attribute__((__always_inline__, __nodebug__, __overloadable__))
+
+__rvv_generic
+__attribute__((__clang_riscv_builtin_alias(__builtin_rvv_vadd_vv_i8m1)))
+vint8m1_t vadd_generic (vint8m1_t op0, vint8m1_t op1, size_t op2);
+
+// CHECK-LABEL: @test(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[OP0_ADDR:%.*]] = alloca , align 1
+// CHECK-NEXT:[[OP1_ADDR:%.*]] = alloca , align 1
+// CHECK-NEXT:[[VL_ADDR:%.*]] = alloca i64, align 8
+// CHECK-NEXT:[[RET:%.*]] = alloca , align 1
+// CHECK-NEXT:store  [[OP0:%.*]], * [[OP0_ADDR]], align 1
+// CHECK-NEXT:store  [[OP1:%.*]], * [[OP1_ADDR]], align 1
+// CHECK-NEXT:store i64 [[VL:%.*]], i64* [[VL_ADDR]], align 8
+// CHECK-NEXT:[[TMP0:%.*]] = load , * [[OP0_ADDR]], align 1
+// CHECK-NEXT:[[TMP1:%.*]] = load , * [[OP1_ADDR]], align 1
+// CHECK-NEXT:[[TMP2:%.*]] = load i64, i64* [[VL_ADDR]], align 8
+// CHECK-NEXT:[[TMP3:%.*]] = call  @llvm.riscv.vadd.nxv8i8.nxv8i8.i64( [[TMP0]],  [[TMP1]], i64 [[TMP2]])
+// CHECK-NEXT:store  [[TMP3]], * [[RET]], align 1
+// CHECK-NEXT:[[TMP4:%.*]] = load , * [[RET]], align 1
+// CHECK-NEXT:ret  [[TMP4]]
+//
+vint8m1_t test(vint8m1_t op0, vint8m1_t op1, size_t vl) {
+  vint8m1_t ret = vadd_generic(op0, op1, vl);
+  return ret;
+}
Index: clang/lib/Sema/SemaDeclAttr.cpp
===
--- clang/lib/Sema/SemaDeclAttr.cpp
+++ clang/lib/Sema/SemaDeclAttr.cpp
@@ -5120,6 +5120,7 @@
 #define GET_SVE_BUILTINS
 #define BUILTIN(name, types, attr) case SVE::BI##name:
 #include "clang/Basic/arm_sve_builtins.inc"
+#undef BUILTIN
 return true;
   }
 }
@@ -5146,6 +5147,37 @@
   D->addAttr(::new (S.Context) ArmBuiltinAliasAttr(S.Context, AL, Ident));
 }
 
+static bool RISCVVAliasValid(unsigned BuiltinID, StringRef AliasName) {
+  switch (BuiltinID) {
+  default:
+return false;
+#define BUILTIN(ID, TYPE, ATTRS) case RISCV::BI##ID:
+#include "clang/Basic/BuiltinsRISCV.def"
+#undef BUILTIN
+return true;
+  }
+}
+
+static void handleRISCVBuiltinAliasAttr(Sema , Decl *D,
+const ParsedAttr ) {
+  if (!AL.isArgIdent(0)) {
+S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type)
+<< AL << 1 << AANT_ArgumentIdentifier;
+return;
+  }
+
+  IdentifierInfo *Ident = AL.getArgAsIdent(0)->Ident;
+ 

[PATCH] D100509: Support GCC's -fstack-usage flag

2021-04-15 Thread Dávid Bolvanský via Phabricator via cfe-commits
xbolva00 added a comment.

In D100509#2693388 , @pzheng wrote:

> I checked some of the functions in zstd where gcc outputs "dynamic,bounded", 
> but did not find any straightforward way to simplify them into standalone 
> tests. If anyone happens to have a simple test case, I would be more than 
> happy to add it here.

Me neither, no problem. Not a blocker.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100509/new/

https://reviews.llvm.org/D100509

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99456: [C++2b] Support size_t literals

2021-04-15 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added inline comments.



Comment at: clang/lib/Frontend/InitPreprocessor.cpp:593-594
+  // C++2b features.
+  if (LangOpts.CPlusPlus2b)
+Builder.defineMacro("__cpp_size_t_suffix", "202011L");
   if (LangOpts.Char8)

Quuxplusone wrote:
> AntonBikineev wrote:
> > AntonBikineev wrote:
> > > Quuxplusone wrote:
> > > > aaron.ballman wrote:
> > > > > AntonBikineev wrote:
> > > > > > aaron.ballman wrote:
> > > > > > > Because we allow this as an extension in all C++ modes, should 
> > > > > > > this be enabled always rather than gated on C++2b?
> > > > > > I was also wondering about this. I've checked that we also do the 
> > > > > > same for other feature macros, such as __cpp_binary_literals, which 
> > > > > > is defined for -std>=c++14 while at the same time is allowed as an 
> > > > > > extension before C++14. Therefore I decided to mimic the behaviour.
> > > > > Thanks for checking on that! I think that seems defensible enough. :-)
> > > > Btw, thus far libc++ has tended to make the opposite choice: for 
> > > > example, libc++ defines `__cpp_lib_variant == 202102` in all modes, 
> > > > because the programmer conceivably might be depending on that macro to 
> > > > make some decision, so we want to make sure it reflects the specific 
> > > > semantics that we implement.  (For `__cpp_binary_literals` 
> > > > specifically, I agree it doesn't really matter because nobody's going 
> > > > to be making decisions based on the value of this macro.)
> > > > 
> > > > See https://reviews.llvm.org/D99290#inline-934563 (D96385, D97394) for 
> > > > previous discussions on the libc++ side.
> > > Thanks for pointing this out, Arthur.
> > > 
> > > I wish there was some consistency, however, I'm not sure if this is 
> > > easily feasible. I guess the strategy of defining `__cpp_size_t_literals` 
> > > on all modes would be problematic, since if the user code depends on 
> > > `__cpp_size_t_literals`, it could suddenly receive the extension warning 
> > > (when compiled with -std<c++2b), which is enabled by default.
> > > Btw, thus far libc++ has tended to make the opposite choice: for example, 
> > > libc++ defines `__cpp_lib_variant == 202102` in all modes, because the 
> > > programmer conceivably might be depending on that macro to make some 
> > > decision, so we want to make sure it reflects the specific semantics that 
> > > we implement.  (For `__cpp_binary_literals` specifically, I agree it 
> > > doesn't really matter because nobody's going to be making decisions based 
> > > on the value of this macro.)
> > > 
> > > See https://reviews.llvm.org/D99290#inline-934563 (D96385, D97394) for 
> > > previous discussions on the libc++ side.
> > 
> > 
> > I guess the strategy of defining `__cpp_size_t_literals` in all modes would 
> > be problematic, since if the [pre-C++2b] user code depends on 
> > `__cpp_size_t_literals`, it could suddenly receive the extension warning...
> 
> Ah, yes. Orthogonally to everything I said above, I think it's fair to say 
> that
> - in modes where `42uz` produces a fatal error, it's definitely "not 
> supported"
> - in modes where it's accepted without complaint, it's definitely "supported" 
> (*)
> - in modes where it produces a non-fatal warning, you can plausibly argue it 
> either way
> (*) - with a bonus exception that if the user passes `-Wno-blah` or `-w`, 
> that doesn't magically make things be supported
> libc++'s situation seems more black-and-white; e.g. `variant` behaves one way 
> or the other. There's no libc++ equivalent of "you get the new behavior but 
> with a warning." :)
We have some prior art to draw on here: our `__has_extension(X)` behavior is 
that under `-pedantic-errors`, we don't advertise any extensions beyond the 
`__has_feature(X)` set, and otherwise we advertise features even if they will 
lead to warnings (or errors via `-Werror` or `-Werror=pedantic` or similar 
warning flags).

I'm not sure that's necessarily the best thing, since it's only loosely 
connected to whether the construct in question would lead to (warnings or) 
errors, but it's consistent with the principle that warning flags shouldn't 
change behavior (beyond which warnings or errors are emitted) and probably more 
useful than never advertising extensions in earlier language modes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99456/new/

https://reviews.llvm.org/D99456

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99517: Implemented [[clang::musttail]] attribute for guaranteed tail calls.

2021-04-15 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
rsmith added a comment.

In D99517#2693418 , @thakis wrote:

> Looks like this breaks tests on mac/arm: 
> http://45.33.8.238/macm1/7552/step_7.txt

Should be fixed by rGf7c9de0de5804498085af973dc6bfc934a18f000 
.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99517/new/

https://reviews.llvm.org/D99517

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] f7c9de0 - Add triple to fix test failure.

2021-04-15 Thread Richard Smith via cfe-commits

Author: Richard Smith
Date: 2021-04-15T18:08:35-07:00
New Revision: f7c9de0de5804498085af973dc6bfc934a18f000

URL: 
https://github.com/llvm/llvm-project/commit/f7c9de0de5804498085af973dc6bfc934a18f000
DIFF: 
https://github.com/llvm/llvm-project/commit/f7c9de0de5804498085af973dc6bfc934a18f000.diff

LOG: Add triple to fix test failure.

This test uses `__regcall`, support for which is target-specific.

Added: 


Modified: 
clang/test/SemaCXX/attr-musttail.cpp

Removed: 




diff  --git a/clang/test/SemaCXX/attr-musttail.cpp 
b/clang/test/SemaCXX/attr-musttail.cpp
index 55faf5a4f5ac7..561184e7a24f9 100644
--- a/clang/test/SemaCXX/attr-musttail.cpp
+++ b/clang/test/SemaCXX/attr-musttail.cpp
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -verify -fsyntax-only -fms-extensions -fcxx-exceptions 
-fopenmp %s
+// RUN: %clang_cc1 -verify -fsyntax-only -fms-extensions -fcxx-exceptions 
-fopenmp -triple x86_64-linux %s
 
 int ReturnsInt1();
 int Func1() {



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99517: Implemented [[clang::musttail]] attribute for guaranteed tail calls.

2021-04-15 Thread Nico Weber via Phabricator via cfe-commits
thakis added a comment.

Looks like this breaks tests on mac/arm: 
http://45.33.8.238/macm1/7552/step_7.txt

Please take a look and revert for now if it takes a while to fix.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99517/new/

https://reviews.llvm.org/D99517

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100509: Support GCC's -fstack-usage flag

2021-04-15 Thread Pengxuan Zheng via Phabricator via cfe-commits
pzheng added a comment.

I checked some of the functions in zstd where gcc outputs "dynamic,bounded", 
but did not find any straightforward way to simplify them into standalone 
tests. If anyone happens to have a simple test case, I would be more than happy 
to add it here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100509/new/

https://reviews.llvm.org/D100509

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D95976: [OpenMP] Simplify offloading parallel call codegen

2021-04-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

I have only minor remarks but I'd like you to check if my hunch is correct and 
the proposed modifications will fix PR49777 *and* fix PR49779.
Also, the number of arguments needs to be increased, let's go big and automatic 
here.

Other than that I think this looks good.




Comment at: clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp:2192
 RCG(CGF);
   }
 }

Can we remove SeqGen while we are here please. We need to check in the runtime 
anyway. That check is later folded, no need to make things more complicated 
here.



Comment at: openmp/libomptarget/deviceRTLs/common/src/parallel.cu:294
+  // TODO: Add UNLIKELY to optimize?
+  if (!if_expr) {
+__kmpc_serialized_parallel(ident, global_tid);

This should allow us to remove the `SeqGen` in the Clang CodeGen *and* fix 
PR49777 *and* fix PR49779, a win-win-win situation.



Comment at: openmp/libomptarget/deviceRTLs/common/src/parallel.cu:368
+  //  __kmpc_push_proc_bind(ident, global_tid, proc_bind);
+}
+

FWIW, The implementation here is a stopgap until we move to the new runtime. 
The codegen and interface are the important parts.



Comment at: openmp/libomptarget/deviceRTLs/common/src/support.cu:370
+printf("Too many arguments in kmp_invoke_microtask, aborting 
execution.\n");
+return;
+  }

Not a return but a `__builtin_trap()`, please.
We also need this for more than 16 unfortunately, I've seen 20 in miniqmc.
We might want to create a script to print the cases, and then generate 128 or 
something like that in a file we include. The script can be in the utils folder 
too.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D95976/new/

https://reviews.llvm.org/D95976

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99517: Implemented [[clang::musttail]] attribute for guaranteed tail calls.

2021-04-15 Thread Richard Smith - zygoloid via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG834467590842: Implemented [[clang::musttail]] attribute for 
guaranteed tail calls. (authored by haberman, committed by rsmith).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99517/new/

https://reviews.llvm.org/D99517

Files:
  clang/docs/ReleaseNotes.rst
  clang/include/clang/AST/IgnoreExpr.h
  clang/include/clang/Basic/Attr.td
  clang/include/clang/Basic/AttrDocs.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Sema/ScopeInfo.h
  clang/include/clang/Sema/Sema.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/CGClass.cpp
  clang/lib/CodeGen/CGDecl.cpp
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/CodeGen/CGExprCXX.cpp
  clang/lib/CodeGen/CGStmt.cpp
  clang/lib/CodeGen/CodeGenFunction.h
  clang/lib/CodeGen/EHScopeStack.h
  clang/lib/Sema/JumpDiagnostics.cpp
  clang/lib/Sema/Sema.cpp
  clang/lib/Sema/SemaStmt.cpp
  clang/lib/Sema/SemaStmtAttr.cpp
  clang/test/CodeGenCXX/attr-musttail.cpp
  clang/test/Sema/attr-musttail.c
  clang/test/Sema/attr-musttail.m
  clang/test/SemaCXX/attr-musttail.cpp

Index: clang/test/SemaCXX/attr-musttail.cpp
===
--- /dev/null
+++ clang/test/SemaCXX/attr-musttail.cpp
@@ -0,0 +1,269 @@
+// RUN: %clang_cc1 -verify -fsyntax-only -fms-extensions -fcxx-exceptions -fopenmp %s
+
+int ReturnsInt1();
+int Func1() {
+  [[clang::musttail]] ReturnsInt1();  // expected-error {{'musttail' attribute only applies to return statements}}
+  [[clang::musttail(1, 2)]] return ReturnsInt1(); // expected-error {{'musttail' attribute takes no arguments}}
+  [[clang::musttail]] return 5;   // expected-error {{'musttail' attribute requires that the return value is the result of a function call}}
+  [[clang::musttail]] return ReturnsInt1();
+}
+
+void NoFunctionCall() {
+  [[clang::musttail]] return; // expected-error {{'musttail' attribute requires that the return value is the result of a function call}}
+}
+
+[[clang::musttail]] static int int_val = ReturnsInt1(); // expected-error {{'musttail' attribute cannot be applied to a declaration}}
+
+void NoParams(); // expected-note {{target function has different number of parameters (expected 1 but has 0)}}
+void TestParamArityMismatch(int x) {
+  [[clang::musttail]] // expected-note {{tail call required by 'musttail' attribute here}}
+  return NoParams();  // expected-error {{cannot perform a tail call to function 'NoParams' because its signature is incompatible with the calling function}}
+}
+
+void LongParam(long x); // expected-note {{target function has type mismatch at 1st parameter (expected 'long' but has 'int')}}
+void TestParamTypeMismatch(int x) {
+  [[clang::musttail]]  // expected-note {{tail call required by 'musttail' attribute here}}
+  return LongParam(x); // expected-error {{cannot perform a tail call to function 'LongParam' because its signature is incompatible with the calling function}}
+}
+
+long ReturnsLong(); // expected-note {{target function has different return type ('int' expected but has 'long')}}
+int TestReturnTypeMismatch() {
+  [[clang::musttail]]   // expected-note {{tail call required by 'musttail' attribute here}}
+  return ReturnsLong(); // expected-error {{cannot perform a tail call to function 'ReturnsLong' because its signature is incompatible with the calling function}}
+}
+
+struct Struct1 {
+  void MemberFunction(); // expected-note {{'MemberFunction' declared here}}
+};
+void TestNonMemberToMember() {
+  Struct1 st;
+  [[clang::musttail]] // expected-note {{tail call required by 'musttail' attribute here}}
+  return st.MemberFunction(); // expected-error {{non-member function cannot perform a tail call to non-static member function 'MemberFunction'}}
+}
+
+void ReturnsVoid(); // expected-note {{'ReturnsVoid' declared here}}
+struct Struct2 {
+  void TestMemberToNonMember() {
+[[clang::musttail]]   // expected-note {{tail call required by 'musttail' attribute here}}
+return ReturnsVoid(); // expected-error{{non-static member function cannot perform a tail call to non-member function 'ReturnsVoid'}}
+  }
+};
+
+class HasNonTrivialDestructor {
+public:
+  ~HasNonTrivialDestructor() {}
+  int ReturnsInt();
+};
+
+void ReturnsVoid2();
+void TestNonTrivialDestructorInScope() {
+  HasNonTrivialDestructor foo;  // expected-note {{jump exits scope of variable with non-trivial destructor}}
+  [[clang::musttail]] return ReturnsVoid(); // expected-error {{cannot perform a tail call from this return statement}}
+}
+
+int NonTrivialParam(HasNonTrivialDestructor x);
+int TestNonTrivialParam(HasNonTrivialDestructor x) {
+  [[clang::musttail]] return NonTrivialParam(x); // expected-error {{tail call requires that the return value, all parameters, and any temporaries created by the expression 

[clang] 8344675 - Implemented [[clang::musttail]] attribute for guaranteed tail calls.

2021-04-15 Thread Richard Smith via cfe-commits

Author: Joshua Haberman
Date: 2021-04-15T17:12:21-07:00
New Revision: 8344675908424ee532d4ae30e5043c5a5834e02c

URL: 
https://github.com/llvm/llvm-project/commit/8344675908424ee532d4ae30e5043c5a5834e02c
DIFF: 
https://github.com/llvm/llvm-project/commit/8344675908424ee532d4ae30e5043c5a5834e02c.diff

LOG: Implemented [[clang::musttail]] attribute for guaranteed tail calls.

This is a Clang-only change and depends on the existing "musttail"
support already implemented in LLVM.

The [[clang::musttail]] attribute goes on a return statement, not
a function definition. There are several constraints that the user
must follow when using [[clang::musttail]], and these constraints
are verified by Sema.

Tail calls are supported on regular function calls, calls through a
function pointer, member function calls, and even pointer to member.

Future work would be to throw a warning if a user tries to pass
a pointer or reference to a local variable through a musttail call.

Reviewed By: rsmith

Differential Revision: https://reviews.llvm.org/D99517

Added: 
clang/test/CodeGenCXX/attr-musttail.cpp
clang/test/Sema/attr-musttail.c
clang/test/Sema/attr-musttail.m
clang/test/SemaCXX/attr-musttail.cpp

Modified: 
clang/docs/ReleaseNotes.rst
clang/include/clang/AST/IgnoreExpr.h
clang/include/clang/Basic/Attr.td
clang/include/clang/Basic/AttrDocs.td
clang/include/clang/Basic/DiagnosticSemaKinds.td
clang/include/clang/Sema/ScopeInfo.h
clang/include/clang/Sema/Sema.h
clang/lib/CodeGen/CGCall.cpp
clang/lib/CodeGen/CGClass.cpp
clang/lib/CodeGen/CGDecl.cpp
clang/lib/CodeGen/CGExpr.cpp
clang/lib/CodeGen/CGExprCXX.cpp
clang/lib/CodeGen/CGStmt.cpp
clang/lib/CodeGen/CodeGenFunction.h
clang/lib/CodeGen/EHScopeStack.h
clang/lib/Sema/JumpDiagnostics.cpp
clang/lib/Sema/Sema.cpp
clang/lib/Sema/SemaStmt.cpp
clang/lib/Sema/SemaStmtAttr.cpp

Removed: 




diff  --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 9d7333660290..ddf048c2dd33 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -46,7 +46,13 @@ sections with improvements to Clang's support for those 
languages.
 Major New Features
 --
 
-- ...
+- Guaranteed tail calls are now supported with statement attributes
+  ``[[clang::musttail]]`` in C++ and ``__attribute__((musttail))`` in C. The
+  attribute is applied to a return statement (not a function declaration),
+  and an error is emitted if a tail call cannot be guaranteed, for example if
+  the function signatures of caller and callee are not compatible. Guaranteed
+  tail calls enable a class of algorithms that would otherwise use an
+  arbitrary amount of stack space.
 
 Improvements to Clang's diagnostics
 ^^^

diff  --git a/clang/include/clang/AST/IgnoreExpr.h 
b/clang/include/clang/AST/IgnoreExpr.h
index b2f53d1d2a79..a7e9b07bef6c 100644
--- a/clang/include/clang/AST/IgnoreExpr.h
+++ b/clang/include/clang/AST/IgnoreExpr.h
@@ -121,6 +121,18 @@ inline Expr *IgnoreImplicitSingleStep(Expr *E) {
   return E;
 }
 
+inline Expr *IgnoreElidableImplicitConstructorSingleStep(Expr *E) {
+  auto *CCE = dyn_cast<CXXConstructExpr>(E);
+  if (CCE && CCE->isElidable() && !isa<CXXTemporaryObjectExpr>(CCE)) {
+unsigned NumArgs = CCE->getNumArgs();
+if ((NumArgs == 1 ||
+ (NumArgs > 1 && CCE->getArg(1)->isDefaultArgument())) &&
+!CCE->getArg(0)->isDefaultArgument() && !CCE->isListInitialization())
+  return CCE->getArg(0);
+  }
+  return E;
+}
+
 inline Expr *IgnoreImplicitAsWrittenSingleStep(Expr *E) {
   if (auto *ICE = dyn_cast(E))
 return ICE->getSubExprAsWritten();

diff  --git a/clang/include/clang/Basic/Attr.td 
b/clang/include/clang/Basic/Attr.td
index 400dfe84b7d9..fcad24b83a05 100644
--- a/clang/include/clang/Basic/Attr.td
+++ b/clang/include/clang/Basic/Attr.td
@@ -1370,6 +1370,12 @@ def NoMerge : DeclOrStmtAttr {
   let SimpleHandler = 1;
 }
 
+def MustTail : StmtAttr {
+  let Spellings = [Clang<"musttail">];
+  let Documentation = [MustTailDocs];
+  let Subjects = SubjectList<[ReturnStmt], ErrorDiag, "return statements">;
+}
+
 def FastCall : DeclOrTypeAttr {
   let Spellings = [GCC<"fastcall">, Keyword<"__fastcall">,
Keyword<"_fastcall">];

diff  --git a/clang/include/clang/Basic/AttrDocs.td 
b/clang/include/clang/Basic/AttrDocs.td
index eabf30eda8cc..867865e91056 100644
--- a/clang/include/clang/Basic/AttrDocs.td
+++ b/clang/include/clang/Basic/AttrDocs.td
@@ -443,6 +443,32 @@ calls.
   }];
 }
 
+def MustTailDocs : Documentation {
+  let Category = DocCatStmt;
+  let Content = [{
+If a ``return`` statement is marked ``musttail``, this indicates that the
+compiler must generate a tail call for the program to be correct, even when
+optimizations are disabled. This guarantees that the call will not cause
+unbounded stack growth if it is part of a recursive 

[PATCH] D100567: BPF: emit debuginfo for Function of DeclRefExpr if requested

2021-04-15 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song updated this revision to Diff 337948.
yonghong-song edited the summary of this revision.
yonghong-song added a comment.

Rename TargetInfo.allowDebugInfoForExternalVar to 
TargetInfo.allowDebugInfoForExternalRef.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100567/new/

https://reviews.llvm.org/D100567

Files:
  clang/include/clang/Basic/TargetInfo.h
  clang/lib/Basic/Targets/BPF.h
  clang/lib/CodeGen/CGExpr.cpp
  clang/lib/Sema/SemaDecl.cpp
  clang/test/CodeGen/debug-info-extern-callback.c


Index: clang/test/CodeGen/debug-info-extern-callback.c
===
--- /dev/null
+++ clang/test/CodeGen/debug-info-extern-callback.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -x c -debug-info-kind=limited -triple bpf-linux-gnu 
-emit-llvm %s -o - | FileCheck %s
+
+extern int do_work(int);
+long bpf_helper(void *callback_fn);
+long prog() {
+   return bpf_helper(&do_work);
+}
+
+// CHECK: declare !dbg ![[FUNC:[0-9]+]] i32 @do_work(i32)
+// CHECK: ![[FUNC]] = !DISubprogram(name: "do_work"
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -12667,7 +12667,7 @@
 Diag(Var->getLocation(), diag::note_private_extern);
   }
 
-  if (Context.getTargetInfo().allowDebugInfoForExternalVar() &&
+  if (Context.getTargetInfo().allowDebugInfoForExternalRef() &&
   !Var->isInvalidDecl() && !getLangOpts().CPlusPlus)
 ExternalDeclarations.push_back(Var);
 
Index: clang/lib/CodeGen/CGExpr.cpp
===
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -2833,8 +2833,19 @@
 return LV;
   }
 
-  if (const auto *FD = dyn_cast<FunctionDecl>(ND))
-return EmitFunctionDeclLValue(*this, E, FD);
+  if (const auto *FD = dyn_cast<FunctionDecl>(ND)) {
+LValue LV = EmitFunctionDeclLValue(*this, E, FD);
+
+// Emit debuginfo for the function declaration if the target wants to.
+if (getContext().getTargetInfo().allowDebugInfoForExternalRef()) {
+  CGDebugInfo *DI = CGM.getModuleDebugInfo();
+  auto *Fn = dyn_cast<llvm::Function>(LV.getPointer(*this));
+  if (DI && Fn)
+DI->EmitFunctionDecl(FD, FD->getLocation(), T, Fn);
+}
+
+return LV;
+  }
 
   // FIXME: While we're emitting a binding from an enclosing scope, all other
   // DeclRefExprs we see should be implicitly treated as if they also refer to
Index: clang/lib/Basic/Targets/BPF.h
===
--- clang/lib/Basic/Targets/BPF.h
+++ clang/lib/Basic/Targets/BPF.h
@@ -76,7 +76,7 @@
 return None;
   }
 
-  bool allowDebugInfoForExternalVar() const override { return true; }
+  bool allowDebugInfoForExternalRef() const override { return true; }
 
   CallingConvCheckResult checkCallingConvention(CallingConv CC) const override 
{
 switch (CC) {
Index: clang/include/clang/Basic/TargetInfo.h
===
--- clang/include/clang/Basic/TargetInfo.h
+++ clang/include/clang/Basic/TargetInfo.h
@@ -1538,8 +1538,8 @@
 
   virtual void setAuxTarget(const TargetInfo *Aux) {}
 
-  /// Whether target allows debuginfo types for decl only variables.
-  virtual bool allowDebugInfoForExternalVar() const { return false; }
+  /// Whether target allows debuginfo types for decl only variables/functions.
+  virtual bool allowDebugInfoForExternalRef() const { return false; }
 
 protected:
   /// Copy type and layout related info.


Index: clang/test/CodeGen/debug-info-extern-callback.c
===
--- /dev/null
+++ clang/test/CodeGen/debug-info-extern-callback.c
@@ -0,0 +1,10 @@
+// RUN: %clang_cc1 -x c -debug-info-kind=limited -triple bpf-linux-gnu -emit-llvm %s -o - | FileCheck %s
+
+extern int do_work(int);
+long bpf_helper(void *callback_fn);
+long prog() {
+	return bpf_helper(&do_work);
+}
+
+// CHECK: declare !dbg ![[FUNC:[0-9]+]] i32 @do_work(i32)
+// CHECK: ![[FUNC]] = !DISubprogram(name: "do_work"
Index: clang/lib/Sema/SemaDecl.cpp
===
--- clang/lib/Sema/SemaDecl.cpp
+++ clang/lib/Sema/SemaDecl.cpp
@@ -12667,7 +12667,7 @@
 Diag(Var->getLocation(), diag::note_private_extern);
   }
 
-  if (Context.getTargetInfo().allowDebugInfoForExternalVar() &&
+  if (Context.getTargetInfo().allowDebugInfoForExternalRef() &&
   !Var->isInvalidDecl() && !getLangOpts().CPlusPlus)
 ExternalDeclarations.push_back(Var);
 
Index: clang/lib/CodeGen/CGExpr.cpp
===
--- clang/lib/CodeGen/CGExpr.cpp
+++ clang/lib/CodeGen/CGExpr.cpp
@@ -2833,8 +2833,19 @@
 return LV;
   }
 
-  if (const auto *FD = dyn_cast<FunctionDecl>(ND))
-return EmitFunctionDeclLValue(*this, E, FD);
+  

[PATCH] D100609: [Offload][OpenMP][CUDA] Allow fembed-bitcode for device offload

2021-04-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

I'm not really sure about the test, my local setup didn't have CUDA attached 
properly but this should work in principle ;)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100609/new/

https://reviews.llvm.org/D100609

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100609: [Offload][OpenMP][CUDA] Allow fembed-bitcode for device offload

2021-04-15 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert created this revision.
jdoerfert added a reviewer: tra.
Herald added subscribers: guansong, yaxunl.
Herald added a reviewer: bollu.
jdoerfert requested review of this revision.
Herald added a subscriber: sstefan1.
Herald added a project: clang.

This is a fix for the problem reported here:
https://lists.llvm.org/pipermail/llvm-dev/2021-March/149529.html

That is, the target information was missing when we embedded bitcode and
that caused the NVPTX backend to fail.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100609

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/embed-bitcode-nvptx.cu


Index: clang/test/Driver/embed-bitcode-nvptx.cu
===
--- /dev/null
+++ clang/test/Driver/embed-bitcode-nvptx.cu
@@ -0,0 +1,8 @@
+// RUN: %clang -Xclang -triple -Xclang nvptx64 -S -Xclang -target-feature 
-Xclang +ptx70 -fembed-bitcode=all --cuda-device-only -nocudalib -nocudainc %s 
-o - | FileCheck %s
+// REQUIRES: nvptx-registered-target
+//
+// CHECK:.global .align 1 .b8 llvm_$_embedded_$_module[
+
+__device__ void foo(int mask) {
+  __nvvm_bar_warp_sync(mask);
+}
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4295,6 +4295,9 @@
   // Select the appropriate action.
   RewriteKind rewriteKind = RK_None;
 
+  bool isDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) ||
+  JA.isDeviceOffloading(Action::OFK_Host));
+
   // If CollectArgsForIntegratedAssembler() isn't called below, claim the args
   // it claims when not running an assembler. Otherwise, clang would emit
   // "argument unused" warnings for assembler flags when e.g. adding "-E" to
@@ -4401,9 +4404,6 @@
   CmdArgs.push_back("-emit-llvm-uselists");
 
 // Device-side jobs do not support LTO.
-bool isDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) ||
-   JA.isDeviceOffloading(Action::OFK_Host));
-
 if (D.isUsingLTO() && !isDeviceOffloadAction) {
   Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ);
   CmdArgs.push_back("-flto-unit");
@@ -4436,7 +4436,15 @@
 // Add flags implied by -fembed-bitcode.
 Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ);
 // Disable all llvm IR level optimizations.
-CmdArgs.push_back("-disable-llvm-passes");
+if (!isDeviceOffloadAction) {
+  CmdArgs.push_back("-disable-llvm-passes");
+} else  {
+  std::string CPU = getCPUName(Args, Triple, /*FromAs*/ false);
+  if (!CPU.empty()) {
+CmdArgs.push_back("-target-cpu");
+CmdArgs.push_back(Args.MakeArgString(CPU));
+  }
+}
 
 // Render target options.
 TC.addClangTargetOptions(Args, CmdArgs, JA.getOffloadingDeviceKind());


Index: clang/test/Driver/embed-bitcode-nvptx.cu
===
--- /dev/null
+++ clang/test/Driver/embed-bitcode-nvptx.cu
@@ -0,0 +1,8 @@
+// RUN: %clang -Xclang -triple -Xclang nvptx64 -S -Xclang -target-feature -Xclang +ptx70 -fembed-bitcode=all --cuda-device-only -nocudalib -nocudainc %s -o - | FileCheck %s
+// REQUIRES: nvptx-registered-target
+//
+// CHECK:.global .align 1 .b8 llvm_$_embedded_$_module[
+
+__device__ void foo(int mask) {
+  __nvvm_bar_warp_sync(mask);
+}
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4295,6 +4295,9 @@
   // Select the appropriate action.
   RewriteKind rewriteKind = RK_None;
 
+  bool isDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) ||
+  JA.isDeviceOffloading(Action::OFK_Host));
+
   // If CollectArgsForIntegratedAssembler() isn't called below, claim the args
   // it claims when not running an assembler. Otherwise, clang would emit
   // "argument unused" warnings for assembler flags when e.g. adding "-E" to
@@ -4401,9 +4404,6 @@
   CmdArgs.push_back("-emit-llvm-uselists");
 
 // Device-side jobs do not support LTO.
-bool isDeviceOffloadAction = !(JA.isDeviceOffloading(Action::OFK_None) ||
-   JA.isDeviceOffloading(Action::OFK_Host));
-
 if (D.isUsingLTO() && !isDeviceOffloadAction) {
   Args.AddLastArg(CmdArgs, options::OPT_flto, options::OPT_flto_EQ);
   CmdArgs.push_back("-flto-unit");
@@ -4436,7 +4436,15 @@
 // Add flags implied by -fembed-bitcode.
 Args.AddLastArg(CmdArgs, options::OPT_fembed_bitcode_EQ);
 // Disable all llvm IR level optimizations.
-CmdArgs.push_back("-disable-llvm-passes");
+if (!isDeviceOffloadAction) {
+  CmdArgs.push_back("-disable-llvm-passes");
+} else  {
+  std::string CPU = getCPUName(Args, Triple, 

[PATCH] D100536: [clang][deps] NFC: Remove unused FullDependencies member

2021-04-15 Thread Duncan P. N. Exon Smith via Phabricator via cfe-commits
dexonsmith accepted this revision.
dexonsmith added a comment.
This revision is now accepted and ready to land.

LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100536/new/

https://reviews.llvm.org/D100536

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100534: [clang][deps] Generate the full command-line for modules

2021-04-15 Thread Duncan P. N. Exon Smith via Phabricator via cfe-commits
dexonsmith added inline comments.



Comment at: 
clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h:78-80
+  /// The compiler invocation associated with the translation unit that imports
+  /// this module.
+  CompilerInvocation Invocation;

Looks like this will be a deep copy, but it doesn't look like it's being 
modified. Can this just be a `const &`, taken in the `ModuleDeps` constructor? 
Or is there a lifetime reason this needs to be as it is?



Comment at: clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp:25
+
+  // Remove options incompatible with explicit module build.
+  CI.getFrontendOpts().Inputs.clear();

Should this call any of the `resetNonModularOptions()` functions, or are those 
intentionally omitted?



Comment at: clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp:26-27
+  // Remove options incompatible with explicit module build.
+  CI.getFrontendOpts().Inputs.clear();
+  CI.getFrontendOpts().OutputFile.clear();
+

Should `FrontendOpts` gain a `resetNonModularOptions()`?



Comment at: clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp:62
 std::function<const ModuleDeps &(ModuleID)> LookupModuleDeps) const {
-  // TODO: Build full command line. That also means capturing the original
-  //   command line into NonPathCommandLine.
-
-  std::vector<std::string> Ret{
-  "-fno-implicit-modules",
-  "-fno-implicit-module-maps",
-  };
+  CompilerInvocation CI = getFullCommandLineCompilerInvocation(*this);
 

I think guaranteed copy elision means this won't be a deep copy of the return, 
but it might be nice to add a move constructor for `CompilerInvocation` so it's 
more obvious.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100534/new/

https://reviews.llvm.org/D100534

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100531: [clang][deps] Simplify function discovering .pcm and .modulemap files

2021-04-15 Thread Duncan P. N. Exon Smith via Phabricator via cfe-commits
dexonsmith accepted this revision.
dexonsmith added a comment.
This revision is now accepted and ready to land.

LGTM.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100531/new/

https://reviews.llvm.org/D100531

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100499: [AArch64] Neon Polynomial vadd Intrinsic Fix

2021-04-15 Thread Ryan Santhirarajan via Phabricator via cfe-commits
rsanthir.quic added a comment.

As you mentioned, I thought it was only supported due to 
`CheckFPAdvSIMDEnabled64`. If the header is also guarding for AArch64 does that 
not support the idea that it is AArch64 specific?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100499/new/

https://reviews.llvm.org/D100499

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100604: [PowerPC] Improve codegen for int-to-fp conversion of subword vector extract

2021-04-15 Thread Albion Fung via Phabricator via cfe-commits
Conanap updated this revision to Diff 337924.
Conanap added a comment.

Added test file


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100604/new/

https://reviews.llvm.org/D100604

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
  llvm/test/CodeGen/PowerPC/vec-extract-itofp.ll
  llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
  llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -34,18 +34,12 @@
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
 ; CHECK-P9-NEXT:mtvsrws v2, r3
-; CHECK-P9-NEXT:li r3, 0
-; CHECK-P9-NEXT:vextubrx r3, r3, v2
-; CHECK-P9-NEXT:clrlwi r3, r3, 24
-; CHECK-P9-NEXT:mtfprwz f0, r3
-; CHECK-P9-NEXT:li r3, 1
-; CHECK-P9-NEXT:xscvuxdsp f0, f0
-; CHECK-P9-NEXT:vextubrx r3, r3, v2
-; CHECK-P9-NEXT:clrlwi r3, r3, 24
+; CHECK-P9-NEXT:vextractub v3, v2, 15
+; CHECK-P9-NEXT:vextractub v2, v2, 14
+; CHECK-P9-NEXT:xscvuxdsp f0, v3
 ; CHECK-P9-NEXT:xscvdpspn vs0, f0
 ; CHECK-P9-NEXT:xxsldwi v3, vs0, vs0, 3
-; CHECK-P9-NEXT:mtfprwz f0, r3
-; CHECK-P9-NEXT:xscvuxdsp f0, f0
+; CHECK-P9-NEXT:xscvuxdsp f0, v2
 ; CHECK-P9-NEXT:xscvdpspn vs0, f0
 ; CHECK-P9-NEXT:xxsldwi v2, vs0, vs0, 3
 ; CHECK-P9-NEXT:vmrghw v2, v2, v3
@@ -55,17 +49,11 @@
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
 ; CHECK-BE-NEXT:mtvsrws v2, r3
-; CHECK-BE-NEXT:li r3, 1
-; CHECK-BE-NEXT:vextublx r3, r3, v2
-; CHECK-BE-NEXT:clrlwi r3, r3, 24
-; CHECK-BE-NEXT:mtfprwz f0, r3
-; CHECK-BE-NEXT:li r3, 0
-; CHECK-BE-NEXT:xscvuxdsp f0, f0
-; CHECK-BE-NEXT:vextublx r3, r3, v2
-; CHECK-BE-NEXT:clrlwi r3, r3, 24
+; CHECK-BE-NEXT:vextractub v3, v2, 2
+; CHECK-BE-NEXT:vextractub v2, v2, 0
+; CHECK-BE-NEXT:xscvuxdsp f0, v3
 ; CHECK-BE-NEXT:xscvdpspn v3, f0
-; CHECK-BE-NEXT:mtfprwz f0, r3
-; CHECK-BE-NEXT:xscvuxdsp f0, f0
+; CHECK-BE-NEXT:xscvuxdsp f0, v2
 ; CHECK-BE-NEXT:xscvdpspn v2, f0
 ; CHECK-BE-NEXT:vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:mfvsrd r3, v2
@@ -299,18 +287,14 @@
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
 ; CHECK-P9-NEXT:mtvsrws v2, r3
-; CHECK-P9-NEXT:li r3, 0
-; CHECK-P9-NEXT:vextubrx r3, r3, v2
-; CHECK-P9-NEXT:extsb r3, r3
-; CHECK-P9-NEXT:mtfprwa f0, r3
-; CHECK-P9-NEXT:li r3, 1
-; CHECK-P9-NEXT:xscvsxdsp f0, f0
-; CHECK-P9-NEXT:vextubrx r3, r3, v2
-; CHECK-P9-NEXT:extsb r3, r3
+; CHECK-P9-NEXT:vextractub v3, v2, 15
+; CHECK-P9-NEXT:vextractub v2, v2, 14
+; CHECK-P9-NEXT:vextsh2d v3, v3
+; CHECK-P9-NEXT:vextsh2d v2, v2
+; CHECK-P9-NEXT:xscvsxdsp f0, v3
 ; CHECK-P9-NEXT:xscvdpspn vs0, f0
 ; CHECK-P9-NEXT:xxsldwi v3, vs0, vs0, 3
-; CHECK-P9-NEXT:mtfprwa f0, r3
-; CHECK-P9-NEXT:xscvsxdsp f0, f0
+; CHECK-P9-NEXT:xscvsxdsp f0, v2
 ; CHECK-P9-NEXT:xscvdpspn vs0, f0
 ; CHECK-P9-NEXT:xxsldwi v2, vs0, vs0, 3
 ; CHECK-P9-NEXT:vmrghw v2, v2, v3
@@ -320,17 +304,13 @@
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
 ; CHECK-BE-NEXT:mtvsrws v2, r3
-; CHECK-BE-NEXT:li r3, 1
-; CHECK-BE-NEXT:vextublx r3, r3, v2
-; CHECK-BE-NEXT:extsb r3, r3
-; CHECK-BE-NEXT:mtfprwa f0, r3
-; CHECK-BE-NEXT:li r3, 0
-; CHECK-BE-NEXT:xscvsxdsp f0, f0
-; CHECK-BE-NEXT:vextublx r3, r3, v2
-; CHECK-BE-NEXT:extsb r3, r3
+; CHECK-BE-NEXT:vextractub v3, v2, 2
+; CHECK-BE-NEXT:vextractub v2, v2, 0
+; CHECK-BE-NEXT:vextsh2d v3, v3
+; CHECK-BE-NEXT:vextsh2d v2, v2
+; CHECK-BE-NEXT:xscvsxdsp f0, v3
 ; CHECK-BE-NEXT:xscvdpspn v3, f0
-; CHECK-BE-NEXT:mtfprwa f0, r3
-; CHECK-BE-NEXT:xscvsxdsp f0, f0
+; CHECK-BE-NEXT:xscvsxdsp f0, v2
 ; CHECK-BE-NEXT:xscvdpspn v2, f0
 ; CHECK-BE-NEXT:vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:mfvsrd r3, v2
Index: llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -34,18 +34,12 @@
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
 ; CHECK-P9-NEXT:mtvsrws v2, r3
-; CHECK-P9-NEXT:li r3, 0
-; CHECK-P9-NEXT:vextuhrx r3, r3, v2
-; CHECK-P9-NEXT:clrlwi r3, r3, 16
-; CHECK-P9-NEXT:mtfprwz f0, r3
-; CHECK-P9-NEXT:li r3, 2
-; CHECK-P9-NEXT:xscvuxdsp f0, f0
-; CHECK-P9-NEXT:vextuhrx r3, r3, v2
-; CHECK-P9-NEXT:clrlwi r3, r3, 16
+; CHECK-P9-NEXT:vextractuh v3, v2, 14
+; CHECK-P9-NEXT:vextractuh v2, v2, 12
+; CHECK-P9-NEXT:xscvuxdsp f0, v3
 ; CHECK-P9-NEXT:xscvdpspn vs0, f0
 ; 

[PATCH] D100516: [AST] Add TypeLoc support to node introspection

2021-04-15 Thread Stephen Kelly via Phabricator via cfe-commits
steveire added inline comments.



Comment at: clang/unittests/Introspection/IntrospectionTest.cpp:1294
+
+#ifndef _WIN32
+TEST(Introspection, SourceLocations_TypeOfTypeLoc) {

njames93 wrote:
> Can you add a comment explaining the issues with this test on windows 
> platforms.
I don't know what the problem is, but it failed: https://reviews.llvm.org/B98791


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100516/new/

https://reviews.llvm.org/D100516

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100516: [AST] Add TypeLoc support to node introspection

2021-04-15 Thread Stephen Kelly via Phabricator via cfe-commits
steveire updated this revision to Diff 337923.
steveire edited the summary of this revision.
steveire added a comment.

Update


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100516/new/

https://reviews.llvm.org/D100516

Files:
  clang/include/clang/Tooling/NodeIntrospection.h
  clang/lib/Tooling/CMakeLists.txt
  clang/lib/Tooling/DumpTool/APIData.h
  clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.cpp
  clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.h
  clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py
  clang/unittests/Introspection/IntrospectionTest.cpp

Index: clang/unittests/Introspection/IntrospectionTest.cpp
===
--- clang/unittests/Introspection/IntrospectionTest.cpp
+++ clang/unittests/Introspection/IntrospectionTest.cpp
@@ -26,25 +26,27 @@
 using ::testing::Pair;
 using ::testing::UnorderedElementsAre;
 
-template
-std::map
+template 
+std::vector>
 FormatExpected(const MapType ) {
-  std::map Result;
+  std::vector> Result;
   llvm::transform(llvm::make_filter_range(Accessors,
   [](const auto ) {
 return Accessor.first.isValid();
   }),
-  std::inserter(Result, Result.end()),
-  [](const auto ) {
-return std::make_pair(LocationCallFormatterCpp::format(
-  *Accessor.second.get()),
-  Accessor.first);
+  std::back_inserter(Result), [](const auto ) {
+return std::make_pair(
+LocationCallFormatterCpp::format(*Accessor.second),
+Accessor.first);
   });
   return Result;
 }
 
 #define STRING_LOCATION_PAIR(INSTANCE, LOC) Pair(#LOC, INSTANCE->LOC)
 
+#define STRING_LOCATION_STDPAIR(INSTANCE, LOC) \
+  std::make_pair(std::string(#LOC), INSTANCE->LOC)
+
 /**
   A test formatter for a hypothetical language which needs
   neither casts nor '->'.
@@ -200,26 +202,94 @@
   auto ExpectedLocations =
   FormatExpected(Result.LocationAccessors);
 
-  EXPECT_THAT(ExpectedLocations,
-  UnorderedElementsAre(
-  STRING_LOCATION_PAIR(MethodDecl, getBeginLoc()),
-  STRING_LOCATION_PAIR(MethodDecl, getBodyRBrace()),
-  STRING_LOCATION_PAIR(MethodDecl, getInnerLocStart()),
-  STRING_LOCATION_PAIR(MethodDecl, getLocation()),
-  STRING_LOCATION_PAIR(MethodDecl, getOuterLocStart()),
-  STRING_LOCATION_PAIR(MethodDecl, getTypeSpecEndLoc()),
-  STRING_LOCATION_PAIR(MethodDecl, getTypeSpecStartLoc()),
-  STRING_LOCATION_PAIR(MethodDecl, getEndLoc(;
+  llvm::sort(ExpectedLocations);
+
+  // clang-format off
+  EXPECT_EQ(
+  ExpectedLocations,
+  (std::vector>{
+STRING_LOCATION_STDPAIR(MethodDecl, getBeginLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getBodyRBrace()),
+STRING_LOCATION_STDPAIR(MethodDecl, getEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getInnerLocStart()),
+STRING_LOCATION_STDPAIR(MethodDecl, getLocation()),
+STRING_LOCATION_STDPAIR(MethodDecl, getOuterLocStart()),
+STRING_LOCATION_STDPAIR(MethodDecl,
+  getTypeSourceInfo()->getTypeLoc().getAs().getLParenLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl,
+  getTypeSourceInfo()->getTypeLoc().getAs().getLocalRangeBegin()),
+STRING_LOCATION_STDPAIR(MethodDecl,
+  getTypeSourceInfo()->getTypeLoc().getAs().getLocalRangeEnd()),
+STRING_LOCATION_STDPAIR(MethodDecl,
+  getTypeSourceInfo()->getTypeLoc().getAs().getRParenLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSourceInfo()->getTypeLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(
+MethodDecl,
+getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(
+MethodDecl,
+getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(
+MethodDecl,
+getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getAs().getLAngleLoc()),
+STRING_LOCATION_STDPAIR(
+MethodDecl,
+getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getAs().getRAngleLoc()),
+STRING_LOCATION_STDPAIR(
+MethodDecl,
+getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getAs().getTemplateNameLoc()),
+STRING_LOCATION_STDPAIR(
+MethodDecl,
+getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getBeginLoc()),
+STRING_LOCATION_STDPAIR(
+MethodDecl,
+getTypeSourceInfo()->getTypeLoc().getNextTypeLoc().getNextTypeLoc().getEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSpecEndLoc()),
+STRING_LOCATION_STDPAIR(MethodDecl, getTypeSpecStartLoc())
+  }));
+  // 

[PATCH] D100604: [PowerPC] Improve codegen for int-to-fp conversion of subword vector extract

2021-04-15 Thread Albion Fung via Phabricator via cfe-commits
Conanap created this revision.
Conanap added reviewers: nemanjai, saghir, PowerPC.
Conanap added projects: LLVM, PowerPC, clang.
Herald added a subscriber: kbarton.
Conanap requested review of this revision.

The code generated for the following example is longer than necessary; the 
conversion can be completed in two instructions instead:

  void testutof(vector unsigned short a, float *ptr) {
*ptr = a[0];
  }

The patch improves this code gen.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100604

Files:
  llvm/lib/Target/PowerPC/PPCInstrVSX.td
  llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll
  llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
  llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll

Index: llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll
@@ -34,18 +34,12 @@
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
 ; CHECK-P9-NEXT:mtvsrws v2, r3
-; CHECK-P9-NEXT:li r3, 0
-; CHECK-P9-NEXT:vextubrx r3, r3, v2
-; CHECK-P9-NEXT:clrlwi r3, r3, 24
-; CHECK-P9-NEXT:mtfprwz f0, r3
-; CHECK-P9-NEXT:li r3, 1
-; CHECK-P9-NEXT:xscvuxdsp f0, f0
-; CHECK-P9-NEXT:vextubrx r3, r3, v2
-; CHECK-P9-NEXT:clrlwi r3, r3, 24
+; CHECK-P9-NEXT:vextractub v3, v2, 15
+; CHECK-P9-NEXT:vextractub v2, v2, 14
+; CHECK-P9-NEXT:xscvuxdsp f0, v3
 ; CHECK-P9-NEXT:xscvdpspn vs0, f0
 ; CHECK-P9-NEXT:xxsldwi v3, vs0, vs0, 3
-; CHECK-P9-NEXT:mtfprwz f0, r3
-; CHECK-P9-NEXT:xscvuxdsp f0, f0
+; CHECK-P9-NEXT:xscvuxdsp f0, v2
 ; CHECK-P9-NEXT:xscvdpspn vs0, f0
 ; CHECK-P9-NEXT:xxsldwi v2, vs0, vs0, 3
 ; CHECK-P9-NEXT:vmrghw v2, v2, v3
@@ -55,17 +49,11 @@
 ; CHECK-BE-LABEL: test2elt:
 ; CHECK-BE:   # %bb.0: # %entry
 ; CHECK-BE-NEXT:mtvsrws v2, r3
-; CHECK-BE-NEXT:li r3, 1
-; CHECK-BE-NEXT:vextublx r3, r3, v2
-; CHECK-BE-NEXT:clrlwi r3, r3, 24
-; CHECK-BE-NEXT:mtfprwz f0, r3
-; CHECK-BE-NEXT:li r3, 0
-; CHECK-BE-NEXT:xscvuxdsp f0, f0
-; CHECK-BE-NEXT:vextublx r3, r3, v2
-; CHECK-BE-NEXT:clrlwi r3, r3, 24
+; CHECK-BE-NEXT:vextractub v3, v2, 2
+; CHECK-BE-NEXT:vextractub v2, v2, 0
+; CHECK-BE-NEXT:xscvuxdsp f0, v3
 ; CHECK-BE-NEXT:xscvdpspn v3, f0
-; CHECK-BE-NEXT:mtfprwz f0, r3
-; CHECK-BE-NEXT:xscvuxdsp f0, f0
+; CHECK-BE-NEXT:xscvuxdsp f0, v2
 ; CHECK-BE-NEXT:xscvdpspn v2, f0
 ; CHECK-BE-NEXT:vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:mfvsrd r3, v2
@@ -299,18 +287,14 @@
 ; CHECK-P9-LABEL: test2elt_signed:
 ; CHECK-P9:   # %bb.0: # %entry
 ; CHECK-P9-NEXT:mtvsrws v2, r3
-; CHECK-P9-NEXT:li r3, 0
-; CHECK-P9-NEXT:vextubrx r3, r3, v2
-; CHECK-P9-NEXT:extsb r3, r3
-; CHECK-P9-NEXT:mtfprwa f0, r3
-; CHECK-P9-NEXT:li r3, 1
-; CHECK-P9-NEXT:xscvsxdsp f0, f0
-; CHECK-P9-NEXT:vextubrx r3, r3, v2
-; CHECK-P9-NEXT:extsb r3, r3
+; CHECK-P9-NEXT:vextractub v3, v2, 15
+; CHECK-P9-NEXT:vextractub v2, v2, 14
+; CHECK-P9-NEXT:vextsh2d v3, v3
+; CHECK-P9-NEXT:vextsh2d v2, v2
+; CHECK-P9-NEXT:xscvsxdsp f0, v3
 ; CHECK-P9-NEXT:xscvdpspn vs0, f0
 ; CHECK-P9-NEXT:xxsldwi v3, vs0, vs0, 3
-; CHECK-P9-NEXT:mtfprwa f0, r3
-; CHECK-P9-NEXT:xscvsxdsp f0, f0
+; CHECK-P9-NEXT:xscvsxdsp f0, v2
 ; CHECK-P9-NEXT:xscvdpspn vs0, f0
 ; CHECK-P9-NEXT:xxsldwi v2, vs0, vs0, 3
 ; CHECK-P9-NEXT:vmrghw v2, v2, v3
@@ -320,17 +304,13 @@
 ; CHECK-BE-LABEL: test2elt_signed:
 ; CHECK-BE:   # %bb.0: # %entry
 ; CHECK-BE-NEXT:mtvsrws v2, r3
-; CHECK-BE-NEXT:li r3, 1
-; CHECK-BE-NEXT:vextublx r3, r3, v2
-; CHECK-BE-NEXT:extsb r3, r3
-; CHECK-BE-NEXT:mtfprwa f0, r3
-; CHECK-BE-NEXT:li r3, 0
-; CHECK-BE-NEXT:xscvsxdsp f0, f0
-; CHECK-BE-NEXT:vextublx r3, r3, v2
-; CHECK-BE-NEXT:extsb r3, r3
+; CHECK-BE-NEXT:vextractub v3, v2, 2
+; CHECK-BE-NEXT:vextractub v2, v2, 0
+; CHECK-BE-NEXT:vextsh2d v3, v3
+; CHECK-BE-NEXT:vextsh2d v2, v2
+; CHECK-BE-NEXT:xscvsxdsp f0, v3
 ; CHECK-BE-NEXT:xscvdpspn v3, f0
-; CHECK-BE-NEXT:mtfprwa f0, r3
-; CHECK-BE-NEXT:xscvsxdsp f0, f0
+; CHECK-BE-NEXT:xscvsxdsp f0, v2
 ; CHECK-BE-NEXT:xscvdpspn v2, f0
 ; CHECK-BE-NEXT:vmrghw v2, v2, v3
 ; CHECK-BE-NEXT:mfvsrd r3, v2
Index: llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
===
--- llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
+++ llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll
@@ -34,18 +34,12 @@
 ; CHECK-P9-LABEL: test2elt:
 ; CHECK-P9:   # %bb.0: # %entry
 ; CHECK-P9-NEXT:mtvsrws v2, r3
-; CHECK-P9-NEXT:li r3, 0
-; CHECK-P9-NEXT:vextuhrx r3, r3, v2
-; CHECK-P9-NEXT:clrlwi r3, r3, 16
-; CHECK-P9-NEXT:mtfprwz f0, r3
-; CHECK-P9-NEXT:li r3, 2
-; CHECK-P9-NEXT:xscvuxdsp f0, f0
-; 

[PATCH] D100567: BPF: emit debuginfo for Function of DeclRefExpr if requested

2021-04-15 Thread David Blaikie via Phabricator via cfe-commits
dblaikie added inline comments.



Comment at: clang/lib/CodeGen/CGExpr.cpp:2840
+// Emit debuginfo for the function declaration if the target wants to.
+if (getContext().getTargetInfo().allowDebugInfoForExternalVar()) {
+  CGDebugInfo *DI = CGM.getModuleDebugInfo();

yonghong-song wrote:
> dblaikie wrote:
> > Seems like this should be renamed given it's being used for things other 
> > than external variables?
> I noticed this but didn't change it, as I thought the current name was *sort 
> of* okay. But you are right: it is not precise. I will change it to 
> "allowDebugInfoForExternalRef()" and resubmit. Let me know if you have a 
> better function name suggestion.
Sounds good to me, thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100567/new/

https://reviews.llvm.org/D100567

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100567: BPF: emit debuginfo for Function of DeclRefExpr if requested

2021-04-15 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added inline comments.



Comment at: clang/lib/CodeGen/CGExpr.cpp:2840
+// Emit debuginfo for the function declaration if the target wants to.
+if (getContext().getTargetInfo().allowDebugInfoForExternalVar()) {
+  CGDebugInfo *DI = CGM.getModuleDebugInfo();

dblaikie wrote:
> Seems like this should be renamed given it's being used for things other than 
> external variables?
I noticed this but didn't change it, as I thought the current name was *sort of* 
okay. But you are right: it is not precise. I will change it to 
"allowDebugInfoForExternalRef()" and resubmit. Let me know if you have a better 
function name suggestion.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100567/new/

https://reviews.llvm.org/D100567

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100567: BPF: emit debuginfo for Function of DeclRefExpr if requested

2021-04-15 Thread David Blaikie via Phabricator via cfe-commits
dblaikie added a comment.

ah, right, because this is powered by seeing the DeclRefExpr only in code 
that's codegen'd - fair enough. Thanks for checking!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100567/new/

https://reviews.llvm.org/D100567

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] f62ad15 - NFC: Add a simple test for introspection call formatting

2021-04-15 Thread Stephen Kelly via cfe-commits

Author: Stephen Kelly
Date: 2021-04-15T23:45:54+01:00
New Revision: f62ad15cd7df0ca7681e0dbb894ee1c1d2465c51

URL: 
https://github.com/llvm/llvm-project/commit/f62ad15cd7df0ca7681e0dbb894ee1c1d2465c51
DIFF: 
https://github.com/llvm/llvm-project/commit/f62ad15cd7df0ca7681e0dbb894ee1c1d2465c51.diff

LOG: NFC: Add a simple test for introspection call formatting

Added: 


Modified: 
clang/unittests/Introspection/IntrospectionTest.cpp

Removed: 




diff  --git a/clang/unittests/Introspection/IntrospectionTest.cpp 
b/clang/unittests/Introspection/IntrospectionTest.cpp
index be58945c9a8d..ad21748f11f8 100644
--- a/clang/unittests/Introspection/IntrospectionTest.cpp
+++ b/clang/unittests/Introspection/IntrospectionTest.cpp
@@ -45,6 +45,43 @@ FormatExpected(const MapType ) {
 
 #define STRING_LOCATION_PAIR(INSTANCE, LOC) Pair(#LOC, INSTANCE->LOC)
 
+/**
+  A test formatter for a hypothetical language which needs
+  neither casts nor '->'.
+*/
+class LocationCallFormatterSimple {
+public:
+  static void print(const LocationCall , llvm::raw_ostream ) {
+if (Call.isCast()) {
+  if (const LocationCall *On = Call.on())
+print(*On, OS);
+  return;
+}
+if (const LocationCall *On = Call.on()) {
+  print(*On, OS);
+  OS << '.';
+}
+OS << Call.name();
+if (Call.args().empty()) {
+  OS << "()";
+  return;
+}
+OS << '(' << Call.args().front();
+for (const std::string  : Call.args().drop_front()) {
+  OS << ", " << Arg;
+}
+OS << ')';
+  }
+
+  static std::string format(const LocationCall ) {
+std::string Result;
+llvm::raw_string_ostream OS(Result);
+print(Call, OS);
+OS.flush();
+return Result;
+  }
+};
+
 TEST(Introspection, SourceLocations_CallContainer) {
   SourceLocationMap slm;
   SharedLocationCall Prefix;
@@ -70,6 +107,24 @@ TEST(Introspection, SourceLocations_CallChainFormatting) {
 "getTypeLoc().getSourceRange()");
 }
 
+TEST(Introspection, SourceLocations_Formatter) {
+  SharedLocationCall Prefix;
+  auto chainedCall = llvm::makeIntrusiveRefCnt(
+  llvm::makeIntrusiveRefCnt(
+  llvm::makeIntrusiveRefCnt(
+  llvm::makeIntrusiveRefCnt(
+  Prefix, "getTypeSourceInfo", LocationCall::ReturnsPointer),
+  "getTypeLoc"),
+  "getAs", LocationCall::IsCast),
+  "getNameLoc");
+
+  
EXPECT_EQ("getTypeSourceInfo()->getTypeLoc().getAs()."
+"getNameLoc()",
+LocationCallFormatterCpp::format(*chainedCall));
+  EXPECT_EQ("getTypeSourceInfo().getTypeLoc().getNameLoc()",
+LocationCallFormatterSimple::format(*chainedCall));
+}
+
 TEST(Introspection, SourceLocations_Stmt) {
   if (!NodeIntrospection::hasIntrospectionSupport())
 return;



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100567: BPF: emit debuginfo for Function of DeclRefExpr if requested

2021-04-15 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added a comment.

For the first example, clang is actually smart enough to remove all the dead 
code, so nothing is generated.

[yhs@devbig003.ftw2 ~/tmp/ext_func_var]$ cat t1.c
extern void f1();
void f2(void *);
inline void f3() {

  f2(f1);

}
[yhs@devbig003.ftw2 ~/tmp/ext_func_var]$ clang -target bpf -g -S -emit-llvm t1.c
[yhs@devbig003.ftw2 ~/tmp/ext_func_var]$ cat t1.ll
; ModuleID = 't1.c'
source_filename = "t1.c"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "bpf"

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}

!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang 
version 13.0.0 (https://github.com/llvm/llvm-project.git 
68275c77c92b89fafbacc31b4f40303bb9e0c9a7)", isOptimized: false, runtimeVersion: 
0, emissionKind: FullDebug, enums: !2, splitDebugInlining: false, 
nameTableKind: None)
!1 = !DIFile(filename: "t1.c", directory: "/home/yhs/tmp/ext_func_var")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 
68275c77c92b89fafbacc31b4f40303bb9e0c9a7)"}
[yhs@devbig003.ftw2 ~/tmp/ext_func_var]$

For the second example,

[yhs@devbig003.ftw2 ~/tmp/ext_func_var]$ cat t2.c   

void f1();  

int main() {

  int x = sizeof(&f1);

  return x; 


}   

[yhs@devbig003.ftw2 ~/tmp/ext_func_var]$ clang -target bpf -g -S -emit-llvm 
t2.c
[yhs@devbig003.ftw2 ~/tmp/ext_func_var]$ cat t2.ll  

; ModuleID = 't2.c' 

source_filename = "t2.c"

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" 

target triple = "bpf"

  

; Function Attrs: noinline nounwind optnone 

define dso_local i32 @main() #0 !dbg !7 {   

entry:

  %retval = alloca i32, align 4 

  %x = alloca i32, align 4  

  store i32 0, i32* %retval, align 4

  call void @llvm.dbg.declare(metadata i32* %x, metadata !11, metadata 
!DIExpression()), !dbg !12   
  store i32 8, i32* %x, align 4, !dbg !12   

  %0 = load i32, i32* %x, align 4, !dbg !13 

  ret i32 %0, !dbg !14  


}

; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare void @llvm.dbg.declare(metadata, metadata, metadata) #1

attributes #0 = { noinline nounwind optnone "frame-pointer"="all" 
"min-legal-vector-width"="0" "no-trapping-math"="t
rue" "stack-protector-buffer-size"="8" }
attributes #1 = { nofree nosync nounwind readnone speculatable willreturn }

!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4, !5}
!llvm.ident = !{!6}

!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang 
version 13.0.0 (https://github.com/ll
vm/llvm-project.git 68275c77c92b89fafbacc31b4f40303bb9e0c9a7)", isOptimized: 
false, runtimeVersion: 0, emissionKind:
 FullDebug, enums: !2, splitDebugInlining: false, nameTableKind: None)
!1 = !DIFile(filename: "t2.c", directory: "/home/yhs/tmp/ext_func_var")
!2 = !{}
!3 = !{i32 7, !"Dwarf Version", i32 4}
!4 = !{i32 2, !"Debug Info Version", i32 3}
!5 = !{i32 1, !"wchar_size", i32 4}
!6 = !{!"clang version 13.0.0 (https://github.com/llvm/llvm-project.git 
68275c77c92b89fafbacc31b4f40303bb9e0c9a7)"}
!7 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: 
!8, scopeLine: 2, spFlags: DISPFlagDef
inition, unit: !0, retainedNodes: !2)
!8 = !DISubroutineType(types: !9)
!9 = !{!10}
!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
!11 = !DILocalVariable(name: "x", 

[PATCH] D100581: [Clang] -Wunused-but-set-parameter and -Wunused-but-set-variable

2021-04-15 Thread Dávid Bolvanský via Phabricator via cfe-commits
xbolva00 added a comment.

>> These warnings are not enabled by any other flags. This is different from 
>> gcc, where -Wunused-but-set-variable is enabled by -Wextra in combination 
>> with either -Wunused or -Wall.

IMHO we should follow gcc here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100581/new/

https://reviews.llvm.org/D100581

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] be65347 - NFC: Add missing matcher for test method

2021-04-15 Thread Stephen Kelly via cfe-commits

Author: Stephen Kelly
Date: 2021-04-15T23:26:00+01:00
New Revision: be65347326084ad1c309d4330e94d671f011b35b

URL: 
https://github.com/llvm/llvm-project/commit/be65347326084ad1c309d4330e94d671f011b35b
DIFF: 
https://github.com/llvm/llvm-project/commit/be65347326084ad1c309d4330e94d671f011b35b.diff

LOG: NFC: Add missing matcher for test method

The intention is to match the definition.

Added: 


Modified: 
clang/unittests/Introspection/IntrospectionTest.cpp

Removed: 




diff  --git a/clang/unittests/Introspection/IntrospectionTest.cpp 
b/clang/unittests/Introspection/IntrospectionTest.cpp
index 2df401c8d813..be58945c9a8d 100644
--- a/clang/unittests/Introspection/IntrospectionTest.cpp
+++ b/clang/unittests/Introspection/IntrospectionTest.cpp
@@ -133,7 +133,7 @@ ns1::ns2::Foo ns1::ns2::Bar::Nested::method(int 
i, bool b) const
 
   auto BoundNodes = ast_matchers::match(
   decl(hasDescendant(
-  cxxMethodDecl(hasName("method")).bind("method"))),
+  cxxMethodDecl(hasName("method"), isDefinition()).bind("method"))),
   TU, Ctx);
 
   EXPECT_EQ(BoundNodes.size(), 1u);



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100581: [Clang] -Wunused-but-set-parameter and -Wunused-but-set-variable

2021-04-15 Thread Michael Benfield via Phabricator via cfe-commits
mbenfield added inline comments.



Comment at: clang/lib/Sema/SemaDecl.cpp:13740
+// other than assigning to it, sets the corresponding value to false.
+static void AreAllUsesSets(Stmt *Body,
+   llvm::SmallDenseMap *Map) {

george.burgess.iv wrote:
> nit: Should this be a `const Stmt*`? I don't think we should be mutating the 
> `Body`
Unfortunately the `RecursiveASTVisitor`'s non-overridden member functions don't 
take `const`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100581/new/

https://reviews.llvm.org/D100581

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 924cdff - [OpenMP5][DOCS] Update status of masked construct and correct the color

2021-04-15 Thread via cfe-commits

Author: cchen
Date: 2021-04-15T17:19:04-05:00
New Revision: 924cdff0ae18f60f476ccc44d6e5cd0d0e026256

URL: 
https://github.com/llvm/llvm-project/commit/924cdff0ae18f60f476ccc44d6e5cd0d0e026256
DIFF: 
https://github.com/llvm/llvm-project/commit/924cdff0ae18f60f476ccc44d6e5cd0d0e026256.diff

LOG: [OpenMP5][DOCS] Update status of masked construct and correct the color
for omp_target_is_present, NFC.

Added: 


Modified: 
clang/docs/OpenMPSupport.rst

Removed: 




diff  --git a/clang/docs/OpenMPSupport.rst b/clang/docs/OpenMPSupport.rst
index 74d9a0858e27..3bd1a0c21148 100644
--- a/clang/docs/OpenMPSupport.rst
+++ b/clang/docs/OpenMPSupport.rst
@@ -193,7 +193,7 @@ implementation.
 
+--+--+--+---+
 | device extension | implicitly map 'this' (this[:1])  
   | :good:`done` | D55982  
  |
 
+--+--+--+---+
-| device extension | allow access to the reference count 
(omp_target_is_present)  | :part:`done` |   
|
+| device extension | allow access to the reference count 
(omp_target_is_present)  | :good:`done` |   
|
 
+--+--+--+---+
 | device extension | requires directive
   | :part:`partial`  | 
  |
 
+--+--+--+---+
@@ -340,7 +340,7 @@ want to help with the implementation.
 
+--+--+--+---+
 | misc extension   | nothing directive 
   | :none:`unclaimed`| 
  |
 
+--+--+--+---+
-| misc extension   | masked construct and related combined 
constructs | :part:`worked on`| D5  
  |
+| misc extension   | masked construct and related combined 
constructs | :part:`worked on`| D5, D100514 
  |
 
+--+--+--+---+
 | misc extension   | default(firstprivate) & default(private)  
   | :part:`partial`  | firstprivate done: D75591   
  |
 
+--+--+--+---+



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] 4f6d698 - [AST] Fix location call storage with common last-invocation

2021-04-15 Thread Stephen Kelly via cfe-commits

Author: Stephen Kelly
Date: 2021-04-15T23:15:11+01:00
New Revision: 4f6d69846747dd53a54a5de0da7eca38df52d5ca

URL: 
https://github.com/llvm/llvm-project/commit/4f6d69846747dd53a54a5de0da7eca38df52d5ca
DIFF: 
https://github.com/llvm/llvm-project/commit/4f6d69846747dd53a54a5de0da7eca38df52d5ca.diff

LOG: [AST] Fix location call storage with common last-invocation

Differential Revision: https://reviews.llvm.org/D100548

Added: 


Modified: 
clang/lib/Tooling/NodeIntrospection.cpp
clang/unittests/Introspection/IntrospectionTest.cpp

Removed: 




diff  --git a/clang/lib/Tooling/NodeIntrospection.cpp 
b/clang/lib/Tooling/NodeIntrospection.cpp
index 2ee0b1cae55b..0e3ef3c6a01e 100644
--- a/clang/lib/Tooling/NodeIntrospection.cpp
+++ b/clang/lib/Tooling/NodeIntrospection.cpp
@@ -66,13 +66,15 @@ bool RangeLessThan::operator()(
   else if (LHS.first.getEnd() != RHS.first.getEnd())
 return false;
 
-  return LHS.second->name() < RHS.second->name();
+  return LocationCallFormatterCpp::format(*LHS.second) <
+ LocationCallFormatterCpp::format(*RHS.second);
 }
 bool RangeLessThan::operator()(
 std::pair const ,
 std::pair const ) const {
   if (LHS.first == RHS.first)
-return LHS.second->name() < RHS.second->name();
+return LocationCallFormatterCpp::format(*LHS.second) <
+   LocationCallFormatterCpp::format(*RHS.second);
   return LHS.first < RHS.first;
 }
 } // namespace internal

diff  --git a/clang/unittests/Introspection/IntrospectionTest.cpp 
b/clang/unittests/Introspection/IntrospectionTest.cpp
index 880068c43b6e..2df401c8d813 100644
--- a/clang/unittests/Introspection/IntrospectionTest.cpp
+++ b/clang/unittests/Introspection/IntrospectionTest.cpp
@@ -45,6 +45,31 @@ FormatExpected(const MapType ) {
 
 #define STRING_LOCATION_PAIR(INSTANCE, LOC) Pair(#LOC, INSTANCE->LOC)
 
+TEST(Introspection, SourceLocations_CallContainer) {
+  SourceLocationMap slm;
+  SharedLocationCall Prefix;
+  slm.insert(std::make_pair(
+  SourceLocation(),
+  llvm::makeIntrusiveRefCnt(Prefix, "getSourceRange")));
+  EXPECT_EQ(slm.size(), 1u);
+
+  auto callTypeLoc =
+  llvm::makeIntrusiveRefCnt(Prefix, "getTypeLoc");
+  slm.insert(std::make_pair(
+  SourceLocation(),
+  llvm::makeIntrusiveRefCnt(callTypeLoc, "getSourceRange")));
+  EXPECT_EQ(slm.size(), 2u);
+}
+
+TEST(Introspection, SourceLocations_CallChainFormatting) {
+  SharedLocationCall Prefix;
+  auto chainedCall = llvm::makeIntrusiveRefCnt(
+  llvm::makeIntrusiveRefCnt(Prefix, "getTypeLoc"),
+  "getSourceRange");
+  EXPECT_EQ(LocationCallFormatterCpp::format(*chainedCall),
+"getTypeLoc().getSourceRange()");
+}
+
 TEST(Introspection, SourceLocations_Stmt) {
   if (!NodeIntrospection::hasIntrospectionSupport())
 return;



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100548: [AST] Fix location call storage with common last-invocation

2021-04-15 Thread Stephen Kelly via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG4f6d69846747: [AST] Fix location call storage with common 
last-invocation (authored by stephenkelly).

Changed prior to commit:
  https://reviews.llvm.org/D100548?vs=337713&id=337917#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100548/new/

https://reviews.llvm.org/D100548

Files:
  clang/lib/Tooling/NodeIntrospection.cpp
  clang/unittests/Introspection/IntrospectionTest.cpp


Index: clang/unittests/Introspection/IntrospectionTest.cpp
===
--- clang/unittests/Introspection/IntrospectionTest.cpp
+++ clang/unittests/Introspection/IntrospectionTest.cpp
@@ -45,6 +45,31 @@
 
 #define STRING_LOCATION_PAIR(INSTANCE, LOC) Pair(#LOC, INSTANCE->LOC)
 
+TEST(Introspection, SourceLocations_CallContainer) {
+  SourceLocationMap slm;
+  SharedLocationCall Prefix;
+  slm.insert(std::make_pair(
+  SourceLocation(),
+  llvm::makeIntrusiveRefCnt(Prefix, "getSourceRange")));
+  EXPECT_EQ(slm.size(), 1u);
+
+  auto callTypeLoc =
+  llvm::makeIntrusiveRefCnt(Prefix, "getTypeLoc");
+  slm.insert(std::make_pair(
+  SourceLocation(),
+  llvm::makeIntrusiveRefCnt(callTypeLoc, "getSourceRange")));
+  EXPECT_EQ(slm.size(), 2u);
+}
+
+TEST(Introspection, SourceLocations_CallChainFormatting) {
+  SharedLocationCall Prefix;
+  auto chainedCall = llvm::makeIntrusiveRefCnt(
+  llvm::makeIntrusiveRefCnt(Prefix, "getTypeLoc"),
+  "getSourceRange");
+  EXPECT_EQ(LocationCallFormatterCpp::format(*chainedCall),
+"getTypeLoc().getSourceRange()");
+}
+
 TEST(Introspection, SourceLocations_Stmt) {
   if (!NodeIntrospection::hasIntrospectionSupport())
 return;
Index: clang/lib/Tooling/NodeIntrospection.cpp
===
--- clang/lib/Tooling/NodeIntrospection.cpp
+++ clang/lib/Tooling/NodeIntrospection.cpp
@@ -66,13 +66,15 @@
   else if (LHS.first.getEnd() != RHS.first.getEnd())
 return false;
 
-  return LHS.second->name() < RHS.second->name();
+  return LocationCallFormatterCpp::format(*LHS.second) <
+ LocationCallFormatterCpp::format(*RHS.second);
 }
 bool RangeLessThan::operator()(
 std::pair const ,
 std::pair const ) const {
   if (LHS.first == RHS.first)
-return LHS.second->name() < RHS.second->name();
+return LocationCallFormatterCpp::format(*LHS.second) <
+   LocationCallFormatterCpp::format(*RHS.second);
   return LHS.first < RHS.first;
 }
 } // namespace internal


Index: clang/unittests/Introspection/IntrospectionTest.cpp
===
--- clang/unittests/Introspection/IntrospectionTest.cpp
+++ clang/unittests/Introspection/IntrospectionTest.cpp
@@ -45,6 +45,31 @@
 
 #define STRING_LOCATION_PAIR(INSTANCE, LOC) Pair(#LOC, INSTANCE->LOC)
 
+TEST(Introspection, SourceLocations_CallContainer) {
+  SourceLocationMap slm;
+  SharedLocationCall Prefix;
+  slm.insert(std::make_pair(
+  SourceLocation(),
+  llvm::makeIntrusiveRefCnt(Prefix, "getSourceRange")));
+  EXPECT_EQ(slm.size(), 1u);
+
+  auto callTypeLoc =
+  llvm::makeIntrusiveRefCnt(Prefix, "getTypeLoc");
+  slm.insert(std::make_pair(
+  SourceLocation(),
+  llvm::makeIntrusiveRefCnt(callTypeLoc, "getSourceRange")));
+  EXPECT_EQ(slm.size(), 2u);
+}
+
+TEST(Introspection, SourceLocations_CallChainFormatting) {
+  SharedLocationCall Prefix;
+  auto chainedCall = llvm::makeIntrusiveRefCnt(
+  llvm::makeIntrusiveRefCnt(Prefix, "getTypeLoc"),
+  "getSourceRange");
+  EXPECT_EQ(LocationCallFormatterCpp::format(*chainedCall),
+"getTypeLoc().getSourceRange()");
+}
+
 TEST(Introspection, SourceLocations_Stmt) {
   if (!NodeIntrospection::hasIntrospectionSupport())
 return;
Index: clang/lib/Tooling/NodeIntrospection.cpp
===
--- clang/lib/Tooling/NodeIntrospection.cpp
+++ clang/lib/Tooling/NodeIntrospection.cpp
@@ -66,13 +66,15 @@
   else if (LHS.first.getEnd() != RHS.first.getEnd())
 return false;
 
-  return LHS.second->name() < RHS.second->name();
+  return LocationCallFormatterCpp::format(*LHS.second) <
+ LocationCallFormatterCpp::format(*RHS.second);
 }
 bool RangeLessThan::operator()(
 std::pair const ,
 std::pair const ) const {
   if (LHS.first == RHS.first)
-return LHS.second->name() < RHS.second->name();
+return LocationCallFormatterCpp::format(*LHS.second) <
+   LocationCallFormatterCpp::format(*RHS.second);
   return LHS.first < RHS.first;
 }
 } // namespace internal
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100567: BPF: emit debuginfo for Function of DeclRefExpr if requested

2021-04-15 Thread David Blaikie via Phabricator via cfe-commits
dblaikie added a comment.

What happens for this program:

  extern void f1();
  void f2(void *);
  inline void f3() {
f2(f1);
  }
  ...

Even when `f3` is never called, I'm guessing your change will cause `f1` to be 
emitted?

Also something like this:

  void f1();
  int main() {
int x = sizeof(&f1);
  }

Does that produce the declaration of `f1` too?




Comment at: clang/lib/CodeGen/CGExpr.cpp:2840
+// Emit debuginfo for the function declaration if the target wants to.
+if (getContext().getTargetInfo().allowDebugInfoForExternalVar()) {
+  CGDebugInfo *DI = CGM.getModuleDebugInfo();

Seems like this should be renamed given it's being used for things other than 
external variables?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100567/new/

https://reviews.llvm.org/D100567

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98794: [AArch64] Correctly align HFA arguments when passed on the stack

2021-04-15 Thread Momchil Velikov via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf9d932e6735a: [clang][AArch64] Correctly align HFA arguments 
when passed on the stack (authored by chill).
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98794/new/

https://reviews.llvm.org/D98794

Files:
  clang/include/clang/CodeGen/CGFunctionInfo.h
  clang/lib/CodeGen/CGCall.cpp
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/aarch64-args-hfa.c
  llvm/docs/LangRef.rst
  llvm/include/llvm/CodeGen/TargetCallingConv.h
  llvm/include/llvm/IR/Argument.h
  llvm/include/llvm/IR/Attributes.h
  llvm/include/llvm/IR/Function.h
  llvm/include/llvm/IR/InstrTypes.h
  llvm/lib/AsmParser/LLParser.cpp
  llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
  llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
  llvm/lib/IR/Attributes.cpp
  llvm/lib/IR/Function.cpp
  llvm/lib/IR/Verifier.cpp
  llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
  llvm/test/Bitcode/compatibility.ll
  llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll

Index: llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll
===
--- /dev/null
+++ llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll
@@ -0,0 +1,33 @@
+; RUN: llc < %s -mtriple=arm64-none-eabi | FileCheck %s
+
+; Over-aligned HFA argument placed on register - one element per register
+define double @test_hfa_align_arg_reg([2 x double] alignstack(16) %h.coerce) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: test_hfa_align_arg_reg:
+; CHECK-NOT: mov
+; CHECK-NOT: ld
+; CHECK: ret
+  %h.coerce.fca.0.extract = extractvalue [2 x double] %h.coerce, 0
+  ret double %h.coerce.fca.0.extract
+}
+
+; Call with over-aligned HFA argument placed on register - one element per register
+define double @test_hfa_align_call_reg() local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: test_hfa_align_call_reg:
+; CHECK-DAG: fmov  d0, #1.
+; CHECK-DAG: fmov  d1, #2.
+; CHECK: bltest_hfa_align_arg_reg
+  %call = call double @test_hfa_align_arg_reg([2 x double] alignstack(16) [double 1.00e+00, double 2.00e+00])
+  ret double %call
+}
+
+; Over-aligned HFA argument placed on stack - stack round up to alignment
+define double @test_hfa_align_arg_stack(double %d0, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, float %f, [2 x double] alignstack(16) %h.coerce) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: test_hfa_align_arg_stack:
+; CHECK:   ldr  d0, [sp, #16]
+; CHECK-NEXT:  ret
+  %h.coerce.fca.0.extract = extractvalue [2 x double] %h.coerce, 0
+  ret double %h.coerce.fca.0.extract
+}
Index: llvm/test/Bitcode/compatibility.ll
===
--- llvm/test/Bitcode/compatibility.ll
+++ llvm/test/Bitcode/compatibility.ll
@@ -550,6 +550,8 @@
 ; CHECK: declare void @f.param.dereferenceable(i8* dereferenceable(4))
 declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4))
 ; CHECK: declare void @f.param.dereferenceable_or_null(i8* dereferenceable_or_null(4))
+declare void @f.param.stack_align([2 x double] alignstack(16))
+; CHECK: declare void @f.param.stack_align([2 x double] alignstack(16))
 
 ; Functions -- unnamed_addr and local_unnamed_addr
 declare void @f.unnamed_addr() unnamed_addr
Index: llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
===
--- llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
+++ llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
@@ -88,13 +88,8 @@
   }
 
   unsigned Size = LocVT.getSizeInBits() / 8;
-  const Align StackAlign =
-  State.getMachineFunction().getDataLayout().getStackAlignment();
-  const Align OrigAlign = ArgFlags.getNonZeroOrigAlign();
-  const Align Alignment = std::min(OrigAlign, StackAlign);
-
   for (auto  : PendingMembers) {
-It.convertToMem(State.AllocateStack(Size, std::max(Alignment, SlotAlign)));
+It.convertToMem(State.AllocateStack(Size, SlotAlign));
 State.addLoc(It);
 SlotAlign = Align(1);
   }
@@ -197,7 +192,12 @@
   State.AllocateReg(Reg);
   }
 
-  const Align SlotAlign = Subtarget.isTargetDarwin() ? Align(1) : Align(8);
+  const Align StackAlign =
+  State.getMachineFunction().getDataLayout().getStackAlignment();
+  const Align MemAlign = ArgFlags.getNonZeroMemAlign();
+  Align SlotAlign = std::min(MemAlign, StackAlign);
+  if (!Subtarget.isTargetDarwin())
+SlotAlign = std::max(SlotAlign, Align(8));
 
   return finishStackBlock(PendingMembers, LocVT, ArgFlags, State, SlotAlign);
 }
Index: llvm/lib/IR/Verifier.cpp
===
--- llvm/lib/IR/Verifier.cpp
+++ 

[clang] f9d932e - [clang][AArch64] Correctly align HFA arguments when passed on the stack

2021-04-15 Thread Momchil Velikov via cfe-commits

Author: Momchil Velikov
Date: 2021-04-15T22:58:14+01:00
New Revision: f9d932e6735afe73117e142a12443449f2197e69

URL: 
https://github.com/llvm/llvm-project/commit/f9d932e6735afe73117e142a12443449f2197e69
DIFF: 
https://github.com/llvm/llvm-project/commit/f9d932e6735afe73117e142a12443449f2197e69.diff

LOG: [clang][AArch64] Correctly align HFA arguments when passed on the stack

When we pass an AArch64 Homogeneous Floating-Point
Aggregate (HFA) argument with increased alignment
requirements, for example

struct S {
  __attribute__ ((__aligned__(16))) double v[4];
};

Clang uses `[4 x double]` for the parameter, which is passed
on the stack at alignment 8, whereas it should be at
alignment 16, following Rule C.4 in
AAPCS 
(https://github.com/ARM-software/abi-aa/blob/master/aapcs64/aapcs64.rst#642parameter-passing-rules)

Currently we don't have a way to express in LLVM IR the
alignment requirements of the function arguments. The align
attribute is applicable to pointers only, and only for some
special ways of passing arguments (e.g. byval). When
implementing AAPCS32/AAPCS64, clang resorts to dubious hacks
of coercing to types, which naturally have the needed
alignment. We don't have enough types to cover all the
cases, though.

This patch introduces a new use of the stackalign attribute
to control stack slot alignment, when and if an argument is
passed in memory.

The attribute align is left as an optimizer hint - it still
applies to pointer types only and pertains to the content of
the pointer, whereas the alignment of the pointer itself is
determined by the stackalign attribute.

For byval arguments, the stackalign attribute assumes the
role previously performed by align, falling back to align if
`stackalign` is absent.

On the clang side, when passing arguments using the "direct"
style (cf. `ABIArgInfo::Kind`), now we can optionally
specify an alignment, which is emitted as the new
`stackalign` attribute.

Patch by Momchil Velikov and Lucas Prates.

Differential Revision: https://reviews.llvm.org/D98794

Added: 
clang/test/CodeGen/aarch64-args-hfa.c
llvm/test/CodeGen/AArch64/arm64-abi-hfa-args.ll

Modified: 
clang/include/clang/CodeGen/CGFunctionInfo.h
clang/lib/CodeGen/CGCall.cpp
clang/lib/CodeGen/TargetInfo.cpp
llvm/docs/LangRef.rst
llvm/include/llvm/CodeGen/TargetCallingConv.h
llvm/include/llvm/IR/Argument.h
llvm/include/llvm/IR/Attributes.h
llvm/include/llvm/IR/Function.h
llvm/include/llvm/IR/InstrTypes.h
llvm/lib/AsmParser/LLParser.cpp
llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
llvm/lib/IR/Attributes.cpp
llvm/lib/IR/Function.cpp
llvm/lib/IR/Verifier.cpp
llvm/lib/Target/AArch64/AArch64CallingConvention.cpp
llvm/test/Bitcode/compatibility.ll

Removed: 




diff  --git a/clang/include/clang/CodeGen/CGFunctionInfo.h 
b/clang/include/clang/CodeGen/CGFunctionInfo.h
index 253ef946ce15d..91d867e7f64a5 100644
--- a/clang/include/clang/CodeGen/CGFunctionInfo.h
+++ b/clang/include/clang/CodeGen/CGFunctionInfo.h
@@ -94,12 +94,17 @@ class ABIArgInfo {
 llvm::Type *UnpaddedCoerceAndExpandType; // isCoerceAndExpand()
   };
   union {
-unsigned DirectOffset; // isDirect() || isExtend()
-unsigned IndirectAlign;// isIndirect()
+struct {
+  unsigned Offset;
+  unsigned Align;
+} DirectAttr;  // isDirect() || isExtend()
+struct {
+  unsigned Align;
+  unsigned AddrSpace;
+} IndirectAttr;// isIndirect()
 unsigned AllocaFieldIndex; // isInAlloca()
   };
   Kind TheKind;
-  unsigned IndirectAddrSpace : 24; // isIndirect()
   bool PaddingInReg : 1;
   bool InAllocaSRet : 1;// isInAlloca()
   bool InAllocaIndirect : 1;// isInAlloca()
@@ -126,19 +131,20 @@ class ABIArgInfo {
 
 public:
   ABIArgInfo(Kind K = Direct)
-  : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
-IndirectAddrSpace(0), PaddingInReg(false), InAllocaSRet(false),
+  : TypeData(nullptr), PaddingType(nullptr), DirectAttr{0, 0}, TheKind(K),
+PaddingInReg(false), InAllocaSRet(false),
 InAllocaIndirect(false), IndirectByVal(false), IndirectRealign(false),
 SRetAfterThis(false), InReg(false), CanBeFlattened(false),
 SignExt(false) {}
 
   static ABIArgInfo getDirect(llvm::Type *T = nullptr, unsigned Offset = 0,
   llvm::Type *Padding = nullptr,
-  bool CanBeFlattened = true) {
+  bool CanBeFlattened = true, unsigned Align = 0) {
 auto AI = ABIArgInfo(Direct);
 AI.setCoerceToType(T);
 AI.setPaddingType(Padding);
 AI.setDirectOffset(Offset);
+AI.setDirectAlign(Align);
 

[PATCH] D100516: [AST] Add TypeLoc support to node introspection

2021-04-15 Thread Nathan James via Phabricator via cfe-commits
njames93 added inline comments.



Comment at: clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.cpp:158-159
 BN.getNodeAs("stmtOrDeclBase");
+const auto *TypeLocBase = 
BN.getNodeAs("typeLocBase");
+const auto *ExprBase = BN.getNodeAs("exprBase");
 if (const auto *Node = BN.getNodeAs("classMethod")) {

nit: can't these be moved into the if stmt below? While we're here, 
StmtOrDeclBase can also be moved inside the if.



Comment at: clang/lib/Tooling/DumpTool/ASTSrcLocProcessor.cpp:172-176
+  if (TypeLocBase && Node->getName() == "getLocalSourceRange")
+continue;
+  if ((ASTClass->getName() == "PointerLikeTypeLoc" ||
+   ASTClass->getName() == "TypeofLikeTypeLoc") &&
+  Node->getName() == "getLocalSourceRange")

Can we have a comment explaining why we are discarding these?



Comment at: clang/unittests/Introspection/IntrospectionTest.cpp:875
+
+TEST(Introspection, SourceLocations_Formatter) {
+  auto AST =

This test seems irrelevant to what this patch is trying to achieve.



Comment at: clang/unittests/Introspection/IntrospectionTest.cpp:1294
+
+#ifndef _WIN32
+TEST(Introspection, SourceLocations_TypeOfTypeLoc) {

Can you add a comment explaining the issues with this test on Windows platforms?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100516/new/

https://reviews.llvm.org/D100516

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100552: [HIP] Diagnose compiling kernel without offload arch

2021-04-15 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.

Enforcing explicit GPU target makes sense.

However, I think that singling out a `__global__` as the trigger is not 
sufficient for the intended purpose.

If we can't generate a usable GPU-side binary, then we should produce an error 
if we need to generate *anything* during GPU-side compilation.
Using `__global__` as a proxy would not catch some use cases and, possibly, 
will produce false positives in others.

E.g. what if I have a TU which only has a `__device__ int var = 42;` combined 
with host-side code to memcpy to/from it? It would still be valid, if not 
very useful, code, but it would still suffer from the runtime being unable to 
load it on a GPU unless that variable is in a GPU binary compiled with a valid 
target.

`__device__` functions in TUs compiled with `-fgpu-rdc` would have a similar 
problem. They would eventually be linked into a GPU binary which will be 
useless if it's not compiled for correct GPU. Granted, `__device__` functions 
will eventually need to be called from a kernel, so we will error out on a 
`__global__`  *somewhere*, but it will miss the problem when such TU does not 
get to the linking stage (e.g. maybe the user wants to link them at runtime).




Comment at: clang/include/clang/Basic/DiagnosticSemaKinds.td:8260
+def err_hip_kern_without_gpu : Error<
+  "compiling a HIP kernel without specifying an offload arch is not allowed">,
+  DefaultFatal;

How about compiling a file with `__device__` functions with `-fgpu-rdc`? If a 
kernel with no-arch is an error, then this should be an error, too.



Comment at: clang/lib/Sema/SemaDeclAttr.cpp:4431
   }
+  if (S.getASTContext().getTargetInfo().getTargetOpts().CPU.empty() &&
+  S.getLangOpts().HIP && S.getLangOpts().CUDAIsDevice) {

Will this fire if we have an uninstantiated kernel template? 





Comment at: clang/test/SemaCUDA/kernel-no-gpu.cu:7
+
+__global__ void kern1() {}
+// hip-error@-1 {{compiling a HIP kernel without specifying an offload arch is 
not allowed}}

We'll need a few more test cases.

E.g. these should be fine.

```
template  __global__ void kernel(T arg ) {};
__global__ void kernel(T arg );

```


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100552/new/

https://reviews.llvm.org/D100552

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100591: [Clang][AArch64] Disable rounding of return values for AArch64

2021-04-15 Thread Andrew Savonichev via Phabricator via cfe-commits
asavonic added a comment.

In D100591#2692599 , @rjmccall wrote:

> I think the right thing to do here is to recognize generally that we're 
> emitting a mandatory tail call, and so suppress *all* the normal 
> transformations on the return value.

I assume it can be tricky to detect such a call. The final decision (tail call vs 
normal call) is made before instruction selection, after all LLVM IR 
optimization passes. So we can miss tail calls that are not obvious on 
non-optimized code, or get false-positive results for calls that a backend 
decides to emit as normal calls.

In any case, this patch can be useful not only for tail calls: `trunc + zext` 
sequence generated to round a return value can be problematic for other cases 
as well.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100591/new/

https://reviews.llvm.org/D100591

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100598: [CUDA, FDO] Filter out profiling options from GPU-side compilations.

2021-04-15 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl accepted this revision.
yaxunl added a comment.
This revision is now accepted and ready to land.

LGTM. Thanks!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100598/new/

https://reviews.llvm.org/D100598

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99696: [clang] NRVO: Improvements and handling of more cases.

2021-04-15 Thread Matheus Izvekov via Phabricator via cfe-commits
mizvekov updated this revision to Diff 337903.
mizvekov added a comment.

- Added doc to disallowNRVO
- Also detect implicit return type for blocks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99696/new/

https://reviews.llvm.org/D99696

Files:
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/Sema.cpp
  clang/lib/Sema/SemaCoroutine.cpp
  clang/lib/Sema/SemaExprCXX.cpp
  clang/lib/Sema/SemaStmt.cpp
  clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
  clang/test/CodeGen/nrvo-tracking.cpp

Index: clang/test/CodeGen/nrvo-tracking.cpp
===
--- clang/test/CodeGen/nrvo-tracking.cpp
+++ clang/test/CodeGen/nrvo-tracking.cpp
@@ -29,8 +29,6 @@
 
 // CHECK-LABEL: define{{.*}} void @_Z2l3v
 // CHECK:   call {{.*}} @_ZN1XC1Ev
-// CHECK-NEXT:  call {{.*}} @_ZN1XC1EOS_
-// CHECK-NEXT:  call void @llvm.lifetime.end
 // CHECK-NEXT:  call void @llvm.lifetime.end
 // CHECK-NEXT:  ret void
 L(3, t, T);
@@ -152,7 +150,11 @@
   }; }()();\
 }
 
-//B(1, X); // Uncomment this line at your own peril ;)
+// CHECK-LABEL: define{{.*}} void @_Z2b1v
+// CHECK:   call {{.*}} @_ZN1XC1Ev
+// CHECK-NEXT:  call void @llvm.lifetime.end
+// CHECK-NEXT:  ret void
+B(1, X);
 
 // CHECK-LABEL: define{{.*}} void @_Z2b2v
 // CHECK:   call {{.*}} @_ZN1XC1Ev
@@ -164,8 +166,6 @@
 
 // CHECK-LABEL: define{{.*}} void @_Z2b3v
 // CHECK:   call {{.*}} @_ZN1XC1Ev
-// CHECK-NEXT:  call {{.*}} @_ZN1XC1EOS_
-// CHECK-NEXT:  call void @llvm.lifetime.end
 // CHECK-NEXT:  call void @llvm.lifetime.end
 // CHECK-NEXT:  ret void
 B(3, T);
Index: clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
===
--- clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -23,6 +23,7 @@
 #include "clang/Basic/TargetInfo.h"
 #include "clang/Sema/Initialization.h"
 #include "clang/Sema/Lookup.h"
+#include "clang/Sema/ScopeInfo.h"
 #include "clang/Sema/SemaInternal.h"
 #include "clang/Sema/Template.h"
 #include "clang/Sema/TemplateInstCallback.h"
@@ -1050,11 +1051,30 @@
 
   SemaRef.BuildVariableInstantiation(Var, D, TemplateArgs, LateAttrs, Owner,
  StartingScope, InstantiatingVarTemplate);
-
   if (D->isNRVOVariable()) {
-QualType ReturnType = cast(DC)->getReturnType();
-if (SemaRef.isCopyElisionCandidate(ReturnType, Var, Sema::CES_Strict))
-  Var->setNRVOVariable(true);
+QualType FT;
+if (auto *F = dyn_cast(DC))
+  FT = F->getType();
+else if (isa(DC))
+  FT = SemaRef.getCurBlock()->FunctionType;
+else
+  llvm_unreachable("Unknown context type");
+
+// This is the last chance we have of checking copy elision eligibility
+// for functions in dependent contexts. The sema actions for building
+// the return statement during template instantiation will have no effect
+// regarding copy elision, since NRVO propagation runs on the scope exit
+// actions, and these are not run on instantiation.
+// This might run through some VarDecls which were returned from non-taken
+// 'if constexpr' branches, and these will end up being constructed on the
+// return slot even if they will never be returned, as a sort of accidental
+// 'optimization'. Notably, functions with 'auto' return types won't have it
+// deduced by this point. Coupled with the limitation described
+// previously, this makes it very hard to support copy elision for these.
+Sema::NamedReturnInfo Info = SemaRef.getNamedReturnInfo(Var);
+bool NRVO = SemaRef.getCopyElisionCandidate(
+Info, cast(FT)->getReturnType()) != nullptr;
+Var->setNRVOVariable(NRVO);
   }
 
   Var->setImplicit(D->isImplicit());
Index: clang/lib/Sema/SemaStmt.cpp
===
--- clang/lib/Sema/SemaStmt.cpp
+++ clang/lib/Sema/SemaStmt.cpp
@@ -3036,99 +3036,153 @@
   return new (Context) BreakStmt(BreakLoc);
 }
 
-/// Determine whether the given expression is a candidate for
-/// copy elision in either a return statement or a throw expression.
+/// Determine whether the given expression might be move-eligible or
+/// copy-elidable in either a (co_)return statement or throw expression,
+/// without considering function return type, if applicable.
 ///
-/// \param ReturnType If we're determining the copy elision candidate for
-/// a return statement, this is the return type of the function. If we're
-/// determining the copy elision candidate for a throw expression, this will
-/// be a NULL type.
+/// \param E The expression being returned from the function or block,
+/// being thrown, or being co_returned from a coroutine.
 ///
-/// \param E The expression being returned from the function or block, or
-/// being thrown.
+/// \param ForceCXX20 Overrides detection of current language mode

[PATCH] D100548: [AST] Fix location call storage with common last-invocation

2021-04-15 Thread Nathan James via Phabricator via cfe-commits
njames93 accepted this revision.
njames93 added a comment.
This revision is now accepted and ready to land.

LGTM




Comment at: clang/lib/Tooling/NodeIntrospection.cpp:33
   }
-  result += (vec.back()->name() + "()").str();
+  result += (vec.front()->name() + "()").str();
   return result;

nit: rebase, this no longer exists.



Comment at: clang/lib/Tooling/NodeIntrospection.cpp:54-55
 
-  return LHS.second->name() < RHS.second->name();
+  return LocationCallFormatterCpp::format(LHS.second.get()) <
+ LocationCallFormatterCpp::format(RHS.second.get());
 }

nit: This is quite an inefficient way to go about comparing these. Though it 
would require a bit of work to avoid creation of the temporary strings.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100548/new/

https://reviews.llvm.org/D100548

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98783: [AMDGPU] Add GlobalDCE before internalization pass

2021-04-15 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 337900.
yaxunl marked 2 inline comments as done.
yaxunl added a comment.

Revised tests per Artem's comments.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98783/new/

https://reviews.llvm.org/D98783

Files:
  clang/test/CodeGenCUDA/unused-global-var.cu
  llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp


Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===
--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -578,6 +578,9 @@
 PM.addPass(AMDGPUPrintfRuntimeBindingPass());
 
 if (InternalizeSymbols) {
+  // Global variables may have dead uses which need to be removed.
+  // Otherwise these useless global variables will not get 
internalized.
+  PM.addPass(GlobalDCEPass());
   PM.addPass(InternalizePass(mustPreserveGV));
 }
 PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
Index: clang/test/CodeGenCUDA/unused-global-var.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/unused-global-var.cu
@@ -0,0 +1,53 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN:   -target-cpu gfx906 | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN:   -target-cpu gfx906 | FileCheck -check-prefix=NEGCHK %s
+
+#include "Inputs/cuda.h"
+
+// AMDGPU internalize unused global variables for whole-program compilation
+// (-fno-gpu-rdc for each TU, or -fgpu-rdc for LTO), which are then
+// eliminated by global DCE. If there are invisible unused address space casts
+// for global variables, these dead users need to be eliminated by global
+// DCE before internalization. This test makes sure unused global variables
+// are eliminated.
+
+// Check unused device/constant variables are eliminated.
+
+// NEGCHK-NOT: @v1
+__device__ int v1;
+
+// NEGCHK-NOT: @v2
+__constant__ int v2;
+
+// NEGCHK-NOT: @_ZL2v3
+constexpr int v3 = 1;
+
+// Check managed variables are always kept.
+
+// CHECK-DAG: @v4
+__managed__ int v4;
+
+// Check used device/constant variables are not eliminated.
+// CHECK-DAG: @u1
+__device__ int u1;
+
+// CHECK-DAG: @u2
+__constant__ int u2;
+
+// Check u3 is kept because its address is taken.
+// CHECK-DAG: @_ZL2u3
+constexpr int u3 = 2;
+
+// Check u4 is not kept because it is not ODR-use.
+// NEGCHK-NOT: @_ZL2u4
+constexpr int u4 = 3;
+
+__device__ int fun1(const int& x);
+
+__global__ void kern1(int *x) {
+  *x = u1 + u2 + fun1(u3) + u4;
+}


Index: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
===
--- llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -578,6 +578,9 @@
 PM.addPass(AMDGPUPrintfRuntimeBindingPass());
 
 if (InternalizeSymbols) {
+  // Global variables may have dead uses which need to be removed.
+  // Otherwise these useless global variables will not get internalized.
+  PM.addPass(GlobalDCEPass());
   PM.addPass(InternalizePass(mustPreserveGV));
 }
 PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
Index: clang/test/CodeGenCUDA/unused-global-var.cu
===
--- /dev/null
+++ clang/test/CodeGenCUDA/unused-global-var.cu
@@ -0,0 +1,53 @@
+// REQUIRES: amdgpu-registered-target
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN:   -target-cpu gfx906 | FileCheck %s
+// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device -x hip %s \
+// RUN:   -std=c++11 -O3 -mllvm -amdgpu-internalize-symbols -emit-llvm -o - \
+// RUN:   -target-cpu gfx906 | FileCheck -check-prefix=NEGCHK %s
+
+#include "Inputs/cuda.h"
+
+// AMDGPU internalize unused global variables for whole-program compilation
+// (-fno-gpu-rdc for each TU, or -fgpu-rdc for LTO), which are then
+// eliminated by global DCE. If there are invisible unused address space casts
+// for global variables, these dead users need to be eliminated by global
+// DCE before internalization. This test makes sure unused global variables
+// are eliminated.
+
+// Check unused device/constant variables are eliminated.
+
+// NEGCHK-NOT: @v1
+__device__ int v1;
+
+// NEGCHK-NOT: @v2
+__constant__ int v2;
+
+// NEGCHK-NOT: @_ZL2v3
+constexpr int v3 = 1;
+
+// Check managed variables are always kept.
+
+// CHECK-DAG: @v4
+__managed__ int v4;
+
+// Check used device/constant variables are not eliminated.
+// CHECK-DAG: @u1
+__device__ int u1;
+
+// CHECK-DAG: @u2

[PATCH] D100530: [AST][Introspection] Add a check to detect if introspection is supported.

2021-04-15 Thread Nathan James via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rGf019e5f73ed7: [AST][Introspection] Add a check to detect if 
introspection is supported. (authored by njames93).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100530/new/

https://reviews.llvm.org/D100530

Files:
  clang/include/clang/Tooling/NodeIntrospection.h
  clang/lib/Tooling/CMakeLists.txt
  clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py
  clang/unittests/Introspection/IntrospectionTest.cpp

Index: clang/unittests/Introspection/IntrospectionTest.cpp
===
--- clang/unittests/Introspection/IntrospectionTest.cpp
+++ clang/unittests/Introspection/IntrospectionTest.cpp
@@ -46,6 +46,8 @@
 #define STRING_LOCATION_PAIR(INSTANCE, LOC) Pair(#LOC, INSTANCE->LOC)
 
 TEST(Introspection, SourceLocations_Stmt) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST = buildASTFromCode("void foo() {} void bar() { foo(); }", "foo.cpp",
   std::make_shared());
   auto  = AST->getASTContext();
@@ -62,11 +64,6 @@
 
   auto Result = NodeIntrospection::GetLocations(FooCall);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty())
-  {
-return;
-  }
-
   auto ExpectedLocations =
 FormatExpected(Result.LocationAccessors);
 
@@ -84,6 +81,8 @@
 }
 
 TEST(Introspection, SourceLocations_Decl) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST =
   buildASTFromCode(R"cpp(
 namespace ns1 {
@@ -118,10 +117,6 @@
 
   auto Result = NodeIntrospection::GetLocations(MethodDecl);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
-return;
-  }
-
   auto ExpectedLocations =
   FormatExpected(Result.LocationAccessors);
 
@@ -148,6 +143,8 @@
 }
 
 TEST(Introspection, SourceLocations_NNS) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST =
   buildASTFromCode(R"cpp(
 namespace ns
@@ -171,10 +168,6 @@
 
   auto Result = NodeIntrospection::GetLocations(NNS);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
-return;
-  }
-
   auto ExpectedLocations =
   FormatExpected(Result.LocationAccessors);
 
@@ -194,6 +187,8 @@
 }
 
 TEST(Introspection, SourceLocations_TA_Type) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST =
   buildASTFromCode(R"cpp(
 template
@@ -219,10 +214,6 @@
 
   auto Result = NodeIntrospection::GetLocations(TA);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
-return;
-  }
-
   auto ExpectedLocations =
   FormatExpected(Result.LocationAccessors);
 
@@ -236,6 +227,8 @@
 }
 
 TEST(Introspection, SourceLocations_TA_Decl) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST =
   buildASTFromCode(R"cpp(
 template
@@ -258,10 +251,6 @@
 
   auto Result = NodeIntrospection::GetLocations(TA);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
-return;
-  }
-
   auto ExpectedLocations =
   FormatExpected(Result.LocationAccessors);
 
@@ -275,6 +264,8 @@
 }
 
 TEST(Introspection, SourceLocations_TA_Nullptr) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST =
   buildASTFromCode(R"cpp(
 template
@@ -297,10 +288,6 @@
 
   auto Result = NodeIntrospection::GetLocations(TA);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
-return;
-  }
-
   auto ExpectedLocations =
   FormatExpected(Result.LocationAccessors);
 
@@ -314,6 +301,8 @@
 }
 
 TEST(Introspection, SourceLocations_TA_Integral) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST =
   buildASTFromCode(R"cpp(
 template
@@ -335,10 +324,6 @@
 
   auto Result = NodeIntrospection::GetLocations(TA);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
-return;
-  }
-
   auto ExpectedLocations =
   FormatExpected(Result.LocationAccessors);
 
@@ -352,6 +337,8 @@
 }
 
 TEST(Introspection, SourceLocations_TA_Template) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST =
   buildASTFromCode(R"cpp(
 template class A;
@@ -374,10 +361,6 @@
 
   auto Result = NodeIntrospection::GetLocations(TA);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
-return;
-  }
-
   auto ExpectedLocations =
   FormatExpected(Result.LocationAccessors);
 
@@ -393,6 +376,8 @@
 }
 
 TEST(Introspection, SourceLocations_TA_TemplateExpansion) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST = buildASTFromCodeWithArgs(
   R"cpp(
 template class ...> class B { };
@@ -414,10 +399,6 @@
 
   auto Result = NodeIntrospection::GetLocations(TA);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
-return;
- 

[clang] f019e5f - [AST][Introspection] Add a check to detect if introspection is supported.

2021-04-15 Thread Nathan James via cfe-commits

Author: Nathan James
Date: 2021-04-15T22:21:41+01:00
New Revision: f019e5f73ed732b374e376f5ddbba5d1f67dca0c

URL: 
https://github.com/llvm/llvm-project/commit/f019e5f73ed732b374e376f5ddbba5d1f67dca0c
DIFF: 
https://github.com/llvm/llvm-project/commit/f019e5f73ed732b374e376f5ddbba5d1f67dca0c.diff

LOG: [AST][Introspection] Add a check to detect if introspection is supported.

This could probably be made into a compile time constant, but that would 
involve generating a second inc file.

Reviewed By: steveire

Differential Revision: https://reviews.llvm.org/D100530

Added: 


Modified: 
clang/include/clang/Tooling/NodeIntrospection.h
clang/lib/Tooling/CMakeLists.txt
clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py
clang/unittests/Introspection/IntrospectionTest.cpp

Removed: 




diff  --git a/clang/include/clang/Tooling/NodeIntrospection.h 
b/clang/include/clang/Tooling/NodeIntrospection.h
index 9147c7db6c27..5489a67efa22 100644
--- a/clang/include/clang/Tooling/NodeIntrospection.h
+++ b/clang/include/clang/Tooling/NodeIntrospection.h
@@ -85,6 +85,7 @@ struct NodeLocationAccessors {
 };
 
 namespace NodeIntrospection {
+bool hasIntrospectionSupport();
 NodeLocationAccessors GetLocations(clang::Stmt const *Object);
 NodeLocationAccessors GetLocations(clang::Decl const *Object);
 NodeLocationAccessors GetLocations(clang::CXXCtorInitializer const *Object);

diff  --git a/clang/lib/Tooling/CMakeLists.txt 
b/clang/lib/Tooling/CMakeLists.txt
index 6d70c8976f03..0da3dbd0b927 100644
--- a/clang/lib/Tooling/CMakeLists.txt
+++ b/clang/lib/Tooling/CMakeLists.txt
@@ -35,6 +35,8 @@ if (NOT Python3_EXECUTABLE
 namespace clang {
 namespace tooling {
 
+bool NodeIntrospection::hasIntrospectionSupport() { return false; }
+
 NodeLocationAccessors NodeIntrospection::GetLocations(clang::Stmt const *) {
   return {};
 }

diff  --git a/clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py 
b/clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py
index 0adebeb3b3bf..b0953df19203 100755
--- a/clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py
+++ b/clang/lib/Tooling/DumpTool/generate_cxx_src_locs.py
@@ -28,6 +28,8 @@ def GeneratePrologue(self):
 
 using LocationAndString = SourceLocationMap::value_type;
 using RangeAndString = SourceRangeMap::value_type;
+
+bool NodeIntrospection::hasIntrospectionSupport() { return true; }
 """
 
 def GenerateBaseGetLocationsDeclaration(self, CladeName):
@@ -174,6 +176,8 @@ def main():
 namespace clang {
 namespace tooling {
 
+bool NodeIntrospection::hasIntrospectionSupport() { return false; }
+
 NodeLocationAccessors NodeIntrospection::GetLocations(clang::Stmt const *) {
   return {};
 }

diff  --git a/clang/unittests/Introspection/IntrospectionTest.cpp 
b/clang/unittests/Introspection/IntrospectionTest.cpp
index 4a684f26a624..880068c43b6e 100644
--- a/clang/unittests/Introspection/IntrospectionTest.cpp
+++ b/clang/unittests/Introspection/IntrospectionTest.cpp
@@ -46,6 +46,8 @@ FormatExpected(const MapType ) {
 #define STRING_LOCATION_PAIR(INSTANCE, LOC) Pair(#LOC, INSTANCE->LOC)
 
 TEST(Introspection, SourceLocations_Stmt) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST = buildASTFromCode("void foo() {} void bar() { foo(); }", "foo.cpp",
   std::make_shared());
   auto  = AST->getASTContext();
@@ -62,11 +64,6 @@ TEST(Introspection, SourceLocations_Stmt) {
 
   auto Result = NodeIntrospection::GetLocations(FooCall);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty())
-  {
-return;
-  }
-
   auto ExpectedLocations =
 FormatExpected(Result.LocationAccessors);
 
@@ -84,6 +81,8 @@ TEST(Introspection, SourceLocations_Stmt) {
 }
 
 TEST(Introspection, SourceLocations_Decl) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST =
   buildASTFromCode(R"cpp(
 namespace ns1 {
@@ -118,10 +117,6 @@ ns1::ns2::Foo ns1::ns2::Bar::Nested::method(int i, bool b) const
 
   auto Result = NodeIntrospection::GetLocations(MethodDecl);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
-return;
-  }
-
   auto ExpectedLocations =
   FormatExpected(Result.LocationAccessors);
 
@@ -148,6 +143,8 @@ ns1::ns2::Foo ns1::ns2::Bar::Nested::method(int 
i, bool b) const
 }
 
 TEST(Introspection, SourceLocations_NNS) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+return;
   auto AST =
   buildASTFromCode(R"cpp(
 namespace ns
@@ -171,10 +168,6 @@ void ns::A::foo() {}
 
   auto Result = NodeIntrospection::GetLocations(NNS);
 
-  if (Result.LocationAccessors.empty() && Result.RangeAccessors.empty()) {
-return;
-  }
-
   auto ExpectedLocations =
   FormatExpected(Result.LocationAccessors);
 
@@ -194,6 +187,8 @@ void ns::A::foo() {}
 }
 
 TEST(Introspection, SourceLocations_TA_Type) {
+  if (!NodeIntrospection::hasIntrospectionSupport())
+

[PATCH] D100423: [AST] Add a print method to Introspection LocationCall

2021-04-15 Thread Nathan James via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG542e7806e610: [AST] Add a print method to Introspection 
LocationCall (authored by njames93).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100423/new/

https://reviews.llvm.org/D100423

Files:
  clang/include/clang/Tooling/NodeIntrospection.h
  clang/lib/Tooling/NodeIntrospection.cpp
  clang/unittests/Introspection/IntrospectionTest.cpp


Index: clang/unittests/Introspection/IntrospectionTest.cpp
===
--- clang/unittests/Introspection/IntrospectionTest.cpp
+++ clang/unittests/Introspection/IntrospectionTest.cpp
@@ -36,9 +36,9 @@
   }),
   std::inserter(Result, Result.end()),
   [](const auto ) {
-return std::make_pair(
-
LocationCallFormatterCpp::format(Accessor.second.get()),
-Accessor.first);
+return std::make_pair(LocationCallFormatterCpp::format(
+  *Accessor.second.get()),
+  Accessor.first);
   });
   return Result;
 }
Index: clang/lib/Tooling/NodeIntrospection.cpp
===
--- clang/lib/Tooling/NodeIntrospection.cpp
+++ clang/lib/Tooling/NodeIntrospection.cpp
@@ -13,25 +13,40 @@
 #include "clang/Tooling/NodeIntrospection.h"
 
 #include "clang/AST/AST.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace clang {
 
 namespace tooling {
 
-std::string LocationCallFormatterCpp::format(LocationCall *Call) {
-  SmallVector vec;
-  while (Call) {
-vec.push_back(Call);
-Call = Call->on();
+void LocationCallFormatterCpp::print(const LocationCall ,
+ llvm::raw_ostream ) {
+  if (const LocationCall *On = Call.on()) {
+print(*On, OS);
+if (On->returnsPointer())
+  OS << "->";
+else
+  OS << '.';
   }
-  std::string result;
-  for (auto *VecCall : llvm::reverse(llvm::makeArrayRef(vec).drop_front())) {
-result +=
-(VecCall->name() + "()" + (VecCall->returnsPointer() ? "->" : "."))
-.str();
+
+  OS << Call.name();
+  if (Call.args().empty()) {
+OS << "()";
+return;
+  }
+  OS << '(' << Call.args().front();
+  for (const std::string  : Call.args().drop_front()) {
+OS << ", " << Arg;
   }
-  result += (vec.back()->name() + "()").str();
-  return result;
+  OS << ')';
+}
+
+std::string LocationCallFormatterCpp::format(const LocationCall ) {
+  std::string Result;
+  llvm::raw_string_ostream OS(Result);
+  print(Call, OS);
+  OS.flush();
+  return Result;
 }
 
 namespace internal {
Index: clang/include/clang/Tooling/NodeIntrospection.h
===
--- clang/include/clang/Tooling/NodeIntrospection.h
+++ clang/include/clang/Tooling/NodeIntrospection.h
@@ -58,7 +58,8 @@
 
 class LocationCallFormatterCpp {
 public:
-  static std::string format(LocationCall *Call);
+  static void print(const LocationCall , llvm::raw_ostream );
+  static std::string format(const LocationCall );
 };
 
 namespace internal {


Index: clang/unittests/Introspection/IntrospectionTest.cpp
===
--- clang/unittests/Introspection/IntrospectionTest.cpp
+++ clang/unittests/Introspection/IntrospectionTest.cpp
@@ -36,9 +36,9 @@
   }),
   std::inserter(Result, Result.end()),
   [](const auto ) {
-return std::make_pair(
-LocationCallFormatterCpp::format(Accessor.second.get()),
-Accessor.first);
+return std::make_pair(LocationCallFormatterCpp::format(
+  *Accessor.second.get()),
+  Accessor.first);
   });
   return Result;
 }
Index: clang/lib/Tooling/NodeIntrospection.cpp
===
--- clang/lib/Tooling/NodeIntrospection.cpp
+++ clang/lib/Tooling/NodeIntrospection.cpp
@@ -13,25 +13,40 @@
 #include "clang/Tooling/NodeIntrospection.h"
 
 #include "clang/AST/AST.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace clang {
 
 namespace tooling {
 
-std::string LocationCallFormatterCpp::format(LocationCall *Call) {
-  SmallVector vec;
-  while (Call) {
-vec.push_back(Call);
-Call = Call->on();
+void LocationCallFormatterCpp::print(const LocationCall ,
+ llvm::raw_ostream ) {
+  if (const LocationCall *On = Call.on()) {
+print(*On, OS);
+if (On->returnsPointer())
+  OS << "->";
+else
+  OS << '.';
   }
-  std::string result;
-  for (auto *VecCall : 

[clang] 542e780 - [AST] Add a print method to Introspection LocationCall

2021-04-15 Thread Nathan James via cfe-commits

Author: Nathan James
Date: 2021-04-15T22:18:29+01:00
New Revision: 542e7806e61089da88a2ff467006073cbe8d

URL: 
https://github.com/llvm/llvm-project/commit/542e7806e61089da88a2ff467006073cbe8d
DIFF: 
https://github.com/llvm/llvm-project/commit/542e7806e61089da88a2ff467006073cbe8d.diff

LOG: [AST] Add a print method to Introspection LocationCall

Add a print method that takes a raw_ostream.
Change LocationCallFormatterCpp::format to call that method.

Reviewed By: steveire

Differential Revision: https://reviews.llvm.org/D100423

Added: 


Modified: 
clang/include/clang/Tooling/NodeIntrospection.h
clang/lib/Tooling/NodeIntrospection.cpp
clang/unittests/Introspection/IntrospectionTest.cpp

Removed: 




diff  --git a/clang/include/clang/Tooling/NodeIntrospection.h 
b/clang/include/clang/Tooling/NodeIntrospection.h
index 406e17f1351a4..9147c7db6c271 100644
--- a/clang/include/clang/Tooling/NodeIntrospection.h
+++ b/clang/include/clang/Tooling/NodeIntrospection.h
@@ -58,7 +58,8 @@ class LocationCall : public 
llvm::ThreadSafeRefCountedBase {
 
 class LocationCallFormatterCpp {
 public:
-  static std::string format(LocationCall *Call);
+  static void print(const LocationCall , llvm::raw_ostream );
+  static std::string format(const LocationCall );
 };
 
 namespace internal {

diff  --git a/clang/lib/Tooling/NodeIntrospection.cpp 
b/clang/lib/Tooling/NodeIntrospection.cpp
index bb0e6ec412d1f..2ee0b1cae55b0 100644
--- a/clang/lib/Tooling/NodeIntrospection.cpp
+++ b/clang/lib/Tooling/NodeIntrospection.cpp
@@ -13,25 +13,40 @@
 #include "clang/Tooling/NodeIntrospection.h"
 
 #include "clang/AST/AST.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace clang {
 
 namespace tooling {
 
-std::string LocationCallFormatterCpp::format(LocationCall *Call) {
-  SmallVector vec;
-  while (Call) {
-vec.push_back(Call);
-Call = Call->on();
+void LocationCallFormatterCpp::print(const LocationCall ,
+ llvm::raw_ostream ) {
+  if (const LocationCall *On = Call.on()) {
+print(*On, OS);
+if (On->returnsPointer())
+  OS << "->";
+else
+  OS << '.';
   }
-  std::string result;
-  for (auto *VecCall : llvm::reverse(llvm::makeArrayRef(vec).drop_front())) {
-result +=
-(VecCall->name() + "()" + (VecCall->returnsPointer() ? "->" : "."))
-.str();
+
+  OS << Call.name();
+  if (Call.args().empty()) {
+OS << "()";
+return;
+  }
+  OS << '(' << Call.args().front();
+  for (const std::string  : Call.args().drop_front()) {
+OS << ", " << Arg;
   }
-  result += (vec.back()->name() + "()").str();
-  return result;
+  OS << ')';
+}
+
+std::string LocationCallFormatterCpp::format(const LocationCall ) {
+  std::string Result;
+  llvm::raw_string_ostream OS(Result);
+  print(Call, OS);
+  OS.flush();
+  return Result;
 }
 
 namespace internal {

diff  --git a/clang/unittests/Introspection/IntrospectionTest.cpp 
b/clang/unittests/Introspection/IntrospectionTest.cpp
index 2bfdcd0a26979..4a684f26a6248 100644
--- a/clang/unittests/Introspection/IntrospectionTest.cpp
+++ b/clang/unittests/Introspection/IntrospectionTest.cpp
@@ -36,9 +36,9 @@ FormatExpected(const MapType ) {
   }),
   std::inserter(Result, Result.end()),
   [](const auto ) {
-return std::make_pair(
-
LocationCallFormatterCpp::format(Accessor.second.get()),
-Accessor.first);
+return std::make_pair(LocationCallFormatterCpp::format(
+  *Accessor.second.get()),
+  Accessor.first);
   });
   return Result;
 }



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100598: [CUDA, FDO] Filter out profiling options from GPU-side compilations.

2021-04-15 Thread Artem Belevich via Phabricator via cfe-commits
tra updated this revision to Diff 337896.
tra added a comment.

filter the options for AMD GPUs too.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100598/new/

https://reviews.llvm.org/D100598

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/clang_f_opts.c

Index: clang/test/Driver/clang_f_opts.c
===
--- clang/test/Driver/clang_f_opts.c
+++ clang/test/Driver/clang_f_opts.c
@@ -58,6 +58,19 @@
 // RUN: %clang -### -S -fprofile-sample-use=%S/Inputs/file.prof %s 2>&1 | FileCheck -check-prefix=CHECK-SAMPLE-PROFILE %s
 // CHECK-SAMPLE-PROFILE: "-fprofile-sample-use={{.*}}/file.prof"
 
+//
+// RUN: %clang -### -x cuda -nocudainc -nocudalib \
+// RUN:-c -fprofile-sample-use=%S/Inputs/file.prof %s 2>&1 \
+// RUN:  | FileCheck -check-prefix=CHECK-CUDA-SAMPLE-PROFILE %s
+// -fprofile-sample-use should not be passed to the GPU compilation
+// CHECK-CUDA-SAMPLE-PROFILE: "-cc1"
+// CHECK-CUDA-SAMPLE-PROFILE-SAME: "-triple" "nvptx
+// CHECK-CUDA-SAMPLE-PROFILE-NOT: "-fprofile-sample-use={{.*}}/file.prof"
+// Host compilation should still have the option.
+// CHECK-CUDA-SAMPLE-PROFILE: "-cc1" 
+// CHECK-CUDA-SAMPLE-PROFILE-SAME: "-fprofile-sample-use={{.*}}/file.prof"
+
+
 // RUN: %clang -### -S -fauto-profile=%S/Inputs/file.prof %s 2>&1 | FileCheck -check-prefix=CHECK-AUTO-PROFILE %s
 // CHECK-AUTO-PROFILE: "-fprofile-sample-use={{.*}}/file.prof"
 
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4159,7 +4159,9 @@
   // include as part of the module. All other jobs are expected to have exactly
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
+  bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
+  bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
   bool IsHeaderModulePrecompile = isa(JA);
 
@@ -5003,8 +5005,7 @@
   // Prepare `-aux-target-cpu` and `-aux-target-feature` unless
   // `--gpu-use-aux-triple-only` is specified.
   if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) &&
-  ((IsCuda && JA.isDeviceOffloading(Action::OFK_Cuda)) ||
-   (IsHIP && JA.isDeviceOffloading(Action::OFK_HIP {
+  (IsCudaDevice || IsHIPDevice)) {
 const ArgList  =
 C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None);
 std::string HostCPU =
@@ -5824,29 +5825,32 @@
 Args.MakeArgString(Twine("-fcf-protection=") + A->getValue()));
   }
 
-  // Forward -f options with positive and negative forms; we translate
-  // these by hand.
-  if (Arg *A = getLastProfileSampleUseArg(Args)) {
-auto *PGOArg = Args.getLastArg(
-options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
-options::OPT_fcs_profile_generate, options::OPT_fcs_profile_generate_EQ,
-options::OPT_fprofile_use, options::OPT_fprofile_use_EQ);
-if (PGOArg)
-  D.Diag(diag::err_drv_argument_not_allowed_with)
-  << "SampleUse with PGO options";
+  // Forward -f options with positive and negative forms; we translate these by
+  // hand.  Do not propagate PGO options to the GPU-side compilations as the
+  // profile info is for the host-side compilation only.
+  if (!(IsCudaDevice || IsHIPDevice)) {
+if (Arg *A = getLastProfileSampleUseArg(Args)) {
+  auto *PGOArg = Args.getLastArg(
+  options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
+  options::OPT_fcs_profile_generate,
+  options::OPT_fcs_profile_generate_EQ, options::OPT_fprofile_use,
+  options::OPT_fprofile_use_EQ);
+  if (PGOArg)
+D.Diag(diag::err_drv_argument_not_allowed_with)
+<< "SampleUse with PGO options";
 
-StringRef fname = A->getValue();
-if (!llvm::sys::fs::exists(fname))
-  D.Diag(diag::err_drv_no_such_file) << fname;
-else
-  A->render(Args, CmdArgs);
+  StringRef fname = A->getValue();
+  if (!llvm::sys::fs::exists(fname))
+D.Diag(diag::err_drv_no_such_file) << fname;
+  else
+A->render(Args, CmdArgs);
+}
+Args.AddLastArg(CmdArgs, options::OPT_fprofile_remapping_file_EQ);
+
+if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
+ options::OPT_fno_pseudo_probe_for_profiling, false))
+  CmdArgs.push_back("-fpseudo-probe-for-profiling");
   }
-  Args.AddLastArg(CmdArgs, options::OPT_fprofile_remapping_file_EQ);
-
-  if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
-   options::OPT_fno_pseudo_probe_for_profiling, false))
-CmdArgs.push_back("-fpseudo-probe-for-profiling");
-
   RenderBuiltinOptions(TC, RawTriple, Args, CmdArgs);
 
   if 

[PATCH] D100598: [CUDA, FDO] Filter out profiling options from GPU-side compilations.

2021-04-15 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:5830
   // these by hand.
-  if (Arg *A = getLastProfileSampleUseArg(Args)) {
-auto *PGOArg = Args.getLastArg(
-options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
-options::OPT_fcs_profile_generate, 
options::OPT_fcs_profile_generate_EQ,
-options::OPT_fprofile_use, options::OPT_fprofile_use_EQ);
-if (PGOArg)
-  D.Diag(diag::err_drv_argument_not_allowed_with)
-  << "SampleUse with PGO options";
+  if (!IsCudaDevice) {
+if (Arg *A = getLastProfileSampleUseArg(Args)) {

tra wrote:
> @yaxunl Should I add isHipDevice here too? I don't think host-side profiling 
> info is of much use for GPU compilation.
Yes pls do the same for HIP. Thanks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100598/new/

https://reviews.llvm.org/D100598

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98783: [AMDGPU] Add GlobalDCE before internalization pass

2021-04-15 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl marked 2 inline comments as done.
yaxunl added inline comments.



Comment at: clang/test/CodeGenCUDA/unused-global-var.cu:23-29
+// CHECK-NOT: @_ZL2v3
+constexpr int v3 = 1;
+
+// Check managed variables are always kept.
+
+// CHECK: @v4
+__managed__ int v4;

tra wrote:
> Mixing CHECK and CHECK-NOT is tricky and, in general, only works if things 
> are always in the same order.
> 
> E.g. if `v3` gets emitted after `v4`, the test will still pass.
> 
> One way to deal with that would be to split the positive and negative checks 
> into separate runs.
> First one would check the variables we do want to keep with `CHECK-DAG`.
> The other one would only check for the absence of the variables with `-NOT`.
will do



Comment at: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp:584
+  PM.addPass(GlobalDCEPass());
   PM.addPass(InternalizePass(mustPreserveGV));
 }

arsenm wrote:
> Should we move where the internalize pass is added instead?
we need two global dce, one before internalize, one after internalize.

the first global dce will eliminate the dead users of the global vars, but not 
the vars themselves since they are external.

the internalize pass will make the useless vars internal.

the second global dce will eliminate the useless internal global vars. Without 
the second global dce, the useless global var will not be eliminated.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98783/new/

https://reviews.llvm.org/D98783

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100598: [CUDA, FDO] Filter out profiling options from GPU-side compilations.

2021-04-15 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/Driver/ToolChains/Clang.cpp:5830
   // these by hand.
-  if (Arg *A = getLastProfileSampleUseArg(Args)) {
-auto *PGOArg = Args.getLastArg(
-options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
-options::OPT_fcs_profile_generate, 
options::OPT_fcs_profile_generate_EQ,
-options::OPT_fprofile_use, options::OPT_fprofile_use_EQ);
-if (PGOArg)
-  D.Diag(diag::err_drv_argument_not_allowed_with)
-  << "SampleUse with PGO options";
+  if (!IsCudaDevice) {
+if (Arg *A = getLastProfileSampleUseArg(Args)) {

@yaxunl Should I add isHipDevice here too? I don't think host-side profiling 
info is of much use for GPU compilation.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100598/new/

https://reviews.llvm.org/D100598

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100598: [CUDA, FDO] Filter out profiling options from GPU-side compilations.

2021-04-15 Thread Artem Belevich via Phabricator via cfe-commits
tra created this revision.
tra added reviewers: yaxunl, tejohnson.
Herald added subscribers: wenlei, bixia.
tra requested review of this revision.
Herald added a project: clang.

Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100598

Files:
  clang/lib/Driver/ToolChains/Clang.cpp
  clang/test/Driver/clang_f_opts.c


Index: clang/test/Driver/clang_f_opts.c
===
--- clang/test/Driver/clang_f_opts.c
+++ clang/test/Driver/clang_f_opts.c
@@ -58,6 +58,19 @@
 // RUN: %clang -### -S -fprofile-sample-use=%S/Inputs/file.prof %s 2>&1 | 
FileCheck -check-prefix=CHECK-SAMPLE-PROFILE %s
 // CHECK-SAMPLE-PROFILE: "-fprofile-sample-use={{.*}}/file.prof"
 
+//
+// RUN: %clang -### -x cuda -nocudainc -nocudalib \
+// RUN:-c -fprofile-sample-use=%S/Inputs/file.prof %s 2>&1 \
+// RUN:  | FileCheck -check-prefix=CHECK-CUDA-SAMPLE-PROFILE %s
+// -fprofile-sample-use should not be passed to the GPU compilation
+// CHECK-CUDA-SAMPLE-PROFILE: "-cc1"
+// CHECK-CUDA-SAMPLE-PROFILE-SAME: "-triple" "nvptx
+// CHECK-CUDA-SAMPLE-PROFILE-NOT: "-fprofile-sample-use={{.*}}/file.prof"
+// Host compilation should still have the option.
+// CHECK-CUDA-SAMPLE-PROFILE: "-cc1" 
+// CHECK-CUDA-SAMPLE-PROFILE-SAME: "-fprofile-sample-use={{.*}}/file.prof"
+
+
 // RUN: %clang -### -S -fauto-profile=%S/Inputs/file.prof %s 2>&1 | FileCheck 
-check-prefix=CHECK-AUTO-PROFILE %s
 // CHECK-AUTO-PROFILE: "-fprofile-sample-use={{.*}}/file.prof"
 
Index: clang/lib/Driver/ToolChains/Clang.cpp
===
--- clang/lib/Driver/ToolChains/Clang.cpp
+++ clang/lib/Driver/ToolChains/Clang.cpp
@@ -4159,7 +4159,9 @@
   // include as part of the module. All other jobs are expected to have exactly
   // one input.
   bool IsCuda = JA.isOffloading(Action::OFK_Cuda);
+  bool IsCudaDevice = JA.isDeviceOffloading(Action::OFK_Cuda);
   bool IsHIP = JA.isOffloading(Action::OFK_HIP);
+  bool IsHIPDevice = JA.isDeviceOffloading(Action::OFK_HIP);
   bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP);
   bool IsHeaderModulePrecompile = isa(JA);
 
@@ -5003,8 +5005,7 @@
   // Prepare `-aux-target-cpu` and `-aux-target-feature` unless
   // `--gpu-use-aux-triple-only` is specified.
   if (!Args.getLastArg(options::OPT_gpu_use_aux_triple_only) &&
-  ((IsCuda && JA.isDeviceOffloading(Action::OFK_Cuda)) ||
-   (IsHIP && JA.isDeviceOffloading(Action::OFK_HIP {
+  (IsCudaDevice || IsHIPDevice)) {
 const ArgList  =
 C.getArgsForToolChain(nullptr, StringRef(), Action::OFK_None);
 std::string HostCPU =
@@ -5826,27 +5827,29 @@
 
   // Forward -f options with positive and negative forms; we translate
   // these by hand.
-  if (Arg *A = getLastProfileSampleUseArg(Args)) {
-auto *PGOArg = Args.getLastArg(
-options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
-options::OPT_fcs_profile_generate, 
options::OPT_fcs_profile_generate_EQ,
-options::OPT_fprofile_use, options::OPT_fprofile_use_EQ);
-if (PGOArg)
-  D.Diag(diag::err_drv_argument_not_allowed_with)
-  << "SampleUse with PGO options";
+  if (!IsCudaDevice) {
+if (Arg *A = getLastProfileSampleUseArg(Args)) {
+  auto *PGOArg = Args.getLastArg(
+  options::OPT_fprofile_generate, options::OPT_fprofile_generate_EQ,
+  options::OPT_fcs_profile_generate,
+  options::OPT_fcs_profile_generate_EQ, options::OPT_fprofile_use,
+  options::OPT_fprofile_use_EQ);
+  if (PGOArg)
+D.Diag(diag::err_drv_argument_not_allowed_with)
+<< "SampleUse with PGO options";
 
-StringRef fname = A->getValue();
-if (!llvm::sys::fs::exists(fname))
-  D.Diag(diag::err_drv_no_such_file) << fname;
-else
-  A->render(Args, CmdArgs);
+  StringRef fname = A->getValue();
+  if (!llvm::sys::fs::exists(fname))
+D.Diag(diag::err_drv_no_such_file) << fname;
+  else
+A->render(Args, CmdArgs);
+}
+Args.AddLastArg(CmdArgs, options::OPT_fprofile_remapping_file_EQ);
+
+if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
+ options::OPT_fno_pseudo_probe_for_profiling, false))
+  CmdArgs.push_back("-fpseudo-probe-for-profiling");
   }
-  Args.AddLastArg(CmdArgs, options::OPT_fprofile_remapping_file_EQ);
-
-  if (Args.hasFlag(options::OPT_fpseudo_probe_for_profiling,
-   options::OPT_fno_pseudo_probe_for_profiling, false))
-CmdArgs.push_back("-fpseudo-probe-for-profiling");
-
   RenderBuiltinOptions(TC, RawTriple, Args, CmdArgs);
 
   if (!Args.hasFlag(options::OPT_fassume_sane_operator_new,


Index: clang/test/Driver/clang_f_opts.c
===
--- clang/test/Driver/clang_f_opts.c
+++ clang/test/Driver/clang_f_opts.c
@@ -58,6 +58,19 @@
 // RUN: %clang -### -S -fprofile-sample-use=%S/Inputs/file.prof %s 2>&1 | FileCheck 

[PATCH] D100567: BPF: emit debuginfo for Function of DeclRefExpr if requested

2021-04-15 Thread Yonghong Song via Phabricator via cfe-commits
yonghong-song added a comment.

Yes.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100567/new/

https://reviews.llvm.org/D100567

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100509: Support GCC's -fstack-usage flag

2021-04-15 Thread Dávid Bolvanský via Phabricator via cfe-commits
xbolva00 added a comment.

Great if better :)

Please consider adding a small testcase (eg from zstd) where llvm says static 
and gcc says dynamic,bound.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100509/new/

https://reviews.llvm.org/D100509

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100581: [Clang] -Wunused-but-set-parameter and -Wunused-but-set-variable

2021-04-15 Thread George Burgess IV via Phabricator via cfe-commits
george.burgess.iv added a comment.

Thanks for this! I think this warning looks valuable.

Most of my comments are various forms of style nits. :)




Comment at: clang/lib/Sema/SemaDecl.cpp:13738
 
+// values in Map should be true. traverses Body; if any key is used in any way
+// other than assigning to it, sets the corresponding value to false.

nit: Please use `///` for documenting functions and methods

secondary nit: it looks like the prevailing comment style here is `// 
Capitalized sentences ending with a period.` Please try to keep with that style 
(here and below)



Comment at: clang/lib/Sema/SemaDecl.cpp:13740
+// other than assigning to it, sets the corresponding value to false.
+static void AreAllUsesSets(Stmt *Body,
+   llvm::SmallDenseMap *Map) {

nit: Should this be a `const Stmt*`? I don't think we should be mutating the 
`Body`



Comment at: clang/lib/Sema/SemaDecl.cpp:13744
+  struct AllUsesAreSetsVisitor : RecursiveASTVisitor {
+llvm::SmallDenseMap *M;
+unsigned FalseCount = 0;

nit: For visitors that're meant primarily to update an external data structure 
and be discarded, I think the preferred style is that they take and hold 
references rather than pointers

Also, now that I'm looking at this again, could `M` be a `SmallPtrSet`? The idea would be "this is the set of things we've not seen outside of an 
assignment so far." 



Comment at: clang/lib/Sema/SemaDecl.cpp:13751
+bool TraverseBinaryOperator(BinaryOperator *BO) {
+  auto *LHS = BO->getLHS();
+  auto *DRE = dyn_cast(LHS);

nit: `const auto *` is preferred when possible



Comment at: clang/lib/Sema/SemaDecl.cpp:13763
+  auto iter = M->find(DRE->getFoundDecl());
+  if (iter != M->end() && iter->second) {
+// we've found a use of this Decl that's not the LHS of an assignment

nit: LLVM prefers early exits over reuniting control flow, so this should 
probably be:

```
if (iter == M->end() || !iter->second) {
  return true;
}

iter->second = false;
++FalseCount;
return FalseCount != M->size();
```



Comment at: clang/lib/Sema/SemaDecl.cpp:13812
+
+  auto IsCandidate = [CPlusPlus,  = Diags](ParmVarDecl *P) {
+// check for Ignored here, because if we have no candidates we can avoid

nit: For lambdas which don't escape, I think the style preference is simply 
`[&]`



Comment at: clang/lib/Sema/SemaDecl.cpp:13813-13818
+// check for Ignored here, because if we have no candidates we can avoid
+// walking the AST
+if (Diags.getDiagnosticLevel(diag::warn_unused_but_set_parameter,
+ P->getLocation()) ==
+DiagnosticsEngine::Ignored)
+  return false;

If we want to optimize for when this warning is disabled, would it be better to 
hoist this to the start of DiagnoseUnusedButSetParameters?



Comment at: clang/lib/Sema/SemaDecl.cpp:13850-13853
+// check for Ignored here, because if we have no candidates we can avoid
+// walking the AST
+if (Diags.getDiagnosticLevel(diag::warn_unused_but_set_variable,
+ VD->getLocation()) ==

Same "why not put this at the beginning of `DiagnoseUnusedButSetVariables`?" 
comment



Comment at: clang/lib/Sema/SemaStmt.cpp:437
+  CompoundStmt *CS = CompoundStmt::Create(Context, Elts, L, R);
+  DiagnoseUnusedButSetVariables(CS);
+  return CS;

It seems like these two warnings are doing analyses with identical requirements 
(e.g., the function's body must be present), but are taking two different sets 
of variables into account while doing so.

Is there a way we can rephrase this so we only need to walk the AST checking 
once for all of this?



Comment at: clang/test/Sema/warn-unused-but-set-variables-cpp.cpp:14
+
+  // no warning for structs in C++
+  struct S s;

Can you please expand on why not?

I can see a case for structs with nontrivial dtors or custom `operator=` 
methods. That said I don't understand why we'd avoid this for trivial structs 
like `S`.

(edit: now that I'm seeing this is for GCC compatibility, please call that out 
as a part of this comment. Maybe we want to do better than GCC? That can easily 
be done in a follow-up patch/commit though, so no strong opinion from me)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100581/new/

https://reviews.llvm.org/D100581

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100596: [WebAssembly] Remove saturating fp-to-int target intrinsics

2021-04-15 Thread Thomas Lively via Phabricator via cfe-commits
tlively created this revision.
tlively added reviewers: aheejin, dschuff.
Herald added subscribers: wingo, ecnelises, sunfish, hiraditya, 
jgravelle-google, sbc100.
tlively requested review of this revision.
Herald added projects: clang, LLVM.
Herald added subscribers: llvm-commits, cfe-commits.

Use the target-independent @llvm.fptosi and @llvm.fptoui intrinsics instead.
This includes removing the intrinsics for i32x4.trunc_sat_zero_f64x2_{s,u},
which are now represented in IR as a saturating truncation to a v2i32 followed 
by
a concatenation with a zero vector.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100596

Files:
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/test/CodeGen/builtins-wasm.c
  llvm/include/llvm/IR/IntrinsicsWebAssembly.td
  llvm/lib/Analysis/ConstantFolding.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISD.def
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
  llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.h
  llvm/lib/Target/WebAssembly/WebAssemblyInstrConv.td
  llvm/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
  llvm/test/CodeGen/WebAssembly/conv.ll
  llvm/test/CodeGen/WebAssembly/simd-intrinsics.ll
  llvm/test/Transforms/InstSimplify/ConstProp/WebAssembly/trunc_saturate.ll

Index: llvm/test/Transforms/InstSimplify/ConstProp/WebAssembly/trunc_saturate.ll
===
--- llvm/test/Transforms/InstSimplify/ConstProp/WebAssembly/trunc_saturate.ll
+++ /dev/null
@@ -1,610 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instsimplify -S | FileCheck %s
-
-target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
-target triple = "wasm32-unknown-unknown"
-
-declare i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float)
-declare i32 @llvm.wasm.trunc.saturate.unsigned.i32.f32(float)
-declare i32 @llvm.wasm.trunc.saturate.signed.i32.f64(double)
-declare i32 @llvm.wasm.trunc.saturate.unsigned.i32.f64(double)
-declare i64 @llvm.wasm.trunc.saturate.signed.i64.f32(float)
-declare i64 @llvm.wasm.trunc.saturate.unsigned.i64.f32(float)
-declare i64 @llvm.wasm.trunc.saturate.signed.i64.f64(double)
-declare i64 @llvm.wasm.trunc.saturate.unsigned.i64.f64(double)
-
-define void @test_i32_trunc_sat_f32_s(i32* %p) {
-; CHECK-LABEL: @test_i32_trunc_sat_f32_s(
-; CHECK-NEXT:store volatile i32 0, i32* [[P:%.*]], align 4
-; CHECK-NEXT:store volatile i32 0, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 0, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 0, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 1, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 1, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 1, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 -1, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 -1, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 -1, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 -1, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 -2, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 2147483520, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 -2147483648, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 2147483647, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 -2147483648, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 2147483647, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 -2147483648, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 0, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 0, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 0, i32* [[P]], align 4
-; CHECK-NEXT:store volatile i32 0, i32* [[P]], align 4
-; CHECK-NEXT:ret void
-;
-  %t0 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float +0.0)
-  store volatile i32 %t0, i32* %p
-  %t1 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float -0.0)
-  store volatile i32 %t1, i32* %p
-  %t2 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0x36a0); 0x1p-149
-  store volatile i32 %t2, i32* %p
-  %t3 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0xb6a0); -0x1p-149
-  store volatile i32 %t3, i32* %p
-  %t4 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 1.0)
-  store volatile i32 %t4, i32* %p
-  %t5 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0x3ff1a000); 0x1.1ap+0
-  store volatile i32 %t5, i32* %p
-  %t6 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 1.5)
-  store volatile i32 %t6, i32* %p
-  %t7 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float -1.0)
-  store volatile i32 %t7, i32* %p
-  %t8 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 0xbff1a000); -0x1.1ap+0
-  store volatile i32 %t8, i32* %p
-  %t9 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float -1.5)
-  store volatile i32 %t9, i32* %p
-  %t10 = call i32 @llvm.wasm.trunc.saturate.signed.i32.f32(float 

[PATCH] D99503: [clang-format] Inconsistent behavior regarding line break before access modifier

2021-04-15 Thread Björn Schäpers via Phabricator via cfe-commits
HazardyKnusperkeks accepted this revision.
HazardyKnusperkeks added a comment.

I don't know if you did elsewhere, but you have to give a name and email for 
the commit, so that someone can push it for you.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99503/new/

https://reviews.llvm.org/D99503

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100552: [HIP] Diagnose compiling kernel without offload arch

2021-04-15 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl updated this revision to Diff 337883.
yaxunl marked an inline comment as done.
yaxunl added a comment.

revised error msg by Aaron's comments


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100552/new/

https://reviews.llvm.org/D100552

Files:
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/lib/Driver/ToolChains/HIP.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/test/CodeGenCUDA/amdgpu-hip-implicit-kernarg.cu
  clang/test/CodeGenCUDA/amdgpu-kernel-arg-pointer-type.cu
  clang/test/CodeGenCUDA/amdgpu-kernel-attrs.cu
  clang/test/CodeGenCUDA/kernel-amdgcn.cu
  clang/test/CodeGenCUDA/kernel-args.cu
  clang/test/CodeGenCUDA/kernel-dbg-info.cu
  clang/test/CodeGenCUDA/lambda-reference-var.cu
  clang/test/CodeGenCUDA/lambda.cu
  clang/test/CodeGenCUDA/managed-var.cu
  clang/test/CodeGenCUDA/norecurse.cu
  clang/test/CodeGenCUDA/static-device-var-no-rdc.cu
  clang/test/CodeGenCUDA/static-device-var-rdc.cu
  clang/test/CodeGenCUDA/unnamed-types.cu
  clang/test/Driver/cuda-flush-denormals-to-zero.cu
  clang/test/Driver/hip-default-gpu-arch.hip
  clang/test/SemaCUDA/kernel-no-gpu.cu

Index: clang/test/SemaCUDA/kernel-no-gpu.cu
===
--- /dev/null
+++ clang/test/SemaCUDA/kernel-no-gpu.cu
@@ -0,0 +1,11 @@
+// RUN: %clang_cc1 -fcuda-is-device -verify=hip -x hip %s
+// RUN: %clang_cc1 -fcuda-is-device -verify=cuda %s
+// cuda-no-diagnostics
+
+#include "Inputs/cuda.h"
+
+__global__ void kern1() {}
+// hip-error@-1 {{compiling a HIP kernel without specifying an offload arch is not allowed}}
+
+// Make sure the error is emitted once.
+__global__ void kern2() {}
Index: clang/test/Driver/hip-default-gpu-arch.hip
===
--- clang/test/Driver/hip-default-gpu-arch.hip
+++ clang/test/Driver/hip-default-gpu-arch.hip
@@ -4,4 +4,5 @@
 
 // RUN: %clang -### -c %s 2>&1 | FileCheck %s
 
-// CHECK: {{.*}}clang{{.*}}"-target-cpu" "gfx803"
+// CHECK-NOT: {{.*}}clang{{.*}}"-triple" "amdgcn-amd-amdhsa"{{.*}}"-target-cpu"
+// CHECK: {{.*}}clang{{.*}}"-triple" "amdgcn-amd-amdhsa"
Index: clang/test/Driver/cuda-flush-denormals-to-zero.cu
===
--- clang/test/Driver/cuda-flush-denormals-to-zero.cu
+++ clang/test/Driver/cuda-flush-denormals-to-zero.cu
@@ -26,8 +26,11 @@
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=NOFTZ %s
 
-// Test no subtarget, which should get the denormal setting of the default gfx803
-// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=FTZ %s
+// Test no subtarget, which should get the denormal setting of the default
+// CPU of AMDGPU target, which is 'none'.
+// RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c \
+// RUN:   -march=haswell -nocudainc -nogpulib %s 2>&1 \
+// RUN:   | FileCheck -check-prefix=NOFTZ %s
 
 // Test multiple offload archs with different defaults.
 // RUN: %clang -x hip -no-canonical-prefixes -### -target x86_64-linux-gnu -c -march=haswell --cuda-gpu-arch=gfx803 --cuda-gpu-arch=gfx900 -nocudainc -nogpulib %s 2>&1 | FileCheck -check-prefix=MIXED-DEFAULT-MODE %s
Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -1,6 +1,6 @@
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
-// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
+// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -target-cpu gfx906 -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
 
 #include "Inputs/cuda.h"
 
Index: clang/test/CodeGenCUDA/static-device-var-rdc.cu
===
--- clang/test/CodeGenCUDA/static-device-var-rdc.cu
+++ clang/test/CodeGenCUDA/static-device-var-rdc.cu
@@ -2,7 +2,7 @@
 // REQUIRES: amdgpu-registered-target
 
 // RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -fcuda-is-device \
-// RUN:   -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
+// RUN:   -target-cpu gfx906 -fgpu-rdc -emit-llvm -o - -x hip %s | FileCheck \
 // RUN:   -check-prefixes=DEV,INT-DEV %s
 
 // RUN: %clang_cc1 -triple x86_64-gnu-linux \
@@ -10,7 +10,7 

[PATCH] D100118: [clang] RFC Support new builtin __arithmetic_fence to control floating point optimization, and new clang option fprotect-parens

2021-04-15 Thread Melanie Blower via Phabricator via cfe-commits
mibintc updated this revision to Diff 337881.
mibintc retitled this revision from "[clang] RFC Support new builtin 
__arithmetic_fence to control floating point optiization" to "[clang] RFC 
Support new builtin __arithmetic_fence to control floating point optimization, 
and new clang option fprotect-parens".
mibintc added a comment.

This is a minor update containing only formatting changes. This patch is not yet 
ready for review, only for discussion. 
Together with the LLVM parent patch, this simple program can now run end-to-end:

  clang -c -ffast-math test.c
  
  float addF(float x, float y) {
return __arithmetic_fence(x + y);
  }


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100118/new/

https://reviews.llvm.org/D100118

Files:
  clang/include/clang/Basic/Builtins.def
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ExprConstant.cpp
  clang/lib/CodeGen/CGBuiltin.cpp
  clang/lib/Sema/SemaChecking.cpp
  clang/test/CodeGen/arithmetic-fence-builtin.c
  clang/test/Sema/arithmetic-fence-builtin.c

Index: clang/test/Sema/arithmetic-fence-builtin.c
===
--- /dev/null
+++ clang/test/Sema/arithmetic-fence-builtin.c
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm -o - -verify -x c++ %s
+int v;
+template  T addT(T a, T b) {
+  T *q = __arithmetic_fence();
+  // expected-error@-1 {{operand of type 'float *' where floating, complex or a vector of such types is required ('float *' invalid)}}
+  // expected-error@-2 {{operand of type 'int *' where floating, complex or a vector of such types is required ('int *' invalid)}}
+  return __arithmetic_fence(a + b);
+  // expected-error@-1 {{operand of type 'int' where floating, complex or a vector of such types is required ('int' invalid)}}
+}
+int addit(int a, int b) {
+  float x, y;
+  typedef struct {
+int a, b;
+  } stype;
+  stype s;
+  s = __arithmetic_fence(s);// expected-error {{operand of type 'stype' where floating, complex or a vector of such types is required ('stype' invalid)}}
+  x = __arithmetic_fence(x, y); // expected-error {{too many arguments to function call, expected 1, have 2}}
+  // Complex is supported.
+  _Complex double cd, cd1;
+  cd = __arithmetic_fence(cd1);
+  // Vector is supported.
+  typedef float __v4hi __attribute__((__vector_size__(8)));
+  __v4hi vec1, vec2;
+  vec1 = __arithmetic_fence(vec2);
+
+  v = __arithmetic_fence(a + b); // expected-error {{operand of type 'int' where floating, complex or a vector of such types is required ('int' invalid)}}
+  float f = addT(a, b);   // expected-note {{in instantiation of function template specialization 'addT' requested here}}
+  int i = addT(1, 2);   // expected-note {{in instantiation of function template specialization 'addT' requested here}}
+  constexpr float d = 1.0 + 2.0;
+  constexpr float c = __arithmetic_fence(1.0 + 2.0);
+  // expected-error@-1 {{constexpr variable 'c' must be initialized by a constant expression}}
+  return 0;
+}
Index: clang/test/CodeGen/arithmetic-fence-builtin.c
===
--- /dev/null
+++ clang/test/CodeGen/arithmetic-fence-builtin.c
@@ -0,0 +1,35 @@
+// Test with fast math
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm \
+// RUN: -menable-no-infs -menable-no-nans -menable-unsafe-fp-math \
+// RUN: -fno-signed-zeros -mreassociate -freciprocal-math \
+// RUN: -ffp-contract=fast -fno-rounding-math -ffast-math -ffinite-math-only \
+// RUN: -o - %s | FileCheck %s
+//
+// Test with fast math, showing incomplete implementation for Complex
+// this test fails.
+// RUN: %clang_cc1 -triple i386-pc-linux-gnu -emit-llvm \
+// RUN: -menable-no-infs -menable-no-nans -menable-unsafe-fp-math \
+// RUN: -fno-signed-zeros -mreassociate -freciprocal-math \
+// RUN: -ffp-contract=fast -fno-rounding-math -ffast-math -ffinite-math-only \
+// RUN: -o - -DSHOWBUG %s | FileCheck %s
+//
+// TBD: Add test without fast flags showing llvm intrinsic not created
+int v;
+int addit(float a, float b) {
+//CHECK: define {{.*}} @addit(float %a, float %b) #0 {
+#ifdef SHOWBUG
+  // Assertion fail in clang when try to Emit complex expression
+  // Complex should be supported.
+  _Complex double cd, cd1;
+  cd = __arithmetic_fence(cd1);
+#endif
+  // Vector should be supported.
+  typedef float __v2f32 __attribute__((__vector_size__(8)));
+  __v2f32 vec1, vec2;
+  vec1 = __arithmetic_fence(vec2);
+  // CHECK: call fast <2 x float> @llvm.arithmetic.fence.v2f32
+
+  v = __arithmetic_fence(a + b);
+  // CHECK: call fast float @llvm.arithmetic.fence.f32(float %add)
+  return 0;
+}
Index: clang/lib/Sema/SemaChecking.cpp
===
--- clang/lib/Sema/SemaChecking.cpp
+++ clang/lib/Sema/SemaChecking.cpp
@@ -1555,6 +1555,10 @@
 Diag(TheCall->getBeginLoc(), diag::warn_alloca)
 << 

[PATCH] D100488: [SystemZ][z/OS] Add IsText Argument to GetFile and GetFileOrSTDIN

2021-04-15 Thread Reid Kleckner via Phabricator via cfe-commits
rnk added a comment.

In D100488#2689494 , @amccarth wrote:

> Personally, I'm not a fan of boolean function parameters because of the 
> inline comments necessary to make the call site understandable.  But it 
> appears to be consistent with LLVM Coding Standards and other APIs, so this 
> looks right to me.

I think it would be a reasonable follow-up change to turn these optional 
boolean parameters into a flags enum.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100488/new/

https://reviews.llvm.org/D100488

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D97417: [clangd] use a compatible preamble for the first AST built

2021-04-15 Thread Quentin Chateau via Phabricator via cfe-commits
qchateau added a comment.
Herald added a project: clang-tools-extra.

Have you had a chance to give this patch some thought? I've been using it in 
my daily work since I submitted the patch, and I wouldn't go back.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D97417/new/

https://reviews.llvm.org/D97417

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100567: BPF: emit debuginfo for Function of DeclRefExpr if requested

2021-04-15 Thread Andrii Nakryiko via Phabricator via cfe-commits
anakryiko accepted this revision.
anakryiko added a comment.

Nice, thanks! This will work for externs with explicit section name (.ksym) and 
with no section name (externs for static linking), right?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100567/new/

https://reviews.llvm.org/D100567

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D99675: RFC [llvm][clang] Create new intrinsic llvm.arith.fence to control FP optimization at expression level

2021-04-15 Thread Melanie Blower via Phabricator via cfe-commits
mibintc updated this revision to Diff 337879.
mibintc edited the summary of this revision.
mibintc added a comment.

This is a minor update from @pengfei which allows simple test cases to run 
end-to-end with clang.
Also I changed the "summary" to reflect the review discussion around the FMA 
optimization, to choose "FMA is not allowed across a fence".


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D99675/new/

https://reviews.llvm.org/D99675

Files:
  llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
  llvm/include/llvm/CodeGen/BasicTTIImpl.h
  llvm/include/llvm/CodeGen/ISDOpcodes.h
  llvm/include/llvm/CodeGen/SelectionDAGISel.h
  llvm/include/llvm/IR/IRBuilder.h
  llvm/include/llvm/IR/Intrinsics.td
  llvm/include/llvm/Support/TargetOpcodes.def
  llvm/include/llvm/Target/Target.td
  llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
  llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp

Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
===
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2321,6 +2321,11 @@
N->getOperand(0));
 }
 
+void SelectionDAGISel::Select_ARITH_FENCE(SDNode *N) {
+  CurDAG->SelectNodeTo(N, TargetOpcode::ARITH_FENCE, N->getValueType(0),
+   N->getOperand(0));
+}
+
 /// GetVBR - decode a vbr encoding whose top bit is set.
 LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
 GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned ) {
@@ -2872,6 +2877,9 @@
   case ISD::FREEZE:
 Select_FREEZE(NodeToMatch);
 return;
+  case ISD::ARITH_FENCE:
+Select_ARITH_FENCE(NodeToMatch);
+return;
   }
 
   assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -7210,6 +7210,13 @@
 }
 break;
   }
+  case Intrinsic::arithmetic_fence: {
+auto DL = getCurSDLoc();
+SDValue Val = getValue(FPI.getArgOperand(0));
+EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), FPI.getType());
+setValue(, DAG.getNode(ISD::ARITH_FENCE, DL, ResultVT, Val));
+return;
+  }
   }
 
   // A few strict DAG nodes carry additional operands that are not
Index: llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
===
--- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1275,6 +1275,9 @@
   case TargetOpcode::PSEUDO_PROBE:
 emitPseudoProbe(MI);
 break;
+  case TargetOpcode::ARITH_FENCE:
+OutStreamer->emitRawComment("ARITH_FENCE");
+break;
   default:
 emitInstruction();
 if (CanDoExtraAnalysis) {
Index: llvm/include/llvm/Target/Target.td
===
--- llvm/include/llvm/Target/Target.td
+++ llvm/include/llvm/Target/Target.td
@@ -1172,6 +1172,12 @@
   let AsmString = "PSEUDO_PROBE";
   let hasSideEffects = 1;
 }
+def ARITH_FENCE : StandardPseudoInstruction {
+  let OutOperandList = (outs unknown:$dst);
+  let InOperandList = (ins unknown:$src);
+  let AsmString = "";
+  let hasSideEffects = false;
+}
 
 def STACKMAP : StandardPseudoInstruction {
   let OutOperandList = (outs);
Index: llvm/include/llvm/Support/TargetOpcodes.def
===
--- llvm/include/llvm/Support/TargetOpcodes.def
+++ llvm/include/llvm/Support/TargetOpcodes.def
@@ -117,6 +117,9 @@
 /// Pseudo probe
 HANDLE_TARGET_OPCODE(PSEUDO_PROBE)
 
+/// Arithmetic fence.
+HANDLE_TARGET_OPCODE(ARITH_FENCE)
+
 /// A Stackmap instruction captures the location of live variables at its
 /// position in the instruction stream. It is followed by a shadow of bytes
 /// that must lie within the function and not contain another stackmap.
Index: llvm/include/llvm/IR/Intrinsics.td
===
--- llvm/include/llvm/IR/Intrinsics.td
+++ llvm/include/llvm/IR/Intrinsics.td
@@ -1311,6 +1311,9 @@
 def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
 [IntrInaccessibleMemOnly, IntrWillReturn]>;
 
+// Arithmetic fence intrinsic.
+def int_arithmetic_fence : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>;
+
 // Intrinsics to support half precision floating point format
 let IntrProperties = [IntrNoMem, IntrWillReturn] in {
 def int_convert_to_fp16   : DefaultAttrsIntrinsic<[llvm_i16_ty], [llvm_anyfloat_ty]>;
Index: llvm/include/llvm/IR/IRBuilder.h

[PATCH] D98783: [AMDGPU] Add GlobalDCE before internalization pass

2021-04-15 Thread Matt Arsenault via Phabricator via cfe-commits
arsenm added inline comments.



Comment at: llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp:584
+  PM.addPass(GlobalDCEPass());
   PM.addPass(InternalizePass(mustPreserveGV));
 }

Should we move where the internalize pass is added instead?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98783/new/

https://reviews.llvm.org/D98783

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D98783: [AMDGPU] Add GlobalDCE before internalization pass

2021-04-15 Thread Artem Belevich via Phabricator via cfe-commits
tra accepted this revision.
tra added a comment.
This revision is now accepted and ready to land.

LGTM with a test nit.




Comment at: clang/test/CodeGenCUDA/unused-global-var.cu:23-29
+// CHECK-NOT: @_ZL2v3
+constexpr int v3 = 1;
+
+// Check managed variables are always kept.
+
+// CHECK: @v4
+__managed__ int v4;

Mixing CHECK and CHECK-NOT is tricky and, in general, only works if things are 
always in the same order.

E.g. if `v3` does get emitted after `v4`, the test will still pass.

One way to deal with that would be to split the positive and negative checks 
into separate runs.
First one would check the variables we do want to keep with `CHECK-DAG`.
The other one would only check for the absence of the variables with `-NOT`.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D98783/new/

https://reviews.llvm.org/D98783

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100581: [Clang] -Wunused-but-set-parameter and -Wunused-but-set-variable

2021-04-15 Thread Michael Benfield via Phabricator via cfe-commits
mbenfield updated this revision to Diff 337877.
mbenfield added a comment.

Updates in response to comments.

- Parameters.empty() and early exit.

- comments in VisitDeclRefExpr.

- clearer description of the warnings.

Also, changed the name of DiagnoseUnusedDecls to DiagnoseUnusedButSetDecls for
clarity and to avoid confusion with Sema::DiagnoseUnusedDecl.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100581/new/

https://reviews.llvm.org/D100581

Files:
  clang/include/clang/Basic/DiagnosticGroups.td
  clang/include/clang/Basic/DiagnosticSemaKinds.td
  clang/include/clang/Sema/Sema.h
  clang/lib/Sema/SemaDecl.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/lib/Sema/SemaStmt.cpp
  clang/test/Sema/vector-gcc-compat.c
  clang/test/Sema/warn-unused-but-set-parameters-cpp.cpp
  clang/test/Sema/warn-unused-but-set-parameters.c
  clang/test/Sema/warn-unused-but-set-variables-cpp.cpp
  clang/test/Sema/warn-unused-but-set-variables.c

Index: clang/test/Sema/warn-unused-but-set-variables.c
===
--- /dev/null
+++ clang/test/Sema/warn-unused-but-set-variables.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -fblocks -fsyntax-only -Wunused-but-set-variable -verify %s
+
+struct S {
+  int i;
+};
+
+int f0() {
+  int y; // expected-warning{{variable 'y' set but not used}}
+  y = 0;
+
+  int z __attribute__((unused));
+  z = 0;
+
+  struct S s; // expected-warning{{variable 's' set but not used}}
+  struct S t;
+  s = t;
+
+  int x;
+  x = 0;
+  return x;
+}
+
+void f1(void) {
+  (void)^() {
+int y; // expected-warning{{variable 'y' set but not used}}
+y = 0;
+
+int x;
+x = 0;
+return x;
+  };
+}
Index: clang/test/Sema/warn-unused-but-set-variables-cpp.cpp
===
--- /dev/null
+++ clang/test/Sema/warn-unused-but-set-variables-cpp.cpp
@@ -0,0 +1,33 @@
+// RUN: %clang_cc1 -fblocks -fsyntax-only -Wunused-but-set-variable -verify %s
+
+struct S {
+  int i;
+};
+
+int f0() {
+  int y; // expected-warning{{variable 'y' set but not used}}
+  y = 0;
+
+  int z __attribute__((unused));
+  z = 0;
+
+  // no warning for structs in C++
+  struct S s;
+  struct S t;
+  s = t;
+
+  int x;
+  x = 0;
+  return x;
+}
+
+void f1(void) {
+  (void)^() {
+int y; // expected-warning{{variable 'y' set but not used}}
+y = 0;
+
+int x;
+x = 0;
+return x;
+  };
+}
Index: clang/test/Sema/warn-unused-but-set-parameters.c
===
--- /dev/null
+++ clang/test/Sema/warn-unused-but-set-parameters.c
@@ -0,0 +1,26 @@
+// RUN: %clang_cc1 -fblocks -fsyntax-only -Wunused-but-set-parameter -verify %s
+
+int f0(int x,
+   int y, // expected-warning{{parameter 'y' set but not used}}
+   int z __attribute__((unused))) {
+  y = 0;
+  return x;
+}
+
+void f1(void) {
+  (void)^(int x,
+  int y, // expected-warning{{parameter 'y' set but not used}}
+  int z __attribute__((unused))) {
+y = 0;
+return x;
+  };
+}
+
+struct S {
+  int i;
+};
+
+void f3(struct S s) { // expected-warning{{parameter 's' set but not used}}
+  struct S t;
+  s = t;
+}
Index: clang/test/Sema/warn-unused-but-set-parameters-cpp.cpp
===
--- /dev/null
+++ clang/test/Sema/warn-unused-but-set-parameters-cpp.cpp
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -fblocks -fsyntax-only -Wunused-but-set-parameter -verify %s
+
+int f0(int x,
+   int y, // expected-warning{{parameter 'y' set but not used}}
+   int z __attribute__((unused))) {
+  y = 0;
+  return x;
+}
+
+void f1(void) {
+  (void)^(int x,
+  int y, // expected-warning{{parameter 'y' set but not used}}
+  int z __attribute__((unused))) {
+y = 0;
+return x;
+  };
+}
+
+struct S {
+  int i;
+};
+
+// in C++, don't warn for a struct
+void f3(struct S s) {
+  struct S t;
+  s = t;
+}
Index: clang/test/Sema/vector-gcc-compat.c
===
--- clang/test/Sema/vector-gcc-compat.c
+++ clang/test/Sema/vector-gcc-compat.c
@@ -35,7 +35,7 @@
 
 void arithmeticTest(void) {
   v2i64 v2i64_a = (v2i64){0, 1};
-  v2i64 v2i64_r;
+  v2i64 v2i64_r; // expected-warning{{variable 'v2i64_r' set but not used}}
 
   v2i64_r = v2i64_a + 1;
   v2i64_r = v2i64_a - 1;
@@ -58,7 +58,7 @@
 
 void comparisonTest(void) {
   v2i64 v2i64_a = (v2i64){0, 1};
-  v2i64 v2i64_r;
+  v2i64 v2i64_r; // expected-warning{{variable 'v2i64_r' set but not used}}
 
   v2i64_r = v2i64_a == 1;
   v2i64_r = v2i64_a != 1;
@@ -78,8 +78,8 @@
 void logicTest(void) {
   v2i64 v2i64_a = (v2i64){0, 1};
   v2i64 v2i64_b = (v2i64){2, 1};
-  v2i64 v2i64_c = (v2i64){3, 1};
-  v2i64 v2i64_r;
+  v2i64 v2i64_c = (v2i64){3, 1}; // expected-warning{{variable 'v2i64_c' set but not used}}
+  v2i64 v2i64_r; // expected-warning{{variable 'v2i64_r' set but not used}}
 
  

[PATCH] D100581: [Clang] -Wunused-but-set-parameter and -Wunused-but-set-variable

2021-04-15 Thread Michael Benfield via Phabricator via cfe-commits
mbenfield added a comment.

In D100581#2692425 , @aeubanks wrote:

> running this over an existing codebase to see what fires is probably a good 
> idea (if you haven't already done that)

I ran it on LLVM itself (with clang and lldb) and got no firings at all. (And I 
introduced an artificial `int x; x = 0;` which was detected and flagged as 
expected to make sure the warnings really were working.)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100581/new/

https://reviews.llvm.org/D100581

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100467: [clang] [AArch64] Fix handling of HFAs passed to Windows variadic functions

2021-04-15 Thread Martin Storsjö via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rG8e0f2e89ff95: [clang] [AArch64] Fix handling of HFAs passed 
to Windows variadic functions (authored by mstorsjo).

Changed prior to commit:
  https://reviews.llvm.org/D100467?vs=337412&id=337866#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100467/new/

https://reviews.llvm.org/D100467

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/ms_abi_aarch64.c

Index: clang/test/CodeGen/ms_abi_aarch64.c
===
--- clang/test/CodeGen/ms_abi_aarch64.c
+++ clang/test/CodeGen/ms_abi_aarch64.c
@@ -84,3 +84,39 @@
   __builtin_va_end(ap);
   // WIN64: call void @llvm.va_end
 }
+
+struct HFA {
+  float a, b, c;
+};
+
+__attribute__((ms_abi)) void msabi_hfa(struct HFA a);
+__attribute__((ms_abi)) void msabi_hfa_vararg(struct HFA a, int b, ...);
+
+void call_msabi_hfa(void) {
+  // COMMON-LABEL: define{{.*}} void @call_msabi_hfa()
+  // WIN64: call void @msabi_hfa([3 x float] {{.*}})
+  // LINUX: call win64cc void @msabi_hfa([3 x float] {{.*}})
+  msabi_hfa((struct HFA){1.0f, 2.0f, 3.0f});
+}
+
+void call_msabi_hfa_vararg(void) {
+  // COMMON-LABEL: define{{.*}} void @call_msabi_hfa_vararg()
+  // WIN64: call void ([2 x i64], i32, ...) @msabi_hfa_vararg([2 x i64] {{.*}}, i32 4, [2 x i64] {{.*}})
+  // LINUX: call win64cc void ([2 x i64], i32, ...) @msabi_hfa_vararg([2 x i64] {{.*}}, i32 4, [2 x i64] {{.*}})
+  msabi_hfa_vararg((struct HFA){1.0f, 2.0f, 3.0f}, 4,
+   (struct HFA){5.0f, 6.0f, 7.0f});
+}
+
+__attribute__((ms_abi)) void get_msabi_hfa_vararg(int a, ...) {
+  // COMMON-LABEL: define{{.*}} void @get_msabi_hfa_vararg
+  __builtin_ms_va_list ap;
+  __builtin_ms_va_start(ap, a);
+  // COMMON: %[[AP:.*]] = alloca i8*
+  // COMMON: call void @llvm.va_start
+  struct HFA b = __builtin_va_arg(ap, struct HFA);
+  // COMMON: %[[AP_CUR:.*]] = load i8*, i8** %[[AP]]
+  // COMMON-NEXT: %[[AP_NEXT:.*]] = getelementptr inbounds i8, i8* %[[AP_CUR]], i64 16
+  // COMMON-NEXT: store i8* %[[AP_NEXT]], i8** %[[AP]]
+  // COMMON-NEXT: bitcast i8* %[[AP_CUR]] to %struct.HFA*
+  __builtin_ms_va_end(ap);
+}
Index: clang/lib/CodeGen/TargetInfo.cpp
===
--- clang/lib/CodeGen/TargetInfo.cpp
+++ clang/lib/CodeGen/TargetInfo.cpp
@@ -5418,7 +5418,8 @@
   bool isDarwinPCS() const { return Kind == DarwinPCS; }
 
   ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadic,
+  unsigned CallingConvention) const;
   ABIArgInfo coerceIllegalVector(QualType Ty) const;
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
@@ -5432,7 +5433,8 @@
   classifyReturnType(FI.getReturnType(), FI.isVariadic());
 
 for (auto  : FI.arguments())
-  it.info = classifyArgumentType(it.type);
+  it.info = classifyArgumentType(it.type, FI.isVariadic(),
+ FI.getCallingConvention());
   }
 
   Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
@@ -5635,7 +5637,9 @@
   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
 }
 
-ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
+ABIArgInfo
+AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
+ unsigned CallingConvention) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
   // Handle illegal vector types here.
@@ -5681,7 +5685,11 @@
   // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
   const Type *Base = nullptr;
   uint64_t Members = 0;
-  if (isHomogeneousAggregate(Ty, Base, Members)) {
+  bool IsWin64 = Kind == Win64 || CallingConvention == llvm::CallingConv::Win64;
+  bool IsWinVariadic = IsWin64 && IsVariadic;
+  // In variadic functions on Windows, all composite types are treated alike,
+  // no special handling of HFAs/HVAs.
+  if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
 return ABIArgInfo::getDirect(
 llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
   }
@@ -5838,10 +5846,10 @@
   return Members <= 4;
 }
 
-Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
-QualType Ty,
-CodeGenFunction &CGF) const {
-  ABIArgInfo AI = classifyArgumentType(Ty);
+Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
+   CodeGenFunction &CGF) const {
+  ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
+   CGF.CurFnInfo->getCallingConvention());
   bool IsIndirect = AI.isIndirect();
 
   llvm::Type *BaseTy = 

[clang] 8e0f2e8 - [clang] [AArch64] Fix handling of HFAs passed to Windows variadic functions

2021-04-15 Thread Martin Storsjö via cfe-commits

Author: Martin Storsjö
Date: 2021-04-15T22:21:27+03:00
New Revision: 8e0f2e89ff951c74875ed751e2215cc263b33328

URL: 
https://github.com/llvm/llvm-project/commit/8e0f2e89ff951c74875ed751e2215cc263b33328
DIFF: 
https://github.com/llvm/llvm-project/commit/8e0f2e89ff951c74875ed751e2215cc263b33328.diff

LOG: [clang] [AArch64] Fix handling of HFAs passed to Windows variadic functions

The documentation says that for variadic functions, all composites
are treated similarly, no special handling of HFAs/HVAs, not even
for the fixed arguments of a variadic function.

Differential Revision: https://reviews.llvm.org/D100467

Added: 


Modified: 
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGen/ms_abi_aarch64.c

Removed: 




diff  --git a/clang/lib/CodeGen/TargetInfo.cpp 
b/clang/lib/CodeGen/TargetInfo.cpp
index 3ff3eed15608..bd3c26537892 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -5418,7 +5418,8 @@ class AArch64ABIInfo : public SwiftABIInfo {
   bool isDarwinPCS() const { return Kind == DarwinPCS; }
 
   ABIArgInfo classifyReturnType(QualType RetTy, bool IsVariadic) const;
-  ABIArgInfo classifyArgumentType(QualType RetTy) const;
+  ABIArgInfo classifyArgumentType(QualType RetTy, bool IsVariadic,
+  unsigned CallingConvention) const;
   ABIArgInfo coerceIllegalVector(QualType Ty) const;
   bool isHomogeneousAggregateBaseType(QualType Ty) const override;
   bool isHomogeneousAggregateSmallEnough(const Type *Ty,
@@ -5432,7 +5433,8 @@ class AArch64ABIInfo : public SwiftABIInfo {
   classifyReturnType(FI.getReturnType(), FI.isVariadic());
 
 for (auto &it : FI.arguments())
-  it.info = classifyArgumentType(it.type);
+  it.info = classifyArgumentType(it.type, FI.isVariadic(),
+ FI.getCallingConvention());
   }
 
   Address EmitDarwinVAArg(Address VAListAddr, QualType Ty,
@@ -5635,7 +5637,9 @@ ABIArgInfo AArch64ABIInfo::coerceIllegalVector(QualType 
Ty) const {
   return getNaturalAlignIndirect(Ty, /*ByVal=*/false);
 }
 
-ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty) const {
+ABIArgInfo
+AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadic,
+ unsigned CallingConvention) const {
   Ty = useFirstFieldIfTransparentUnion(Ty);
 
   // Handle illegal vector types here.
@@ -5681,7 +5685,11 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType 
Ty) const {
   // Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
   const Type *Base = nullptr;
   uint64_t Members = 0;
-  if (isHomogeneousAggregate(Ty, Base, Members)) {
+  bool IsWin64 = Kind == Win64 || CallingConvention == llvm::CallingConv::Win64;
+  bool IsWinVariadic = IsWin64 && IsVariadic;
+  // In variadic functions on Windows, all composite types are treated alike,
+  // no special handling of HFAs/HVAs.
+  if (!IsWinVariadic && isHomogeneousAggregate(Ty, Base, Members)) {
 return ABIArgInfo::getDirect(
 llvm::ArrayType::get(CGT.ConvertType(QualType(Base, 0)), Members));
   }
@@ -5838,10 +5846,10 @@ bool 
AArch64ABIInfo::isHomogeneousAggregateSmallEnough(const Type *Base,
   return Members <= 4;
 }
 
-Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr,
-QualType Ty,
-CodeGenFunction &CGF) const {
-  ABIArgInfo AI = classifyArgumentType(Ty);
+Address AArch64ABIInfo::EmitAAPCSVAArg(Address VAListAddr, QualType Ty,
+   CodeGenFunction &CGF) const {
+  ABIArgInfo AI = classifyArgumentType(Ty, /*IsVariadic=*/true,
+   CGF.CurFnInfo->getCallingConvention());
   bool IsIndirect = AI.isIndirect();
 
   llvm::Type *BaseTy = CGF.ConvertType(Ty);

diff  --git a/clang/test/CodeGen/ms_abi_aarch64.c 
b/clang/test/CodeGen/ms_abi_aarch64.c
index 8526ce934ef5..cf244420296d 100644
--- a/clang/test/CodeGen/ms_abi_aarch64.c
+++ b/clang/test/CodeGen/ms_abi_aarch64.c
@@ -84,3 +84,39 @@ void f5(int a, ...) {
   __builtin_va_end(ap);
   // WIN64: call void @llvm.va_end
 }
+
+struct HFA {
+  float a, b, c;
+};
+
+__attribute__((ms_abi)) void msabi_hfa(struct HFA a);
+__attribute__((ms_abi)) void msabi_hfa_vararg(struct HFA a, int b, ...);
+
+void call_msabi_hfa(void) {
+  // COMMON-LABEL: define{{.*}} void @call_msabi_hfa()
+  // WIN64: call void @msabi_hfa([3 x float] {{.*}})
+  // LINUX: call win64cc void @msabi_hfa([3 x float] {{.*}})
+  msabi_hfa((struct HFA){1.0f, 2.0f, 3.0f});
+}
+
+void call_msabi_hfa_vararg(void) {
+  // COMMON-LABEL: define{{.*}} void @call_msabi_hfa_vararg()
+  // WIN64: call void ([2 x i64], i32, ...) @msabi_hfa_vararg([2 x i64] {{.*}}, i32 4, [2 x i64] {{.*}})
+  // LINUX: call win64cc void ([2 x i64], i32, ...) @msabi_hfa_vararg([2 x i64] {{.*}}, i32 4, [2 x i64] {{.*}})
+  

[PATCH] D100591: [Clang][AArch64] Disable rounding of return values for AArch64

2021-04-15 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

Hmm.  I think the right thing to do here is to recognize generally that we're 
emitting a mandatory tail call, and so suppress *all* the normal 
transformations on the return value.  The conditions on mandatory tail calls 
should make that possible, and it seems like it would be necessary for a lot of 
types.  Aggregates especially come to mind — if an aggregate is returned in 
registers, we're probably going to generate code like

  %0 = alloca %struct.foo
  %1 = call {i64,i64} @function()
  %2 = bitcast %0 to {i64,i64}*
  store %1, %2
  %3 = bitcast %0 to {i64,i64}*
  %4 = load %3
  ret %4

(Actually, probably much worse, with a lot of extract_values and so on.)  I 
assume that is going to completely break TCO, and we really need to generate

  %0 = call {i64,i64} @function()
  ret %0

The *only* way we can do that is to recognize that the call has to be done 
differently in IRGen.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100591/new/

https://reviews.llvm.org/D100591

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100509: Support GCC's -fstack-usage flag

2021-04-15 Thread Pengxuan Zheng via Phabricator via cfe-commits
pzheng added a comment.

In D100509#2692259, @xbolva00 wrote:

>   Set current_function_has_unbounded_dynamic_stack_size to 1 when pushing a 
> variable-sized argument onto the stack. 
>   
> if (current_function_has_unbounded_dynamic_stack_size)
>   stack_usage_kind = DYNAMIC;
> else
>   stack_usage_kind = DYNAMIC_BOUNDED;
>
> https://github.com/gcc-mirror/gcc/commit/d3c12306971946ab9a9d644ddf7b26e9383d2391
>
> You can compile eg. zstd project with "CC="gcc -fstack-usage" make -j6 -B" 
> and then grep "bounded" . -R and you will find some examples of 
> dynamic,bounded.

Thanks, @xbolva00! This is really helpful. I tried compiling the zstd project 
with both gcc and clang. I found that in all those cases I checked where gcc 
outputs "dynamic,bounded", clang actually outputs "static" instead. Looks like 
LLVM already does a better job of determining the frame size statically. So, 
maybe there is no need to add the "dynamic,bounded" case to clang? Thoughts?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100509/new/

https://reviews.llvm.org/D100509

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100591: [Clang][AArch64] Disable rounding of return values for AArch64

2021-04-15 Thread Andrew Savonichev via Phabricator via cfe-commits
asavonic added inline comments.



Comment at: clang/test/CodeGen/arm64-arguments.c:53
 
-// CHECK: define{{.*}} i64 @f12()
+// CHECK: define{{.*}} i24 @f11_packed()
+struct s11_packed { char c; short s } __attribute__((packed));

I'm not sure if `i24` here is a problem or not. Let me know if we need to 
handle this differently.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D100591/new/

https://reviews.llvm.org/D100591

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D100591: [Clang][AArch64] Disable rounding of return values for AArch64

2021-04-15 Thread Andrew Savonichev via Phabricator via cfe-commits
asavonic created this revision.
asavonic added reviewers: rjmccall, dmgreen, t.p.northover, ostannard, 
sdesmalen, momchil.velikov, SjoerdMeijer.
Herald added subscribers: mstorsjo, danielkiss, kristof.beyls.
asavonic requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

If a return value is explicitly rounded to 64 bits, an additional
`zext` instruction is emitted, and in some cases it prevents tail call
optimization.

As discussed in D100225, this rounding is not necessary and can be
disabled.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D100591

Files:
  clang/lib/CodeGen/TargetInfo.cpp
  clang/test/CodeGen/aarch64-varargs.c
  clang/test/CodeGen/arm64-arguments.c
  clang/test/CodeGen/arm64-microsoft-arguments.cpp
  clang/test/CodeGen/attr-noundef.cpp
  clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
  clang/test/CodeGenCXX/trivial_abi.cpp

Index: clang/test/CodeGenCXX/trivial_abi.cpp
===
--- clang/test/CodeGenCXX/trivial_abi.cpp
+++ clang/test/CodeGenCXX/trivial_abi.cpp
@@ -198,12 +198,11 @@
   testReturnLarge();
 }
 
-// CHECK: define{{.*}} i64 @_Z20testReturnHasTrivialv()
+// CHECK: define{{.*}} i32 @_Z20testReturnHasTrivialv()
 // CHECK: %[[RETVAL:.*]] = alloca %[[STRUCT_TRIVIAL:.*]], align 4
 // CHECK: %[[COERCE_DIVE:.*]] = getelementptr inbounds %[[STRUCT_TRIVIAL]], %[[STRUCT_TRIVIAL]]* %[[RETVAL]], i32 0, i32 0
 // CHECK: %[[V0:.*]] = load i32, i32* %[[COERCE_DIVE]], align 4
-// CHECK: %[[COERCE_VAL_II:.*]] = zext i32 %[[V0]] to i64
-// CHECK: ret i64 %[[COERCE_VAL_II]]
+// CHECK: ret i32 %[[V0]]
 // CHECK: }
 
 Trivial testReturnHasTrivial() {
Index: clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
===
--- clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
+++ clang/test/CodeGenCXX/microsoft-abi-sret-and-byval.cpp
@@ -87,7 +87,7 @@
 // LINUX-LABEL: define{{.*}} void @_Z12small_returnv(%struct.Small* noalias sret(%struct.Small) align 4 %agg.result)
 // WIN32: define dso_local i32 @"?small_return@@YA?AUSmall@@XZ"()
 // WIN64: define dso_local i32 @"?small_return@@YA?AUSmall@@XZ"()
-// WOA64: define dso_local i64 @"?small_return@@YA?AUSmall@@XZ"()
+// WOA64: define dso_local i32 @"?small_return@@YA?AUSmall@@XZ"()
 
 Medium medium_return() { return Medium(); }
 // LINUX-LABEL: define{{.*}} void @_Z13medium_returnv(%struct.Medium* noalias sret(%struct.Medium) align 4 %agg.result)
Index: clang/test/CodeGen/attr-noundef.cpp
===
--- clang/test/CodeGen/attr-noundef.cpp
+++ clang/test/CodeGen/attr-noundef.cpp
@@ -11,7 +11,7 @@
 Trivial ret_trivial() { return {}; }
 void pass_trivial(Trivial e) {}
 // CHECK-INTEL: [[DEFINE:define( dso_local)?]] i32 @{{.*}}ret_trivial
-// CHECK-AARCH: [[DEFINE:define( dso_local)?]] i64 @{{.*}}ret_trivial
+// CHECK-AARCH: [[DEFINE:define( dso_local)?]] i32 @{{.*}}ret_trivial
 // CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i32 %
 // CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i64 %
 
@@ -43,7 +43,7 @@
 Trivial ret_trivial() { return {}; }
 void pass_trivial(Trivial e) {}
 // CHECK-INTEL: [[DEFINE]] i32 @{{.*}}ret_trivial
-// CHECK-AARCH: [[DEFINE]] i64 @{{.*}}ret_trivial
+// CHECK-AARCH: [[DEFINE]] i32 @{{.*}}ret_trivial
 // CHECK-INTEL: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i32 %
 // CHECK-AARCH: [[DEFINE]] void @{{.*}}pass_trivial{{.*}}(i64 %
 
Index: clang/test/CodeGen/arm64-microsoft-arguments.cpp
===
--- clang/test/CodeGen/arm64-microsoft-arguments.cpp
+++ clang/test/CodeGen/arm64-microsoft-arguments.cpp
@@ -104,8 +104,8 @@
 
 // Pass and return an object with a non-trivial explicitly defaulted constructor
 // (passed directly, returned directly)
-// CHECK: define {{.*}} i64 @"?f6@@YA?AUS6@@XZ"()
-// CHECK: call i64 {{.*}}func6{{.*}}(i64 {{.*}})
+// CHECK: define {{.*}} i8 @"?f6@@YA?AUS6@@XZ"()
+// CHECK: call i8 {{.*}}func6{{.*}}(i64 {{.*}})
 struct S6a {
   S6a();
 };
@@ -123,8 +123,8 @@
 
 // Pass and return an object with a non-trivial implicitly defaulted constructor
 // (passed directly, returned directly)
-// CHECK: define {{.*}} i64 @"?f7@@YA?AUS7@@XZ"()
-// CHECK: call i64 {{.*}}func7{{.*}}(i64 {{.*}})
+// CHECK: define {{.*}} i8 @"?f7@@YA?AUS7@@XZ"()
+// CHECK: call i8 {{.*}}func7{{.*}}(i64 {{.*}})
 struct S7 {
   S6a x;
 };
Index: clang/test/CodeGen/arm64-arguments.c
===
--- clang/test/CodeGen/arm64-arguments.c
+++ clang/test/CodeGen/arm64-arguments.c
@@ -5,29 +5,28 @@
   return 0;
 }
 
-// Struct as return type. Aggregates <= 16 bytes are passed directly and round
-// up to multiple of 8 bytes.
-// CHECK: define{{.*}} i64 @f1()
+// Struct as return type. Aggregates <= 16 bytes are passed 

  1   2   3   >