[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-02-04 Thread Nico Weber via Phabricator via cfe-commits
thakis added a comment.

Hi, a whole bunch of cuda-related tests failed on windows with this in 
(http://45.33.8.238/win/32620/summary.html). Did you run check-clang before 
committing? I reverted this for now in 4874ff02417916cc9ff994b34abcb5e563056546 
 . After 
the revert, all tests pass again.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-02-03 Thread Michael Liao via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGa2fdf9d4d734: [hip][cuda] Enable extended lambda support on 
Windows. (authored by hliao).

Changed prior to commit:
  https://reviews.llvm.org/D69322?vs=320679=321316#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

Files:
  clang/include/clang/AST/ASTContext.h
  clang/include/clang/AST/DeclCXX.h
  clang/include/clang/AST/Mangle.h
  clang/include/clang/AST/MangleNumberingContext.h
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/CXXABI.h
  clang/lib/AST/DeclCXX.cpp
  clang/lib/AST/ItaniumCXXABI.cpp
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/MicrosoftCXXABI.cpp
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/Sema/SemaLambda.cpp
  clang/lib/Sema/TreeTransform.h
  clang/lib/Serialization/ASTReaderDecl.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/CodeGenCUDA/ms-linker-options.cu
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -1,12 +1,17 @@
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
 // RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
 
 #include "Inputs/cuda.h"
 
 // HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// HOST: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
+// Check that, on MSVC, the same device kernel mangling name is generated.
+// MSVC: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// MSVC: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
 
 __device__ float d0(float x) {
-  return [](float x) { return x + 2.f; }(x);
+  return [](float x) { return x + 1.f; }(x);
 }
 
 __device__ float d1(float x) {
@@ -14,11 +19,21 @@
 }
 
 // DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 template 
 __global__ void k0(float *p, F f) {
   p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
 }
 
+// DEVICE: amdgpu_kernel void @_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE_clEf(
+// DEVICE: define internal float @_ZZ2f1PfENKUlffE_clEff(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE0_clEf(
+template 
+__global__ void k1(float *p, F0 f0, F1 f1, F2 f2) {
+  p[0] = f0(p[0]) + f1(p[1], p[2]) + f2(p[3]);
+}
+
 void f0(float *p) {
   [](float *p) {
 *p = 1.f;
@@ -29,11 +44,17 @@
 // linkages are still required to keep the original `internal` linkage.
 
 // HOST: define internal void @_ZZ2f1PfENKUlS_E_clES_(
-// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 void f1(float *p) {
   [](float *p) {
-k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 3.f; });
   }(p);
+  k1<<<1,1>>>(p,
+  [] __device__ (float x) { return x + 4.f; },
+  [] __device__ (float x, float y) { return x * y; },
+  [] __device__ (float x) { return x + 5.f; });
 }
 // HOST: @__hip_register_globals
 // HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// MSVC: __hipRegisterFunction{{.*}}@"??$k0@V@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
+// MSVC: __hipRegisterFunction{{.*}}@"??$k1@V@?0??f1@@YAXPEAM@Z@V@?0??2@YAX0@Z@V@?0??2@YAX0@Z@@@YAXPEAMV@?0??f1@@YAX0@Z@V@?0??1@YAX0@Z@V@?0??1@YAX0@Z@@Z{{.*}}@1
Index: clang/test/CodeGenCUDA/ms-linker-options.cu
===
--- clang/test/CodeGenCUDA/ms-linker-options.cu
+++ clang/test/CodeGenCUDA/ms-linker-options.cu
@@ -2,12 +2,12 @@
 // RUN:   -fno-autolink -triple amdgcn-amd-amdhsa \
 // RUN:   | FileCheck -check-prefix=DEV %s
 // RUN: %clang_cc1 -emit-llvm -o - -fms-extensions -x hip %s -triple \
-// RUN:x86_64-pc-windows-msvc | FileCheck -check-prefix=HOST %s
+// RUN:x86_64-pc-windows-msvc -aux-triple amdgcn | FileCheck -check-prefix=HOST %s
 // RUN: %clang_cc1 -emit-llvm 

[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-02-03 Thread Reid Kleckner via Phabricator via cfe-commits
rnk accepted this revision.
rnk added a comment.
This revision is now accepted and ready to land.

Looks good with one suggestion.




Comment at: clang/include/clang/AST/ASTContext.h:540
   llvm::MapVector MangleNumbers;
   llvm::MapVector StaticLocalNumbers;
 

This is a nit, but I'd group the device mangling number map here with the other 
mangling number side tables.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-02-03 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

@rsmith, @rnk and @rjmccall, could you review the latest change addressing the 
memory usage issue?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-02-02 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

PING for review


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-02-01 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 320679.
hliao added a comment.

Revise comments.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

Files:
  clang/include/clang/AST/ASTContext.h
  clang/include/clang/AST/DeclCXX.h
  clang/include/clang/AST/Mangle.h
  clang/include/clang/AST/MangleNumberingContext.h
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/CXXABI.h
  clang/lib/AST/DeclCXX.cpp
  clang/lib/AST/ItaniumCXXABI.cpp
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/MicrosoftCXXABI.cpp
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/Sema/SemaLambda.cpp
  clang/lib/Sema/TreeTransform.h
  clang/lib/Serialization/ASTReaderDecl.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/CodeGenCUDA/ms-linker-options.cu
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -1,12 +1,17 @@
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
 // RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
 
 #include "Inputs/cuda.h"
 
 // HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// HOST: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
+// Check that, on MSVC, the same device kernel mangling name is generated.
+// MSVC: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// MSVC: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
 
 __device__ float d0(float x) {
-  return [](float x) { return x + 2.f; }(x);
+  return [](float x) { return x + 1.f; }(x);
 }
 
 __device__ float d1(float x) {
@@ -14,11 +19,21 @@
 }
 
 // DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 template 
 __global__ void k0(float *p, F f) {
   p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
 }
 
+// DEVICE: amdgpu_kernel void @_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE_clEf(
+// DEVICE: define internal float @_ZZ2f1PfENKUlffE_clEff(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE0_clEf(
+template 
+__global__ void k1(float *p, F0 f0, F1 f1, F2 f2) {
+  p[0] = f0(p[0]) + f1(p[1], p[2]) + f2(p[3]);
+}
+
 void f0(float *p) {
   [](float *p) {
 *p = 1.f;
@@ -29,11 +44,17 @@
 // linkages are still required to keep the original `internal` linkage.
 
 // HOST: define internal void @_ZZ2f1PfENKUlS_E_clES_(
-// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 void f1(float *p) {
   [](float *p) {
-k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 3.f; });
   }(p);
+  k1<<<1,1>>>(p,
+  [] __device__ (float x) { return x + 4.f; },
+  [] __device__ (float x, float y) { return x * y; },
+  [] __device__ (float x) { return x + 5.f; });
 }
 // HOST: @__hip_register_globals
 // HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// MSVC: __hipRegisterFunction{{.*}}@"??$k0@V@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
+// MSVC: __hipRegisterFunction{{.*}}@"??$k1@V@?0??f1@@YAXPEAM@Z@V@?0??2@YAX0@Z@V@?0??2@YAX0@Z@@@YAXPEAMV@?0??f1@@YAX0@Z@V@?0??1@YAX0@Z@V@?0??1@YAX0@Z@@Z{{.*}}@1
Index: clang/test/CodeGenCUDA/ms-linker-options.cu
===
--- clang/test/CodeGenCUDA/ms-linker-options.cu
+++ clang/test/CodeGenCUDA/ms-linker-options.cu
@@ -2,12 +2,12 @@
 // RUN:   -fno-autolink -triple amdgcn-amd-amdhsa \
 // RUN:   | FileCheck -check-prefix=DEV %s
 // RUN: %clang_cc1 -emit-llvm -o - -fms-extensions -x hip %s -triple \
-// RUN:x86_64-pc-windows-msvc | FileCheck -check-prefix=HOST %s
+// RUN:x86_64-pc-windows-msvc -aux-triple amdgcn | FileCheck -check-prefix=HOST %s
 // RUN: %clang_cc1 -emit-llvm -o - -fcuda-is-device -fms-extensions %s \
 // RUN:   -fno-autolink -triple amdgcn-amd-amdhsa \
 // RUN:   | FileCheck -check-prefix=DEV %s
 // RUN: %clang_cc1 -emit-llvm -o - -fms-extensions %s -triple \
-// RUN:

[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-02-01 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 320568.
hliao added a comment.

- Remove `DeviceManglingNumber` to reduce memory usage.
- Add a table in ASTContext for device lambda mangling numbers if that lamba 
requires.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

Files:
  clang/include/clang/AST/ASTContext.h
  clang/include/clang/AST/DeclCXX.h
  clang/include/clang/AST/Mangle.h
  clang/include/clang/AST/MangleNumberingContext.h
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/CXXABI.h
  clang/lib/AST/DeclCXX.cpp
  clang/lib/AST/ItaniumCXXABI.cpp
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/MicrosoftCXXABI.cpp
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/Sema/SemaLambda.cpp
  clang/lib/Sema/TreeTransform.h
  clang/lib/Serialization/ASTReaderDecl.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/CodeGenCUDA/ms-linker-options.cu
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -1,12 +1,17 @@
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
 // RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
 
 #include "Inputs/cuda.h"
 
 // HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// HOST: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
+// Check that, on MSVC, the same device kernel mangling name is generated.
+// MSVC: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// MSVC: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
 
 __device__ float d0(float x) {
-  return [](float x) { return x + 2.f; }(x);
+  return [](float x) { return x + 1.f; }(x);
 }
 
 __device__ float d1(float x) {
@@ -14,11 +19,21 @@
 }
 
 // DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 template 
 __global__ void k0(float *p, F f) {
   p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
 }
 
+// DEVICE: amdgpu_kernel void @_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE_clEf(
+// DEVICE: define internal float @_ZZ2f1PfENKUlffE_clEff(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE0_clEf(
+template 
+__global__ void k1(float *p, F0 f0, F1 f1, F2 f2) {
+  p[0] = f0(p[0]) + f1(p[1], p[2]) + f2(p[3]);
+}
+
 void f0(float *p) {
   [](float *p) {
 *p = 1.f;
@@ -29,11 +44,17 @@
 // linkages are still required to keep the original `internal` linkage.
 
 // HOST: define internal void @_ZZ2f1PfENKUlS_E_clES_(
-// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 void f1(float *p) {
   [](float *p) {
-k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 3.f; });
   }(p);
+  k1<<<1,1>>>(p,
+  [] __device__ (float x) { return x + 4.f; },
+  [] __device__ (float x, float y) { return x * y; },
+  [] __device__ (float x) { return x + 5.f; });
 }
 // HOST: @__hip_register_globals
 // HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// MSVC: __hipRegisterFunction{{.*}}@"??$k0@V@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
+// MSVC: __hipRegisterFunction{{.*}}@"??$k1@V@?0??f1@@YAXPEAM@Z@V@?0??2@YAX0@Z@V@?0??2@YAX0@Z@@@YAXPEAMV@?0??f1@@YAX0@Z@V@?0??1@YAX0@Z@V@?0??1@YAX0@Z@@Z{{.*}}@1
Index: clang/test/CodeGenCUDA/ms-linker-options.cu
===
--- clang/test/CodeGenCUDA/ms-linker-options.cu
+++ clang/test/CodeGenCUDA/ms-linker-options.cu
@@ -2,12 +2,12 @@
 // RUN:   -fno-autolink -triple amdgcn-amd-amdhsa \
 // RUN:   | FileCheck -check-prefix=DEV %s
 // RUN: %clang_cc1 -emit-llvm -o - -fms-extensions -x hip %s -triple \
-// RUN:x86_64-pc-windows-msvc | FileCheck -check-prefix=HOST %s
+// RUN:x86_64-pc-windows-msvc -aux-triple amdgcn | FileCheck -check-prefix=HOST %s
 // RUN: %clang_cc1 -emit-llvm -o - -fcuda-is-device -fms-extensions %s \
 // RUN:   -fno-autolink -triple amdgcn-amd-amdhsa \
 // RUN:   

[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-02-01 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 320409.
hliao added a comment.

Rebase to the main branch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

Files:
  clang/include/clang/AST/DeclCXX.h
  clang/include/clang/AST/Mangle.h
  clang/include/clang/AST/MangleNumberingContext.h
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/CXXABI.h
  clang/lib/AST/ItaniumCXXABI.cpp
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/MicrosoftCXXABI.cpp
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/Sema/SemaLambda.cpp
  clang/lib/Sema/TreeTransform.h
  clang/lib/Serialization/ASTReaderDecl.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/CodeGenCUDA/ms-linker-options.cu
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -1,12 +1,17 @@
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
 // RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
 
 #include "Inputs/cuda.h"
 
 // HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// HOST: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
+// Check that, on MSVC, the same device kernel mangling name is generated.
+// MSVC: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// MSVC: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
 
 __device__ float d0(float x) {
-  return [](float x) { return x + 2.f; }(x);
+  return [](float x) { return x + 1.f; }(x);
 }
 
 __device__ float d1(float x) {
@@ -14,11 +19,21 @@
 }
 
 // DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 template 
 __global__ void k0(float *p, F f) {
   p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
 }
 
+// DEVICE: amdgpu_kernel void @_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE_clEf(
+// DEVICE: define internal float @_ZZ2f1PfENKUlffE_clEff(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE0_clEf(
+template 
+__global__ void k1(float *p, F0 f0, F1 f1, F2 f2) {
+  p[0] = f0(p[0]) + f1(p[1], p[2]) + f2(p[3]);
+}
+
 void f0(float *p) {
   [](float *p) {
 *p = 1.f;
@@ -29,11 +44,17 @@
 // linkages are still required to keep the original `internal` linkage.
 
 // HOST: define internal void @_ZZ2f1PfENKUlS_E_clES_(
-// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 void f1(float *p) {
   [](float *p) {
-k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 3.f; });
   }(p);
+  k1<<<1,1>>>(p,
+  [] __device__ (float x) { return x + 4.f; },
+  [] __device__ (float x, float y) { return x * y; },
+  [] __device__ (float x) { return x + 5.f; });
 }
 // HOST: @__hip_register_globals
 // HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z17__device_stub__k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// MSVC: __hipRegisterFunction{{.*}}@"??$k0@V@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
+// MSVC: __hipRegisterFunction{{.*}}@"??$k1@V@?0??f1@@YAXPEAM@Z@V@?0??2@YAX0@Z@V@?0??2@YAX0@Z@@@YAXPEAMV@?0??f1@@YAX0@Z@V@?0??1@YAX0@Z@V@?0??1@YAX0@Z@@Z{{.*}}@1
Index: clang/test/CodeGenCUDA/ms-linker-options.cu
===
--- clang/test/CodeGenCUDA/ms-linker-options.cu
+++ clang/test/CodeGenCUDA/ms-linker-options.cu
@@ -2,12 +2,12 @@
 // RUN:   -fno-autolink -triple amdgcn-amd-amdhsa \
 // RUN:   | FileCheck -check-prefix=DEV %s
 // RUN: %clang_cc1 -emit-llvm -o - -fms-extensions -x hip %s -triple \
-// RUN:x86_64-pc-windows-msvc | FileCheck -check-prefix=HOST %s
+// RUN:x86_64-pc-windows-msvc -aux-triple amdgcn | FileCheck -check-prefix=HOST %s
 // RUN: %clang_cc1 -emit-llvm -o - -fcuda-is-device -fms-extensions %s \
 // RUN:   -fno-autolink -triple amdgcn-amd-amdhsa \
 // RUN:   | FileCheck -check-prefix=DEV %s
 // RUN: %clang_cc1 -emit-llvm -o - -fms-extensions %s -triple \
-// RUN:x86_64-pc-windows-msvc | FileCheck -check-prefix=HOST %s
+// RUN:

[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-02-01 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

I agree that we should aim to avoid broad memory impact for uncommon features, 
and this seems to apply.




Comment at: clang/include/clang/AST/DeclCXX.h:395-400
 /// The number used to indicate this lambda expression for name
 /// mangling in the Itanium C++ ABI.
 unsigned ManglingNumber : 31;
 
+/// The device side mangling number.
+unsigned DeviceManglingNumber = 0;

rnk wrote:
> hliao wrote:
> > rnk wrote:
> > > It seems a shame to grow LambdaDefinitionData by a pointer for all users 
> > > of C++ that do not use CUDA. Optimizing bitfields may be worth the time, 
> > > but I'll leave it to @rjmccall or @rsmith to give guidance on whether 
> > > that's worth it.
> > > 
> > > An alternative would be to store the device numbers in the mangling 
> > > context and look them up when needed, since they are so rarely needed.
> > I like the alternative way by storing all numbering into the 
> > mangle/numbering context instead of AST itself. But, it needs additional 
> > numbering post-processing after AST importing. Sound to me a major 
> > refactoring work likely to be addressed later.
> Generally, I don't think we can count on contributors to come back later and 
> optimize memory usage, so it seems reasonable to ask to avoid the regression 
> in the first place. Above I see the bitfield usage optimizing memory usage of 
> the number of captures, and then here we spent lots of memory storing device 
> mangling numbers that are only used for CUDA. I think the memory usage 
> concern still stands. I don't think it's unreasonable to maintain these 
> numbers on the side in a DenseMap.
> 
> @rsmith or @rjmccall, do you agree with that reasoning?
I think that's the right idea in general.  I don't know how much it really 
matters for LambdaData, because there aren't a huge number of lambdas in 
typical translation units.  But if you want to optimize this, a hashtable in 
the `ASTContext` or `ManglingContext` seems reasonable.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-01-28 Thread Reid Kleckner via Phabricator via cfe-commits
rnk added inline comments.



Comment at: clang/include/clang/AST/DeclCXX.h:395-400
 /// The number used to indicate this lambda expression for name
 /// mangling in the Itanium C++ ABI.
 unsigned ManglingNumber : 31;
 
+/// The device side mangling number.
+unsigned DeviceManglingNumber = 0;

hliao wrote:
> rnk wrote:
> > It seems a shame to grow LambdaDefinitionData by a pointer for all users of 
> > C++ that do not use CUDA. Optimizing bitfields may be worth the time, but 
> > I'll leave it to @rjmccall or @rsmith to give guidance on whether that's 
> > worth it.
> > 
> > An alternative would be to store the device numbers in the mangling context 
> > and look them up when needed, since they are so rarely needed.
> I like the alternative way by storing all numbering into the mangle/numbering 
> context instead of AST itself. But, it needs additional numbering 
> post-processing after AST importing. Sound to me a major refactoring work 
> likely to be addressed later.
Generally, I don't think we can count on contributors to come back later and 
optimize memory usage, so it seems reasonable to ask to avoid the regression in 
the first place. Above I see the bitfield usage optimizing memory usage of the 
number of captures, and then here we spent lots of memory storing device 
mangling numbers that are only used for CUDA. I think the memory usage concern 
still stands. I don't think it's unreasonable to maintain these numbers on the 
side in a DenseMap.

@rsmith or @rjmccall, do you agree with that reasoning?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-01-28 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.

In D69322#2523058 , @tra wrote:

> @hliao -- Can you take a look at  
> https://bugs.llvm.org/show_bug.cgi?id=48866. This patch may be relevant there.

@rnk Reid, looks like this patch does fix a lambda mangling issue in CUDA on 
Windows. Could you take another look at it?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2021-01-26 Thread Artem Belevich via Phabricator via cfe-commits
tra added a comment.

@hliao -- Can you take a look at  https://bugs.llvm.org/show_bug.cgi?id=48866. 
This patch may be relevant there.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-11-08 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

PING for review


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-11-06 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

PING for review


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-11-04 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

Looks like holidays are approaching, :). Anyway, it's really appreciated that 
you could review this patch to enable CUDA/HIP applications on Windows.

Thanks.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-11-01 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

kindly PING for review


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-10-30 Thread Michael Liao via Phabricator via cfe-commits
hliao added inline comments.



Comment at: clang/include/clang/AST/DeclCXX.h:395-400
 /// The number used to indicate this lambda expression for name
 /// mangling in the Itanium C++ ABI.
 unsigned ManglingNumber : 31;
 
+/// The device side mangling number.
+unsigned DeviceManglingNumber = 0;

rnk wrote:
> It seems a shame to grow LambdaDefinitionData by a pointer for all users of 
> C++ that do not use CUDA. Optimizing bitfields may be worth the time, but 
> I'll leave it to @rjmccall or @rsmith to give guidance on whether that's 
> worth it.
> 
> An alternative would be to store the device numbers in the mangling context 
> and look them up when needed, since they are so rarely needed.
I like the alternative way by storing all numbering into the mangle/numbering 
context instead of AST itself. But, it needs additional numbering 
post-processing after AST importing. Sound to me a major refactoring work 
likely to be addressed later.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-10-30 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 227214.
hliao marked an inline comment as done.
hliao added a comment.

simplify again following suggestion.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

Files:
  clang/include/clang/AST/DeclCXX.h
  clang/include/clang/AST/Mangle.h
  clang/include/clang/AST/MangleNumberingContext.h
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/CXXABI.h
  clang/lib/AST/ItaniumCXXABI.cpp
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/MicrosoftCXXABI.cpp
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/Sema/SemaLambda.cpp
  clang/lib/Sema/TreeTransform.h
  clang/lib/Serialization/ASTReaderDecl.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -1,12 +1,17 @@
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
 // RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
 
 #include "Inputs/cuda.h"
 
 // HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// HOST: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
+// Check that, on MSVC, the same device kernel mangling name is generated.
+// MSVC: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// MSVC: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
 
 __device__ float d0(float x) {
-  return [](float x) { return x + 2.f; }(x);
+  return [](float x) { return x + 1.f; }(x);
 }
 
 __device__ float d1(float x) {
@@ -14,11 +19,21 @@
 }
 
 // DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 template 
 __global__ void k0(float *p, F f) {
   p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
 }
 
+// DEVICE: amdgpu_kernel void @_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE_clEf(
+// DEVICE: define internal float @_ZZ2f1PfENKUlffE_clEff(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE0_clEf(
+template 
+__global__ void k1(float *p, F0 f0, F1 f1, F2 f2) {
+  p[0] = f0(p[0]) + f1(p[1], p[2]) + f2(p[3]);
+}
+
 void f0(float *p) {
   [](float *p) {
 *p = 1.f;
@@ -29,11 +44,17 @@
 // linkages are still required to keep the original `internal` linkage.
 
 // HOST: define internal void @_ZZ2f1PfENKUlS_E_clES_(
-// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 void f1(float *p) {
   [](float *p) {
-k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 3.f; });
   }(p);
+  k1<<<1,1>>>(p,
+  [] __device__ (float x) { return x + 4.f; },
+  [] __device__ (float x, float y) { return x * y; },
+  [] __device__ (float x) { return x + 5.f; });
 }
 // HOST: @__hip_register_globals
 // HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// MSVC: __hipRegisterFunction{{.*}}@"??$k0@V@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
+// MSVC: __hipRegisterFunction{{.*}}@"??$k1@V@?0??f1@@YAXPEAM@Z@V@?0??2@YAX0@Z@V@?0??2@YAX0@Z@@@YAXPEAMV@?0??f1@@YAX0@Z@V@?0??1@YAX0@Z@V@?0??1@YAX0@Z@@Z{{.*}}@1
Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -6226,6 +6226,7 @@
 Record->push_back(Lambda.NumExplicitCaptures);
 Record->push_back(Lambda.HasKnownInternalLinkage);
 Record->push_back(Lambda.ManglingNumber);
+Record->push_back(Lambda.DeviceManglingNumber);
 AddDeclRef(D->getLambdaContextDecl());
 AddTypeSourceInfo(Lambda.MethodTyInfo);
 for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
Index: clang/lib/Serialization/ASTReaderDecl.cpp
===
--- clang/lib/Serialization/ASTReaderDecl.cpp
+++ clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1692,6 +1692,7 @@
 Lambda.NumExplicitCaptures = Record.readInt();
 Lambda.HasKnownInternalLinkage = 

[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-10-30 Thread Reid Kleckner via Phabricator via cfe-commits
rnk added inline comments.



Comment at: clang/include/clang/AST/DeclCXX.h:395-400
 /// The number used to indicate this lambda expression for name
 /// mangling in the Itanium C++ ABI.
 unsigned ManglingNumber : 31;
 
+/// The device side mangling number.
+unsigned DeviceManglingNumber = 0;

It seems a shame to grow LambdaDefinitionData by a pointer for all users of C++ 
that do not use CUDA. Optimizing bitfields may be worth the time, but I'll 
leave it to @rjmccall or @rsmith to give guidance on whether that's worth it.

An alternative would be to store the device numbers in the mangling context and 
look them up when needed, since they are so rarely needed.



Comment at: clang/include/clang/AST/MangleNumberingContext.h:57-58
+
+  /// Has device mangle numbering context.
+  virtual bool hasDeviceMangleNumberingContext() const { return false; }
+

hliao wrote:
> rnk wrote:
> > It would be nicer if there were a single entry point that does the right 
> > thing for all mangling contexts.
> could you elaborate in more details?
I'll add comments at the call site.



Comment at: clang/lib/Sema/SemaLambda.cpp:479-481
+if (MCtx->hasDeviceMangleNumberingContext()) {
+  Class->setDeviceLambdaManglingNumber(
+  MCtx->getDeviceManglingNumber(Method));

Rather than having a virtual method that returns bool, call a virtual method 
that does the entire thing you are trying to do. For example, MSHIP* could be 
responsible for setting the mangling number on the class. Although, maybe it 
should be storing the number in some side table now that I think about it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-10-30 Thread Michael Liao via Phabricator via cfe-commits
hliao updated this revision to Diff 227091.
hliao added a comment.

revise `MSHIPNumberingContext`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322

Files:
  clang/include/clang/AST/DeclCXX.h
  clang/include/clang/AST/Mangle.h
  clang/include/clang/AST/MangleNumberingContext.h
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/CXXABI.h
  clang/lib/AST/ItaniumCXXABI.cpp
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/MicrosoftCXXABI.cpp
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/Sema/SemaLambda.cpp
  clang/lib/Sema/TreeTransform.h
  clang/lib/Serialization/ASTReaderDecl.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -1,12 +1,17 @@
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
 // RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
 
 #include "Inputs/cuda.h"
 
 // HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// HOST: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
+// Check that, on MSVC, the same device kernel mangling name is generated.
+// MSVC: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// MSVC: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
 
 __device__ float d0(float x) {
-  return [](float x) { return x + 2.f; }(x);
+  return [](float x) { return x + 1.f; }(x);
 }
 
 __device__ float d1(float x) {
@@ -14,11 +19,21 @@
 }
 
 // DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 template 
 __global__ void k0(float *p, F f) {
   p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
 }
 
+// DEVICE: amdgpu_kernel void @_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE_clEf(
+// DEVICE: define internal float @_ZZ2f1PfENKUlffE_clEff(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE0_clEf(
+template 
+__global__ void k1(float *p, F0 f0, F1 f1, F2 f2) {
+  p[0] = f0(p[0]) + f1(p[1], p[2]) + f2(p[3]);
+}
+
 void f0(float *p) {
   [](float *p) {
 *p = 1.f;
@@ -29,11 +44,17 @@
 // linkages are still required to keep the original `internal` linkage.
 
 // HOST: define internal void @_ZZ2f1PfENKUlS_E_clES_(
-// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 void f1(float *p) {
   [](float *p) {
-k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 3.f; });
   }(p);
+  k1<<<1,1>>>(p,
+  [] __device__ (float x) { return x + 4.f; },
+  [] __device__ (float x, float y) { return x * y; },
+  [] __device__ (float x) { return x + 5.f; });
 }
 // HOST: @__hip_register_globals
 // HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// MSVC: __hipRegisterFunction{{.*}}@"??$k0@V@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
+// MSVC: __hipRegisterFunction{{.*}}@"??$k1@V@?0??f1@@YAXPEAM@Z@V@?0??2@YAX0@Z@V@?0??2@YAX0@Z@@@YAXPEAMV@?0??f1@@YAX0@Z@V@?0??1@YAX0@Z@V@?0??1@YAX0@Z@@Z{{.*}}@1
Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -6226,6 +6226,7 @@
 Record->push_back(Lambda.NumExplicitCaptures);
 Record->push_back(Lambda.HasKnownInternalLinkage);
 Record->push_back(Lambda.ManglingNumber);
+Record->push_back(Lambda.DeviceManglingNumber);
 AddDeclRef(D->getLambdaContextDecl());
 AddTypeSourceInfo(Lambda.MethodTyInfo);
 for (unsigned I = 0, N = Lambda.NumCaptures; I != N; ++I) {
Index: clang/lib/Serialization/ASTReaderDecl.cpp
===
--- clang/lib/Serialization/ASTReaderDecl.cpp
+++ clang/lib/Serialization/ASTReaderDecl.cpp
@@ -1692,6 +1692,7 @@
 Lambda.NumExplicitCaptures = Record.readInt();
 Lambda.HasKnownInternalLinkage = Record.readInt();
 Lambda.ManglingNumber = 

[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-10-30 Thread Michael Liao via Phabricator via cfe-commits
hliao marked 3 inline comments as done.
hliao added inline comments.



Comment at: clang/include/clang/AST/MangleNumberingContext.h:57-58
+
+  /// Has device mangle numbering context.
+  virtual bool hasDeviceMangleNumberingContext() const { return false; }
+

rnk wrote:
> It would be nicer if there were a single entry point that does the right 
> thing for all mangling contexts.
could you elaborate in more details?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-10-29 Thread Reid Kleckner via Phabricator via cfe-commits
rnk added inline comments.



Comment at: clang/include/clang/AST/MangleNumberingContext.h:57-58
+
+  /// Has device mangle numbering context.
+  virtual bool hasDeviceMangleNumberingContext() const { return false; }
+

It would be nicer if there were a single entry point that does the right thing 
for all mangling contexts.



Comment at: clang/lib/AST/MicrosoftCXXABI.cpp:67-68
 
+class MSHIPNumberingContext : public MangleNumberingContext {
+  MicrosoftNumberingContext HostCtx;
+  std::unique_ptr DeviceCtx;

I know it's inheritance instead of composition, but maybe the simple thing to 
do here would be to inherit from the MicrosoftMangleNumberingContext, and then 
implement an override for getDeviceManglingNumber that delegates to the Itanium 
one.



Comment at: clang/lib/AST/MicrosoftCXXABI.cpp:114
 
+  std::unique_ptr Mangler;
+

This is actually the device's mangling context, and it's an Itanium mangling 
context. I think it needs comments and a more descriptive variable name.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-10-29 Thread Artem Belevich via Phabricator via cfe-commits
tra added a reviewer: rnk.
tra added a subscriber: rnk.
tra added a comment.

@rnk Reid, would you be the right person to look at the change on the Windows' 
side?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-10-29 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

PING for review


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-10-24 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

Please don't ping for review after a single day.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-10-23 Thread Michael Liao via Phabricator via cfe-commits
hliao added a comment.

PING for review


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D69322/new/

https://reviews.llvm.org/D69322



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D69322: [hip][cuda] Enable extended lambda support on Windows.

2019-10-22 Thread Michael Liao via Phabricator via cfe-commits
hliao created this revision.
hliao added reviewers: rsmith, rjmccall, tra, yaxunl.
Herald added a reviewer: martong.
Herald added a reviewer: shafik.
Herald added subscribers: cfe-commits, erik.pilkington.
Herald added a project: clang.

- On Windows, extended lambda has extra issues due to the numbering schemes are 
different between the host compilation (Microsoft C++ ABI) and the device 
compilation (Itanium C++ ABI. Additional device side lambda number is required 
per lambda for the host compilation to correctly mangle the device-side lambda 
name.
- A hybrid numbering context `MSHIPNumberingContext` is introduced to number a 
lambda for both host- and device-compilations.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D69322

Files:
  clang/include/clang/AST/DeclCXX.h
  clang/include/clang/AST/Mangle.h
  clang/include/clang/AST/MangleNumberingContext.h
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/ASTImporter.cpp
  clang/lib/AST/CXXABI.h
  clang/lib/AST/ItaniumCXXABI.cpp
  clang/lib/AST/ItaniumMangle.cpp
  clang/lib/AST/MicrosoftCXXABI.cpp
  clang/lib/CodeGen/CGCUDANV.cpp
  clang/lib/Sema/SemaLambda.cpp
  clang/lib/Sema/TreeTransform.h
  clang/lib/Serialization/ASTReaderDecl.cpp
  clang/lib/Serialization/ASTWriter.cpp
  clang/test/CodeGenCUDA/unnamed-types.cu

Index: clang/test/CodeGenCUDA/unnamed-types.cu
===
--- clang/test/CodeGenCUDA/unnamed-types.cu
+++ clang/test/CodeGenCUDA/unnamed-types.cu
@@ -1,12 +1,17 @@
 // RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-pc-windows-msvc -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=MSVC
 // RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
 
 #include "Inputs/cuda.h"
 
 // HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// HOST: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
+// Check that, on MSVC, the same device kernel mangling name is generated.
+// MSVC: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+// MSVC: @1 = private unnamed_addr constant [60 x i8] c"_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_\00", align 1
 
 __device__ float d0(float x) {
-  return [](float x) { return x + 2.f; }(x);
+  return [](float x) { return x + 1.f; }(x);
 }
 
 __device__ float d1(float x) {
@@ -14,11 +19,21 @@
 }
 
 // DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 template 
 __global__ void k0(float *p, F f) {
   p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
 }
 
+// DEVICE: amdgpu_kernel void @_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE_clEf(
+// DEVICE: define internal float @_ZZ2f1PfENKUlffE_clEff(
+// DEVICE: define internal float @_ZZ2f1PfENKUlfE0_clEf(
+template 
+__global__ void k1(float *p, F0 f0, F1 f1, F2 f2) {
+  p[0] = f0(p[0]) + f1(p[1], p[2]) + f2(p[3]);
+}
+
 void f0(float *p) {
   [](float *p) {
 *p = 1.f;
@@ -29,11 +44,17 @@
 // linkages are still required to keep the original `internal` linkage.
 
 // HOST: define internal void @_ZZ2f1PfENKUlS_E_clES_(
-// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
 void f1(float *p) {
   [](float *p) {
-k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+k0<<<1,1>>>(p, [] __device__ (float x) { return x + 3.f; });
   }(p);
+  k1<<<1,1>>>(p,
+  [] __device__ (float x) { return x + 4.f; },
+  [] __device__ (float x, float y) { return x * y; },
+  [] __device__ (float x) { return x + 5.f; });
 }
 // HOST: @__hip_register_globals
 // HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0
+// HOST: __hipRegisterFunction{{.*}}@_Z2k1IZ2f1PfEUlfE_Z2f1S0_EUlffE_Z2f1S0_EUlfE0_EvS0_T_T0_T1_{{.*}}@1
+// MSVC: __hipRegisterFunction{{.*}}@"??$k0@V@?0???R1?0??f1@@YAXPEAM@Z@QEBA@0@Z@@@YAXPEAMV@?0???R0?0??f1@@YAX0@Z@QEBA@0@Z@@Z{{.*}}@0
+// MSVC: __hipRegisterFunction{{.*}}@"??$k1@V@?0??f1@@YAXPEAM@Z@V@?0??2@YAX0@Z@V@?0??2@YAX0@Z@@@YAXPEAMV@?0??f1@@YAX0@Z@V@?0??1@YAX0@Z@V@?0??1@YAX0@Z@@Z{{.*}}@1
Index: clang/lib/Serialization/ASTWriter.cpp
===
--- clang/lib/Serialization/ASTWriter.cpp
+++ clang/lib/Serialization/ASTWriter.cpp
@@ -6226,6 +6226,7 @@
 Record->push_back(Lambda.NumExplicitCaptures);
 Record->push_back(Lambda.HasKnownInternalLinkage);
 Record->push_back(Lambda.ManglingNumber);
+Record->push_back(Lambda.DeviceManglingNumber);