[PATCH] D107971: [openmp] Annotate tmp variables with omp_thread_mem_alloc

2021-08-18 Thread Jon Chesterfield via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGdbd7bad9ad9b: [openmp] Annotate tmp variables with 
omp_thread_mem_alloc (authored by JonChesterfield).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107971/new/

https://reviews.llvm.org/D107971

Files:
  clang/lib/Headers/__clang_hip_math.h
  clang/test/Headers/Inputs/include/omp.h

Index: clang/test/Headers/Inputs/include/omp.h
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/omp.h
@@ -0,0 +1,21 @@
+#ifndef __OMP_H
+#define __OMP_H
+
+#if _OPENMP
+// Follows the pattern in interface.h
+// Clang sema checks this type carefully, needs to closely match that from omp.h
+typedef enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = ~(0U)
+} omp_allocator_handle_t;
+#endif
+
+#endif
Index: clang/lib/Headers/__clang_hip_math.h
===
--- clang/lib/Headers/__clang_hip_math.h
+++ clang/lib/Headers/__clang_hip_math.h
@@ -19,6 +19,9 @@
 #endif
 #include 
 #include 
+#ifdef __OPENMP_AMDGCN__
+#include 
+#endif
 #endif // !defined(__HIPCC_RTC__)
 
 #pragma push_macro("__DEVICE__")
@@ -258,6 +261,9 @@
 __DEVICE__
 float frexpf(float __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
   __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -343,6 +349,9 @@
 __DEVICE__
 float modff(float __x, float *__iptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
   __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__iptr = __tmp;
@@ -423,6 +432,9 @@
 __DEVICE__
 float remquof(float __x, float __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r = __ocml_remquo_f32(
   __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -479,6 +491,9 @@
 __DEVICE__
 void sincosf(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr =
   __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -487,6 +502,9 @@
 __DEVICE__
 void sincospif(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f32(
   __x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -799,6 +817,9 @@
 __DEVICE__
 double frexp(double __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
   __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -883,6 +904,9 @@
 __DEVICE__
 double modf(double __x, double *__iptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
   __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
   *__iptr = __tmp;
@@ -971,6 +995,9 @@
 __DEVICE__
 double remquo(double __x, double __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r = __ocml_remquo_f64(
   __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -1029,6 +1056,9 @@
 __DEVICE__
 void sincos(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincos_f64(
   __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;
@@ -1037,6 +1067,9 @@
 __DEVICE__
 void sincospi(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f64(
   __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107971: [openmp] Annotate tmp variables with omp_thread_mem_alloc

2021-08-18 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield updated this revision to Diff 367374.
JonChesterfield added a comment.

- add minimal omp.h to clang test inputs


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107971/new/

https://reviews.llvm.org/D107971

Files:
  clang/lib/Headers/__clang_hip_math.h
  clang/test/Headers/Inputs/include/omp.h

Index: clang/test/Headers/Inputs/include/omp.h
===
--- /dev/null
+++ clang/test/Headers/Inputs/include/omp.h
@@ -0,0 +1,21 @@
+#ifndef __OMP_H
+#define __OMP_H
+
+#if _OPENMP
+// Follows the pattern in interface.h
+// Clang sema checks this type carefully, needs to closely match that from omp.h
+typedef enum omp_allocator_handle_t {
+  omp_null_allocator = 0,
+  omp_default_mem_alloc = 1,
+  omp_large_cap_mem_alloc = 2,
+  omp_const_mem_alloc = 3,
+  omp_high_bw_mem_alloc = 4,
+  omp_low_lat_mem_alloc = 5,
+  omp_cgroup_mem_alloc = 6,
+  omp_pteam_mem_alloc = 7,
+  omp_thread_mem_alloc = 8,
+  KMP_ALLOCATOR_MAX_HANDLE = ~(0U)
+} omp_allocator_handle_t;
+#endif
+
+#endif
Index: clang/lib/Headers/__clang_hip_math.h
===
--- clang/lib/Headers/__clang_hip_math.h
+++ clang/lib/Headers/__clang_hip_math.h
@@ -19,6 +19,9 @@
 #endif
 #include 
 #include 
+#ifdef __OPENMP_AMDGCN__
+#include 
+#endif
 #endif // !defined(__HIPCC_RTC__)
 
 #pragma push_macro("__DEVICE__")
@@ -258,6 +261,9 @@
 __DEVICE__
 float frexpf(float __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
   __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -343,6 +349,9 @@
 __DEVICE__
 float modff(float __x, float *__iptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
   __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__iptr = __tmp;
@@ -423,6 +432,9 @@
 __DEVICE__
 float remquof(float __x, float __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r = __ocml_remquo_f32(
   __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -479,6 +491,9 @@
 __DEVICE__
 void sincosf(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr =
   __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -487,6 +502,9 @@
 __DEVICE__
 void sincospif(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f32(
   __x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -799,6 +817,9 @@
 __DEVICE__
 double frexp(double __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
   __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -883,6 +904,9 @@
 __DEVICE__
 double modf(double __x, double *__iptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
   __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
   *__iptr = __tmp;
@@ -971,6 +995,9 @@
 __DEVICE__
 double remquo(double __x, double __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r = __ocml_remquo_f64(
   __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -1029,6 +1056,9 @@
 __DEVICE__
 void sincos(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincos_f64(
   __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;
@@ -1037,6 +1067,9 @@
 __DEVICE__
 void sincospi(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f64(
   __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107971: [openmp] Annotate tmp variables with omp_thread_mem_alloc

2021-08-18 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added a comment.

intended content for the new omp.h is

  #ifndef __OMP_H
  #define __OMP_H
  
  #if _OPENMP
  // Follows the pattern in interface.h
  // Clang sema checks this type carefully, needs to closely match that from 
omp.h
  typedef enum omp_allocator_handle_t {
omp_null_allocator = 0,
omp_default_mem_alloc = 1,
omp_large_cap_mem_alloc = 2,
omp_const_mem_alloc = 3,
omp_high_bw_mem_alloc = 4,
omp_low_lat_mem_alloc = 5,
omp_cgroup_mem_alloc = 6,
omp_pteam_mem_alloc = 7,
omp_thread_mem_alloc = 8,
KMP_ALLOCATOR_MAX_HANDLE = ~(0U)
  } omp_allocator_handle_t;
  #endif
  
  #endif

trying to reproduce the failure locally first to verify that the fix works


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107971/new/

https://reviews.llvm.org/D107971

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107971: [openmp] Annotate tmp variables with omp_thread_mem_alloc

2021-08-17 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert added a comment.

In D107971#2941739 , @JonChesterfield 
wrote:

> Failed a CI job that builds an openmp test in an environment without omp.h, 
> will revert.
>
> Thoughts on fixing? Putting the omp allocator definition in this header is 
> likely to collide with a real omp.h. Fairly clean fix is to move the 
> definitions into the deviceRTL. Tempting fix is to add a minimal omp.h to the 
> test dir
>
> example failure:
>
>   In file included from 
> /home/tcwg-buildslave/worker/clang-aarch64-quick/llvm/clang/test/Headers/amdgcn_openmp_device_math.c:9:
>   In file included from 
> /home/tcwg-buildslave/worker/clang-aarch64-quick/llvm/clang/test/Headers/../../lib/Headers/openmp_wrappers/math.h:55:
>   
> /home/tcwg-buildslave/worker/clang-aarch64-quick/stage1/lib/clang/14.0.0/include/__clang_hip_math.h:23:10:
>  fatal error: 'omp.h' file not found
>   #include 

yes add `omp.h`, in `clang/test/Headers/Inputs/include`.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107971/new/

https://reviews.llvm.org/D107971

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107971: [openmp] Annotate tmp variables with omp_thread_mem_alloc

2021-08-12 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added a comment.

Failed a CI job that builds an openmp test in an environment without omp.h, 
will revert.

Thoughts on fixing? Putting the omp allocator definition in this header is 
likely to collide with a real omp.h. Fairly clean fix is to move the 
definitions into the deviceRTL. Tempting fix is to add a minimal omp.h to the 
test dir


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107971/new/

https://reviews.llvm.org/D107971

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107971: [openmp] Annotate tmp variables with omp_thread_mem_alloc

2021-08-12 Thread Jon Chesterfield via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGb6113548c921: [openmp] Annotate tmp variables with 
omp_thread_mem_alloc (authored by JonChesterfield).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107971/new/

https://reviews.llvm.org/D107971

Files:
  clang/lib/Headers/__clang_hip_math.h

Index: clang/lib/Headers/__clang_hip_math.h
===
--- clang/lib/Headers/__clang_hip_math.h
+++ clang/lib/Headers/__clang_hip_math.h
@@ -19,6 +19,9 @@
 #endif
 #include 
 #include 
+#ifdef __OPENMP_AMDGCN__
+#include 
+#endif
 #endif // !defined(__HIPCC_RTC__)
 
 #pragma push_macro("__DEVICE__")
@@ -258,6 +261,9 @@
 __DEVICE__
 float frexpf(float __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
   __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -343,6 +349,9 @@
 __DEVICE__
 float modff(float __x, float *__iptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
   __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__iptr = __tmp;
@@ -423,6 +432,9 @@
 __DEVICE__
 float remquof(float __x, float __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r = __ocml_remquo_f32(
   __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -479,6 +491,9 @@
 __DEVICE__
 void sincosf(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr =
   __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -487,6 +502,9 @@
 __DEVICE__
 void sincospif(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f32(
   __x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -799,6 +817,9 @@
 __DEVICE__
 double frexp(double __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
   __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -883,6 +904,9 @@
 __DEVICE__
 double modf(double __x, double *__iptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
   __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
   *__iptr = __tmp;
@@ -971,6 +995,9 @@
 __DEVICE__
 double remquo(double __x, double __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r = __ocml_remquo_f64(
   __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -1029,6 +1056,9 @@
 __DEVICE__
 void sincos(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincos_f64(
   __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;
@@ -1037,6 +1067,9 @@
 __DEVICE__
 void sincospi(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f64(
   __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107971: [openmp] Annotate tmp variables with omp_thread_mem_alloc

2021-08-12 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added a comment.

In D107971#2941666 , @jdoerfert wrote:

> Assuming this causes us to generate an `alloc as(5)` for `__tmp`, LG

Yes, with the running example and this patch we get the perfect:

  %__tmp = alloca double, align 8, addrspace(5)
  %call = call double @__ocml_sincos_f64(double %__x, double addrspace(5)* 
%__tmp)

Before, we get:

  %__tmp = call fastcc i8* @__kmpc_alloc_shared()
  %__tmp_on_stack = bitcast i8* %__tmp to double*
  %__tmp_on_stack.ascast = addrspacecast double* %__tmp_on_stack to double 
addrspace(5)*
  %call = call double @__ocml_sincos_f64(double %__x, double addrspace(5)* 
%__tmp_on_stack.ascast)

which interacts badly with the addrspace(5) annotation on the ocml function


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107971/new/

https://reviews.llvm.org/D107971

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107971: [openmp] Annotate tmp variables with omp_thread_mem_alloc

2021-08-12 Thread Johannes Doerfert via Phabricator via cfe-commits
jdoerfert accepted this revision.
jdoerfert added a comment.
This revision is now accepted and ready to land.

Assuming this causes us to generate an `alloc as(5)` for `__tmp`, LG


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107971/new/

https://reviews.llvm.org/D107971

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107971: [openmp] Annotate tmp variables with omp_thread_mem_alloc

2021-08-12 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield added reviewers: ye-luo, yaxunl.
JonChesterfield added a subscriber: ye-luo.
JonChesterfield added a comment.

@ye-luo this fixes the modf and sincos test cases from 
https://github.com/ye-luo/openmp-target


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D107971/new/

https://reviews.llvm.org/D107971

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D107971: [openmp] Annotate tmp variables with omp_thread_mem_alloc

2021-08-12 Thread Jon Chesterfield via Phabricator via cfe-commits
JonChesterfield created this revision.
JonChesterfield added reviewers: arsenm, jdoerfert.
Herald added subscribers: guansong, yaxunl.
JonChesterfield requested review of this revision.
Herald added subscribers: cfe-commits, sstefan1, wdng.
Herald added a project: clang.

Fixes miscompile of calls into ocml. Bug 51445.

The stack variable `double __tmp` is moved to dynamically allocated shared
memory by CGOpenMPRuntimeGPU. This is usually fine, but when the variable
is passed to a function that is explicitly annotated address_space(5) then
allocating the variable off-stack leads to a miscompile in the back end,
which cannot decide to move the variable back to the stack from shared.

This could be fixed by removing the AS(5) annotation from the math library
or by explicitly marking the variables as thread_mem_alloc. The cast to
AS(5) is still a no-op once IR is reached.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D107971

Files:
  clang/lib/Headers/__clang_hip_math.h

Index: clang/lib/Headers/__clang_hip_math.h
===
--- clang/lib/Headers/__clang_hip_math.h
+++ clang/lib/Headers/__clang_hip_math.h
@@ -19,6 +19,9 @@
 #endif
 #include 
 #include 
+#ifdef __OPENMP_AMDGCN__
+#include 
+#endif
 #endif // !defined(__HIPCC_RTC__)
 
 #pragma push_macro("__DEVICE__")
@@ -258,6 +261,9 @@
 __DEVICE__
 float frexpf(float __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
   __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -343,6 +349,9 @@
 __DEVICE__
 float modff(float __x, float *__iptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r =
   __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__iptr = __tmp;
@@ -423,6 +432,9 @@
 __DEVICE__
 float remquof(float __x, float __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   float __r = __ocml_remquo_f32(
   __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -479,6 +491,9 @@
 __DEVICE__
 void sincosf(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr =
   __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -487,6 +502,9 @@
 __DEVICE__
 void sincospif(float __x, float *__sinptr, float *__cosptr) {
   float __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f32(
   __x, (__attribute__((address_space(5))) float *)&__tmp);
   *__cosptr = __tmp;
@@ -799,6 +817,9 @@
 __DEVICE__
 double frexp(double __x, int *__nptr) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
   __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp);
   *__nptr = __tmp;
@@ -883,6 +904,9 @@
 __DEVICE__
 double modf(double __x, double *__iptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r =
   __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
   *__iptr = __tmp;
@@ -971,6 +995,9 @@
 __DEVICE__
 double remquo(double __x, double __y, int *__quo) {
   int __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   double __r = __ocml_remquo_f64(
   __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
   *__quo = __tmp;
@@ -1029,6 +1056,9 @@
 __DEVICE__
 void sincos(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincos_f64(
   __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;
@@ -1037,6 +1067,9 @@
 __DEVICE__
 void sincospi(double __x, double *__sinptr, double *__cosptr) {
   double __tmp;
+#ifdef __OPENMP_AMDGCN__
+#pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
+#endif
   *__sinptr = __ocml_sincospi_f64(
   __x, (__attribute__((address_space(5))) double *)&__tmp);
   *__cosptr = __tmp;
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits