[PATCH] D116967: [HIP] Fix device malloc/free

2022-01-11 Thread Yaxun Liu via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG694fd10659eb: [HIP] Fix device malloc/free (authored by 
yaxunl).
Herald added a project: clang.

Changed prior to commit:
  https://reviews.llvm.org/D116967?vs=398718=399030#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116967/new/

https://reviews.llvm.org/D116967

Files:
  clang/lib/Headers/__clang_hip_runtime_wrapper.h
  clang/test/Headers/hip-header.hip

Index: clang/test/Headers/hip-header.hip
===
--- clang/test/Headers/hip-header.hip
+++ clang/test/Headers/hip-header.hip
@@ -4,7 +4,7 @@
 // RUN:   -internal-isystem %S/Inputs/include \
 // RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
 // RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
-// RUN:   -D__HIPCC_RTC__ | FileCheck %s
+// RUN:   -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,NOMALLOC %s
 // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
 // RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
 // RUN:   -internal-isystem %S/Inputs/include \
@@ -25,6 +25,13 @@
 // RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
 // RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
 // RUN:   -D__HIPCC_RTC__ -std=c++14 | FileCheck -check-prefixes=CHECK,CXX14 %s
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN:   -internal-isystem %S/Inputs/include \
+// RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
+// RUN:   -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
+// RUN:   -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s
 
 // expected-no-diagnostics
 
@@ -120,14 +127,18 @@
 #include 
 // CHECK-LABEL: define{{.*}}@_Z11test_malloc
 // CHECK: call {{.*}}i8* @malloc(i64
-// CHECK: define weak {{.*}}i8* @malloc(i64
+// CHECK-LABEL: define weak {{.*}}i8* @malloc(i64
+// MALLOC:  call i64 @__ockl_dm_alloc
+// NOMALLOC:  call void @llvm.trap
 __device__ void test_malloc(void *a) {
   a = malloc(42);
 }
 
 // CHECK-LABEL: define{{.*}}@_Z9test_free
-// CHECK: call {{.*}}i8* @free(i8*
-// CHECK: define weak {{.*}}i8* @free(i8*
+// CHECK: call {{.*}}void @free(i8*
+// CHECK-LABEL: define weak {{.*}}void @free(i8*
+// MALLOC:  call void @__ockl_dm_dealloc
+// NOMALLOC: call void @llvm.trap
 __device__ void test_free(void *a) {
   free(a);
 }
Index: clang/lib/Headers/__clang_hip_runtime_wrapper.h
===
--- clang/lib/Headers/__clang_hip_runtime_wrapper.h
+++ clang/lib/Headers/__clang_hip_runtime_wrapper.h
@@ -50,6 +50,9 @@
 #include 
 #include 
 #include 
+#if __has_include("hip/hip_version.h")
+#include "hip/hip_version.h"
+#endif // __has_include("hip/hip_version.h")
 #else
 typedef __SIZE_TYPE__ size_t;
 // Define macros which are needed to declare HIP device API's without standard
@@ -74,25 +77,35 @@
 extern "C" {
 #endif //__cplusplus
 
+#if HIP_VERSION_MAJOR * 100 + HIP_VERSION_MINOR >= 405
+extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
+extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
+__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
+  return (void *) __ockl_dm_alloc(__size);
+}
+__attribute__((weak)) inline __device__ void free(void *__ptr) {
+  __ockl_dm_dealloc((unsigned long long)__ptr);
+}
+#else  // HIP version check
 #if __HIP_ENABLE_DEVICE_MALLOC__
 __device__ void *__hip_malloc(__hip_size_t __size);
 __device__ void *__hip_free(void *__ptr);
 __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
   return __hip_malloc(__size);
 }
-__attribute__((weak)) inline __device__ void *free(void *__ptr) {
-  return __hip_free(__ptr);
+__attribute__((weak)) inline __device__ void free(void *__ptr) {
+  __hip_free(__ptr);
 }
 #else
 __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
   __builtin_trap();
   return (void *)0;
 }
-__attribute__((weak)) inline __device__ void *free(void *__ptr) {
+__attribute__((weak)) inline __device__ void free(void *__ptr) {
   __builtin_trap();
-  return (void *)0;
 }
 #endif
+#endif // HIP version check
 
 #ifdef __cplusplus
 } // extern "C"
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116967: [HIP] Fix device malloc/free

2022-01-11 Thread Artem Belevich via Phabricator via cfe-commits
tra added inline comments.



Comment at: clang/lib/Headers/__clang_hip_runtime_wrapper.h:80
 
+#if HIP_VERSION_MAJOR > 4 || (HIP_VERSION_MAJOR == 4 && HIP_VERSION_MINOR >= 5)
+extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long 
__size);

yaxunl wrote:
> tra wrote:
> > Nit: perhaps something like this would express the intent a bit more 
> > directly:
> > 
> > ```
> > # if HIP_VERSION_MAJOR*100+HIP_VERSION_MINOR*10 > 450
> > ```
> > 
> > 
> We had ROCm 3.10, so the minor version may be 10 or greater.
> 
> Probably use HIP_VERSION_MAJOR*100+HIP_VERSION_MINOR > 405 instead?
LGTM. 




CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116967/new/

https://reviews.llvm.org/D116967

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116967: [HIP] Fix device malloc/free

2022-01-11 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl added inline comments.



Comment at: clang/lib/Headers/__clang_hip_runtime_wrapper.h:80
 
+#if HIP_VERSION_MAJOR > 4 || (HIP_VERSION_MAJOR == 4 && HIP_VERSION_MINOR >= 5)
+extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long 
__size);

tra wrote:
> Nit: perhaps something like this would express the intent a bit more directly:
> 
> ```
> # if HIP_VERSION_MAJOR*100+HIP_VERSION_MINOR*10 > 450
> ```
> 
> 
We had ROCm 3.10, so the minor version may be 10 or greater.

Probably use HIP_VERSION_MAJOR*100+HIP_VERSION_MINOR > 405 instead?


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116967/new/

https://reviews.llvm.org/D116967

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116967: [HIP] Fix device malloc/free

2022-01-10 Thread Artem Belevich via Phabricator via cfe-commits
tra accepted this revision.
tra added inline comments.
This revision is now accepted and ready to land.



Comment at: clang/lib/Headers/__clang_hip_runtime_wrapper.h:80
 
+#if HIP_VERSION_MAJOR > 4 || (HIP_VERSION_MAJOR == 4 && HIP_VERSION_MINOR >= 5)
+extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long 
__size);

Nit: perhaps something like this would express the intent a bit more directly:

```
# if HIP_VERSION_MAJOR*100+HIP_VERSION_MINOR*10 > 450
```




CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D116967/new/

https://reviews.llvm.org/D116967

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D116967: [HIP] Fix device malloc/free

2022-01-10 Thread Yaxun Liu via Phabricator via cfe-commits
yaxunl created this revision.
yaxunl added reviewers: tra, b-sumner.
yaxunl requested review of this revision.

ROCm 4.5 device library introduced `__ockl_dm_alloc` and `__oclk_dm_dealloc`
for supporting device side malloc/free.

This patch redefines device malloc/free to use these functions.

It also fixes a bug in the wrapper header which incorrectly defines free
with return type void* instead of void.


https://reviews.llvm.org/D116967

Files:
  clang/lib/Headers/__clang_hip_runtime_wrapper.h
  clang/test/Headers/hip-header.hip

Index: clang/test/Headers/hip-header.hip
===
--- clang/test/Headers/hip-header.hip
+++ clang/test/Headers/hip-header.hip
@@ -4,7 +4,7 @@
 // RUN:   -internal-isystem %S/Inputs/include \
 // RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
 // RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
-// RUN:   -D__HIPCC_RTC__ | FileCheck %s
+// RUN:   -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,NOMALLOC %s
 // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
 // RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
 // RUN:   -internal-isystem %S/Inputs/include \
@@ -25,6 +25,13 @@
 // RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
 // RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
 // RUN:   -D__HIPCC_RTC__ -std=c++14 | FileCheck -check-prefixes=CHECK,CXX14 %s
+// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \
+// RUN:   -internal-isystem %S/../../lib/Headers/cuda_wrappers \
+// RUN:   -internal-isystem %S/Inputs/include \
+// RUN:   -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \
+// RUN:   -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \
+// RUN:   -DHIP_VERSION_MAJOR=4 -DHIP_VERSION_MINOR=5 \
+// RUN:   -D__HIPCC_RTC__ | FileCheck -check-prefixes=CHECK,MALLOC %s
 
 // expected-no-diagnostics
 
@@ -120,14 +127,18 @@
 #include 
 // CHECK-LABEL: define{{.*}}@_Z11test_malloc
 // CHECK: call {{.*}}i8* @malloc(i64
-// CHECK: define weak {{.*}}i8* @malloc(i64
+// CHECK-LABEL: define weak {{.*}}i8* @malloc(i64
+// MALLOC:  call i64 @__ockl_dm_alloc
+// NOMALLOC:  call void @llvm.trap
 __device__ void test_malloc(void *a) {
   a = malloc(42);
 }
 
 // CHECK-LABEL: define{{.*}}@_Z9test_free
-// CHECK: call {{.*}}i8* @free(i8*
-// CHECK: define weak {{.*}}i8* @free(i8*
+// CHECK: call {{.*}}void @free(i8*
+// CHECK-LABEL: define weak {{.*}}void @free(i8*
+// MALLOC:  call void @__ockl_dm_dealloc
+// NOMALLOC: call void @llvm.trap
 __device__ void test_free(void *a) {
   free(a);
 }
Index: clang/lib/Headers/__clang_hip_runtime_wrapper.h
===
--- clang/lib/Headers/__clang_hip_runtime_wrapper.h
+++ clang/lib/Headers/__clang_hip_runtime_wrapper.h
@@ -50,6 +50,9 @@
 #include 
 #include 
 #include 
+#if __has_include("hip/hip_version.h")
+#include "hip/hip_version.h"
+#endif // __has_include("hip/hip_version.h")
 #else
 typedef __SIZE_TYPE__ size_t;
 // Define macros which are needed to declare HIP device API's without standard
@@ -74,25 +77,35 @@
 extern "C" {
 #endif //__cplusplus
 
+#if HIP_VERSION_MAJOR > 4 || (HIP_VERSION_MAJOR == 4 && HIP_VERSION_MINOR >= 5)
+extern "C" __device__ unsigned long long __ockl_dm_alloc(unsigned long long __size);
+extern "C" __device__ void __ockl_dm_dealloc(unsigned long long __addr);
+__attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
+  return (void *) __ockl_dm_alloc(__size);
+}
+__attribute__((weak)) inline __device__ void free(void *__ptr) {
+  __ockl_dm_dealloc((unsigned long long)__ptr);
+}
+#else  // HIP version check
 #if __HIP_ENABLE_DEVICE_MALLOC__
 __device__ void *__hip_malloc(__hip_size_t __size);
 __device__ void *__hip_free(void *__ptr);
 __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
   return __hip_malloc(__size);
 }
-__attribute__((weak)) inline __device__ void *free(void *__ptr) {
-  return __hip_free(__ptr);
+__attribute__((weak)) inline __device__ void free(void *__ptr) {
+  __hip_free(__ptr);
 }
 #else
 __attribute__((weak)) inline __device__ void *malloc(__hip_size_t __size) {
   __builtin_trap();
   return (void *)0;
 }
-__attribute__((weak)) inline __device__ void *free(void *__ptr) {
+__attribute__((weak)) inline __device__ void free(void *__ptr) {
   __builtin_trap();
-  return (void *)0;
 }
 #endif
+#endif // HIP version check
 
 #ifdef __cplusplus
 } // extern "C"
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits