================
@@ -0,0 +1,357 @@
+//===---- __clang_hip_device_functions.h - HIP device functions ------------===
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===-----------------------------------------------------------------------===
+
+#ifndef __CLANG_HIP_DEVICE_FUNCTIONS_H__
+#define __CLANG_HIP_DEVICE_FUNCTIONS_H__
+
+#if __HIP__ && (defined(__AMDGPU__))
+
+#ifndef __device__
+#define __host__ __attribute__((host))
+#define __device__ __attribute__((device))
+#define __global__ __attribute__((global))
+#define __shared__ __attribute__((shared))
+#define __constant__ __attribute__((constant))
+#define __managed__ __attribute__((managed))
+#endif
+
+#include <gpuintrin.h>
+
+#pragma push_macro("__HIP_DEVICE__")
+#define __HIP_DEVICE__ static __inline__ __attribute__((device, always_inline))
+
+#pragma push_macro("MAYBE_UNDEF")
+#define MAYBE_UNDEF __attribute__((maybe_undef))
+
+// warpSize and the threadIdx/blockIdx/blockDim/gridDim coordinate variables.
+#include <__clang_hip_builtin_vars.h>
+
+//===----------------------------------------------------------------------===//
+// Integer intrinsics.
+//===----------------------------------------------------------------------===//
+
+__HIP_DEVICE__ unsigned int __popc(unsigned int __x) {
+  return __builtin_popcountg(__x);
+}
+__HIP_DEVICE__ unsigned int __popcll(unsigned long long __x) {
+  return __builtin_popcountg(__x);
+}
+
+__HIP_DEVICE__ int __clz(int __x) {
+  return __builtin_clzg((unsigned int)__x, 32);
+}
+__HIP_DEVICE__ int __clzll(long long __x) {
+  return __builtin_clzg((unsigned long long)__x, 64);
+}
+
+__HIP_DEVICE__ int __ffs(int __x) {
+  return __builtin_ctzg((unsigned int)__x, -1) + 1;
+}
+__HIP_DEVICE__ int __ffs(unsigned int __x) {
+  return __builtin_ctzg(__x, -1) + 1;
+}
+__HIP_DEVICE__ int __ffsll(long long __x) {
+  return __builtin_ctzg((unsigned long long)__x, -1) + 1;
+}
+__HIP_DEVICE__ int __ffsll(unsigned long long __x) {
+  return __builtin_ctzg(__x, -1) + 1;
+}
+
+__HIP_DEVICE__ unsigned int __brev(unsigned int __x) {
+  return __builtin_elementwise_bitreverse(__x);
+}
+__HIP_DEVICE__ unsigned long long __brevll(unsigned long long __x) {
+  return __builtin_elementwise_bitreverse(__x);
+}
+
+__HIP_DEVICE__ int __mul24(int __x, int __y) {
+  return (((int)((unsigned)__x << 8) >> 8)) *
+         (((int)((unsigned)__y << 8) >> 8));
+}
+__HIP_DEVICE__ int __umul24(unsigned int __x, unsigned int __y) {
+  return (int)((__x & 0x00ffffffu) * (__y & 0x00ffffffu));
+}
+
+__HIP_DEVICE__ int __mulhi(int __x, int __y) {
+  return (int)(((long long)__x * (long long)__y) >> 32);
+}
+__HIP_DEVICE__ unsigned int __umulhi(unsigned int __x, unsigned int __y) {
+  return (unsigned int)(((unsigned long long)__x * (unsigned long long)__y) >>
+                        32);
+}
+__HIP_DEVICE__ long long __mul64hi(long long __x, long long __y) {
+  return (long long)(((__int128)__x * (__int128)__y) >> 64);
+}
+__HIP_DEVICE__ unsigned long long __umul64hi(unsigned long long __x,
+                                             unsigned long long __y) {
+  return (
+      unsigned long long)(((unsigned __int128)__x * (unsigned __int128)__y) >>
+                          64);
+}
+
+__HIP_DEVICE__ unsigned int __sad(int __x, int __y, unsigned int __z) {
+  return __x > __y ? __x - __y + __z : __y - __x + __z;
+}
+__HIP_DEVICE__ unsigned int __usad(unsigned int __x, unsigned int __y,
+                                   unsigned int __z) {
+  return __x > __y ? __x - __y + __z : __y - __x + __z;
+}
+
+__HIP_DEVICE__ int __hadd(int __x, int __y) {
+  return (int)(((long long)__x + (long long)__y) >> 1);
+}
+__HIP_DEVICE__ int __rhadd(int __x, int __y) {
+  return (int)(((long long)__x + (long long)__y + 1) >> 1);
+}
+__HIP_DEVICE__ unsigned int __uhadd(unsigned int __x, unsigned int __y) {
+  return (unsigned int)(((unsigned long long)__x + (unsigned long long)__y) >>
+                        1);
+}
+__HIP_DEVICE__ unsigned int __urhadd(unsigned int __x, unsigned int __y) {
+  return (
+      unsigned int)(((unsigned long long)__x + (unsigned long long)__y + 1) >>
+                    1);
+}
+
+__HIP_DEVICE__ unsigned int __byte_perm(unsigned int __x, unsigned int __y,
+                                        unsigned int __s) {
+  unsigned long long __tmp = ((unsigned long long)__y << 32) | __x;
+  unsigned int __result = 0;
+  for (int __i = 0; __i < 4; ++__i) {
+    unsigned int __sel = (__s >> (__i * 4)) & 0x7u;
+    __result |= (unsigned int)((__tmp >> (__sel * 8)) & 0xffu) << (__i * 8);
+  }
+  return __result;
+}
+
+//===----------------------------------------------------------------------===//
+// Bitfield operations.
+//===----------------------------------------------------------------------===//
+
+__HIP_DEVICE__ unsigned int __lastbit_u32_u64(unsigned long long __x) {
+  return (unsigned int)__builtin_ctzg(__x, -1);
+}
+
+__HIP_DEVICE__ unsigned int __bitextract_u32(unsigned int __src,
+                                             unsigned int __offset,
+                                             unsigned int __width) {
+  unsigned int __o = __offset & 31u;
+  unsigned int __w = __width & 31u;
+  return __w == 0 ? 0u : (__src << (32u - __o - __w)) >> (32u - __w);
+}
+__HIP_DEVICE__ unsigned long long __bitextract_u64(unsigned long long __src,
----------------
arsenm wrote:

I've been bit by these headers using the "normal" C integer types before. 
Should these all use the explicitly sized integer types, or do we have to avoid 
including inttypes.h 

https://github.com/llvm/llvm-project/pull/203980
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to