================ @@ -0,0 +1,357 @@ +//===---- __clang_hip_device_functions.h - HIP device functions ------------=== +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===-----------------------------------------------------------------------=== + +#ifndef __CLANG_HIP_DEVICE_FUNCTIONS_H__ +#define __CLANG_HIP_DEVICE_FUNCTIONS_H__ + +#if __HIP__ && (defined(__AMDGPU__)) + +#ifndef __device__ +#define __host__ __attribute__((host)) +#define __device__ __attribute__((device)) +#define __global__ __attribute__((global)) +#define __shared__ __attribute__((shared)) +#define __constant__ __attribute__((constant)) +#define __managed__ __attribute__((managed)) +#endif + +#include <gpuintrin.h> + +#pragma push_macro("__HIP_DEVICE__") +#define __HIP_DEVICE__ static __inline__ __attribute__((device, always_inline)) + +#pragma push_macro("MAYBE_UNDEF") +#define MAYBE_UNDEF __attribute__((maybe_undef)) + +// warpSize and the threadIdx/blockIdx/blockDim/gridDim coordinate variables. +#include <__clang_hip_builtin_vars.h> + +//===----------------------------------------------------------------------===// +// Integer intrinsics. +//===----------------------------------------------------------------------===// + +__HIP_DEVICE__ unsigned int __popc(unsigned int __x) { + return __builtin_popcountg(__x); +} +__HIP_DEVICE__ unsigned int __popcll(unsigned long long __x) { + return __builtin_popcountg(__x); +} + +__HIP_DEVICE__ int __clz(int __x) { + return __builtin_clzg((unsigned int)__x, 32); +} +__HIP_DEVICE__ int __clzll(long long __x) { + return __builtin_clzg((unsigned long long)__x, 64); +} + +__HIP_DEVICE__ int __ffs(int __x) { + return __builtin_ctzg((unsigned int)__x, -1) + 1; +} +__HIP_DEVICE__ int __ffs(unsigned int __x) { + return __builtin_ctzg(__x, -1) + 1; +} +__HIP_DEVICE__ int __ffsll(long long __x) { + return __builtin_ctzg((unsigned long long)__x, -1) + 1; +} +__HIP_DEVICE__ int __ffsll(unsigned long long __x) { + return __builtin_ctzg(__x, -1) + 1; +} + +__HIP_DEVICE__ unsigned int __brev(unsigned int __x) { + return __builtin_elementwise_bitreverse(__x); +} +__HIP_DEVICE__ unsigned long long __brevll(unsigned long long __x) { + return __builtin_elementwise_bitreverse(__x); +} + +__HIP_DEVICE__ int __mul24(int __x, int __y) { + return (((int)((unsigned)__x << 8) >> 8)) * + (((int)((unsigned)__y << 8) >> 8)); +} +__HIP_DEVICE__ int __umul24(unsigned int __x, unsigned int __y) { + return (int)((__x & 0x00ffffffu) * (__y & 0x00ffffffu)); +} + +__HIP_DEVICE__ int __mulhi(int __x, int __y) { + return (int)(((long long)__x * (long long)__y) >> 32); +} +__HIP_DEVICE__ unsigned int __umulhi(unsigned int __x, unsigned int __y) { + return (unsigned int)(((unsigned long long)__x * (unsigned long long)__y) >> + 32); +} +__HIP_DEVICE__ long long __mul64hi(long long __x, long long __y) { + return (long long)(((__int128)__x * (__int128)__y) >> 64); +} +__HIP_DEVICE__ unsigned long long __umul64hi(unsigned long long __x, + unsigned long long __y) { + return ( + unsigned long long)(((unsigned __int128)__x * (unsigned __int128)__y) >> + 64); +} + +__HIP_DEVICE__ unsigned int __sad(int __x, int __y, unsigned int __z) { + return __x > __y ? __x - __y + __z : __y - __x + __z; +} +__HIP_DEVICE__ unsigned int __usad(unsigned int __x, unsigned int __y, + unsigned int __z) { + return __x > __y ? __x - __y + __z : __y - __x + __z; +} + +__HIP_DEVICE__ int __hadd(int __x, int __y) { + return (int)(((long long)__x + (long long)__y) >> 1); +} +__HIP_DEVICE__ int __rhadd(int __x, int __y) { + return (int)(((long long)__x + (long long)__y + 1) >> 1); +} +__HIP_DEVICE__ unsigned int __uhadd(unsigned int __x, unsigned int __y) { + return (unsigned int)(((unsigned long long)__x + (unsigned long long)__y) >> + 1); +} +__HIP_DEVICE__ unsigned int __urhadd(unsigned int __x, unsigned int __y) { + return ( + unsigned int)(((unsigned long long)__x + (unsigned long long)__y + 1) >> + 1); +} + +__HIP_DEVICE__ unsigned int __byte_perm(unsigned int __x, unsigned int __y, + unsigned int __s) { + unsigned long long __tmp = ((unsigned long long)__y << 32) | __x; + unsigned int __result = 0; + for (int __i = 0; __i < 4; ++__i) { + unsigned int __sel = (__s >> (__i * 4)) & 0x7u; + __result |= (unsigned int)((__tmp >> (__sel * 8)) & 0xffu) << (__i * 8); + } + return __result; +} + +//===----------------------------------------------------------------------===// +// Bitfield operations. +//===----------------------------------------------------------------------===// + +__HIP_DEVICE__ unsigned int __lastbit_u32_u64(unsigned long long __x) { + return (unsigned int)__builtin_ctzg(__x, -1); +} + +__HIP_DEVICE__ unsigned int __bitextract_u32(unsigned int __src, + unsigned int __offset, + unsigned int __width) { + unsigned int __o = __offset & 31u; + unsigned int __w = __width & 31u; + return __w == 0 ? 0u : (__src << (32u - __o - __w)) >> (32u - __w); +} +__HIP_DEVICE__ unsigned long long __bitextract_u64(unsigned long long __src, ---------------- jhuber6 wrote:
Yeah, I much prefer the specifically sized ones, I ported a few that did it like this and wasn't sure if that was just the "style" of HIP and people expected it to be like that. https://github.com/llvm/llvm-project/pull/203980 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
