================
@@ -213,7 +213,7 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t __idx, double __x,
      __type __x) {                                                            \
    uint64_t __above = __lane_mask & -(2ull << __gpu_lane_id());               \
    for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) {       \
-     uint32_t __src = __above ? __builtin_ctzg(__above) : __gpu_lane_id();    \
+     uint32_t __src = __builtin_ctzg(__above);                                \
----------------
jhuber6 wrote:
I guess I'll just set it to `-1` because that's what AMDGPU uses and on other
targets it'll just be slightly slower if they use something different.
https://github.com/llvm/llvm-project/pull/189381
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits