[clang] [libc] [Clang] Add more scan / reduce operations to 'gpuintrin.h' (PR #185525)

Matt Arsenault via cfe-commits Tue, 10 Mar 2026 02:13:16 -0700

================
@@ -245,11 +245,44 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t __idx, double __x,
         __lane_mask,                                                           \
         __gpu_suffix_scan_##__prefix##_##__suffix(__lane_mask, __x));          \
   }
   }
-__DO_LANE_OP(uint32_t, +, 0, sum, u32);
-__DO_LANE_OP(uint64_t, +, 0, sum, u64);
-__DO_LANE_OP(float, +, 0, sum, f32);
-__DO_LANE_OP(double, +, 0, sum, f64);
-#undef __DO_LANE_OP
+
+#define __GPU_OP(__x, __y) ((__x) + (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, 0, sum, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, 0, sum, u64);
+__DO_LANE_OPS(float, __GPU_OP, 0, sum, f32);
+__DO_LANE_OPS(double, __GPU_OP, 0, sum, f64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) & (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, UINT32_MAX, and, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, UINT64_MAX, and, u64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) | (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, 0, or, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, 0, or, u64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) ^ (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, 0, xor, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, 0, xor, u64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) < (__y) ? (__x) : (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, UINT32_MAX, min, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, UINT64_MAX, min, u64);
+__DO_LANE_OPS(float, __GPU_OP, __builtin_inff(), min, f32);
+__DO_LANE_OPS(double, __GPU_OP, __builtin_inf(), min, f64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) > (__y) ? (__x) : (__y))
----------------
arsenm wrote:


This is just completely wrong for the float cases. The float cases should not 
be named min and max, and should not be implemented as compare and select. 
Instead, there should be minnum/maxnum, minimum/maximum, and 
minimumnum/maximumnum as distinct operations.

https://github.com/llvm/llvm-project/pull/185525
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [libc] [Clang] Add more scan / reduce operations to 'gpuintrin.h' (PR #185525)

Reply via email to