================
@@ -245,11 +245,44 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t
__idx, double __x,
__lane_mask,
\
__gpu_suffix_scan_##__prefix##_##__suffix(__lane_mask, __x));
\
}
-__DO_LANE_OP(uint32_t, +, 0, sum, u32);
-__DO_LANE_OP(uint64_t, +, 0, sum, u64);
-__DO_LANE_OP(float, +, 0, sum, f32);
-__DO_LANE_OP(double, +, 0, sum, f64);
-#undef __DO_LANE_OP
+
+#define __GPU_OP(__x, __y) ((__x) + (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, 0, sum, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, 0, sum, u64);
+__DO_LANE_OPS(float, __GPU_OP, 0, sum, f32);
+__DO_LANE_OPS(double, __GPU_OP, 0, sum, f64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) & (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, UINT32_MAX, and, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, UINT64_MAX, and, u64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) | (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, 0, or, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, 0, or, u64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) ^ (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, 0, xor, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, 0, xor, u64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) < (__y) ? (__x) : (__y))
+__DO_LANE_OPS(uint32_t, __GPU_OP, UINT32_MAX, min, u32);
+__DO_LANE_OPS(uint64_t, __GPU_OP, UINT64_MAX, min, u64);
+__DO_LANE_OPS(float, __GPU_OP, __builtin_inff(), min, f32);
+__DO_LANE_OPS(double, __GPU_OP, __builtin_inf(), min, f64);
+#undef __GPU_OP
+
+#define __GPU_OP(__x, __y) ((__x) > (__y) ? (__x) : (__y))
----------------
arsenm wrote:
This is just completely wrong for the float cases. The float cases should not
be named min and max, and should not be implemented as compare and select. This
should have minnum/maxnum and minimum/maximum, and minimumnum/maximumnum as
distinct operations
https://github.com/llvm/llvm-project/pull/185525
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits