bkietz commented on a change in pull request #10557:
URL: https://github.com/apache/arrow/pull/10557#discussion_r669034240
##########
File path: docs/source/cpp/compute.rst
##########
@@ -859,50 +859,60 @@ Structural transforms
+--------------------------+------------+------------------------------------------------+---------------------+---------+
| Function name | Arity | Input types
| Output type | Notes |
+==========================+============+================================================+=====================+=========+
-| fill_null | Binary | Boolean, Null, Numeric, Temporal,
String-like | Input type | \(1) |
+| case_when | Varargs | Boolean, Any fixed-width
| Input type | \(1) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| if_else | Ternary | Boolean, Null, Numeric, Temporal
| Input type + \(2) |
+| fill_null | Binary | Boolean, Null, Numeric, Temporal,
String-like | Input type | \(2) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_finite | Unary | Float, Double
| Boolean | \(3) |
+| if_else | Ternary | Boolean, Null, Numeric, Temporal
| Input type | \(3) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_inf | Unary | Float, Double
| Boolean | \(4) |
+| is_finite | Unary | Float, Double
| Boolean | \(4) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_nan | Unary | Float, Double
| Boolean | \(5) |
+| is_inf | Unary | Float, Double
| Boolean | \(5) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_null | Unary | Any
| Boolean | \(6) |
+| is_nan | Unary | Float, Double
| Boolean | \(6) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| is_valid | Unary | Any
| Boolean | \(7) |
+| is_null | Unary | Any
| Boolean | \(7) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| list_value_length | Unary | List-like
| Int32 or Int64 | \(8) |
+| is_valid | Unary | Any
| Boolean | \(8) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
-| project | Varargs | Any
| Struct | \(9) |
+| list_value_length | Unary | List-like
| Int32 or Int64 | \(9) |
+--------------------------+------------+------------------------------------------------+---------------------+---------+
+| project | Varargs | Any
| Struct | \(10) |
++--------------------------+------------+------------------------------------------------+---------------------+---------+
+
+* \(1) This function acts like a SQL 'case when' statement or switch-case. The
+ input is any number of alternating Boolean and value data, followed by an
Review comment:
This (and some of the other comments) needs to be updated to reflect the new
signature
##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
##########
@@ -97,6 +97,99 @@ static void IfElseBench32Contiguous(benchmark::State& state)
{
return IfElseBenchContiguous<UInt32Type>(state);
}
+template <typename Type>
+static void CaseWhenBench(benchmark::State& state) {
+ using CType = typename Type::c_type;
+ auto type = TypeTraits<Type>::type_singleton();
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+ int64_t len = state.range(0);
+ int64_t offset = state.range(1);
+
+ random::RandomArrayGenerator rand(/*seed=*/0);
+
+ auto cond1 = std::static_pointer_cast<BooleanArray>(
+ rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+ auto cond2 = std::static_pointer_cast<BooleanArray>(
+ rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+ auto cond3 = std::static_pointer_cast<BooleanArray>(
+ rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+ auto val1 = std::static_pointer_cast<ArrayType>(
+ rand.ArrayOf(type, len, /*null_probability=*/0.01));
+ auto val2 = std::static_pointer_cast<ArrayType>(
+ rand.ArrayOf(type, len, /*null_probability=*/0.01));
+ auto val3 = std::static_pointer_cast<ArrayType>(
+ rand.ArrayOf(type, len, /*null_probability=*/0.01));
+ auto val4 = std::static_pointer_cast<ArrayType>(
+ rand.ArrayOf(type, len, /*null_probability=*/0.01));
+ ASSERT_OK_AND_ASSIGN(
+ auto cond,
+ StructArray::Make({cond1, cond2, cond3}, std::vector<std::string>{"a",
"b", "c"},
+ nullptr, /*null_count=*/0));
+
+ for (auto _ : state) {
+ ABORT_NOT_OK(
+ CaseWhen(cond->Slice(offset), {val1->Slice(offset),
val2->Slice(offset),
+ val3->Slice(offset),
val4->Slice(offset)}));
+ }
+
+ state.SetBytesProcessed(state.iterations() * (len - offset) * sizeof(CType));
+}
+
+template <typename Type>
+static void CaseWhenBenchContiguous(benchmark::State& state) {
+ using CType = typename Type::c_type;
+ auto type = TypeTraits<Type>::type_singleton();
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+ int64_t len = state.range(0);
+ int64_t offset = state.range(1);
+
+ ASSERT_OK_AND_ASSIGN(auto trues, MakeArrayFromScalar(BooleanScalar(true),
len / 3));
+ ASSERT_OK_AND_ASSIGN(auto falses, MakeArrayFromScalar(BooleanScalar(false),
len / 3));
+ auto null_scalar = MakeNullScalar(boolean());
+ ASSERT_OK_AND_ASSIGN(auto nulls,
+ MakeArrayFromScalar(*null_scalar, len - 2 * (len / 3)));
Review comment:
```suggestion
ASSERT_OK_AND_ASSIGN(auto nulls, MakeArrayOfNull(boolean(), len - 2 * (len / 3)));
```
##########
File path: cpp/src/arrow/compute/kernels/scalar_if_else_benchmark.cc
##########
@@ -97,6 +97,99 @@ static void IfElseBench32Contiguous(benchmark::State& state)
{
return IfElseBenchContiguous<UInt32Type>(state);
}
+template <typename Type>
+static void CaseWhenBench(benchmark::State& state) {
+ using CType = typename Type::c_type;
+ auto type = TypeTraits<Type>::type_singleton();
+ using ArrayType = typename TypeTraits<Type>::ArrayType;
+
+ int64_t len = state.range(0);
+ int64_t offset = state.range(1);
+
+ random::RandomArrayGenerator rand(/*seed=*/0);
+
+ auto cond1 = std::static_pointer_cast<BooleanArray>(
+ rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+ auto cond2 = std::static_pointer_cast<BooleanArray>(
+ rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
+ auto cond3 = std::static_pointer_cast<BooleanArray>(
+ rand.ArrayOf(boolean(), len, /*null_probability=*/0.01));
Review comment:
```suggestion
auto fld = field("cond", boolean(),
key_value_metadata({{"null_probability", "1.0"}}));
auto cond = rand.ArrayOf(field("", struct_({fld, fld, fld})), len);
```
?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]