pitrou commented on a change in pull request #8628:
URL: https://github.com/apache/arrow/pull/8628#discussion_r522312822



##########
File path: cpp/src/arrow/compute/kernels/scalar_fill_null.cc
##########
@@ -153,6 +153,60 @@ void AddBasicFillNullKernels(ScalarKernel kernel, ScalarFunction* func) {
   AddKernels({boolean(), null()});
 }
 
+template <typename Type>
+struct FillNullFunctor<Type, enable_if_t<is_base_binary_type<Type>::value>> {
+  using BuilderType = typename TypeTraits<Type>::BuilderType;
+
+  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const ArrayData& input = *batch[0].array();
+    const auto& fill_value_scalar =
+        checked_cast<const BaseBinaryScalar&>(*batch[1].scalar());
+    util::string_view fill_value(*fill_value_scalar.value);
+    ArrayData* output = out->mutable_array();
+
+    // Ensure the kernel is configured properly to have no validity bitmap /
+    // null count 0 unless we explicitly propagate it below.
+    DCHECK(output->buffers[0] == nullptr);
+
+    if (input.MayHaveNulls() && fill_value_scalar.is_valid) {

Review comment:
       Instead of calling `MayHaveNulls`, just compute the null count and check it here.

##########
File path: docs/source/cpp/compute.rst
##########
@@ -431,17 +431,17 @@ Structural transforms
 
 .. XXX (this category is a bit of a hodgepodge)
 
-+--------------------------+------------+---------------------------------------+---------------------+---------+
-| Function name            | Arity      | Input types                           | Output type         | Notes   |
-+==========================+============+=======================================+=====================+=========+
-| fill_null                | Binary     | Boolean, Null, Numeric, Temporal      | Boolean             | \(1)    |
-+--------------------------+------------+---------------------------------------+---------------------+---------+
-| is_null                  | Unary      | Any                                   | Boolean             | \(2)    |
-+--------------------------+------------+---------------------------------------+---------------------+---------+
-| is_valid                 | Unary      | Any                                   | Boolean             | \(2)    |
-+--------------------------+------------+---------------------------------------+---------------------+---------+
-| list_value_length        | Unary      | List-like                             | Int32 or Int64      | \(4)    |
-+--------------------------+------------+---------------------------------------+---------------------+---------+
++--------------------------+------------+------------------------------------------------+---------------------+---------+
+| Function name            | Arity      | Input types                                    | Output type         | Notes   |
++==========================+============+================================================+=====================+=========+
+| fill_null                | Binary     | Boolean, Null, Numeric, Temporal, String-like  | Input type          | \(1)    |

Review comment:
       Thanks :-)

##########
File path: cpp/src/arrow/compute/kernels/scalar_fill_null.cc
##########
@@ -153,6 +153,60 @@ void AddBasicFillNullKernels(ScalarKernel kernel, ScalarFunction* func) {
   AddKernels({boolean(), null()});
 }
 
+template <typename Type>
+struct FillNullFunctor<Type, enable_if_t<is_base_binary_type<Type>::value>> {
+  using BuilderType = typename TypeTraits<Type>::BuilderType;
+
+  static void Exec(KernelContext* ctx, const ExecBatch& batch, Datum* out) {
+    const ArrayData& input = *batch[0].array();
+    const auto& fill_value_scalar =
+        checked_cast<const BaseBinaryScalar&>(*batch[1].scalar());
+    util::string_view fill_value(*fill_value_scalar.value);
+    ArrayData* output = out->mutable_array();
+
+    // Ensure the kernel is configured properly to have no validity bitmap /
+    // null count 0 unless we explicitly propagate it below.
+    DCHECK(output->buffers[0] == nullptr);
+
+    if (input.MayHaveNulls() && fill_value_scalar.is_valid) {
+      BuilderType builder(input.type, ctx->memory_pool());
+      //

Review comment:
       Did you mean to say something here?

##########
File path: cpp/src/arrow/compute/kernels/codegen_internal.h
##########
@@ -1058,6 +1058,22 @@ ArrayKernelExec GenerateTypeAgnosticPrimitive(detail::GetTypeId get_id) {
   }
 }
 
+/// similar to GenerateTypeAgnosticPrimitive, but for variable types
+template <template <typename...> class Generator>
+ArrayKernelExec GenerateTypeAgnosticVarBinaryBase(detail::GetTypeId get_id) {
+  switch (get_id.id) {
+    case Type::BINARY:
+    case Type::STRING:
+      return Generator<BinaryType>::Exec;

Review comment:
       Why don't you use `StringType` for `Type::STRING`? Fear of code duplication?

##########
File path: python/pyarrow/tests/test_compute.py
##########
@@ -860,6 +860,16 @@ def test_fill_null():
     expected = pa.array([None, None, None, None])
     assert result.equals(expected)
 
+    arr = pa.array(['a', 'bb', None])
+    result = arr.fill_null('ccc')
+    expected = pa.array(['a', 'bb', 'ccc'])
+    assert result.equals(expected)
+
+    arr = pa.array([b'a', b'bb', None], type=pa.large_binary())
+    result = arr.fill_null('ccc')

Review comment:
       Hmm, we can pass a unicode string here? Interesting.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to