js8544 commented on code in PR #37292:
URL: https://github.com/apache/arrow/pull/37292#discussion_r1301856602


##########
cpp/src/arrow/compute/kernels/scalar_cast_nested.cc:
##########
@@ -145,6 +153,135 @@ void AddListCast(CastFunction* func) {
   DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
 }
 
+template <typename DestType>
+struct CastFixedToVarList {
+  using dest_offset_type = typename DestType::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
+    const CastOptions& options = CastState::Get(ctx);
+
+    auto child_type = checked_cast<const DestType&>(*out->type()).value_type();
+
+    const ArraySpan& in_array = batch[0].array;
+
+    ArrayData* out_array = out->array_data().get();
+    ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
+                          GetNullBitmapBuffer(in_array, ctx->memory_pool()));
+
+    const auto& in_type = checked_cast<const 
FixedSizeListType&>(*in_array.type);
+    const int32_t list_size = in_type.list_size();
+
+    // Allocate a new offsets buffer
+    ARROW_ASSIGN_OR_RAISE(out_array->buffers[1],
+                          ctx->Allocate(sizeof(dest_offset_type) * 
(batch.length + 1)));
+    auto* offsets = out_array->GetMutableValues<dest_offset_type>(1);
+    dest_offset_type offset = 0;
+    for (int64_t i = 0; i <= batch.length; ++i) {
+      offsets[i] = offset;
+      offset += list_size;
+    }
+
+    // Handle values
+    std::shared_ptr<ArrayData> values = in_array.child_data[0].ToArrayData();
+    if (in_array.offset > 0) {
+      values = values->Slice(in_array.offset * list_size, in_array.length * 
list_size);
+    }
+    ARROW_ASSIGN_OR_RAISE(Datum cast_values,
+                          Cast(values, child_type, options, 
ctx->exec_context()));
+    DCHECK(cast_values.is_array());
+    out_array->child_data.push_back(cast_values.array());
+
+    return Status::OK();
+  }
+};
+
+template <typename SrcType>
+struct CastVarToFixedList {
+  using src_offset_type = typename SrcType::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
+    const CastOptions& options = CastState::Get(ctx);
+
+    auto child_type = checked_cast<const 
FixedSizeListType&>(*out->type()).value_type();
+
+    const ArraySpan& in_array = batch[0].array;
+
+    const auto& out_type = checked_cast<const 
FixedSizeListType&>(*out->type());
+    const int32_t list_size = out_type.list_size();
+
+    // Validate lengths by comparing to the expected offsets.
+    const auto* offsets = in_array.GetValues<src_offset_type>(1);
+    src_offset_type expected_offset = offsets[0] + list_size;
+    if (in_array.GetNullCount() > 0) {
+      for (int64_t i = 1; i <= batch.length; ++i) {
+        if (in_array.IsNull(i - 1)) {
+          // If element is null, it can be any size, so the next offset is 
valid.
+          expected_offset = offsets[i] + list_size;
+        } else {
+          if (offsets[i] != expected_offset) {
+            return Status::Invalid("ListType can only be casted to 
FixedSizeListType ",
+                                   "if the lists are all the expected size.");
+          }
+          expected_offset += list_size;
+        }
+      }
+    } else {
+      // Don't need to check null slots if there are no nulls
+      for (int64_t i = 1; i <= batch.length; ++i) {
+        if (offsets[i] != expected_offset) {
+          return Status::Invalid("ListType can only be casted to 
FixedSizeListType ",
+                                 "if the lists are all the expected size.");
+        }
+        expected_offset += list_size;
+      }
+    }
+
+    ArrayData* out_array = out->array_data().get();
+    ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
+                          GetNullBitmapBuffer(in_array, ctx->memory_pool()));
+
+    // Handle values
+    std::shared_ptr<ArrayData> values = in_array.child_data[0].ToArrayData();
+    ARROW_ASSIGN_OR_RAISE(Datum cast_values_datum,
+                          Cast(values, child_type, options, 
ctx->exec_context()));
+
+    DCHECK(cast_values_datum.is_array());
+    std::shared_ptr<ArrayData> cast_values = cast_values_datum.array();
+
+    if (in_array.GetNullCount() > 0) {
+      // We need to fill in the null slots, so we'll use Take on the values.

Review Comment:
   Nit: The `Take` call can be skipped if every null slot also spans exactly 
`list_size` elements. This can be checked cheaply while validating the offsets.



##########
cpp/src/arrow/compute/kernels/scalar_cast_nested.cc:
##########
@@ -145,6 +153,135 @@ void AddListCast(CastFunction* func) {
   DCHECK_OK(func->AddKernel(SrcType::type_id, std::move(kernel)));
 }
 
+template <typename DestType>
+struct CastFixedToVarList {
+  using dest_offset_type = typename DestType::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
+    const CastOptions& options = CastState::Get(ctx);
+
+    auto child_type = checked_cast<const DestType&>(*out->type()).value_type();
+
+    const ArraySpan& in_array = batch[0].array;
+
+    ArrayData* out_array = out->array_data().get();
+    ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
+                          GetNullBitmapBuffer(in_array, ctx->memory_pool()));
+
+    const auto& in_type = checked_cast<const 
FixedSizeListType&>(*in_array.type);
+    const int32_t list_size = in_type.list_size();
+
+    // Allocate a new offsets buffer
+    ARROW_ASSIGN_OR_RAISE(out_array->buffers[1],
+                          ctx->Allocate(sizeof(dest_offset_type) * 
(batch.length + 1)));
+    auto* offsets = out_array->GetMutableValues<dest_offset_type>(1);
+    dest_offset_type offset = 0;
+    for (int64_t i = 0; i <= batch.length; ++i) {
+      offsets[i] = offset;
+      offset += list_size;
+    }
+
+    // Handle values
+    std::shared_ptr<ArrayData> values = in_array.child_data[0].ToArrayData();
+    if (in_array.offset > 0) {
+      values = values->Slice(in_array.offset * list_size, in_array.length * 
list_size);
+    }
+    ARROW_ASSIGN_OR_RAISE(Datum cast_values,
+                          Cast(values, child_type, options, 
ctx->exec_context()));
+    DCHECK(cast_values.is_array());
+    out_array->child_data.push_back(cast_values.array());
+
+    return Status::OK();
+  }
+};
+
+template <typename SrcType>
+struct CastVarToFixedList {
+  using src_offset_type = typename SrcType::offset_type;
+
+  static Status Exec(KernelContext* ctx, const ExecSpan& batch, ExecResult* 
out) {
+    const CastOptions& options = CastState::Get(ctx);
+
+    auto child_type = checked_cast<const 
FixedSizeListType&>(*out->type()).value_type();
+
+    const ArraySpan& in_array = batch[0].array;
+
+    const auto& out_type = checked_cast<const 
FixedSizeListType&>(*out->type());
+    const int32_t list_size = out_type.list_size();
+
+    // Validate lengths by comparing to the expected offsets.
+    const auto* offsets = in_array.GetValues<src_offset_type>(1);
+    src_offset_type expected_offset = offsets[0] + list_size;
+    if (in_array.GetNullCount() > 0) {
+      for (int64_t i = 1; i <= batch.length; ++i) {
+        if (in_array.IsNull(i - 1)) {
+          // If element is null, it can be any size, so the next offset is 
valid.
+          expected_offset = offsets[i] + list_size;
+        } else {
+          if (offsets[i] != expected_offset) {
+            return Status::Invalid("ListType can only be casted to 
FixedSizeListType ",
+                                   "if the lists are all the expected size.");
+          }
+          expected_offset += list_size;
+        }
+      }
+    } else {
+      // Don't need to check null slots if there are no nulls
+      for (int64_t i = 1; i <= batch.length; ++i) {
+        if (offsets[i] != expected_offset) {
+          return Status::Invalid("ListType can only be casted to 
FixedSizeListType ",
+                                 "if the lists are all the expected size.");
+        }
+        expected_offset += list_size;
+      }
+    }
+
+    ArrayData* out_array = out->array_data().get();
+    ARROW_ASSIGN_OR_RAISE(out_array->buffers[0],
+                          GetNullBitmapBuffer(in_array, ctx->memory_pool()));
+
+    // Handle values
+    std::shared_ptr<ArrayData> values = in_array.child_data[0].ToArrayData();
+    ARROW_ASSIGN_OR_RAISE(Datum cast_values_datum,
+                          Cast(values, child_type, options, 
ctx->exec_context()));
+
+    DCHECK(cast_values_datum.is_array());
+    std::shared_ptr<ArrayData> cast_values = cast_values_datum.array();
+
+    if (in_array.GetNullCount() > 0) {
+      // We need to fill in the null slots, so we'll use Take on the values.
+      auto builder = Int64Builder(ctx->memory_pool());
+      RETURN_NOT_OK(builder.Reserve(in_array.length * list_size));
+      for (int64_t offset_i = 0; offset_i < in_array.length; ++offset_i) {
+        if (in_array.IsNull(offset_i)) {
+          // If element is null, just fill in the null slots with first value.
+          for (int64_t j = 0; j < list_size; ++j) {
+            builder.UnsafeAppend(0);
+          }
+        } else {
+          int64_t value_i = offsets[offset_i];
+          for (int64_t j = 0; j < list_size; ++j) {
+            builder.UnsafeAppend(value_i++);
+          }
+        }
+      }
+      ARROW_ASSIGN_OR_RAISE(auto indices, builder.Finish());
+      ARROW_ASSIGN_OR_RAISE(auto take_result,
+                            Take(cast_values, Datum(indices),
+                                 TakeOptions::NoBoundsCheck(), 
ctx->exec_context()));
+      DCHECK(take_result.is_array());
+      cast_values = take_result.array();
+    } else {
+      // No nulls, so we can just slice the values.
+      cast_values = cast_values->Slice(offsets[0], in_array.length * 
list_size);

Review Comment:
   Nit: There is also no need to `Slice` when `offsets[0] == 0`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to