lidavidm commented on a change in pull request #11466:
URL: https://github.com/apache/arrow/pull/11466#discussion_r741075422



##########
File path: cpp/src/arrow/array/array_nested.h
##########
@@ -370,6 +370,13 @@ class ARROW_EXPORT StructArray : public Array {
   /// \param[in] pool The pool to allocate null bitmaps from, if necessary
   Result<ArrayVector> Flatten(MemoryPool* pool = default_memory_pool()) const;
 
+  /// \brief Get one of the child arrays, adjusting the null bitmap if 
necessary.
+  ///
+  /// \param[in] index Which child array to get
+  /// \param[in] pool The pool to allocate null bitmaps from, if necessary
+  Result<std::shared_ptr<Array>> Flatten(int index,

Review comment:
       We could call it `MakeFlattenedChild` or `GetFlattenedChild` or 
something if that helps? I agree Flatten isn't the best name when it comes to a 
single child array.

##########
File path: cpp/src/arrow/compute/exec/expression.h
##########
@@ -112,7 +113,7 @@ class ARROW_EXPORT Expression {
 
     // post-bind properties
     ValueDescr descr;
-    int index;
+    internal::SmallVector<int, 2> indices;

Review comment:
       The `2` is the number of items that can be stored inline before the 
SmallVector switches from a stack-based to a heap-based representation.

##########
File path: cpp/src/arrow/compute/exec/expression.cc
##########
@@ -512,7 +511,31 @@ Result<Datum> ExecuteScalarExpression(const Expression& 
expr, const ExecBatch& i
       return MakeNullScalar(null());
     }
 
-    const Datum& field = input[param->index];
+    Datum field = input[param->indices[0]];
+    for (auto it = param->indices.begin() + 1; it != param->indices.end(); 
++it) {

Review comment:
       The loop intentionally skips the first index (it is already consumed by 
`input[param->indices[0]]` just above), and I don't think avoiding the 
explicit iterator would make this clearer overall.

##########
File path: cpp/src/arrow/array/array_nested.cc
##########
@@ -541,56 +541,62 @@ std::shared_ptr<Array> StructArray::GetFieldByName(const 
std::string& name) cons
 
 Result<ArrayVector> StructArray::Flatten(MemoryPool* pool) const {
   ArrayVector flattened;
-  flattened.reserve(data_->child_data.size());
+  flattened.resize(data_->child_data.size());
   std::shared_ptr<Buffer> null_bitmap = data_->buffers[0];
 
-  for (const auto& child_data_ptr : data_->child_data) {
-    auto child_data = child_data_ptr->Copy();
+  for (int i = 0; static_cast<size_t>(i) < data_->child_data.size(); i++) {
+    ARROW_ASSIGN_OR_RAISE(flattened[i], Flatten(i, pool));
+  }
 
-    std::shared_ptr<Buffer> flattened_null_bitmap;
-    int64_t flattened_null_count = kUnknownNullCount;
+  return flattened;
+}
 
-    // Need to adjust for parent offset
-    if (data_->offset != 0 || data_->length != child_data->length) {
-      child_data = child_data->Slice(data_->offset, data_->length);
-    }
-    std::shared_ptr<Buffer> child_null_bitmap = child_data->buffers[0];
-    const int64_t child_offset = child_data->offset;
-
-    // The validity of a flattened datum is the logical AND of the struct
-    // element's validity and the individual field element's validity.
-    if (null_bitmap && child_null_bitmap) {
-      ARROW_ASSIGN_OR_RAISE(
-          flattened_null_bitmap,
-          BitmapAnd(pool, child_null_bitmap->data(), child_offset, 
null_bitmap_data_,
-                    data_->offset, data_->length, child_offset));
-    } else if (child_null_bitmap) {
-      flattened_null_bitmap = child_null_bitmap;
-      flattened_null_count = child_data->null_count;
-    } else if (null_bitmap) {
-      if (child_offset == data_->offset) {
-        flattened_null_bitmap = null_bitmap;
-      } else {
-        // If the child has an offset, need to synthesize a validity
-        // buffer with an offset too
-        ARROW_ASSIGN_OR_RAISE(flattened_null_bitmap,
-                              AllocateEmptyBitmap(child_offset + 
data_->length, pool));
-        CopyBitmap(null_bitmap_data_, data_->offset, data_->length,
-                   flattened_null_bitmap->mutable_data(), child_offset);
-      }
-      flattened_null_count = data_->null_count;
-    } else {
-      flattened_null_count = 0;
-    }
+Result<std::shared_ptr<Array>> StructArray::Flatten(int index, MemoryPool* 
pool) const {
+  std::shared_ptr<Buffer> null_bitmap = data_->buffers[0];
+
+  auto child_data = data_->child_data[index]->Copy();

Review comment:
       Shallow copy. 
https://github.com/apache/arrow/blob/a0c650415bc28920512077faecdfa9d07d3c4efe/cpp/src/arrow/array/data.h#L164

##########
File path: cpp/src/arrow/compute/exec/expression.cc
##########
@@ -394,14 +394,13 @@ Result<Expression> BindImpl(Expression expr, const 
TypeOrSchema& in,
   if (expr.literal()) return expr;
 
   if (auto ref = expr.field_ref()) {
-    if (ref->IsNested()) {
-      return Status::NotImplemented("nested field references");
-    }
-
     ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in));
 
     auto bound = *expr.parameter();
-    bound.index = path[0];
+    bound.indices.resize(path.indices().size());
+    for (size_t i = 0; i < path.indices().size(); ++i) {
+      bound.indices[i] = path.indices()[i];
+    }

Review comment:
       It seems SmallVector doesn't fully implement iterator_traits. (I also 
had issues trying to use insert(), which I should try to debug.)

##########
File path: cpp/src/arrow/compute/exec/expression.cc
##########
@@ -394,14 +394,13 @@ Result<Expression> BindImpl(Expression expr, const 
TypeOrSchema& in,
   if (expr.literal()) return expr;
 
   if (auto ref = expr.field_ref()) {
-    if (ref->IsNested()) {
-      return Status::NotImplemented("nested field references");
-    }
-
     ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in));
 
     auto bound = *expr.parameter();
-    bound.index = path[0];
+    bound.indices.resize(path.indices().size());
+    for (size_t i = 0; i < path.indices().size(); ++i) {
+      bound.indices[i] = path.indices()[i];
+    }

Review comment:
       ```
   
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/stl_algobase.h:378:46:
 error: no type named 'value_type' in 
'std::iterator_traits<arrow::internal::StaticVectorImpl<int, 2, 
arrow::internal::SmallVectorStorage<int, 2> > >'
         typedef typename iterator_traits<_OI>::value_type _ValueTypeO;
                 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~
   
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/stl_algobase.h:422:23:
 note: in instantiation of function template specialization 
'std::__copy_move_a<false, const int *, arrow::internal::StaticVectorImpl<int, 
2, arrow::internal::SmallVectorStorage<int, 2> > >' requested here
         return _OI(std::__copy_move_a<_IsMove>(std::__niter_base(__first),
                         ^
   
/usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/stl_algobase.h:454:20:
 note: in instantiation of function template specialization 
'std::__copy_move_a2<false, __gnu_cxx::__normal_iterator<const int *, 
std::vector<int, std::allocator<int> > >, 
arrow::internal::StaticVectorImpl<int, 2, 
arrow::internal::SmallVectorStorage<int, 2> > >' requested here
         return (std::__copy_move_a2<__is_move_iterator<_II>::__value>
                      ^
   
/home/lidavidm/Code/upstream/arrow-13987/cpp/src/arrow/compute/exec/expression.cc:401:10:
 note: in instantiation of function template specialization 
'std::copy<__gnu_cxx::__normal_iterator<const int *, std::vector<int, 
std::allocator<int> > >, arrow::internal::StaticVectorImpl<int, 2, 
arrow::internal::SmallVectorStorage<int, 2> > >' requested here
       std::copy(path.indices().begin(), path.indices().end(), bound.indices);
            ^
   
/home/lidavidm/Code/upstream/arrow-13987/cpp/src/arrow/compute/exec/expression.cc:424:10:
 note: in instantiation of function template specialization 
'arrow::compute::(anonymous namespace)::BindImpl<arrow::DataType>' requested 
here
     return BindImpl(*this, *in.type, in.shape, exec_context);
            ^
   ```

##########
File path: cpp/src/arrow/compute/exec/expression.cc
##########
@@ -394,14 +394,13 @@ Result<Expression> BindImpl(Expression expr, const 
TypeOrSchema& in,
   if (expr.literal()) return expr;
 
   if (auto ref = expr.field_ref()) {
-    if (ref->IsNested()) {
-      return Status::NotImplemented("nested field references");
-    }
-
     ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in));
 
     auto bound = *expr.parameter();
-    bound.index = path[0];
+    bound.indices.resize(path.indices().size());
+    for (size_t i = 0; i < path.indices().size(); ++i) {
+      bound.indices[i] = path.indices()[i];
+    }

Review comment:
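       Ah, never mind that: I mixed up iterator vs. container. I was passing 
the container itself (`bound.indices`) as the output argument of `std::copy` 
rather than an iterator such as `bound.indices.begin()`.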

##########
File path: cpp/src/arrow/array/array_nested.h
##########
@@ -370,6 +370,13 @@ class ARROW_EXPORT StructArray : public Array {
   /// \param[in] pool The pool to allocate null bitmaps from, if necessary
   Result<ArrayVector> Flatten(MemoryPool* pool = default_memory_pool()) const;
 
+  /// \brief Get one of the child arrays, adjusting the null bitmap if 
necessary.
+  ///
+  /// \param[in] index Which child array to get
+  /// \param[in] pool The pool to allocate null bitmaps from, if necessary
+  Result<std::shared_ptr<Array>> Flatten(int index,

Review comment:
       Updated this and fixed the copy above to use std::copy.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to