lidavidm commented on a change in pull request #11466: URL: https://github.com/apache/arrow/pull/11466#discussion_r741075422
########## File path: cpp/src/arrow/array/array_nested.h ##########
@@ -370,6 +370,13 @@ class ARROW_EXPORT StructArray : public Array {
 /// \param[in] pool The pool to allocate null bitmaps from, if necessary
 Result<ArrayVector> Flatten(MemoryPool* pool = default_memory_pool()) const;
+ /// \brief Get one of the child arrays, adjusting the null bitmap if necessary.
+ ///
+ /// \param[in] index Which child array to get
+ /// \param[in] pool The pool to allocate null bitmaps from, if necessary
+ Result<std::shared_ptr<Array>> Flatten(int index,

Review comment:
We could call it `MakeFlattenedChild` or `GetFlattenedChild` or something if that helps? I agree Flatten isn't the best name when it comes to a single child array.

########## File path: cpp/src/arrow/compute/exec/expression.h ##########
@@ -112,7 +113,7 @@ class ARROW_EXPORT Expression {
 // post-bind properties
 ValueDescr descr;
- int index;
+ internal::SmallVector<int, 2> indices;

Review comment:
It's the number of items that can be stored before it changes from a stack-based to heap-based representation.

########## File path: cpp/src/arrow/compute/exec/expression.cc ##########
@@ -512,7 +511,31 @@ Result<Datum> ExecuteScalarExpression(const Expression& expr, const ExecBatch& i
 return MakeNullScalar(null());
 }
- const Datum& field = input[param->index];
+ Datum field = input[param->indices[0]];
+ for (auto it = param->indices.begin() + 1; it != param->indices.end(); ++it) {

Review comment:
It skips the first index and I don't think it'd be clearer overall to avoid the explicit iterator.
########## File path: cpp/src/arrow/array/array_nested.cc ########## @@ -541,56 +541,62 @@ std::shared_ptr<Array> StructArray::GetFieldByName(const std::string& name) cons Result<ArrayVector> StructArray::Flatten(MemoryPool* pool) const { ArrayVector flattened; - flattened.reserve(data_->child_data.size()); + flattened.resize(data_->child_data.size()); std::shared_ptr<Buffer> null_bitmap = data_->buffers[0]; - for (const auto& child_data_ptr : data_->child_data) { - auto child_data = child_data_ptr->Copy(); + for (int i = 0; static_cast<size_t>(i) < data_->child_data.size(); i++) { + ARROW_ASSIGN_OR_RAISE(flattened[i], Flatten(i, pool)); + } - std::shared_ptr<Buffer> flattened_null_bitmap; - int64_t flattened_null_count = kUnknownNullCount; + return flattened; +} - // Need to adjust for parent offset - if (data_->offset != 0 || data_->length != child_data->length) { - child_data = child_data->Slice(data_->offset, data_->length); - } - std::shared_ptr<Buffer> child_null_bitmap = child_data->buffers[0]; - const int64_t child_offset = child_data->offset; - - // The validity of a flattened datum is the logical AND of the struct - // element's validity and the individual field element's validity. 
- if (null_bitmap && child_null_bitmap) { - ARROW_ASSIGN_OR_RAISE( - flattened_null_bitmap, - BitmapAnd(pool, child_null_bitmap->data(), child_offset, null_bitmap_data_, - data_->offset, data_->length, child_offset)); - } else if (child_null_bitmap) { - flattened_null_bitmap = child_null_bitmap; - flattened_null_count = child_data->null_count; - } else if (null_bitmap) { - if (child_offset == data_->offset) { - flattened_null_bitmap = null_bitmap; - } else { - // If the child has an offset, need to synthesize a validity - // buffer with an offset too - ARROW_ASSIGN_OR_RAISE(flattened_null_bitmap, - AllocateEmptyBitmap(child_offset + data_->length, pool)); - CopyBitmap(null_bitmap_data_, data_->offset, data_->length, - flattened_null_bitmap->mutable_data(), child_offset); - } - flattened_null_count = data_->null_count; - } else { - flattened_null_count = 0; - } +Result<std::shared_ptr<Array>> StructArray::Flatten(int index, MemoryPool* pool) const { + std::shared_ptr<Buffer> null_bitmap = data_->buffers[0]; + + auto child_data = data_->child_data[index]->Copy(); Review comment: Shallow copy. 
https://github.com/apache/arrow/blob/a0c650415bc28920512077faecdfa9d07d3c4efe/cpp/src/arrow/array/data.h#L164 ########## File path: cpp/src/arrow/compute/exec/expression.cc ########## @@ -394,14 +394,13 @@ Result<Expression> BindImpl(Expression expr, const TypeOrSchema& in, if (expr.literal()) return expr; if (auto ref = expr.field_ref()) { - if (ref->IsNested()) { - return Status::NotImplemented("nested field references"); - } - ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in)); auto bound = *expr.parameter(); - bound.index = path[0]; + bound.indices.resize(path.indices().size()); + for (size_t i = 0; i < path.indices().size(); ++i) { + bound.indices[i] = path.indices()[i]; + } Review comment: It seems SmallVector doesn't implement iterator_traits fully (I also had issues with trying to use insert() that I should try and debug) ########## File path: cpp/src/arrow/compute/exec/expression.cc ########## @@ -394,14 +394,13 @@ Result<Expression> BindImpl(Expression expr, const TypeOrSchema& in, if (expr.literal()) return expr; if (auto ref = expr.field_ref()) { - if (ref->IsNested()) { - return Status::NotImplemented("nested field references"); - } - ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in)); auto bound = *expr.parameter(); - bound.index = path[0]; + bound.indices.resize(path.indices().size()); + for (size_t i = 0; i < path.indices().size(); ++i) { + bound.indices[i] = path.indices()[i]; + } Review comment: ``` /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/stl_algobase.h:378:46: error: no type named 'value_type' in 'std::iterator_traits<arrow::internal::StaticVectorImpl<int, 2, arrow::internal::SmallVectorStorage<int, 2> > >' typedef typename iterator_traits<_OI>::value_type _ValueTypeO; ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~ /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/stl_algobase.h:422:23: note: in instantiation of function template specialization 'std::__copy_move_a<false, const int *, 
arrow::internal::StaticVectorImpl<int, 2, arrow::internal::SmallVectorStorage<int, 2> > >' requested here return _OI(std::__copy_move_a<_IsMove>(std::__niter_base(__first), ^ /usr/lib/gcc/x86_64-linux-gnu/7.5.0/../../../../include/c++/7.5.0/bits/stl_algobase.h:454:20: note: in instantiation of function template specialization 'std::__copy_move_a2<false, __gnu_cxx::__normal_iterator<const int *, std::vector<int, std::allocator<int> > >, arrow::internal::StaticVectorImpl<int, 2, arrow::internal::SmallVectorStorage<int, 2> > >' requested here return (std::__copy_move_a2<__is_move_iterator<_II>::__value> ^ /home/lidavidm/Code/upstream/arrow-13987/cpp/src/arrow/compute/exec/expression.cc:401:10: note: in instantiation of function template specialization 'std::copy<__gnu_cxx::__normal_iterator<const int *, std::vector<int, std::allocator<int> > >, arrow::internal::StaticVectorImpl<int, 2, arrow::internal::SmallVectorStorage<int, 2> > >' requested here std::copy(path.indices().begin(), path.indices().end(), bound.indices); ^ /home/lidavidm/Code/upstream/arrow-13987/cpp/src/arrow/compute/exec/expression.cc:424:10: note: in instantiation of function template specialization 'arrow::compute::(anonymous namespace)::BindImpl<arrow::DataType>' requested here return BindImpl(*this, *in.type, in.shape, exec_context); ^ ``` ########## File path: cpp/src/arrow/compute/exec/expression.cc ########## @@ -394,14 +394,13 @@ Result<Expression> BindImpl(Expression expr, const TypeOrSchema& in, if (expr.literal()) return expr; if (auto ref = expr.field_ref()) { - if (ref->IsNested()) { - return Status::NotImplemented("nested field references"); - } - ARROW_ASSIGN_OR_RAISE(auto path, ref->FindOne(in)); auto bound = *expr.parameter(); - bound.index = path[0]; + bound.indices.resize(path.indices().size()); + for (size_t i = 0; i < path.indices().size(); ++i) { + bound.indices[i] = path.indices()[i]; + } Review comment: Ah never mind that, I was being dumb about iterator vs container. 
########## File path: cpp/src/arrow/array/array_nested.h ##########
@@ -370,6 +370,13 @@ class ARROW_EXPORT StructArray : public Array {
 /// \param[in] pool The pool to allocate null bitmaps from, if necessary
 Result<ArrayVector> Flatten(MemoryPool* pool = default_memory_pool()) const;
+ /// \brief Get one of the child arrays, adjusting the null bitmap if necessary.
+ ///
+ /// \param[in] index Which child array to get
+ /// \param[in] pool The pool to allocate null bitmaps from, if necessary
+ Result<std::shared_ptr<Array>> Flatten(int index,

Review comment:
Updated this and fixed the copy above to use std::copy.

--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org
For queries about this service, please contact Infrastructure at: us...@infra.apache.org