bkietz commented on a change in pull request #8589:
URL: https://github.com/apache/arrow/pull/8589#discussion_r519899025



##########
File path: cpp/src/parquet/column_writer.cc
##########
@@ -1222,12 +1223,17 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, 
public TypedColumnWriter<
   }
 
   std::shared_ptr<Array> MaybeReplaceValidity(std::shared_ptr<Array> array,
-                                              int64_t new_null_count) {
+                                              int64_t new_null_count,
+                                              arrow::MemoryPool* memory_pool) {
     if (bits_buffer_ == nullptr) {
       return array;
     }
     std::vector<std::shared_ptr<Buffer>> buffers = array->data()->buffers;
     buffers[0] = bits_buffer_;
+    DCHECK_GT(buffers.size(), 1);
+    PARQUET_ASSIGN_OR_THROW(
+        buffers[1],
+        
checked_pointer_cast<arrow::FlatArray>(array)->SlicedValues(memory_pool));
     // Should be a leaf array.
     DCHECK_EQ(array->num_fields(), 0);

Review comment:
       Stylistically, the above hunk should follow this assertion of leafiness 
since that hunk assumes leafiness by casting to FlatArray

##########
File path: cpp/src/arrow/array/array_base.h
##########
@@ -207,6 +207,14 @@ static inline std::ostream& operator<<(std::ostream& os, 
const Array& x) {
 
 /// Base class for non-nested arrays
 class ARROW_EXPORT FlatArray : public Array {
+ public:
+  /// Returns the slice buffer for the second buffer in the
+  /// array (values buffer for primitives, offsets buffers
+  /// for variable length binary types.
+  ///
+  /// \note API EXPERIMENTAL
+  virtual Result<std::shared_ptr<Buffer>> SlicedValues(MemoryPool* pool) const 
= 0;

Review comment:
       Since this is only used in one place and uses "Slice" with very 
different semantics from other slices, I think it'd be better to
   1. move it into `column_writer.cc` or
   2. provide a more encapsulated and easily understood function, for example 
`Result<std::shared_ptr<ArrayData>> arrow::internal::ReplaceNullBitmap(const 
ArrayData& data, std::shared_ptr<Buffer> null_bitmap, MemoryPool* pool)`

##########
File path: cpp/src/parquet/arrow/arrow_reader_writer_test.cc
##########
@@ -3199,6 +3200,36 @@ TEST(TestArrowReaderAdHoc, HandleDictPageOffsetZero) {
   TryReadDataFile(test::get_data_file("dict-page-offset-zero.parquet"));
 }
 
+TEST(TestArrowReaderAdHoc, WriteBatchedNestedNullableStringColumn) {
+  // ARROW-10493
+  auto type =
+      ::arrow::struct_({::arrow::field("inner", ::arrow::utf8(), 
/*nullable=*/true)});
+  auto outer_array = ::arrow::ArrayFromJSON(type,
+                                            R"([{"inner": "a"},
+                                                {"inner": null},
+                                                {"inner": "b"},
+                                                {"inner": "c"},
+                                                {"inner": null},
+                                                {"inner": null},
+                                                {"inner": "d"},
+                                                null])");
+  auto inner_array =
+      std::static_pointer_cast<::arrow::StructArray>(outer_array)->field(0);
+  ASSERT_OK_AND_ASSIGN(inner_array->data()->buffers[0],
+                       ::arrow::internal::BytesToBits({1, 0, 1, 1, 0, 0, 1, 
0}));
+  ASSERT_OK_AND_ASSIGN(outer_array->data()->buffers[0],
+                       ::arrow::internal::BytesToBits({1, 1, 1, 1, 1, 1, 1, 
0}));

Review comment:
       I think you mean to assert equality here, but you're overwriting the 
null bitmaps instead.
   ```suggestion
     ASSERT_OK_AND_ASSIGN(auto expected_inner_validity,
                          ::arrow::internal::BytesToBits({1, 0, 1, 1, 0, 0, 1, 
0}));
     ASSERT_OK_AND_ASSIGN(auto expected_outer_validity,
                          ::arrow::internal::BytesToBits({1, 1, 1, 1, 1, 1, 1, 
0}));
     AssertBuffersEqual(*inner_array->data()->buffers[0], 
*expected_inner_validity);
     AssertBuffersEqual(*outer_array->data()->buffers[0], 
*expected_outer_validity);
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to