emkornfield commented on a change in pull request #8632:
URL: https://github.com/apache/arrow/pull/8632#discussion_r522673324



##########
File path: cpp/src/parquet/encoding.cc
##########
@@ -127,6 +129,21 @@ class PlainEncoder : public EncoderImpl, virtual public 
TypedEncoder<DType> {
   }
 
  protected:
+  template <typename ArrayType>
+  void PutBinaryArray(const ArrayType& array) {
+    const int64_t total_bytes =
+        array.value_offset(array.length()) - array.value_offset(0);
+    PARQUET_THROW_NOT_OK(sink_.Reserve(total_bytes + array.length() * 
sizeof(uint32_t)));

Review comment:
       is a check for int32 overflow needed here here?

##########
File path: cpp/src/parquet/arrow/arrow_reader_writer_test.cc
##########
@@ -1135,14 +1172,50 @@ TEST_F(TestStringParquetIO, 
EmptyStringColumnRequiredWrite) {
   ASSERT_NO_FATAL_FAILURE(this->ReaderFromSink(&reader));
   ASSERT_NO_FATAL_FAILURE(this->ReadTableFromFile(std::move(reader), &out));
   ASSERT_EQ(1, out->num_columns());
-  ASSERT_EQ(100, out->num_rows());
+  ASSERT_EQ(table->num_rows(), out->num_rows());
 
   std::shared_ptr<ChunkedArray> chunked_array = out->column(0);
   ASSERT_EQ(1, chunked_array->num_chunks());
 
   AssertArraysEqual(*values, *chunked_array->chunk(0));
 }
 
+using TestLargeBinaryParquetIO = TestParquetIO<::arrow::LargeBinaryType>;
+
+TEST_F(TestLargeBinaryParquetIO, Basics) {
+  const char* json = "[\"foo\", \"\", null, \"\xff\"]";
+
+  const auto large_type = ::arrow::large_binary();
+  const auto narrow_type = ::arrow::binary();
+  const auto large_array = ::arrow::ArrayFromJSON(large_type, json);
+  const auto narrow_array = ::arrow::ArrayFromJSON(narrow_type, json);
+
+  this->RoundTripSingleColumn(large_array, narrow_array,
+                              default_arrow_writer_properties());
+  const auto arrow_properties =
+      ::parquet::ArrowWriterProperties::Builder().store_schema()->build();
+
+  this->RoundTripSingleColumn(large_array, large_array, arrow_properties);
+}
+
+using TestLargeStringParquetIO = TestParquetIO<::arrow::LargeStringType>;
+
+TEST_F(TestLargeStringParquetIO, Basics) {
+  const char* json = R"(["foo", "", null, "bar"])";
+
+  const auto large_type = ::arrow::large_utf8();
+  const auto narrow_type = ::arrow::utf8();
+  const auto large_array = ::arrow::ArrayFromJSON(large_type, json);
+  const auto narrow_array = ::arrow::ArrayFromJSON(narrow_type, json);
+
+  this->RoundTripSingleColumn(large_array, narrow_array,

Review comment:
       is this intended to be two different arrays?  same as above?  maybe add 
a comment on what you are testing here? (lack of schema to read back?)

##########
File path: cpp/src/parquet/statistics.cc
##########
@@ -691,9 +634,60 @@ void TypedStatisticsImpl<ByteArrayType>::PlainDecode(const 
std::string& src,
   dst->ptr = reinterpret_cast<const uint8_t*>(src.c_str());
 }
 
+}  // namespace
+
 // ----------------------------------------------------------------------
 // Public factory functions
 
+std::shared_ptr<Comparator> Comparator::Make(Type::type physical_type,
+                                             SortOrder::type sort_order,
+                                             int type_length) {
+  if (SortOrder::SIGNED == sort_order) {

Review comment:
       is this just moving code?

##########
File path: cpp/src/parquet/encoding.cc
##########
@@ -127,6 +129,21 @@ class PlainEncoder : public EncoderImpl, virtual public 
TypedEncoder<DType> {
   }
 
  protected:
+  template <typename ArrayType>

Review comment:
       i'm not very familiar with this code, could you let me know what these 
changes are intended to do?

##########
File path: cpp/src/parquet/encoding.cc
##########
@@ -592,6 +597,29 @@ class DictEncoderImpl : public EncoderImpl, virtual public 
DictEncoder<DType> {
   /// Indices that have not yet be written out by WriteIndices().
   ArrowPoolVector<int32_t> buffered_indices_;
 
+  template <typename ArrayType>
+  void PutBinaryArray(const ArrayType& array) {
+    ::arrow::VisitArrayDataInline<typename ArrayType::TypeClass>(
+        *array.data(),
+        [&](::arrow::util::string_view view) {
+          // XXX check length

Review comment:
       implement the TODO?




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to