wgtmac commented on code in PR #36972:
URL: https://github.com/apache/arrow/pull/36972#discussion_r1280777744


##########
cpp/src/parquet/encoding.cc:
##########
@@ -340,6 +340,8 @@ class PlainEncoder<BooleanType> : public EncoderImpl, 
virtual public BooleanEnco
       throw ParquetException("direct put to boolean from " + 
values.type()->ToString() +
                              " not supported");
     }
+    // Put arrow array cannot mix with PlainEncoder<BooleanType>::PutImpl.
+    DCHECK_EQ(0, bit_writer_.bytes_written());

Review Comment:
   This check is not perfect as PutImpl may call `bit_writer_.Clear()` inside 
and bytes_written() may be 0 accidentally when we check.



##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -665,6 +666,33 @@ class EncodingAdHocTyped : public ::testing::Test {
     ::arrow::AssertArraysEqual(*values, *result, /*verbose=*/true);
   }
 
+  void PlainTwice(int seed) {
+    auto values_single = GetValues(seed);

Review Comment:
   ```suggestion
       auto random_values = GetValues(seed);
   ```
   
   single looks strange here.



##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -665,6 +666,33 @@ class EncodingAdHocTyped : public ::testing::Test {
     ::arrow::AssertArraysEqual(*values, *result, /*verbose=*/true);
   }
 
+  void PlainTwice(int seed) {

Review Comment:
   Is it possible to extend `void Plain(int seed)` to be something like `void 
Plain(int seed, int round = 1)` to avoid duplicate code?



##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -665,6 +666,33 @@ class EncodingAdHocTyped : public ::testing::Test {
     ::arrow::AssertArraysEqual(*values, *result, /*verbose=*/true);
   }
 
+  void PlainTwice(int seed) {
+    auto values_single = GetValues(seed);
+    auto encoder = MakeTypedEncoder<ParquetType>(
+        Encoding::PLAIN, /*use_dictionary=*/false, column_descr());
+    auto decoder = MakeTypedDecoder<ParquetType>(Encoding::PLAIN, 
column_descr());
+
+    ASSERT_NO_THROW(encoder->Put(*values_single));
+    ASSERT_NO_THROW(encoder->Put(*values_single));
+    auto buf = encoder->FlushValues();
+
+    EXPECT_OK_AND_ASSIGN(auto values,
+                         ::arrow::Concatenate({values_single, values_single}));
+    decoder->SetData(static_cast<int>(values->length()), buf->data(),
+                     static_cast<int>(buf->size()));
+
+    BuilderType acc(arrow_type(), ::arrow::default_memory_pool());
+    ASSERT_EQ(values->length() - values->null_count(),
+              decoder->DecodeArrow(static_cast<int>(values->length()),
+                                   static_cast<int>(values->null_count()),
+                                   values->null_bitmap_data(), 
values->offset(), &acc));
+
+    std::shared_ptr<::arrow::Array> result;
+    ASSERT_OK(acc.Finish(&result));
+    ASSERT_EQ(100, result->length());

Review Comment:
   ```suggestion
       ASSERT_EQ(size_ * 2, result->length());
   ```



##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -665,6 +666,33 @@ class EncodingAdHocTyped : public ::testing::Test {
     ::arrow::AssertArraysEqual(*values, *result, /*verbose=*/true);
   }
 
+  void PlainTwice(int seed) {
+    auto values_single = GetValues(seed);
+    auto encoder = MakeTypedEncoder<ParquetType>(
+        Encoding::PLAIN, /*use_dictionary=*/false, column_descr());
+    auto decoder = MakeTypedDecoder<ParquetType>(Encoding::PLAIN, 
column_descr());
+
+    ASSERT_NO_THROW(encoder->Put(*values_single));
+    ASSERT_NO_THROW(encoder->Put(*values_single));
+    auto buf = encoder->FlushValues();
+
+    EXPECT_OK_AND_ASSIGN(auto values,
+                         ::arrow::Concatenate({values_single, values_single}));
+    decoder->SetData(static_cast<int>(values->length()), buf->data(),
+                     static_cast<int>(buf->size()));
+
+    BuilderType acc(arrow_type(), ::arrow::default_memory_pool());
+    ASSERT_EQ(values->length() - values->null_count(),
+              decoder->DecodeArrow(static_cast<int>(values->length()),
+                                   static_cast<int>(values->null_count()),
+                                   values->null_bitmap_data(), 
values->offset(), &acc));
+
+    std::shared_ptr<::arrow::Array> result;
+    ASSERT_OK(acc.Finish(&result));
+    ASSERT_EQ(100, result->length());

Review Comment:
   Or values->length() ?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to