wgtmac commented on code in PR #36972:
URL: https://github.com/apache/arrow/pull/36972#discussion_r1280777744
##########
cpp/src/parquet/encoding.cc:
##########
@@ -340,6 +340,8 @@ class PlainEncoder<BooleanType> : public EncoderImpl,
virtual public BooleanEnco
throw ParquetException("direct put to boolean from " +
values.type()->ToString() +
" not supported");
}
+ // Put arrow array cannot mix with PlainEncoder<BooleanType>::PutImpl.
+ DCHECK_EQ(0, bit_writer_.bytes_written());
Review Comment:
This check is not perfect as PutImpl may call `bit_writer_.Clear()` inside
and bytes_written() may be 0 accidentally when we check.
##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -665,6 +666,33 @@ class EncodingAdHocTyped : public ::testing::Test {
::arrow::AssertArraysEqual(*values, *result, /*verbose=*/true);
}
+ void PlainTwice(int seed) {
+ auto values_single = GetValues(seed);
Review Comment:
```suggestion
auto random_values = GetValues(seed);
```
single looks strange here.
##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -665,6 +666,33 @@ class EncodingAdHocTyped : public ::testing::Test {
::arrow::AssertArraysEqual(*values, *result, /*verbose=*/true);
}
+ void PlainTwice(int seed) {
Review Comment:
Is it possible to extend `void Plain(int seed)` to be something like `void
Plain(int seed, int round = 1)` to avoid duplicate code?
##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -665,6 +666,33 @@ class EncodingAdHocTyped : public ::testing::Test {
::arrow::AssertArraysEqual(*values, *result, /*verbose=*/true);
}
+ void PlainTwice(int seed) {
+ auto values_single = GetValues(seed);
+ auto encoder = MakeTypedEncoder<ParquetType>(
+ Encoding::PLAIN, /*use_dictionary=*/false, column_descr());
+ auto decoder = MakeTypedDecoder<ParquetType>(Encoding::PLAIN,
column_descr());
+
+ ASSERT_NO_THROW(encoder->Put(*values_single));
+ ASSERT_NO_THROW(encoder->Put(*values_single));
+ auto buf = encoder->FlushValues();
+
+ EXPECT_OK_AND_ASSIGN(auto values,
+ ::arrow::Concatenate({values_single, values_single}));
+ decoder->SetData(static_cast<int>(values->length()), buf->data(),
+ static_cast<int>(buf->size()));
+
+ BuilderType acc(arrow_type(), ::arrow::default_memory_pool());
+ ASSERT_EQ(values->length() - values->null_count(),
+ decoder->DecodeArrow(static_cast<int>(values->length()),
+ static_cast<int>(values->null_count()),
+ values->null_bitmap_data(),
values->offset(), &acc));
+
+ std::shared_ptr<::arrow::Array> result;
+ ASSERT_OK(acc.Finish(&result));
+ ASSERT_EQ(100, result->length());
Review Comment:
```suggestion
ASSERT_EQ(size_ * 2, result->length());
```
##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -665,6 +666,33 @@ class EncodingAdHocTyped : public ::testing::Test {
::arrow::AssertArraysEqual(*values, *result, /*verbose=*/true);
}
+ void PlainTwice(int seed) {
+ auto values_single = GetValues(seed);
+ auto encoder = MakeTypedEncoder<ParquetType>(
+ Encoding::PLAIN, /*use_dictionary=*/false, column_descr());
+ auto decoder = MakeTypedDecoder<ParquetType>(Encoding::PLAIN,
column_descr());
+
+ ASSERT_NO_THROW(encoder->Put(*values_single));
+ ASSERT_NO_THROW(encoder->Put(*values_single));
+ auto buf = encoder->FlushValues();
+
+ EXPECT_OK_AND_ASSIGN(auto values,
+ ::arrow::Concatenate({values_single, values_single}));
+ decoder->SetData(static_cast<int>(values->length()), buf->data(),
+ static_cast<int>(buf->size()));
+
+ BuilderType acc(arrow_type(), ::arrow::default_memory_pool());
+ ASSERT_EQ(values->length() - values->null_count(),
+ decoder->DecodeArrow(static_cast<int>(values->length()),
+ static_cast<int>(values->null_count()),
+ values->null_bitmap_data(),
values->offset(), &acc));
+
+ std::shared_ptr<::arrow::Array> result;
+ ASSERT_OK(acc.Finish(&result));
+ ASSERT_EQ(100, result->length());
Review Comment:
Or values->length() ?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]