pitrou commented on code in PR #36972:
URL: https://github.com/apache/arrow/pull/36972#discussion_r1290335030
##########
cpp/src/parquet/encoding.cc:
##########
@@ -340,97 +334,42 @@ class PlainEncoder<BooleanType> : public EncoderImpl,
virtual public BooleanEnco
throw ParquetException("direct put to boolean from " +
values.type()->ToString() +
" not supported");
}
-
const auto& data = checked_cast<const ::arrow::BooleanArray&>(values);
+
if (data.null_count() == 0) {
-
PARQUET_THROW_NOT_OK(sink_.Reserve(bit_util::BytesForBits(data.length())));
// no nulls, just dump the data
- ::arrow::internal::CopyBitmap(data.data()->GetValues<uint8_t>(1),
data.offset(),
- data.length(), sink_.mutable_data(),
sink_.length());
+ PARQUET_THROW_NOT_OK(sink_.Reserve(data.length()));
+ sink_.UnsafeAppend(data.data()->GetValues<uint8_t>(1), /*offset=*/0,
data.length());
Review Comment:
This is not right, since the offset is measured in bits here. You must
instead take the absolute buffer address and give the offset separately to
`UnsafeAppend`:
```suggestion
sink_.UnsafeAppend(data.data()->GetValues<uint8_t>(1, 0),
data.offset(), data.length());
```
You could test this by passing a sliced BooleanArray to `Put()`.
##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -1627,6 +1645,39 @@ TEST_F(TestRleBooleanEncoding, AllNull) {
/*null_probability*/ 1));
}
+class TestPlainBooleanEncoding : public TestEncodingBase<BooleanType> {};
Review Comment:
Is this class used?
##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -882,6 +894,12 @@ TYPED_TEST(EncodingAdHocTyped, PlainArrowDirectPut) {
}
}
+TYPED_TEST(EncodingAdHocTyped, PlainArrowDirectPutMultiRound) {
+ for (auto seed : {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) {
Review Comment:
```suggestion
// Check that one can put several Arrow arrays into a given encoder
// and decode to the right values (see GH-36939)
for (auto seed : {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) {
```
##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -1627,6 +1645,39 @@ TEST_F(TestRleBooleanEncoding, AllNull) {
/*null_probability*/ 1));
}
+class TestPlainBooleanEncoding : public TestEncodingBase<BooleanType> {};
+
+TEST(TestPlainBooleanArrayEncoding, AdHocRoundTrip) {
+ std::vector<std::shared_ptr<::arrow::Array>> arrays{
Review Comment:
Add a comment pointing to the GH issue number?
##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -641,27 +642,38 @@ class EncodingAdHocTyped : public ::testing::Test {
static std::shared_ptr<::arrow::DataType> arrow_type();
- void Plain(int seed) {
- auto values = GetValues(seed);
+ void Plain(int seed, int round = 1) {
Review Comment:
Nit: make this plural:
```suggestion
void Plain(int seed, int rounds = 1) {
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]