pitrou commented on code in PR #41037:
URL: https://github.com/apache/arrow/pull/41037#discussion_r1555803934
##########
cpp/src/parquet/encoding_test.cc:
##########
@@ -635,6 +635,144 @@ TEST(BooleanArrayEncoding, AdHocRoundTrip) {
}
}
+class TestBooleanArrowDecoding : public ::testing::Test {
+ public:
+ // number of values including nulls
+ constexpr static int kNumValues = 10000;
+
+ void SetUp() override {
+ null_probabilities_ = {0.0, 0.001, 0.5, 0.999, 1.0};
+ read_batch_sizes_ = {1024, 4096, 10000};
+ true_probabilities_ = {0.0, 0.001, 0.5, 0.999, 1.0};
+ }
+ void TearDown() override {}
+
+ void InitTestCase(Encoding::type encoding, double null_probability,
+ double true_probability) {
+ GenerateInputData(null_probability, true_probability);
+ SetupEncoderDecoder(encoding);
+ }
+
+ void GenerateInputData(double null_probability, double true_probability) {
+ ::arrow::random::RandomArrayGenerator rag(0);
+ expected_dense_ = rag.Boolean(kNumValues, true_probability,
null_probability);
+ null_count_ = static_cast<int>(expected_dense_->null_count());
+ valid_bits_ = expected_dense_->null_bitmap_data();
+
+ // Initialize input_data_ for the encoder from the expected_array_ values
+ const auto& boolean_array =
+ static_cast<const ::arrow::BooleanArray&>(*expected_dense_);
+ input_data_.resize(boolean_array.length());
+
+ for (int64_t i = 0; i < boolean_array.length(); ++i) {
+ input_data_[i] = boolean_array.Value(i);
+ }
+ }
+
+ // Setup encoder/decoder pair for testing with boolean encoding
+ void SetupEncoderDecoder(Encoding::type encoding) {
+ encoder_ = MakeTypedEncoder<BooleanType>(encoding);
+ decoder_ = MakeTypedDecoder<BooleanType>(encoding);
+ const auto* data_ptr = reinterpret_cast<const bool*>(input_data_.data());
+ if (valid_bits_ != nullptr) {
+ ASSERT_NO_THROW(encoder_->PutSpaced(data_ptr, kNumValues, valid_bits_,
0));
+ } else {
+ ASSERT_NO_THROW(encoder_->Put(data_ptr, kNumValues));
+ }
+ buffer_ = encoder_->FlushValues();
+ decoder_->SetData(kNumValues, buffer_->data(),
static_cast<int>(buffer_->size()));
+ }
+
+ void CheckDense(int actual_num_values, const ::arrow::Array& chunk) {
+ ASSERT_EQ(actual_num_values, kNumValues - null_count_);
+ ASSERT_ARRAYS_EQUAL(chunk, *expected_dense_);
+ }
+
+ void CheckDecodeArrowUsingDenseBuilder(Encoding::type encoding) {
+ for (double np : null_probabilities_) {
+ for (double true_prob : true_probabilities_) {
+ for (int read_batch_size : this->read_batch_sizes_) {
+ // Resume the state of decoder
+ InitTestCase(encoding, np, true_prob);
Review Comment:
This regenerates the input data for each batch size, even though the data depends only on the null and true probabilities; can we avoid this?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]