fatemehp commented on code in PR #17877:
URL: https://github.com/apache/arrow/pull/17877#discussion_r1119357815
##########
cpp/src/parquet/column_reader_test.cc:
##########
@@ -703,9 +710,187 @@ class RecordReaderTest : public ::testing::Test {
Repetition::type repetition_type_;
};
-// Tests reading a repeated field using the RecordReader.
-TEST_F(RecordReaderTest, BasicReadRepeatedField) {
- Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED);
+// Tests reading a required field. The expected results are the same for
+// reading dense and spaced.
+TEST_P(RecordReaderPrimitiveTypeTest, ReadRequired) {
+ Init(/*max_def_level=*/0, /*max_rep_level=*/0, Repetition::REQUIRED,
+ /*repeated_ancestor_def_level=*/0);
+
+ // Records look like: {10, 20, 20, 30, 30, 30}
+ std::vector<std::shared_ptr<Page>> pages;
+ std::vector<int32_t> values = {10, 20, 20, 30, 30, 30};
+ std::vector<int16_t> def_levels = {};
+ std::vector<int16_t> rep_levels = {};
+
+ std::shared_ptr<DataPageV1> page = MakeDataPage<Int32Type>(
+ descr_.get(), values, /*num_values=*/static_cast<int>(def_levels.size()),
+ Encoding::PLAIN,
+ /*indices=*/{},
+ /*indices_size=*/0, def_levels, level_info_.def_level, rep_levels,
+ level_info_.rep_level);
+ pages.push_back(std::move(page));
+ auto pager = std::make_unique<MockPageReader>(pages);
+ record_reader_->SetPageReader(std::move(pager));
+
+ // Read [10]
+ int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1);
+ ASSERT_EQ(records_read, 1);
+ CheckState(/*values_written=*/1, /*null_count=*/0, /*levels_written=*/0,
+ /*levels_position=*/0);
+ CheckReadValues(/*expected_values=*/{10}, /*expected_defs=*/{},
+ /*expected_reps=*/{});
+ record_reader_->Reset();
+ CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/0,
+ /*levels_position=*/0);
+
+ // Read 20, 20, 30, 30, 30
+ records_read = record_reader_->ReadRecords(/*num_records=*/10);
+ ASSERT_EQ(records_read, 5);
+ CheckState(/*values_written=*/5, /*null_count=*/0, /*levels_written=*/0,
+ /*levels_position=*/0);
+ CheckReadValues(/*expected_values=*/{20, 20, 30, 30, 30},
+ /*expected_defs=*/{},
+ /*expected_reps=*/{});
+ record_reader_->Reset();
+ CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/0,
+ /*levels_position=*/0);
+}
+
+// Tests reading an optional field.
+// Use a max definition field > 1 to test both cases where parent is present or
+// parent is missing.
+TEST_P(RecordReaderPrimitiveTypeTest, ReadOptional) {
+ Init(/*max_def_level=*/2, /*max_rep_level=*/0, Repetition::OPTIONAL,
+ /*repeated_ancestor_def_level=*/0);
+
+ // Records look like: {10, null, 20, 20, null, 30, 30, 30, null}
+ std::vector<std::shared_ptr<Page>> pages;
+ std::vector<int32_t> values = {10, 20, 20, 30, 30, 30};
+ std::vector<int16_t> def_levels = {2, 0, 2, 2, 1, 2, 2, 2, 0};
+
+ std::shared_ptr<DataPageV1> page = MakeDataPage<Int32Type>(
+ descr_.get(), values, /*num_values=*/static_cast<int>(def_levels.size()),
+ Encoding::PLAIN,
+ /*indices=*/{},
+ /*indices_size=*/0, def_levels, level_info_.def_level, /*rep_levels=*/{},
+ level_info_.rep_level);
+ pages.push_back(std::move(page));
+ auto pager = std::make_unique<MockPageReader>(pages);
+ record_reader_->SetPageReader(std::move(pager));
+
+ // Read 10, null
+ int64_t records_read = record_reader_->ReadRecords(/*num_records=*/2);
+ ASSERT_EQ(records_read, 2);
+ if (GetParam() == /*read_dense_for_nullable=*/true) {
+ CheckState(/*values_written=*/1, /*null_count=*/0, /*levels_written=*/9,
+ /*levels_position=*/2);
+ CheckReadValues(/*expected_values=*/{10}, /*expected_defs=*/{2, 0},
+ /*expected_reps=*/{});
+ } else {
+ CheckState(/*values_written=*/2, /*null_count=*/1, /*levels_written=*/9,
+ /*levels_position=*/2);
+ CheckReadValues(/*expected_values=*/{10, kNullValue},
+ /*expected_defs=*/{2, 0},
+ /*expected_reps=*/{});
+ }
+ record_reader_->Reset();
+ CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/7,
+ /*levels_position=*/0);
+
+ // Read 20, 20, null (parent present), 30, 30, 30
+ records_read = record_reader_->ReadRecords(/*num_records=*/6);
+ ASSERT_EQ(records_read, 6);
+ if (GetParam() == /*read_dense_for_nullable=*/true) {
+ CheckState(/*values_written=*/5, /*null_count=*/0, /*levels_written=*/7,
+ /*levels_position=*/6);
+ CheckReadValues(/*expected_values=*/{20, 20, 30, 30, 30},
+ /*expected_defs=*/{2, 2, 1, 2, 2, 2},
+ /*expected_reps=*/{});
+ } else {
+ CheckState(/*values_written=*/6, /*null_count=*/1, /*levels_written=*/7,
+ /*levels_position=*/6);
+ CheckReadValues(/*expected_values=*/{20, 20, kNullValue, 30, 30, 30},
+ /*expected_defs=*/{2, 2, 1, 2, 2, 2},
+ /*expected_reps=*/{});
+ }
+ record_reader_->Reset();
+ CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/1,
+ /*levels_position=*/0);
+
+ // Read the last null value and read past the end.
+ records_read = record_reader_->ReadRecords(/*num_records=*/3);
+ ASSERT_EQ(records_read, 1);
+ if (GetParam() == /*read_dense_for_nullable=*/true) {
+ CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/1,
+ /*levels_position=*/1);
+ CheckReadValues(/*expected_values=*/{},
+ /*expected_defs=*/{0},
+ /*expected_reps=*/{});
+ } else {
+ CheckState(/*values_written=*/1, /*null_count=*/1, /*levels_written=*/1,
+ /*levels_position=*/1);
+ CheckReadValues(/*expected_values=*/{kNullValue},
+ /*expected_defs=*/{0},
+ /*expected_reps=*/{});
+ }
+ record_reader_->Reset();
+ CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/0,
+ /*levels_position=*/0);
+}
+
+// Tests reading a required repeated field. The results are the same for reading
+// dense or spaced.
+TEST_P(RecordReaderPrimitiveTypeTest, ReadRequiredRepeated) {
+ // Set repeated_ancestor_def_level = max_def_level so that the repeated field
+ // is not nullable.
+ Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED,
+ /*repeated_ancestor_def_level=*/1);
+
+ // Records look like: {[10], [20, 20], [30, 30, 30]}
+ std::vector<std::shared_ptr<Page>> pages;
+ std::vector<int32_t> values = {10, 20, 20, 30, 30, 30};
+ std::vector<int16_t> def_levels = {1, 1, 1, 1, 1, 1};
+ std::vector<int16_t> rep_levels = {0, 0, 1, 0, 1, 1};
+
+ std::shared_ptr<DataPageV1> page = MakeDataPage<Int32Type>(
+ descr_.get(), values, /*num_values=*/static_cast<int>(def_levels.size()),
+ Encoding::PLAIN,
+ /*indices=*/{},
+ /*indices_size=*/0, def_levels, level_info_.def_level, rep_levels,
+ level_info_.rep_level);
+ pages.push_back(std::move(page));
+ auto pager = std::make_unique<MockPageReader>(pages);
+ record_reader_->SetPageReader(std::move(pager));
+
+ // Read [10]
+ int64_t records_read = record_reader_->ReadRecords(/*num_records=*/1);
+ ASSERT_EQ(records_read, 1);
+ CheckState(/*values_written=*/1, /*null_count=*/0, /*levels_written=*/6,
+ /*levels_position=*/1);
+ CheckReadValues(/*expected_values=*/{10}, /*expected_defs=*/{1},
+ /*expected_reps=*/{0});
+ record_reader_->Reset();
+ CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/5,
+ /*levels_position=*/0);
+
+ // Read [20, 20], [30, 30, 30]
+ records_read = record_reader_->ReadRecords(/*num_records=*/3);
+ ASSERT_EQ(records_read, 2);
+ CheckState(/*values_written=*/5, /*null_count=*/0, /*levels_written=*/5,
+ /*levels_position=*/5);
+ CheckReadValues(/*expected_values=*/{20, 20, 30, 30, 30},
+ /*expected_defs=*/{1, 1, 1, 1, 1},
+ /*expected_reps=*/{0, 1, 0, 1, 1});
+ record_reader_->Reset();
+ CheckState(/*values_written=*/0, /*null_count=*/0, /*levels_written=*/0,
+ /*levels_position=*/0);
+}
+
+// Tests reading a nullable repeated field.
+TEST_P(RecordReaderPrimitiveTypeTest, ReadNullableRepeated) {
Review Comment:
I modified ReadNullableRepeated to cover these cases.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]