fatemehp commented on code in PR #14142:
URL: https://github.com/apache/arrow/pull/14142#discussion_r1007342590
##########
cpp/src/parquet/column_reader_test.cc:
##########
@@ -572,5 +576,549 @@ TEST_F(TestPrimitiveReader, TestNonDictionaryEncodedPagesWithExposeEncoding) {
pages_.clear();
}
+class RecordReaderTest : public ::testing::Test {
+ public:
+ const int32_t kNullValue = -1;
+
+ void Init(int32_t max_def_level, int32_t max_rep_level, Repetition::type repetition) {
+ level_info_.def_level = max_def_level;
+ level_info_.rep_level = max_rep_level;
+ repetition_type_ = repetition;
+
+ NodePtr type = schema::Int32("b", repetition);
+ descr_ = std::make_unique<ColumnDescriptor>(type, level_info_.def_level,
+ level_info_.rep_level);
+
+ record_reader_ = internal::RecordReader::Make(descr_.get(), level_info_);
+ }
+
+ void CheckReadValues(std::vector<int32_t> expected_values,
+ std::vector<int16_t> expected_defs,
+ std::vector<int16_t> expected_reps) {
+ const auto read_values = reinterpret_cast<const int32_t*>(record_reader_->values());
+ std::vector<int32_t> read_vals(read_values,
+ read_values + record_reader_->values_written());
+ ASSERT_EQ(read_vals.size(), expected_values.size());
+ for (size_t i = 0; i < expected_values.size(); ++i) {
+ if (expected_values[i] != kNullValue) {
+ ASSERT_EQ(expected_values[i], read_values[i]);
+ }
+ }
+
+ if (repetition_type_ != Repetition::REQUIRED) {
+ std::vector<int16_t> read_defs(
+ record_reader_->def_levels(),
+ record_reader_->def_levels() + record_reader_->levels_position());
+ ASSERT_TRUE(vector_equal(expected_defs, read_defs));
+ }
+
+ if (repetition_type_ == Repetition::REPEATED) {
+ std::vector<int16_t> read_reps(
+ record_reader_->rep_levels(),
+ record_reader_->rep_levels() + record_reader_->levels_position());
+ ASSERT_TRUE(vector_equal(expected_reps, read_reps));
+ }
+ }
+
+ void CheckState(int64_t values_written, int64_t null_count, int64_t levels_written,
+ int64_t levels_position) {
+ ASSERT_EQ(record_reader_->values_written(), values_written);
+ ASSERT_EQ(record_reader_->null_count(), null_count);
+ ASSERT_EQ(record_reader_->levels_written(), levels_written);
+ ASSERT_EQ(record_reader_->levels_position(), levels_position);
+ }
+
+ protected:
+ std::shared_ptr<internal::RecordReader> record_reader_;
+ std::unique_ptr<ColumnDescriptor> descr_;
+ internal::LevelInfo level_info_;
+ Repetition::type repetition_type_;
+};
+
+// Tests reading a repeated field using the RecordReader.
+TEST_F(RecordReaderTest, BasicReadRepeatedField) {
+ Init(/*max_def_level=*/1, /*max_rep_level=*/1, Repetition::REPEATED);
+
+ // Records look like: {[10], [20, 20], [30, 30, 30]}
+ std::vector<std::shared_ptr<Page>> pages;
+ std::vector<int32_t> values = {10, 20, 20, 30, 30, 30};
+ std::vector<int16_t> def_levels = {1, 1, 1, 1, 1, 1};
+ std::vector<int16_t> rep_levels = {0, 0, 1, 0, 1, 1};
+
+ std::shared_ptr<DataPageV1> page = MakeDataPage<Int32Type>(
+ descr_.get(), values, /*num_values=*/static_cast<int>(def_levels.size()),
+ Encoding::PLAIN,
+ /*indices=*/{},
+ /*indices_size=*/0, def_levels, level_info_.def_level, rep_levels,
+ level_info_.rep_level);
+ pages.push_back(std::move(page));
+ auto pager = std::make_unique<MockPageReader>(pages);
+ record_reader_->SetPageReader(std::move(pager));
+
+ int64_t records_read = record_reader_->ReadRecords(/*num_records=*/2);
+ ASSERT_EQ(records_read, 2);
+ CheckState(/*values_written=*/3, /*null_count=*/0, /*levels_written=*/6,
+ /*levels_position=*/3);
+ CheckReadValues(/*expected_values=*/{10, 20, 20}, /*expected_defs=*/{1, 1, 1},
+ /*expected_reps=*/{0, 0, 1});
+ record_reader_->Reset();
Review Comment:
With my changes the last read goes past the end. Please take a look.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]