KarateSnowMachine commented on code in PR #36191:
URL: https://github.com/apache/arrow/pull/36191#discussion_r1268435923


##########
cpp/src/parquet/stream_reader_test.cc:
##########
@@ -947,5 +947,110 @@ TEST_F(TestReadingDataFiles, ByteArrayDecimal) {
   EXPECT_EQ(i, 25);
 }
 
+// Test fixture that writes a Parquet file made of several row groups and opens a
+// StreamReader over that file for each test.
+class TestMultiRowGroupStreamReader : public ::testing::Test {
+ public:
+  // The data file is written when the fixture is constructed, so it already exists
+  // when SetUp() opens it.
+  TestMultiRowGroupStreamReader() { createTestFile(); }
+
+ protected:
+  // Path of the Parquet file written by createTestFile() and read back in SetUp().
+  const char* GetDataFile() const { return "stream_reader_multirowgroup_test.parquet"; }
+
+  // Opens the data file and stores a StreamReader over it in reader_.
+  void SetUp() override {
+    PARQUET_ASSIGN_OR_THROW(auto infile, ::arrow::io::ReadableFile::Open(GetDataFile()));
+    auto file_reader = parquet::ParquetFileReader::Open(infile);
+    reader_ = StreamReader{std::move(file_reader)};
+  }
+
+  // Resets reader_ to a default-constructed StreamReader.
+  void TearDown() override { reader_ = StreamReader{}; }
+
+  // Builds the file schema: two required columns,
+  //   row_group_number (INT32 / UINT_16) and row_number (INT64 / UINT_64).
+  std::shared_ptr<schema::GroupNode> GetSchema() {
+    schema::NodeVector fields;
+    fields.push_back(schema::PrimitiveNode::Make("row_group_number", Repetition::REQUIRED,
+                                                 Type::INT32, ConvertedType::UINT_16));
+
+    fields.push_back(schema::PrimitiveNode::Make("row_number", Repetition::REQUIRED,
+                                                 Type::INT64, ConvertedType::UINT_64));
+
+    return std::static_pointer_cast<schema::GroupNode>(
+        schema::GroupNode::Make("schema", Repetition::REQUIRED, fields));
+  }
+
+  // Writes num_row_groups row groups of num_rows_per_group rows each. Every row holds
+  // the index of its row group and a row number that keeps counting across row groups
+  // (it is not reset at a row group boundary).
+  void createTestFile() {
+    PARQUET_ASSIGN_OR_THROW(auto outfile,
+                            ::arrow::io::FileOutputStream::Open(GetDataFile()));
+
+    auto file_writer = ParquetFileWriter::Open(outfile, GetSchema());
+
+    StreamWriter os{std::move(file_writer)};
+
+    int nrows = 0;
+    for (auto group = 0; group < num_row_groups; ++group) {
+      for (auto i = 0; i < num_rows_per_group; ++i) {
+        os << static_cast<uint16_t>(group);
+        os << static_cast<uint64_t>(nrows);
+        os << EndRow;
+        nrows++;
+      }
+      // Explicitly close the current row group so the file has num_row_groups of them.
+      os.EndRowGroup();
+    }
+  }
+
+  StreamReader reader_;
+  static constexpr int num_row_groups = 5;
+  static constexpr int num_rows_per_group = 10;
+};
+
+TEST_F(TestMultiRowGroupStreamReader, SkipRows) {
+  // skip somewhere into the middle of a row group somewhere in the middle of the file
+  auto current_row = 33;
+

Review Comment:
   Fixed. I made the helper name a bit more verbose to make it clear that it 
will have the side effect of consuming the row. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to