wgtmac commented on code in PR #40594:
URL: https://github.com/apache/arrow/pull/40594#discussion_r1860754016


##########
cpp/src/parquet/page_index_test.cc:
##########
@@ -446,23 +451,76 @@ TEST(PageIndex, WriteOffsetIndex) {
   /// Verify the data of the offset index.
   for (const auto& offset_index : offset_indexes) {
     ASSERT_EQ(num_pages, offset_index->page_locations().size());
+    if (write_size_stats) {
+      ASSERT_EQ(num_pages, 
offset_index->unencoded_byte_array_data_bytes().size());
+    } else {
+      ASSERT_TRUE(offset_index->unencoded_byte_array_data_bytes().empty());
+    }
     for (size_t i = 0; i < num_pages; ++i) {
       const auto& page_location = offset_index->page_locations().at(i);
       ASSERT_EQ(offsets[i] + final_position, page_location.offset);
       ASSERT_EQ(page_sizes[i], page_location.compressed_page_size);
       ASSERT_EQ(first_row_indices[i], page_location.first_row_index);
+      if (write_size_stats) {
+        ASSERT_EQ(unencoded_byte_array_lengths[i],
+                  offset_index->unencoded_byte_array_data_bytes()[i]);
+      }
     }
   }
 }
 
+TEST(PageIndex, WriteOffsetIndexWithoutSizeStats) {
+  TestWriteOffsetIndex(/*write_size_stats=*/false);
+}
+
+TEST(PageIndex, WriteOffsetIndexWithSizeStats) {
+  TestWriteOffsetIndex(/*write_size_stats=*/true);
+}
+
+struct PageLevelHistogram {
+  std::vector<int16_t> rep_levels;
+  std::vector<int16_t> def_levels;
+};
+
+std::unique_ptr<SizeStatistics> ConstructFakeSizeStatistics(
+    const ColumnDescriptor* descr, const PageLevelHistogram& 
page_level_histogram) {
+  auto stats = MakeSizeStatistics(descr);
+  for (int16_t level = 0; level <= descr->max_repetition_level(); ++level) {
+    stats->repetition_level_histogram[level] = 
page_level_histogram.rep_levels[level];
+  }
+  for (int16_t level = 0; level <= descr->max_definition_level(); ++level) {
+    stats->definition_level_histogram[level] = 
page_level_histogram.def_levels[level];
+  }
+  return stats;
+}
+
+void VerifyPageLevelHistogram(int16_t max_level, size_t page_id,
+                              const std::vector<int16_t>& expected_page_levels,

Review Comment:
   Now, it isn't. `expected_page_levels.size() == (max_level + 1) * num_pages`.



##########
cpp/src/parquet/page_index_test.cc:
##########
@@ -446,23 +451,76 @@ TEST(PageIndex, WriteOffsetIndex) {
   /// Verify the data of the offset index.
   for (const auto& offset_index : offset_indexes) {
     ASSERT_EQ(num_pages, offset_index->page_locations().size());
+    if (write_size_stats) {
+      ASSERT_EQ(num_pages, 
offset_index->unencoded_byte_array_data_bytes().size());
+    } else {
+      ASSERT_TRUE(offset_index->unencoded_byte_array_data_bytes().empty());
+    }
     for (size_t i = 0; i < num_pages; ++i) {
       const auto& page_location = offset_index->page_locations().at(i);
       ASSERT_EQ(offsets[i] + final_position, page_location.offset);
       ASSERT_EQ(page_sizes[i], page_location.compressed_page_size);
       ASSERT_EQ(first_row_indices[i], page_location.first_row_index);
+      if (write_size_stats) {
+        ASSERT_EQ(unencoded_byte_array_lengths[i],
+                  offset_index->unencoded_byte_array_data_bytes()[i]);
+      }
     }
   }
 }
 
+TEST(PageIndex, WriteOffsetIndexWithoutSizeStats) {
+  TestWriteOffsetIndex(/*write_size_stats=*/false);
+}
+
+TEST(PageIndex, WriteOffsetIndexWithSizeStats) {
+  TestWriteOffsetIndex(/*write_size_stats=*/true);
+}
+
+struct PageLevelHistogram {
+  std::vector<int16_t> rep_levels;
+  std::vector<int16_t> def_levels;
+};
+
+std::unique_ptr<SizeStatistics> ConstructFakeSizeStatistics(
+    const ColumnDescriptor* descr, const PageLevelHistogram& 
page_level_histogram) {
+  auto stats = MakeSizeStatistics(descr);
+  for (int16_t level = 0; level <= descr->max_repetition_level(); ++level) {
+    stats->repetition_level_histogram[level] = 
page_level_histogram.rep_levels[level];
+  }
+  for (int16_t level = 0; level <= descr->max_definition_level(); ++level) {
+    stats->definition_level_histogram[level] = 
page_level_histogram.def_levels[level];
+  }
+  return stats;
+}
+
+void VerifyPageLevelHistogram(int16_t max_level, size_t page_id,
+                              const std::vector<int16_t>& expected_page_levels,

Review Comment:
   No, it isn't. `expected_page_levels.size() == (max_level + 1) * num_pages`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to