mapleFU commented on code in PR #39570:
URL: https://github.com/apache/arrow/pull/39570#discussion_r1449192341
##########
cpp/src/parquet/reader_test.cc:
##########
@@ -120,11 +120,27 @@ std::string concatenated_gzip_members() {
return data_file("concatenated_gzip_members.parquet");
}
+std::string byte_stream_split() { return
data_file("byte_stream_split.zstd.parquet"); }
+
+template <typename DType, typename ValueType = typename DType::c_type>
+std::vector<ValueType> ReadColumnValues(ParquetFileReader* file_reader, int
row_group,
+ int column, int64_t
expected_values_read) {
+ auto column_reader = checked_pointer_cast<TypedColumnReader<DType>>(
+ file_reader->RowGroup(row_group)->Column(column));
+ std::vector<ValueType> values(expected_values_read);
+ int64_t values_read;
+ auto levels_read = column_reader->ReadBatch(expected_values_read, nullptr,
nullptr,
Review Comment:
https://github.com/apache/arrow/issues/39381
I don't know whether passing nullptr for the levels gets the right level... Maybe not?
##########
cpp/src/parquet/reader_test.cc:
##########
@@ -1474,6 +1489,38 @@ TEST(TestFileReader, TestOverflowInt16PageOrdinal) {
}
}
+#ifdef ARROW_WITH_ZSTD
+TEST(TestByteStreamSplit, FloatIntegrationFile) {
+ auto file_path = byte_stream_split();
+ auto file = ParquetFileReader::OpenFile(file_path);
+
+ const int64_t kNumRows = 300;
+
+ ASSERT_EQ(kNumRows, file->metadata()->num_rows());
+ ASSERT_EQ(2, file->metadata()->num_columns());
+ ASSERT_EQ(1, file->metadata()->num_row_groups());
+
+ // column 0 ("f32")
+ {
+ auto values =
+ ReadColumnValues<FloatType>(file.get(), /*row_group=*/0, /*column=*/0,
kNumRows);
+ ASSERT_EQ(values[0], 1.7640524f);
+ ASSERT_EQ(values[1], 0.4001572f);
+ ASSERT_EQ(values[kNumRows - 2], -0.39944902f);
Review Comment:
So we don't need to check the whole file?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]