raulcd commented on code in PR #45350:
URL: https://github.com/apache/arrow/pull/45350#discussion_r1931890134
##########
cpp/src/parquet/arrow/arrow_reader_writer_test.cc:
##########
@@ -4296,6 +4296,110 @@ TEST(TestArrowReaderAdHoc, ReadFloat16Files) {
   }
 }
 
+TEST(TestArrowFileReader, RecordBatchReaderEmptyRowGroups) {
+  const int num_columns = 1;
+  const int num_rows = 3;
+  const int num_chunks = 1;
+
+  std::shared_ptr<Table> table;
+  ASSERT_NO_FATAL_FAILURE(MakeDoubleTable(num_columns, num_rows, num_chunks, &table));
+
+  const int64_t row_group_size = num_rows;
+  std::shared_ptr<Buffer> buffer;
+  ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, row_group_size,
+                                             default_arrow_writer_properties(), &buffer));
+
+  auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
+  std::unique_ptr<FileReader> file_reader;
+  ASSERT_OK(
+      FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader));
+  // This is the important part in this test.
+  std::vector<int> row_group_indices = {};
+  ASSERT_OK_AND_ASSIGN(auto record_batch_reader,
+                       file_reader->GetRecordBatchReader(row_group_indices));
+  std::shared_ptr<::arrow::RecordBatch> record_batch;
+  ASSERT_OK(record_batch_reader->ReadNext(&record_batch));
+  // No read record batch for empty row groups request.
+  ASSERT_FALSE(record_batch);
+}
+
+TEST(TestArrowFileReader, RecordBatchReaderEmptyInput) {
+  const int num_columns = 1;
+  // This is the important part in this test.
+  const int num_rows = 0;
+  const int num_chunks = 1;
+
+  std::shared_ptr<Table> table;
+  ASSERT_NO_FATAL_FAILURE(MakeDoubleTable(num_columns, num_rows, num_chunks, &table));
+
+  const int64_t row_group_size = num_rows;
+  std::shared_ptr<Buffer> buffer;
+  ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, row_group_size,
+                                             default_arrow_writer_properties(), &buffer));
+
+  auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
+  std::unique_ptr<FileReader> file_reader;
+  ASSERT_OK(
+      FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader));
+  ASSERT_OK_AND_ASSIGN(auto record_batch_reader, file_reader->GetRecordBatchReader());
+  std::shared_ptr<::arrow::RecordBatch> record_batch;
+  ASSERT_OK(record_batch_reader->ReadNext(&record_batch));
+  // No read record batch for empty data.
+  ASSERT_FALSE(record_batch);
+}
+
+TEST(TestArrowColumnReader, NextBatchZeroBatchSize) {

Review Comment:
   This test is exactly the same as `RecordBatchReaderEmptyRowGroups`: I manually did a diff to see if I was missing anything:
   ```
   $ diff 1.cpp 2.cpp
   1c1
   < TEST(TestArrowColumnReader, NextBatchZeroBatchSize) {
   ---
   > TEST(TestArrowFileReader, RecordBatchReaderEmptyRowGroups) {
   ```
   (A sketch of a test that actually exercises a zero batch size is included at the end of this message.)

##########
cpp/src/parquet/properties.h:
##########
@@ -913,7 +913,8 @@ class PARQUET_EXPORT ArrowReaderProperties {
         pre_buffer_(true),
         cache_options_(::arrow::io::CacheOptions::LazyDefaults()),
         coerce_int96_timestamp_unit_(::arrow::TimeUnit::NANO),
-        arrow_extensions_enabled_(false) {}
+        arrow_extensions_enabled_(false),
+        should_load_statistics_(false) {}

Review Comment:
   Just for my understanding: what is the reason we default to not loading statistics?
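Editor's note on the last question: a plausible (unconfirmed) reason for defaulting to `false` is that an opt-in keeps existing readers' behavior and metadata-decoding cost unchanged. For callers who do want statistics loaded, usage could look roughly like the sketch below. The setter name `set_should_load_statistics()` is assumed from the new `should_load_statistics_` member and the class's existing `set_*` naming pattern, and the helper `OpenWithStatistics()` is purely illustrative.

```cpp
// Sketch under the assumptions above: enable statistics loading via
// ArrowReaderProperties before building a parquet::arrow::FileReader.
#include <memory>
#include <utility>

#include "arrow/io/interfaces.h"
#include "arrow/memory_pool.h"
#include "arrow/result.h"
#include "parquet/arrow/reader.h"
#include "parquet/properties.h"

arrow::Result<std::unique_ptr<parquet::arrow::FileReader>> OpenWithStatistics(
    std::shared_ptr<arrow::io::RandomAccessFile> input, arrow::MemoryPool* pool) {
  parquet::ArrowReaderProperties arrow_props;
  // Hypothetical setter corresponding to should_load_statistics_; the default is false.
  arrow_props.set_should_load_statistics(true);

  parquet::arrow::FileReaderBuilder builder;
  ARROW_RETURN_NOT_OK(builder.Open(std::move(input)));
  std::unique_ptr<parquet::arrow::FileReader> reader;
  ARROW_RETURN_NOT_OK(
      builder.memory_pool(pool)->properties(arrow_props)->Build(&reader));
  return reader;
}
```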
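Following up on the first review comment above: if the intent of `TestArrowColumnReader.NextBatchZeroBatchSize` is to cover a zero batch size on the column reader path, a genuinely distinct test could be shaped roughly as below. This is only a sketch; it reuses the helpers already used in the hunk above (`MakeDoubleTable`, `WriteTableToBuffer`) and goes through `FileReader::GetColumn()` and `ColumnReader::NextBatch()`, and the expected outcome (an OK status and a zero-length result) is an assumption to be checked against the actual behavior in this PR.

```cpp
// Sketch only: exercises ColumnReader::NextBatch() with batch_size == 0
// instead of duplicating the record-batch-reader test above.
TEST(TestArrowColumnReader, NextBatchZeroBatchSize) {
  std::shared_ptr<Table> table;
  ASSERT_NO_FATAL_FAILURE(MakeDoubleTable(/*num_columns=*/1, /*num_rows=*/3,
                                          /*num_chunks=*/1, &table));
  std::shared_ptr<Buffer> buffer;
  ASSERT_NO_FATAL_FAILURE(WriteTableToBuffer(table, /*row_group_size=*/3,
                                             default_arrow_writer_properties(), &buffer));

  auto reader = ParquetFileReader::Open(std::make_shared<BufferReader>(buffer));
  std::unique_ptr<FileReader> file_reader;
  ASSERT_OK(
      FileReader::Make(::arrow::default_memory_pool(), std::move(reader), &file_reader));

  // This is the important part of this test: request a batch of size zero
  // from a column reader.
  std::unique_ptr<ColumnReader> column_reader;
  ASSERT_OK(file_reader->GetColumn(0, &column_reader));
  std::shared_ptr<::arrow::ChunkedArray> chunked;
  ASSERT_OK(column_reader->NextBatch(/*batch_size=*/0, &chunked));
  // Assumed expectation: a non-null, zero-length result rather than an error.
  ASSERT_NE(chunked, nullptr);
  ASSERT_EQ(chunked->length(), 0);
}
```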