This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new a7a93295b Update read parquet example in parquet/arrow home (#2730)
a7a93295b is described below
commit a7a93295bd4a143d55fa31a1c6ac92045d73dc05
Author: Marc Garcia <[email protected]>
AuthorDate: Thu Sep 15 17:23:22 2022 +0100
Update read parquet example in parquet/arrow home (#2730)
* Update example to read parquet
* Remove outdated comment
---
parquet/src/arrow/mod.rs | 35 ++++++++++-------------------------
1 file changed, 10 insertions(+), 25 deletions(-)
diff --git a/parquet/src/arrow/mod.rs b/parquet/src/arrow/mod.rs
index c0de656bf..c5fe0fa2a 100644
--- a/parquet/src/arrow/mod.rs
+++ b/parquet/src/arrow/mod.rs
@@ -66,26 +66,23 @@
//! # Example of reading parquet file into arrow record batch
//!
//! ```rust
-//! use arrow::record_batch::RecordBatchReader;
-//! use parquet::file::reader::{FileReader, SerializedFileReader};
-//! use parquet::arrow::{ParquetFileArrowReader, ArrowReader, ProjectionMask};
-//! use std::sync::Arc;
//! use std::fs::File;
+//! use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
//!
+//! # use std::sync::Arc;
//! # use arrow::array::Int32Array;
//! # use arrow::datatypes::{DataType, Field, Schema};
//! # use arrow::record_batch::RecordBatch;
//! # use parquet::arrow::arrow_writer::ArrowWriter;
+//! #
//! # let ids = Int32Array::from(vec![1, 2, 3, 4]);
//! # let schema = Arc::new(Schema::new(vec![
-//! # Field::new("id", DataType::Int32, false),
+//! # Field::new("id", DataType::Int32, false),
//! # ]));
//! #
-//! # // Write to a memory buffer (can also write to a File)
//! # let file = File::create("data.parquet").unwrap();
//! #
-//! # let batch =
-//! # RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(ids)]).unwrap();
+//! # let batch = RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(ids)]).unwrap();
//! # let batches = vec![batch];
//! #
//! # let mut writer = ArrowWriter::try_new(file, Arc::clone(&schema), None).unwrap();
@@ -97,26 +94,14 @@
//!
//! let file = File::open("data.parquet").unwrap();
//!
-//! let mut arrow_reader = ParquetFileArrowReader::try_new(file).unwrap();
-//! let mask = ProjectionMask::leaves(arrow_reader.parquet_schema(), [0]);
-//!
-//! println!("Converted arrow schema is: {}", arrow_reader.get_schema().unwrap());
-//! println!("Arrow schema after projection is: {}",
-//! arrow_reader.get_schema_by_columns(mask.clone()).unwrap());
+//! let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
+//! println!("Converted arrow schema is: {}", builder.schema());
//!
-//! let mut unprojected = arrow_reader.get_record_reader(2048).unwrap();
-//! println!("Unprojected reader schema: {}", unprojected.schema());
+//! let mut reader = builder.build().unwrap();
//!
-//! let mut record_batch_reader = arrow_reader.get_record_reader_by_columns(mask, 2048).unwrap();
+//! let record_batch = reader.next().unwrap().unwrap();
//!
-//! for maybe_record_batch in record_batch_reader {
-//! let record_batch = maybe_record_batch.unwrap();
-//! if record_batch.num_rows() > 0 {
-//! println!("Read {} records.", record_batch.num_rows());
-//! } else {
-//! println!("End of file!");
-//! }
-//!}
+//! println!("Read {} records.", record_batch.num_rows());
//! ```
experimental!(mod array_reader);