alamb commented on a change in pull request #797:
URL: https://github.com/apache/arrow-rs/pull/797#discussion_r717916170



##########
File path: parquet/src/arrow/mod.rs
##########
@@ -20,6 +20,52 @@
 //!
 //! This mod provides API for converting between arrow and parquet.
 //!
+//!# Example of writing Arrow record batch to Parquet file
+//!
+//!```rust
+//! use arrow::array::Int32Array;
+//! use arrow::datatypes::{DataType, Field, Schema};
+//! use arrow::record_batch::RecordBatch;
+//! use parquet::arrow::arrow_writer::ArrowWriter;
+//! use parquet::file::properties::WriterProperties;
+//! use std::fs::File;
+//! use std::sync::Arc;
+//! let ids = Int32Array::from(vec![1, 2, 3, 4]);
+//! let vals = Int32Array::from(vec![5, 6, 7, 8]);
+//! let schema = Arc::new(Schema::new(vec![
+//!     Field::new("id", DataType::Int32, false),
+//!     Field::new("val", DataType::Int32, false),
+//! ]));
+//!
+//! let file = File::create("data.parquet").unwrap();
+//!
+//! let batch =
+//!     RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(ids), Arc::new(vals)]).unwrap();
+//! let batches = vec![batch];
+//!
+//! // Default writer properties
+//! let props = WriterProperties::builder().build();
+//!
+//! let mut writer = ArrowWriter::try_new(file, Arc::clone(&schema), Some(props)).unwrap();
+//!
+//! for batch in batches {
+//!     writer.write(&batch).expect("Writing batch");
+//! }
+//! writer.close().unwrap();
+//! ```
+
+//! `WriterProperties` can be used to set Parquet file options
+//! ```rust
+//! use parquet::file::properties::WriterProperties;
+//! use parquet::basic::{ Compression, Encoding };
+//! use parquet::file::properties::WriterVersion;
+//!
+//! // File compression
+//! let props = WriterProperties::builder()
+//!     .set_compression(Compression::SNAPPY)
+//!     .build();
+//! ```
+//!
 //! # Example of reading parquet file into arrow record batch
 //!
 //! ```rust, no_run

Review comment:
       Here is a way to actually run the example that reads from a file: prefix
       the setup lines with `# ` so rustdoc hides the code that first writes the file.
   
   Something like:
   
   ```rust
   //! ```rust
   //! use arrow::record_batch::RecordBatchReader;
   //! use parquet::file::reader::SerializedFileReader;
   //! use parquet::arrow::{ParquetFileArrowReader, ArrowReader};
   //! use std::sync::Arc;
   //! use std::fs::File;
   //!
   //! # use arrow::array::Int32Array;
   //! # use arrow::datatypes::{DataType, Field, Schema};
   //! # use arrow::record_batch::RecordBatch;
   //! # use parquet::arrow::arrow_writer::ArrowWriter;
   //! # let ids = Int32Array::from(vec![1, 2, 3, 4]);
   //! # let schema = Arc::new(Schema::new(vec![
   //! #    Field::new("id", DataType::Int32, false),
   //! # ]));
   //! #
   //! # // Write the example data to a file so the visible code below can read it back
   //! # let file = File::create("data.parquet").unwrap();
   //! #
   //! # let batch =
   //! #    RecordBatch::try_new(Arc::clone(&schema), vec![Arc::new(ids)]).unwrap();
   //! # let batches = vec![batch];
   //! #
   //! # let mut writer = ArrowWriter::try_new(file, Arc::clone(&schema), None).unwrap();
   //! #
   //! # for batch in batches {
   //! #     writer.write(&batch).expect("Writing batch");
   //! # }
   //! # writer.close().unwrap();
   //!
   //!
   //! let file = File::open("data.parquet").unwrap();
   //! let file_reader = SerializedFileReader::new(file).unwrap();
   //! let mut arrow_reader = ParquetFileArrowReader::new(Arc::new(file_reader));
   //!
   //! println!("Converted arrow schema is: {}", arrow_reader.get_schema().unwrap());
   //! println!("Arrow schema after projection is: {}",
   //!    arrow_reader.get_schema_by_columns(vec![0], true).unwrap());
   //!
   //! let mut record_batch_reader = arrow_reader.get_record_reader(2048).unwrap();
   //!
   //! for maybe_record_batch in record_batch_reader {
   //!    let record_batch = maybe_record_batch.unwrap();
   //!    if record_batch.num_rows() > 0 {
   //!        println!("Read {} records.", record_batch.num_rows());
   //!    } else {
   //!        println!("End of file!");
   //!    }
   //! }
   //! ```
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to