This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 2ee0fa0b2f docs: update examples in ArrowReaderOptions to use in-memory buffers (#9163)
2ee0fa0b2f is described below
commit 2ee0fa0b2f253148e90ef83b31f2bfd381ec610e
Author: Andrea Bozzo <[email protected]>
AuthorDate: Wed Jan 14 23:16:36 2026 +0100
docs: update examples in ArrowReaderOptions to use in-memory buffers (#9163)
# Which issue does this PR close?
Closes #9161
# Rationale for this change
This PR applies the feedback from #9116 to make the parquet reader
documentation examples more concise and easier to follow.
# What changes are included in this PR?
Updated 3 documentation examples in
`parquet/src/arrow/arrow_reader/mod.rs`:
1. **`with_schema` example 1** - Schema mapping with timestamp
2. **`with_schema` example 2** - Dictionary encoding preservation
3. **`with_virtual_columns` example** - Virtual columns for row numbers
Changes in each example:
- Replaced `tempfile::tempfile()` with `Vec::new()` for an in-memory buffer
- Added `use bytes::Bytes;` import
- Changed `ArrowWriter::try_new(file.try_clone()?, ...)` to
`ArrowWriter::try_new(&mut file, ...)`
- Added `let file = Bytes::from(file);` to convert buffer for reading
- Added `#` prefixes to hide setup/imports in rendered docs
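For reference, a minimal, self-contained sketch of the write/read pattern the updated examples now follow (illustrative code assembled from the diff below, not the literal doc text; it assumes the `parquet`, `arrow-array`, and `bytes` crates):

```rust
use std::sync::Arc;
use arrow_array::{ArrayRef, Int32Array, RecordBatch};
use bytes::Bytes;
use parquet::arrow::ArrowWriter;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Write into a plain Vec<u8> instead of a temporary file
    let mut file = Vec::new();
    let batch = RecordBatch::try_from_iter(vec![
        ("col_1", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef),
    ])?;
    let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None)?;
    writer.write(&batch)?;
    writer.close()?;

    // `Bytes` implements `ChunkReader`, so the buffer can be read back directly
    let file = Bytes::from(file);
    let reader = ParquetRecordBatchReaderBuilder::try_new(file)?.build()?;
    for batch in reader {
        println!("{:?}", batch?);
    }
    Ok(())
}
```

This avoids `file.try_clone()` entirely: `&mut Vec<u8>` satisfies the writer's `Write` bound, and cloning a `Bytes` handle for repeated reads is cheap.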
The async example in `async_reader/mod.rs` was intentionally left
unchanged since it demonstrates `tokio::fs::File` usage.
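For context, a hedged sketch of the async pattern that example demonstrates (assumes the `parquet` crate with its `async` feature plus `tokio` and `futures`; `"data.parquet"` is a hypothetical path, not taken from the actual doc example):

```rust
use futures::TryStreamExt;
use parquet::arrow::ParquetRecordBatchStreamBuilder;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The async reader is built around a real file handle; an in-memory
    // Vec would not demonstrate tokio::fs::File, hence that example stays
    let file = tokio::fs::File::open("data.parquet").await?;
    let stream = ParquetRecordBatchStreamBuilder::new(file).await?.build()?;
    let batches: Vec<_> = stream.try_collect().await?;
    println!("read {} batches", batches.len());
    Ok(())
}
```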
# Are there any user-facing changes?
No functional changes, only documentation improvements to make examples
smaller and cleaner in rendered docs.
---
parquet/src/arrow/arrow_reader/mod.rs | 50 +++++++++++++++++------------------
1 file changed, 25 insertions(+), 25 deletions(-)
diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index b0563d0d69..24530c75b0 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -506,22 +506,21 @@ impl ArrowReaderOptions {
///
/// # Example
/// ```
- /// use std::io::Bytes;
- /// use std::sync::Arc;
- /// use tempfile::tempfile;
- /// use arrow_array::{ArrayRef, Int32Array, RecordBatch};
- /// use arrow_schema::{DataType, Field, Schema, TimeUnit};
- /// use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
- /// use parquet::arrow::ArrowWriter;
- ///
+ /// # use std::sync::Arc;
+ /// # use bytes::Bytes;
+ /// # use arrow_array::{ArrayRef, Int32Array, RecordBatch};
+ /// # use arrow_schema::{DataType, Field, Schema, TimeUnit};
+ /// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
+ /// # use parquet::arrow::ArrowWriter;
/// // Write data - schema is inferred from the data to be Int32
- /// let file = tempfile().unwrap();
+ /// let mut file = Vec::new();
/// let batch = RecordBatch::try_from_iter(vec![
/// ("col_1", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef),
/// ]).unwrap();
- /// let mut writer = ArrowWriter::try_new(file.try_clone().unwrap(), batch.schema(), None).unwrap();
+ /// let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None).unwrap();
/// writer.write(&batch).unwrap();
/// writer.close().unwrap();
+ /// let file = Bytes::from(file);
///
/// // Read the file back.
/// // Supply a schema that interprets the Int32 column as a Timestamp.
@@ -530,7 +529,7 @@ impl ArrowReaderOptions {
/// ]));
/// let options = ArrowReaderOptions::new().with_schema(supplied_schema.clone());
/// let mut builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
- /// file.try_clone().unwrap(),
+ /// file.clone(),
/// options
/// ).expect("Error if the schema is not compatible with the parquet file schema.");
///
@@ -546,24 +545,24 @@ impl ArrowReaderOptions {
/// the dictionary encoding by specifying a `Dictionary` type in the schema hint:
///
/// ```
- /// use std::sync::Arc;
- /// use tempfile::tempfile;
- /// use arrow_array::{ArrayRef, RecordBatch, StringArray};
- /// use arrow_schema::{DataType, Field, Schema};
- /// use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
- /// use parquet::arrow::ArrowWriter;
- ///
+ /// # use std::sync::Arc;
+ /// # use bytes::Bytes;
+ /// # use arrow_array::{ArrayRef, RecordBatch, StringArray};
+ /// # use arrow_schema::{DataType, Field, Schema};
+ /// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
+ /// # use parquet::arrow::ArrowWriter;
/// // Write a Parquet file with string data
- /// let file = tempfile().unwrap();
+ /// let mut file = Vec::new();
/// let schema = Arc::new(Schema::new(vec![
/// Field::new("city", DataType::Utf8, false)
/// ]));
/// let cities = StringArray::from(vec!["Berlin", "Berlin", "Paris", "Berlin", "Paris"]);
/// let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(cities)]).unwrap();
///
- /// let mut writer = ArrowWriter::try_new(file.try_clone().unwrap(), batch.schema(), None).unwrap();
+ /// let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None).unwrap();
/// writer.write(&batch).unwrap();
/// writer.close().unwrap();
+ /// let file = Bytes::from(file);
///
/// // Read the file back, requesting dictionary encoding preservation
/// let dict_schema = Arc::new(Schema::new(vec![
@@ -574,7 +573,7 @@ impl ArrowReaderOptions {
/// ]));
/// let options = ArrowReaderOptions::new().with_schema(dict_schema);
/// let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
- /// file.try_clone().unwrap(),
+ /// file.clone(),
/// options
/// ).unwrap();
///
@@ -703,26 +702,27 @@ impl ArrowReaderOptions {
/// # Example
/// ```
/// # use std::sync::Arc;
+ /// # use bytes::Bytes;
/// # use arrow_array::{ArrayRef, Int64Array, RecordBatch};
/// # use arrow_schema::{DataType, Field, Schema};
/// # use parquet::arrow::{ArrowWriter, RowNumber};
/// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
- /// # use tempfile::tempfile;
/// #
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// // Create a simple record batch with some data
/// let values = Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef;
/// let batch = RecordBatch::try_from_iter(vec![("value", values)])?;
///
- /// // Write the batch to a temporary parquet file
- /// let file = tempfile()?;
+ /// // Write the batch to an in-memory buffer
+ /// let mut file = Vec::new();
/// let mut writer = ArrowWriter::try_new(
- /// file.try_clone()?,
+ /// &mut file,
/// batch.schema(),
/// None
/// )?;
/// writer.write(&batch)?;
/// writer.close()?;
+ /// let file = Bytes::from(file);
///
/// // Create a virtual column for row numbers
/// let row_number_field = Arc::new(Field::new("row_number", DataType::Int64, false)