This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 2ee0fa0b2f docs: update examples in ArrowReaderOptions to use in-memory buffers (#9163)
2ee0fa0b2f is described below
commit 2ee0fa0b2f253148e90ef83b31f2bfd381ec610e
Author: Andrea Bozzo <[email protected]>
AuthorDate: Wed Jan 14 23:16:36 2026 +0100
docs: update examples in ArrowReaderOptions to use in-memory buffers (#9163)
# Which issue does this PR close?
Closes #9161
# Rationale for this change
This PR applies the feedback from #9116 to make the parquet reader
documentation examples more concise and easier to follow.
# What changes are included in this PR?
Updated 3 documentation examples in
`parquet/src/arrow/arrow_reader/mod.rs`:
1. **`with_schema` example 1** - Schema mapping with timestamp
2. **`with_schema` example 2** - Dictionary encoding preservation
3. **`with_virtual_columns` example** - Virtual columns for row numbers
Changes in each example:
- Replaced `tempfile::tempfile()` with `Vec::new()` for an in-memory buffer
- Added `use bytes::Bytes;` import
- Changed `ArrowWriter::try_new(file.try_clone()?, ...)` to
`ArrowWriter::try_new(&mut file, ...)`
- Added `let file = Bytes::from(file);` to convert buffer for reading
- Added `#` prefixes to hide setup/imports in rendered docs
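For reference, a minimal, self-contained sketch of the write/read pattern the updated examples now follow (illustrative code assembled from the diff below, not the literal doc text; it assumes the `parquet`, `arrow-array`, and `bytes` crates):

```rust
use std::sync::Arc;
use arrow_array::{ArrayRef, Int32Array, RecordBatch};
use bytes::Bytes;
use parquet::arrow::ArrowWriter;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Write into a plain Vec<u8> instead of a temporary file
    let mut file = Vec::new();
    let batch = RecordBatch::try_from_iter(vec![
        ("col_1", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef),
    ])?;
    let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None)?;
    writer.write(&batch)?;
    writer.close()?;

    // `Bytes` implements `ChunkReader`, so the buffer can be read back directly
    let file = Bytes::from(file);
    let reader = ParquetRecordBatchReaderBuilder::try_new(file)?.build()?;
    for batch in reader {
        println!("{:?}", batch?);
    }
    Ok(())
}
```

This avoids `file.try_clone()` entirely: `&mut Vec<u8>` satisfies the writer's `Write` bound, and cloning a `Bytes` handle for repeated reads is cheap.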
The async example in `async_reader/mod.rs` was intentionally left
unchanged since it demonstrates `tokio::fs::File` usage.
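For context, a hedged sketch of the async pattern that example demonstrates (assumes the `parquet` crate with its `async` feature plus `tokio` and `futures`; `"data.parquet"` is a hypothetical path, not taken from the actual doc example):

```rust
use futures::TryStreamExt;
use parquet::arrow::ParquetRecordBatchStreamBuilder;

#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // The async reader is built around a real file handle; an in-memory
    // Vec would not demonstrate tokio::fs::File, hence that example stays
    let file = tokio::fs::File::open("data.parquet").await?;
    let stream = ParquetRecordBatchStreamBuilder::new(file).await?.build()?;
    let batches: Vec<_> = stream.try_collect().await?;
    println!("read {} batches", batches.len());
    Ok(())
}
```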
# Are there any user-facing changes?
No functional changes, only documentation improvements to make examples
smaller and cleaner in rendered docs.
---
parquet/src/arrow/arrow_reader/mod.rs | 50 +++++++++++++++++------------------
1 file changed, 25 insertions(+), 25 deletions(-)
diff --git a/parquet/src/arrow/arrow_reader/mod.rs b/parquet/src/arrow/arrow_reader/mod.rs
index b0563d0d69..24530c75b0 100644
--- a/parquet/src/arrow/arrow_reader/mod.rs
+++ b/parquet/src/arrow/arrow_reader/mod.rs
@@ -506,22 +506,21 @@ impl ArrowReaderOptions {
///
/// # Example
/// ```
- /// use std::io::Bytes;
- /// use std::sync::Arc;
- /// use tempfile::tempfile;
- /// use arrow_array::{ArrayRef, Int32Array, RecordBatch};
- /// use arrow_schema::{DataType, Field, Schema, TimeUnit};
- /// use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
- /// use parquet::arrow::ArrowWriter;
- ///
+ /// # use std::sync::Arc;
+ /// # use bytes::Bytes;
+ /// # use arrow_array::{ArrayRef, Int32Array, RecordBatch};
+ /// # use arrow_schema::{DataType, Field, Schema, TimeUnit};
+ /// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
+ /// # use parquet::arrow::ArrowWriter;
/// // Write data - schema is inferred from the data to be Int32
- /// let file = tempfile().unwrap();
+ /// let mut file = Vec::new();
/// let batch = RecordBatch::try_from_iter(vec![
/// ("col_1", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef),
/// ]).unwrap();
- /// let mut writer = ArrowWriter::try_new(file.try_clone().unwrap(), batch.schema(), None).unwrap();
+ /// let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None).unwrap();
/// writer.write(&batch).unwrap();
/// writer.close().unwrap();
+ /// let file = Bytes::from(file);
///
/// // Read the file back.
/// // Supply a schema that interprets the Int32 column as a Timestamp.
@@ -530,7 +529,7 @@ impl ArrowReaderOptions {
/// ]));
/// let options = ArrowReaderOptions::new().with_schema(supplied_schema.clone());
/// let mut builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
- /// file.try_clone().unwrap(),
+ /// file.clone(),
/// options
/// ).expect("Error if the schema is not compatible with the parquet file schema.");
///
@@ -546,24 +545,24 @@ impl ArrowReaderOptions {
/// the dictionary encoding by specifying a `Dictionary` type in the schema hint:
///
/// ```
- /// use std::sync::Arc;
- /// use tempfile::tempfile;
- /// use arrow_array::{ArrayRef, RecordBatch, StringArray};
- /// use arrow_schema::{DataType, Field, Schema};
- /// use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
- /// use parquet::arrow::ArrowWriter;
- ///
+ /// # use std::sync::Arc;
+ /// # use bytes::Bytes;
+ /// # use arrow_array::{ArrayRef, RecordBatch, StringArray};
+ /// # use arrow_schema::{DataType, Field, Schema};
+ /// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
+ /// # use parquet::arrow::ArrowWriter;
/// // Write a Parquet file with string data
- /// let file = tempfile().unwrap();
+ /// let mut file = Vec::new();
/// let schema = Arc::new(Schema::new(vec![
/// Field::new("city", DataType::Utf8, false)
/// ]));
/// let cities = StringArray::from(vec!["Berlin", "Berlin", "Paris", "Berlin", "Paris"]);
/// let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(cities)]).unwrap();
///
- /// let mut writer = ArrowWriter::try_new(file.try_clone().unwrap(), batch.schema(), None).unwrap();
+ /// let mut writer = ArrowWriter::try_new(&mut file, batch.schema(), None).unwrap();
/// writer.write(&batch).unwrap();
/// writer.close().unwrap();
+ /// let file = Bytes::from(file);
///
/// // Read the file back, requesting dictionary encoding preservation
/// let dict_schema = Arc::new(Schema::new(vec![
@@ -574,7 +573,7 @@ impl ArrowReaderOptions {
/// ]));
/// let options = ArrowReaderOptions::new().with_schema(dict_schema);
/// let builder = ParquetRecordBatchReaderBuilder::try_new_with_options(
- /// file.try_clone().unwrap(),
+ /// file.clone(),
/// options
/// ).unwrap();
///
@@ -703,26 +702,27 @@ impl ArrowReaderOptions {
/// # Example
/// ```
/// # use std::sync::Arc;
+ /// # use bytes::Bytes;
/// # use arrow_array::{ArrayRef, Int64Array, RecordBatch};
/// # use arrow_schema::{DataType, Field, Schema};
/// # use parquet::arrow::{ArrowWriter, RowNumber};
/// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
- /// # use tempfile::tempfile;
/// #
/// # fn main() -> Result<(), Box<dyn std::error::Error>> {
/// // Create a simple record batch with some data
/// let values = Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef;
/// let batch = RecordBatch::try_from_iter(vec![("value", values)])?;
///
- /// // Write the batch to a temporary parquet file
- /// let file = tempfile()?;
+ /// // Write the batch to an in-memory buffer
+ /// let mut file = Vec::new();
/// let mut writer = ArrowWriter::try_new(
- /// file.try_clone()?,
+ /// &mut file,
/// batch.schema(),
/// None
/// )?;
/// writer.write(&batch)?;
/// writer.close()?;
+ /// let file = Bytes::from(file);
///
/// // Create a virtual column for row numbers
/// let row_number_field = Arc::new(Field::new("row_number", DataType::Int64, false)