vustef commented on code in PR #8715:
URL: https://github.com/apache/arrow-rs/pull/8715#discussion_r2499221796


##########
parquet/src/arrow/arrow_reader/mod.rs:
##########
@@ -516,6 +518,73 @@ impl ArrowReaderOptions {
         }
     }
 
+    /// Include virtual columns in the output.
+    ///
+    /// Virtual columns are columns that are not part of the Parquet schema, but are added to the output by the reader.
+    ///
+    /// # Example
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use arrow_array::{ArrayRef, Int64Array, RecordBatch};
+    /// # use arrow_schema::{DataType, Field, Schema};
+    /// # use parquet::arrow::{ArrowWriter, RowNumber};
+    /// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, ParquetRecordBatchReaderBuilder};
+    /// # use tempfile::tempfile;
+    /// #
+    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
+    /// // Create a simple record batch with some data
+    /// let values = Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef;
+    /// let batch = RecordBatch::try_from_iter(vec![("value", values)])?;
+    ///
+    /// // Write the batch to a temporary parquet file
+    /// let file = tempfile()?;
+    /// let mut writer = ArrowWriter::try_new(
+    ///     file.try_clone()?,
+    ///     batch.schema(),
+    ///     None
+    /// )?;
+    /// writer.write(&batch)?;
+    /// writer.close()?;
+    ///
+    /// // Create a virtual column for row numbers
+    /// let row_number_field = Field::new("row_number", DataType::Int64, false)
+    ///     .with_extension_type(RowNumber::default());
+    ///
+    /// // Configure options with virtual columns
+    /// let options = ArrowReaderOptions::new()
+    ///     .with_virtual_columns(vec![row_number_field]);
+    ///
+    /// // Create a reader with the options
+    /// let mut reader = ParquetRecordBatchReaderBuilder::try_new_with_options(
+    ///     file,
+    ///     options
+    /// )?
+    /// .build()?;
+    ///
+    /// // Read the batch - it will include both the original column and the virtual row_number column
+    /// let result_batch = reader.next().unwrap()?;
+    /// assert_eq!(result_batch.num_columns(), 2); // "value" + "row_number"
+    /// assert_eq!(result_batch.num_rows(), 3);
+    /// #
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn with_virtual_columns(self, virtual_columns: Vec<Field>) -> Self {

Review Comment:
   @alamb here's a docstring, please let me know if this is what you wanted to 
see



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to