alamb commented on code in PR #8715:
URL: https://github.com/apache/arrow-rs/pull/8715#discussion_r2539358654


##########
parquet/src/arrow/arrow_reader/mod.rs:
##########
@@ -566,6 +571,73 @@ impl ArrowReaderOptions {
         }
     }
 
+    /// Include virtual columns in the output.
+    ///
+    /// Virtual columns are columns that are not part of the Parquet schema, 
but are added to the output by the reader such as row numbers.
+    ///
+    /// # Example
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use arrow_array::{ArrayRef, Int64Array, RecordBatch};
+    /// # use arrow_schema::{DataType, Field, Schema};
+    /// # use parquet::arrow::{ArrowWriter, RowNumber};
+    /// # use parquet::arrow::arrow_reader::{ArrowReaderOptions, 
ParquetRecordBatchReaderBuilder};
+    /// # use tempfile::tempfile;
+    /// #
+    /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
+    /// // Create a simple record batch with some data
+    /// let values = Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef;
+    /// let batch = RecordBatch::try_from_iter(vec![("value", values)])?;
+    ///
+    /// // Write the batch to a temporary parquet file
+    /// let file = tempfile()?;
+    /// let mut writer = ArrowWriter::try_new(
+    ///     file.try_clone()?,
+    ///     batch.schema(),
+    ///     None
+    /// )?;
+    /// writer.write(&batch)?;
+    /// writer.close()?;
+    ///
+    /// // Create a virtual column for row numbers
+    /// let row_number_field = Arc::new(Field::new("row_number", 
DataType::Int64, false)
+    ///     .with_extension_type(RowNumber));
+    ///
+    /// // Configure options with virtual columns
+    /// let options = ArrowReaderOptions::new()
+    ///     .with_virtual_columns(vec![row_number_field]);
+    ///
+    /// // Create a reader with the options
+    /// let mut reader = ParquetRecordBatchReaderBuilder::try_new_with_options(
+    ///     file,
+    ///     options
+    /// )?
+    /// .build()?;
+    ///
+    /// // Read the batch - it will include both the original column and the 
virtual row_number column
+    /// let result_batch = reader.next().unwrap()?;
+    /// assert_eq!(result_batch.num_columns(), 2); // "value" + "row_number"
+    /// assert_eq!(result_batch.num_rows(), 3);
+    /// #
+    /// # Ok(())
+    /// # }
+    /// ```
+    pub fn with_virtual_columns(self, virtual_columns: Vec<FieldRef>) -> Self {
+        // Validate that all fields are virtual columns
+        for field in &virtual_columns {
+            if !is_virtual_column(field) {
+                panic!(

Review Comment:
   I think it is ok to return errors when trying to build options, like
   ```rust
   let options = options.with_virtual_columns(cols)?;
   ```
   
   If there is an error it is unlikely the code wants to continue configuring 
the options anyways



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to