vustef commented on code in PR #8715:
URL: https://github.com/apache/arrow-rs/pull/8715#discussion_r2526750191
##########
parquet/src/arrow/arrow_reader/mod.rs:
##########
@@ -551,6 +556,73 @@ impl ArrowReaderOptions {
}
}
+ /// Include virtual columns in the output.
+ ///
+ /// Virtual columns are columns that are not part of the Parquet schema,
but are added to the output by the reader such as row numbers.
+ ///
+ /// # Example
+ /// ```
+ /// # use std::sync::Arc;
+ /// # use arrow_array::{ArrayRef, Int64Array, RecordBatch};
+ /// # use arrow_schema::{DataType, Field, Schema};
+ /// # use parquet::arrow::{ArrowWriter, RowNumber};
+ /// # use parquet::arrow::arrow_reader::{ArrowReaderOptions,
ParquetRecordBatchReaderBuilder};
+ /// # use tempfile::tempfile;
+ /// #
+ /// # fn main() -> Result<(), Box<dyn std::error::Error>> {
+ /// // Create a simple record batch with some data
+ /// let values = Arc::new(Int64Array::from(vec![1, 2, 3])) as ArrayRef;
+ /// let batch = RecordBatch::try_from_iter(vec![("value", values)])?;
+ ///
+ /// // Write the batch to a temporary parquet file
+ /// let file = tempfile()?;
+ /// let mut writer = ArrowWriter::try_new(
+ /// file.try_clone()?,
+ /// batch.schema(),
+ /// None
+ /// )?;
+ /// writer.write(&batch)?;
+ /// writer.close()?;
+ ///
+ /// // Create a virtual column for row numbers
+ /// let row_number_field = Arc::new(Field::new("row_number",
DataType::Int64, false)
+ /// .with_extension_type(RowNumber));
+ ///
+ /// // Configure options with virtual columns
+ /// let options = ArrowReaderOptions::new()
+ /// .with_virtual_columns(vec![row_number_field]);
+ ///
+ /// // Create a reader with the options
+ /// let mut reader = ParquetRecordBatchReaderBuilder::try_new_with_options(
+ /// file,
+ /// options
+ /// )?
+ /// .build()?;
+ ///
+ /// // Read the batch - it will include both the original column and the
virtual row_number column
+ /// let result_batch = reader.next().unwrap()?;
+ /// assert_eq!(result_batch.num_columns(), 2); // "value" + "row_number"
+ /// assert_eq!(result_batch.num_rows(), 3);
+ /// #
+ /// # Ok(())
+ /// # }
+ /// ```
+ pub fn with_virtual_columns(self, virtual_columns: Vec<FieldRef>) -> Self {
Review Comment:
Thanks for figuring this out. I guess there's no action that we can take
right now then, please let me know if it's otherwise.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]