alamb commented on code in PR #8365:
URL: https://github.com/apache/arrow-rs/pull/8365#discussion_r2363801740
##########
parquet/src/variant.rs:
##########
@@ -108,6 +98,202 @@
//! ```
//!
//! # Example: Reading a Parquet file with Variant column
-//! (TODO: add example)
+//!
+//! Use the [`VariantType`] extension type to find the Variant column:
+//!
+//! ```
+//! # use std::sync::Arc;
+//! # use std::path::PathBuf;
+//! # use arrow_array::{ArrayRef, RecordBatch, RecordBatchReader};
+//! # use parquet::variant::{Variant, VariantArray, VariantType};
+//! # use parquet::arrow::arrow_reader::ArrowReaderBuilder;
+//! # fn main() -> Result<(), parquet::errors::ParquetError> {
+//! # use arrow_array::StructArray;
+//! # fn file_path() -> PathBuf { // return a testing file path
+//! # PathBuf::from(arrow::util::test_util::parquet_test_data())
+//! # .join("..")
+//! # .join("shredded_variant")
+//! # .join("case-075.parquet")
+//! # }
+//! // Read the Parquet file using standard Arrow Parquet reader
+//! let file = std::fs::File::open(file_path())?;
+//! let mut reader = ArrowReaderBuilder::try_new(file)?.build()?;
+//!
+//! // You can find the Variant using the VariantType extension type
+//! let schema = reader.schema();
+//! let field = schema.field_with_name("var")?;
+//! assert!(field.try_extension_type::<VariantType>().is_ok());
+//!
+//! // The reader will yield RecordBatches with a StructArray
+//! // to convert them to VariantArray, use VariantArray::try_new
+//! let batch = reader.next().unwrap().unwrap();
+//! let col = batch.column_by_name("var").unwrap();
+//! let var_array = VariantArray::try_new(&col)?;
+//! assert_eq!(var_array.len(), 1);
+//! let var_value: Variant = var_array.value(0);
+//! assert_eq!(var_value, Variant::from("iceberg")); // the value in case-075.parquet
+//! # Ok(())
+//! # }
+//! ```
pub use parquet_variant::*;
-pub use parquet_variant_compute as compute;
+pub use parquet_variant_compute::*;
+
+#[cfg(test)]
+mod tests {
+ use crate::arrow::arrow_reader::ArrowReaderBuilder;
+ use crate::arrow::ArrowWriter;
+ use crate::file::metadata::{ParquetMetaData, ParquetMetaDataReader};
+ use crate::file::reader::ChunkReader;
+ use arrow::util::test_util::parquet_test_data;
+ use arrow_array::{ArrayRef, RecordBatch};
+ use arrow_schema::Schema;
+ use bytes::Bytes;
+ use parquet_variant::{Variant, VariantBuilderExt};
+ use parquet_variant_compute::{VariantArray, VariantArrayBuilder, VariantType};
+ use std::path::PathBuf;
+ use std::sync::Arc;
+
+ #[test]
+ fn roundtrip_basic() {
+ roundtrip(variant_array());
+ }
+
+ /// Ensure a file with Variant LogicalType, written by another writer in
+ /// parquet-testing, can be read as a VariantArray
+ #[test]
+ fn read_logical_type() {
Review Comment:
I double-checked, and this test also covers some additional things, such as
testing sizes. Do you think it is worth removing?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]