XiangpengHao commented on code in PR #8940:
URL: https://github.com/apache/arrow-rs/pull/8940#discussion_r2594072432


##########
parquet-variant-compute/src/shred_variant.rs:
##########
@@ -348,6 +349,139 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
     }
 }
 
+fn split_variant_path(path: &str) -> Vec<String> {
+    path.split('.')
+        .filter(|segment| !segment.is_empty())
+        .map(|segment| segment.to_string())
+        .collect()
+}
+
+/// Builder for constructing a variant shredding schema.
+///
+/// The builder pattern makes it easy to incrementally define which fields
+/// should be shredded and with what types.
+///
+/// # Example
+///
+/// ```
+/// use arrow::datatypes::DataType;
+/// use parquet_variant_compute::ShredTypeBuilder;
+///
+/// // Define the shredding schema using the builder
+/// let shredding_type = ShredTypeBuilder::default()
+///     .with_path("time", &DataType::Int64)
+///     .with_path("hostname", &DataType::Utf8)
+///     .with_path("metrics.cpu", &DataType::Float64)
+///     .with_path("metrics.memory", &DataType::Float64)
+///     .build();
+///
+/// // The shredding_type can now be passed to shred_variant:
+/// // let shredded = shred_variant(&input, &shredding_type)?;
+/// ```
+#[derive(Default, Clone)]
+pub struct ShredTypeBuilder {
+    root: VariantSchemaNode,
+}
+
+impl ShredTypeBuilder {
+    /// Create a new empty schema builder.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Insert a typed path into the schema.
+    ///
+    /// The path uses dot notation to specify nested fields.
+    /// For example, "a.b.c" will create a nested structure.
+    ///
+    /// # Arguments
+    ///
+    /// * `path` - The dot-separated path (e.g., "user.name" or "metrics.cpu")
+    /// * `data_type` - The Arrow data type for this field
+    pub fn with_path(mut self, path: &str, data_type: &DataType) -> Self {

Review Comment:
   > Out of interest, if one was to have a version using a FieldRef, what should
   > the behaviour be if the name in the field doesn't match the path string?
   
   Currently the field name is ignored.



##########
parquet-variant-compute/src/shred_variant.rs:
##########
@@ -348,6 +349,139 @@ impl<'a> VariantToShreddedObjectVariantRowBuilder<'a> {
     }
 }
 
+fn split_variant_path(path: &str) -> Vec<String> {
+    path.split('.')
+        .filter(|segment| !segment.is_empty())
+        .map(|segment| segment.to_string())
+        .collect()
+}
+
+/// Builder for constructing a variant shredding schema.
+///
+/// The builder pattern makes it easy to incrementally define which fields
+/// should be shredded and with what types.
+///
+/// # Example
+///
+/// ```
+/// use arrow::datatypes::DataType;
+/// use parquet_variant_compute::ShredTypeBuilder;
+///
+/// // Define the shredding schema using the builder
+/// let shredding_type = ShredTypeBuilder::default()
+///     .with_path("time", &DataType::Int64)
+///     .with_path("hostname", &DataType::Utf8)
+///     .with_path("metrics.cpu", &DataType::Float64)
+///     .with_path("metrics.memory", &DataType::Float64)
+///     .build();
+///
+/// // The shredding_type can now be passed to shred_variant:
+/// // let shredded = shred_variant(&input, &shredding_type)?;
+/// ```
+#[derive(Default, Clone)]
+pub struct ShredTypeBuilder {
+    root: VariantSchemaNode,
+}
+
+impl ShredTypeBuilder {
+    /// Create a new empty schema builder.
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    /// Insert a typed path into the schema.
+    ///
+    /// The path uses dot notation to specify nested fields.
+    /// For example, "a.b.c" will create a nested structure.
+    ///
+    /// # Arguments
+    ///
+    /// * `path` - The dot-separated path (e.g., "user.name" or "metrics.cpu")
+    /// * `data_type` - The Arrow data type for this field
+    pub fn with_path(mut self, path: &str, data_type: &DataType) -> Self {

Review Comment:
   > Out of interest, if one was to have a version using a FieldRef, what should
   > the behaviour be if the name in the field doesn't match the path string?
   
   Currently the field name is ignored.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to