scovich commented on code in PR #8208:
URL: https://github.com/apache/arrow-rs/pull/8208#discussion_r2296036836


##########
parquet-variant/src/builder.rs:
##########
@@ -431,13 +433,97 @@ impl ValueBuilder {
     }
 }
 
+/// A trait for building variant metadata dictionaries, to be used in 
conjunction with a
+/// [`ValueBuilder`]. The trait provides methods for managing field names and 
their IDs, as well as
+/// rolling back a failed builder operation that might have created new field 
ids.
+pub trait MetadataBuilder: std::fmt::Debug {
+    /// Attempts to register a field name, returning the corresponding 
(possibly newly-created)
+    /// field id on success. Attempting to register the same field name twice 
will _generally_
+    /// produce the same field id both times, but the variant spec does not 
actually require it.
+    fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32, 
ArrowError>;
+
+    /// Retrieves the field name for a given field id, which must be less than
+    /// [`Self::num_field_names`]. Panics if the field id is out of bounds.
+    fn field_name(&self, field_id: usize) -> &str;
+
+    /// Returns the number of field names stored in this metadata builder. Any 
number less than this
+    /// is a valid field id. The builder can be reverted back to this size 
later on (discarding any
+    /// newer/higher field ids) by calling [`Self::truncate_field_names`].
+    fn num_field_names(&self) -> usize;
+
+    /// Reverts the field names to a previous size, discarding any newly out 
of bounds field ids.
+    fn truncate_field_names(&mut self, new_size: usize);
+}
+
+impl MetadataBuilder for BasicMetadataBuilder {
+    fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32, 
ArrowError> {
+        Ok(self.upsert_field_name(field_name))
+    }
+    fn field_name(&self, field_id: usize) -> &str {
+        self.field_name(field_id)
+    }
+    fn num_field_names(&self) -> usize {
+        self.num_field_names()
+    }
+    fn truncate_field_names(&mut self, new_size: usize) {
+        self.field_names.truncate(new_size)
+    }
+}
+
+/// A metadata builder that cannot register new field names, and merely 
returns the field id
+/// associated with a known field name. This is useful for variant unshredding 
operations, where the
+/// metadata column is fixed and -- per variant shredding spec -- already 
contains all field names
+/// from the typed_value column. It is also useful when projecting a subset of 
fields from a variant
+/// object value, since the bytes can be copied across directly without 
re-encoding their field ids.
+#[derive(Debug)]
+pub struct ReadOnlyMetadataBuilder<'m> {
+    metadata: VariantMetadata<'m>,
+    known_field_names: HashMap<&'m str, u32>,
+}
+
+impl<'m> ReadOnlyMetadataBuilder<'m> {
+    /// Creates a new read-only metadata builder from the given metadata 
dictionary.
+    pub fn new(metadata: VariantMetadata<'m>) -> Self {
+        Self {
+            metadata,
+            known_field_names: HashMap::new(),
+        }
+    }
+}
+
+impl MetadataBuilder for ReadOnlyMetadataBuilder<'_> {
+    fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32, 
ArrowError> {
+        if let Some(field_id) = self.known_field_names.get(field_name) {
+            return Ok(*field_id);
+        }
+
+        let Some((field_id, field_name)) = self.metadata.get_entry(field_name) 
else {
+            return Err(ArrowError::InvalidArgumentError(format!(
+                "Field name '{field_name}' not found in metadata dictionary"
+            )));
+        };
+
+        self.known_field_names.insert(field_name, field_id);
+        Ok(field_id)
+    }
+    fn field_name(&self, field_id: usize) -> &str {
+        &self.metadata[field_id]
+    }
+    fn num_field_names(&self) -> usize {
+        self.metadata.len()
+    }
+    fn truncate_field_names(&mut self, new_size: usize) {
+        debug_assert_eq!(self.metadata.len(), new_size);
+    }
+}
+
 /// Builder for constructing metadata for [`Variant`] values.
 ///
 /// This is used internally by the [`VariantBuilder`] to construct the metadata
 ///
 /// You can use an existing `Vec<u8>` as the metadata buffer by using the 
`from` impl.
 #[derive(Default, Debug)]
-struct MetadataBuilder {
+struct BasicMetadataBuilder {

Review Comment:
   Went with `WritableMetadataBuilder`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to