scovich commented on code in PR #8167:
URL: https://github.com/apache/arrow-rs/pull/8167#discussion_r2288896827
##########
parquet-variant/src/builder.rs:
##########
@@ -437,13 +397,76 @@ impl ValueBuffer {
}
}
+pub trait MetadataBuilder: std::fmt::Debug {
+ fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32,
ArrowError>;
+ fn field_name(&self, field_id: usize) -> &str;
+ fn num_field_names(&self) -> usize;
+ fn truncate_field_names(&mut self, new_size: usize);
+}
+
+impl MetadataBuilder for MetadataBuilderXX {
+ fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32,
ArrowError> {
+ Ok(self.upsert_field_name(field_name))
+ }
+ fn field_name(&self, field_id: usize) -> &str {
+ self.field_name(field_id)
+ }
+ fn num_field_names(&self) -> usize {
+ self.num_field_names()
+ }
+ fn truncate_field_names(&mut self, new_size: usize) {
+ self.field_names.truncate(new_size)
+ }
+}
+
+#[derive(Debug)]
+pub struct ReadOnlyMetadataBuilder<'m> {
+ metadata: VariantMetadata<'m>,
+ known_field_names: HashMap<&'m str, usize>,
+}
+
+impl<'m> ReadOnlyMetadataBuilder<'m> {
+ pub fn new(metadata: VariantMetadata<'m>) -> Self {
+ Self {
+ metadata,
+ known_field_names: HashMap::new(),
+ }
+ }
+}
+
+impl MetadataBuilder for ReadOnlyMetadataBuilder<'_> {
+ fn try_upsert_field_name(&mut self, field_name: &str) -> Result<u32,
ArrowError> {
+ if let Some(field_id) = self.known_field_names.get(field_name) {
+ return Ok(*field_id as u32);
+ }
+
+ // TODO: Be (a lot) smarter here!
+ let Some(field_id) = self.metadata.iter().position(|name| name ==
field_name) else {
+ return Err(ArrowError::InvalidArgumentError(format!(
+ "Field name '{field_name}' not found in metadata",
+ )));
+ };
+ self.known_field_names.insert(self.metadata.get_infallible(field_id),
field_id);
Review Comment:
Yes -- this specific object's field names are an arbitrarily small subset of
the metadata dictionary. In an extreme case, the dictionary could contain
thousands of entries while the specific object being manipulated contains only
a handful of fields.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org