friendlymatthew commented on code in PR #7833:
URL: https://github.com/apache/arrow-rs/pull/7833#discussion_r2183779196
##########
parquet-variant/src/builder.rs:
##########
@@ -237,18 +237,40 @@ impl ValueBuffer {
struct MetadataBuilder {
// Field names -- field_ids are assigned in insert order
field_names: IndexSet<String>,
+
+ // flag that checks if field names by insertion order are also lexicographically sorted
+ is_sorted: bool,
}
impl MetadataBuilder {
/// Upsert field name to dictionary, return its ID
fn upsert_field_name(&mut self, field_name: &str) -> u32 {
- let (id, _) = self.field_names.insert_full(field_name.to_string());
+ let (id, new_entry) = self.field_names.insert_full(field_name.to_string());
+
+ if new_entry {
+ let n = self.num_field_names();
+
+ if n == 1 {
+ self.is_sorted = true;
+ } else {
+ self.is_sorted &= self.field_names[n - 2] < self.field_names[n - 1];
+ }
+ }
id as u32
}
+ /// Returns the number of field names stored in the metadata builder.
+ /// Note: this method should be the only place to call `self.field_names.len()`
+ ///
+ /// # Panics
+ ///
+ /// If the number of field names exceeds the maximum allowed value for `u32`.
fn num_field_names(&self) -> usize {
- self.field_names.len()
+ let n = self.field_names.len();
+ assert!(n < u32::MAX as usize);
+
+ n
Review Comment:
I added an assert here to make sure the number of field names fits in a
`u32`. This is unrelated to this PR, but it addresses
https://github.com/apache/arrow-rs/pull/7833/files#r2180370340.
Sorry, I couldn't help myself!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]