This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new f59b94fd7c fix: use the values builder capacity for the hash map in
`PrimitiveDictionaryBuilder::new_from_builders` (#7012)
f59b94fd7c is described below
commit f59b94fd7cae376e7398f31743bc732a746b25b3
Author: Raz Luvaton <[email protected]>
AuthorDate: Sat Jan 25 18:18:15 2025 +0200
fix: use the values builder capacity for the hash map in
`PrimitiveDictionaryBuilder::new_from_builders` (#7012)
* feat: allow setting custom value data type in `PrimitiveDictionaryBuilder`
Fixes #7011
* use the values capacity for the hash map
* update new_from_empty_builders and not new_from_builders
---
.../src/builder/primitive_dictionary_builder.rs | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)
diff --git a/arrow-array/src/builder/primitive_dictionary_builder.rs b/arrow-array/src/builder/primitive_dictionary_builder.rs
index 282f0ae9d5..f4a6662462 100644
--- a/arrow-array/src/builder/primitive_dictionary_builder.rs
+++ b/arrow-array/src/builder/primitive_dictionary_builder.rs
@@ -126,10 +126,11 @@ where
keys_builder.is_empty() && values_builder.is_empty(),
"keys and values builders must be empty"
);
+ let values_capacity = values_builder.capacity();
Self {
keys_builder,
values_builder,
- map: HashMap::new(),
+ map: HashMap::with_capacity(values_capacity),
}
}
@@ -633,4 +634,19 @@ mod tests {
assert_eq!(values, [None, None]);
}
+
+ #[test]
+ fn creating_dictionary_from_builders_should_use_values_capacity_for_the_map() {
+ let builder = PrimitiveDictionaryBuilder::<Int32Type, crate::types::TimestampMicrosecondType>::new_from_empty_builders(
+ PrimitiveBuilder::with_capacity(1).with_data_type(DataType::Int32),
+ PrimitiveBuilder::with_capacity(2).with_data_type(DataType::Timestamp(arrow_schema::TimeUnit::Microsecond, Some("+08:00".into()))),
+ );
+
+ assert!(
+ builder.map.capacity() >= builder.values_builder.capacity(),
+ "map capacity {} should be at least the values capacity {}",
+ builder.map.capacity(),
+ builder.values_builder.capacity()
+ )
+ }
}