lyang24 commented on code in PR #9040:
URL: https://github.com/apache/arrow-rs/pull/9040#discussion_r2645229547
##########
arrow-array/src/builder/generic_bytes_view_builder.rs:
##########
@@ -430,6 +430,49 @@ impl<T: ByteViewType + ?Sized> GenericByteViewBuilder<T> {
};
}
+ /// Append the same value `n` times into the builder
+ ///
+ /// This is more efficient than calling [`Self::append_value`] `n` times,
+ /// especially when deduplication is enabled, as it only hashes the value once.
+ ///
+ /// # Panics
+ ///
+ /// Panics if
+ /// - String buffer count exceeds `u32::MAX`
+ /// - String length exceeds `u32::MAX`
+ ///
+ /// # Example
+ /// ```
+ /// # use arrow_array::builder::StringViewBuilder;
+ /// # use arrow_array::Array;
+ /// let mut builder = StringViewBuilder::new().with_deduplicate_strings();
+ ///
+ /// // Append "hello" 1000 times efficiently
+ /// builder.append_value_n("hello", 1000);
+ ///
+ /// let array = builder.finish();
+ /// assert_eq!(array.len(), 1000);
+ ///
+ /// // All values are "hello"
+ /// for value in array.iter() {
+ /// assert_eq!(value, Some("hello"));
+ /// }
+ /// ```
+ #[inline]
+ pub fn append_value_n(&mut self, value: impl AsRef<T::Native>, n: usize) {
+ if n == 0 {
+ return;
+ }
+ // Process value once (handles deduplication, buffer management, view creation)
+ self.append_value(value);
+ // Reuse the view (n-1) times
+ let view = *self.views_buffer.last().unwrap();
+ for _ in 1..n {
+ self.views_buffer.push(view);
+ }
+ self.null_buffer_builder.append_n_non_nulls(n - 1);
+ }
Review Comment:
fixed
##########
arrow-array/src/builder/generic_bytes_view_builder.rs:
##########
@@ -884,4 +927,72 @@ mod tests {
MAX_BLOCK_SIZE as usize
);
}
+
+ #[test]
+ fn test_append_value_n() {
+ // Test with inline strings (<=12 bytes)
+ let mut builder = StringViewBuilder::new();
+
+ builder.append_value_n("hello", 100);
+ builder.append_value("world");
+ builder.append_value_n("foo", 50);
+
+ let array = builder.finish();
+ assert_eq!(array.len(), 151);
+ assert_eq!(array.null_count(), 0);
+
+ // Verify the values
+ for i in 0..100 {
+ assert_eq!(array.value(i), "hello");
+ }
+ assert_eq!(array.value(100), "world");
+ for i in 101..151 {
+ assert_eq!(array.value(i), "foo");
+ }
+
+ // All inline strings should have no data buffers
+ assert_eq!(array.data_buffers().len(), 0);
+ }
+
+ #[test]
+ fn test_append_value_n_with_deduplication() {
+ let long_string = "This is a very long string that exceeds the inline length";
+
+ // Test with deduplication enabled
+ let mut builder = StringViewBuilder::new().with_deduplicate_strings();
+
+ builder.append_value_n(long_string, 1000);
+
+ let array = builder.finish();
+ assert_eq!(array.len(), 1000);
+ assert_eq!(array.null_count(), 0);
+
+ // Verify all values are the same
+ for i in 0..1000 {
+ assert_eq!(array.value(i), long_string);
+ }
+
+ // With deduplication, should only have 1 data buffer containing the string once
+ assert_eq!(array.data_buffers().len(), 1);
+
+ // All views should be identical
+ let first_view = array.views()[0];
+ for view in array.views().iter() {
+ assert_eq!(*view, first_view);
+ }
+ }
Review Comment:
good call fixed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]