This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new e8285bea0ec clean up ByteView construction (#5879)
e8285bea0ec is described below

commit e8285bea0eca99316590fcefc8816e1df6581a5c
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Thu Jun 13 16:09:33 2024 -0400

    clean up ByteView construction (#5879)
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 arrow-cast/src/cast/mod.rs                | 44 +++++++++----------------------
 parquet/src/arrow/buffer/offset_buffer.rs |  7 ++---
 2 files changed, 17 insertions(+), 34 deletions(-)

diff --git a/arrow-cast/src/cast/mod.rs b/arrow-cast/src/cast/mod.rs
index 354c31af695..55f2ed72836 100644
--- a/arrow-cast/src/cast/mod.rs
+++ b/arrow-cast/src/cast/mod.rs
@@ -46,7 +46,7 @@ use crate::cast::dictionary::*;
 use crate::cast::list::*;
 use crate::cast::string::*;
 
-use arrow_buffer::{IntervalMonthDayNano, ScalarBuffer};
+use arrow_buffer::IntervalMonthDayNano;
 use arrow_data::ByteView;
 use chrono::{NaiveTime, Offset, TimeZone, Utc};
 use std::cmp::Ordering;
@@ -2341,47 +2341,29 @@ where
     FROM::Offset: OffsetSizeTrait + ToPrimitive,
     V: ByteViewType,
 {
-    let data = array.to_data();
-    assert_eq!(data.data_type(), &FROM::DATA_TYPE);
-
+    let byte_array: &GenericByteArray<FROM> = array.as_bytes();
     let len = array.len();
-    let str_values_buf = data.buffers()[1].clone();
-    let offsets = data.buffers()[0].typed_data::<FROM::Offset>();
+    let str_values_buf = byte_array.values().clone();
+    let offsets = byte_array.offsets();
 
-    let mut views_builder = BufferBuilder::<u128>::new(len);
-    for w in offsets.windows(2) {
+    let mut views_builder = GenericByteViewBuilder::<V>::with_capacity(len);
+    let block = views_builder.append_block(str_values_buf);
+    for (i, w) in offsets.windows(2).enumerate() {
         let offset = w[0].to_u32().unwrap();
         let end = w[1].to_u32().unwrap();
-        let value_buf = &str_values_buf[offset as usize..end as usize];
         let length = end - offset;
 
-        if length <= 12 {
-            let mut view_buffer = [0; 16];
-            view_buffer[0..4].copy_from_slice(&length.to_le_bytes());
-            view_buffer[4..4 + value_buf.len()].copy_from_slice(value_buf);
-            views_builder.append(u128::from_le_bytes(view_buffer));
+        if byte_array.is_null(i) {
+            views_builder.append_null();
         } else {
-            let view = ByteView {
-                length,
-                prefix: u32::from_le_bytes(value_buf[0..4].try_into().unwrap()),
-                buffer_index: 0,
-                offset,
-            };
-            views_builder.append(view.into());
+            // Safety: the input was a valid array so it valid UTF8 (if string). And
+            // all offsets were valid and we created the views correctly
+            unsafe { views_builder.append_view_unchecked(block, offset, length) }
         }
     }
 
     assert_eq!(views_builder.len(), len);
-
-    // Safety: the input was a valid array so it valid UTF8 (if string). And
-    // all offsets were valid and we created the views correctly
-    Ok(Arc::new(unsafe {
-        GenericByteViewArray::<V>::new_unchecked(
-            ScalarBuffer::new(views_builder.finish(), 0, len),
-            vec![str_values_buf],
-            data.nulls().cloned(),
-        )
-    }))
+    Ok(Arc::new(views_builder.finish()))
 }
 
 /// Helper function to cast from one `ByteViewType` array to `ByteArrayType` array.
diff --git a/parquet/src/arrow/buffer/offset_buffer.rs b/parquet/src/arrow/buffer/offset_buffer.rs
index 181e69c669a..806f144d966 100644
--- a/parquet/src/arrow/buffer/offset_buffer.rs
+++ b/parquet/src/arrow/buffer/offset_buffer.rs
@@ -164,9 +164,10 @@ impl<I: OffsetSizeTrait> OffsetBuffer<I> {
             let len = (end - start).to_usize().unwrap();
 
             if len != 0 {
-                builder
-                    .try_append_view(block, start.as_usize() as u32, len as u32)
-                    .unwrap();
+                // Safety: (1) the buffer is valid (2) the offsets are valid (3) the values in between are of ByteViewType
+                unsafe {
+                    builder.append_view_unchecked(block, start.as_usize() as u32, len as u32);
+                }
             } else {
                 builder.append_null();
             }

Reply via email to