alamb commented on code in PR #5619:
URL: https://github.com/apache/arrow-rs/pull/5619#discussion_r1593872786


##########
arrow-data/src/byte_view.rs:
##########
@@ -15,10 +15,477 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_buffer::Buffer;
+use arrow_buffer::{Buffer, ToByteSlice};
 use arrow_schema::ArrowError;
+use std::fmt::Formatter;
+use std::ops::Range;
 
-#[derive(Debug, Copy, Clone, Default)]
+/// A `View` is a `u128` value that represents a single value in a
+/// [`GenericByteViewArray`].
+///
+/// Depending on the array type, the value may be a utf8 string or simply 
bytes.
+/// The layout of the u128 is different depending on the length of the bytes
+/// stored at that location:
+///
+/// # 12 or fewer bytes [`InlineView`]
+///
+/// Values with 12 or fewer bytes are stored directly inlined in the `u128`. 
See
+/// [`InlineView`] for field access.
+///
+/// ```text
+///                      
┌───────────────────────────────────────────┬──────────────┐
+///                      │                   data                    │    
length    │
+///  Strings, len <= 12  │             (padded with \0)              │    
(u32)     │
+///   (InlineView)       │                                           │         
     │
+///                      
└───────────────────────────────────────────┴──────────────┘
+///                      127                                        31         
    0  bit
+///                                                                            
       offset
+/// ```
+///
+/// # More than 12 bytes [`OffsetView`]
+///
+/// Values with more than 12 bytes store the first 4 bytes inline, an offset 
and
+/// buffer index that reference the actual data (including the first 4 bytes) 
in
+/// an externally managed buffer. See [`OffsetView`] for field access.
+///
+/// ```text
+///                      
┌──────────────┬─────────────┬──────────────┬──────────────┐
+///                      │buffer offset │ buffer index│ data prefix  │    
length    │
+///  Strings, len > 12   │    (u32)     │    (u32)    │  (4 bytes)   │    
(u32)     │
+///   (OffsetView)       │              │             │              │         
     │
+///                      
└──────────────┴─────────────┴──────────────┴──────────────┘
+///                      127            95            63             31        
    0  bit
+///                                                                            
       offset
+/// ```
+///
+/// See Also:
+/// * [`OwnedView`]: An owned variant of [`View`], used for constructing views
+///
+/// [`GenericByteViewArray`]: 
https://docs.rs/arrow/latest/arrow/array/struct.GenericByteViewArray.html
+///
+/// # Notes
+/// Equality is based on the bitwise value of the view, not the data it 
logically points to
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum View<'a> {

Review Comment:
   There is already another struct named `ByteView` in this file, so to avoid 
an API change I didn't reuse the name. If we don't care about API changes, we 
could remove the existing `ByteView`.



##########
arrow-data/src/byte_view.rs:
##########
@@ -15,10 +15,477 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_buffer::Buffer;
+use arrow_buffer::{Buffer, ToByteSlice};
 use arrow_schema::ArrowError;
+use std::fmt::Formatter;
+use std::ops::Range;
 
-#[derive(Debug, Copy, Clone, Default)]
+/// A `View` is a `u128` value that represents a single value in a
+/// [`GenericByteViewArray`].
+///
+/// Depending on the array type, the value may be a utf8 string or simply 
bytes.
+/// The layout of the u128 is different depending on the length of the bytes
+/// stored at that location:
+///
+/// # 12 or fewer bytes [`InlineView`]
+///
+/// Values with 12 or fewer bytes are stored directly inlined in the `u128`. 
See
+/// [`InlineView`] for field access.
+///
+/// ```text
+///                      
┌───────────────────────────────────────────┬──────────────┐
+///                      │                   data                    │    
length    │
+///  Strings, len <= 12  │             (padded with \0)              │    
(u32)     │
+///   (InlineView)       │                                           │         
     │
+///                      
└───────────────────────────────────────────┴──────────────┘
+///                      127                                        31         
    0  bit
+///                                                                            
       offset
+/// ```
+///
+/// # More than 12 bytes [`OffsetView`]
+///
+/// Values with more than 12 bytes store the first 4 bytes inline, an offset 
and
+/// buffer index that reference the actual data (including the first 4 bytes) 
in
+/// an externally managed buffer. See [`OffsetView`] for field access.
+///
+/// ```text
+///                      
┌──────────────┬─────────────┬──────────────┬──────────────┐
+///                      │buffer offset │ buffer index│ data prefix  │    
length    │
+///  Strings, len > 12   │    (u32)     │    (u32)    │  (4 bytes)   │    
(u32)     │
+///   (OffsetView)       │              │             │              │         
     │
+///                      
└──────────────┴─────────────┴──────────────┴──────────────┘
+///                      127            95            63             31        
    0  bit
+///                                                                            
       offset
+/// ```
+///
+/// See Also:
+/// * [`OwnedView`]: An owned variant of [`View`], used for constructing views
+///
+/// [`GenericByteViewArray`]: 
https://docs.rs/arrow/latest/arrow/array/struct.GenericByteViewArray.html
+///
+/// # Notes
+/// Equality is based on the bitwise value of the view, not the data it 
logically points to
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum View<'a> {
+    /// Entire string is inlined
+    Inline(InlineView<'a>),
+    /// String is stored in buffer, 4 byte prefix stored inline
+    Offset(OffsetView<'a>),
+}
+
+impl<'a> View<'a> {
+    /// Create a new `View` representing the contents of a `u128`
+    #[inline(always)]
+    pub fn new(v: &'a u128) -> Self {
+        let len = *v as u32;
+        if len <= 12 {
+            Self::Inline(InlineView::from(v))
+        } else {
+            Self::Offset(OffsetView::from(v))
+        }
+    }
+
+    /// Convert the view to a `u128`
+    pub fn to_u128(&self) -> u128 {
+        match self {
+            Self::Inline(inline) => inline.to_u128(),
+            Self::Offset(offset) => offset.to_u128(),
+        }
+    }
+
+    /// Return an [`OwnedView`] representing this view
+    pub fn to_owned(&self) -> OwnedView {
+        OwnedView::new(self.to_u128())
+    }
+}
+
+impl<'a> From<&'a u128> for View<'a> {
+    #[inline(always)]
+    fn from(v: &'a u128) -> Self {
+        Self::new(v)
+    }
+}
+
+/// Owned variant of [`View`] for constructing views from a string or byte 
slice.
+///
+/// # Example
+/// ```
+/// # use arrow_data::OwnedView;
+/// // construct a view from a string
+/// let view = OwnedView::new_from_str("hello");
+/// assert!(matches!(view, OwnedView::Inline(_)));
+/// ```
+///
+/// ```
+/// # use arrow_data::OwnedView;
+/// // construct a view from a longer string
+/// let view = OwnedView::new_from_str("hello my name is crumple faced fish");
+/// assert!(matches!(view, OwnedView::Offset(_)));
+/// ```
+///
+/// # Notes
+/// Equality is based on the bitwise value of the view, not the data it 
logically points to
+#[derive(PartialEq)]
+pub enum OwnedView {

Review Comment:
   Its use case is to create a view from `&str` / `&[u8]`, copy the relevant 
prefix bytes, and remember which variant (inline or offset) the new view was 
when it was created.
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to