ariesdevil commented on code in PR #5619:
URL: https://github.com/apache/arrow-rs/pull/5619#discussion_r1591029570


##########
arrow-data/src/byte_view.rs:
##########
@@ -15,10 +15,453 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_buffer::Buffer;
+use arrow_buffer::{Buffer, ToByteSlice};
 use arrow_schema::ArrowError;
+use std::fmt::Formatter;
+use std::ops::Range;
 
-#[derive(Debug, Copy, Clone, Default)]
+/// A `View` is a `u128` value that represents a single value in a
+/// [`GenericByteViewArray`].
+///
+/// Depending on the array type, the value may be a utf8 string or simply bytes.
+/// The layout of the u128 is different depending on the length of the bytes
+/// stored at that location:
+///
+/// # 12 or fewer bytes [`InlineView`]
+///
+/// Values with 12 or fewer bytes are stored directly inlined in the `u128`. See
+/// [`InlineView`] for field access.
+///
+/// ```text
+///                      ┌───────────────────────────────────────────┬──────────────┐
+///                      │                   data                    │    length    │
+///  Strings, len <= 12  │             (padded with \0)              │    (u32)     │
+///   (InlineView)       │                                           │              │
+///                      └───────────────────────────────────────────┴──────────────┘
+///                      127                                        31             0  bit
+///                                                                                   offset
+/// ```
+///
+/// # More than 12 bytes [`OffsetView`]
+///
+/// Values with more than 12 bytes store the first 4 bytes inline, an offset and
+/// buffer index that reference the actual data (including the first 4 bytes) in
+/// an externally managed buffer. See [`OffsetView`] for field access.
+///
+/// ```text
+///                      ┌──────────────┬─────────────┬──────────────┬──────────────┐
+///                      │buffer offset │ buffer index│ data prefix  │    length    │
+///  Strings, len > 12   │    (u32)     │    (u32)    │  (4 bytes)   │    (u32)     │
+///   (OffsetView)       │              │             │              │              │
+///                      └──────────────┴─────────────┴──────────────┴──────────────┘
+///                      127            95            63             31            0  bit
+///                                                                                   offset
+/// ```
+///
+/// See Also:
+/// * [`OwnedView`]: An owned variant of [`View`], used for constructing views
+///
+/// [`GenericByteViewArray`]: https://docs.rs/arrow/latest/arrow/array/struct.GenericByteViewArray.html
+///
+/// # Notes
+/// Equality is based on the bitwise value of the view, not the data it logically points to
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum View<'a> {
+    /// Entire string is inlined
+    Inline(InlineView<'a>),
+    /// String is stored in buffer, 4 byte prefix stored inline
+    Offset(OffsetView<'a>),
+}
+impl<'a> From<&'a u128> for View<'a> {
+    #[inline(always)]
+    fn from(v: &'a u128) -> Self {
+        let len = *v as u32;
+        if len <= 12 {
+            Self::Inline(InlineView::from(v))
+        } else {
+            Self::Offset(OffsetView::from(v))
+        }
+    }
+}
+
+/// Owned variant of [`View`] for constructing views from a string or byte slice.
+///
+/// # Example
+/// ```
+/// # use arrow_data::OwnedView;
+/// // construct a view from a string
+/// let view = OwnedView::new_from_str("hello");
+/// assert!(matches!(view, OwnedView::Inline(_)));
+/// ```
+///
+/// ```
+/// # use arrow_data::OwnedView;
+/// // construct a view from a longer string
+/// let view = OwnedView::new_from_str("hello my name is crumple faced fish");
+/// assert!(matches!(view, OwnedView::Offset(_)));
+/// ```
+///
+/// # Notes
+/// Equality is based on the bitwise value of the view, not the data it logically points to
+#[derive(PartialEq)]
+pub enum OwnedView {

Review Comment:
   Do we need `From<View>` and `Into<View>` for `OwnedView`?



##########
arrow-data/src/byte_view.rs:
##########
@@ -15,10 +15,453 @@
 // specific language governing permissions and limitations
 // under the License.
 
-use arrow_buffer::Buffer;
+use arrow_buffer::{Buffer, ToByteSlice};
 use arrow_schema::ArrowError;
+use std::fmt::Formatter;
+use std::ops::Range;
 
-#[derive(Debug, Copy, Clone, Default)]
+/// A `View` is a `u128` value that represents a single value in a
+/// [`GenericByteViewArray`].
+///
+/// Depending on the array type, the value may be a utf8 string or simply bytes.
+/// The layout of the u128 is different depending on the length of the bytes
+/// stored at that location:
+///
+/// # 12 or fewer bytes [`InlineView`]
+///
+/// Values with 12 or fewer bytes are stored directly inlined in the `u128`. See
+/// [`InlineView`] for field access.
+///
+/// ```text
+///                      ┌───────────────────────────────────────────┬──────────────┐
+///                      │                   data                    │    length    │
+///  Strings, len <= 12  │             (padded with \0)              │    (u32)     │
+///   (InlineView)       │                                           │              │
+///                      └───────────────────────────────────────────┴──────────────┘
+///                      127                                        31             0  bit
+///                                                                                   offset
+/// ```
+///
+/// # More than 12 bytes [`OffsetView`]
+///
+/// Values with more than 12 bytes store the first 4 bytes inline, an offset and
+/// buffer index that reference the actual data (including the first 4 bytes) in
+/// an externally managed buffer. See [`OffsetView`] for field access.
+///
+/// ```text
+///                      ┌──────────────┬─────────────┬──────────────┬──────────────┐
+///                      │buffer offset │ buffer index│ data prefix  │    length    │
+///  Strings, len > 12   │    (u32)     │    (u32)    │  (4 bytes)   │    (u32)     │
+///   (OffsetView)       │              │             │              │              │
+///                      └──────────────┴─────────────┴──────────────┴──────────────┘
+///                      127            95            63             31            0  bit
+///                                                                                   offset
+/// ```
+///
+/// See Also:
+/// * [`OwnedView`]: An owned variant of [`View`], used for constructing views
+///
+/// [`GenericByteViewArray`]: https://docs.rs/arrow/latest/arrow/array/struct.GenericByteViewArray.html
+///
+/// # Notes
+/// Equality is based on the bitwise value of the view, not the data it logically points to
+#[derive(Debug, Copy, Clone, PartialEq)]
+pub enum View<'a> {
+    /// Entire string is inlined
+    Inline(InlineView<'a>),
+    /// String is stored in buffer, 4 byte prefix stored inline
+    Offset(OffsetView<'a>),
+}
+impl<'a> From<&'a u128> for View<'a> {
+    #[inline(always)]
+    fn from(v: &'a u128) -> Self {
+        let len = *v as u32;
+        if len <= 12 {
+            Self::Inline(InlineView::from(v))
+        } else {
+            Self::Offset(OffsetView::from(v))
+        }
+    }
+}
+
+/// Owned variant of [`View`] for constructing views from a string or byte slice.
+///
+/// # Example
+/// ```
+/// # use arrow_data::OwnedView;
+/// // construct a view from a string
+/// let view = OwnedView::new_from_str("hello");
+/// assert!(matches!(view, OwnedView::Inline(_)));
+/// ```
+///
+/// ```
+/// # use arrow_data::OwnedView;
+/// // construct a view from a longer string
+/// let view = OwnedView::new_from_str("hello my name is crumple faced fish");
+/// assert!(matches!(view, OwnedView::Offset(_)));
+/// ```
+///
+/// # Notes
+/// Equality is based on the bitwise value of the view, not the data it logically points to
+#[derive(PartialEq)]
+pub enum OwnedView {

Review Comment:
   Do we need `From<View>` and `Into<View>` for `OwnedView`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to