tustvold commented on code in PR #7120:
URL: https://github.com/apache/arrow-rs/pull/7120#discussion_r1968137595


##########
arrow-ipc/src/reader.rs:
##########
@@ -432,6 +458,17 @@ impl<'a> RecordBatchDecoder<'a> {
         self
     }
 
+    /// Specifies if validation should be skipped when reading data (defaults 
to `false`)
+    ///
+    /// # Safety
+    ///
+    /// Relies on the caller only passing a flag with `true` value if they are
+    /// certain that the data is valid
+    pub fn with_skip_validation(mut self, skip_validation: UnsafeFlag) -> Self 
{

Review Comment:
   ```suggestion
       pub(crate) fn with_skip_validation(mut self, skip_validation: 
UnsafeFlag) -> Self {
   ```
   
   This API is a bit funky, I'm not sure if it is technically sound as 
typically having a Safety comment would imply it needs to be unsafe, but it is 
probably the pragmatic solution here. Making it explicitly private reduces the 
risk of it accidentally becoming public



##########
arrow-data/src/data.rs:
##########
@@ -1781,33 +1780,61 @@ impl PartialEq for ArrayData {
     }
 }
 
-mod private {
-    /// A boolean flag that cannot be mutated outside of unsafe code.
+/// A boolean flag that cannot be mutated outside of unsafe code.
+///
+/// Defaults to a value of false.
+///
+/// This structure is used to enforce safety in the [`ArrayDataBuilder`]
+///
+/// [`ArrayDataBuilder`]: super::ArrayDataBuilder
+///
+/// # Example
+/// ```rust
+/// use arrow_data::UnsafeFlag;
+/// assert!(!UnsafeFlag::default().get()); // default is false
+/// let mut flag = UnsafeFlag::new();
+/// assert!(!flag.get()); // defaults to false
+/// // can only set it to true in unsafe code
+/// unsafe { flag.set(true) };
+/// assert!(flag.get()); // now true
+/// ```
+#[derive(Debug, Clone)]

Review Comment:
   ```suggestion
   #[derive(Debug, Clone)]
   #[doc(hidden)]
   ```
   
   Perhaps if we don't really want people using it outside of arrow-rs?



##########
arrow-ipc/benches/ipc_reader.rs:
##########
@@ -123,6 +163,46 @@ fn criterion_benchmark(c: &mut Criterion) {
     });
 }
 
+/// Return an IPC stream with 10 record batches
+fn ipc_stream() -> Vec<u8> {
+    let batch = create_batch(8192, true);
+    let mut buffer = Vec::with_capacity(2 * 1024 * 1024);
+    let mut writer = StreamWriter::try_new(&mut buffer, 
batch.schema().as_ref()).unwrap();
+    for _ in 0..10 {
+        writer.write(&batch).unwrap();
+    }
+    writer.finish().unwrap();
+    buffer
+}
+
+/// Return an IPC stream with ZSTD compression with 10 record batches
+fn ipc_stream_zstd() -> Vec<u8> {

Review Comment:
   Minor nit, but I wonder if we could have an ipc_stream_options that takes a 
`IpcWriteOptions` and then have ipc_stream just call through to this



##########
arrow-ipc/src/reader.rs:
##########
@@ -318,9 +309,42 @@ impl RecordBatchDecoder<'_> {
             _ => unreachable!("Cannot create list or map array from {:?}", 
data_type),
         };
 
-        let array_data = 
builder.align_buffers(!self.require_alignment).build()?;
+        self.create_array_from_builder(builder)
+    }
 
-        Ok(make_array(array_data))
+    fn create_struct_array(
+        &self,
+        struct_node: &FieldNode,
+        null_buffer: Buffer,
+        struct_fields: &Fields,
+        struct_arrays: Vec<ArrayRef>,
+    ) -> Result<ArrayRef, ArrowError> {
+        let null_count = struct_node.null_count() as usize;
+        let len = struct_node.length() as usize;
+
+        if struct_arrays.is_empty() {
+            // `StructArray::from` can't infer the correct row count
+            // if we have zero fields
+            return Ok(Arc::new(StructArray::new_empty_fields(
+                len,
+                (null_count > 0).then(|| BooleanBuffer::new(null_buffer, 0, 
len).into()),
+            )));
+        }
+
+        let nulls = if null_count > 0 {
+            Some(BooleanBuffer::new(null_buffer, 0, len).into())
+        } else {
+            None
+        };

Review Comment:
   ```suggestion
   
           let nulls = (null_count > 0).then(|| BooleanBuffer::new(null_buffer, 
0, len).into());
           if struct_arrays.is_empty() {
               // `StructArray::from` can't infer the correct row count
               // if we have zero fields
               return Ok(Arc::new(StructArray::new_empty_fields(len, nulls)));
           }
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to