This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new 34305378c Always validate the array data (except the `Decimal`) when creating array in IPC reader (#2547)
34305378c is described below

commit 34305378ce69999444a24d1a77ad65250055a8a3
Author: Remzi Yang <[email protected]>
AuthorDate: Tue Aug 23 22:14:16 2022 +0800

    Always validate the array data (except the `Decimal`) when creating array in IPC reader (#2547)
    
    * validate array data in ipc reader
    
    Signed-off-by: remzi <[email protected]>
    
    * not validate decimal
    
    Signed-off-by: remzi <[email protected]>
    
    Signed-off-by: remzi <[email protected]>
---
 arrow/src/ipc/reader.rs | 60 +++++++++++++++++++++++++------------------------
 1 file changed, 31 insertions(+), 29 deletions(-)

diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs
index adfe40600..fca5fa9d6 100644
--- a/arrow/src/ipc/reader.rs
+++ b/arrow/src/ipc/reader.rs
@@ -411,12 +411,12 @@ fn create_primitive_array(
         }
         FixedSizeBinary(_) => {
             // read 2 buffers: null buffer (optional) and data buffer
-            let builder = ArrayData::builder(data_type.clone())
+            ArrayData::builder(data_type.clone())
                 .len(length)
                 .add_buffer(buffers[1].clone())
-                .null_bit_buffer(null_buffer);
-
-            unsafe { builder.build_unchecked() }
+                .null_bit_buffer(null_buffer)
+                .build()
+                .unwrap()
         }
         Int8
         | Int16
@@ -429,45 +429,45 @@ fn create_primitive_array(
         | Interval(IntervalUnit::YearMonth) => {
             if buffers[1].len() / 8 == length && length != 1 {
                 // interpret as a signed i64, and cast appropriately
-                let builder = ArrayData::builder(DataType::Int64)
+                let data = ArrayData::builder(DataType::Int64)
                     .len(length)
                     .add_buffer(buffers[1].clone())
-                    .null_bit_buffer(null_buffer);
-
-                let data = unsafe { builder.build_unchecked() };
+                    .null_bit_buffer(null_buffer)
+                    .build()
+                    .unwrap();
                 let values = Arc::new(Int64Array::from(data)) as ArrayRef;
                 // this cast is infallible, the unwrap is safe
                 let casted = cast(&values, data_type).unwrap();
                 casted.into_data()
             } else {
-                let builder = ArrayData::builder(data_type.clone())
+                ArrayData::builder(data_type.clone())
                     .len(length)
                     .add_buffer(buffers[1].clone())
-                    .null_bit_buffer(null_buffer);
-
-                unsafe { builder.build_unchecked() }
+                    .null_bit_buffer(null_buffer)
+                    .build()
+                    .unwrap()
             }
         }
         Float32 => {
             if buffers[1].len() / 8 == length && length != 1 {
                 // interpret as a f64, and cast appropriately
-                let builder = ArrayData::builder(DataType::Float64)
+                let data = ArrayData::builder(DataType::Float64)
                     .len(length)
                     .add_buffer(buffers[1].clone())
-                    .null_bit_buffer(null_buffer);
-
-                let data = unsafe { builder.build_unchecked() };
+                    .null_bit_buffer(null_buffer)
+                    .build()
+                    .unwrap();
                 let values = Arc::new(Float64Array::from(data)) as ArrayRef;
                 // this cast is infallible, the unwrap is safe
                 let casted = cast(&values, data_type).unwrap();
                 casted.into_data()
             } else {
-                let builder = ArrayData::builder(data_type.clone())
+                ArrayData::builder(data_type.clone())
                     .len(length)
                     .add_buffer(buffers[1].clone())
-                    .null_bit_buffer(null_buffer);
-
-                unsafe { builder.build_unchecked() }
+                    .null_bit_buffer(null_buffer)
+                    .build()
+                    .unwrap()
             }
         }
         Boolean
@@ -479,14 +479,12 @@ fn create_primitive_array(
         | Date64
         | Duration(_)
         | Interval(IntervalUnit::DayTime)
-        | Interval(IntervalUnit::MonthDayNano) => {
-            let builder = ArrayData::builder(data_type.clone())
-                .len(length)
-                .add_buffer(buffers[1].clone())
-                .null_bit_buffer(null_buffer);
-
-            unsafe { builder.build_unchecked() }
-        }
+        | Interval(IntervalUnit::MonthDayNano) => ArrayData::builder(data_type.clone())
+            .len(length)
+            .add_buffer(buffers[1].clone())
+            .null_bit_buffer(null_buffer)
+            .build()
+            .unwrap(),
         Decimal128(_, _) | Decimal256(_, _) => {
             // read 2 buffers: null buffer (optional) and data buffer
             let builder = ArrayData::builder(data_type.clone())
@@ -494,6 +492,10 @@ fn create_primitive_array(
                 .add_buffer(buffers[1].clone())
                 .null_bit_buffer(null_buffer);
 
+            // Don't validate the decimal array so far,
+            // because validating decimal is somewhat complicated
+            // and there is no conclusion on whether we should do it.
+            // For more information, please look at https://github.com/apache/arrow-rs/issues/2387
             unsafe { builder.build_unchecked() }
         }
         t => unreachable!("Data type {:?} either unsupported or not primitive", t),
@@ -527,7 +529,7 @@ fn create_list_array(
 
         _ => unreachable!("Cannot create list or map array from {:?}", data_type),
     };
-    make_array(unsafe { builder.build_unchecked() })
+    make_array(builder.build().unwrap())
 }
 
 /// Reads the correct number of buffers based on list type and null_count, and creates a

Reply via email to