viirya commented on a change in pull request #1435:
URL: https://github.com/apache/arrow-rs/pull/1435#discussion_r825515485



##########
File path: arrow/src/array/data.rs
##########
@@ -936,68 +936,68 @@ impl ArrayData {
             )));
         }
 
+        self.validate_dictionary_offest()?;
+
+        // validate all children recursively
+        self.child_data
+            .iter()
+            .enumerate()
+            .try_for_each(|(i, child_data)| {
+                child_data.validate_full().map_err(|e| {
+                    ArrowError::InvalidArgumentError(format!(
+                        "{} child #{} invalid: {}",
+                        self.data_type, i, e
+                    ))
+                })
+            })?;
+
+        Ok(())
+    }
+
+    pub fn validate_dictionary_offest(&self) -> Result<()> {
         match &self.data_type {
-            DataType::Utf8 => {
-                self.validate_utf8::<i32>()?;
-            }
-            DataType::LargeUtf8 => {
-                self.validate_utf8::<i64>()?;
-            }
-            DataType::Binary => {
-                self.validate_offsets_full::<i32>(self.buffers[1].len())?;
-            }
+            DataType::Utf8 => self.validate_utf8::<i32>(),
+            DataType::LargeUtf8 => self.validate_utf8::<i64>(),
+            DataType::Binary => 
self.validate_offsets_full::<i32>(self.buffers[1].len()),
             DataType::LargeBinary => {
-                self.validate_offsets_full::<i64>(self.buffers[1].len())?;
+                self.validate_offsets_full::<i64>(self.buffers[1].len())
             }
             DataType::List(_) | DataType::Map(_, _) => {
                 let child = &self.child_data[0];
-                self.validate_offsets_full::<i32>(child.len + child.offset)?;
+                self.validate_offsets_full::<i32>(child.len + child.offset)
             }
             DataType::LargeList(_) => {
                 let child = &self.child_data[0];
-                self.validate_offsets_full::<i64>(child.len + child.offset)?;
+                self.validate_offsets_full::<i64>(child.len + child.offset)
             }
             DataType::Union(_, _) => {
                 // Validate Union Array as part of implementing new Union 
semantics
                 // See comments in `ArrayData::validate()`
                 // https://github.com/apache/arrow-rs/issues/85
                 //
                 // TODO file follow on ticket for full union validation
+                Ok(())
             }
             DataType::Dictionary(key_type, _value_type) => {
                 let dictionary_length: i64 = 
self.child_data[0].len.try_into().unwrap();
                 let max_value = dictionary_length - 1;
                 match key_type.as_ref() {
-                    DataType::UInt8 => self.check_bounds::<u8>(max_value)?,
-                    DataType::UInt16 => self.check_bounds::<u16>(max_value)?,
-                    DataType::UInt32 => self.check_bounds::<u32>(max_value)?,
-                    DataType::UInt64 => self.check_bounds::<u64>(max_value)?,
-                    DataType::Int8 => self.check_bounds::<i8>(max_value)?,
-                    DataType::Int16 => self.check_bounds::<i16>(max_value)?,
-                    DataType::Int32 => self.check_bounds::<i32>(max_value)?,
-                    DataType::Int64 => self.check_bounds::<i64>(max_value)?,
+                    DataType::UInt8 => self.check_bounds::<u8>(max_value),
+                    DataType::UInt16 => self.check_bounds::<u16>(max_value),
+                    DataType::UInt32 => self.check_bounds::<u32>(max_value),
+                    DataType::UInt64 => self.check_bounds::<u64>(max_value),
+                    DataType::Int8 => self.check_bounds::<i8>(max_value),
+                    DataType::Int16 => self.check_bounds::<i16>(max_value),
+                    DataType::Int32 => self.check_bounds::<i32>(max_value),
+                    DataType::Int64 => self.check_bounds::<i64>(max_value),
                     _ => unreachable!(),

Review comment:
       > I think a possible solution would be to extract the dictionary 
validation logic out of ArrayData::validate_full into a separate function. 
DictionaryArray::try_new could then use ArrayDataBuilder::build_unchecked and 
afterwards call the new function which only validates that the keys are in 
bounds.
   
   I think "the dictionary validation logic" refers only to the logic inside 
the `DataType::Dictionary` match arm.
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to