viirya commented on a change in pull request #1435:
URL: https://github.com/apache/arrow-rs/pull/1435#discussion_r825515628



##########
File path: arrow/src/array/data.rs
##########
@@ -936,68 +936,68 @@ impl ArrayData {
             )));
         }
 
+        self.validate_dictionary_offest()?;
+
+        // validate all children recursively
+        self.child_data
+            .iter()
+            .enumerate()
+            .try_for_each(|(i, child_data)| {
+                child_data.validate_full().map_err(|e| {
+                    ArrowError::InvalidArgumentError(format!(
+                        "{} child #{} invalid: {}",
+                        self.data_type, i, e
+                    ))
+                })
+            })?;
+
+        Ok(())
+    }
+
+    pub fn validate_dictionary_offest(&self) -> Result<()> {
         match &self.data_type {
-            DataType::Utf8 => {
-                self.validate_utf8::<i32>()?;
-            }
-            DataType::LargeUtf8 => {
-                self.validate_utf8::<i64>()?;
-            }
-            DataType::Binary => {
-                self.validate_offsets_full::<i32>(self.buffers[1].len())?;
-            }
+            DataType::Utf8 => self.validate_utf8::<i32>(),
+            DataType::LargeUtf8 => self.validate_utf8::<i64>(),
+            DataType::Binary => 
self.validate_offsets_full::<i32>(self.buffers[1].len()),
             DataType::LargeBinary => {
-                self.validate_offsets_full::<i64>(self.buffers[1].len())?;
+                self.validate_offsets_full::<i64>(self.buffers[1].len())
             }
             DataType::List(_) | DataType::Map(_, _) => {
                 let child = &self.child_data[0];
-                self.validate_offsets_full::<i32>(child.len + child.offset)?;
+                self.validate_offsets_full::<i32>(child.len + child.offset)
             }
             DataType::LargeList(_) => {
                 let child = &self.child_data[0];
-                self.validate_offsets_full::<i64>(child.len + child.offset)?;
+                self.validate_offsets_full::<i64>(child.len + child.offset)
             }
             DataType::Union(_, _) => {
                 // Validate Union Array as part of implementing new Union 
semantics
                 // See comments in `ArrayData::validate()`
                 // https://github.com/apache/arrow-rs/issues/85
                 //
                 // TODO file follow on ticket for full union validation
+                Ok(())
             }
             DataType::Dictionary(key_type, _value_type) => {
                 let dictionary_length: i64 = 
self.child_data[0].len.try_into().unwrap();
                 let max_value = dictionary_length - 1;
                 match key_type.as_ref() {
-                    DataType::UInt8 => self.check_bounds::<u8>(max_value)?,
-                    DataType::UInt16 => self.check_bounds::<u16>(max_value)?,
-                    DataType::UInt32 => self.check_bounds::<u32>(max_value)?,
-                    DataType::UInt64 => self.check_bounds::<u64>(max_value)?,
-                    DataType::Int8 => self.check_bounds::<i8>(max_value)?,
-                    DataType::Int16 => self.check_bounds::<i16>(max_value)?,
-                    DataType::Int32 => self.check_bounds::<i32>(max_value)?,
-                    DataType::Int64 => self.check_bounds::<i64>(max_value)?,
+                    DataType::UInt8 => self.check_bounds::<u8>(max_value),
+                    DataType::UInt16 => self.check_bounds::<u16>(max_value),
+                    DataType::UInt32 => self.check_bounds::<u32>(max_value),
+                    DataType::UInt64 => self.check_bounds::<u64>(max_value),
+                    DataType::Int8 => self.check_bounds::<i8>(max_value),
+                    DataType::Int16 => self.check_bounds::<i16>(max_value),
+                    DataType::Int32 => self.check_bounds::<i32>(max_value),
+                    DataType::Int64 => self.check_bounds::<i64>(max_value),
                     _ => unreachable!(),

Review comment:
       The validation logic for the other data types (Utf8, Binary, List, etc.) is not related to dictionary offsets.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to