This is an automated email from the ASF dual-hosted git repository.

tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git


The following commit(s) were added to refs/heads/master by this push:
     new c692b259a Validate dictionary key in TypedDictionaryArray (#2578) 
(#2589)
c692b259a is described below

commit c692b259adfaade6196ccd1835ba535b05d4a76f
Author: Raphael Taylor-Davies <1781103+tustv...@users.noreply.github.com>
AuthorDate: Thu Aug 25 18:36:54 2022 +0100

    Validate dictionary key in TypedDictionaryArray (#2578) (#2589)
---
 arrow/src/array/array.rs            |  8 ++++++++
 arrow/src/array/array_dictionary.rs | 24 ++++++++++++++++--------
 2 files changed, 24 insertions(+), 8 deletions(-)

diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs
index 9766f857c..38ba2025a 100644
--- a/arrow/src/array/array.rs
+++ b/arrow/src/array/array.rs
@@ -336,6 +336,14 @@ impl<'a, T: Array> Array for &'a T {
 }
 
 /// A generic trait for accessing the values of an [`Array`]
+///
+/// # Validity
+///
+/// An [`ArrayAccessor`] must always return a well-defined value for an index 
that is
+/// within the bounds `0..Array::len`, including for null indexes where 
[`Array::is_null`] is true.
+///
+/// The value at null indexes is unspecified, and implementations must not 
rely on a specific
+/// value such as [`Default::default`] being returned, however, it must not be 
undefined
 pub trait ArrayAccessor: Array {
     type Item: Send + Sync;
 
diff --git a/arrow/src/array/array_dictionary.rs 
b/arrow/src/array/array_dictionary.rs
index 4977c029a..c08bb2260 100644
--- a/arrow/src/array/array_dictionary.rs
+++ b/arrow/src/array/array_dictionary.rs
@@ -475,8 +475,7 @@ impl<'a, K: ArrowPrimitiveType, V: Sync> Array for 
TypedDictionaryArray<'a, K, V
 impl<'a, K, V> IntoIterator for TypedDictionaryArray<'a, K, V>
 where
     K: ArrowPrimitiveType,
-    V: Sync + Send,
-    &'a V: ArrayAccessor,
+    Self: ArrayAccessor,
 {
     type Item = Option<<Self as ArrayAccessor>::Item>;
     type IntoIter = ArrayIter<Self>;
@@ -491,21 +490,30 @@ where
     K: ArrowPrimitiveType,
     V: Sync + Send,
     &'a V: ArrayAccessor,
+    <&'a V as ArrayAccessor>::Item: Default,
 {
     type Item = <&'a V as ArrayAccessor>::Item;
 
     fn value(&self, index: usize) -> Self::Item {
-        assert!(self.dictionary.is_valid(index), "{}", index);
-        let value_idx = self.dictionary.keys.value(index).to_usize().unwrap();
-        // Dictionary indexes should be valid
-        unsafe { self.values.value_unchecked(value_idx) }
+        assert!(
+            index < self.len(),
+            "Trying to access an element at index {} from a 
TypedDictionaryArray of length {}",
+            index,
+            self.len()
+        );
+        unsafe { self.value_unchecked(index) }
     }
 
     unsafe fn value_unchecked(&self, index: usize) -> Self::Item {
         let val = self.dictionary.keys.value_unchecked(index);
         let value_idx = val.to_usize().unwrap();
-        // Dictionary indexes should be valid
-        self.values.value_unchecked(value_idx)
+
+        // As dictionary keys are only verified for non-null indexes
+        // we must check the value is within bounds
+        match value_idx < self.values.len() {
+            true => self.values.value_unchecked(value_idx),
+            false => Default::default(),
+        }
     }
 }
 

Reply via email to