This is an automated email from the ASF dual-hosted git repository. tustvold pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push: new c692b259a Validate dictionary key in TypedDictionaryArray (#2578) (#2589) c692b259a is described below commit c692b259adfaade6196ccd1835ba535b05d4a76f Author: Raphael Taylor-Davies <1781103+tustv...@users.noreply.github.com> AuthorDate: Thu Aug 25 18:36:54 2022 +0100 Validate dictionary key in TypedDictionaryArray (#2578) (#2589) --- arrow/src/array/array.rs | 8 ++++++++ arrow/src/array/array_dictionary.rs | 24 ++++++++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) diff --git a/arrow/src/array/array.rs b/arrow/src/array/array.rs index 9766f857c..38ba2025a 100644 --- a/arrow/src/array/array.rs +++ b/arrow/src/array/array.rs @@ -336,6 +336,14 @@ impl<'a, T: Array> Array for &'a T { } /// A generic trait for accessing the values of an [`Array`] +/// +/// # Validity +/// +/// An [`ArrayAccessor`] must always return a well-defined value for an index that is +/// within the bounds `0..Array::len`, including for null indexes where [`Array::is_null`] is true. +/// +/// The value at null indexes is unspecified, and implementations must not rely on a specific +/// value such as [`Default::default`] being returned, however, it must not be undefined pub trait ArrayAccessor: Array { type Item: Send + Sync; diff --git a/arrow/src/array/array_dictionary.rs b/arrow/src/array/array_dictionary.rs index 4977c029a..c08bb2260 100644 --- a/arrow/src/array/array_dictionary.rs +++ b/arrow/src/array/array_dictionary.rs @@ -475,8 +475,7 @@ impl<'a, K: ArrowPrimitiveType, V: Sync> Array for TypedDictionaryArray<'a, K, V impl<'a, K, V> IntoIterator for TypedDictionaryArray<'a, K, V> where K: ArrowPrimitiveType, - V: Sync + Send, - &'a V: ArrayAccessor, + Self: ArrayAccessor, { type Item = Option<<Self as ArrayAccessor>::Item>; type IntoIter = ArrayIter<Self>; @@ -491,21 +490,30 @@ where K: ArrowPrimitiveType, V: Sync + Send, &'a V: ArrayAccessor, + <&'a V as ArrayAccessor>::Item: Default, { type Item = <&'a V as ArrayAccessor>::Item; fn value(&self, index: usize) -> Self::Item { - assert!(self.dictionary.is_valid(index), "{}", index); - let value_idx = self.dictionary.keys.value(index).to_usize().unwrap(); - // Dictionary indexes should be valid - unsafe { self.values.value_unchecked(value_idx) } + assert!( + index < self.len(), + "Trying to access an element at index {} from a TypedDictionaryArray of length {}", + index, + self.len() + ); + unsafe { self.value_unchecked(index) } } unsafe fn value_unchecked(&self, index: usize) -> Self::Item { let val = self.dictionary.keys.value_unchecked(index); let value_idx = val.to_usize().unwrap(); - // Dictionary indexes should be valid - self.values.value_unchecked(value_idx) + + // As dictionary keys are only verified for non-null indexes + // we must check the value is within bounds + match value_idx < self.values.len() { + true => self.values.value_unchecked(value_idx), + false => Default::default(), + } } }