nevi-me commented on a change in pull request #8792:
URL: https://github.com/apache/arrow/pull/8792#discussion_r532207725
##########
File path: rust/parquet/src/arrow/arrow_writer.rs
##########
@@ -650,37 +688,62 @@ def_get_binary_array_fn!(get_large_string_array, arrow_array::LargeStringArray);
 /// Get the underlying numeric array slice, skipping any null values.
 /// If there are no null values, it might be quicker to get the slice directly instead of
 /// calling this function.
-fn get_numeric_array_slice<T, A>(array: &arrow_array::PrimitiveArray<A>) -> Vec<T::T>
+fn get_numeric_array_slice<T, A>(
+    array: &arrow_array::PrimitiveArray<A>,
+    indices: &[usize],
+) -> Vec<T::T>
 where
     T: DataType,
     A: arrow::datatypes::ArrowNumericType,
     T::T: From<A::Native>,
 {
-    let mut values = Vec::with_capacity(array.len() - array.null_count());
-    for i in 0..array.len() {
-        if array.is_valid(i) {
-            values.push(array.value(i).into())
-        }
+    let mut values = Vec::with_capacity(indices.len());
+    for i in indices {
+        values.push(array.value(*i).into())
     }
     values
 }

+/// Given a level's information, calculate the offsets required to index an array
+/// correctly.
+fn filter_array_indices(level: &LevelInfo) -> Vec<usize> {
+    // TODO: we don't quite get the def levels right all the time, so for now we recalculate it

Review comment:
   I've subsequently fixed it. It had to do with an Arrow quirk which we addressed in the struct null inheritance logic: a nullable struct can hold non-nullable children, in which case we were sometimes overshooting when determining the max definition level. With the struct null inheritance resolved, I was able to track this down and fix it.
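   To make the definition-level point concrete, here is a minimal, self-contained sketch (not the PR's actual `LevelInfo` code; `max_def_level` and `filter_valid_indices` are hypothetical names) of why a non-nullable child inside a nullable struct contributes no definition level of its own, and how the positions whose definition level reaches the maximum are the ones that actually carry leaf values:

   ```rust
   /// Parquet-style max definition level: each nullable (optional) level along the
   /// path contributes 1; required (non-nullable) levels contribute nothing.
   /// Counting a required child as an extra level is the "overshoot" described above.
   fn max_def_level(path_nullability: &[bool]) -> i16 {
       path_nullability.iter().filter(|&&nullable| nullable).count() as i16
   }

   /// Keep only the positions whose definition level reached the maximum,
   /// i.e. the slots where a concrete leaf value is present in the array.
   fn filter_valid_indices(def_levels: &[i16], max_def: i16) -> Vec<usize> {
       def_levels
           .iter()
           .enumerate()
           .filter_map(|(i, &def)| if def == max_def { Some(i) } else { None })
           .collect()
   }

   fn main() {
       // Nullable struct `s` holding a non-nullable child `s.a`:
       // only the struct adds a level, so the max definition level is 1, not 2.
       let max_def = max_def_level(&[true, false]);
       assert_eq!(max_def, 1);

       // Three rows: struct present, struct null, struct present.
       // Definition levels for `s.a` are 1, 0, 1, so only rows 0 and 2 carry values.
       let def_levels = [1, 0, 1];
       assert_eq!(filter_valid_indices(&def_levels, max_def), vec![0, 2]);
   }
   ```

   In the writer itself the filtering is driven by `LevelInfo` rather than a bare slice of levels, but the intent is the same: only nullable levels raise the maximum, so a required child must inherit its parent struct's level instead of adding one.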