This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/main by this push:
new 549d77cb22 feat(arrow_array): add helper function to create MapArray
from `Vec<Option<Vec<(Key, Option<Value>)>>>` for tests (#10123)
549d77cb22 is described below
commit 549d77cb229eebc78153b9e254021e994f88a209
Author: Raz Luvaton <[email protected]>
AuthorDate: Fri Jun 12 00:11:11 2026 +0300
feat(arrow_array): add helper function to create MapArray from
`Vec<Option<Vec<(Key, Option<Value>)>>>` for tests (#10123)
# Which issue does this PR close?
N/A
# Rationale for this change
Writing tests that use `MapArray` currently requires a very verbose
sequence of builder calls to construct an array with the specific values
you want. Adding this helper allows both arrow tests and user tests to be
cleaner.
# What changes are included in this PR?
Added the `MapArray::from_vec_of_maps` function and updated some of the
tests in the repo that use `MapBuilder` (those that do not test the builder
itself, of course) to use the new method, showcasing how much cleaner it looks.
# Are these changes tested?
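Yes. A new unit test, `test_from_vec_of_maps`, compares the helper's output with an equivalent array built via `MapBuilder`.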
# Are there any user-facing changes?
Yes: a new public function, `MapArray::from_vec_of_maps`.
---
arrow-array/src/array/map_array.rs | 137 ++++++++++++++++++++++++++++++++-
arrow-flight/src/encode.rs | 44 +++++------
arrow-ord/src/ord.rs | 151 +++++++++++--------------------------
3 files changed, 194 insertions(+), 138 deletions(-)
diff --git a/arrow-array/src/array/map_array.rs
b/arrow-array/src/array/map_array.rs
index e15318d67e..0f7c435430 100644
--- a/arrow-array/src/array/map_array.rs
+++ b/arrow-array/src/array/map_array.rs
@@ -16,11 +16,12 @@
// under the License.
use crate::array::{get_offsets_from_buffer, print_long_array};
+use crate::builder::MapFieldNames;
use crate::iterator::MapArrayIter;
use crate::{Array, ArrayAccessor, ArrayRef, ListArray, StringArray,
StructArray, make_array};
use arrow_buffer::{ArrowNativeType, Buffer, NullBuffer, OffsetBuffer,
ToByteSlice};
use arrow_data::{ArrayData, ArrayDataBuilder};
-use arrow_schema::{ArrowError, DataType, Field, FieldRef};
+use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields};
use std::any::Any;
use std::sync::Arc;
@@ -31,7 +32,9 @@ use std::sync::Arc;
/// [`MapArray`] is physically a [`ListArray`] of key values pairs stored as
an `entries`
/// [`StructArray`] with 2 child fields.
///
-/// See [`MapBuilder`](crate::builder::MapBuilder) for how to construct a
[`MapArray`]
+/// # See also
+/// * [`MapBuilder`](crate::builder::MapBuilder) for how to construct a
[`MapArray`]
+/// * [`Self::from_vec_of_maps`] for ergonomically creating maps for testing
#[derive(Clone)]
pub struct MapArray {
data_type: DataType,
@@ -269,6 +272,8 @@ impl From<MapArray> for ArrayData {
}
}
+type Entries<Key, Value> = Vec<(Key, Value)>;
+
impl MapArray {
fn try_new_from_array_data(data: ArrayData) -> Result<Self, ArrowError> {
let (data_type, len, nulls, offset, mut buffers, mut child_data) =
data.into_parts();
@@ -359,6 +364,91 @@ impl MapArray {
Ok(MapArray::from(map_data))
}
+
+ /// Helper to create [`MapArray`] from [`Vec`]s of entries so the code
will look clean and straightforward
+ ///
+ /// the input is: `Vec<Option<Map>>` where each `Map` is `Vec<(Key,
Option<Value>)>`
+ ///
+ /// Useful for tests, this should not be used for performance sensitive
operations
+ ///
+ /// ```
+ /// use std::collections::HashMap;
+ /// # use arrow_array::{MapArray, Int32Array, StringArray};
+ ///
+ /// let map = vec![
+ /// // {}
+ /// Some(vec![]),
+ /// // null
+ /// None,
+ /// // { "a": 1, "b": null, "cd": 4 }
+ /// Some(vec![
+ /// ("a", Some(1)),
+ /// ("b", None),
+ /// ("cd", Some(4)),
+ /// ]),
+ /// // { "e": 0 }
+ /// Some(vec![("e", Some(0))]),
+ /// ];
+ /// let ordered = true;
+ ///
+ /// // created map: [{}, null, {"a": 1, "b": null, "cd": 4}, {"e": 0}]
+ /// let map_array = MapArray::from_vec_of_maps::<StringArray, Int32Array,
_, _>(map, ordered);
+ /// // Or you could fill the last 2 generics manually for the key array
item and value array item
+ /// // let map_array = MapArray::from_vec_of_maps::<StringArray,
Int32Array, &str, i32>(map, ordered);
+ ///```
+ #[allow(clippy::type_complexity)]
+ pub fn from_vec_of_maps<KeyArray, ValueArray, K, V>(
+ input: Vec<Option<Entries<K, Option<V>>>>,
+ ordered: bool,
+ ) -> Self
+ where
+ KeyArray: Array + 'static,
+ ValueArray: Array + 'static,
+ Vec<K>: Into<KeyArray>,
+ Vec<Option<V>>: Into<ValueArray>,
+ {
+ let offsets = OffsetBuffer::<i32>::from_lengths(
+ input.iter().map(|v| v.as_ref().map_or(0, |m| m.len())),
+ );
+ let nulls = NullBuffer::from_iter(input.iter().map(|v| v.is_some()));
+ let nulls = Some(nulls).filter(|b| b.null_count() > 0);
+
+ let (keys, values): (Vec<K>, Vec<Option<V>>) = input
+ .into_iter()
+ .flatten()
+ .flat_map(|m| m.into_iter())
+ .unzip();
+
+ let keys_array: ArrayRef = Arc::new(<Vec<K> as
Into<KeyArray>>::into(keys));
+ let values_array: ArrayRef = Arc::new(<Vec<Option<V>> as
Into<ValueArray>>::into(values));
+
+ let field_names = MapFieldNames::default();
+
+ let entries = StructArray::new(
+ Fields::from(vec![
+ Field::new(field_names.key, keys_array.data_type().clone(),
false),
+ Field::new(
+ field_names.value,
+ values_array.data_type().clone(),
+ values_array.is_nullable(),
+ ),
+ ]),
+ vec![keys_array, values_array],
+ None,
+ );
+
+ MapArray::new(
+ Arc::new(Field::new(
+ field_names.entry,
+ entries.data_type().clone(),
+ false,
+ )),
+ offsets,
+ entries,
+ nulls,
+ ordered,
+ )
+ }
}
/// SAFETY: Correctly implements the contract of Arrow Arrays
@@ -478,6 +568,7 @@ impl From<MapArray> for ListArray {
#[cfg(test)]
mod tests {
+ use crate::builder::{Int32Builder, MapBuilder, StringBuilder};
use crate::cast::AsArray;
use crate::types::UInt32Type;
use crate::{Int32Array, UInt32Array};
@@ -839,4 +930,46 @@ mod tests {
"Invalid argument error: MapArray entries must contain two
children, got 3"
);
}
+
+ #[test]
+ fn test_from_vec_of_maps() {
+ for ordered in [true, false] {
+ let map = vec![
+ Some(vec![]),
+ None,
+ Some(vec![("a", Some(1)), ("b", None), ("cd", Some(4))]),
+ Some(vec![("e", Some(0))]),
+ ];
+
+ let map_array =
+ MapArray::from_vec_of_maps::<StringArray, Int32Array, _,
_>(map, ordered);
+ assert_eq!(map_array.len(), 4);
+
+ let mut builder = MapBuilder::new(None, StringBuilder::new(),
Int32Builder::default());
+
+ // {}
+ builder.append(true).unwrap();
+
+ // null
+ builder.append_nulls(1).unwrap();
+
+ // {"a": 1, "b": null, "cd": 4}
+ builder.keys().extend(["a", "b", "cd"].map(Some));
+ builder.values().extend([Some(1), None, Some(4)]);
+
+ builder.append(true).unwrap();
+
+ // {"e": 0}
+ builder.keys().append_value("e");
+ builder.values().append_value(0);
+
+ builder.append(true).unwrap();
+
+ let (field, offsets, entries, null_buffer, _) =
builder.finish().into_parts();
+
+ let expected_map = MapArray::new(field, offsets, entries,
null_buffer, ordered);
+
+ assert_eq!(map_array, expected_map);
+ }
+ }
}
diff --git a/arrow-flight/src/encode.rs b/arrow-flight/src/encode.rs
index 191da02413..29ac7e9574 100644
--- a/arrow-flight/src/encode.rs
+++ b/arrow-flight/src/encode.rs
@@ -795,7 +795,7 @@ mod tests {
use arrow_cast::pretty::pretty_format_batches;
use arrow_ipc::MetadataVersion;
use arrow_schema::{UnionFields, UnionMode};
- use builder::{GenericStringBuilder, MapBuilder};
+ use builder::MapBuilder;
use std::collections::HashMap;
use super::*;
@@ -1505,34 +1505,24 @@ mod tests {
let expected_schema = Arc::new(expected_schema);
- // Builder without dictionary fields
- let mut builder = MapBuilder::new(
- None,
- GenericStringBuilder::<i32>::new(),
- GenericStringBuilder::<i32>::new(),
+ // array without dictionary fields
+ let arr1 = MapArray::from_vec_of_maps::<StringArray, StringArray, _,
_>(
+ vec![Some(vec![
+ ("k1", Some("a")),
+ ("k2", None),
+ ("k3", Some("b")),
+ ])],
+ false,
);
- // {"k1":"a","k2":null,"k3":"b"}
- builder.keys().append_value("k1");
- builder.values().append_value("a");
- builder.keys().append_value("k2");
- builder.values().append_null();
- builder.keys().append_value("k3");
- builder.values().append_value("b");
- builder.append(true).unwrap();
-
- let arr1 = builder.finish();
-
- // {"k1":"c","k2":null,"k3":"d"}
- builder.keys().append_value("k1");
- builder.values().append_value("c");
- builder.keys().append_value("k2");
- builder.values().append_null();
- builder.keys().append_value("k3");
- builder.values().append_value("d");
- builder.append(true).unwrap();
-
- let arr2 = builder.finish();
+ let arr2 = MapArray::from_vec_of_maps::<StringArray, StringArray, _,
_>(
+ vec![Some(vec![
+ ("k1", Some("c")),
+ ("k2", None),
+ ("k3", Some("d")),
+ ])],
+ false,
+ );
let mut expected_arrays = vec![arr1, arr2].into_iter();
diff --git a/arrow-ord/src/ord.rs b/arrow-ord/src/ord.rs
index 51ae3c4d76..bcc6a217bd 100644
--- a/arrow-ord/src/ord.rs
+++ b/arrow-ord/src/ord.rs
@@ -568,7 +568,7 @@ pub fn make_comparator(
#[cfg(test)]
mod tests {
use super::*;
- use arrow_array::builder::{Int32Builder, ListBuilder, MapBuilder,
StringBuilder};
+ use arrow_array::builder::{Int32Builder, ListBuilder};
use arrow_buffer::{IntervalDayTime, NullBuffer, OffsetBuffer,
ScalarBuffer, i256};
use arrow_schema::{DataType, Field, Fields, UnionFields};
use half::f16;
@@ -1262,64 +1262,31 @@ mod tests {
#[test]
fn test_map() {
// Create first map array demonstrating key priority over values:
- // [{"a": 100, "b": 1}, {"b": 999, "c": 1}, {}, {"x": 1}]
- let string_builder = StringBuilder::new();
- let int_builder = Int32Builder::new();
- let mut map1_builder = MapBuilder::new(None, string_builder,
int_builder);
-
- // {"a": 100, "b": 1} - high value for "a", low value for "b"
- map1_builder.keys().append_value("a");
- map1_builder.values().append_value(100);
- map1_builder.keys().append_value("b");
- map1_builder.values().append_value(1);
- map1_builder.append(true).unwrap();
-
- // {"b": 999, "c": 1} - very high value for "b", low value for "c"
- map1_builder.keys().append_value("b");
- map1_builder.values().append_value(999);
- map1_builder.keys().append_value("c");
- map1_builder.values().append_value(1);
- map1_builder.append(true).unwrap();
-
- // {}
- map1_builder.append(true).unwrap();
-
- // {"x": 1}
- map1_builder.keys().append_value("x");
- map1_builder.values().append_value(1);
- map1_builder.append(true).unwrap();
-
- let map1 = map1_builder.finish();
+ let map1 = MapArray::from_vec_of_maps::<StringArray, Int32Array, _, _>(
+ vec![
+ // high value for "a", low value for "b"
+ Some(vec![("a", Some(100)), ("b", Some(1))]),
+ // very high value for "b", low value for "c"
+ Some(vec![("b", Some(999)), ("c", Some(1))]),
+ Some(vec![]),
+ Some(vec![("x", Some(1))]),
+ ],
+ false,
+ );
// Create second map array:
// [{"a": 1, "c": 999}, {"b": 1, "d": 999}, {"a": 1}, None]
- let string_builder = StringBuilder::new();
- let int_builder = Int32Builder::new();
- let mut map2_builder = MapBuilder::new(None, string_builder,
int_builder);
-
- // {"a": 1, "c": 999} - low value for "a", high value for "c"
- map2_builder.keys().append_value("a");
- map2_builder.values().append_value(1);
- map2_builder.keys().append_value("c");
- map2_builder.values().append_value(999);
- map2_builder.append(true).unwrap();
-
- // {"b": 1, "d": 999} - low value for "b", high value for "d"
- map2_builder.keys().append_value("b");
- map2_builder.values().append_value(1);
- map2_builder.keys().append_value("d");
- map2_builder.values().append_value(999);
- map2_builder.append(true).unwrap();
-
- // {"a": 1}
- map2_builder.keys().append_value("a");
- map2_builder.values().append_value(1);
- map2_builder.append(true).unwrap();
-
- // None
- map2_builder.append(false).unwrap();
-
- let map2 = map2_builder.finish();
+ let map2 = MapArray::from_vec_of_maps::<StringArray, Int32Array, _, _>(
+ vec![
+ // low value for "a", high value for "c"
+ Some(vec![("a", Some(1)), ("c", Some(999))]),
+ // low value for "b", high value for "d"
+ Some(vec![("b", Some(1)), ("d", Some(999))]),
+ Some(vec![("a", Some(1))]),
+ None,
+ ],
+ false,
+ );
let opts = SortOptions {
descending: false,
@@ -1380,59 +1347,25 @@ mod tests {
#[test]
fn test_map_vs_list_consistency() {
// Create map arrays and convert them to list arrays to verify
comparison consistency
- // Map arrays: [{"a": 1, "b": 2}, {"x": 10}, {}, {"c": 3}]
- let string_builder = StringBuilder::new();
- let int_builder = Int32Builder::new();
- let mut map1_builder = MapBuilder::new(None, string_builder,
int_builder);
-
- // {"a": 1, "b": 2}
- map1_builder.keys().append_value("a");
- map1_builder.values().append_value(1);
- map1_builder.keys().append_value("b");
- map1_builder.values().append_value(2);
- map1_builder.append(true).unwrap();
-
- // {"x": 10}
- map1_builder.keys().append_value("x");
- map1_builder.values().append_value(10);
- map1_builder.append(true).unwrap();
-
- // {}
- map1_builder.append(true).unwrap();
-
- // {"c": 3}
- map1_builder.keys().append_value("c");
- map1_builder.values().append_value(3);
- map1_builder.append(true).unwrap();
-
- let map1 = map1_builder.finish();
-
- // Second map array: [{"a": 1, "b": 2}, {"y": 20}, {"d": 4}, None]
- let string_builder = StringBuilder::new();
- let int_builder = Int32Builder::new();
- let mut map2_builder = MapBuilder::new(None, string_builder,
int_builder);
-
- // {"a": 1, "b": 2}
- map2_builder.keys().append_value("a");
- map2_builder.values().append_value(1);
- map2_builder.keys().append_value("b");
- map2_builder.values().append_value(2);
- map2_builder.append(true).unwrap();
-
- // {"y": 20}
- map2_builder.keys().append_value("y");
- map2_builder.values().append_value(20);
- map2_builder.append(true).unwrap();
-
- // {"d": 4}
- map2_builder.keys().append_value("d");
- map2_builder.values().append_value(4);
- map2_builder.append(true).unwrap();
-
- // None
- map2_builder.append(false).unwrap();
-
- let map2 = map2_builder.finish();
+ let map1 = MapArray::from_vec_of_maps::<StringArray, Int32Array, _, _>(
+ vec![
+ Some(vec![("a", Some(1)), ("b", Some(2))]),
+ Some(vec![("x", Some(10))]),
+ Some(vec![]),
+ Some(vec![("c", Some(3))]),
+ ],
+ false,
+ );
+
+ let map2 = MapArray::from_vec_of_maps::<StringArray, Int32Array, _, _>(
+ vec![
+ Some(vec![("a", Some(1)), ("b", Some(2))]),
+ Some(vec![("y", Some(20))]),
+ Some(vec![("d", Some(4))]),
+ None,
+ ],
+ false,
+ );
// Convert map arrays to list arrays (Map entries are struct arrays
with key-value pairs)
let list1: ListArray = map1.clone().into();