This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 9cf48c1a7 refactor: construct `StructArray` w/ `FieldRef` (#4116)
9cf48c1a7 is described below
commit 9cf48c1a7559af150699fdeeb01031e357946d75
Author: Marco Neumann <[email protected]>
AuthorDate: Wed Apr 26 13:29:47 2023 +0200
refactor: construct `StructArray` w/ `FieldRef` (#4116)
`DataType` uses `Fields`/`FieldRef` internally. Accepting `Field` just
to wrap it into an `Arc` is unnecessarily expensive, esp. when the `Field`
was cloned from a `FieldRef` (happens in some non-test code).
I've decided to NOT allow the construction from `Field` anymore because
in prod code this is most likely a performance bug.
---
arrow-array/src/array/map_array.rs | 22 ++++++-------
arrow-array/src/array/struct_array.rs | 34 ++++++++++----------
arrow-array/src/builder/map_builder.rs | 8 ++---
arrow-array/src/builder/mod.rs | 6 ++--
arrow-array/src/record_batch.rs | 4 +--
arrow-cast/src/pretty.rs | 8 ++---
arrow-ipc/src/reader.rs | 19 ++++++-----
arrow-ipc/src/writer.rs | 13 +++++---
arrow-json/src/writer.rs | 20 ++++++------
arrow-row/src/lib.rs | 4 +--
arrow-select/src/concat.rs | 4 +--
arrow-select/src/take.rs | 8 ++---
arrow/examples/builders.rs | 4 +--
arrow/examples/dynamic_types.rs | 6 ++--
arrow/src/array/ffi.rs | 10 +++---
arrow/src/ffi.rs | 2 +-
arrow/tests/array_cast.rs | 4 +--
arrow/tests/array_transform.rs | 4 +--
parquet/src/arrow/array_reader/struct_array.rs | 2 +-
parquet/src/arrow/arrow_writer/levels.rs | 32 +++++++++----------
parquet/src/arrow/arrow_writer/mod.rs | 44 +++++++++++++++-----------
21 files changed, 136 insertions(+), 122 deletions(-)
diff --git a/arrow-array/src/array/map_array.rs
b/arrow-array/src/array/map_array.rs
index 62e12c30e..c53e452a6 100644
--- a/arrow-array/src/array/map_array.rs
+++ b/arrow-array/src/array/map_array.rs
@@ -197,12 +197,12 @@ impl MapArray {
let entry_offsets_buffer = Buffer::from(entry_offsets.to_byte_slice());
let keys_data = StringArray::from_iter_values(keys);
- let keys_field = Field::new("keys", DataType::Utf8, false);
- let values_field = Field::new(
+ let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
+ let values_field = Arc::new(Field::new(
"values",
values.data_type().clone(),
values.null_count() > 0,
- );
+ ));
let entry_struct = StructArray::from(vec![
(keys_field, Arc::new(keys_data) as ArrayRef),
@@ -336,8 +336,8 @@ mod tests {
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
- let keys = Field::new("keys", DataType::Int32, false);
- let values = Field::new("values", DataType::UInt32, false);
+ let keys = Arc::new(Field::new("keys", DataType::Int32, false));
+ let values = Arc::new(Field::new("values", DataType::UInt32, false));
let entry_struct = StructArray::from(vec![
(keys, make_array(keys_data)),
(values, make_array(values_data)),
@@ -382,8 +382,8 @@ mod tests {
// [[0, 1, 2], [3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 6, 8].to_byte_slice());
- let keys_field = Field::new("keys", DataType::Int32, false);
- let values_field = Field::new("values", DataType::UInt32, true);
+ let keys_field = Arc::new(Field::new("keys", DataType::Int32, false));
+ let values_field = Arc::new(Field::new("values", DataType::UInt32,
true));
let entry_struct = StructArray::from(vec![
(keys_field.clone(), make_array(key_data)),
(values_field.clone(), make_array(value_data.clone())),
@@ -504,8 +504,8 @@ mod tests {
// [[3, 4, 5], [6, 7]]
let entry_offsets = Buffer::from(&[0, 3, 5].to_byte_slice());
- let keys = Field::new("keys", DataType::Int32, false);
- let values = Field::new("values", DataType::UInt32, false);
+ let keys = Arc::new(Field::new("keys", DataType::Int32, false));
+ let values = Arc::new(Field::new("values", DataType::UInt32, false));
let entry_struct = StructArray::from(vec![
(keys, make_array(keys_data)),
(values, make_array(values_data)),
@@ -582,8 +582,8 @@ mod tests {
let key_array = Arc::new(StringArray::from(vec!["a", "b", "c"])) as
ArrayRef;
let value_array = Arc::new(UInt32Array::from(vec![0u32, 10, 20])) as
ArrayRef;
- let keys_field = Field::new("keys", DataType::Utf8, false);
- let values_field = Field::new("values", DataType::UInt32, false);
+ let keys_field = Arc::new(Field::new("keys", DataType::Utf8, false));
+ let values_field = Arc::new(Field::new("values", DataType::UInt32,
false));
let struct_array =
StructArray::from(vec![(keys_field, key_array), (values_field,
value_array)]);
assert_eq!(
diff --git a/arrow-array/src/array/struct_array.rs
b/arrow-array/src/array/struct_array.rs
index a18f38c08..fac947f14 100644
--- a/arrow-array/src/array/struct_array.rs
+++ b/arrow-array/src/array/struct_array.rs
@@ -18,7 +18,7 @@
use crate::{make_array, new_null_array, Array, ArrayRef, RecordBatch};
use arrow_buffer::{BooleanBuffer, Buffer, NullBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
-use arrow_schema::{ArrowError, DataType, Field, Fields, SchemaBuilder};
+use arrow_schema::{ArrowError, DataType, Field, FieldRef, Fields,
SchemaBuilder};
use std::sync::Arc;
use std::{any::Any, ops::Index};
@@ -58,11 +58,11 @@ use std::{any::Any, ops::Index};
///
/// let struct_array = StructArray::from(vec![
/// (
-/// Field::new("b", DataType::Boolean, false),
+/// Arc::new(Field::new("b", DataType::Boolean, false)),
/// boolean.clone() as ArrayRef,
/// ),
/// (
-/// Field::new("c", DataType::Int32, false),
+/// Arc::new(Field::new("c", DataType::Int32, false)),
/// int.clone() as ArrayRef,
/// ),
/// ]);
@@ -379,8 +379,8 @@ impl Array for StructArray {
}
}
-impl From<Vec<(Field, ArrayRef)>> for StructArray {
- fn from(v: Vec<(Field, ArrayRef)>) -> Self {
+impl From<Vec<(FieldRef, ArrayRef)>> for StructArray {
+ fn from(v: Vec<(FieldRef, ArrayRef)>) -> Self {
let (schema, arrays): (SchemaBuilder, _) = v.into_iter().unzip();
StructArray::new(schema.finish().fields, arrays, None)
}
@@ -405,8 +405,8 @@ impl std::fmt::Debug for StructArray {
}
}
-impl From<(Vec<(Field, ArrayRef)>, Buffer)> for StructArray {
- fn from(pair: (Vec<(Field, ArrayRef)>, Buffer)) -> Self {
+impl From<(Vec<(FieldRef, ArrayRef)>, Buffer)> for StructArray {
+ fn from(pair: (Vec<(FieldRef, ArrayRef)>, Buffer)) -> Self {
let len = pair.0.first().map(|x| x.1.len()).unwrap_or_default();
let (fields, arrays): (SchemaBuilder, Vec<_>) =
pair.0.into_iter().unzip();
let nulls = NullBuffer::new(BooleanBuffer::new(pair.1, 0, len));
@@ -480,11 +480,11 @@ mod tests {
let struct_array = StructArray::from(vec![
(
- Field::new("b", DataType::Boolean, false),
+ Arc::new(Field::new("b", DataType::Boolean, false)),
boolean.clone() as ArrayRef,
),
(
- Field::new("c", DataType::Int32, false),
+ Arc::new(Field::new("c", DataType::Int32, false)),
int.clone() as ArrayRef,
),
]);
@@ -503,11 +503,11 @@ mod tests {
let struct_array = StructArray::from(vec![
(
- Field::new("b", DataType::Boolean, false),
+ Arc::new(Field::new("b", DataType::Boolean, false)),
boolean.clone() as ArrayRef,
),
(
- Field::new("c", DataType::Int32, false),
+ Arc::new(Field::new("c", DataType::Int32, false)),
int.clone() as ArrayRef,
),
]);
@@ -582,7 +582,7 @@ mod tests {
)]
fn test_struct_array_from_mismatched_types_single() {
drop(StructArray::from(vec![(
- Field::new("b", DataType::Int16, false),
+ Arc::new(Field::new("b", DataType::Int16, false)),
Arc::new(BooleanArray::from(vec![false, false, true, true]))
as Arc<dyn Array>,
)]));
@@ -595,12 +595,12 @@ mod tests {
fn test_struct_array_from_mismatched_types_multiple() {
drop(StructArray::from(vec![
(
- Field::new("b", DataType::Int16, false),
+ Arc::new(Field::new("b", DataType::Int16, false)),
Arc::new(BooleanArray::from(vec![false, false, true, true]))
as Arc<dyn Array>,
),
(
- Field::new("c", DataType::Utf8, false),
+ Arc::new(Field::new("c", DataType::Utf8, false)),
Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
),
]));
@@ -700,11 +700,11 @@ mod tests {
fn test_invalid_struct_child_array_lengths() {
drop(StructArray::from(vec![
(
- Field::new("b", DataType::Float32, false),
+ Arc::new(Field::new("b", DataType::Float32, false)),
Arc::new(Float32Array::from(vec![1.1])) as Arc<dyn Array>,
),
(
- Field::new("c", DataType::Float64, false),
+ Arc::new(Field::new("c", DataType::Float64, false)),
Arc::new(Float64Array::from(vec![2.2, 3.3])),
),
]));
@@ -722,7 +722,7 @@ mod tests {
)]
fn test_struct_array_from_mismatched_nullability() {
drop(StructArray::from(vec![(
- Field::new("c", DataType::Int32, false),
+ Arc::new(Field::new("c", DataType::Int32, false)),
Arc::new(Int32Array::from(vec![Some(42), None, Some(19)])) as
ArrayRef,
)]));
}
diff --git a/arrow-array/src/builder/map_builder.rs
b/arrow-array/src/builder/map_builder.rs
index 72fa1bb91..db85465c8 100644
--- a/arrow-array/src/builder/map_builder.rs
+++ b/arrow-array/src/builder/map_builder.rs
@@ -181,16 +181,16 @@ impl<K: ArrayBuilder, V: ArrayBuilder> MapBuilder<K, V> {
keys_arr.null_count()
);
- let keys_field = Field::new(
+ let keys_field = Arc::new(Field::new(
self.field_names.key.as_str(),
keys_arr.data_type().clone(),
false, // always non-nullable
- );
- let values_field = Field::new(
+ ));
+ let values_field = Arc::new(Field::new(
self.field_names.value.as_str(),
values_arr.data_type().clone(),
true,
- );
+ ));
let struct_array =
StructArray::from(vec![(keys_field, keys_arr), (values_field,
values_arr)]);
diff --git a/arrow-array/src/builder/mod.rs b/arrow-array/src/builder/mod.rs
index b0c0a4988..081f4d5f4 100644
--- a/arrow-array/src/builder/mod.rs
+++ b/arrow-array/src/builder/mod.rs
@@ -115,14 +115,14 @@
//! /// Note: returns StructArray to allow nesting within another array if
desired
//! fn finish(&mut self) -> StructArray {
//! let i32 = Arc::new(self.i32.finish()) as ArrayRef;
-//! let i32_field = Field::new("i32", DataType::Int32, false);
+//! let i32_field = Arc::new(Field::new("i32", DataType::Int32,
false));
//!
//! let string = Arc::new(self.string.finish()) as ArrayRef;
-//! let string_field = Field::new("i32", DataType::Utf8, false);
+//! let string_field = Arc::new(Field::new("i32", DataType::Utf8,
false));
//!
//! let i32_list = Arc::new(self.i32_list.finish()) as ArrayRef;
//! let value_field = Arc::new(Field::new("item", DataType::Int32,
true));
-//! let i32_list_field = Field::new("i32_list",
DataType::List(value_field), true);
+//! let i32_list_field = Arc::new(Field::new("i32_list",
DataType::List(value_field), true));
//!
//! StructArray::from(vec![
//! (i32_field, i32),
diff --git a/arrow-array/src/record_batch.rs b/arrow-array/src/record_batch.rs
index 8fb08111c..bd1cc65c7 100644
--- a/arrow-array/src/record_batch.rs
+++ b/arrow-array/src/record_batch.rs
@@ -788,11 +788,11 @@ mod tests {
let int = Arc::new(Int32Array::from(vec![42, 28, 19, 31]));
let struct_array = StructArray::from(vec![
(
- Field::new("b", DataType::Boolean, false),
+ Arc::new(Field::new("b", DataType::Boolean, false)),
boolean.clone() as ArrayRef,
),
(
- Field::new("c", DataType::Int32, false),
+ Arc::new(Field::new("c", DataType::Int32, false)),
int.clone() as ArrayRef,
),
]);
diff --git a/arrow-cast/src/pretty.rs b/arrow-cast/src/pretty.rs
index c75721ab8..13d1df6a1 100644
--- a/arrow-cast/src/pretty.rs
+++ b/arrow-cast/src/pretty.rs
@@ -649,17 +649,17 @@ mod tests {
let c1 = StructArray::from(vec![
(
- Field::new("c11", DataType::Int32, true),
+ Arc::new(Field::new("c11", DataType::Int32, true)),
Arc::new(Int32Array::from(vec![Some(1), None, Some(5)])) as
ArrayRef,
),
(
- Field::new_struct(
+ Arc::new(Field::new_struct(
"c12",
vec![Field::new("c121", DataType::Utf8, false)],
false,
- ),
+ )),
Arc::new(StructArray::from(vec![(
- Field::new("c121", DataType::Utf8, false),
+ Arc::new(Field::new("c121", DataType::Utf8, false)),
Arc::new(StringArray::from(vec![Some("e"), Some("f"),
Some("g")]))
as ArrayRef,
)])) as ArrayRef,
diff --git a/arrow-ipc/src/reader.rs b/arrow-ipc/src/reader.rs
index 16cb99b92..d19869616 100644
--- a/arrow-ipc/src/reader.rs
+++ b/arrow-ipc/src/reader.rs
@@ -183,7 +183,7 @@ fn create_array(
)?;
node_index = triple.1;
buffer_index = triple.2;
- struct_arrays.push((struct_field.as_ref().clone(), triple.0));
+ struct_arrays.push((struct_field.clone(), triple.0));
}
let null_count = struct_node.null_count() as usize;
let struct_array = if null_count > 0 {
@@ -1593,7 +1593,7 @@ mod tests {
let array = Arc::new(inner) as ArrayRef;
- let dctfield = Field::new("dict", array.data_type().clone(), false);
+ let dctfield = Arc::new(Field::new("dict", array.data_type().clone(),
false));
let s = StructArray::from(vec![(dctfield, array)]);
let struct_array = Arc::new(s) as ArrayRef;
@@ -1695,9 +1695,12 @@ mod tests {
);
let string_array: ArrayRef = Arc::new(StringArray::from(xs.clone()));
let struct_array = StructArray::from(vec![
- (Field::new("f2.1", DataType::Utf8, false), string_array),
(
- Field::new("f2.2_struct", dict.data_type().clone(), false),
+ Arc::new(Field::new("f2.1", DataType::Utf8, false)),
+ string_array,
+ ),
+ (
+ Arc::new(Field::new("f2.2_struct", dict.data_type().clone(),
false)),
dict.clone() as ArrayRef,
),
]);
@@ -1727,20 +1730,20 @@ mod tests {
let key_dict_keys = Int8Array::from_iter_values([0, 0, 2, 1, 1, 3]);
let key_dict_array = DictionaryArray::new(key_dict_keys, values);
- let keys_field = Field::new_dict(
+ let keys_field = Arc::new(Field::new_dict(
"keys",
DataType::Dictionary(Box::new(DataType::Int8),
Box::new(DataType::Utf8)),
true,
1,
false,
- );
- let values_field = Field::new_dict(
+ ));
+ let values_field = Arc::new(Field::new_dict(
"values",
DataType::Dictionary(Box::new(DataType::Int8),
Box::new(DataType::Utf8)),
true,
1,
false,
- );
+ ));
let entry_struct = StructArray::from(vec![
(keys_field, make_array(key_dict_array.into_data())),
(values_field, make_array(value_dict_array.into_data())),
diff --git a/arrow-ipc/src/writer.rs b/arrow-ipc/src/writer.rs
index abaecea1f..8f36f8c04 100644
--- a/arrow-ipc/src/writer.rs
+++ b/arrow-ipc/src/writer.rs
@@ -1692,8 +1692,13 @@ mod tests {
let array = Arc::new(inner) as ArrayRef;
// Dict field with id 2
- let dctfield =
- Field::new_dict("dict", array.data_type().clone(), false, 2,
false);
+ let dctfield = Arc::new(Field::new_dict(
+ "dict",
+ array.data_type().clone(),
+ false,
+ 2,
+ false,
+ ));
let s = StructArray::from(vec![(dctfield, array)]);
let struct_array = Arc::new(s) as ArrayRef;
@@ -1896,11 +1901,11 @@ mod tests {
let struct_array = StructArray::from(vec![
(
- Field::new("s", DataType::Utf8, true),
+ Arc::new(Field::new("s", DataType::Utf8, true)),
Arc::new(strings) as ArrayRef,
),
(
- Field::new("c", DataType::Int32, true),
+ Arc::new(Field::new("c", DataType::Int32, true)),
Arc::new(ints) as ArrayRef,
),
]);
diff --git a/arrow-json/src/writer.rs b/arrow-json/src/writer.rs
index a096590ec..d610dd9a3 100644
--- a/arrow-json/src/writer.rs
+++ b/arrow-json/src/writer.rs
@@ -983,19 +983,19 @@ mod tests {
let c1 = StructArray::from(vec![
(
- Field::new("c11", DataType::Int32, true),
+ Arc::new(Field::new("c11", DataType::Int32, true)),
Arc::new(Int32Array::from(vec![Some(1), None, Some(5)])) as
ArrayRef,
),
(
- Field::new(
+ Arc::new(Field::new(
"c12",
DataType::Struct(
vec![Field::new("c121", DataType::Utf8, false)].into(),
),
false,
- ),
+ )),
Arc::new(StructArray::from(vec![(
- Field::new("c121", DataType::Utf8, false),
+ Arc::new(Field::new("c121", DataType::Utf8, false)),
Arc::new(StringArray::from(vec![Some("e"), Some("f"),
Some("g")]))
as ArrayRef,
)])) as ArrayRef,
@@ -1150,19 +1150,19 @@ mod tests {
let struct_values = StructArray::from(vec![
(
- Field::new("c11", DataType::Int32, true),
+ Arc::new(Field::new("c11", DataType::Int32, true)),
Arc::new(Int32Array::from(vec![Some(1), None, Some(5)])) as
ArrayRef,
),
(
- Field::new(
+ Arc::new(Field::new(
"c12",
DataType::Struct(
vec![Field::new("c121", DataType::Utf8, false)].into(),
),
false,
- ),
+ )),
Arc::new(StructArray::from(vec![(
- Field::new("c121", DataType::Utf8, false),
+ Arc::new(Field::new("c121", DataType::Utf8, false)),
Arc::new(StringArray::from(vec![Some("e"), Some("f"),
Some("g")]))
as ArrayRef,
)])) as ArrayRef,
@@ -1340,8 +1340,8 @@ mod tests {
super::StringArray::from(vec!["foo", "bar", "baz", "qux", "quux"]);
let values_array = super::Int64Array::from(vec![10, 20, 30, 40, 50]);
- let keys = Field::new("keys", DataType::Utf8, false);
- let values = Field::new("values", DataType::Int64, false);
+ let keys = Arc::new(Field::new("keys", DataType::Utf8, false));
+ let values = Arc::new(Field::new("values", DataType::Int64, false));
let entry_struct = StructArray::from(vec![
(keys, Arc::new(keys_array) as ArrayRef),
(values, Arc::new(values_array) as ArrayRef),
diff --git a/arrow-row/src/lib.rs b/arrow-row/src/lib.rs
index 71e1de416..9010c8d9a 100644
--- a/arrow-row/src/lib.rs
+++ b/arrow-row/src/lib.rs
@@ -1748,9 +1748,9 @@ mod tests {
fn test_struct() {
// Test basic
let a = Arc::new(Int32Array::from(vec![1, 1, 2, 2])) as ArrayRef;
- let a_f = Field::new("int", DataType::Int32, false);
+ let a_f = Arc::new(Field::new("int", DataType::Int32, false));
let u = Arc::new(StringArray::from(vec!["a", "b", "c", "d"])) as
ArrayRef;
- let u_f = Field::new("s", DataType::Utf8, false);
+ let u_f = Arc::new(Field::new("s", DataType::Utf8, false));
let s1 = Arc::new(StructArray::from(vec![(a_f, a), (u_f, u)])) as
ArrayRef;
let sort_fields = vec![SortField::new(s1.data_type().clone())];
diff --git a/arrow-select/src/concat.rs b/arrow-select/src/concat.rs
index ed27520cc..0bf4c97ff 100644
--- a/arrow-select/src/concat.rs
+++ b/arrow-select/src/concat.rs
@@ -326,7 +326,7 @@ mod tests {
#[test]
fn test_concat_struct_arrays() {
- let field = Field::new("field", DataType::Int64, true);
+ let field = Arc::new(Field::new("field", DataType::Int64, true));
let input_primitive_1: ArrayRef =
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
@@ -381,7 +381,7 @@ mod tests {
#[test]
fn test_concat_struct_array_slices() {
- let field = Field::new("field", DataType::Int64, true);
+ let field = Arc::new(Field::new("field", DataType::Int64, true));
let input_primitive_1: ArrayRef =
Arc::new(PrimitiveArray::<Int64Type>::from(vec![
Some(-1),
diff --git a/arrow-select/src/take.rs b/arrow-select/src/take.rs
index 3e7432530..5d6507e71 100644
--- a/arrow-select/src/take.rs
+++ b/arrow-select/src/take.rs
@@ -25,7 +25,7 @@ use arrow_array::types::*;
use arrow_array::*;
use arrow_buffer::{bit_util, ArrowNativeType, Buffer, MutableBuffer,
NullBuffer};
use arrow_data::{ArrayData, ArrayDataBuilder};
-use arrow_schema::{ArrowError, DataType, Field};
+use arrow_schema::{ArrowError, DataType, FieldRef};
use num::{ToPrimitive, Zero};
@@ -163,8 +163,8 @@ where
.iter()
.map(|a| take_impl(a.as_ref(), indices, Some(options.clone())))
.collect::<Result<Vec<ArrayRef>, _>>()?;
- let fields: Vec<(Field, ArrayRef)> =
- fields.iter().map(|f|
f.as_ref().clone()).zip(arrays).collect();
+ let fields: Vec<(FieldRef, ArrayRef)> =
+ fields.iter().cloned().zip(arrays).collect();
// Create the null bit buffer.
let is_valid: Buffer = indices
@@ -924,7 +924,7 @@ where
mod tests {
use super::*;
use arrow_array::builder::*;
- use arrow_schema::{Fields, TimeUnit};
+ use arrow_schema::{Field, Fields, TimeUnit};
fn test_take_decimal_arrays(
data: Vec<Option<i128>>,
diff --git a/arrow/examples/builders.rs b/arrow/examples/builders.rs
index d0e6b3108..a6d8c563b 100644
--- a/arrow/examples/builders.rs
+++ b/arrow/examples/builders.rs
@@ -119,12 +119,12 @@ fn main() {
// helper, which takes the underlying arrays and field types.
let struct_array = StructArray::from(vec![
(
- Field::new("b", DataType::Boolean, false),
+ Arc::new(Field::new("b", DataType::Boolean, false)),
Arc::new(BooleanArray::from(vec![false, false, true, true]))
as Arc<dyn Array>,
),
(
- Field::new("c", DataType::Int32, false),
+ Arc::new(Field::new("c", DataType::Int32, false)),
Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
),
]);
diff --git a/arrow/examples/dynamic_types.rs b/arrow/examples/dynamic_types.rs
index cb26a0d33..5470131d6 100644
--- a/arrow/examples/dynamic_types.rs
+++ b/arrow/examples/dynamic_types.rs
@@ -49,15 +49,15 @@ fn main() -> Result<()> {
let nested = StructArray::from(vec![
(
- Field::new("a", DataType::Utf8, false),
+ Arc::new(Field::new("a", DataType::Utf8, false)),
Arc::new(StringArray::from(vec!["a", "b", "c", "d", "e"])) as
Arc<dyn Array>,
),
(
- Field::new("b", DataType::Float64, false),
+ Arc::new(Field::new("b", DataType::Float64, false)),
Arc::new(Float64Array::from(vec![1.1, 2.2, 3.3, 4.4, 5.5])),
),
(
- Field::new("c", DataType::Float64, false),
+ Arc::new(Field::new("c", DataType::Float64, false)),
Arc::new(Float64Array::from(vec![2.2, 3.3, 4.4, 5.5, 6.6])),
),
]);
diff --git a/arrow/src/array/ffi.rs b/arrow/src/array/ffi.rs
index 1611dc530..56b9b6ecf 100644
--- a/arrow/src/array/ffi.rs
+++ b/arrow/src/array/ffi.rs
@@ -123,28 +123,28 @@ mod tests {
fn test_struct() -> Result<()> {
let inner = StructArray::from(vec![
(
- Field::new("a1", DataType::Boolean, false),
+ Arc::new(Field::new("a1", DataType::Boolean, false)),
Arc::new(BooleanArray::from(vec![true, true, false, false]))
as Arc<dyn Array>,
),
(
- Field::new("a2", DataType::UInt32, false),
+ Arc::new(Field::new("a2", DataType::UInt32, false)),
Arc::new(UInt32Array::from(vec![1, 2, 3, 4])),
),
]);
let array = StructArray::from(vec![
(
- Field::new("a", inner.data_type().clone(), false),
+ Arc::new(Field::new("a", inner.data_type().clone(), false)),
Arc::new(inner) as Arc<dyn Array>,
),
(
- Field::new("b", DataType::Boolean, false),
+ Arc::new(Field::new("b", DataType::Boolean, false)),
Arc::new(BooleanArray::from(vec![false, false, true, true]))
as Arc<dyn Array>,
),
(
- Field::new("c", DataType::UInt32, false),
+ Arc::new(Field::new("c", DataType::UInt32, false)),
Arc::new(UInt32Array::from(vec![42, 28, 19, 31])),
),
]);
diff --git a/arrow/src/ffi.rs b/arrow/src/ffi.rs
index 0af1b1111..d8b5be69a 100644
--- a/arrow/src/ffi.rs
+++ b/arrow/src/ffi.rs
@@ -1099,7 +1099,7 @@ mod tests {
let metadata: HashMap<String, String> =
[("Hello".to_string(), "World! 😊".to_string())].into();
let struct_array = StructArray::from(vec![(
- Field::new("a", DataType::Int32, false).with_metadata(metadata),
+ Arc::new(Field::new("a", DataType::Int32,
false).with_metadata(metadata)),
Arc::new(Int32Array::from(vec![2, 4, 6])) as Arc<dyn Array>,
)]);
diff --git a/arrow/tests/array_cast.rs b/arrow/tests/array_cast.rs
index 96a4f2b41..bf7e7a326 100644
--- a/arrow/tests/array_cast.rs
+++ b/arrow/tests/array_cast.rs
@@ -127,12 +127,12 @@ fn get_arrays_of_all_types() -> Vec<ArrayRef> {
Arc::new(make_fixed_size_binary_array()),
Arc::new(StructArray::from(vec![
(
- Field::new("a", DataType::Boolean, false),
+ Arc::new(Field::new("a", DataType::Boolean, false)),
Arc::new(BooleanArray::from(vec![false, false, true, true]))
as Arc<dyn Array>,
),
(
- Field::new("b", DataType::Int32, false),
+ Arc::new(Field::new("b", DataType::Int32, false)),
Arc::new(Int32Array::from(vec![42, 28, 19, 31])),
),
])),
diff --git a/arrow/tests/array_transform.rs b/arrow/tests/array_transform.rs
index 7cd0007cc..40938c80f 100644
--- a/arrow/tests/array_transform.rs
+++ b/arrow/tests/array_transform.rs
@@ -764,11 +764,11 @@ fn test_map_nulls_append() {
let expected_entry_array = StructArray::from(vec![
(
- Field::new("keys", DataType::Int64, false),
+ Arc::new(Field::new("keys", DataType::Int64, false)),
Arc::new(expected_key_array) as ArrayRef,
),
(
- Field::new("values", DataType::Int64, true),
+ Arc::new(Field::new("values", DataType::Int64, true)),
Arc::new(expected_value_array) as ArrayRef,
),
]);
diff --git a/parquet/src/arrow/array_reader/struct_array.rs
b/parquet/src/arrow/array_reader/struct_array.rs
index 11e019f29..600fda4fb 100644
--- a/parquet/src/arrow/array_reader/struct_array.rs
+++ b/parquet/src/arrow/array_reader/struct_array.rs
@@ -292,7 +292,7 @@ mod tests {
let validity = Buffer::from([0b00000111]);
let struct_fields = vec![(
- Field::new("foo", expected_l.data_type().clone(), true),
+ Arc::new(Field::new("foo", expected_l.data_type().clone(), true)),
expected_l.clone() as ArrayRef,
)];
let expected = StructArray::from((struct_fields, validity));
diff --git a/parquet/src/arrow/arrow_writer/levels.rs
b/parquet/src/arrow/arrow_writer/levels.rs
index fe6126ba4..fc5b94603 100644
--- a/parquet/src/arrow/arrow_writer/levels.rs
+++ b/parquet/src/arrow/arrow_writer/levels.rs
@@ -675,7 +675,7 @@ mod tests {
.unwrap();
let list = make_array(list);
- let list_field = Field::new("list", list_type, true);
+ let list_field = Arc::new(Field::new("list", list_type, true));
let struct_array =
StructArray::from((vec![(list_field, list)],
Buffer::from([0b00011010])));
@@ -793,7 +793,7 @@ mod tests {
.build()
.unwrap();
let list = make_array(list);
- let list_field = Field::new("list", list_type, true);
+ let list_field = Arc::new(Field::new("list", list_type, true));
let struct_array = StructArray::from(vec![(list_field, list)]);
let array = Arc::new(struct_array) as ArrayRef;
@@ -839,7 +839,7 @@ mod tests {
.unwrap();
let list_2 = make_array(list_2);
- let list_2_field = Field::new("list_2", list_2_type, true);
+ let list_2_field = Arc::new(Field::new("list_2", list_2_type, true));
let struct_array =
StructArray::from((vec![(list_2_field, list_2)],
Buffer::from([0b00001111])));
@@ -871,13 +871,13 @@ mod tests {
// - {a: {b: {c: 6}}}
let c = Int32Array::from_iter([Some(1), None, Some(3), None, Some(5),
Some(6)]);
- let c_field = Field::new("c", DataType::Int32, true);
+ let c_field = Arc::new(Field::new("c", DataType::Int32, true));
let b = StructArray::from((
(vec![(c_field, Arc::new(c) as ArrayRef)]),
Buffer::from([0b00110111]),
));
- let b_field = Field::new("b", b.data_type().clone(), true);
+ let b_field = Arc::new(Field::new("b", b.data_type().clone(), true));
let a = StructArray::from((
(vec![(b_field, Arc::new(b) as ArrayRef)]),
Buffer::from([0b00101111]),
@@ -944,18 +944,18 @@ mod tests {
// this tests the level generation from the equivalent
arrow_writer_complex test
// define schema
- let struct_field_d = Field::new("d", DataType::Float64, true);
- let struct_field_f = Field::new("f", DataType::Float32, true);
- let struct_field_g = Field::new(
+ let struct_field_d = Arc::new(Field::new("d", DataType::Float64,
true));
+ let struct_field_f = Arc::new(Field::new("f", DataType::Float32,
true));
+ let struct_field_g = Arc::new(Field::new(
"g",
DataType::List(Arc::new(Field::new("items", DataType::Int16,
false))),
false,
- );
- let struct_field_e = Field::new(
+ ));
+ let struct_field_e = Arc::new(Field::new(
"e",
DataType::Struct(vec![struct_field_f.clone(),
struct_field_g.clone()].into()),
true,
- );
+ ));
let schema = Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Int32, true),
@@ -1072,7 +1072,7 @@ mod tests {
#[test]
fn test_null_vs_nonnull_struct() {
// define schema
- let offset_field = Field::new("offset", DataType::Int32, true);
+ let offset_field = Arc::new(Field::new("offset", DataType::Int32,
true));
let schema = Schema::new(vec![Field::new(
"some_nested_object",
DataType::Struct(vec![offset_field.clone()].into()),
@@ -1095,7 +1095,7 @@ mod tests {
// create second batch
// define schema
- let offset_field = Field::new("offset", DataType::Int32, true);
+ let offset_field = Arc::new(Field::new("offset", DataType::Int32,
true));
let schema = Schema::new(vec![Field::new(
"some_nested_object",
DataType::Struct(vec![offset_field.clone()].into()),
@@ -1286,7 +1286,7 @@ mod tests {
// This test assumes that nulls don't take up space
assert_eq!(inner.values().len(), 7);
- let field = Field::new("list", inner.data_type().clone(), true);
+ let field = Arc::new(Field::new("list", inner.data_type().clone(),
true));
let array = Arc::new(inner) as ArrayRef;
let nulls = Buffer::from([0b01010111]);
let struct_a = StructArray::from((vec![(field, array)], nulls));
@@ -1331,8 +1331,8 @@ mod tests {
None,
])) as ArrayRef;
- let field_a1 = Field::new("list", a1.data_type().clone(), true);
- let field_a2 = Field::new("integers", a2.data_type().clone(), true);
+ let field_a1 = Arc::new(Field::new("list", a1.data_type().clone(),
true));
+ let field_a2 = Arc::new(Field::new("integers", a2.data_type().clone(),
true));
let nulls = Buffer::from([0b00110111]);
let struct_a = Arc::new(
diff --git a/parquet/src/arrow/arrow_writer/mod.rs
b/parquet/src/arrow/arrow_writer/mod.rs
index 3987cccf6..67fec4489 100644
--- a/parquet/src/arrow/arrow_writer/mod.rs
+++ b/parquet/src/arrow/arrow_writer/mod.rs
@@ -879,13 +879,19 @@ mod tests {
#[test]
fn arrow_writer_complex() {
// define schema
- let struct_field_d = Field::new("d", DataType::Float64, true);
- let struct_field_f = Field::new("f", DataType::Float32, true);
- let struct_field_g =
- Field::new_list("g", Field::new("item", DataType::Int16, true),
false);
- let struct_field_h =
- Field::new_list("h", Field::new("item", DataType::Int16, false),
true);
- let struct_field_e = Field::new_struct(
+ let struct_field_d = Arc::new(Field::new("d", DataType::Float64,
true));
+ let struct_field_f = Arc::new(Field::new("f", DataType::Float32,
true));
+ let struct_field_g = Arc::new(Field::new_list(
+ "g",
+ Field::new("item", DataType::Int16, true),
+ false,
+ ));
+ let struct_field_h = Arc::new(Field::new_list(
+ "h",
+ Field::new("item", DataType::Int16, false),
+ true,
+ ));
+ let struct_field_e = Arc::new(Field::new_struct(
"e",
vec![
struct_field_f.clone(),
@@ -893,7 +899,7 @@ mod tests {
struct_field_h.clone(),
],
false,
- );
+ ));
let schema = Schema::new(vec![
Field::new("a", DataType::Int32, false),
Field::new("b", DataType::Int32, true),
@@ -963,9 +969,9 @@ mod tests {
// It was subsequently fixed while investigating
https://github.com/apache/arrow-rs/issues/245.
// define schema
- let offset_field = Field::new("offset", DataType::Int32, false);
- let partition_field = Field::new("partition", DataType::Int64, true);
- let topic_field = Field::new("topic", DataType::Utf8, true);
+ let offset_field = Arc::new(Field::new("offset", DataType::Int32,
false));
+ let partition_field = Arc::new(Field::new("partition",
DataType::Int64, true));
+ let topic_field = Arc::new(Field::new("topic", DataType::Utf8, true));
let schema = Schema::new(vec![Field::new(
"some_nested_object",
DataType::Struct(Fields::from(vec![
@@ -1857,7 +1863,7 @@ mod tests {
#[test]
fn struct_single_column() {
let a_values = Int32Array::from(vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10]);
- let struct_field_a = Field::new("f", DataType::Int32, false);
+ let struct_field_a = Arc::new(Field::new("f", DataType::Int32, false));
let s = StructArray::from(vec![(struct_field_a, Arc::new(a_values) as
ArrayRef)]);
let values = Arc::new(s);
@@ -2233,20 +2239,20 @@ mod tests {
#[test]
fn complex_aggregate() {
// Tests aggregating nested data
- let field_a = Field::new("leaf_a", DataType::Int32, false);
- let field_b = Field::new("leaf_b", DataType::Int32, true);
- let struct_a = Field::new(
+ let field_a = Arc::new(Field::new("leaf_a", DataType::Int32, false));
+ let field_b = Arc::new(Field::new("leaf_b", DataType::Int32, true));
+ let struct_a = Arc::new(Field::new(
"struct_a",
DataType::Struct(vec![field_a.clone(), field_b.clone()].into()),
true,
- );
+ ));
- let list_a = Field::new("list", DataType::List(Arc::new(struct_a)),
true);
- let struct_b = Field::new(
+ let list_a = Arc::new(Field::new("list", DataType::List(struct_a),
true));
+ let struct_b = Arc::new(Field::new(
"struct_b",
DataType::Struct(vec![list_a.clone()].into()),
false,
- );
+ ));
let schema = Arc::new(Schema::new(vec![struct_b]));