This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 4de689598 remove null_count from try_new (#1721)
4de689598 is described below
commit 4de689598df6ea284452e687d69c7654b5a71762
Author: Remzi Yang <[email protected]>
AuthorDate: Sun May 22 22:06:13 2022 +0800
remove null_count from try_new (#1721)
Signed-off-by: remzi <[email protected]>
---
arrow/benches/array_data_validate.rs | 1 -
arrow/src/array/data.rs | 116 ++++-------------------------------
arrow/src/array/transform/mod.rs | 4 --
arrow/src/compute/util.rs | 1 -
4 files changed, 11 insertions(+), 111 deletions(-)
diff --git a/arrow/benches/array_data_validate.rs
b/arrow/benches/array_data_validate.rs
index 32e548a29..c46252bec 100644
--- a/arrow/benches/array_data_validate.rs
+++ b/arrow/benches/array_data_validate.rs
@@ -30,7 +30,6 @@ fn create_binary_array_data(length: i32) -> ArrayData {
DataType::Binary,
length as usize,
None,
- None,
0,
vec![offsets_buffer, value_buffer],
vec![],
diff --git a/arrow/src/array/data.rs b/arrow/src/array/data.rs
index 22536ca58..dcf382ae0 100644
--- a/arrow/src/array/data.rs
+++ b/arrow/src/array/data.rs
@@ -312,16 +312,12 @@ impl ArrayData {
/// Create a new ArrayData, validating that the provided buffers
/// form a valid Arrow array of the specified data type.
///
- /// If `null_count` is not specified, the number of nulls in
- /// null_bit_buffer is calculated
- ///
/// Note: This is a low level API and most users of the arrow
/// crate should create arrays using the methods in the `array`
/// module.
pub fn try_new(
data_type: DataType,
len: usize,
- null_count: Option<usize>,
null_bit_buffer: Option<Buffer>,
offset: usize,
buffers: Vec<Buffer>,
@@ -345,7 +341,7 @@ impl ArrayData {
Self::new_unchecked(
data_type,
len,
- null_count,
+ None,
null_bit_buffer,
offset,
buffers,
@@ -1482,7 +1478,6 @@ impl ArrayDataBuilder {
ArrayData::try_new(
self.data_type,
self.len,
- self.null_count,
self.null_bit_buffer,
self.offset,
self.buffers,
@@ -1534,7 +1529,6 @@ mod tests {
let child_arr_data = ArrayData::try_new(
DataType::Int32,
5,
- Some(0),
None,
0,
vec![Buffer::from_slice_ref(&[1i32, 2, 3, 4, 5])],
@@ -1657,7 +1651,6 @@ mod tests {
let string_data = ArrayData::try_new(
DataType::Utf8,
3,
- Some(1),
Some(Buffer::from_iter(vec![true, false, true])),
0,
vec![offsets_buffer, data_buffer],
@@ -1696,8 +1689,7 @@ mod tests {
fn test_buffer_too_small() {
let buffer = Buffer::from_slice_ref(&[0i32, 2i32]);
// should fail as the declared size (10*8 = 80) is larger than the
underlying bfufer (8)
- ArrayData::try_new(DataType::Int64, 10, Some(0), None, 0,
vec![buffer], vec![])
- .unwrap();
+ ArrayData::try_new(DataType::Int64, 10, None, 0, vec![buffer],
vec![]).unwrap();
}
#[test]
@@ -1707,8 +1699,7 @@ mod tests {
fn test_buffer_too_small_offset() {
let buffer = Buffer::from_slice_ref(&[0i32, 2i32]);
// should fail -- size is ok, but also has offset
- ArrayData::try_new(DataType::Int64, 1, Some(0), None, 1, vec![buffer],
vec![])
- .unwrap();
+ ArrayData::try_new(DataType::Int64, 1, None, 1, vec![buffer],
vec![]).unwrap();
}
#[test]
@@ -1716,62 +1707,27 @@ mod tests {
fn test_bad_number_of_buffers() {
let buffer1 = Buffer::from_slice_ref(&[0i32, 2i32]);
let buffer2 = Buffer::from_slice_ref(&[0i32, 2i32]);
- ArrayData::try_new(
- DataType::Int64,
- 1,
- Some(0),
- None,
- 0,
- vec![buffer1, buffer2],
- vec![],
- )
- .unwrap();
+ ArrayData::try_new(DataType::Int64, 1, None, 0, vec![buffer1,
buffer2], vec![])
+ .unwrap();
}
#[test]
#[should_panic(expected = "integer overflow computing min buffer size")]
fn test_fixed_width_overflow() {
let buffer = Buffer::from_slice_ref(&[0i32, 2i32]);
- ArrayData::try_new(
- DataType::Int64,
- usize::MAX,
- Some(0),
- None,
- 0,
- vec![buffer],
- vec![],
- )
- .unwrap();
- }
-
- #[test]
- #[should_panic(expected = "null_bit_buffer size too small. got 1 needed
2")]
- fn test_bitmap_too_small_with_null_count() {
- let buffer = make_i32_buffer(9);
- let null_bit_buffer = Buffer::from(vec![0b11111111]);
-
- ArrayData::try_new(
- DataType::Int32,
- 9,
- Some(0),
- Some(null_bit_buffer),
- 0,
- vec![buffer],
- vec![],
- )
- .unwrap();
+ ArrayData::try_new(DataType::Int64, usize::MAX, None, 0, vec![buffer],
vec![])
+ .unwrap();
}
#[test]
#[should_panic(expected = "null_bit_buffer size too small. got 1 needed
2")]
- fn test_bitmap_too_small_without_null_count() {
+ fn test_bitmap_too_small() {
let buffer = make_i32_buffer(9);
let null_bit_buffer = Buffer::from(vec![0b11111111]);
ArrayData::try_new(
DataType::Int32,
9,
- None,
Some(null_bit_buffer),
0,
vec![buffer],
@@ -1780,14 +1736,6 @@ mod tests {
.unwrap();
}
- #[test]
- #[should_panic(expected = "null_count 3 for an array exceeds length of 2
elements")]
- fn test_bad_null_count() {
- let buffer = Buffer::from_slice_ref(&[0i32, 2i32]);
- ArrayData::try_new(DataType::Int32, 2, Some(3), None, 0, vec![buffer],
vec![])
- .unwrap();
- }
-
// Test creating a dictionary with a non integer type
#[test]
#[should_panic(expected = "Dictionary key type must be integer, but was
Utf8")]
@@ -1798,7 +1746,6 @@ mod tests {
let child_data = ArrayData::try_new(
DataType::Int32,
1,
- Some(0),
None,
0,
vec![i32_buffer.clone()],
@@ -1808,7 +1755,6 @@ mod tests {
ArrayData::try_new(
data_type,
1,
- Some(0),
None,
0,
vec![i32_buffer.clone(), i32_buffer],
@@ -1830,16 +1776,8 @@ mod tests {
Box::new(DataType::LargeUtf8),
);
let child_data = string_array.data().clone();
- ArrayData::try_new(
- data_type,
- 1,
- Some(0),
- None,
- 0,
- vec![i32_buffer],
- vec![child_data],
- )
- .unwrap();
+ ArrayData::try_new(data_type, 1, None, 0, vec![i32_buffer],
vec![child_data])
+ .unwrap();
}
#[test]
@@ -1850,7 +1788,6 @@ mod tests {
DataType::Utf8,
0,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -1866,7 +1803,6 @@ mod tests {
DataType::Utf8,
0,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -1883,7 +1819,6 @@ mod tests {
DataType::Utf8,
0,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -1899,7 +1834,6 @@ mod tests {
DataType::Utf8,
0,
None,
- None,
3,
vec![offsets_buffer, data_buffer],
vec![],
@@ -1918,7 +1852,6 @@ mod tests {
DataType::LargeUtf8,
0,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -1937,7 +1870,6 @@ mod tests {
DataType::Utf8,
2,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -1956,7 +1888,6 @@ mod tests {
DataType::LargeUtf8,
2,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -1973,7 +1904,6 @@ mod tests {
DataType::Utf8,
2,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -1990,7 +1920,6 @@ mod tests {
DataType::Utf8,
2,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -2008,7 +1937,6 @@ mod tests {
DataType::Utf8,
2,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -2026,7 +1954,6 @@ mod tests {
DataType::Utf8,
2,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -2044,7 +1971,6 @@ mod tests {
DataType::Utf8,
2,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -2061,7 +1987,6 @@ mod tests {
DataType::Utf8,
2,
None,
- None,
1,
vec![offsets_buffer, data_buffer],
vec![],
@@ -2082,7 +2007,6 @@ mod tests {
DataType::Utf8,
2,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -2107,7 +2031,6 @@ mod tests {
DataType::FixedSizeList(Box::new(field), 2),
3,
None,
- None,
0,
vec![],
vec![child_array.data().clone()],
@@ -2127,7 +2050,6 @@ mod tests {
DataType::Struct(vec![Field::new("field1", DataType::Int64,
true)]),
3,
None,
- None,
0,
vec![],
vec![field1.data().clone()],
@@ -2149,7 +2071,6 @@ mod tests {
DataType::Struct(vec![Field::new("field1", DataType::Int32,
true)]),
6,
None,
- None,
0,
vec![],
vec![field1.data().clone()],
@@ -2171,7 +2092,6 @@ mod tests {
data_type,
2,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -2205,7 +2125,6 @@ mod tests {
data_type,
4,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -2259,7 +2178,6 @@ mod tests {
data_type,
4,
None,
- None,
0,
vec![offsets_buffer, data_buffer],
vec![],
@@ -2318,7 +2236,6 @@ mod tests {
data_type,
2,
None,
- None,
0,
vec![keys.data().buffers[0].clone()],
vec![values.data().clone()],
@@ -2345,7 +2262,6 @@ mod tests {
data_type,
2,
None,
- None,
0,
vec![keys.data().buffers[0].clone()],
vec![values.data().clone()],
@@ -2371,7 +2287,6 @@ mod tests {
data_type,
1,
None,
- None,
0,
vec![keys.data().buffers[0].clone()],
vec![values.data().clone()],
@@ -2398,7 +2313,6 @@ mod tests {
data_type,
2,
None,
- None,
0,
vec![keys.data().buffers[0].clone()],
vec![values.data().clone()],
@@ -2422,7 +2336,6 @@ mod tests {
data_type,
3,
None,
- None,
0,
vec![offsets_buffer],
vec![values.data().clone()],
@@ -2467,7 +2380,6 @@ mod tests {
data_type,
3,
None,
- None,
0,
vec![offsets_buffer],
vec![values.data().clone()],
@@ -2508,7 +2420,7 @@ mod tests {
let data_type =
DataType::Struct(vec![Field::new("d",
dict_data.data_type().clone(), true)]);
- ArrayData::try_new(data_type, 1, None, None, 0, vec![],
vec![dict_data]).unwrap();
+ ArrayData::try_new(data_type, 1, None, 0, vec![],
vec![dict_data]).unwrap();
}
/// returns a buffer initialized with some constant value for tests
@@ -2541,7 +2453,6 @@ mod tests {
),
2,
None,
- None,
0,
vec![type_ids],
vec![field1.data().clone(), field2.data().clone()],
@@ -2573,7 +2484,6 @@ mod tests {
),
2,
None,
- None,
0,
vec![type_ids],
vec![field1.data().clone(), field2.data().clone()],
@@ -2601,7 +2511,6 @@ mod tests {
),
2,
None,
- None,
0,
vec![type_ids], // need offsets buffer here too
vec![field1.data().clone(), field2.data().clone()],
@@ -2632,7 +2541,6 @@ mod tests {
),
2,
None,
- None,
0,
vec![type_ids, offsets],
vec![field1.data().clone(), field2.data().clone()],
@@ -2725,7 +2633,6 @@ mod tests {
let cloned_data = ArrayData::try_new(
struct_array_slice.data_type().clone(),
struct_array_slice.len(),
- None, // force new to compute the number of null bits
struct_array_data.null_buffer().cloned(),
struct_array_slice.offset(),
struct_array_data.buffers().to_vec(),
@@ -2785,7 +2692,6 @@ mod tests {
let data = ArrayData::try_new(
DataType::Utf8,
4,
- None,
Some(null_buffer),
0,
vec![offsets_buffer, strings_buffer],
diff --git a/arrow/src/array/transform/mod.rs b/arrow/src/array/transform/mod.rs
index 586a4fec2..4e47dbc29 100644
--- a/arrow/src/array/transform/mod.rs
+++ b/arrow/src/array/transform/mod.rs
@@ -1252,7 +1252,6 @@ mod tests {
DataType::List(Box::new(Field::new("item", DataType::Int64,
true))),
8,
None,
- None,
0,
vec![list_value_offsets],
vec![expected_int_array.data().clone()],
@@ -1333,7 +1332,6 @@ mod tests {
let expected_list_data = ArrayData::try_new(
DataType::List(Box::new(Field::new("item", DataType::Int64,
true))),
12,
- None,
Some(Buffer::from(&[0b11011011, 0b1110])),
0,
vec![list_value_offsets],
@@ -1484,7 +1482,6 @@ mod tests {
false,
),
12,
- None,
Some(Buffer::from(&[0b11011011, 0b1110])),
0,
vec![map_offsets],
@@ -1557,7 +1554,6 @@ mod tests {
DataType::List(Box::new(Field::new("item", DataType::Utf8, true))),
6,
None,
- None,
0,
vec![list_value_offsets],
vec![expected_string_array.data().clone()],
diff --git a/arrow/src/compute/util.rs b/arrow/src/compute/util.rs
index 62c3be62f..23f33d228 100644
--- a/arrow/src/compute/util.rs
+++ b/arrow/src/compute/util.rs
@@ -190,7 +190,6 @@ pub(super) mod tests {
ArrayData::try_new(
DataType::UInt8,
len,
- None,
null_bit_buffer,
offset,
vec![buffer],