This is an automated email from the ASF dual-hosted git repository.
tustvold pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 34305378c Always validate the array data (except the `Decimal`) when
creating array in IPC reader (#2547)
34305378c is described below
commit 34305378ce69999444a24d1a77ad65250055a8a3
Author: Remzi Yang <[email protected]>
AuthorDate: Tue Aug 23 22:14:16 2022 +0800
Always validate the array data (except the `Decimal`) when creating array
in IPC reader (#2547)
* validate array data in ipc reader
Signed-off-by: remzi <[email protected]>
* not validate decimal
Signed-off-by: remzi <[email protected]>
Signed-off-by: remzi <[email protected]>
---
arrow/src/ipc/reader.rs | 60 +++++++++++++++++++++++++------------------------
1 file changed, 31 insertions(+), 29 deletions(-)
diff --git a/arrow/src/ipc/reader.rs b/arrow/src/ipc/reader.rs
index adfe40600..fca5fa9d6 100644
--- a/arrow/src/ipc/reader.rs
+++ b/arrow/src/ipc/reader.rs
@@ -411,12 +411,12 @@ fn create_primitive_array(
}
FixedSizeBinary(_) => {
// read 2 buffers: null buffer (optional) and data buffer
- let builder = ArrayData::builder(data_type.clone())
+ ArrayData::builder(data_type.clone())
.len(length)
.add_buffer(buffers[1].clone())
- .null_bit_buffer(null_buffer);
-
- unsafe { builder.build_unchecked() }
+ .null_bit_buffer(null_buffer)
+ .build()
+ .unwrap()
}
Int8
| Int16
@@ -429,45 +429,45 @@ fn create_primitive_array(
| Interval(IntervalUnit::YearMonth) => {
if buffers[1].len() / 8 == length && length != 1 {
// interpret as a signed i64, and cast appropriately
- let builder = ArrayData::builder(DataType::Int64)
+ let data = ArrayData::builder(DataType::Int64)
.len(length)
.add_buffer(buffers[1].clone())
- .null_bit_buffer(null_buffer);
-
- let data = unsafe { builder.build_unchecked() };
+ .null_bit_buffer(null_buffer)
+ .build()
+ .unwrap();
let values = Arc::new(Int64Array::from(data)) as ArrayRef;
// this cast is infallible, the unwrap is safe
let casted = cast(&values, data_type).unwrap();
casted.into_data()
} else {
- let builder = ArrayData::builder(data_type.clone())
+ ArrayData::builder(data_type.clone())
.len(length)
.add_buffer(buffers[1].clone())
- .null_bit_buffer(null_buffer);
-
- unsafe { builder.build_unchecked() }
+ .null_bit_buffer(null_buffer)
+ .build()
+ .unwrap()
}
}
Float32 => {
if buffers[1].len() / 8 == length && length != 1 {
// interpret as a f64, and cast appropriately
- let builder = ArrayData::builder(DataType::Float64)
+ let data = ArrayData::builder(DataType::Float64)
.len(length)
.add_buffer(buffers[1].clone())
- .null_bit_buffer(null_buffer);
-
- let data = unsafe { builder.build_unchecked() };
+ .null_bit_buffer(null_buffer)
+ .build()
+ .unwrap();
let values = Arc::new(Float64Array::from(data)) as ArrayRef;
// this cast is infallible, the unwrap is safe
let casted = cast(&values, data_type).unwrap();
casted.into_data()
} else {
- let builder = ArrayData::builder(data_type.clone())
+ ArrayData::builder(data_type.clone())
.len(length)
.add_buffer(buffers[1].clone())
- .null_bit_buffer(null_buffer);
-
- unsafe { builder.build_unchecked() }
+ .null_bit_buffer(null_buffer)
+ .build()
+ .unwrap()
}
}
Boolean
@@ -479,14 +479,12 @@ fn create_primitive_array(
| Date64
| Duration(_)
| Interval(IntervalUnit::DayTime)
- | Interval(IntervalUnit::MonthDayNano) => {
- let builder = ArrayData::builder(data_type.clone())
- .len(length)
- .add_buffer(buffers[1].clone())
- .null_bit_buffer(null_buffer);
-
- unsafe { builder.build_unchecked() }
- }
+ | Interval(IntervalUnit::MonthDayNano) =>
ArrayData::builder(data_type.clone())
+ .len(length)
+ .add_buffer(buffers[1].clone())
+ .null_bit_buffer(null_buffer)
+ .build()
+ .unwrap(),
Decimal128(_, _) | Decimal256(_, _) => {
// read 2 buffers: null buffer (optional) and data buffer
let builder = ArrayData::builder(data_type.clone())
@@ -494,6 +492,10 @@ fn create_primitive_array(
.add_buffer(buffers[1].clone())
.null_bit_buffer(null_buffer);
+ // Don't validate the decimal array so far,
+ // because validating decimal is somewhat complicated
+ // and there is no conclusion on whether we should do it.
+ // For more information, please look at https://github.com/apache/arrow-rs/issues/2387
unsafe { builder.build_unchecked() }
}
t => unreachable!("Data type {:?} either unsupported or not
primitive", t),
@@ -527,7 +529,7 @@ fn create_list_array(
_ => unreachable!("Cannot create list or map array from {:?}",
data_type),
};
- make_array(unsafe { builder.build_unchecked() })
+ make_array(builder.build().unwrap())
}
/// Reads the correct number of buffers based on list type and null_count, and
creates a