alamb commented on code in PR #9158:
URL: https://github.com/apache/arrow-rs/pull/9158#discussion_r2687587159
##########
arrow-array/src/array/byte_view_array.rs:
##########
@@ -1814,4 +1822,46 @@ mod tests {
assert_eq!(lengths_iter.next(), None, "Should not have more lengths");
}
+
+ #[should_panic(expected = "Mismatched data type, expected Utf8View, got BinaryView")]
+ #[test]
+ fn invalid_casting_from_array_data() {
+ // Should not be able to cast to StringViewArray due to invalid UTF-8
+ let array_data = binary_view_array_with_invalid_utf8_data().into_data();
+ let _ = StringViewArray::from(array_data);
+ }
+
+ #[should_panic(expected = "invalid utf-8 sequence")]
+ #[test]
+ fn invalid_array_data() {
+ let (views, buffers, nulls) = binary_view_array_with_invalid_utf8_data().into_parts();
+
+ // manually try and add invalid array data with Utf8View data type
+ let mut builder = ArrayDataBuilder::new(DataType::Utf8View)
+ .add_buffer(views.into_inner())
+ .len(3);
+ for buffer in buffers.iter() {
+ builder = builder.add_buffer(buffer.clone())
+ }
+ builder = builder.nulls(nulls);
+
+ let data = builder.build().unwrap(); // should fail validation
+ let _arr = StringViewArray::from(data);
+ }
+
+ /// Returns a BinaryViewArray with one invalid UTF-8 value
+ fn binary_view_array_with_invalid_utf8_data() -> BinaryViewArray {
+ let array = GenericByteViewArray::<BinaryViewType>::from(vec![
+ b"aaaaaaaaaaaaaaaaaaaaaaaaaaa" as &[u8],
+ &[
+ 0xf0, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00,
+ ],
+ b"good",
+ ]);
+ assert!(from_utf8(array.value(0)).is_ok());
+ assert!(from_utf8(array.value(1)).is_err()); // value 1 is invalid utf8
+ assert!(from_utf8(array.value(0)).is_ok());
Review Comment:
🤦
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]