This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow-rs.git
The following commit(s) were added to refs/heads/master by this push:
new 51c1b4b2ed9 Add tests for Arrow Flight support for `StringViewArray` and `BinaryViewArray` (#5601)
51c1b4b2ed9 is described below
commit 51c1b4b2ed9b7f2ecbc03d7ef1d4d7a224f0648a
Author: Xiangpeng Hao <[email protected]>
AuthorDate: Mon Apr 8 14:16:16 2024 -0500
Add tests for Arrow Flight support for `StringViewArray` and `BinaryViewArray` (#5601)
* Add tests to send view types with flight
* make clippy happy
---
arrow-flight/tests/encode_decode.rs | 60 +++++++++++++++++++++++++++++++++++--
1 file changed, 57 insertions(+), 3 deletions(-)
diff --git a/arrow-flight/tests/encode_decode.rs b/arrow-flight/tests/encode_decode.rs
index 224b12500a0..0185fa77f06 100644
--- a/arrow-flight/tests/encode_decode.rs
+++ b/arrow-flight/tests/encode_decode.rs
@@ -20,7 +20,10 @@
use std::{collections::HashMap, sync::Arc};
use arrow_array::types::Int32Type;
-use arrow_array::{ArrayRef, DictionaryArray, Float64Array, RecordBatch, UInt8Array};
+use arrow_array::{
+ ArrayRef, BinaryViewArray, DictionaryArray, Float64Array, RecordBatch, StringViewArray,
+ UInt8Array,
+};
use arrow_cast::pretty::pretty_format_batches;
use arrow_flight::flight_descriptor::DescriptorType;
use arrow_flight::FlightDescriptor;
@@ -111,6 +114,22 @@ async fn test_dictionary_many() {
.await;
}
+#[tokio::test]
+async fn test_view_types_one() {
+ roundtrip(vec![make_view_batches(5)]).await;
+}
+
+#[tokio::test]
+async fn test_view_types_many() {
+ roundtrip(vec![
+ make_view_batches(5),
+ make_view_batches(9),
+ make_view_batches(5),
+ make_view_batches(5),
+ ])
+ .await;
+}
+
#[tokio::test]
async fn test_zero_batches_no_schema() {
let stream = FlightDataEncoderBuilder::default().build(futures::stream::iter(vec![]));
@@ -450,8 +469,43 @@ fn make_dictionary_batch(num_rows: usize) -> RecordBatch {
RecordBatch::try_from_iter(vec![("a", Arc::new(a) as ArrayRef)]).unwrap()
}
+fn make_view_batches(num_rows: usize) -> RecordBatch {
+ const LONG_TEST_STRING: &str =
+ "This is a long string to make sure binary view array handles it";
+ let schema = Schema::new(vec![
+ Field::new("field1", DataType::BinaryView, true),
+ Field::new("field2", DataType::Utf8View, true),
+ ]);
+
+ let string_view_values: Vec<Option<&str>> = (0..num_rows)
+ .map(|i| match i % 3 {
+ 0 => None,
+ 1 => Some("foo"),
+ 2 => Some(LONG_TEST_STRING),
+ _ => unreachable!(),
+ })
+ .collect();
+
+ let bin_view_values: Vec<Option<&[u8]>> = (0..num_rows)
+ .map(|i| match i % 3 {
+ 0 => None,
+ 1 => Some("bar".as_bytes()),
+ 2 => Some(LONG_TEST_STRING.as_bytes()),
+ _ => unreachable!(),
+ })
+ .collect();
+
+ let binary_array = BinaryViewArray::from_iter(bin_view_values);
+ let utf8_array = StringViewArray::from_iter(string_view_values);
+ RecordBatch::try_new(
+ Arc::new(schema.clone()),
+ vec![Arc::new(binary_array), Arc::new(utf8_array)],
+ )
+ .unwrap()
+}
+
/// Encodes input as a FlightData stream, and then decodes it using
-/// FlightRecordBatchStream and valides the decoded record batches
+/// FlightRecordBatchStream and validates the decoded record batches
/// match the input.
async fn roundtrip(input: Vec<RecordBatch>) {
let expected_output = input.clone();
@@ -459,7 +513,7 @@ async fn roundtrip(input: Vec<RecordBatch>) {
}
/// Encodes input as a FlightData stream, and then decodes it using
-/// FlightRecordBatchStream and valides the decoded record batches
+/// FlightRecordBatchStream and validates the decoded record batches
/// match the expected input.
///
/// When <https://github.com/apache/arrow-rs/issues/3389> is resolved,