alamb commented on code in PR #3481:
URL: https://github.com/apache/arrow-rs/pull/3481#discussion_r1063680110


##########
arrow-flight/src/encode.rs:
##########
@@ -556,4 +559,215 @@ mod tests {
 
     // test sending record batches
     // test sending record batches with multiple different dictionaries
+
+    #[tokio::test]
+    async fn flight_data_size_even() {
+        let s1 =
+            StringArray::from_iter_values(std::iter::repeat(".10 
bytes.").take(1024));
+        let i1 = Int16Array::from_iter_values(0..1024);
+        let s2 = 
StringArray::from_iter_values(std::iter::repeat("6bytes").take(1024));
+        let i2 = Int64Array::from_iter_values(0..1024);
+
+        let batch = RecordBatch::try_from_iter(vec![
+            ("s1", Arc::new(s1) as _),
+            ("i1", Arc::new(i1) as _),
+            ("s2", Arc::new(s2) as _),
+            ("i2", Arc::new(i2) as _),
+        ])
+        .unwrap();
+
+        verify_encoded_split(batch, 112).await;

Review Comment:
   This is pretty good -- only 112 bytes above the desired max (I think that is 
mostly the various alignment and padding overhead)



##########
arrow-flight/src/encode.rs:
##########
@@ -556,4 +559,215 @@ mod tests {
 
     // test sending record batches
     // test sending record batches with multiple different dictionaries
+
+    #[tokio::test]
+    async fn flight_data_size_even() {
+        let s1 =
+            StringArray::from_iter_values(std::iter::repeat(".10 
bytes.").take(1024));
+        let i1 = Int16Array::from_iter_values(0..1024);
+        let s2 = 
StringArray::from_iter_values(std::iter::repeat("6bytes").take(1024));
+        let i2 = Int64Array::from_iter_values(0..1024);
+
+        let batch = RecordBatch::try_from_iter(vec![
+            ("s1", Arc::new(s1) as _),
+            ("i1", Arc::new(i1) as _),
+            ("s2", Arc::new(s2) as _),
+            ("i2", Arc::new(i2) as _),
+        ])
+        .unwrap();
+
+        verify_encoded_split(batch, 112).await;
+    }
+
+    #[tokio::test]
+    async fn flight_data_size_uneven_variable_lengths() {
+        // each row has a longer string than the last with increasing lengths 
0 --> 1024
+        let array = StringArray::from_iter_values((0..1024).map(|i| 
"*".repeat(i)));
+        let batch =
+            RecordBatch::try_from_iter(vec![("data", Arc::new(array) as 
_)]).unwrap();
+
+        verify_encoded_split(batch, 4304).await;
+    }
+
+    #[tokio::test]
+    async fn flight_data_size_large_row() {
+        // batch with individual rows that can each exceed the batch size
+        let array1 = StringArray::from_iter_values(vec![
+            "*".repeat(500),
+            "*".repeat(500),
+            "*".repeat(500),
+            "*".repeat(500),
+        ]);
+        let array2 = StringArray::from_iter_values(vec![
+            "*".repeat(1),
+            "*".repeat(1000),
+            "*".repeat(2000),
+            "*".repeat(4000),
+        ]);
+
+        let array3 = StringArray::from_iter_values(vec![
+            "*".repeat(1),
+            "*".repeat(1),
+            "*".repeat(1000),
+            "*".repeat(2000),
+        ]);
+
+        let batch = RecordBatch::try_from_iter(vec![
+            ("a1", Arc::new(array1) as _),
+            ("a2", Arc::new(array2) as _),
+            ("a3", Arc::new(array3) as _),
+        ])
+        .unwrap();
+
+        // 5k over limit (which is 2x larger than limit of 5k) -- not great :(
+        verify_encoded_split(batch, 5800).await;

Review Comment:
   This is pretty bad -- over 2x larger than the largest message size limit



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to