Copilot commented on code in PR #219:
URL: https://github.com/apache/fluss-rust/pull/219#discussion_r2737081433


##########
crates/fluss/src/client/write/batch.rs:
##########
@@ -363,4 +384,149 @@ mod tests {
         batch.re_enqueued();
         assert_eq!(batch.attempts(), 1);
     }
+
+    #[test]
+    fn test_arrow_log_write_batch_estimated_size() {
+        use crate::client::WriteRecord;
+        use crate::compression::{
+            ArrowCompressionInfo, ArrowCompressionType, DEFAULT_NON_ZSTD_COMPRESSION_LEVEL,
+        };
+        use crate::metadata::{DataField, DataTypes, RowType};
+        use crate::row::GenericRow;
+        use arrow::array::{Int32Array, RecordBatch, StringArray};
+        use std::sync::Arc;
+
+        let row_type = RowType::new(vec![
+            DataField::new("id".to_string(), DataTypes::int(), None),
+            DataField::new("name".to_string(), DataTypes::string(), None),
+        ]);
+        let table_path = TablePath::new("db".to_string(), "tbl".to_string());
+
+        // Test 1: RowAppendRecordBatchBuilder (to_append_record_batch=false)
+        {
+            let mut batch = ArrowLogWriteBatch::new(
+                1,
+                table_path.clone(),
+                1,
+                ArrowCompressionInfo {
+                    compression_type: ArrowCompressionType::None,
+                    compression_level: DEFAULT_NON_ZSTD_COMPRESSION_LEVEL,
+                },
+                &row_type,
+                0,
+                0,
+                false,
+            )
+            .unwrap();
+
+            // Append rows
+            for _ in 0..200 {
+                let mut row = GenericRow::new(2);
+                row.set_field(0, 1_i32);
+                row.set_field(1, "hello");
+                let record = WriteRecord::for_append(Arc::new(table_path.clone()), 1, row);
+                batch.try_append(&record).unwrap();
+            }
+
+            let estimated_size = batch.estimated_size_in_bytes();
+            assert!(estimated_size > 0);
+
+            let built_data = batch.build().unwrap();
+            let actual_size = built_data.len();
+
+            let diff = actual_size - estimated_size;
+            let threshold = actual_size / 10; // 10% tolerance
+            assert!(
+                diff <= threshold,
+                "RowAppend: estimated_size {estimated_size} and actual_size 
{actual_size} differ by more than 10%"
+            );

Review Comment:
   Computing `diff` as `actual_size - estimated_size` can underflow when `estimated_size` exceeds `actual_size`: the subtraction panics in debug builds and wraps to a huge value in release builds, so the assertion no longer enforces the intended bound. To enforce the 10% tolerance regardless of which value is larger, compute a non-negative difference (for example, the absolute difference between `actual_size` and `estimated_size`) before comparing it against `threshold`.
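   A minimal sketch of the suggested fix, assuming both values are `usize` (whose `abs_diff` method has been stable since Rust 1.60):

   ```rust
   // Non-negative difference in either direction; plain subtraction would
   // panic in debug builds and wrap in release builds whenever the estimate
   // exceeds the built size.
   let diff = actual_size.abs_diff(estimated_size);
   let threshold = actual_size / 10; // 10% tolerance
   assert!(
       diff <= threshold,
       "RowAppend: estimated_size {estimated_size} and actual_size {actual_size} differ by more than 10%"
   );
   ```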



##########
crates/fluss/src/client/write/batch.rs:
##########
@@ -363,4 +384,149 @@ mod tests {
         batch.re_enqueued();
         assert_eq!(batch.attempts(), 1);
     }
+
+    #[test]
+    fn test_arrow_log_write_batch_estimated_size() {
+        use crate::client::WriteRecord;
+        use crate::compression::{
+            ArrowCompressionInfo, ArrowCompressionType, DEFAULT_NON_ZSTD_COMPRESSION_LEVEL,
+        };
+        use crate::metadata::{DataField, DataTypes, RowType};
+        use crate::row::GenericRow;
+        use arrow::array::{Int32Array, RecordBatch, StringArray};
+        use std::sync::Arc;
+
+        let row_type = RowType::new(vec![
+            DataField::new("id".to_string(), DataTypes::int(), None),
+            DataField::new("name".to_string(), DataTypes::string(), None),
+        ]);
+        let table_path = TablePath::new("db".to_string(), "tbl".to_string());
+
+        // Test 1: RowAppendRecordBatchBuilder (to_append_record_batch=false)
+        {
+            let mut batch = ArrowLogWriteBatch::new(
+                1,
+                table_path.clone(),
+                1,
+                ArrowCompressionInfo {
+                    compression_type: ArrowCompressionType::None,
+                    compression_level: DEFAULT_NON_ZSTD_COMPRESSION_LEVEL,
+                },
+                &row_type,
+                0,
+                0,
+                false,
+            )
+            .unwrap();
+
+            // Append rows
+            for _ in 0..200 {
+                let mut row = GenericRow::new(2);
+                row.set_field(0, 1_i32);
+                row.set_field(1, "hello");
+                let record = WriteRecord::for_append(Arc::new(table_path.clone()), 1, row);
+                batch.try_append(&record).unwrap();
+            }
+
+            let estimated_size = batch.estimated_size_in_bytes();
+            assert!(estimated_size > 0);
+
+            let built_data = batch.build().unwrap();
+            let actual_size = built_data.len();
+
+            let diff = actual_size - estimated_size;
+            let threshold = actual_size / 10; // 10% tolerance
+            assert!(
+                diff <= threshold,
+                "RowAppend: estimated_size {estimated_size} and actual_size 
{actual_size} differ by more than 10%"
+            );
+        }
+
+        // Test 2: PrebuiltRecordBatchBuilder (to_append_record_batch=true)
+        {
+            let mut batch = ArrowLogWriteBatch::new(
+                1,
+                table_path.clone(),
+                1,
+                ArrowCompressionInfo {
+                    compression_type: ArrowCompressionType::None,
+                    compression_level: DEFAULT_NON_ZSTD_COMPRESSION_LEVEL,
+                },
+                &row_type,
+                0,
+                0,
+                true,
+            )
+            .unwrap();
+
+            // Create a pre-built RecordBatch
+            let schema = crate::record::to_arrow_schema(&row_type).unwrap();
+            let ids: Vec<i32> = (0..200).collect();
+            let names: Vec<&str> = (0..200).map(|_| "hello").collect();
+            let record_batch = RecordBatch::try_new(
+                schema,
+                vec![
+                    Arc::new(Int32Array::from(ids)),
+                    Arc::new(StringArray::from(names)),
+                ],
+            )
+            .unwrap();
+
+            let record =
+                WriteRecord::for_append_record_batch(Arc::new(table_path.clone()), 1, record_batch);
+            batch.try_append(&record).unwrap();
+
+            let estimated_size = batch.estimated_size_in_bytes();
+            assert!(estimated_size > 0);
+
+            let built_data = batch.build().unwrap();
+            let actual_size = built_data.len();
+
+            let diff = actual_size - estimated_size;
+            let threshold = actual_size / 10; // 10% tolerance
+            assert!(
+                diff <= threshold,
+                "Prebuilt: estimated_size {estimated_size} and actual_size 
{actual_size} differ by more than 10%"
+            );

Review Comment:
   As in the earlier test, computing `diff` as `actual_size - estimated_size` can underflow when `estimated_size` exceeds `actual_size`, panicking in debug builds and wrapping to a huge value in release builds. This test should likewise use a non-negative difference between `actual_size` and `estimated_size` (for example, an absolute difference) before comparing it to the 10% `threshold`.
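   The same `abs_diff`-based sketch applies here, again assuming both values are `usize`:

   ```rust
   // Tolerate the estimate landing above or below the built size.
   let diff = actual_size.abs_diff(estimated_size);
   let threshold = actual_size / 10; // 10% tolerance
   assert!(
       diff <= threshold,
       "Prebuilt: estimated_size {estimated_size} and actual_size {actual_size} differ by more than 10%"
   );
   ```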



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
