rluvaton commented on code in PR #9357:
URL: https://github.com/apache/arrow-rs/pull/9357#discussion_r2769462094


##########
parquet/src/arrow/arrow_writer/mod.rs:
##########
@@ -4518,4 +4575,185 @@ mod tests {
         assert_eq!(get_dict_page_size(col0_meta), 1024 * 1024);
         assert_eq!(get_dict_page_size(col1_meta), 1024 * 1024 * 4);
     }
+
+    /// Helper to create a test batch with the given number of rows.
+    /// Each row is approximately 4 bytes (one i32).
+    fn create_test_batch(num_rows: usize) -> RecordBatch {
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "int",
+            ArrowDataType::Int32,
+            false,
+        )]));
+        let array = Int32Array::from((0..num_rows as i32).collect::<Vec<_>>());
+        RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap()
+    }
+
+    #[test]
+    fn test_row_group_limit_none_writes_single_row_group() {
+        // When both limits are None, all data should go into a single row group
+        let batch = create_test_batch(1000);
+
+        let props = WriterProperties::builder()
+            .set_max_row_group_row_count(None)
+            .set_max_row_group_bytes(None)
+            .build();
+
+        let file = tempfile::tempfile().unwrap();
+        let mut writer =
+            ArrowWriter::try_new(file.try_clone().unwrap(), batch.schema(), Some(props)).unwrap();
+
+        writer.write(&batch).unwrap();
+        writer.close().unwrap();
+
+        let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
+        assert_eq!(
+            &row_group_sizes(builder.metadata()),
+            &[1000],
+            "With no limits, all rows should be in a single row group"
+        );
+    }
+
+    #[test]
+    fn test_row_group_limit_rows_only() {
+        // When only max_row_group_size is set, respect the row limit

Review Comment:
   the comment is not on the correct line
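
   Side note for anyone reading the hunk above: the assertions rely on a `row_group_sizes` helper that already exists in this test module. A minimal sketch of what it presumably does, assuming the public `parquet::file::metadata::ParquetMetaData` path (inside the crate's own test module it would come from `crate::file::metadata`) and that it simply collects the per-row-group row counts:

   ```rust
   use parquet::file::metadata::ParquetMetaData;

   /// Sketch of the assumed helper: returns the number of rows in each
   /// row group of the written file, read from the Parquet footer metadata.
   fn row_group_sizes(metadata: &ParquetMetaData) -> Vec<i64> {
       metadata
           .row_groups()
           .iter()
           .map(|rg| rg.num_rows())
           .collect()
   }
   ```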



##########
parquet/src/arrow/arrow_writer/mod.rs:
##########
@@ -4518,4 +4575,185 @@ mod tests {
         assert_eq!(get_dict_page_size(col0_meta), 1024 * 1024);
         assert_eq!(get_dict_page_size(col1_meta), 1024 * 1024 * 4);
     }
+
+    /// Helper to create a test batch with the given number of rows.
+    /// Each row is approximately 4 bytes (one i32).
+    fn create_test_batch(num_rows: usize) -> RecordBatch {
+        let schema = Arc::new(Schema::new(vec![Field::new(
+            "int",
+            ArrowDataType::Int32,
+            false,
+        )]));
+        let array = Int32Array::from((0..num_rows as i32).collect::<Vec<_>>());
+        RecordBatch::try_new(schema, vec![Arc::new(array)]).unwrap()
+    }
+
+    #[test]
+    fn test_row_group_limit_none_writes_single_row_group() {
+        // When both limits are None, all data should go into a single row group

Review Comment:
   the comment is not on the correct line
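
   For illustration only, since the hunk is cut off right at the comment in question: a sketch of what a row-count-only test might look like, reusing `create_test_batch` and the `set_max_row_group_row_count` property shown in this PR's diff. The test name and the 300/300/300/100 split are my assumptions (they presume the writer flushes a row group each time the configured row count is reached), not the PR's actual test:

   ```rust
   #[test]
   fn sketch_row_group_limit_rows_only() {
       // 1000 rows of roughly 4 bytes each (see create_test_batch above)
       let batch = create_test_batch(1000);

       // Only the row-count limit is set; the byte limit keeps its default
       let props = WriterProperties::builder()
           .set_max_row_group_row_count(Some(300))
           .build();

       let file = tempfile::tempfile().unwrap();
       let mut writer =
           ArrowWriter::try_new(file.try_clone().unwrap(), batch.schema(), Some(props)).unwrap();
       writer.write(&batch).unwrap();
       writer.close().unwrap();

       // Assumed behavior: a row group is flushed every 300 rows
       let builder = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
       assert_eq!(&row_group_sizes(builder.metadata()), &[300, 300, 300, 100]);
   }
   ```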


