askoa commented on code in PR #3447:
URL: https://github.com/apache/arrow-rs/pull/3447#discussion_r1061468303


##########
parquet/src/arrow/arrow_writer/mod.rs:
##########
@@ -1838,6 +1851,47 @@ mod tests {
         one_column_roundtrip(values, false);
     }
 
+    #[test]
+    fn fallback_flush_data_page() {
+        //tests if the Fallback::flush_data_page clears all buffers correctly
+        let raw_values: Vec<_> = (0..MEDIUM_SIZE).map(|i| 
i.to_string()).collect();
+        let values = Arc::new(StringArray::from(raw_values));
+        let encodings = vec![
+            Encoding::DELTA_BYTE_ARRAY,
+            Encoding::DELTA_LENGTH_BYTE_ARRAY,
+        ];
+        let data_type = values.data_type().clone();
+        let schema = Arc::new(Schema::new(vec![Field::new("col", data_type, 
false)]));
+        let expected_batch = RecordBatch::try_new(schema, 
vec![values]).unwrap();
+
+        let row_group_sizes = [1024, SMALL_SIZE, SMALL_SIZE / 2, SMALL_SIZE / 
2 + 1, 10];
+        let data_pagesize_limit: usize = 32;
+        let write_batch_size: usize = 16;
+
+        for encoding in &encodings {
+            for row_group_size in row_group_sizes {
+                let props = WriterProperties::builder()
+                    .set_writer_version(WriterVersion::PARQUET_2_0)
+                    .set_max_row_group_size(row_group_size)
+                    .set_dictionary_enabled(false)
+                    .set_encoding(*encoding)
+                    .set_data_pagesize_limit(data_pagesize_limit)
+                    .set_write_batch_size(write_batch_size)
+                    .build();
+
+                roundtrip_opts_with_array_validation(&expected_batch, props, 
|a, b| {
+                    let string_array_a = StringArray::from(a.clone());

Review Comment:
   The array data comparison output is in binary format and is not easily 
comprehensible. I converted it to a string comparison so that the difference 
is visible in the failure output, as shown below.
   
   ```
   running 1 test
   thread 'arrow::arrow_writer::tests::fallback_flush_data_page' panicked at 
'assertion failed: `(left == right)`
     left: `["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", 
"12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", 
"38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50", 
"51", "52", "53", "54", "55", "56", "57", "58", "59", "60", "61", "62"]`,
    right: `["0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", 
"12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", 
"25", "26", "27", "28", "29", "30", "31", "2", "23", "24", "25", "26", "27", 
"28", "29", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50", 
"51", "52", "53", "54", "55", "56", "57", "58", "59", "60", "61", "62"]`', 
parquet/src/arrow/arrow_writer/mod.rs:1889:21
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to