askoa commented on code in PR #3447:
URL: https://github.com/apache/arrow-rs/pull/3447#discussion_r1061478586
##########
parquet/src/arrow/arrow_writer/mod.rs:
##########
@@ -1838,6 +1851,47 @@ mod tests {
one_column_roundtrip(values, false);
}
+ #[test]
+ fn fallback_flush_data_page() {
+ //tests if the Fallback::flush_data_page clears all buffers correctly
+ let raw_values: Vec<_> = (0..MEDIUM_SIZE).map(|i|
i.to_string()).collect();
+ let values = Arc::new(StringArray::from(raw_values));
+ let encodings = vec![
+ Encoding::DELTA_BYTE_ARRAY,
+ Encoding::DELTA_LENGTH_BYTE_ARRAY,
+ ];
+ let data_type = values.data_type().clone();
+ let schema = Arc::new(Schema::new(vec![Field::new("col", data_type,
false)]));
+ let expected_batch = RecordBatch::try_new(schema,
vec![values]).unwrap();
+
+ let row_group_sizes = [1024, SMALL_SIZE, SMALL_SIZE / 2, SMALL_SIZE /
2 + 1, 10];
+ let data_pagesize_limit: usize = 32;
+ let write_batch_size: usize = 16;
+
+ for encoding in &encodings {
+ for row_group_size in row_group_sizes {
+ let props = WriterProperties::builder()
+ .set_writer_version(WriterVersion::PARQUET_2_0)
+ .set_max_row_group_size(row_group_size)
+ .set_dictionary_enabled(false)
+ .set_encoding(*encoding)
+ .set_data_pagesize_limit(data_pagesize_limit)
+ .set_write_batch_size(write_batch_size)
+ .build();
+
+ roundtrip_opts_with_array_validation(&expected_batch, props,
|a, b| {
+ let string_array_a = StringArray::from(a.clone());
Review Comment:
> Perhaps this could be simplified to `assert_eq!(string_array_a,
string_array_b)` then?
I just tried comparing the `StringArray`s directly locally, and on failure the output
is printed in a different (elided) format. I prefer the output from comparing `Vec`s:
```
running 1 test
thread 'arrow::arrow_writer::tests::fallback_flush_data_page' panicked at
'assertion failed: `(left == right)`
left: `StringArray
[
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
...43 elements...,
"53",
"54",
"55",
"56",
"57",
"58",
"59",
"60",
"61",
"62",
]`,
right: `StringArray
[
"0",
"1",
"2",
"3",
"4",
"5",
"6",
"7",
"8",
"9",
...43 elements...,
"53",
"54",
"55",
"56",
"57",
"58",
"59",
"60",
"61",
"62",
]`: failed for encoder: DELTA_BYTE_ARRAY and row_group_size: 1024',
parquet/src/arrow/arrow_writer/mod.rs:1885:21
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]