This is an automated email from the ASF dual-hosted git repository.

wesm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new fac70f9  ARROW-8414: [Python] Fix non-deterministic row order failure in parquet tests
fac70f9 is described below

commit fac70f9ea4600ea4da31a6fb4c3be07c3d53107b
Author: Joris Van den Bossche <[email protected]>
AuthorDate: Mon Apr 13 09:44:05 2020 -0500

    ARROW-8414: [Python] Fix non-deterministic row order failure in parquet tests
    
    Closes #6903 from jorisvandenbossche/fix-dataset-failure
    
    Authored-by: Joris Van den Bossche <[email protected]>
    Signed-off-by: Wes McKinney <[email protected]>
---
 python/pyarrow/tests/test_parquet.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/tests/test_parquet.py b/python/pyarrow/tests/test_parquet.py
index 89131b4..3dee3ac 100644
--- a/python/pyarrow/tests/test_parquet.py
+++ b/python/pyarrow/tests/test_parquet.py
@@ -2691,12 +2691,15 @@ def _test_write_to_dataset_with_partitions(base_path,
     input_df_cols = input_df.columns.tolist()
     assert partition_by == input_df_cols[-1 * len(partition_by):]
 
-    # Partitioned columns become 'categorical' dtypes
     input_df = input_df[cols]
     if use_legacy_dataset:
+        # Partitioned columns become 'categorical' dtypes
         for col in partition_by:
             output_df[col] = output_df[col].astype('category')
-    assert output_df.equals(input_df)
+    else:
+        # ensure deterministic row order
+        input_df = input_df.sort_values(by=["num"]).reset_index(drop=True)
+    tm.assert_frame_equal(output_df, input_df)
 
 
 def _test_write_to_dataset_no_partitions(base_path,

Reply via email to