This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f49418b79c88 [SPARK-48751][INFRA][PYTHON][TESTS] Re-balance `pyspark-pandas-connect` tests on GA f49418b79c88 is described below commit f49418b79c8817b59ef6ec41b517c8098b7aaa7b Author: panbingkun <panbing...@baidu.com> AuthorDate: Mon Jul 1 08:36:46 2024 +0900 [SPARK-48751][INFRA][PYTHON][TESTS] Re-balance `pyspark-pandas-connect` tests on GA ### What changes were proposed in this pull request? This PR aims to `re-balance` the `pyspark-pandas-connect` tests on `GA`. ### Why are the changes needed? To bring the execution time of the `pyspark-pandas-connect-part[0-3]` tests to a relatively even level, avoiding long tails that result in a higher overall GA execution time. Here are some currently observed examples: - https://github.com/apache/spark/pull/47135/checks?check_run_id=26784966983 <img width="311" alt="image" src="https://github.com/apache/spark/assets/15246973/45d627bc-f0e7-4a76-bfd5-edc6e821e427"> Most of them are around `1 hour`, but `part2` cost `1h 49m`, `part3` cost `2h 16m` - https://github.com/panbingkun/spark/actions/runs/9693237300 <img width="296" alt="image" src="https://github.com/apache/spark/assets/15246973/6837622a-3ff3-42d7-9725-e548c161277e"> Most of them are around `1 hour`, but `part2` cost `1h 47m`, `part3` cost `2h 20m` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? By manually observing the execution time of `pyspark-pandas-connect-part[0-3]`. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47137 from panbingkun/split_pyspark_tests_to_5. 
Authored-by: panbingkun <panbing...@baidu.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- dev/sparktestsupport/modules.py | 42 ++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 718231efff90..44295e7e630e 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -1176,6 +1176,9 @@ pyspark_pandas_connect_part0 = Module( "pyspark.pandas.tests.connect.indexes.test_parity_reindex", "pyspark.pandas.tests.connect.indexes.test_parity_rename", "pyspark.pandas.tests.connect.indexes.test_parity_reset_index", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_at", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_between", "pyspark.pandas.tests.connect.computation.test_parity_any_all", "pyspark.pandas.tests.connect.computation.test_parity_apply_func", "pyspark.pandas.tests.connect.computation.test_parity_binary_ops", @@ -1188,6 +1191,12 @@ pyspark_pandas_connect_part0 = Module( "pyspark.pandas.tests.connect.computation.test_parity_describe", "pyspark.pandas.tests.connect.computation.test_parity_eval", "pyspark.pandas.tests.connect.computation.test_parity_melt", + "pyspark.pandas.tests.connect.computation.test_parity_missing_data", + "pyspark.pandas.tests.connect.groupby.test_parity_stat", + "pyspark.pandas.tests.connect.groupby.test_parity_stat_adv", + "pyspark.pandas.tests.connect.groupby.test_parity_stat_ddof", + "pyspark.pandas.tests.connect.groupby.test_parity_stat_func", + "pyspark.pandas.tests.connect.groupby.test_parity_stat_prod", ], excluded_python_implementations=[ "PyPy" # Skip these tests under PyPy since they require numpy, pandas, and pyarrow and @@ -1253,6 +1262,18 @@ pyspark_pandas_connect_part1 = Module( "pyspark.pandas.tests.connect.reshape.test_parity_get_dummies_object", 
"pyspark.pandas.tests.connect.reshape.test_parity_get_dummies_prefix", "pyspark.pandas.tests.connect.reshape.test_parity_merge_asof", + "pyspark.pandas.tests.connect.indexes.test_parity_append", + "pyspark.pandas.tests.connect.indexes.test_parity_intersection", + "pyspark.pandas.tests.connect.indexes.test_parity_monotonic", + "pyspark.pandas.tests.connect.indexes.test_parity_union", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_ceil", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_floor", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_iso", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_map", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_property", + "pyspark.pandas.tests.connect.indexes.test_parity_datetime_round", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_shift", + "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_transform", # fallback "pyspark.pandas.tests.connect.frame.test_parity_asfreq", "pyspark.pandas.tests.connect.frame.test_parity_asof", @@ -1278,7 +1299,6 @@ pyspark_pandas_connect_part2 = Module( "pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx", "pyspark.pandas.tests.connect.computation.test_parity_pivot_table_multi_idx_adv", "pyspark.pandas.tests.connect.computation.test_parity_stats", - "pyspark.pandas.tests.connect.computation.test_parity_missing_data", "pyspark.pandas.tests.connect.frame.test_parity_interpolate", "pyspark.pandas.tests.connect.frame.test_parity_interpolate_error", "pyspark.pandas.tests.connect.resample.test_parity_frame", @@ -1351,24 +1371,6 @@ pyspark_pandas_connect_part3 = Module( "pyspark.pandas.tests.connect.io.test_parity_dataframe_conversion", "pyspark.pandas.tests.connect.io.test_parity_dataframe_spark_io", "pyspark.pandas.tests.connect.io.test_parity_series_conversion", - "pyspark.pandas.tests.connect.groupby.test_parity_stat", - 
"pyspark.pandas.tests.connect.groupby.test_parity_stat_adv", - "pyspark.pandas.tests.connect.groupby.test_parity_stat_ddof", - "pyspark.pandas.tests.connect.groupby.test_parity_stat_func", - "pyspark.pandas.tests.connect.groupby.test_parity_stat_prod", - "pyspark.pandas.tests.connect.indexes.test_parity_append", - "pyspark.pandas.tests.connect.indexes.test_parity_intersection", - "pyspark.pandas.tests.connect.indexes.test_parity_monotonic", - "pyspark.pandas.tests.connect.indexes.test_parity_union", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_at", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_between", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_ceil", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_floor", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_iso", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_map", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_property", - "pyspark.pandas.tests.connect.indexes.test_parity_datetime_round", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_ext", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_arithmetic_ext_float", @@ -1391,9 +1393,7 @@ pyspark_pandas_connect_part3 = Module( "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_diff_len", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_fillna", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_filter", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_shift", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_split_apply_combine", - "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_transform", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding", 
"pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding_adv", "pyspark.pandas.tests.connect.diff_frames_ops.test_parity_groupby_expanding_count", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org