This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8cbff3d5b6f [SPARK-41878][CONNECT][TESTS]
pyspark.sql.tests.test_dataframe - Add JIRAs or messages for skipped tests
8cbff3d5b6f is described below
commit 8cbff3d5b6f6a34e551aa42e965a16c3cb41e4c7
Author: Sandeep Singh <[email protected]>
AuthorDate: Thu Jan 5 08:53:08 2023 +0900
[SPARK-41878][CONNECT][TESTS] pyspark.sql.tests.test_dataframe - Add JIRAs
or messages for skipped tests
### What changes were proposed in this pull request?
This PR enables the reused PySpark tests in Spark Connect that pass now.
It also adds JIRAs/messages to the skipped ones.
### Why are the changes needed?
To make sure of the test coverage.
### Does this PR introduce any user-facing change?
No, test-only.
### How was this patch tested?
Enabling tests
Closes #39382 from techaddict/SPARK-41878.
Authored-by: Sandeep Singh <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../sql/tests/connect/test_parity_dataframe.py | 44 ++++++++++++++++++----
1 file changed, 36 insertions(+), 8 deletions(-)
diff --git a/python/pyspark/sql/tests/connect/test_parity_dataframe.py
b/python/pyspark/sql/tests/connect/test_parity_dataframe.py
index ea1eb23fd4f..69f445b69ca 100644
--- a/python/pyspark/sql/tests/connect/test_parity_dataframe.py
+++ b/python/pyspark/sql/tests/connect/test_parity_dataframe.py
@@ -41,154 +41,182 @@ class DataFrameParityTests(DataFrameTestsMixin,
ReusedSQLTestCase):
cls._spark.stop()
del os.environ["SPARK_REMOTE"]
+ # TODO(SPARK-41612): support Catalog.isCached
@unittest.skip("Fails in Spark Connect, should enable.")
def test_cache(self):
super().test_cache()
+ # TODO(SPARK-41866): createDataframe support array type
@unittest.skip("Fails in Spark Connect, should enable.")
def test_create_dataframe_from_array_of_long(self):
super().test_create_dataframe_from_array_of_long()
+ # TODO(SPARK-41868): Support data type Duration(NANOSECOND)
@unittest.skip("Fails in Spark Connect, should enable.")
def test_create_dataframe_from_pandas_with_day_time_interval(self):
super().test_create_dataframe_from_pandas_with_day_time_interval()
+ # TODO(SPARK-41842): Support data type Timestamp(NANOSECOND, null)
@unittest.skip("Fails in Spark Connect, should enable.")
def test_create_dataframe_from_pandas_with_dst(self):
super().test_create_dataframe_from_pandas_with_dst()
+ # TODO(SPARK-41842): Support data type Timestamp(NANOSECOND, null)
@unittest.skip("Fails in Spark Connect, should enable.")
def test_create_dataframe_from_pandas_with_timestamp(self):
super().test_create_dataframe_from_pandas_with_timestamp()
- @unittest.skip("Fails in Spark Connect, should enable.")
- def test_create_dataframe_required_pandas_not_found(self):
- super().test_create_dataframe_required_pandas_not_found()
-
+ # TODO(SPARK-41855): createDataFrame doesn't handle None/NaN properly
@unittest.skip("Fails in Spark Connect, should enable.")
def test_create_nan_decimal_dataframe(self):
super().test_create_nan_decimal_dataframe()
+ # TODO(SPARK-41869): DataFrame dropDuplicates should throw error on non-list argument
@unittest.skip("Fails in Spark Connect, should enable.")
def test_drop_duplicates(self):
super().test_drop_duplicates()
+ # TODO(SPARK-41870): Handle duplicate columns in `createDataFrame`
@unittest.skip("Fails in Spark Connect, should enable.")
def test_duplicated_column_names(self):
super().test_duplicated_column_names()
+ # TODO(SPARK-41871): DataFrame hint parameter can be a float
@unittest.skip("Fails in Spark Connect, should enable.")
def test_extended_hint_types(self):
super().test_extended_hint_types()
+ # TODO(SPARK-41872): Fix DataFrame createDataframe handling of None
@unittest.skip("Fails in Spark Connect, should enable.")
def test_fillna(self):
super().test_fillna()
+ # TODO: comparing types, need to expose connect types
@unittest.skip("Fails in Spark Connect, should enable.")
def test_generic_hints(self):
super().test_generic_hints()
+ # Spark Connect does not support RDD but the tests depend on them.
@unittest.skip("Fails in Spark Connect, should enable.")
def test_help_command(self):
super().test_help_command()
+ # Spark Connect throws NotImplementedError; the tests expect IllegalArgumentException
@unittest.skip("Fails in Spark Connect, should enable.")
def test_invalid_join_method(self):
super().test_invalid_join_method()
+ # TODO(SPARK-41834): Implement SparkSession.conf
@unittest.skip("Fails in Spark Connect, should enable.")
def test_join_without_on(self):
super().test_join_without_on()
+ # TODO(SPARK-41527): Implement DataFrame.observe
@unittest.skip("Fails in Spark Connect, should enable.")
def test_observe(self):
super().test_observe()
+ # TODO(SPARK-41625): Support Structured Streaming
@unittest.skip("Fails in Spark Connect, should enable.")
def test_observe_str(self):
super().test_observe_str()
+ # TODO(SPARK-41873): Implement DataFrame `pandas_api`
@unittest.skip("Fails in Spark Connect, should enable.")
def test_pandas_api(self):
super().test_pandas_api()
+ # TODO(SPARK-41840): DataFrame.show(): 'Column' object is not callable
@unittest.skip("Fails in Spark Connect, should enable.")
def test_repartitionByRange_dataframe(self):
super().test_repartitionByRange_dataframe()
+ # TODO(SPARK-41872): Fix DataFrame createDataframe handling of None
@unittest.skip("Fails in Spark Connect, should enable.")
def test_replace(self):
super().test_replace()
+ # TODO(SPARK-41834): Implement SparkSession.conf
@unittest.skip("Fails in Spark Connect, should enable.")
def test_repr_behaviors(self):
super().test_repr_behaviors()
+ # TODO(SPARK-41834): Implement SparkSession.conf
@unittest.skip("Fails in Spark Connect, should enable.")
def test_require_cross(self):
super().test_require_cross()
+ # TODO(SPARK-41874): Implement DataFrame `sameSemantics`
@unittest.skip("Fails in Spark Connect, should enable.")
def test_same_semantics_error(self):
super().test_same_semantics_error()
+ # TODO(SPARK-41830): Fix DataFrame.sample parameters
@unittest.skip("Fails in Spark Connect, should enable.")
def test_sample(self):
super().test_sample()
+ # TODO(SPARK-41875): throw proper errors in Dataset.to()
@unittest.skip("Fails in Spark Connect, should enable.")
def test_to(self):
super().test_to()
+ # Spark Connect does not support RDD but the tests depend on them.
@unittest.skip("Fails in Spark Connect, should enable.")
def test_toDF_with_schema_string(self):
super().test_toDF_with_schema_string()
+ # TODO(SPARK-41876): Implement DataFrame `toLocalIterator`
@unittest.skip("Fails in Spark Connect, should enable.")
def test_to_local_iterator(self):
super().test_to_local_iterator()
+ # TODO(SPARK-41876): Implement DataFrame `toLocalIterator`
@unittest.skip("Fails in Spark Connect, should enable.")
def test_to_local_iterator_not_fully_consumed(self):
super().test_to_local_iterator_not_fully_consumed()
+ # TODO(SPARK-41876): Implement DataFrame `toLocalIterator`
@unittest.skip("Fails in Spark Connect, should enable.")
def test_to_local_iterator_prefetch(self):
super().test_to_local_iterator_prefetch()
+ # TODO(SPARK-41884): DataFrame `toPandas` parity in return types
@unittest.skip("Fails in Spark Connect, should enable.")
def test_to_pandas(self):
super().test_to_pandas()
- @unittest.skip("Fails in Spark Connect, should enable.")
- def test_to_pandas_avoid_astype(self):
- super().test_to_pandas_avoid_astype()
-
+ # TODO(SPARK-41884): DataFrame `toPandas` parity in return types
@unittest.skip("Fails in Spark Connect, should enable.")
def test_to_pandas_for_array_of_struct(self):
super().test_to_pandas_for_array_of_struct()
+ # TODO(SPARK-41834): Implement SparkSession.conf
@unittest.skip("Fails in Spark Connect, should enable.")
def test_to_pandas_from_empty_dataframe(self):
super().test_to_pandas_from_empty_dataframe()
+ # TODO(SPARK-41834): Implement SparkSession.conf
@unittest.skip("Fails in Spark Connect, should enable.")
def test_to_pandas_from_mixed_dataframe(self):
super().test_to_pandas_from_mixed_dataframe()
+ # TODO(SPARK-41834): Implement SparkSession.conf
@unittest.skip("Fails in Spark Connect, should enable.")
def test_to_pandas_from_null_dataframe(self):
super().test_to_pandas_from_null_dataframe()
+ # TODO(SPARK-41834): Implement SparkSession.conf
@unittest.skip("Fails in Spark Connect, should enable.")
def test_to_pandas_on_cross_join(self):
super().test_to_pandas_on_cross_join()
+ # TODO(SPARK-41834): Implement SparkSession.conf
@unittest.skip("Fails in Spark Connect, should enable.")
def test_to_pandas_with_duplicated_column_names(self):
super().test_to_pandas_with_duplicated_column_names()
+ # TODO(SPARK-41877): createDataframe throw proper errors
@unittest.skip("Fails in Spark Connect, should enable.")
def test_unpivot(self):
super().test_unpivot()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]