This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 889c08b4fe4 [SPARK-41399][CONNECT] Refactor column related tests to
test_connect_column
889c08b4fe4 is described below
commit 889c08b4fe4fb58157a68af5ecd50ec1df10d127
Author: Rui Wang <[email protected]>
AuthorDate: Mon Dec 5 19:11:06 2022 -0800
[SPARK-41399][CONNECT] Refactor column related tests to test_connect_column
### What changes were proposed in this pull request?
Given that there is now a dedicated `test_connect_column.py`, we should move
the column API tests into that file.
### Why are the changes needed?
Codebase refactoring
### Does this PR introduce _any_ user-facing change?
NO
### How was this patch tested?
Existing unit tests
Closes #38925 from amaliujia/move_all_column_tests_to_right_place.
Authored-by: Rui Wang <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../sql/tests/connect/test_connect_basic.py | 51 ---------------------
.../sql/tests/connect/test_connect_column.py | 52 ++++++++++++++++++++++
2 files changed, 52 insertions(+), 51 deletions(-)
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py
b/python/pyspark/sql/tests/connect/test_connect_basic.py
index 22ee98558de..1f67f4c49de 100644
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -146,48 +146,6 @@ class SparkConnectTests(SparkConnectSQLTestCase):
)
self.assert_eq(joined_plan3.toPandas(), joined_plan4.toPandas())
- def test_columns(self):
- # SPARK-41036: test `columns` API for python client.
- df = self.connect.read.table(self.tbl_name)
- df2 = self.spark.read.table(self.tbl_name)
- self.assertEqual(["id", "name"], df.columns)
-
- self.assert_eq(
- df.filter(df.name.rlike("20")).toPandas(),
df2.filter(df2.name.rlike("20")).toPandas()
- )
- self.assert_eq(
- df.filter(df.name.like("20")).toPandas(),
df2.filter(df2.name.like("20")).toPandas()
- )
- self.assert_eq(
- df.filter(df.name.ilike("20")).toPandas(),
df2.filter(df2.name.ilike("20")).toPandas()
- )
- self.assert_eq(
- df.filter(df.name.contains("20")).toPandas(),
- df2.filter(df2.name.contains("20")).toPandas(),
- )
- self.assert_eq(
- df.filter(df.name.startswith("2")).toPandas(),
- df2.filter(df2.name.startswith("2")).toPandas(),
- )
- self.assert_eq(
- df.filter(df.name.endswith("0")).toPandas(),
- df2.filter(df2.name.endswith("0")).toPandas(),
- )
- self.assert_eq(
- df.select(df.name.substr(0, 1).alias("col")).toPandas(),
- df2.select(df2.name.substr(0, 1).alias("col")).toPandas(),
- )
- df3 = self.connect.sql("SELECT cast(null as int) as name")
- df4 = self.spark.sql("SELECT cast(null as int) as name")
- self.assert_eq(
- df3.filter(df3.name.isNull()).toPandas(),
- df4.filter(df4.name.isNull()).toPandas(),
- )
- self.assert_eq(
- df3.filter(df3.name.isNotNull()).toPandas(),
- df4.filter(df4.name.isNotNull()).toPandas(),
- )
-
def test_collect(self):
df = self.connect.read.table(self.tbl_name)
data = df.limit(10).collect()
@@ -369,15 +327,6 @@ class SparkConnectTests(SparkConnectSQLTestCase):
finally:
shutil.rmtree(tmpPath)
- def test_simple_binary_expressions(self):
- """Test complex expression"""
- df = self.connect.read.table(self.tbl_name)
- pd = df.select(df.id).where(df.id % lit(30) ==
lit(0)).sort(df.id.asc()).toPandas()
- self.assertEqual(len(pd.index), 4)
-
- res = pandas.DataFrame(data={"id": [0, 30, 60, 90]})
- self.assert_(pd.equals(res), f"{pd.to_string()} != {res.to_string()}")
-
def test_limit_offset(self):
df = self.connect.read.table(self.tbl_name)
pd = df.limit(10).offset(1).toPandas()
diff --git a/python/pyspark/sql/tests/connect/test_connect_column.py
b/python/pyspark/sql/tests/connect/test_connect_column.py
index 803481508e5..106ab609bfa 100644
--- a/python/pyspark/sql/tests/connect/test_connect_column.py
+++ b/python/pyspark/sql/tests/connect/test_connect_column.py
@@ -20,6 +20,7 @@ from pyspark.testing.sqlutils import have_pandas
if have_pandas:
from pyspark.sql.connect.functions import lit
+ import pandas
class SparkConnectTests(SparkConnectSQLTestCase):
@@ -28,6 +29,57 @@ class SparkConnectTests(SparkConnectSQLTestCase):
df = self.connect.range(10)
self.assertEqual(9, len(df.filter(df.id != lit(1)).collect()))
+ def test_columns(self):
+ # SPARK-41036: test `columns` API for python client.
+ df = self.connect.read.table(self.tbl_name)
+ df2 = self.spark.read.table(self.tbl_name)
+ self.assertEqual(["id", "name"], df.columns)
+
+ self.assert_eq(
+ df.filter(df.name.rlike("20")).toPandas(),
df2.filter(df2.name.rlike("20")).toPandas()
+ )
+ self.assert_eq(
+ df.filter(df.name.like("20")).toPandas(),
df2.filter(df2.name.like("20")).toPandas()
+ )
+ self.assert_eq(
+ df.filter(df.name.ilike("20")).toPandas(),
df2.filter(df2.name.ilike("20")).toPandas()
+ )
+ self.assert_eq(
+ df.filter(df.name.contains("20")).toPandas(),
+ df2.filter(df2.name.contains("20")).toPandas(),
+ )
+ self.assert_eq(
+ df.filter(df.name.startswith("2")).toPandas(),
+ df2.filter(df2.name.startswith("2")).toPandas(),
+ )
+ self.assert_eq(
+ df.filter(df.name.endswith("0")).toPandas(),
+ df2.filter(df2.name.endswith("0")).toPandas(),
+ )
+ self.assert_eq(
+ df.select(df.name.substr(0, 1).alias("col")).toPandas(),
+ df2.select(df2.name.substr(0, 1).alias("col")).toPandas(),
+ )
+ df3 = self.connect.sql("SELECT cast(null as int) as name")
+ df4 = self.spark.sql("SELECT cast(null as int) as name")
+ self.assert_eq(
+ df3.filter(df3.name.isNull()).toPandas(),
+ df4.filter(df4.name.isNull()).toPandas(),
+ )
+ self.assert_eq(
+ df3.filter(df3.name.isNotNull()).toPandas(),
+ df4.filter(df4.name.isNotNull()).toPandas(),
+ )
+
+ def test_simple_binary_expressions(self):
+ """Test complex expression"""
+ df = self.connect.read.table(self.tbl_name)
+ pd = df.select(df.id).where(df.id % lit(30) ==
lit(0)).sort(df.id.asc()).toPandas()
+ self.assertEqual(len(pd.index), 4)
+
+ res = pandas.DataFrame(data={"id": [0, 30, 60, 90]})
+ self.assert_(pd.equals(res), f"{pd.to_string()} != {res.to_string()}")
+
if __name__ == "__main__":
import unittest
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]