This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 501beeb93b2 [SPARK-41871][CONNECT] DataFrame hint parameter can be
str, float or int
501beeb93b2 is described below
commit 501beeb93b2be42348fb1150204023e13ed5e35f
Author: Sandeep Singh <[email protected]>
AuthorDate: Thu Jan 5 14:33:03 2023 +0900
[SPARK-41871][CONNECT] DataFrame hint parameter can be str, float or int
### What changes were proposed in this pull request?
Spark Connect DataFrame hint parameter can be str, float, or int (list support is deferred to SPARK-41887).
This is done in parity with pyspark DataFrame.hint
### Why are the changes needed?
For parity
### Does this PR introduce _any_ user-facing change?
yes, allows more types as parameters.
### How was this patch tested?
Enabling existing tests
Closes #39393 from techaddict/SPARK-41871.
Authored-by: Sandeep Singh <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
python/pyspark/sql/connect/dataframe.py | 6 ++++--
python/pyspark/sql/connect/plan.py | 3 ++-
python/pyspark/sql/tests/connect/test_connect_basic.py | 15 ++++++++++++++-
3 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/python/pyspark/sql/connect/dataframe.py
b/python/pyspark/sql/connect/dataframe.py
index 2464441bcf2..de50e6f52ca 100644
--- a/python/pyspark/sql/connect/dataframe.py
+++ b/python/pyspark/sql/connect/dataframe.py
@@ -480,9 +480,11 @@ class DataFrame:
def hint(self, name: str, *params: Any) -> "DataFrame":
for param in params:
- if param is not None and not isinstance(param, (int, str)):
+ # TODO(SPARK-41887): support list type as hint parameter
+ if param is not None and not isinstance(param, (int, str, float)):
raise TypeError(
- f"param should be a int or str, but got
{type(param).__name__} {param}"
+ f"param should be a str, float or int, but got
{type(param).__name__}"
+ f" {param}"
)
return DataFrame.withPlan(
diff --git a/python/pyspark/sql/connect/plan.py
b/python/pyspark/sql/connect/plan.py
index 1f4e4192fdf..f63e39c7f3e 100644
--- a/python/pyspark/sql/connect/plan.py
+++ b/python/pyspark/sql/connect/plan.py
@@ -403,8 +403,9 @@ class Hint(LogicalPlan):
self.name = name
+ # TODO(SPARK-41887): support list type as hint parameter
assert isinstance(params, list) and all(
- p is None or isinstance(p, (int, str)) for p in params
+ p is not None and isinstance(p, (int, str, float)) for p in params
)
self.params = params
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py
b/python/pyspark/sql/tests/connect/test_connect_basic.py
index fe6c2c65e25..57d2b675065 100644
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -1193,13 +1193,26 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase):
self.spark.read.table(self.tbl_name).hint("illegal").toPandas(),
)
+ # Hint with all supported parameter values
+ such_a_nice_list = ["itworks1", "itworks2", "itworks3"]
+ self.assert_eq(
+ self.connect.read.table(self.tbl_name).hint("my awesome hint",
1.2345, 2).toPandas(),
+ self.spark.read.table(self.tbl_name).hint("my awesome hint",
1.2345, 2).toPandas(),
+ )
+
# Hint with unsupported parameter values
with self.assertRaises(SparkConnectException):
self.connect.read.table(self.tbl_name).hint("REPARTITION",
"id+1").toPandas()
# Hint with unsupported parameter types
with self.assertRaises(TypeError):
- self.connect.read.table(self.tbl_name).hint("REPARTITION",
1.1).toPandas()
+ self.connect.read.table(self.tbl_name).hint("REPARTITION",
range(5)).toPandas()
+
+ # Hint with unsupported parameter types
+ with self.assertRaises(TypeError):
+ self.connect.read.table(self.tbl_name).hint(
+ "my awesome hint", 1.2345, 2, such_a_nice_list, range(6)
+ ).toPandas()
# Hint with wrong combination
with self.assertRaises(SparkConnectException):
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]