Repository: spark
Updated Branches:
  refs/heads/master 6be272b75 -> 28d337440


[SPARK-23647][PYTHON][SQL] Adds more types for hint in pyspark

Signed-off-by: DylanGuedes <djmgguedesgmail.com>

## What changes were proposed in this pull request?

Addition of float, int and list hints for `pyspark.sql` Hint.

## How was this patch tested?

I did manual tests following the same principles used in the Scala version, and also added unit tests.

Closes #20788 from DylanGuedes/jira-21030.

Authored-by: DylanGuedes <djmggue...@gmail.com>
Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/28d33744
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/28d33744
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/28d33744

Branch: refs/heads/master
Commit: 28d33744076abd8bf7955eefcbdeef4849a99c40
Parents: 6be272b
Author: DylanGuedes <djmggue...@gmail.com>
Authored: Sat Dec 1 10:37:03 2018 +0800
Committer: Hyukjin Kwon <gurwls...@apache.org>
Committed: Sat Dec 1 10:37:03 2018 +0800

----------------------------------------------------------------------
 python/pyspark/sql/dataframe.py            |  6 ++++--
 python/pyspark/sql/tests/test_dataframe.py | 13 +++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/28d33744/python/pyspark/sql/dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index b8833a3..1b1092c 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -485,10 +485,12 @@ class DataFrame(object):
         if not isinstance(name, str):
             raise TypeError("name should be provided as str, got {0}".format(type(name)))
 
+        allowed_types = (basestring, list, float, int)
         for p in parameters:
-            if not isinstance(p, str):
+            if not isinstance(p, allowed_types):
                 raise TypeError(
-                    "all parameters should be str, got {0} of type {1}".format(p, type(p)))
+                    "all parameters should be in {0}, got {1} of type {2}".format(
+                        allowed_types, p, type(p)))
 
         jdf = self._jdf.hint(name, self._jseq(parameters))
         return DataFrame(jdf, self.sql_ctx)

http://git-wip-us.apache.org/repos/asf/spark/blob/28d33744/python/pyspark/sql/tests/test_dataframe.py
----------------------------------------------------------------------
diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py
index 908d400..65edf59 100644
--- a/python/pyspark/sql/tests/test_dataframe.py
+++ b/python/pyspark/sql/tests/test_dataframe.py
@@ -375,6 +375,19 @@ class DataFrameTests(ReusedSQLTestCase):
         plan = df1.join(df2.hint("broadcast"), "id")._jdf.queryExecution().executedPlan()
         self.assertEqual(1, plan.toString().count("BroadcastHashJoin"))
 
+    # add tests for SPARK-23647 (test more types for hint)
+    def test_extended_hint_types(self):
+        from pyspark.sql import DataFrame
+
+        df = self.spark.range(10e10).toDF("id")
+        such_a_nice_list = ["itworks1", "itworks2", "itworks3"]
+        hinted_df = df.hint("my awesome hint", 1.2345, "what", such_a_nice_list)
+        logical_plan = hinted_df._jdf.queryExecution().logical()
+
+        self.assertEqual(1, logical_plan.toString().count("1.2345"))
+        self.assertEqual(1, logical_plan.toString().count("what"))
+        self.assertEqual(3, logical_plan.toString().count("itworks"))
+
     def test_sample(self):
         self.assertRaisesRegexp(
             TypeError,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org