Repository: spark Updated Branches: refs/heads/master 681e3024b -> baf4587a5
[SPARK-9691] [SQL] PySpark SQL rand function treats seed 0 as no seed https://issues.apache.org/jira/browse/SPARK-9691 jkbradley rxin Author: Yin Huai <[email protected]> Closes #7999 from yhuai/pythonRand and squashes the following commits: 4187e0c [Yin Huai] Regression test. a985ef9 [Yin Huai] Use "if seed is not None" instead of "if seed" because "if seed" evaluates to false when seed is 0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/baf4587a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/baf4587a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/baf4587a Branch: refs/heads/master Commit: baf4587a569b49e39020c04c2785041bdd00789b Parents: 681e302 Author: Yin Huai <[email protected]> Authored: Thu Aug 6 17:03:14 2015 -0700 Committer: Reynold Xin <[email protected]> Committed: Thu Aug 6 17:03:14 2015 -0700 ---------------------------------------------------------------------- python/pyspark/sql/functions.py | 4 ++-- python/pyspark/sql/tests.py | 10 ++++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/baf4587a/python/pyspark/sql/functions.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index b5c6a01..95f4604 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -268,7 +268,7 @@ def rand(seed=None): """Generates a random column with i.i.d. samples from U[0.0, 1.0]. """ sc = SparkContext._active_spark_context - if seed: + if seed is not None: jc = sc._jvm.functions.rand(seed) else: jc = sc._jvm.functions.rand() @@ -280,7 +280,7 @@ def randn(seed=None): """Generates a column with i.i.d. samples from the standard normal distribution. 
""" sc = SparkContext._active_spark_context - if seed: + if seed is not None: jc = sc._jvm.functions.randn(seed) else: jc = sc._jvm.functions.randn() http://git-wip-us.apache.org/repos/asf/spark/blob/baf4587a/python/pyspark/sql/tests.py ---------------------------------------------------------------------- diff --git a/python/pyspark/sql/tests.py b/python/pyspark/sql/tests.py index ebd3ea8..1e3444d 100644 --- a/python/pyspark/sql/tests.py +++ b/python/pyspark/sql/tests.py @@ -629,6 +629,16 @@ class SQLTests(ReusedPySparkTestCase): for row in rndn: assert row[1] >= -4.0 and row[1] <= 4.0, "got: %s" % row[1] + # If the specified seed is 0, we should use it. + # https://issues.apache.org/jira/browse/SPARK-9691 + rnd1 = df.select('key', functions.rand(0)).collect() + rnd2 = df.select('key', functions.rand(0)).collect() + self.assertEqual(sorted(rnd1), sorted(rnd2)) + + rndn1 = df.select('key', functions.randn(0)).collect() + rndn2 = df.select('key', functions.randn(0)).collect() + self.assertEqual(sorted(rndn1), sorted(rndn2)) + def test_between_function(self): df = self.sc.parallelize([ Row(a=1, b=2, c=3), --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
