Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/19929#discussion_r158571320
--- Diff: python/pyspark/sql/tests.py ---
@@ -434,6 +434,16 @@ def test_udf_with_array_type(self):
self.assertEqual(list(range(3)), l1)
self.assertEqual(1, l2)
+ def test_nondeterministic_udf(self):
+ from pyspark.sql.functions import udf
+ import random
+ udf_random_col = udf(lambda: int(100 * random.random()), IntegerType()).asNondeterministic()
+ df = self.spark.createDataFrame([Row(1)]).select(udf_random_col().alias('RAND'))
+ random.seed(1234)
+ udf_add_ten = udf(lambda rand: rand + 10, IntegerType())
+ [row] = df.withColumn('RAND_PLUS_TEN', udf_add_ten('RAND')).collect()
+ self.assertEqual(row[0] + 10, row[1])
--- End diff --
Since you already set the seed, could the test compare the results against the actual expected values?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]