Github user gatorsmile commented on a diff in the pull request:
https://github.com/apache/spark/pull/20137#discussion_r159448813
--- Diff: python/pyspark/sql/tests.py ---
@@ -378,6 +378,23 @@ def test_udf2(self):
[res] = self.spark.sql("SELECT strlen(a) FROM test WHERE strlen(a)
> 1").collect()
self.assertEqual(4, res[0])
+ def test_non_deterministic_udf(self):
+ import random
+ from pyspark.sql.functions import udf
+ random_udf = udf(lambda: random.randint(6, 6),
IntegerType()).asNondeterministic()
+ self.assertEqual(random_udf.deterministic, False)
+ random_udf1 = self.spark.catalog.registerFunction("randInt",
random_udf, StringType())
+ self.assertEqual(random_udf1.deterministic, False)
+ [row] = self.spark.sql("SELECT randInt()").collect()
+ self.assertEqual(row[0], "6")
+ [row] = self.spark.range(1).select(random_udf1()).collect()
+ self.assertEqual(row[0], "6")
+ [row] = self.spark.range(1).select(random_udf()).collect()
+ self.assertEqual(row[0], 6)
+ pydoc.render_doc(udf(lambda: random.randint(6, 6), IntegerType()))
--- End diff --
The `pydoc.render_doc(...)` call at the end of this test is there to check that the help function works on a UDF. See
https://github.com/gatorsmile/spark/blob/85f11bfbfb564acb670097ff4ce520bfbc79b855/python/pyspark/sql/tests.py#L1681-L1688
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]