Github user gatorsmile commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19929#discussion_r158571320
  
    --- Diff: python/pyspark/sql/tests.py ---
    @@ -434,6 +434,16 @@ def test_udf_with_array_type(self):
             self.assertEqual(list(range(3)), l1)
             self.assertEqual(1, l2)
     
    +    def test_nondeterministic_udf(self):
    +        from pyspark.sql.functions import udf
    +        import random
    +        udf_random_col = udf(lambda: int(100 * random.random()), IntegerType()).asNondeterministic()
    +        df = self.spark.createDataFrame([Row(1)]).select(udf_random_col().alias('RAND'))
    +        random.seed(1234)
    +        udf_add_ten = udf(lambda rand: rand + 10, IntegerType())
    +        [row] = df.withColumn('RAND_PLUS_TEN', udf_add_ten('RAND')).collect()
    +        self.assertEqual(row[0] + 10, row[1])
    --- End diff --
    
    Since you already set the seed, could you compare against the expected values directly?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to