Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/18906#discussion_r162857996
--- Diff: python/pyspark/sql/tests.py ---
@@ -602,6 +602,30 @@ def test_non_existed_udf(self):
self.assertRaisesRegexp(AnalysisException, "Can not load class
non_existed_udf",
lambda:
sqlContext.registerJavaFunction("udf1", "non_existed_udf"))
+ def test_udf_no_nulls(self):
+ from pyspark.sql.functions import udf
+ plus_four = udf(lambda x: x + 4, IntegerType()).asNonNullable()
+ df = self.spark.range(10)
+ res = df.select(plus_four(df['id']).alias('plus_four'))
+ self.assertFalse(plus_four.nullable)
+ self.assertFalse(res.schema['plus_four'].nullable)
+ self.assertEqual(res.agg({'plus_four': 'sum'}).collect()[0][0], 85)
+
+ def test_udf_with_callable_no_nulls(self):
+ df = self.spark.range(10)
+
+ class PlusFour:
+ def __call__(self, col):
+ if col is not None:
+ return col + 4
--- End diff --
Don't we need an `else` clause for this to be non-nullable?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]