Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/20171#discussion_r161723055
--- Diff: python/pyspark/sql/catalog.py ---
@@ -256,27 +261,55 @@ def registerFunction(self, name, f,
returnType=StringType()):
>>> spark.sql("SELECT stringLengthInt('test')").collect()
[Row(stringLengthInt(test)=4)]
+ >>> from pyspark.sql.types import IntegerType
+ >>> from pyspark.sql.functions import udf
+ >>> slen = udf(lambda s: len(s), IntegerType())
+ >>> _ = spark.udf.register("slen", slen)
+ >>> spark.sql("SELECT slen('test')").collect()
+ [Row(slen(test)=4)]
+
>>> import random
>>> from pyspark.sql.functions import udf
- >>> from pyspark.sql.types import IntegerType, StringType
+ >>> from pyspark.sql.types import IntegerType
>>> random_udf = udf(lambda: random.randint(0, 100),
IntegerType()).asNondeterministic()
- >>> newRandom_udf = spark.catalog.registerFunction("random_udf",
random_udf, StringType())
+ >>> new_random_udf = spark.catalog.registerFunction("random_udf",
random_udf)
>>> spark.sql("SELECT random_udf()").collect() # doctest: +SKIP
- [Row(random_udf()=u'82')]
- >>> spark.range(1).select(newRandom_udf()).collect() # doctest: +SKIP
- [Row(random_udf()=u'62')]
+ [Row(random_udf()=82)]
+ >>> spark.range(1).select(new_random_udf()).collect() # doctest: +SKIP
+ [Row(<lambda>()=26)]
+
+ >>> from pyspark.sql.functions import pandas_udf, PandasUDFType
+ >>> @pandas_udf("integer", PandasUDFType.SCALAR) # doctest: +SKIP
+ ... def add_one(x):
+ ... return x + 1
+ ...
+ >>> _ = spark.udf.register("add_one", add_one) # doctest: +SKIP
+ >>> spark.sql("SELECT add_one(id) FROM range(3)").collect() # doctest: +SKIP
+ [Row(add_one(id)=1), Row(add_one(id)=2), Row(add_one(id)=3)]
"""
# This is to check whether the input function is a wrapped/native
UserDefinedFunction
if hasattr(f, 'asNondeterministic'):
- udf = UserDefinedFunction(f.func, returnType=returnType,
name=name,
-
evalType=PythonEvalType.SQL_BATCHED_UDF,
- deterministic=f.deterministic)
+ if returnType is not None:
+ raise TypeError(
+ "Invalid returnType: None is expected when f is a UserDefinedFunction, "
--- End diff --
Here too, I think we should say that `returnType` must not be set
when `f` is a `UserDefinedFunction`.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]