Github user icexelloss commented on a diff in the pull request:
https://github.com/apache/spark/pull/19630#discussion_r151475971
--- Diff: python/pyspark/sql/udf.py ---
@@ -0,0 +1,155 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""
+User-defined function related classes and functions
+"""
+import functools
+
+from pyspark import SparkContext
+from pyspark.rdd import _prepare_for_python_RDD, PythonEvalType
+from pyspark.sql.column import Column, _to_java_column, _to_seq
+from pyspark.sql.types import StringType, DataType, StructType, _parse_datatype_string
+
+
+def _wrap_function(sc, func, returnType):
+    command = (func, returnType)
+    pickled_command, broadcast_vars, env, includes = _prepare_for_python_RDD(sc, command)
+    return sc._jvm.PythonFunction(bytearray(pickled_command), env, includes, sc.pythonExec,
+                                  sc.pythonVer, broadcast_vars, sc._javaAccumulator)
+
+
+def _create_udf(f, returnType, evalType):
+    if evalType == PythonEvalType.PANDAS_SCALAR_UDF:
+        import inspect
+        argspec = inspect.getargspec(f)
+        if len(argspec.args) == 0 and argspec.varargs is None:
+            raise ValueError(
+                "Invalid function: 0-arg pandas_udfs are not supported. "
+                "Instead, create a 1-arg pandas_udf and ignore the arg in your function."
+            )
+
+    elif evalType == PythonEvalType.PANDAS_GROUP_MAP_UDF:
+        import inspect
+        argspec = inspect.getargspec(f)
+        if len(argspec.args) != 1:
+            raise ValueError(
+                "Invalid function: pandas_udf with function type GROUP_MAP "
+                "must take a single arg that is a pandas DataFrame."
+            )
+
+    udf_obj = UserDefinedFunction(f, returnType=returnType, name=None, evalType=evalType)
--- End diff --
I added the comment, but kept `name=None` because I think this is more explicit.
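
For context, here is a minimal sketch of the fallback behavior that passing `name=None` makes explicit, assuming the constructor derives the UDF name from the wrapped callable when no name is given. The helper `_resolve_udf_name` below is hypothetical and for illustration only, not the actual `UserDefinedFunction` code:

```python
# Hypothetical illustration of the name fallback when name is None:
# use the wrapped callable's __name__, or its class name for callables
# such as functools.partial that have no __name__ attribute.
import functools


def _resolve_udf_name(func, name=None):
    if name is not None:
        return name
    return getattr(func, "__name__", func.__class__.__name__)


print(_resolve_udf_name(lambda x: x + 1))                     # "<lambda>"
print(_resolve_udf_name(functools.partial(lambda x: x + 1)))  # "partial"
print(_resolve_udf_name(lambda x: x + 1, name="plus_one"))    # "plus_one"
```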
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]