Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/20288#discussion_r162035576
--- Diff: python/pyspark/sql/context.py ---
@@ -172,113 +173,34 @@ def range(self, start, end=None, step=1, numPartitions=None):
         """
         return self.sparkSession.range(start, end, step, numPartitions)
-    @ignore_unicode_prefix
-    @since(1.2)
     def registerFunction(self, name, f, returnType=None):
-        """Registers a Python function (including lambda function) or a :class:`UserDefinedFunction`
-        as a UDF. The registered UDF can be used in SQL statements.
-
-        :func:`spark.udf.register` is an alias for :func:`sqlContext.registerFunction`.
-
-        In addition to a name and the function itself, `returnType` can be optionally specified.
-        1) When f is a Python function, `returnType` defaults to a string. The produced object must
-        match the specified type. 2) When f is a :class:`UserDefinedFunction`, Spark uses the return
-        type of the given UDF as the return type of the registered UDF. The input parameter
-        `returnType` is None by default. If given by users, the value must be None.
-
-        :param name: name of the UDF in SQL statements.
-        :param f: a Python function, or a wrapped/native UserDefinedFunction. The UDF can be either
-            row-at-a-time or vectorized.
-        :param returnType: the return type of the registered UDF.
-        :return: a wrapped/native :class:`UserDefinedFunction`
-
-        >>> strlen = sqlContext.registerFunction("stringLengthString", lambda x: len(x))
-        >>> sqlContext.sql("SELECT stringLengthString('test')").collect()
-        [Row(stringLengthString(test)=u'4')]
-
-        >>> sqlContext.sql("SELECT 'foo' AS text").select(strlen("text")).collect()
-        [Row(stringLengthString(text)=u'3')]
-
-        >>> from pyspark.sql.types import IntegerType
-        >>> _ = sqlContext.registerFunction("stringLengthInt", lambda x: len(x), IntegerType())
-        >>> sqlContext.sql("SELECT stringLengthInt('test')").collect()
-        [Row(stringLengthInt(test)=4)]
-
-        >>> from pyspark.sql.types import IntegerType
-        >>> _ = sqlContext.udf.register("stringLengthInt", lambda x: len(x), IntegerType())
-        >>> sqlContext.sql("SELECT stringLengthInt('test')").collect()
-        [Row(stringLengthInt(test)=4)]
-
-        >>> from pyspark.sql.types import IntegerType
-        >>> from pyspark.sql.functions import udf
-        >>> slen = udf(lambda s: len(s), IntegerType())
-        >>> _ = sqlContext.udf.register("slen", slen)
-        >>> sqlContext.sql("SELECT slen('test')").collect()
-        [Row(slen(test)=4)]
-
-        >>> import random
-        >>> from pyspark.sql.functions import udf
-        >>> from pyspark.sql.types import IntegerType
-        >>> random_udf = udf(lambda: random.randint(0, 100), IntegerType()).asNondeterministic()
-        >>> new_random_udf = sqlContext.registerFunction("random_udf", random_udf)
-        >>> sqlContext.sql("SELECT random_udf()").collect()  # doctest: +SKIP
-        [Row(random_udf()=82)]
-        >>> sqlContext.range(1).select(new_random_udf()).collect()  # doctest: +SKIP
-        [Row(<lambda>()=26)]
-
-        >>> from pyspark.sql.functions import pandas_udf, PandasUDFType
-        >>> @pandas_udf("integer", PandasUDFType.SCALAR)  # doctest: +SKIP
-        ... def add_one(x):
-        ...     return x + 1
-        ...
-        >>> _ = sqlContext.udf.register("add_one", add_one)  # doctest: +SKIP
-        >>> sqlContext.sql("SELECT add_one(id) FROM range(3)").collect()  # doctest: +SKIP
-        [Row(add_one(id)=1), Row(add_one(id)=2), Row(add_one(id)=3)]
-        """
-        return self.sparkSession.catalog.registerFunction(name, f, returnType)
+        warnings.warn(
+            "Deprecated in 2.3.0. Use spark.udf.register instead.",
+            DeprecationWarning)
+        return self.sparkSession.udf.register(name, f, returnType)
+    # Reuse the docstring from UDFRegistration but with few notes.
+    _register_doc = UDFRegistration.register.__doc__.strip()
+    registerFunction.__doc__ = """%s
-    @ignore_unicode_prefix
-    @since(2.1)
-    def registerJavaFunction(self, name, javaClassName, returnType=None):
-        """Register a java UDF so it can be used in SQL statements.
-
-        In addition to a name and the function itself, the return type can be optionally specified.
-        When the return type is not specified we would infer it via reflection.
-        :param name: name of the UDF
-        :param javaClassName: fully qualified name of java class
-        :param returnType: a :class:`pyspark.sql.types.DataType` object
-
-        >>> sqlContext.registerJavaFunction("javaStringLength",
-        ...   "test.org.apache.spark.sql.JavaStringLength", IntegerType())
-        >>> sqlContext.sql("SELECT javaStringLength('test')").collect()
-        [Row(UDF:javaStringLength(test)=4)]
-        >>> sqlContext.registerJavaFunction("javaStringLength2",
-        ...   "test.org.apache.spark.sql.JavaStringLength")
-        >>> sqlContext.sql("SELECT javaStringLength2('test')").collect()
-        [Row(UDF:javaStringLength2(test)=4)]
+        .. note:: :func:`sqlContext.registerFunction` is an alias for
+            :func:`spark.udf.register`.
+        .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.register` instead.
+
+        .. versionadded:: 1.2
+        """ % _register_doc[:_register_doc.rfind('versionadded::')]
-        """
-        jdt = None
-        if returnType is not None:
-            jdt = self.sparkSession._jsparkSession.parseDataType(returnType.json())
-        self.sparkSession._jsparkSession.udf().registerJava(name, javaClassName, jdt)
+    def registerJavaFunction(self, name, javaClassName, returnType=None):
+        warnings.warn(
+            "Deprecated in 2.3.0. Use spark.udf.registerJavaFunction instead.",
+            DeprecationWarning)
+        return self.sparkSession.udf.registerJavaFunction(name, javaClassName, returnType)
+    _registerJavaFunction_doc = UDFRegistration.registerJavaFunction.__doc__.strip()
+    registerJavaFunction.__doc__ = """%s
-    @ignore_unicode_prefix
-    @since(2.3)
-    def registerJavaUDAF(self, name, javaClassName):
-        """Register a java UDAF so it can be used in SQL statements.
-
-        :param name: name of the UDAF
-        :param javaClassName: fully qualified name of java class
-
-        >>> sqlContext.registerJavaUDAF("javaUDAF",
-        ...   "test.org.apache.spark.sql.MyDoubleAvg")
-        >>> df = sqlContext.createDataFrame([(1, "a"),(2, "b"), (3, "a")], ["id", "name"])
-        >>> df.registerTempTable("df")
-        >>> sqlContext.sql("SELECT name, javaUDAF(id) as avg from df group by name").collect()
-        [Row(name=u'b', avg=102.0), Row(name=u'a', avg=102.0)]
-        """
-        self.sparkSession._jsparkSession.udf().registerJavaUDAF(name, javaClassName)
+        .. note:: :func:`sqlContext.registerJavaFunction` is an alias for
+            :func:`spark.udf.registerJavaFunction`.
+        .. note:: Deprecated in 2.3.0. Use :func:`spark.udf.registerJavaFunction` instead.
+
+        .. versionadded:: 2.1
+        """ % _registerJavaFunction_doc[:_registerJavaFunction_doc.rfind('versionadded::')]
--- End diff --
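
For readers skimming the diff: the pattern applied here is (1) emit a `DeprecationWarning` and delegate to the new `spark.udf.*` API, and (2) rebuild the deprecated alias's docstring from the canonical one, cutting it off at its trailing `.. versionadded::` directive before appending the alias and deprecation notes. A minimal standalone sketch of that pattern; the class and method names below are hypothetical, not from the PR:

```python
import warnings


class UDFRegistry(object):
    """Hypothetical stand-in for UDFRegistration."""

    def register(self, name, f):
        """Registers `f` as a UDF under `name`.

        :param name: name of the UDF in SQL statements.
        :param f: a Python function.

        .. versionadded:: 2.3
        """
        return f  # stand-in for the real registration logic


class LegacyContext(object):
    """Hypothetical stand-in for SQLContext."""

    udf = UDFRegistry()

    def registerFunction(self, name, f):
        warnings.warn(
            "Deprecated. Use udf.register instead.", DeprecationWarning)
        return self.udf.register(name, f)

    # Reuse the canonical docstring, cutting it off at its trailing
    # ``versionadded`` directive so the alias can carry its own
    # version and deprecation notes, as the diff does.
    _doc = UDFRegistry.register.__doc__.strip()
    registerFunction.__doc__ = """%s
        .. note:: Deprecated. Use :func:`udf.register` instead.

        .. versionadded:: 1.2
        """ % _doc[:_doc.rfind('versionadded::')]
```

One design detail: slicing at `rfind('versionadded::')` leaves the directive's leading `.. ` behind, but reST treats a bare `..` line as an empty comment, so it disappears from rendered output.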
<img width="699" alt="2018-01-17 9 22 57"
src="https://user-images.githubusercontent.com/6477701/35042699-076059e6-fbcd-11e7-8737-50c45d681f33.png">