This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new b0b7835bee2 [SPARK-45059][CONNECT][PYTHON] Add `try_reflect` functions to Scala and Python b0b7835bee2 is described below commit b0b7835bee2837c6e2875547aca259e02d2b0af7 Author: Jia Fan <fanjiaemi...@qq.com> AuthorDate: Mon Sep 4 16:28:08 2023 +0800 [SPARK-45059][CONNECT][PYTHON] Add `try_reflect` functions to Scala and Python ### What changes were proposed in this pull request? Add a new `try_reflect` function to Python and Connect. ### Why are the changes needed? For parity with the SQL function. ### Does this PR introduce _any_ user-facing change? Yes, a new function. ### How was this patch tested? Added a new test. ### Was this patch authored or co-authored using generative AI tooling? No Closes #42783 from Hisoka-X/SPARK-45059_try_reflect_to_python. Authored-by: Jia Fan <fanjiaemi...@qq.com> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- .../scala/org/apache/spark/sql/functions.scala | 9 ++++++ .../apache/spark/sql/PlanGenerationTestSuite.scala | 4 +++ .../explain-results/function_try_reflect.explain | 2 ++ .../query-tests/queries/function_try_reflect.json | 33 +++++++++++++++++++++ .../queries/function_try_reflect.proto.bin | Bin 0 -> 216 bytes .../source/reference/pyspark.sql/functions.rst | 1 + python/pyspark/sql/connect/functions.py | 7 +++++ python/pyspark/sql/functions.py | 29 ++++++++++++++++++ python/pyspark/sql/tests/test_functions.py | 2 -- .../scala/org/apache/spark/sql/functions.scala | 3 +- 10 files changed, 87 insertions(+), 3 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala index baafdd4e172..9ead800ace7 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/functions.scala @@ -3629,6 +3629,15 @@ object functions { */ def java_method(cols: Column*):
Column = Column.fn("java_method", cols: _*) + /** + * This is a special version of `reflect` that performs the same operation, but returns a NULL + * value instead of raising an error if the invoked method throws an exception. + * + * @group misc_funcs + * @since 4.0.0 + */ + def try_reflect(cols: Column*): Column = Column.fn("try_reflect", cols: _*) + /** + * Returns the Spark version. The string contains 2 fields, the first being a release version + * and the second being a git revision. diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index c457f269213..aa15fbd75ff 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -2864,6 +2864,10 @@ class PlanGenerationTestSuite fn.java_method(lit("java.util.UUID"), lit("fromString"), fn.col("g")) } + functionTest("try_reflect") { + fn.try_reflect(lit("java.util.UUID"), lit("fromString"), fn.col("g")) + } + functionTest("typeof") { fn.typeof(fn.col("g")) } diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_reflect.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_reflect.explain new file mode 100644 index 00000000000..5c68f3bf2c1 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/function_try_reflect.explain @@ -0,0 +1,2 @@ +Project [reflect(java.util.UUID, fromString, g#0, false) AS try_reflect(java.util.UUID, fromString, g)#0] ++- LocalRelation <empty>, [id#0L, a#0, b#0, d#0, e#0, f#0, g#0] diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_reflect.json b/connector/connect/common/src/test/resources/query-tests/queries/function_try_reflect.json new file
mode 100644 index 00000000000..de3fae90c2c --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/function_try_reflect.json @@ -0,0 +1,33 @@ +{ + "common": { + "planId": "1" + }, + "project": { + "input": { + "common": { + "planId": "0" + }, + "localRelation": { + "schema": "struct\u003cid:bigint,a:int,b:double,d:struct\u003cid:bigint,a:int,b:double\u003e,e:array\u003cint\u003e,f:map\u003cstring,struct\u003cid:bigint,a:int,b:double\u003e\u003e,g:string\u003e" + } + }, + "expressions": [{ + "unresolvedFunction": { + "functionName": "try_reflect", + "arguments": [{ + "literal": { + "string": "java.util.UUID" + } + }, { + "literal": { + "string": "fromString" + } + }, { + "unresolvedAttribute": { + "unparsedIdentifier": "g" + } + }] + } + }] + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/function_try_reflect.proto.bin b/connector/connect/common/src/test/resources/query-tests/queries/function_try_reflect.proto.bin new file mode 100644 index 00000000000..e38e0e5c065 Binary files /dev/null and b/connector/connect/common/src/test/resources/query-tests/queries/function_try_reflect.proto.bin differ diff --git a/python/docs/source/reference/pyspark.sql/functions.rst b/python/docs/source/reference/pyspark.sql/functions.rst index cd4f79ec62f..6896efd4fb4 100644 --- a/python/docs/source/reference/pyspark.sql/functions.rst +++ b/python/docs/source/reference/pyspark.sql/functions.rst @@ -502,6 +502,7 @@ Misc Functions assert_true raise_error reflect + try_reflect hll_sketch_estimate hll_union java_method diff --git a/python/pyspark/sql/connect/functions.py b/python/pyspark/sql/connect/functions.py index d5d2cd1c5e9..f290549ae47 100644 --- a/python/pyspark/sql/connect/functions.py +++ b/python/pyspark/sql/connect/functions.py @@ -3787,6 +3787,13 @@ def java_method(*cols: "ColumnOrName") -> Column: java_method.__doc__ = pysparkfuncs.java_method.__doc__ +def try_reflect(*cols: 
"ColumnOrName") -> Column: + return _invoke_function_over_columns("try_reflect", *cols) + + +try_reflect.__doc__ = pysparkfuncs.try_reflect.__doc__ + + def version() -> Column: return _invoke_function_over_columns("version") diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py index 52707217bda..d025b13cd10 100644 --- a/python/pyspark/sql/functions.py +++ b/python/pyspark/sql/functions.py @@ -461,6 +461,7 @@ __all__ = [ "try_divide", "try_element_at", "try_multiply", + "try_reflect", "try_subtract", "try_sum", "try_to_binary", @@ -16110,6 +16111,34 @@ def java_method(*cols: "ColumnOrName") -> Column: return _invoke_function_over_seq_of_columns("java_method", cols) + +@try_remote_functions +def try_reflect(*cols: "ColumnOrName") -> Column: + """ + This is a special version of `reflect` that performs the same operation, but returns a NULL + value instead of raising an error if the invoked method throws an exception. + + + .. versionadded:: 4.0.0 + + Parameters + ---------- + cols : :class:`~pyspark.sql.Column` or str + the first element should be a literal string for the class name, + and the second element should be a literal string for the method name, + and the remaining are input arguments to the Java method. + + Examples + -------- + >>> from pyspark.sql import functions as sf + >>> df = spark.createDataFrame([("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2",)], ["a"]) + >>> df.select( + ... sf.try_reflect(sf.lit("java.util.UUID"), sf.lit("fromString"), df.a) + ...
).collect() + [Row(try_reflect(java.util.UUID, fromString, a)='a5cf6c42-0c85-418f-af6c-3e4e5b1328f2')] + """ + return _invoke_function_over_seq_of_columns("try_reflect", cols) + + @try_remote_functions def version() -> Column: """ diff --git a/python/pyspark/sql/tests/test_functions.py b/python/pyspark/sql/tests/test_functions.py index 218e8eb060b..5a8e36d287c 100644 --- a/python/pyspark/sql/tests/test_functions.py +++ b/python/pyspark/sql/tests/test_functions.py @@ -87,8 +87,6 @@ class FunctionsTestsMixin: # https://issues.apache.org/jira/browse/SPARK-44788 "from_xml", "schema_of_xml", - # TODO: reflect function will soon be added and removed from this list - "try_reflect", } self.assertEqual( diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index dcfe10f9a4d..d4e271db5b2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -3683,7 +3683,8 @@ object functions { call_function("java_method", cols: _*) /** - * Calls a method with reflection. + * This is a special version of `reflect` that performs the same operation, but returns a NULL + * value instead of raising an error if the invoked method throws an exception. * * @group misc_funcs * @since 4.0.0 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org