dtenedor commented on code in PR #43611:
URL: https://github.com/apache/spark/pull/43611#discussion_r1401278217
##########
python/pyspark/sql/worker/analyze_udtf.py:
##########
@@ -116,12 +118,89 @@ def main(infile: IO, outfile: IO) -> None:
handler = read_udtf(infile)
args, kwargs = read_arguments(infile)
+    error_prefix = f"Failed to evaluate the user-defined table function '{handler.__name__}'"
+
+ def format_error(msg: str) -> str:
+ return dedent(msg).replace("\n", " ")
+
+    # Check that the arguments provided to the UDTF call match the expected parameters defined
+    # in the static 'analyze' method signature.
+ try:
+        inspect.signature(handler.analyze).bind(*args, **kwargs)  # type: ignore[attr-defined]
+ except TypeError as e:
+ # The UDTF call's arguments did not match the expected signature.
+ raise PySparkValueError(
+ format_error(
+ f"""
+                {error_prefix} because the function arguments did not match the expected
+                signature of the static 'analyze' method ({e}). Please update the query so that
+                this table function call provides arguments matching the expected signature, or
+                else update the table function so that its static 'analyze' method accepts the
+                provided arguments, and then try the query again."""
+ )
+ )
+
+ # Invoke the UDTF's 'analyze' method.
result = handler.analyze(*args, **kwargs) # type: ignore[attr-defined]
+ # Check invariants about the 'analyze' method after running it.
if not isinstance(result, AnalyzeResult):
raise PySparkValueError(
- "Output of `analyze` static method of Python UDTFs expects "
- f"a pyspark.sql.udtf.AnalyzeResult but got: {type(result)}"
+ format_error(
+                f"""
+                {error_prefix} because the static 'analyze' method expects a result of type
+                pyspark.sql.udtf.AnalyzeResult, but instead this method returned a value of
+                type: {type(result)}"""
+ )
Review Comment:
Good question: these use the error class
`TABLE_VALUED_FUNCTION_FAILED_TO_ANALYZE_IN_PYTHON`. Each of these strings goes
in the `msg` parameter [1].
[1]
https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/execution/python/UserDefinedPythonFunction.scala#L229
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]