grundprinzip commented on code in PR #39137: URL: https://github.com/apache/spark/pull/39137#discussion_r1057107476
########## python/pyspark/errors/__init__.py: ########## @@ -0,0 +1,33 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from pyspark.errors.errors import * # noqa: F403 +from pyspark.errors.exceptions import PySparkException + +__all__ = [ # noqa: F405 + "PySparkException", + "columnInListError", + "higherOrderFunctionShouldReturnColumnError", + "notColumnError", Review Comment: Exporting a function that returns an instance of an error seems weird and indicates that the constructor is not well designed. ########## python/pyspark/errors/exceptions.py: ########## @@ -0,0 +1,92 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import cast, Optional + +from pyspark import SparkContext + +from py4j.java_gateway import JavaObject, is_instance_of + + +class PySparkException(Exception): Review Comment: Given that Spark Connect does not use the JVM, shouldn't the most abstract error be one without the JVM? ########## python/pyspark/errors/errors.py: ########## @@ -0,0 +1,93 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import Union, Any, Type +from pyspark.errors.exceptions import PySparkException +from py4j.java_gateway import JavaClass + + +def _get_pyspark_errors() -> JavaClass: + from pyspark.sql import SparkSession + + spark = SparkSession._getActiveSessionOrCreate() + assert spark._jvm is not None + return spark._jvm.org.apache.spark.python.errors.PySparkErrors + + +def column_in_list_error(func_name: str) -> "PySparkException": + pyspark_errors = _get_pyspark_errors() + e = pyspark_errors.columnInListError(func_name) Review Comment: Invoking a function on a Scala object just to access a JSON file feels wrong to me. ########## python/pyspark/sql/utils.py: ########## @@ -73,39 +74,6 @@ def __init__( self.cause = convert_exception(origin.getCause()) self._origin = origin - def __str__(self) -> str: Review Comment: I think it might make sense to move this back. The captured exception is actually thrown from the JVM, which is thus the place where a JVM backtrace is actually present. So it belongs here rather than in the parent class. ########## python/pyspark/errors/exceptions.py: ########## @@ -0,0 +1,92 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from typing import cast, Optional + +from pyspark import SparkContext + +from py4j.java_gateway import JavaObject, is_instance_of + + +class PySparkException(Exception): + """ + Base Exception for handling exceptions generated from PySpark. + """ + + def __init__( + self, + desc: Optional[str] = None, + stackTrace: Optional[str] = None, + origin: Optional[JavaObject] = None, + ): + # desc & stackTrace vs origin are mutually exclusive. + assert (origin is not None and desc is None and stackTrace is None) or ( + origin is None and desc is not None and stackTrace is not None + ) + + self.desc = desc if desc is not None else cast(JavaObject, origin).getMessage() + assert SparkContext._jvm is not None + self.stackTrace = ( + stackTrace + if stackTrace is not None + else (SparkContext._jvm.org.apache.spark.util.Utils.exceptionString(origin)) + ) + self._origin = origin + + def getMessageParameters(self) -> Optional[dict]: + assert SparkContext._gateway is not None + + gw = SparkContext._gateway + if self._origin is not None and is_instance_of( + gw, self._origin, "org.apache.spark.SparkThrowable" + ): + return self._origin.getMessageParameters() + else: + return None + + def __str__(self) -> str: + assert SparkContext._jvm is not None + + jvm = SparkContext._jvm + sql_conf = jvm.org.apache.spark.sql.internal.SQLConf.get() + debug_enabled = sql_conf.pysparkJVMStacktraceEnabled() + desc = self.desc + if debug_enabled: + desc = desc + "\n\nJVM stacktrace:\n%s" % self.stackTrace Review Comment: Why is this a JVM backtrace? ########## python/pyspark/sql/functions.py: ########## @@ -172,7 +184,7 @@ def lit(col: Any) -> Column: return col elif isinstance(col, list): if any(isinstance(c, Column) for c in col): - raise ValueError("lit does not allow a column in a list") + raise column_in_list_error(func_name="lit") Review Comment: I find the readability is reduced with those function names. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
