rithwik-db commented on code in PR #40045:
URL: https://github.com/apache/spark/pull/40045#discussion_r1107891507
##########
python/pyspark/ml/torch/distributor.py:
##########
@@ -187,11 +187,9 @@ def _get_num_tasks(self) -> int:
return math.ceil(self.num_processes / task_gpu_amount)
else:
key = "spark.driver.resource.gpu.amount"
- if "gpu" not in self.sc.resources:
Review Comment:
@HyukjinKwon, this is the stack trace:
```
File /databricks/spark/python/pyspark/ml/torch/distributor.py:344, in
TorchDistributor.__init__(self, num_processes, local_mode, use_gpu)
314 def __init__(
315 self,
316 num_processes: int = 1,
317 local_mode: bool = True,
318 use_gpu: bool = True,
319 ):
320 """Initializes the distributor.
321
322 Parameters
(...)
342 If an active SparkSession is unavailable.
343 """
--> 344 super().__init__(num_processes, local_mode, use_gpu)
345 self.ssl_conf = "pytorch.spark.distributor.ignoreSsl" # type:
ignore
346 self._validate_input_params()
File /databricks/spark/python/pyspark/ml/torch/distributor.py:156, in
Distributor.__init__(self, num_processes, local_mode, use_gpu)
154 raise RuntimeError("An active SparkSession is required for the
distributor.")
155 self.sc = self.spark.sparkContext
--> 156 self.num_tasks = self._get_num_tasks()
157 self.ssl_conf = None
File /databricks/spark/python/pyspark/ml/torch/distributor.py:190, in
Distributor._get_num_tasks(self)
188 else:
189 key = "spark.driver.resource.gpu.amount"
--> 190 if "gpu" not in self.sc.resources:
191 raise RuntimeError("GPUs were unable to be found on the
driver.")
192 num_available_gpus = int(self.sc.getConf().get(key, "0"))
File /databricks/spark/python/pyspark/context.py:2545, in
SparkContext.resources(self)
2543 resources = {}
2544 jresources = self._jsc.resources()
-> 2545 for x in jresources:
2546 name = jresources[x].name()
2547 jaddresses = jresources[x].addresses()
File
/databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_collections.py:62,
in JavaIterator.next(self)
58 self._methods[self._next_name] = JavaMember(
59 self._next_name, self,
60 self._target_id, self._gateway_client)
61 try:
---> 62 return self._methods[self._next_name]()
63 except Py4JError:
64 raise StopIteration()
File
/databricks/spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py:1322,
in JavaMember.__call__(self, *args)
1316 command = proto.CALL_COMMAND_NAME +\
1317 self.command_header +\
1318 args_command +\
1319 proto.END_COMMAND_PART
1321 answer = self.gateway_client.send_command(command)
-> 1322 return_value = get_return_value(
1323 answer, self.gateway_client, self.target_id, self.name)
1325 for temp_arg in temp_args:
1326 if hasattr(temp_arg, "_detach"):
File /databricks/spark/python/pyspark/errors/exceptions.py:230, in
capture_sql_exception.<locals>.deco(*a, **kw)
228 return f(*a, **kw)
229 except Py4JJavaError as e:
--> 230 converted = convert_exception(e.java_exception)
231 if not isinstance(converted, UnknownException):
232 # Hide where the exception came from that shows a
non-Pythonic
233 # JVM exception message.
234 raise converted from None
File /databricks/spark/python/pyspark/errors/exceptions.py:198, in
convert_exception(e)
194 return SparkUpgradeException(origin=e)
196 # BEGIN-EDGE
197 # For Delta exception improvement.
--> 198 from delta.exceptions import _convert_delta_exception
200 delta_exception = _convert_delta_exception(e)
201 if delta_exception is not None:
File
/databricks/python_shell/dbruntime/PythonPackageImportsInstrumentation/__init__.py:171,
in _create_import_patch.<locals>.import_patch(name, globals, locals, fromlist,
level)
166 thread_local._nest_level += 1
168 try:
169 # Import the desired module. If you’re seeing this while
debugging a failed import,
170 # look at preceding stack frames for relevant error information.
--> 171 original_result = python_builtin_import(name, globals, locals,
fromlist, level)
173 is_root_import = thread_local._nest_level == 1
174 # `level` represents the number of leading dots in a relative
import statement.
175 # If it's zero, then this is an absolute import.
File /databricks/spark/python/delta/__init__.py:17
1 #
2 # Copyright (2021) The Delta Lake Project Authors.
3 #
(...)
14 # limitations under the License.
15 #
---> 17 from delta.tables import DeltaTable
18 from delta.pip_utils import configure_spark_with_delta_pip
20 __all__ = ['DeltaTable', 'configure_spark_with_delta_pip']
File
/databricks/python_shell/dbruntime/PythonPackageImportsInstrumentation/__init__.py:171,
in _create_import_patch.<locals>.import_patch(name, globals, locals, fromlist,
level)
166 thread_local._nest_level += 1
168 try:
169 # Import the desired module. If you’re seeing this while
debugging a failed import,
170 # look at preceding stack frames for relevant error information.
--> 171 original_result = python_builtin_import(name, globals, locals,
fromlist, level)
173 is_root_import = thread_local._nest_level == 1
174 # `level` represents the number of leading dots in a relative
import statement.
175 # If it's zero, then this is an absolute import.
File /databricks/spark/python/delta/tables.py:30
28 from pyspark.sql.column import _to_seq # type: ignore[attr-defined]
29 from pyspark.sql.types import DataType, StructType, StructField
---> 30 from pyspark.sql.connect.session import SparkSession as
RemoteSparkSession # Edge
33 if TYPE_CHECKING:
34 from py4j.java_gateway import JavaObject, JVMView # type:
ignore[import]
File
/databricks/python_shell/dbruntime/PythonPackageImportsInstrumentation/__init__.py:171,
in _create_import_patch.<locals>.import_patch(name, globals, locals, fromlist,
level)
166 thread_local._nest_level += 1
168 try:
169 # Import the desired module. If you’re seeing this while
debugging a failed import,
170 # look at preceding stack frames for relevant error information.
--> 171 original_result = python_builtin_import(name, globals, locals,
fromlist, level)
173 is_root_import = thread_local._nest_level == 1
174 # `level` represents the number of leading dots in a relative
import statement.
175 # If it's zero, then this is an absolute import.
File /databricks/spark/python/pyspark/sql/connect/session.py:19
1 #
2 # Licensed to the Apache Software Foundation (ASF) under one or more
3 # contributor license agreements. See the NOTICE file distributed
with
(...)
15 # limitations under the License.
16 #
17 from pyspark.sql.connect import check_dependencies
---> 19 check_dependencies(__name__, __file__)
21 import os
22 import warnings
File /databricks/spark/python/pyspark/sql/connect/__init__.py:42, in
check_dependencies(mod_name, file_name)
40 require_minimum_pandas_version()
41 require_minimum_pyarrow_version()
---> 42 require_minimum_grpc_version()
File /databricks/spark/python/pyspark/sql/pandas/utils.py:89, in
require_minimum_grpc_version()
85 raise ImportError(
86 "grpc >= %s must be installed; however, " "it was not
found." % minimum_grpc_version
87 ) from error
88 if LooseVersion(grpc.__version__) <
LooseVersion(minimum_grpc_version):
---> 89 raise ImportError(
90 "gRPC >= %s must be installed; however, "
91 "your version was %s." % (minimum_grpc_version,
grpc.__version__)
92 )
ImportError: gRPC >= 1.48.1 must be installed; however, your version was
1.42.0.)))
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]