Github user zjffdu commented on a diff in the pull request:

    https://github.com/apache/spark/pull/14180#discussion_r75821756
  
    --- Diff: python/pyspark/context.py ---
    @@ -797,21 +824,65 @@ def clearFiles(self):
     
         def addPyFile(self, path):
             """
    -        Add a .py or .zip dependency for all tasks to be executed on this
    +        Add a .py, .zip or .egg dependency for all tasks to be executed on this
             SparkContext in the future.  The C{path} passed can be either a local
             file, a file in HDFS (or other Hadoop-supported filesystems), or an
             HTTP, HTTPS or FTP URI.
    +        Note that .whl files are not added to sys.path here; they are pip-installed separately.
             """
    +        if not path:
    +            return
             self.addFile(path)
    -        (dirname, filename) = os.path.split(path)  # dirname may be directory or HDFS/S3 prefix
    -        if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
    +
    +        (_dirname, filename) = os.path.split(path)  # dirname may be directory or HDFS/S3 prefix
    +        extname = os.path.splitext(path)[1].lower()
    +        if extname == '.whl':
    +            return
    +
    +        if extname in self.PACKAGE_EXTENSIONS:
                 self._python_includes.append(filename)
    -            # for tests in local mode
    -            sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename))
    +            if extname != '.whl':
    +                # for tests in local mode
    +                # Prepend the python package (except for *.whl) to sys.path
    +                sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename))
             if sys.version > '3':
                 import importlib
                 importlib.invalidate_caches()
     
    +    def _installWheelFiles(self, paths, quiet=True, upgrade=True, no_deps=True, no_index=True):
    +        """
    +        Install all .whl files at once with pip. The 'pip' module is guaranteed to
    +        be available, since the presence of a wheel in py-files, or in a wheelhouse,
    +        triggers the installation of a virtualenv.
    +        """
    +        root_dir = SparkFiles.getRootDirectory()
    +        paths = {
    +            os.path.join(root_dir, os.path.basename(path))
    +            for path in paths
    +            if os.path.splitext(path)[1].lower() == '.whl'
    +        }
    +        if not paths:
    +            return
    +
    +        pip_args = [
    +            '--find-links', root_dir,
    +            '--target', os.path.join(root_dir, 'site-packages'),
    +        ]
    +        if quiet:
    +            pip_args.append('--quiet')
    +        if upgrade:
    +            pip_args.append('--upgrade')
    +        if no_deps:
    +            pip_args.append('--no-deps')
    +        if no_index:
    +            pip_args.append('--no-index')
    +        pip_args.extend(paths)
    +
    +        # The import is done here, rather than at module level, to cover the general
    +        # script case, i.e. when not running in a virtualenv, where pip might not be
    +        # installed.
    +        from pip.commands.install import InstallCommand as pip_InstallCommand
    +        pip_InstallCommand().main(args=pip_args)
    +
    --- End diff --
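
    A minimal sketch of how the patched `addPyFile` would be driven; the
    dependency paths and app name below are hypothetical, for illustration
    only:

        from pyspark import SparkContext

        sc = SparkContext('local[2]', 'wheel-demo')

        # .zip/.egg dependencies keep the old behaviour: shipped to every
        # node and prepended to sys.path.
        sc.addPyFile('/tmp/deps/helpers.zip')

        # .whl dependencies are only shipped via addFile(); addPyFile()
        # returns before touching sys.path, and _installWheelFiles() later
        # pip-installs all shipped wheels in one batch.
        sc.addPyFile('/tmp/deps/mylib-1.0-py2.py3-none-any.whl')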
    
    Why install wheel files here? Shouldn't this be done in `PythonWorkerFactory.scala`?
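
    For reference, the pip invocation assembled in `_installWheelFiles` is
    roughly equivalent to the CLI form below. This is only a sketch of an
    alternative: going through `python -m pip` rather than the internal
    `pip.commands.install` API avoids depending on pip internals (the
    helper name here is made up):

        import os
        import subprocess
        import sys

        def install_wheels_via_cli(root_dir, wheel_paths):
            # Resolve wheels only from the files already shipped to
            # root_dir (--no-index/--find-links), skip dependency
            # resolution, and install under <root_dir>/site-packages.
            target = os.path.join(root_dir, 'site-packages')
            subprocess.check_call(
                [sys.executable, '-m', 'pip', 'install',
                 '--quiet', '--upgrade', '--no-deps', '--no-index',
                 '--find-links', root_dir, '--target', target]
                + sorted(wheel_paths))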

