Github user zjffdu commented on a diff in the pull request:
https://github.com/apache/spark/pull/14180#discussion_r75821756
--- Diff: python/pyspark/context.py ---
@@ -797,21 +824,65 @@ def clearFiles(self):
def addPyFile(self, path):
"""
- Add a .py or .zip dependency for all tasks to be executed on this
+ Add a .py, .zip or .egg dependency for all tasks to be executed on
this
SparkContext in the future. The C{path} passed can be either a
local
file, a file in HDFS (or other Hadoop-supported filesystems), or an
HTTP, HTTPS or FTP URI.
+ Note that .whl should not be handled by this method
"""
+ if not path:
+ return
self.addFile(path)
- (dirname, filename) = os.path.split(path) # dirname may be
directory or HDFS/S3 prefix
- if filename[-4:].lower() in self.PACKAGE_EXTENSIONS:
+
+ (_dirname, filename) = os.path.split(path) # dirname may be
directory or HDFS/S3 prefix
+ extname = os.path.splitext(path)[1].lower()
+ if extname == '.whl':
+ return
+
+ if extname in self.PACKAGE_EXTENSIONS:
self._python_includes.append(filename)
- # for tests in local mode
- sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(),
filename))
+ if extname != '.whl':
+ # for tests in local mode
+ # Prepend the python package (except for *.whl) to sys.path
+ sys.path.insert(1,
os.path.join(SparkFiles.getRootDirectory(), filename))
if sys.version > '3':
import importlib
importlib.invalidate_caches()
+ def _installWheelFiles(self, paths, quiet=True, upgrade=True,
no_deps=True, no_index=True):
+ """
+ Install .whl files at once by pip install. We are guaranteed to have
the 'pip' module available
+ since presence of whl in py-files, or in a wheelhouse, triggered
the installation of a
+ virtualenv
+ """
+ root_dir = SparkFiles.getRootDirectory()
+ paths = {
+ os.path.join(root_dir, os.path.basename(path))
+ for path in paths
+ if os.path.splitext(path)[1].lower() == '.whl'
+ }
+ if not paths:
+ return
+
+ pip_args = [
+ '--find-links', root_dir,
+ '--target', os.path.join(root_dir, 'site-packages'),
+ ]
+ if quiet:
+ pip_args.append('--quiet')
+ if upgrade:
+ pip_args.append('--upgrade')
+ if no_deps:
+ pip_args.append('--no-deps')
+ if no_index:
+ pip_args.append('--no-index')
+ pip_args.extend(paths)
+
+ # We had this dependency here to avoid general script case, ie
when not in a virtualenv,
+ # where pip might not be installed
+ from pip.commands.install import InstallCommand as
pip_InstallCommand
+ pip_InstallCommand().main(args=pip_args)
+
--- End diff --
Why install wheel files here? Shouldn't that be done in
`PythonWorkerFactory.scala`?
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]