Github user zjffdu commented on a diff in the pull request: https://github.com/apache/spark/pull/14180#discussion_r75821756 --- Diff: python/pyspark/context.py --- @@ -797,21 +824,65 @@ def clearFiles(self): def addPyFile(self, path): """ - Add a .py or .zip dependency for all tasks to be executed on this + Add a .py, .zip or .egg dependency for all tasks to be executed on this SparkContext in the future. The C{path} passed can be either a local file, a file in HDFS (or other Hadoop-supported filesystems), or an HTTP, HTTPS or FTP URI. + Note that .whl should not be handled by this method """ + if not path: + return self.addFile(path) - (dirname, filename) = os.path.split(path) # dirname may be directory or HDFS/S3 prefix - if filename[-4:].lower() in self.PACKAGE_EXTENSIONS: + + (_dirname, filename) = os.path.split(path) # dirname may be directory or HDFS/S3 prefix + extname = os.path.splitext(path)[1].lower() + if extname == '.whl': + return + + if extname in self.PACKAGE_EXTENSIONS: self._python_includes.append(filename) - # for tests in local mode - sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename)) + if extname != '.whl': + # for tests in local mode + # Prepend the python package (except for *.whl) to sys.path + sys.path.insert(1, os.path.join(SparkFiles.getRootDirectory(), filename)) if sys.version > '3': import importlib importlib.invalidate_caches() + def _installWheelFiles(self, paths, quiet=True, upgrade=True, no_deps=True, no_index=True): + """ + Install .whl files at once by pip install. 
We are garantee to have 'pip' module available + since presence of whl in py-files, or in a wheelhouse, triggered the installation of a + virtualenv + """ + root_dir = SparkFiles.getRootDirectory() + paths = { + os.path.join(root_dir, os.path.basename(path)) + for path in paths + if os.path.splitext(path)[1].lower() == '.whl' + } + if not paths: + return + + pip_args = [ + '--find-links', root_dir, + '--target', os.path.join(root_dir, 'site-packages'), + ] + if quiet: + pip_args.append('--quiet') + if upgrade: + pip_args.append('--upgrade') + if no_deps: + pip_args.append('--no-deps') + if no_index: + pip_args.append('--no-index') + pip_args.extend(paths) + + # We had this dependency here to avoid general script case, ie when not in a virtualenv, + # where pip might not be installed + from pip.commands.install import InstallCommand as pip_InstallCommand + pip_InstallCommand().main(args=pip_args) + --- End diff -- Why install wheel files here? Shouldn't that be done in `PythonWorkerFactory.scala`?
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org