mridulm commented on code in PR #42357:
URL: https://github.com/apache/spark/pull/42357#discussion_r1329512411
##########
resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala:
##########
@@ -533,9 +536,12 @@ private[spark] class Client(
// If preload is enabled, preload the statCache with the files in the
directories
val statCache = if (statCachePreloadEnabled) {
// Consider only following configurations, as they involve the
distribution of multiple files
- val files = sparkConf.get(SPARK_JARS).orNull ++
sparkConf.get(JARS_TO_DISTRIBUTE) ++
- sparkConf.get(FILES_TO_DISTRIBUTE) ++
sparkConf.get(ARCHIVES_TO_DISTRIBUTE) ++
- sparkConf.get(PY_FILES) ++ pySparkArchives
+ var files = sparkConf.get(JARS_TO_DISTRIBUTE) ++
sparkConf.get(FILES_TO_DISTRIBUTE) ++
+ sparkConf.get(ARCHIVES_TO_DISTRIBUTE) ++ sparkConf.get(PY_FILES) ++
pySparkArchives
+ if (!sparkConf.get(SPARK_JARS).isEmpty) {
Review Comment:
Why is this `if` condition needed? Can't we add it directly to `files` in the
previous line itself?
##########
resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/Client.scala:
##########
@@ -494,11 +494,14 @@ private[spark] class Client(
fsLookup: URI => FileSystem = FileSystem.get(_, hadoopConf)):
HashMap[URI, FileStatus] = {
val statCache = HashMap[URI, FileStatus]()
directoriesToBePreloaded(files).foreach { case (dir: URI, filesInDir:
HashSet[String]) =>
- fsLookup(dir).listStatus(new Path(dir)).filter(_.isFile()).
- filter(f => filesInDir.contains(f.getPath.getName)).foreach {
fileStatus =>
- val uri = fileStatus.getPath.toUri
+ fsLookup(dir).listStatus(new Path(dir), new PathFilter() {
+ override def accept(path: Path): Boolean =
filesInDir.contains(path.getName)
+ }).filter(_.isFile()).foreach { fileStatus =>
+ val uri = fileStatus.getPath.toUri
+ if (uri != null) {
Review Comment:
IIRC `uri` can't be `null` - why was this condition added?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]