vanzin commented on a change in pull request #25910: [SPARK-28762][CORE] Read
JAR main class if JAR is not located in local file system
URL: https://github.com/apache/spark/pull/25910#discussion_r335712806
##########
File path: core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
##########
@@ -203,6 +204,52 @@ private[spark] class SparkSubmit extends Logging {
}
}
+ /**
+ * Tries to read the mainClass from the JAR manifest if not already set.
+ *
+ * Works with non-local JARs as well.
+ *
+ * @param args Spark submit arguments
+ * @param hadoopConf Hadoop configuration
+ * @param jarPath Path to JAR file, can be remote
+ * @return the fully qualified name of the main class, or null if not found
+ */
+ private def resolveMainClassIfNeeded(
+ args: SparkSubmitArguments,
+ hadoopConf: HadoopConfiguration,
+ jarPath: String
+ ): String = {
+ if (args.mainClass != null) {
+ return args.mainClass
+ }
+
+ var mainClass: String = null
+ if (args.mainClass == null && !args.isPython && !args.isR && jarPath !=
null) {
+ val uri = new URI(jarPath)
+ val uriScheme = uri.getScheme
+
+ try {
+ uriScheme match {
+ case "file" =>
+ // If local file, probably more stable to use JarFile than Hadoop's FileSystem class
+ Utils.tryWithResource(new JarFile(uri.getPath)) { jar =>
+ mainClass =
jar.getManifest.getMainAttributes.getValue("Main-Class")
+ }
+ case _ =>
+ val fs = FileSystem.get(uri, hadoopConf)
Review comment:
Also, you could use this code (the Hadoop `FileSystem` path) for both local and remote files, right? Then the separate `file` case would not be needed.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]