Github user vanzin commented on a diff in the pull request:
https://github.com/apache/spark/pull/19130#discussion_r138694417
--- Diff:
core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala ---
@@ -897,6 +897,80 @@ class SparkSubmitSuite
sysProps("spark.submit.pyFiles") should (startWith("/"))
}
+ test("handle remote http(s) resources in yarn mode") {
+ val hadoopConf = new Configuration()
+ updateConfWithFakeS3Fs(hadoopConf)
+
+ val tmpDir = Utils.createTempDir()
+ val mainResource = File.createTempFile("tmpPy", ".py", tmpDir)
+ val tmpJar = TestUtils.createJarWithFiles(Map("test.resource" ->
"USER"), tmpDir)
+ val tmpJarPath = s"s3a://${new File(tmpJar.toURI).getAbsolutePath}"
+ // This assumes UT environment could access external network.
+ val remoteHttpJar =
+
"http://central.maven.org/maven2/io/dropwizard/metrics/metrics-core/" +
+ "3.2.4/metrics-core-3.2.4.jar"
+
+ val args = Seq(
+ "--class", UserClasspathFirstTest.getClass.getName.stripPrefix("$"),
+ "--name", "testApp",
+ "--master", "yarn",
+ "--deploy-mode", "client",
+ "--jars", s"$tmpJarPath,$remoteHttpJar",
+ s"s3a://$mainResource"
+ )
+
+ val appArgs = new SparkSubmitArguments(args)
+ val sysProps = SparkSubmit.prepareSubmitEnvironment(appArgs,
Some(hadoopConf))._3
+
+ // Resources in S3 should still be remote path, but remote http resource will be downloaded
--- End diff --
...still are...
Also I'm not sure I understand the comment.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]