HyukjinKwon commented on a change in pull request #31565:
URL: https://github.com/apache/spark/pull/31565#discussion_r577259333



##########
File path: core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
##########
@@ -1068,18 +1065,50 @@ object SparkSubmit extends CommandLineUtils with Logging {
     mainClass == "org.apache.spark.sql.hive.thriftserver.HiveThriftServer2"
   }
 
+  /**
+   * Extracts the path from a URI or returns the local path.
+   *
+   * This helper is used to determine whether the path or URI given as the driver to
+   * spark-submit is a Python or R file. It needs to parse the path as URI to deal
+   * with URIs that have a query fragment (such as AWS S3 presigned URLs).
+   *
+   * This was introduced to fix https://issues.apache.org/jira/browse/SPARK-34438
+   */
+  private def extractPath(localPathOrUri: String): String = {
+    try {
+      val uri = new URI(localPathOrUri)
+      return uri.getPath
+    } catch {
+      case _: URISyntaxException => return localPathOrUri
+    }
+  }
+
   /**
    * Return whether the given primary resource requires running python.
    */
   private[deploy] def isPython(res: String): Boolean = {
-    res != null && res.endsWith(".py") || res == PYSPARK_SHELL

Review comment:
       I would just do a one-line fix:
   
   ```diff
   diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
   index 8f1425fbb84..b2846d2e7a8 100644
   --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
   +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
   @@ -1072,14 +1072,16 @@ object SparkSubmit extends CommandLineUtils with Logging {
       * Return whether the given primary resource requires running python.
       */
      private[deploy] def isPython(res: String): Boolean = {
   -    res != null && res.endsWith(".py") || res == PYSPARK_SHELL
   +    // Try to extract the paths from URI to address fragments, see also SPARK-34438.
   +    res != null && Try(new URI(res).getPath).getOrElse(res).endsWith(".py") || res == PYSPARK_SHELL
      }
   
      /**
       * Return whether the given primary resource requires running R.
       */
      private[deploy] def isR(res: String): Boolean = {
   -    res != null && (res.endsWith(".R") || res.endsWith(".r")) || res == SPARKR_SHELL
   +    // Try to extract the paths from URI to address fragments, see also SPARK-34438.
   +    res != null && Try(new URI(res).getPath).getOrElse(res).endsWith(".r") || res == SPARKR_SHELL
      }
   
      private[deploy] def isInternal(res: String): Boolean = {
   ```




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to