Repository: spark
Updated Branches:
  refs/heads/master e9d3ca0b7 -> e91b60771


[SPARK-25918][SQL] LOAD DATA LOCAL INPATH should handle a relative path

## What changes were proposed in this pull request?

Unfortunately, it seems that we missed this in 2.4.0. In Spark 2.4, if the 
default file system is not the local file system, `LOAD DATA LOCAL INPATH` only 
works in case of absolute paths. This PR aims to fix it to support relative 
paths. This is a regression in 2.4.0.

```scala
$ ls kv1.txt
kv1.txt

scala> spark.sql("LOAD DATA LOCAL INPATH 'kv1.txt' INTO TABLE t")
org.apache.spark.sql.AnalysisException: LOAD DATA input path does not exist: 
kv1.txt;
```

## How was this patch tested?

Pass the Jenkins tests.

Closes #22927 from dongjoon-hyun/SPARK-LOAD.

Authored-by: Dongjoon Hyun <dongj...@apache.org>
Signed-off-by: Dongjoon Hyun <dongj...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e91b6077
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e91b6077
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e91b6077

Branch: refs/heads/master
Commit: e91b607719886b57d1550a70c0f9df4342d72989
Parents: e9d3ca0
Author: Dongjoon Hyun <dongj...@apache.org>
Authored: Thu Nov 1 23:18:20 2018 -0700
Committer: Dongjoon Hyun <dongj...@apache.org>
Committed: Thu Nov 1 23:18:20 2018 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/execution/command/tables.scala     | 5 +++--
 .../apache/spark/sql/hive/execution/HiveCommandSuite.scala  | 9 +++++++++
 2 files changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e91b6077/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
index 871eba4..823dc0d 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala
@@ -376,7 +376,8 @@ object LoadDataCommand {
    * @return qualified path object
    */
   private[sql] def makeQualified(defaultUri: URI, workingDir: Path, path: 
Path): Path = {
-    val pathUri = if (path.isAbsolute()) path.toUri() else new 
Path(workingDir, path).toUri()
+    val newPath = new Path(workingDir, path)
+    val pathUri = if (path.isAbsolute()) path.toUri() else newPath.toUri()
     if (pathUri.getScheme == null || pathUri.getAuthority == null &&
         defaultUri.getAuthority != null) {
       val scheme = if (pathUri.getScheme == null) defaultUri.getScheme else 
pathUri.getScheme
@@ -393,7 +394,7 @@ object LoadDataCommand {
           throw new IllegalArgumentException(e)
       }
     } else {
-      path
+      newPath
     }
   }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/e91b6077/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
index 6937e97..9147a98 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala
@@ -20,11 +20,13 @@ package org.apache.spark.sql.hive.execution
 import java.io.File
 
 import com.google.common.io.Files
+import org.apache.hadoop.fs.{FileContext, FsConstants, Path}
 
 import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode}
 import org.apache.spark.sql.catalyst.TableIdentifier
 import org.apache.spark.sql.catalyst.analysis.NoSuchTableException
 import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, 
CatalogTable, CatalogTableType}
+import org.apache.spark.sql.execution.command.LoadDataCommand
 import org.apache.spark.sql.hive.test.TestHiveSingleton
 import org.apache.spark.sql.test.SQLTestUtils
 import org.apache.spark.sql.types.StructType
@@ -439,4 +441,11 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils 
with TestHiveSingleto
     }
   }
 
+  test("SPARK-25918: LOAD DATA LOCAL INPATH should handle a relative path") {
+    val localFS = FileContext.getLocalFSFileContext()
+    val workingDir = localFS.getWorkingDirectory
+    val r = LoadDataCommand.makeQualified(
+      FsConstants.LOCAL_FS_URI, workingDir, new Path("kv1.txt"))
+    assert(r === new Path(s"$workingDir/kv1.txt"))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to