Repository: spark Updated Branches: refs/heads/master e9d3ca0b7 -> e91b60771
[SPARK-25918][SQL] LOAD DATA LOCAL INPATH should handle a relative path ## What changes were proposed in this pull request? Unfortunately, it seems that we missed this in 2.4.0. In Spark 2.4, if the default file system is not the local file system, `LOAD DATA LOCAL INPATH` only works with absolute paths. This PR fixes it so that relative paths are supported as well. This is a regression in 2.4.0. ```scala $ ls kv1.txt kv1.txt scala> spark.sql("LOAD DATA LOCAL INPATH 'kv1.txt' INTO TABLE t") org.apache.spark.sql.AnalysisException: LOAD DATA input path does not exist: kv1.txt; ``` ## How was this patch tested? Pass the Jenkins with the newly added test case in HiveCommandSuite. Closes #22927 from dongjoon-hyun/SPARK-LOAD. Authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e91b6077 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e91b6077 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e91b6077 Branch: refs/heads/master Commit: e91b607719886b57d1550a70c0f9df4342d72989 Parents: e9d3ca0 Author: Dongjoon Hyun <dongj...@apache.org> Authored: Thu Nov 1 23:18:20 2018 -0700 Committer: Dongjoon Hyun <dongj...@apache.org> Committed: Thu Nov 1 23:18:20 2018 -0700 ---------------------------------------------------------------------- .../org/apache/spark/sql/execution/command/tables.scala | 5 +++-- .../apache/spark/sql/hive/execution/HiveCommandSuite.scala | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/e91b6077/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala 
index 871eba4..823dc0d 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala @@ -376,7 +376,8 @@ object LoadDataCommand { * @return qualified path object */ private[sql] def makeQualified(defaultUri: URI, workingDir: Path, path: Path): Path = { - val pathUri = if (path.isAbsolute()) path.toUri() else new Path(workingDir, path).toUri() + val newPath = new Path(workingDir, path) + val pathUri = if (path.isAbsolute()) path.toUri() else newPath.toUri() if (pathUri.getScheme == null || pathUri.getAuthority == null && defaultUri.getAuthority != null) { val scheme = if (pathUri.getScheme == null) defaultUri.getScheme else pathUri.getScheme @@ -393,7 +394,7 @@ object LoadDataCommand { throw new IllegalArgumentException(e) } } else { - path + newPath } } } http://git-wip-us.apache.org/repos/asf/spark/blob/e91b6077/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala index 6937e97..9147a98 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveCommandSuite.scala @@ -20,11 +20,13 @@ package org.apache.spark.sql.hive.execution import java.io.File import com.google.common.io.Files +import org.apache.hadoop.fs.{FileContext, FsConstants, Path} import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} import org.apache.spark.sql.catalyst.TableIdentifier import org.apache.spark.sql.catalyst.analysis.NoSuchTableException import org.apache.spark.sql.catalyst.catalog.{CatalogStorageFormat, CatalogTable, CatalogTableType} +import 
org.apache.spark.sql.execution.command.LoadDataCommand import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils import org.apache.spark.sql.types.StructType @@ -439,4 +441,11 @@ class HiveCommandSuite extends QueryTest with SQLTestUtils with TestHiveSingleto } } + test("SPARK-25918: LOAD DATA LOCAL INPATH should handle a relative path") { + val localFS = FileContext.getLocalFSFileContext() + val workingDir = localFS.getWorkingDirectory + val r = LoadDataCommand.makeQualified( + FsConstants.LOCAL_FS_URI, workingDir, new Path("kv1.txt")) + assert(r === new Path(s"$workingDir/kv1.txt")) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org