Github user srowen commented on a diff in the pull request:
https://github.com/apache/spark/pull/20611#discussion_r206119389
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/command/tables.scala ---
@@ -303,94 +303,44 @@ case class LoadDataCommand(
s"partitioned, but a partition spec was provided.")
}
}
-
- val loadPath =
+ val loadPath = {
if (isLocal) {
- val uri = Utils.resolveURI(path)
- val file = new File(uri.getPath)
- val exists = if (file.getAbsolutePath.contains("*")) {
- val fileSystem = FileSystems.getDefault
- val dir = file.getParentFile.getAbsolutePath
- if (dir.contains("*")) {
- throw new AnalysisException(
- s"LOAD DATA input path allows only filename wildcard: $path")
- }
-
- // Note that special characters such as "*" on Windows are not allowed as a path.
- // Calling `WindowsFileSystem.getPath` throws an exception if there are in the path.
- val dirPath = fileSystem.getPath(dir)
- val pathPattern = new File(dirPath.toAbsolutePath.toString, file.getName).toURI.getPath
- val safePathPattern = if (Utils.isWindows) {
- // On Windows, the pattern should not start with slashes for absolute file paths.
- pathPattern.stripPrefix("/")
- } else {
- pathPattern
- }
- val files = new File(dir).listFiles()
- if (files == null) {
- false
- } else {
- val matcher = fileSystem.getPathMatcher("glob:" + safePathPattern)
- files.exists(f => matcher.matches(fileSystem.getPath(f.getAbsolutePath)))
- }
- } else {
- new File(file.getAbsolutePath).exists()
- }
- if (!exists) {
- throw new AnalysisException(s"LOAD DATA input path does not
exist: $path")
- }
- uri
+ val localFS = FileContext.getLocalFSFileContext()
+ localFS.makeQualified(new Path(path))
} else {
- val uri = new URI(path)
- val hdfsUri = if (uri.getScheme() != null && uri.getAuthority() != null) {
- uri
- } else {
- // Follow Hive's behavior:
- // If no schema or authority is provided with non-local inpath,
- // we will use hadoop configuration "fs.defaultFS".
- val defaultFSConf = sparkSession.sessionState.newHadoopConf().get("fs.defaultFS")
- val defaultFS = if (defaultFSConf == null) {
- new URI("")
- } else {
- new URI(defaultFSConf)
- }
-
- val scheme = if (uri.getScheme() != null) {
--- End diff --
I mean, the Hadoop method you are calling is marked as one we are not supposed to call. The code you are replacing looks similar to the implementation of the method you are calling now, and I don't think that's a coincidence. Maybe the old code was deliberately based on an older implementation of `makeQualified`? How about just extracting the logic of `makeQualified` here? That's roughly what the existing code already is.
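For concreteness, here is a rough sketch of what extracting that logic could look like. The helper name and its placement are purely illustrative, and the resolution rules are only meant to mirror Hadoop's `Path.makeQualified`, not to be the final implementation:

```scala
import java.net.{URI, URISyntaxException}

import org.apache.hadoop.fs.Path

// Illustrative helper (name and placement are hypothetical): qualify `path`
// against the default filesystem URI and the working directory, following the
// same resolution rules as Hadoop's Path.makeQualified, so we don't need to
// call the restricted FileContext API.
def makeQualified(defaultUri: URI, workingDir: Path, path: Path): Path = {
  // Resolve relative paths against the working directory first.
  val resolved = if (path.isAbsolute) path else new Path(workingDir, path)
  val pathUri = resolved.toUri
  val needsQualification =
    pathUri.getScheme == null ||
      (pathUri.getAuthority == null && defaultUri.getAuthority != null)
  if (needsQualification) {
    // Borrow the missing scheme/authority from the default filesystem.
    val scheme = Option(pathUri.getScheme).getOrElse(defaultUri.getScheme)
    val authority = Option(pathUri.getAuthority)
      .orElse(Option(defaultUri.getAuthority))
      .getOrElse("")
    try {
      new Path(new URI(scheme, authority, pathUri.getPath, null, pathUri.getFragment))
    } catch {
      case e: URISyntaxException => throw new IllegalArgumentException(e)
    }
  } else {
    resolved
  }
}
```

The non-local branch could then keep the existing `fs.defaultFS` fallback and call something like `makeQualified(defaultFS, workingDir, new Path(path))`, which preserves the Hive-compatible behavior without depending on the method you're calling now.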
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]