Github user HyukjinKwon commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21408#discussion_r190440692
  
    --- Diff: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/InMemoryFileIndex.scala ---
    @@ -311,14 +314,27 @@ object InMemoryFileIndex extends Logging {
             // The other constructor of LocatedFileStatus will call 
FileStatus.getPermission(),
             // which is very slow on some file system (RawLocalFileSystem, 
which is launch a
             // subprocess and parse the stdout).
    -        val locations = fs.getFileBlockLocations(f, 0, f.getLen)
    -        val lfs = new LocatedFileStatus(f.getLen, f.isDirectory, 
f.getReplication, f.getBlockSize,
    -          f.getModificationTime, 0, null, null, null, null, f.getPath, 
locations)
    -        if (f.isSymlink) {
    -          lfs.setSymlink(f.getSymlink)
    +        try {
    +          val locations = fs.getFileBlockLocations(f, 0, f.getLen)
    +          val lfs = new LocatedFileStatus(f.getLen, f.isDirectory, 
f.getReplication, f.getBlockSize,
    +            f.getModificationTime, 0, null, null, null, null, f.getPath, 
locations)
    +          if (f.isSymlink) {
    +            lfs.setSymlink(f.getSymlink)
    +          }
    +          Some(lfs)
    +        } catch {
    +          case _: FileNotFoundException =>
    +            missingFiles += f.getPath.toString
    +            None
             }
    -        lfs
         }
    +
    +    if (missingFiles.nonEmpty) {
    +      logWarning(s"The paths [${missingFiles.mkString(", ")}] were not 
found. " +
    +        "Were they deleted very recently?")
    --- End diff --
    
    Now the error messages should look like:
    
    ```
    the following files were missing during file scan:
      hdfs://.../rel/00171151/input/hyukjin/part-43011-...
      hdfs://.../rel/00171151/input/hyukjin/part-43012-...
      hdfs://.../rel/00171151/input/hyukjin/part-43013-...
      ...
    ```


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to