Github user dongjoon-hyun commented on a diff in the pull request:
https://github.com/apache/spark/pull/18979#discussion_r144595826
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/BasicWriteStatsTracker.scala
---
@@ -44,20 +47,32 @@ case class BasicWriteTaskStats(
* @param hadoopConf
*/
class BasicWriteTaskStatsTracker(hadoopConf: Configuration)
- extends WriteTaskStatsTracker {
+ extends WriteTaskStatsTracker with Logging {
private[this] var numPartitions: Int = 0
private[this] var numFiles: Int = 0
+ private[this] var submittedFiles: Int = 0
private[this] var numBytes: Long = 0L
private[this] var numRows: Long = 0L
- private[this] var curFile: String = null
-
+ private[this] var curFile: Option[String] = None
- private def getFileSize(filePath: String): Long = {
+ /**
+ * Get the size of the file expected to have been written by a worker.
+ * @param filePath path to the file
+ * @return the file size or None if the file was not found.
+ */
+ private def getFileSize(filePath: String): Option[Long] = {
val path = new Path(filePath)
val fs = path.getFileSystem(hadoopConf)
- fs.getFileStatus(path).getLen()
+ try {
+ Some(fs.getFileStatus(path).getLen())
+ } catch {
+ case e: FileNotFoundException =>
+ // may arise against eventually consistent object stores
+ logDebug(s"File $path is not yet visible", e)
--- End diff --
The error messages look okay to me. First, this is only a debug message.
Second, the ORC writer bug will be fixed in Spark 2.3 anyway.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]