Github user dongjoon-hyun commented on a diff in the pull request:
https://github.com/apache/spark/pull/20240#discussion_r161107677
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala
---
@@ -50,23 +50,35 @@ object OrcUtils extends Logging {
paths
}
- def readSchema(file: Path, conf: Configuration): Option[TypeDescription] = {
+ def readSchema(file: Path, conf: Configuration, ignoreCorruptFiles: Boolean)
+ : Option[TypeDescription] = {
val fs = file.getFileSystem(conf)
val readerOptions = OrcFile.readerOptions(conf).filesystem(fs)
- val reader = OrcFile.createReader(file, readerOptions)
- val schema = reader.getSchema
- if (schema.getFieldNames.size == 0) {
- None
- } else {
- Some(schema)
+ try {
+ val reader = OrcFile.createReader(file, readerOptions)
+ val schema = reader.getSchema
+ if (schema.getFieldNames.size == 0) {
+ None
+ } else {
+ Some(schema)
+ }
+ } catch {
+ case e: org.apache.orc.FileFormatException =>
+ if (true) {
--- End diff --
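Oops. [Editorial note: this presumably refers to the hard-coded `if (true)` on the line above, which looks like a leftover that should test the new `ignoreCorruptFiles` parameter instead.]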
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org