spark git commit: [SPARK-22146] FileNotFoundException while reading ORC files containing special characters

lixiao Fri, 29 Sep 2017 09:07:13 -0700

Repository: spark
Updated Branches:
  refs/heads/branch-2.2 ac9a0f692 -> 7bf25e086



[SPARK-22146] FileNotFoundException while reading ORC files containing special 
characters

## What changes were proposed in this pull request?

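Reading ORC files whose paths contain special characters like '%' fails with a 
FileNotFoundException.
This PR fixes the problem by passing `getPath.toString` instead of 
`getPath.toUri.toString` to `OrcFileOperator.readSchema`.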

## How was this patch tested?

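Added a unit test (UT) to MetastoreDataSourcesSuite that writes and then reads 
files under a path containing special characters ("sp&cial%chars") for the orc, 
parquet, csv, json and text formats.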

Author: Marco Gaido <marcogaid...@gmail.com>
Author: Marco Gaido <mga...@hortonworks.com>

Closes #19368 from mgaido91/SPARK-22146.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7bf25e08
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7bf25e08
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7bf25e08

Branch: refs/heads/branch-2.2
Commit: 7bf25e086729782c62b8189e7417b86fa720553d
Parents: ac9a0f6
Author: Marco Gaido <marcogaid...@gmail.com>
Authored: Thu Sep 28 23:14:53 2017 -0700
Committer: gatorsmile <gatorsm...@gmail.com>
Committed: Fri Sep 29 09:05:15 2017 -0700

----------------------------------------------------------------------
 .../org/apache/spark/sql/hive/orc/OrcFileFormat.scala   |  2 +-
 .../spark/sql/hive/MetastoreDataSourcesSuite.scala      | 12 +++++++++++-
 2 files changed, 12 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7bf25e08/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
index 3a34ec5..6b76cfa 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala
@@ -58,7 +58,7 @@ class OrcFileFormat extends FileFormat with 
DataSourceRegister with Serializable
       options: Map[String, String],
       files: Seq[FileStatus]): Option[StructType] = {
     OrcFileOperator.readSchema(
-      files.map(_.getPath.toUri.toString),
+      files.map(_.getPath.toString),
       Some(sparkSession.sessionState.newHadoopConf())
     )
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/7bf25e08/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
index 07d641d..32e97eb 100644
--- 
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
+++ 
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala
@@ -998,7 +998,6 @@ class MetastoreDataSourcesSuite extends QueryTest with 
SQLTestUtils with TestHiv
     spark.sql("""drop database if exists testdb8156 CASCADE""")
   }
 
-
   test("skip hive metadata on table creation") {
     withTempDir { tempPath =>
       val schema = StructType((1 to 5).map(i => StructField(s"c_$i", 
StringType)))
@@ -1350,6 +1349,17 @@ class MetastoreDataSourcesSuite extends QueryTest with 
SQLTestUtils with TestHiv
     }
   }
 
+  Seq("orc", "parquet", "csv", "json", "text").foreach { format =>
+    test(s"SPARK-22146: read files containing special characters using 
$format") {
+      val nameWithSpecialChars = s"sp&cial%chars"
+      withTempDir { dir =>
+        val tmpFile = s"$dir/$nameWithSpecialChars"
+        spark.createDataset(Seq("a", "b")).write.format(format).save(tmpFile)
+        spark.read.format(format).load(tmpFile)
+      }
+    }
+  }
+
   private def withDebugMode(f: => Unit): Unit = {
     val previousValue = sparkSession.sparkContext.conf.get(DEBUG_MODE)
     try {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-22146] FileNotFoundException while reading ORC files containing special characters

Reply via email to