Repository: spark Updated Branches: refs/heads/branch-2.2 ac9a0f692 -> 7bf25e086
[SPARK-22146] FileNotFoundException while reading ORC files containing special characters ## What changes were proposed in this pull request? Reading ORC files containing special characters like '%' fails with a FileNotFoundException. This PR aims to fix the problem. ## How was this patch tested? Added UT. Author: Marco Gaido <marcogaid...@gmail.com> Author: Marco Gaido <mga...@hortonworks.com> Closes #19368 from mgaido91/SPARK-22146. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7bf25e08 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7bf25e08 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7bf25e08 Branch: refs/heads/branch-2.2 Commit: 7bf25e086729782c62b8189e7417b86fa720553d Parents: ac9a0f6 Author: Marco Gaido <marcogaid...@gmail.com> Authored: Thu Sep 28 23:14:53 2017 -0700 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Fri Sep 29 09:05:15 2017 -0700 ---------------------------------------------------------------------- .../org/apache/spark/sql/hive/orc/OrcFileFormat.scala | 2 +- .../spark/sql/hive/MetastoreDataSourcesSuite.scala | 12 +++++++++++- 2 files changed, 12 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/7bf25e08/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala index 3a34ec5..6b76cfa 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFileFormat.scala @@ -58,7 +58,7 @@ class OrcFileFormat extends FileFormat with DataSourceRegister with Serializable options: Map[String, String], files: Seq[FileStatus]): Option[StructType] = { OrcFileOperator.readSchema( - files.map(_.getPath.toUri.toString), + files.map(_.getPath.toString), Some(sparkSession.sessionState.newHadoopConf()) ) } http://git-wip-us.apache.org/repos/asf/spark/blob/7bf25e08/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala index 07d641d..32e97eb 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/MetastoreDataSourcesSuite.scala @@ -998,7 +998,6 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv spark.sql("""drop database if exists testdb8156 CASCADE""") } - test("skip hive metadata on table creation") { withTempDir { tempPath => val schema = StructType((1 to 5).map(i => StructField(s"c_$i", StringType))) @@ -1350,6 +1349,17 @@ class MetastoreDataSourcesSuite extends QueryTest with SQLTestUtils with TestHiv } } + Seq("orc", "parquet", "csv", "json", "text").foreach { format => + test(s"SPARK-22146: read files containing special characters using $format") { + val nameWithSpecialChars = s"sp&cial%chars" + withTempDir { dir => + val tmpFile = s"$dir/$nameWithSpecialChars" + spark.createDataset(Seq("a", "b")).write.format(format).save(tmpFile) + spark.read.format(format).load(tmpFile) + } + } + } + private def withDebugMode(f: => Unit): Unit = { val previousValue = sparkSession.sparkContext.conf.get(DEBUG_MODE) try { --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org