bersprockets commented on a change in pull request #29737:
URL: https://github.com/apache/spark/pull/29737#discussion_r489034283



##########
File path: 
sql/hive/src/test/scala/org/apache/spark/sql/hive/orc/HiveOrcQuerySuite.scala
##########
@@ -316,4 +317,24 @@ class HiveOrcQuerySuite extends OrcQueryTest with 
TestHiveSingleton {
       }
     }
   }
+
+  test("SPARK-32864: Support ORC forced positional evolution") {
+    Seq("native", "hive").foreach { orcImpl =>
+      withSQLConf(SQLConf.ORC_IMPLEMENTATION.key -> orcImpl,
+        OrcConf.FORCE_POSITIONAL_EVOLUTION.getAttribute -> "true") {
+        withTempPath { f =>
+          val path = f.getCanonicalPath
+          Seq(1 -> 2).toDF("c1", "c2").write.orc(path)
+          checkAnswer(spark.read.orc(path), Row(1, 2))
+
+          withSQLConf(HiveUtils.CONVERT_METASTORE_ORC.key -> "true") { // 
default since 2.3.0
+            withTable("t") {
+              sql(s"CREATE EXTERNAL TABLE t(c3 INT, c4 INT) STORED AS ORC 
LOCATION '$path'")
+              checkAnswer(spark.table("t"), Row(1, 2))

Review comment:
       Should there also be a test with orc.force.positional.evolution=false to 
verify that the answer is then Row(null, null), since the table columns 
(c3, c4) do not match the file's column names (c1, c2)? Or is that case 
already covered by an existing test elsewhere?

##########
File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/orc/OrcUtils.scala
##########
@@ -142,11 +142,12 @@ object OrcUtils extends Logging {
       reader: Reader,
       conf: Configuration): Option[(Array[Int], Boolean)] = {
     val orcFieldNames = reader.getSchema.getFieldNames.asScala
+    val forcePositionalEvolution = 
OrcConf.FORCE_POSITIONAL_EVOLUTION.getBoolean(conf)
     if (orcFieldNames.isEmpty) {
       // SPARK-8501: Some old empty ORC files always have an empty schema 
stored in their footer.
       None
     } else {
-      if (orcFieldNames.forall(_.startsWith("_col"))) {
+      if (forcePositionalEvolution || 
orcFieldNames.forall(_.startsWith("_col"))) {
         // This is a ORC file written by Hive, no field names in the physical 
schema, assume the

Review comment:
       >This is a ORC file written by Hive, no field names in the physical 
schema
   
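   The comment should probably be updated to reflect your change: this branch 
   is now also taken when forcePositionalEvolution is true, not only for 
   Hive-written files whose field names all start with "_col".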




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to