cloud-fan commented on a change in pull request #33888:
URL: https://github.com/apache/spark/pull/33888#discussion_r703341729



##########
File path: sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
##########
@@ -901,6 +901,91 @@ abstract class ParquetQuerySuite extends QueryTest with 
ParquetTest with SharedS
       }
     }
   }
+
+  test("SPARK-36634: Support access and read parquet file by column index") {
+    withTempDir { dir =>
+      val loc = s"file:///$dir/t"
+
+      withTable("t1", "t2", "t3") {
+        sql(s"create table t1 (my_id int, my_name string) using parquet 
location '$loc'")
+        sql(s"create table t2 (myid int, myName string) using parquet location 
'$loc'")
+        sql("insert into t1 select 1, 'apache'")
+        sql("insert into t2 select 2, 'software'")
+        sql("insert into t2 select 3, 'foundation'")
+        sql(s"create table t3 (myid int, myname string, myage int) using 
parquet location '$loc'")
+
+        withSQLConf((SQLConf.PARQUET_ACCESS_BY_ORDINAL.key, "false")) {
+          checkAnswer(sql("select my_id from t1"), Seq(Row(1), Row(null), 
Row(null)))
+          checkAnswer(sql("select my_id, my_name from t1"),
+            Seq(Row(1, "apache"), Row(null, null), Row(null, null)))
+          assert(sql("select my_id, my_name from t1 where my_id=2").isEmpty)
+          checkAnswer(sql("select myid, myname, myage from t3"),
+            Seq(Row(2, "software", null),
+              Row(3, "foundation", null),
+              Row(null, null, null)))
+        }
+
+        sql("insert into t3 select 4, 'spark', 11")
+
+        withAllParquetReaders {
+          withSQLConf((SQLConf.PARQUET_ACCESS_BY_ORDINAL.key, "true")) {
+            checkAnswer(sql("select my_id from t1"), Seq(Row(1), Row(2), 
Row(3)))
+            val e1 = {
+              intercept[SparkException](sql("select my_name from 
t1").collect())
+            }
+            assert(e1.getCause.getMessage.contains("Parquet column cannot be 
converted in"))

Review comment:
       Hmm, so this feature breaks column pruning: as the test above shows,
selecting only `my_name` from `t1` fails with a SparkException. How useful is
this feature then?




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to