cloud-fan commented on a change in pull request #33888:
URL: https://github.com/apache/spark/pull/33888#discussion_r704921836
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetQuerySuite.scala
##########
@@ -901,6 +901,91 @@ abstract class ParquetQuerySuite extends QueryTest with
ParquetTest with SharedS
}
}
}
+
+ test("SPARK-36634: Support access and read parquet file by column index") {
+ withTempDir { dir =>
+ val loc = s"file:///$dir/t"
+
+ withTable("t1", "t2", "t3") {
+ sql(s"create table t1 (my_id int, my_name string) using parquet
location '$loc'")
+ sql(s"create table t2 (myid int, myName string) using parquet location
'$loc'")
+ sql("insert into t1 select 1, 'apache'")
+ sql("insert into t2 select 2, 'software'")
+ sql("insert into t2 select 3, 'foundation'")
+ sql(s"create table t3 (myid int, myname string, myage int) using
parquet location '$loc'")
+
+ withSQLConf((SQLConf.PARQUET_ACCESS_BY_ORDINAL.key, "false")) {
+ checkAnswer(sql("select my_id from t1"), Seq(Row(1), Row(null),
Row(null)))
+ checkAnswer(sql("select my_id, my_name from t1"),
+ Seq(Row(1, "apache"), Row(null, null), Row(null, null)))
+ assert(sql("select my_id, my_name from t1 where my_id=2").isEmpty)
+ checkAnswer(sql("select myid, myname, myage from t3"),
+ Seq(Row(2, "software", null),
+ Row(3, "foundation", null),
+ Row(null, null, null)))
+ }
+
+ sql("insert into t3 select 4, 'spark', 11")
+
+ withAllParquetReaders {
+ withSQLConf((SQLConf.PARQUET_ACCESS_BY_ORDINAL.key, "true")) {
+ checkAnswer(sql("select my_id from t1"), Seq(Row(1), Row(2),
Row(3)))
+ val e1 = {
+ intercept[SparkException](sql("select my_name from
t1").collect())
+ }
+ assert(e1.getCause.getMessage.contains("Parquet column cannot be
converted in"))
Review comment:
Think about a user running `SELECT x, y` and then `SELECT y`: they will see
that the column `y` has different values in each run, which is kind of a
correctness bug.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]