This is an automated email from the ASF dual-hosted git repository.
beliefer pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 7d8b9b6698 [GLUTEN-11062][FOLLOW-UP][TEST] Add test cases for other Spark version (#11154)
7d8b9b6698 is described below
commit 7d8b9b6698b618761e785432ac2d4a34a68cb594
Author: Jiaan Geng <[email protected]>
AuthorDate: Mon Nov 24 20:09:20 2025 +0800
[GLUTEN-11062][FOLLOW-UP][TEST] Add test cases for other Spark version (#11154)
---
.../hive/execution/GlutenHiveSQLQuerySuite.scala | 1 -
.../hive/execution/GlutenHiveSQLQuerySuite.scala | 33 ++++++++++++++++++++-
.../hive/execution/GlutenHiveSQLQuerySuite.scala | 33 ++++++++++++++++++++-
.../hive/execution/GlutenHiveSQLQuerySuite.scala | 34 +++++++++++++++++++++-
.../hive/execution/GlutenHiveSQLQuerySuite.scala | 34 +++++++++++++++++++++-
5 files changed, 130 insertions(+), 5 deletions(-)
diff --git a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
index f3a9d85903..ac613214a4 100644
--- a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
+++ b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
@@ -100,5 +100,4 @@ class GlutenHiveSQLQuerySuite extends GlutenHiveSQLQuerySuiteBase {
}
}
}
-
}
diff --git a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
index 0b520ae321..b4b51c8ccd 100644
--- a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
+++ b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
@@ -23,7 +23,8 @@ import org.apache.spark.SparkConf
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
import org.apache.spark.sql.execution.SparkPlan
-import org.apache.spark.sql.hive.HiveTableScanExecTransformer
+import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveTableScanExecTransformer}
+import org.apache.spark.sql.hive.client.HiveClient
class GlutenHiveSQLQuerySuite extends GlutenHiveSQLQuerySuiteBase {
@@ -136,4 +137,34 @@ class GlutenHiveSQLQuerySuite extends GlutenHiveSQLQuerySuiteBase {
purge = false)
}
+ test("GLUTEN-11062: Supports mixed input format for partitioned Hive table") {
+ val hiveClient: HiveClient =
+ spark.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog].client
+
+ withSQLConf("spark.sql.hive.convertMetastoreParquet" -> "false") {
+ withTempDir {
+ dir =>
+ val parquetLoc = s"file:///$dir/test_parquet"
+ val orcLoc = s"file:///$dir/test_orc"
+ withTable("test_parquet", "test_orc") {
+ hiveClient.runSqlHive(s"""create table test_parquet(id int)
+ partitioned by(pid int)
+ stored as parquet location '$parquetLoc'
+ """.stripMargin)
+ hiveClient.runSqlHive("insert into test_parquet partition(pid=1) select 2")
+ hiveClient.runSqlHive(s"""create table test_orc(id int)
+ partitioned by(pid int)
+ stored as orc location '$orcLoc'
+ """.stripMargin)
+ hiveClient.runSqlHive("insert into test_orc partition(pid=2) select 2")
+ hiveClient.runSqlHive(
+ s"alter table test_parquet add partition (pid=2) location '$orcLoc/pid=2'")
+ hiveClient.runSqlHive("alter table test_parquet partition(pid=2) SET FILEFORMAT orc")
+ val df = sql("select pid, id from test_parquet order by pid")
+ checkAnswer(df, Seq(Row(1, 2), Row(2, 2)))
+ checkOperatorMatch[HiveTableScanExecTransformer](df)
+ }
+ }
+ }
+ }
}
diff --git a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
index b348f67193..f945b38ede 100644
--- a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
+++ b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
@@ -19,7 +19,8 @@ package org.apache.spark.sql.hive.execution
import org.apache.spark.SparkConf
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.hive.HiveTableScanExecTransformer
+import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveTableScanExecTransformer}
+import org.apache.spark.sql.hive.client.HiveClient
class GlutenHiveSQLQuerySuite extends GlutenHiveSQLQuerySuiteBase {
@@ -48,4 +49,34 @@ class GlutenHiveSQLQuerySuite extends GlutenHiveSQLQuerySuiteBase {
purge = false)
}
+ test("GLUTEN-11062: Supports mixed input format for partitioned Hive table") {
+ val hiveClient: HiveClient =
+ spark.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog].client
+
+ withSQLConf("spark.sql.hive.convertMetastoreParquet" -> "false") {
+ withTempDir {
+ dir =>
+ val parquetLoc = s"file:///$dir/test_parquet"
+ val orcLoc = s"file:///$dir/test_orc"
+ withTable("test_parquet", "test_orc") {
+ hiveClient.runSqlHive(s"""create table test_parquet(id int)
+ partitioned by(pid int)
+ stored as parquet location '$parquetLoc'
+ """.stripMargin)
+ hiveClient.runSqlHive("insert into test_parquet partition(pid=1) select 2")
+ hiveClient.runSqlHive(s"""create table test_orc(id int)
+ partitioned by(pid int)
+ stored as orc location '$orcLoc'
+ """.stripMargin)
+ hiveClient.runSqlHive("insert into test_orc partition(pid=2) select 2")
+ hiveClient.runSqlHive(
+ s"alter table test_parquet add partition (pid=2) location '$orcLoc/pid=2'")
+ hiveClient.runSqlHive("alter table test_parquet partition(pid=2) SET FILEFORMAT orc")
+ val df = sql("select pid, id from test_parquet order by pid")
+ checkAnswer(df, Seq(Row(1, 2), Row(2, 2)))
+ checkOperatorMatch[HiveTableScanExecTransformer](df)
+ }
+ }
+ }
+ }
}
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
index c93aa6640d..f945b38ede 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
@@ -19,7 +19,8 @@ package org.apache.spark.sql.hive.execution
import org.apache.spark.SparkConf
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.hive.HiveTableScanExecTransformer
+import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveTableScanExecTransformer}
+import org.apache.spark.sql.hive.client.HiveClient
class GlutenHiveSQLQuerySuite extends GlutenHiveSQLQuerySuiteBase {
@@ -47,4 +48,35 @@ class GlutenHiveSQLQuerySuite extends GlutenHiveSQLQuerySuiteBase {
ignoreIfNotExists = true,
purge = false)
}
+
+ test("GLUTEN-11062: Supports mixed input format for partitioned Hive table") {
+ val hiveClient: HiveClient =
+ spark.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog].client
+
+ withSQLConf("spark.sql.hive.convertMetastoreParquet" -> "false") {
+ withTempDir {
+ dir =>
+ val parquetLoc = s"file:///$dir/test_parquet"
+ val orcLoc = s"file:///$dir/test_orc"
+ withTable("test_parquet", "test_orc") {
+ hiveClient.runSqlHive(s"""create table test_parquet(id int)
+ partitioned by(pid int)
+ stored as parquet location '$parquetLoc'
+ """.stripMargin)
+ hiveClient.runSqlHive("insert into test_parquet partition(pid=1) select 2")
+ hiveClient.runSqlHive(s"""create table test_orc(id int)
+ partitioned by(pid int)
+ stored as orc location '$orcLoc'
+ """.stripMargin)
+ hiveClient.runSqlHive("insert into test_orc partition(pid=2) select 2")
+ hiveClient.runSqlHive(
+ s"alter table test_parquet add partition (pid=2) location '$orcLoc/pid=2'")
+ hiveClient.runSqlHive("alter table test_parquet partition(pid=2) SET FILEFORMAT orc")
+ val df = sql("select pid, id from test_parquet order by pid")
+ checkAnswer(df, Seq(Row(1, 2), Row(2, 2)))
+ checkOperatorMatch[HiveTableScanExecTransformer](df)
+ }
+ }
+ }
+ }
}
diff --git a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
index c93aa6640d..f945b38ede 100644
--- a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
+++ b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/hive/execution/GlutenHiveSQLQuerySuite.scala
@@ -19,7 +19,8 @@ package org.apache.spark.sql.hive.execution
import org.apache.spark.SparkConf
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.TableIdentifier
-import org.apache.spark.sql.hive.HiveTableScanExecTransformer
+import org.apache.spark.sql.hive.{HiveExternalCatalog, HiveTableScanExecTransformer}
+import org.apache.spark.sql.hive.client.HiveClient
class GlutenHiveSQLQuerySuite extends GlutenHiveSQLQuerySuiteBase {
@@ -47,4 +48,35 @@ class GlutenHiveSQLQuerySuite extends GlutenHiveSQLQuerySuiteBase {
ignoreIfNotExists = true,
purge = false)
}
+
+ test("GLUTEN-11062: Supports mixed input format for partitioned Hive table") {
+ val hiveClient: HiveClient =
+ spark.sharedState.externalCatalog.unwrapped.asInstanceOf[HiveExternalCatalog].client
+
+ withSQLConf("spark.sql.hive.convertMetastoreParquet" -> "false") {
+ withTempDir {
+ dir =>
+ val parquetLoc = s"file:///$dir/test_parquet"
+ val orcLoc = s"file:///$dir/test_orc"
+ withTable("test_parquet", "test_orc") {
+ hiveClient.runSqlHive(s"""create table test_parquet(id int)
+ partitioned by(pid int)
+ stored as parquet location '$parquetLoc'
+ """.stripMargin)
+ hiveClient.runSqlHive("insert into test_parquet partition(pid=1) select 2")
+ hiveClient.runSqlHive(s"""create table test_orc(id int)
+ partitioned by(pid int)
+ stored as orc location '$orcLoc'
+ """.stripMargin)
+ hiveClient.runSqlHive("insert into test_orc partition(pid=2) select 2")
+ hiveClient.runSqlHive(
+ s"alter table test_parquet add partition (pid=2) location '$orcLoc/pid=2'")
+ hiveClient.runSqlHive("alter table test_parquet partition(pid=2) SET FILEFORMAT orc")
+ val df = sql("select pid, id from test_parquet order by pid")
+ checkAnswer(df, Seq(Row(1, 2), Row(2, 2)))
+ checkOperatorMatch[HiveTableScanExecTransformer](df)
+ }
+ }
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]