huaxingao commented on a change in pull request #35764:
URL: https://github.com/apache/spark/pull/35764#discussion_r825523233
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/v2/jdbc/JDBCTableCatalogSuite.scala
##########
@@ -425,4 +430,48 @@ class JDBCTableCatalogSuite extends QueryTest with
SharedSparkSession {
assert(m.contains("\"TABLEENGINENAME\" not found"))
}
}
+
+ test("test scan data partition optimization") {
+ withConnection {conn =>
+ conn.prepareStatement(
+ """CREATE TABLE "test"."employee" (id INTEGER PRIMARY KEY NOT NULL,
+ |name TEXT(32) NOT NULL)""".stripMargin)
+ .executeUpdate()
+ }
+ // scan empty table
+ val empty_df = sql("select id, name from h2.test.employee")
+ // default partition num is 10
+ assert(empty_df.rdd.getNumPartitions == 1)
+ sql("insert overwrite h2.test.employee select 1000 as id, 'a' as name")
+ for (id <- 1 to 100) {
+ sql(s"insert into h2.test.employee values($id, 'a')")
+ }
+ val df = sql("select id, name from h2.test.employee")
+ // default partition num is 15
Review comment:
Did you mean `default partition num is 10`?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]