This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 89104b93d324 [SPARK-47557][SQL][TEST] Audit MySQL ENUM/SET Types
89104b93d324 is described below
commit 89104b93d324129ebe4dec3c666fe5e36a7586ad
Author: Kent Yao <[email protected]>
AuthorDate: Tue Mar 26 07:37:39 2024 -0700
[SPARK-47557][SQL][TEST] Audit MySQL ENUM/SET Types
### What changes were proposed in this pull request?
This PR adds tests for MySQL ENUM/SET Types
In MySQL/Maria Connector/J, the JDBC ResultSetMetadata API maps ENUM/SET
types to `typeId:java.sql.Types.CHAR,typeName:'CHAR'`, which makes it
impossible to distinguish them from a normal `CHAR(n)` type.
When working with ENUM/SET, it's possible to encounter char padding issues.
However, this can be resolved by setting the LEGACY_CHAR_VARCHAR_AS_STRING
parameter to true.
### Why are the changes needed?
API auditing for MySQL JDBC data source
### Does this PR introduce _any_ user-facing change?
no, test only
### How was this patch tested?
added tests
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #45713 from yaooqinn/SPARK-47557.
Authored-by: Kent Yao <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala | 15 +++++++++++++++
1 file changed, 15 insertions(+)
diff --git
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala
index 09eb99c25227..705957631601 100644
---
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala
+++
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala
@@ -26,6 +26,7 @@ import scala.util.Using
import org.apache.spark.sql.Row
import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
+import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.tags.DockerTest
/**
@@ -84,6 +85,10 @@ class MySQLIntegrationSuite extends
DockerJDBCIntegrationSuite {
"f4 FLOAT UNSIGNED, f5 FLOAT(10) UNSIGNED, f6 FLOAT(53)
UNSIGNED)").executeUpdate()
conn.prepareStatement("INSERT INTO floats VALUES (1.23, 4.56, 7.89, 1.23,
4.56, 7.89)")
.executeUpdate()
+
+ conn.prepareStatement("CREATE TABLE collections (" +
+ "a SET('cap', 'hat', 'helmet'), b ENUM('S', 'M', 'L',
'XL'))").executeUpdate()
+ conn.prepareStatement("INSERT INTO collections VALUES ('cap,hat',
'M')").executeUpdate()
}
def testConnection(): Unit = {
@@ -275,6 +280,16 @@ class MySQLIntegrationSuite extends
DockerJDBCIntegrationSuite {
val df = spark.read.jdbc(jdbcUrl, "floats", new Properties)
checkAnswer(df, Row(1.23f, 4.56f, 7.89d, 1.23d, 4.56d, 7.89d))
}
+
+  test("SPARK-47557: MySQL ENUM/SET types contain only java.sql.Types.CHAR
information") {
+ val df = spark.read.jdbc(jdbcUrl, "collections", new Properties)
+ checkAnswer(df, Row("cap,hat ", "M "))
+ df.write.mode("append").jdbc(jdbcUrl, "collections", new Properties)
+ withSQLConf(SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key -> "true") {
+ checkAnswer(spark.read.jdbc(jdbcUrl, "collections", new Properties),
+ Row("cap,hat", "M") :: Row("cap,hat", "M") :: Nil)
+ }
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]