This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 89104b93d324 [SPARK-47557][SQL][TEST] Audit MySQL ENUM/SET Types 89104b93d324 is described below commit 89104b93d324129ebe4dec3c666fe5e36a7586ad Author: Kent Yao <y...@apache.org> AuthorDate: Tue Mar 26 07:37:39 2024 -0700 [SPARK-47557][SQL][TEST] Audit MySQL ENUM/SET Types ### What changes were proposed in this pull request? This PR adds tests for MySQL ENUM/SET types. In MySQL/Maria Connector/J, the JDBC ResultSetMetaData API maps ENUM/SET types to `typeId:java.sql.Types.CHAR,typeName:'CHAR'`, which makes it impossible to distinguish them from a normal `CHAR(n)` type. When working with ENUM/SET, it's possible to encounter char padding issues. However, this can be resolved by setting the LEGACY_CHAR_VARCHAR_AS_STRING parameter to true. ### Why are the changes needed? API auditing for the MySQL JDBC data source. ### Does this PR introduce _any_ user-facing change? No, test only. ### How was this patch tested? Added tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #45713 from yaooqinn/SPARK-47557. 
Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala index 09eb99c25227..705957631601 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/MySQLIntegrationSuite.scala @@ -26,6 +26,7 @@ import scala.util.Using import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.tags.DockerTest /** @@ -84,6 +85,10 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { "f4 FLOAT UNSIGNED, f5 FLOAT(10) UNSIGNED, f6 FLOAT(53) UNSIGNED)").executeUpdate() conn.prepareStatement("INSERT INTO floats VALUES (1.23, 4.56, 7.89, 1.23, 4.56, 7.89)") .executeUpdate() + + conn.prepareStatement("CREATE TABLE collections (" + + "a SET('cap', 'hat', 'helmet'), b ENUM('S', 'M', 'L', 'XL'))").executeUpdate() + conn.prepareStatement("INSERT INTO collections VALUES ('cap,hat', 'M')").executeUpdate() } def testConnection(): Unit = { @@ -275,6 +280,16 @@ class MySQLIntegrationSuite extends DockerJDBCIntegrationSuite { val df = spark.read.jdbc(jdbcUrl, "floats", new Properties) checkAnswer(df, Row(1.23f, 4.56f, 7.89d, 1.23d, 4.56d, 7.89d)) } + + test("SPARK-47557: MySQL ENUM/SET types contains only java.sq.Types.CHAR information") { + val df = spark.read.jdbc(jdbcUrl, "collections", new Properties) + checkAnswer(df, Row("cap,hat ", "M ")) + df.write.mode("append").jdbc(jdbcUrl, "collections", new Properties) + withSQLConf(SQLConf.LEGACY_CHAR_VARCHAR_AS_STRING.key -> "true") { 
+ checkAnswer(spark.read.jdbc(jdbcUrl, "collections", new Properties), + Row("cap,hat", "M") :: Row("cap,hat", "M") :: Nil) + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org