This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 207d675110e6 [SPARK-48211][SQL] DB2: Read SMALLINT as ShortType
207d675110e6 is described below

commit 207d675110e6fa699a434e81296f6f050eb0304b
Author: Kent Yao <y...@apache.org>
AuthorDate: Thu May 9 17:27:04 2024 +0800

    [SPARK-48211][SQL] DB2: Read SMALLINT as ShortType

    ### What changes were proposed in this pull request?

    This PR supports reading SMALLINT from DB2 as ShortType.

    ### Why are the changes needed?

    - 15 bits is sufficient: DB2's SMALLINT is a 16-bit signed integer, which fits ShortType exactly
    - we already write ShortType as SMALLINT
    - we already read SMALLINT from the other built-in JDBC sources as ShortType

    ### Does this PR introduce _any_ user-facing change?

    Yes, a migration guide entry is added for this change.

    ### How was this patch tested?

    Changed existing tests.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #46497 from yaooqinn/SPARK-48211.

    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .../spark/sql/jdbc/DB2IntegrationSuite.scala       | 69 +++++++++++++---------
 docs/sql-migration-guide.md                        |  1 +
 .../org/apache/spark/sql/internal/SQLConf.scala    | 11 ++++
 .../org/apache/spark/sql/jdbc/DB2Dialect.scala     |  3 +
 4 files changed, 56 insertions(+), 28 deletions(-)
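For orientation (a sketch, not part of this commit), here is what the new mapping means for a plain DB2 read; the JDBC URL, table, and credentials below are hypothetical placeholders:

    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().appName("db2-smallint-demo").getOrCreate()

    // Placeholder connection details; point these at a real DB2 instance.
    val df = spark.read
      .format("jdbc")
      .option("url", "jdbc:db2://localhost:50000/sampledb")
      .option("dbtable", "smallint_tbl") // a table with a SMALLINT column
      .option("user", "db2inst1")
      .option("password", "secret")
      .load()

    // With this commit the SMALLINT column resolves to ShortType; with
    // spark.sql.legacy.db2.numericMapping.enabled=true it stays IntegerType.
    df.printSchema()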
diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala
index cedb33d491fb..aca174cce194 100644
--- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala
+++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/DB2IntegrationSuite.scala
@@ -25,6 +25,7 @@ import org.scalatest.time.SpanSugar._
 
 import org.apache.spark.sql.{Row, SaveMode}
 import org.apache.spark.sql.catalyst.util.DateTimeTestUtils._
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types.{BooleanType, ByteType, ShortType, StructType}
 import org.apache.spark.tags.DockerTest
 
@@ -77,32 +78,44 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite {
   }
 
   test("Numeric types") {
-    val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties)
-    val rows = df.collect()
-    assert(rows.length == 1)
-    val types = rows(0).toSeq.map(x => x.getClass.toString)
-    assert(types.length == 10)
-    assert(types(0).equals("class java.lang.Integer"))
-    assert(types(1).equals("class java.lang.Integer"))
-    assert(types(2).equals("class java.lang.Long"))
-    assert(types(3).equals("class java.math.BigDecimal"))
-    assert(types(4).equals("class java.lang.Double"))
-    assert(types(5).equals("class java.lang.Double"))
-    assert(types(6).equals("class java.lang.Float"))
-    assert(types(7).equals("class java.math.BigDecimal"))
-    assert(types(8).equals("class java.math.BigDecimal"))
-    assert(types(9).equals("class java.math.BigDecimal"))
-    assert(rows(0).getInt(0) == 17)
-    assert(rows(0).getInt(1) == 77777)
-    assert(rows(0).getLong(2) == 922337203685477580L)
-    val bd = new BigDecimal("123456745.56789012345000000000")
-    assert(rows(0).getAs[BigDecimal](3).equals(bd))
-    assert(rows(0).getDouble(4) == 42.75)
-    assert(rows(0).getDouble(5) == 5.4E-70)
-    assert(rows(0).getFloat(6) == 3.4028234663852886e+38)
-    assert(rows(0).getDecimal(7) == new BigDecimal("4.299900000000000000"))
-    assert(rows(0).getDecimal(8) == new BigDecimal("99999999999999990000.000000000000000000"))
-    assert(rows(0).getDecimal(9) == new BigDecimal("1234567891234567.123456789123456789"))
+    Seq(true, false).foreach { legacy =>
+      withSQLConf(SQLConf.LEGACY_DB2_TIMESTAMP_MAPPING_ENABLED.key -> legacy.toString) {
+        val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties)
+        val rows = df.collect()
+        assert(rows.length == 1)
+        val types = rows(0).toSeq.map(x => x.getClass.toString)
+        assert(types.length == 10)
+        if (legacy) {
+          assert(types(0).equals("class java.lang.Integer"))
+        } else {
+          assert(types(0).equals("class java.lang.Short"))
+        }
+        assert(types(1).equals("class java.lang.Integer"))
+        assert(types(2).equals("class java.lang.Long"))
+        assert(types(3).equals("class java.math.BigDecimal"))
+        assert(types(4).equals("class java.lang.Double"))
+        assert(types(5).equals("class java.lang.Double"))
+        assert(types(6).equals("class java.lang.Float"))
+        assert(types(7).equals("class java.math.BigDecimal"))
+        assert(types(8).equals("class java.math.BigDecimal"))
+        assert(types(9).equals("class java.math.BigDecimal"))
+        if (legacy) {
+          assert(rows(0).getInt(0) == 17)
+        } else {
+          assert(rows(0).getShort(0) == 17)
+        }
+        assert(rows(0).getInt(1) == 77777)
+        assert(rows(0).getLong(2) == 922337203685477580L)
+        val bd = new BigDecimal("123456745.56789012345000000000")
+        assert(rows(0).getAs[BigDecimal](3).equals(bd))
+        assert(rows(0).getDouble(4) == 42.75)
+        assert(rows(0).getDouble(5) == 5.4E-70)
+        assert(rows(0).getFloat(6) == 3.4028234663852886e+38)
+        assert(rows(0).getDecimal(7) == new BigDecimal("4.299900000000000000"))
+        assert(rows(0).getDecimal(8) == new BigDecimal("99999999999999990000.000000000000000000"))
+        assert(rows(0).getDecimal(9) == new BigDecimal("1234567891234567.123456789123456789"))
+      }
+    }
   }
 
   test("Date types") {
@@ -154,8 +167,8 @@ class DB2IntegrationSuite extends DockerJDBCIntegrationSuite {
       new StructType().add("c1", ShortType).add("b", ByteType).add("c3", BooleanType))
     df4.write.jdbc(jdbcUrl, "otherscopy", new Properties)
     val rows = sqlContext.read.jdbc(jdbcUrl, "otherscopy", new Properties).collect()
-    assert(rows(0).getInt(0) == 1)
-    assert(rows(0).getInt(1) == 20)
+    assert(rows(0).getShort(0) == 1)
+    assert(rows(0).getShort(1) == 20)
     assert(rows(0).getString(2) == "1")
   }
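The updated test asserts on the runtime classes of collected rows. As a side note (a sketch, not part of the commit), the same mapping can be checked on the DataFrame schema before any data is collected, reusing the suite's jdbcUrl and "numbers" table:

    import java.util.Properties

    import org.apache.spark.sql.types.ShortType

    // Assumes the suite's jdbcUrl and the "numbers" table are in scope.
    val df = sqlContext.read.jdbc(jdbcUrl, "numbers", new Properties)
    // With the new mapping, the first column (a DB2 SMALLINT) is ShortType.
    assert(df.schema.fields(0).dataType == ShortType)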
diff --git a/docs/sql-migration-guide.md b/docs/sql-migration-guide.md
index bd6604cb69c0..8b55fb48b8b5 100644
--- a/docs/sql-migration-guide.md
+++ b/docs/sql-migration-guide.md
@@ -50,6 +50,7 @@ license: |
 - Since Spark 4.0, Oracle JDBC datasource will write TimestampType as TIMESTAMP WITH LOCAL TIME ZONE, while in Spark 3.5 and previous, write as TIMESTAMP. To restore the previous behavior, set `spark.sql.legacy.oracle.timestampMapping.enabled` to `true`.
 - Since Spark 4.0, MsSQL Server JDBC datasource will read TINYINT as ShortType, while in Spark 3.5 and previous, read as IntegerType. To restore the previous behavior, set `spark.sql.legacy.mssqlserver.numericMapping.enabled` to `true`.
 - Since Spark 4.0, MsSQL Server JDBC datasource will read DATETIMEOFFSET as TimestampType, while in Spark 3.5 and previous, read as StringType. To restore the previous behavior, set `spark.sql.legacy.mssqlserver.datetimeoffsetMapping.enabled` to `true`.
+- Since Spark 4.0, DB2 JDBC datasource will read SMALLINT as ShortType, while in Spark 3.5 and previous, it was read as IntegerType. To restore the previous behavior, set `spark.sql.legacy.db2.numericMapping.enabled` to `true`.
 - Since Spark 4.0, the default value for `spark.sql.legacy.ctePrecedencePolicy` has been changed from `EXCEPTION` to `CORRECTED`. Instead of raising an error, inner CTE definitions take precedence over outer definitions.
 - Since Spark 4.0, the default value for `spark.sql.legacy.timeParserPolicy` has been changed from `EXCEPTION` to `CORRECTED`. Instead of raising an `INCONSISTENT_BEHAVIOR_CROSS_VERSION` error, `CANNOT_PARSE_TIMESTAMP` will be raised if ANSI mode is enabled. `NULL` will be returned if ANSI mode is disabled. See [Datetime Patterns for Formatting and Parsing](sql-ref-datetime-pattern.html).
 - Since Spark 4.0, a bug falsely allowing `!` instead of `NOT` when `!` is not a prefix operator has been fixed. Clauses such as `expr ! IN (...)`, `expr ! BETWEEN ...`, or `col ! NULL` now raise syntax errors. To restore the previous behavior, set `spark.sql.legacy.bangEqualsNot` to `true`.
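For users affected by the new DB2 entry above, a minimal sketch of opting back into the old mapping for a session (assuming an active SparkSession named spark):

    // Restore the pre-4.0 behavior for this session: DB2 SMALLINT -> IntegerType.
    spark.conf.set("spark.sql.legacy.db2.numericMapping.enabled", "true")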
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
index df75985043d0..54aa87260534 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/internal/SQLConf.scala
@@ -4222,6 +4222,14 @@ object SQLConf {
       .booleanConf
       .createWithDefault(false)
 
+  val LEGACY_DB2_TIMESTAMP_MAPPING_ENABLED =
+    buildConf("spark.sql.legacy.db2.numericMapping.enabled")
+      .internal()
+      .doc("When true, SMALLINT maps to IntegerType in DB2; otherwise, ShortType.")
+      .version("4.0.0")
+      .booleanConf
+      .createWithDefault(false)
+
   val CSV_FILTER_PUSHDOWN_ENABLED = buildConf("spark.sql.csv.filterPushdown.enabled")
     .doc("When true, enable filter pushdown to CSV datasource.")
     .version("3.0.0")
@@ -5339,6 +5347,9 @@ class SQLConf extends Serializable with Logging with SqlApiConf {
   def legacyOracleTimestampMappingEnabled: Boolean =
     getConf(LEGACY_ORACLE_TIMESTAMP_MAPPING_ENABLED)
 
+  def legacyDB2numericMappingEnabled: Boolean =
+    getConf(LEGACY_DB2_TIMESTAMP_MAPPING_ENABLED)
+
   override def legacyTimeParserPolicy: LegacyBehaviorPolicy.Value = {
     LegacyBehaviorPolicy.withName(getConf(SQLConf.LEGACY_TIME_PARSER_POLICY))
   }
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala
index 31a7c783ba60..cc596a5f0185 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/DB2Dialect.scala
@@ -28,6 +28,7 @@ import org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException
 import org.apache.spark.sql.connector.catalog.Identifier
 import org.apache.spark.sql.connector.expressions.Expression
 import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
+import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.types._
 
 private case class DB2Dialect() extends JdbcDialect {
@@ -86,6 +87,8 @@ private case class DB2Dialect() extends JdbcDialect {
       typeName: String,
       size: Int,
       md: MetadataBuilder): Option[DataType] = sqlType match {
+    case Types.SMALLINT if !SQLConf.get.legacyDB2numericMappingEnabled =>
+      Option(ShortType)
     case Types.REAL => Option(FloatType)
     case Types.OTHER =>
       typeName match {
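The dialect change closes the read/write asymmetry mentioned in the PR description: ShortType is written to DB2 as SMALLINT, and SMALLINT now reads back as ShortType. A round-trip sketch, assuming an active SparkSession named spark and a reachable DB2 instance behind a placeholder jdbcUrl and a scratch table:

    import java.util.Properties

    import org.apache.spark.sql.Row
    import org.apache.spark.sql.types.{ShortType, StructType}

    val jdbcUrl = "jdbc:db2://localhost:50000/sampledb" // placeholder
    val schema = new StructType().add("c1", ShortType)
    val df = spark.createDataFrame(
      spark.sparkContext.parallelize(Seq(Row(1.toShort))), schema)

    // ShortType is written to DB2 as SMALLINT ...
    df.write.jdbc(jdbcUrl, "shorts_roundtrip", new Properties)

    // ... and, with the getCatalystType case above, read back as ShortType
    // rather than being widened to IntegerType.
    val back = spark.read.jdbc(jdbcUrl, "shorts_roundtrip", new Properties)
    assert(back.schema.head.dataType == ShortType)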