This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.5 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push: new b59db1eb079 [SPARK-44885][SQL] NullPointerException is thrown when column with ROWID type contains NULL values b59db1eb079 is described below commit b59db1eb0795c70d86ce00cfb183a5d021a2af27 Author: Tim Nieradzik <t...@sparse.tech> AuthorDate: Tue Aug 22 19:48:09 2023 +0900 [SPARK-44885][SQL] NullPointerException is thrown when column with ROWID type contains NULL values ### What changes were proposed in this pull request? If a `rowid` column is `null`, do not call `toString` on it. ### Why are the changes needed? A column with the `rowid` type may contain NULL values. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Test cases. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #42576 from tindzk/fix/rowid-null. Authored-by: Tim Nieradzik <t...@sparse.tech> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> (cherry picked from commit 16607a5fd03f562dc8ea3825e90c40e80e8063e6) Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../apache/spark/sql/jdbc/OracleIntegrationSuite.scala | 17 ++++++++++++++++- .../sql/execution/datasources/jdbc/JdbcUtils.scala | 7 ++++++- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala index 407da028b7e..483f6087c81 100644 --- a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala +++ b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/OracleIntegrationSuite.scala @@ -49,7 +49,7 @@ import org.apache.spark.tags.DockerTest * 4. Start docker: sudo service docker start * - Optionally, docker pull $ORACLE_DOCKER_IMAGE_NAME * 5. 
Run Spark integration tests for Oracle with: ./build/sbt -Pdocker-integration-tests - * "testOnly org.apache.spark.sql.jdbc.OracleIntegrationSuite" + * "docker-integration-tests/testOnly org.apache.spark.sql.jdbc.OracleIntegrationSuite" * * A sequence of commands to build the Oracle XE database container image: * $ git clone https://github.com/oracle/docker-images.git @@ -521,4 +521,19 @@ class OracleIntegrationSuite extends DockerJDBCIntegrationSuite with SharedSpark assert(types(0).equals("class java.lang.String")) assert(!rows(0).getString(0).isEmpty) } + + test("SPARK-44885: query row with ROWID type containing NULL value") { + val rows = spark.read.format("jdbc") + .option("url", jdbcUrl) + // Rename column to `row_id` to prevent the following SQL error: + // ORA-01446: cannot select ROWID from view with DISTINCT, GROUP BY, etc. + // See also https://stackoverflow.com/a/42632686/13300239 + .option("query", "SELECT rowid as row_id from datetime where d = {d '1991-11-09'}\n" + + "union all\n" + + "select null from dual") + .load() + .collect() + assert(rows(0).getString(0).nonEmpty) + assert(rows(1).getString(0) == null) + } } diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala index 448ef220829..b7019c1dcbe 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JdbcUtils.scala @@ -453,7 +453,12 @@ object JdbcUtils extends Logging with SQLConfHelper { case StringType if metadata.contains("rowid") => (rs: ResultSet, row: InternalRow, pos: Int) => - row.update(pos, UTF8String.fromString(rs.getRowId(pos + 1).toString)) + val rawRowId = rs.getRowId(pos + 1) + if (rawRowId == null) { + row.update(pos, null) + } else { + row.update(pos, UTF8String.fromString(rawRowId.toString)) + } case StringType => (rs: 
ResultSet, row: InternalRow, pos: Int) => --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org