This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f5b9ea8103dd [SPARK-54552][CONNECT] Fix
`SparkConnectResultSet.getString` to handle BINARY data type with `UTF_8`
f5b9ea8103dd is described below
commit f5b9ea8103dd1f37c6f0cea0692d7bc5b50b778c
Author: vinodkc <[email protected]>
AuthorDate: Fri Nov 28 18:57:43 2025 -0800
[SPARK-54552][CONNECT] Fix `SparkConnectResultSet.getString` to handle
BINARY data type with `UTF_8`
### What changes were proposed in this pull request?
Fixed `SparkConnectResultSet.getString()` to properly convert BINARY data
to UTF-8 strings instead of returning the default `toString` of the byte
array (e.g., "[B@<hashcode>").
### Why are the changes needed?
The current implementation violates the behavior expected by the JDBC
specification. Users calling getString() on BINARY columns expect UTF-8
decoded strings, not the default `toString` of a Java byte array.
Before
```
SELECT binary('xDeAdBeEf')
spark-sql: `\xDeAdBeEf`
beeline with STS: `\xDeAdBeEf`
beeline with Connect Server: `[B@4d518c66`
```
After
```
SELECT binary('xDeAdBeEf')
spark-sql: `\xDeAdBeEf`
beeline with STS: `\xDeAdBeEf`
beeline with Connect Server: `\xDeAdBeEf`
```
### Does this PR introduce _any_ user-facing change?
Yes. getString() on BINARY columns now returns UTF-8 decoded strings
instead of byte array references like "[B@1a2b3c4d".
### How was this patch tested?
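Added a new test ("get binary type with UTF-8 text") and added `getString`
assertions to the existing binary tests in `SparkConnectJdbcDataTypeSuite`.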
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #53262 from vinodkc/br_fix_getString_BINARY.
Authored-by: vinodkc <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../client/jdbc/SparkConnectResultSet.scala | 8 +++++++-
.../jdbc/SparkConnectJdbcDataTypeSuite.scala | 24 ++++++++++++++++++++++
2 files changed, 31 insertions(+), 1 deletion(-)
diff --git
a/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectResultSet.scala
b/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectResultSet.scala
index 0070cbd93c3e..e90f80f783dc 100644
---
a/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectResultSet.scala
+++
b/sql/connect/client/jdbc/src/main/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectResultSet.scala
@@ -114,7 +114,13 @@ class SparkConnectResultSet(
}
override def getString(columnIndex: Int): String = {
- getColumnValue(columnIndex, null: String) { idx => String.valueOf(currentRow.get(idx)) }
+ getColumnValue(columnIndex, null: String) { idx =>
+ currentRow.get(idx) match {
+ case bytes: Array[Byte] =>
+ new String(bytes, java.nio.charset.StandardCharsets.UTF_8)
+ case other => String.valueOf(other)
+ }
+ }
}
override def getBoolean(columnIndex: Int): Boolean = {
diff --git
a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectJdbcDataTypeSuite.scala
b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectJdbcDataTypeSuite.scala
index eb3afcc1bcf2..9a1db36514da 100644
---
a/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectJdbcDataTypeSuite.scala
+++
b/sql/connect/client/jdbc/src/test/scala/org/apache/spark/sql/connect/client/jdbc/SparkConnectJdbcDataTypeSuite.scala
@@ -385,6 +385,11 @@ class SparkConnectJdbcDataTypeSuite extends
ConnectFunSuite with RemoteSparkSess
assert(bytes.length === testBytes.length)
assert(bytes.sameElements(testBytes))
assert(!rs.wasNull)
+
+ val stringValue = rs.getString(1)
+ val expectedString = new String(testBytes, java.nio.charset.StandardCharsets.UTF_8)
+ assert(stringValue === expectedString)
+
assert(!rs.next())
val metaData = rs.getMetaData
@@ -396,6 +401,22 @@ class SparkConnectJdbcDataTypeSuite extends
ConnectFunSuite with RemoteSparkSess
}
}
+ test("get binary type with UTF-8 text") {
+ val textBytes = "\\xDeAdBeEf".getBytes(java.nio.charset.StandardCharsets.UTF_8)
+ val hexString = textBytes.map(b => "%02X".format(b)).mkString
+ withExecuteQuery(s"SELECT CAST(X'$hexString' AS BINARY)") { rs =>
+ assert(rs.next())
+ val bytes = rs.getBytes(1)
+ assert(bytes !== null)
+ assert(bytes.sameElements(textBytes))
+
+ val stringValue = rs.getString(1)
+ assert(stringValue === "\\xDeAdBeEf")
+
+ assert(!rs.next())
+ }
+ }
+
test("get binary type with null") {
withExecuteQuery("SELECT cast(null as binary)") { rs =>
assert(rs.next())
@@ -437,6 +458,9 @@ class SparkConnectJdbcDataTypeSuite extends ConnectFunSuite
with RemoteSparkSess
assert(bytes !== null)
assert(bytes.length === 0)
assert(!rs.wasNull)
+
+ val stringValue = rs.getString(1)
+ assert(stringValue === "")
assert(!rs.next())
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]