This is an automated email from the ASF dual-hosted git repository.
yao pushed a commit to branch branch-3.5
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.5 by this push:
new 91d11568c2eb [SPARK-47440][SQL] Fix pushing unsupported syntax to
MsSqlServer
91d11568c2eb is described below
commit 91d11568c2ebd937e0ea6de892649d9115035410
Author: Stefan Bukorovic <[email protected]>
AuthorDate: Fri Mar 22 18:44:30 2024 +0800
[SPARK-47440][SQL] Fix pushing unsupported syntax to MsSqlServer
### What changes were proposed in this pull request?
In this PR, I propose a change in SQLQuery builder of MsSqlServer dialect.
I override build method to check for boolean operator in binary comparisons and
throw exception if encountered.
### Why are the changes needed?
MsSqlServer syntax prevents boolean operators in any binary comparison.
Reasoning is lack of boolean data type in MsSqlServer.
It was possible to construct Spark query that would generate this situation
in MsSqlServer and engine would throw syntax exception on the MsSqlServer side.
This PR solves this bug.
For example, in table `people` there is a `name` column. In MsSqlServer if
we try to execute:
`SELECT * FROM people WHERE (name LIKE 'a%') = (name LIKE '%b')`
we would get a syntax error. However this query is fine in other major
engines.
### Does this PR introduce _any_ user-facing change?
Yes, user will not encounter syntax exception in MsSqlServer when writing
these queries.
### How was this patch tested?
By running a unit test in MsSqlServerIntegrationSuite
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #45564 from stefanbuk-db/SQLServer_like_operator_bugfix.
Authored-by: Stefan Bukorovic <[email protected]>
Signed-off-by: Kent Yao <[email protected]>
(cherry picked from commit 227a50a1766ac1476b0031e1c60d2604eccdb9a7)
Signed-off-by: Kent Yao <[email protected]>
---
.../spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala | 16 ++++++++++++++++
.../org/apache/spark/sql/jdbc/MsSqlServerDialect.scala | 15 +++++++++++++++
2 files changed, 31 insertions(+)
diff --git
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
index e451cc2b8c52..0dc3a39f4db5 100644
---
a/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
+++
b/connector/docker-integration-tests/src/test/scala/org/apache/spark/sql/jdbc/v2/MsSqlServerIntegrationSuite.scala
@@ -127,4 +127,20 @@ class MsSqlServerIntegrationSuite extends
DockerJDBCIntegrationV2Suite with V2JD
},
errorClass = "_LEGACY_ERROR_TEMP_2271")
}
+
+ test("SPARK-47440: SQLServer does not support boolean expression in binary
comparison") {
+ val df1 = sql("SELECT name FROM " +
+ s"$catalogName.employee WHERE ((name LIKE 'am%') = (name LIKE '%y'))")
+ assert(df1.collect().length == 4)
+
+ val df2 = sql("SELECT name FROM " +
+ s"$catalogName.employee " +
+ "WHERE ((name NOT LIKE 'am%') = (name NOT LIKE '%y'))")
+ assert(df2.collect().length == 4)
+
+ val df3 = sql("SELECT name FROM " +
+ s"$catalogName.employee " +
+ "WHERE (dept > 1 AND ((name LIKE 'am%') = (name LIKE '%y')))")
+ assert(df3.collect().length == 3)
+ }
}
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
index 78ec3ac42d79..3022bca87a9f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/MsSqlServerDialect.scala
@@ -26,6 +26,7 @@ import org.apache.spark.sql.AnalysisException
import org.apache.spark.sql.catalyst.analysis.NonEmptyNamespaceException
import org.apache.spark.sql.connector.catalog.Identifier
import org.apache.spark.sql.connector.expressions.{Expression, NullOrdering,
SortDirection}
+import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.errors.QueryExecutionErrors
import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions
import org.apache.spark.sql.internal.SQLConf
@@ -86,6 +87,20 @@ private object MsSqlServerDialect extends JdbcDialect {
case "STDDEV_SAMP" => "STDEV"
case _ => super.dialectFunctionName(funcName)
}
+
+ override def build(expr: Expression): String = {
+ // MsSqlServer does not support boolean comparison using standard
comparison operators
+ // We shouldn't propagate these queries to MsSqlServer
+ expr match {
+ case e: Predicate => e.name() match {
+ case "=" | "<>" | "<=>" | "<" | "<=" | ">" | ">="
+ if e.children().exists(_.isInstanceOf[Predicate]) =>
+ super.visitUnexpectedExpr(expr)
+ case _ => super.build(expr)
+ }
+ case _ => super.build(expr)
+ }
+ }
}
override def compileExpression(expr: Expression): Option[String] = {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]