cloud-fan commented on code in PR #43801:
URL: https://github.com/apache/spark/pull/43801#discussion_r1403333706
##########
sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala:
##########
@@ -1262,6 +1275,94 @@ class JDBCV2Suite extends QueryTest with SharedSparkSession with ExplainSuiteHel
checkAnswer(df17, Seq(Row(6, "jen", 12000, 1200, true)))
}
+ test("SPARK-38432: escape the single quote, _ and % for DS V2 pushdown") {
+ val df1 =
spark.table("h2.test.address").filter($"email".startsWith("abc_"))
+ checkFiltersRemoved(df1)
+ checkPushedInfo(df1, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE
'abc\_%' ESCAPE '\']")
+ checkAnswer(df1,
+ Seq(Row("abc_%[email protected]"), Row("abc_'%[email protected]"),
Row("[email protected]")))
+
+ val df2 =
spark.table("h2.test.address").filter($"email".startsWith("abc%"))
+ checkFiltersRemoved(df2)
+ checkPushedInfo(df2, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE
'abc\%%' ESCAPE '\']")
+ checkAnswer(df2, Seq(Row("abc%[email protected]"), Row("abc%[email protected]")))
+
+ val df3 =
spark.table("h2.test.address").filter($"email".startsWith("abc%_"))
+ checkFiltersRemoved(df3)
+ checkPushedInfo(df3, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE
'abc\%\_%' ESCAPE '\']")
+ checkAnswer(df3, Seq(Row("abc%[email protected]")))
+
+ val df4 =
spark.table("h2.test.address").filter($"email".startsWith("abc_%"))
+ checkFiltersRemoved(df4)
+ checkPushedInfo(df4, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE
'abc\_\%%' ESCAPE '\']")
+ checkAnswer(df4, Seq(Row("abc_%[email protected]")))
+
+ val df5 =
spark.table("h2.test.address").filter($"email".startsWith("abc_'%"))
+ checkFiltersRemoved(df5)
+ checkPushedInfo(df5,
+ raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\_'\%%' ESCAPE
'\']")
+ checkAnswer(df5, Seq(Row("abc_'%[email protected]")))
+
+ val df6 =
spark.table("h2.test.address").filter($"email".endsWith("[email protected]"))
+ checkFiltersRemoved(df6)
+ checkPushedInfo(df6,
+ raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\[email protected]'
ESCAPE '\']")
+ checkAnswer(df6, Seq(Row("abc%[email protected]"), Row("[email protected]")))
+
+ val df7 =
spark.table("h2.test.address").filter($"email".endsWith("%[email protected]"))
+ checkFiltersRemoved(df7)
+ checkPushedInfo(df7,
+ raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\%[email protected]'
ESCAPE '\']")
+ checkAnswer(df7,
+ Seq(Row("abc%[email protected]"), Row("abc_%[email protected]"),
Row("abc_'%[email protected]")))
+
+ val df8 =
spark.table("h2.test.address").filter($"email".endsWith("%[email protected]"))
+ checkFiltersRemoved(df8)
+ checkPushedInfo(df8,
+ raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\%\[email protected]'
ESCAPE '\']")
+ checkAnswer(df8, Seq(Row("abc%[email protected]")))
+
+ val df9 =
spark.table("h2.test.address").filter($"email".endsWith("_%[email protected]"))
+ checkFiltersRemoved(df9)
+ checkPushedInfo(df9,
+ raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\_\%[email protected]'
ESCAPE '\']")
+ checkAnswer(df9, Seq(Row("abc_%[email protected]")))
+
+ val df10 =
spark.table("h2.test.address").filter($"email".endsWith("_'%[email protected]"))
+ checkFiltersRemoved(df10)
+ checkPushedInfo(df10,
+ raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\_'\%[email protected]'
ESCAPE '\']")
+ checkAnswer(df10, Seq(Row("abc_'%[email protected]")))
+
+ val df11 = spark.table("h2.test.address").filter($"email".contains("c_d"))
+ checkFiltersRemoved(df11)
+ checkPushedInfo(df11, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE
'%c\_d%' ESCAPE '\']")
+ checkAnswer(df11, Seq(Row("[email protected]")))
+
+ val df12 = spark.table("h2.test.address").filter($"email".contains("c%d"))
+ checkFiltersRemoved(df12)
+ checkPushedInfo(df12, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE
'%c\%d%' ESCAPE '\']")
+ checkAnswer(df12, Seq(Row("abc%[email protected]")))
+
+ val df13 = spark.table("h2.test.address").filter($"email".contains("c%_d"))
+ checkFiltersRemoved(df13)
+ checkPushedInfo(df13,
+ raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\%\_d%' ESCAPE
'\']")
+ checkAnswer(df13, Seq(Row("abc%[email protected]")))
+
+ val df14 = spark.table("h2.test.address").filter($"email".contains("c_%d"))
+ checkFiltersRemoved(df14)
+ checkPushedInfo(df14,
+ raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_\%d%' ESCAPE
'\']")
+ checkAnswer(df14, Seq(Row("abc_%[email protected]")))
+
+ val df15 =
spark.table("h2.test.address").filter($"email".contains("c_'%d"))
+ checkFiltersRemoved(df15)
+ checkPushedInfo(df15,
+ raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_'\%d%' ESCAPE
'\']")
Review Comment:
I don't think this expected output is right. When we generate the `LIKE`
clause, we should call `escapeSql` to escape the `'` in the `LIKE` pattern. Are
you sure there is a later step that escapes it? I can't find any such code in
the codebase.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org