cloud-fan commented on code in PR #43801:
URL: https://github.com/apache/spark/pull/43801#discussion_r1403363778


##########
sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala:
##########
@@ -1262,6 +1275,94 @@ class JDBCV2Suite extends QueryTest with 
SharedSparkSession with ExplainSuiteHel
     checkAnswer(df17, Seq(Row(6, "jen", 12000, 1200, true)))
   }
 
+  test("SPARK-38432: escape the single quote, _ and % for DS V2 pushdown") {
+    val df1 = 
spark.table("h2.test.address").filter($"email".startsWith("abc_"))
+    checkFiltersRemoved(df1)
+    checkPushedInfo(df1, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 
'abc\_%' ESCAPE '\']")
+    checkAnswer(df1,
+      Seq(Row("abc_%[email protected]"), Row("abc_'%[email protected]"), 
Row("[email protected]")))
+
+    val df2 = 
spark.table("h2.test.address").filter($"email".startsWith("abc%"))
+    checkFiltersRemoved(df2)
+    checkPushedInfo(df2, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 
'abc\%%' ESCAPE '\']")
+    checkAnswer(df2, Seq(Row("abc%[email protected]"), Row("abc%[email protected]")))
+
+    val df3 = 
spark.table("h2.test.address").filter($"email".startsWith("abc%_"))
+    checkFiltersRemoved(df3)
+    checkPushedInfo(df3, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 
'abc\%\_%' ESCAPE '\']")
+    checkAnswer(df3, Seq(Row("abc%[email protected]")))
+
+    val df4 = 
spark.table("h2.test.address").filter($"email".startsWith("abc_%"))
+    checkFiltersRemoved(df4)
+    checkPushedInfo(df4, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 
'abc\_\%%' ESCAPE '\']")
+    checkAnswer(df4, Seq(Row("abc_%[email protected]")))
+
+    val df5 = 
spark.table("h2.test.address").filter($"email".startsWith("abc_'%"))
+    checkFiltersRemoved(df5)
+    checkPushedInfo(df5,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 'abc\_'\%%' ESCAPE 
'\']")
+    checkAnswer(df5, Seq(Row("abc_'%[email protected]")))
+
+    val df6 = 
spark.table("h2.test.address").filter($"email".endsWith("[email protected]"))
+    checkFiltersRemoved(df6)
+    checkPushedInfo(df6,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\[email protected]' 
ESCAPE '\']")
+    checkAnswer(df6, Seq(Row("abc%[email protected]"), Row("[email protected]")))
+
+    val df7 = 
spark.table("h2.test.address").filter($"email".endsWith("%[email protected]"))
+    checkFiltersRemoved(df7)
+    checkPushedInfo(df7,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\%[email protected]' 
ESCAPE '\']")
+    checkAnswer(df7,
+      Seq(Row("abc%[email protected]"), Row("abc_%[email protected]"), 
Row("abc_'%[email protected]")))
+
+    val df8 = 
spark.table("h2.test.address").filter($"email".endsWith("%[email protected]"))
+    checkFiltersRemoved(df8)
+    checkPushedInfo(df8,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\%\[email protected]' 
ESCAPE '\']")
+    checkAnswer(df8, Seq(Row("abc%[email protected]")))
+
+    val df9 = 
spark.table("h2.test.address").filter($"email".endsWith("_%[email protected]"))
+    checkFiltersRemoved(df9)
+    checkPushedInfo(df9,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\_\%[email protected]' 
ESCAPE '\']")
+    checkAnswer(df9, Seq(Row("abc_%[email protected]")))
+
+    val df10 = 
spark.table("h2.test.address").filter($"email".endsWith("_'%[email protected]"))
+    checkFiltersRemoved(df10)
+    checkPushedInfo(df10,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%\_'\%[email protected]' 
ESCAPE '\']")
+    checkAnswer(df10, Seq(Row("abc_'%[email protected]")))
+
+    val df11 = spark.table("h2.test.address").filter($"email".contains("c_d"))
+    checkFiltersRemoved(df11)
+    checkPushedInfo(df11, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 
'%c\_d%' ESCAPE '\']")
+    checkAnswer(df11, Seq(Row("[email protected]")))
+
+    val df12 = spark.table("h2.test.address").filter($"email".contains("c%d"))
+    checkFiltersRemoved(df12)
+    checkPushedInfo(df12, raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE 
'%c\%d%' ESCAPE '\']")
+    checkAnswer(df12, Seq(Row("abc%[email protected]")))
+
+    val df13 = spark.table("h2.test.address").filter($"email".contains("c%_d"))
+    checkFiltersRemoved(df13)
+    checkPushedInfo(df13,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\%\_d%' ESCAPE 
'\']")
+    checkAnswer(df13, Seq(Row("abc%[email protected]")))
+
+    val df14 = spark.table("h2.test.address").filter($"email".contains("c_%d"))
+    checkFiltersRemoved(df14)
+    checkPushedInfo(df14,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_\%d%' ESCAPE 
'\']")
+    checkAnswer(df14, Seq(Row("abc_%[email protected]")))
+
+    val df15 = 
spark.table("h2.test.address").filter($"email".contains("c_'%d"))
+    checkFiltersRemoved(df15)
+    checkPushedInfo(df15,
+      raw"PushedFilters: [EMAIL IS NOT NULL, EMAIL LIKE '%c\_'\%d%' ESCAPE 
'\']")

Review Comment:
   I'm surprised the test passed. When we call `visitContains`, the LIKE 
pattern is produced by `visitLiteral`, which invokes `compileValue` to escape 
the `'`.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to