[GitHub] [spark] beliefer commented on a diff in pull request #37169: [SPARK-38901][SQL] DS V2 supports push down misc functions

GitBox Tue, 12 Jul 2022 23:21:39 -0700


beliefer commented on code in PR #37169:
URL: https://github.com/apache/spark/pull/37169#discussion_r919687439



##########
sql/core/src/main/scala/org/apache/spark/sql/jdbc/H2Dialect.scala:
##########
@@ -125,5 +125,17 @@ private[sql] object H2Dialect extends JdbcDialect {
       }
       s"EXTRACT($newField FROM $source)"
     }
+
+    override def visitSQLFunction(funcName: String, inputs: Array[String]): 
String = {
+      funcName match {
+        case "MD5" =>
+          "RAWTOHEX(HASH('MD5', " + inputs.mkString(",") + "))"
+        case "SHA1" =>
+          "RAWTOHEX(HASH('SHA-1', " + inputs.mkString(",") + "))"
+        case "SHA2" =>
+          "RAWTOHEX(HASH('SHA-" + inputs(1) + "'," + inputs(0) + "))"

Review Comment:
   
   ```
   override def visitSQLFunction(funcName: String, inputs: Array[String]): String = {
     if (isSupportedFunction(funcName)) {
       ...
     }
   }
   ```



##########
sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala:
##########
@@ -1115,6 +1115,69 @@ class JDBCV2Suite extends QueryTest with 
SharedSparkSession with ExplainSuiteHel
     checkAnswer(df8, Seq(Row("alex")))
   }
 
+  test("scan with filter push-down with misc functions") {
+    Seq(false, true).foreach { ansiMode =>
+      withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiMode.toString) {
+        val df1 = sql("SELECT * FROM h2.test.employee WHERE " +
+          "md5(name) = 'b18ea44550b68d0d012bd9017c4a864a'")
+        checkFiltersRemoved(df1, ansiMode)
+        val expectedPlanFragment1 = if (ansiMode) {
+          "PushedFilters: [NAME IS NOT NULL, " +
+          "MD5(CAST(NAME AS binary)) = 'b18ea44550b68d0d012bd9017c4a864a']"
+        } else {
+          "PushedFilters: [NAME IS NOT NULL], "
+        }
+        checkPushedInfo(df1, expectedPlanFragment1)
+        checkAnswer(df1, Seq(Row(6, "jen", 12000, 1200, true)))
+
+        val df2 = sql("SELECT * FROM h2.test.employee WHERE " +
+          "sha1(name) = 'ce2e028af11e2044175219057c86bc1f3d51e88d'")
+        checkFiltersRemoved(df2, ansiMode)
+        val expectedPlanFragment2 = if (ansiMode) {
+          "PushedFilters: [NAME IS NOT NULL, " +
+          "SHA1(CAST(NAME AS binary)) = 
'ce2e028af11e2044175219057c86bc1f3d51e88d'],"
+        } else {
+          "PushedFilters: [NAME IS NOT NULL], "
+        }
+        checkPushedInfo(df2, expectedPlanFragment2)
+        checkAnswer(df2, Seq(Row(6, "jen", 12000, 1200, true)))
+
+        val df3 = sql("SELECT * FROM h2.test.employee WHERE " +
+          "sha2(name, 256) = 
'c24762dad798ed1af474def3f5bbcdda02dfd70813dfcacc0002474d34b4524d'")
+        checkFiltersRemoved(df3, ansiMode)
+        val expectedPlanFragment3 = if (ansiMode) {
+          "PushedFilters: [NAME IS NOT NULL, (SHA2(CAST(NAME AS binary), 256)) 
= " +
+          "'c24762dad798ed1af474def3f5bbcdda02dfd70813dfcacc0002474d34b4524d']"
+        } else {
+          "PushedFilters: [NAME IS NOT NULL], "
+        }
+        checkPushedInfo(df3, expectedPlanFragment3)
+        checkAnswer(df3, Seq(Row(6, "jen", 12000, 1200, true)))
+
+        val df4 = sql("SELECT * FROM h2.test.employee WHERE " +
+          "crc32(name) = '142689369'")

Review Comment:
   `val df4 = sql("SELECT * FROM h2.test.employee WHERE crc32(name) = '142689369'")`



##########
sql/core/src/test/scala/org/apache/spark/sql/jdbc/JDBCV2Suite.scala:
##########
@@ -1115,6 +1115,69 @@ class JDBCV2Suite extends QueryTest with 
SharedSparkSession with ExplainSuiteHel
     checkAnswer(df8, Seq(Row("alex")))
   }
 
+  test("scan with filter push-down with misc functions") {
+    Seq(false, true).foreach { ansiMode =>
+      withSQLConf(SQLConf.ANSI_ENABLED.key -> ansiMode.toString) {

Review Comment:
   Why do we need to test with `ANSI_ENABLED`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [spark] beliefer commented on a diff in pull request #37169: [SPARK-38901][SQL] DS V2 supports push down misc functions

Reply via email to