This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 8f719adcf556 [SPARK-41547][CONNECT][TESTS] Re-enable Spark Connect function tests with ANSI mode 8f719adcf556 is described below commit 8f719adcf556f23ba66d3742266f4ca2e4875530 Author: Martin Grund <martin.gr...@databricks.com> AuthorDate: Tue May 7 09:14:06 2024 -0700 [SPARK-41547][CONNECT][TESTS] Re-enable Spark Connect function tests with ANSI mode ### What changes were proposed in this pull request? This patch re-enables the previously failing tests after enablement of ANSI SQL. ### Why are the changes needed? Spark 4 / ANSI SQL ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Re-enabled tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #46432 from grundprinzip/grundprinzip/SPARK-41547. Authored-by: Martin Grund <martin.gr...@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../sql/tests/connect/test_connect_function.py | 33 ++++++++++++++-------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/python/pyspark/sql/tests/connect/test_connect_function.py b/python/pyspark/sql/tests/connect/test_connect_function.py index 2f21dd5a7d3a..9d4db8cf7d15 100644 --- a/python/pyspark/sql/tests/connect/test_connect_function.py +++ b/python/pyspark/sql/tests/connect/test_connect_function.py @@ -2030,7 +2030,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S (CF.sentences, SF.sentences), (CF.initcap, SF.initcap), (CF.soundex, SF.soundex), - (CF.bin, SF.bin), (CF.hex, SF.hex), (CF.unhex, SF.unhex), (CF.length, SF.length), @@ -2043,6 +2042,19 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S sdf.select(sfunc("a"), sfunc(sdf.b)).toPandas(), ) + query = """ + SELECT * FROM VALUES + (' 1 ', '2 ', NULL), (' 3', NULL, '4') + AS tab(a, b, c) + """ + cdf = self.connect.sql(query) + sdf = self.spark.sql(query) + + self.assert_eq( 
cdf.select(CF.bin(cdf.a), CF.bin(cdf.b)).toPandas(), + sdf.select(SF.bin(sdf.a), SF.bin(sdf.b)).toPandas(), + ) + def test_string_functions_multi_args(self): query = """ SELECT * FROM VALUES @@ -2149,15 +2161,15 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S def test_date_ts_functions(self): query = """ SELECT * FROM VALUES - ('1997/02/28 10:30:00', '2023/03/01 06:00:00', 'JST', 1428476400, 2020, 12, 6), - ('2000/01/01 04:30:05', '2020/05/01 12:15:00', 'PST', 1403892395, 2022, 12, 6) + ('1997-02-28 10:30:00', '2023-03-01 06:00:00', 'JST', 1428476400, 2020, 12, 6), + ('2000-01-01 04:30:05', '2020-05-01 12:15:00', 'PST', 1403892395, 2022, 12, 6) AS tab(ts1, ts2, tz, seconds, Y, M, D) """ # +-------------------+-------------------+---+----------+----+---+---+ # | ts1| ts2| tz| seconds| Y| M| D| # +-------------------+-------------------+---+----------+----+---+---+ - # |1997/02/28 10:30:00|2023/03/01 06:00:00|JST|1428476400|2020| 12| 6| - # |2000/01/01 04:30:05|2020/05/01 12:15:00|PST|1403892395|2022| 12| 6| + # |1997-02-28 10:30:00|2023-03-01 06:00:00|JST|1428476400|2020| 12| 6| + # |2000-01-01 04:30:05|2020-05-01 12:15:00|PST|1403892395|2022| 12| 6| # +-------------------+-------------------+---+----------+----+---+---+ cdf = self.connect.sql(query) @@ -2213,14 +2225,14 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S (CF.to_date, SF.to_date), ]: self.assert_eq( - cdf.select(cfunc(cdf.ts1, format="yyyy-MM-dd")).toPandas(), - sdf.select(sfunc(sdf.ts1, format="yyyy-MM-dd")).toPandas(), + cdf.select(cfunc(cdf.ts1, format="yyyy-MM-dd HH:mm:ss")).toPandas(), + sdf.select(sfunc(sdf.ts1, format="yyyy-MM-dd HH:mm:ss")).toPandas(), ) self.compare_by_show( # [left]: datetime64[ns, America/Los_Angeles] # [right]: datetime64[ns] - cdf.select(CF.to_timestamp(cdf.ts1, format="yyyy-MM-dd")), - sdf.select(SF.to_timestamp(sdf.ts1, format="yyyy-MM-dd")), + cdf.select(CF.to_timestamp(cdf.ts1, format="yyyy-MM-dd 
HH:mm:ss")), + sdf.select(SF.to_timestamp(sdf.ts1, format="yyyy-MM-dd HH:mm:ss")), ) # With tz parameter @@ -2590,9 +2602,6 @@ if __name__ == "__main__": import os from pyspark.sql.tests.connect.test_connect_function import * # noqa: F401 - # TODO(SPARK-41547): Enable ANSI mode in this file. - os.environ["SPARK_ANSI_SQL_MODE"] = "false" - try: import xmlrunner # type: ignore --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org