This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 8f719adcf556 [SPARK-41547][CONNECT][TESTS] Re-enable Spark Connect function tests with ANSI mode 8f719adcf556 is described below commit 8f719adcf556f23ba66d3742266f4ca2e4875530 Author: Martin Grund <martin.gr...@databricks.com> AuthorDate: Tue May 7 09:14:06 2024 -0700 [SPARK-41547][CONNECT][TESTS] Re-enable Spark Connect function tests with ANSI mode ### What changes were proposed in this pull request? This patch re-enables the previously failing tests after enablement of ANSI SQL. ### Why are the changes needed? Spark 4 / ANSI SQL ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Re-enabled tests ### Was this patch authored or co-authored using generative AI tooling? No Closes #46432 from grundprinzip/grundprinzip/SPARK-41547. Authored-by: Martin Grund <martin.gr...@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../sql/tests/connect/test_connect_function.py | 33 ++++++++++++++-------- 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/python/pyspark/sql/tests/connect/test_connect_function.py b/python/pyspark/sql/tests/connect/test_connect_function.py index 2f21dd5a7d3a..9d4db8cf7d15 100644 --- a/python/pyspark/sql/tests/connect/test_connect_function.py +++ b/python/pyspark/sql/tests/connect/test_connect_function.py @@ -2030,7 +2030,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S (CF.sentences, SF.sentences), (CF.initcap, SF.initcap), (CF.soundex, SF.soundex), - (CF.bin, SF.bin), (CF.hex, SF.hex), (CF.unhex, SF.unhex), (CF.length, SF.length), @@ -2043,6 +2042,19 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S sdf.select(sfunc("a"), sfunc(sdf.b)).toPandas(), ) + query = """ + SELECT * FROM VALUES + (' 1 ', '2 ', NULL), (' 3', NULL, '4') + AS tab(a, b, c) + """ + cdf = self.connect.sql(query) + sdf = self.spark.sql(query) + + self.assert_eq( 
cdf.select(CF.bin(cdf.a), CF.bin(cdf.b)).toPandas(), + sdf.select(SF.bin(sdf.a), SF.bin(sdf.b)).toPandas(), + ) + def test_string_functions_multi_args(self): query = """ SELECT * FROM VALUES @@ -2149,15 +2161,15 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S def test_date_ts_functions(self): query = """ SELECT * FROM VALUES - ('1997/02/28 10:30:00', '2023/03/01 06:00:00', 'JST', 1428476400, 2020, 12, 6), - ('2000/01/01 04:30:05', '2020/05/01 12:15:00', 'PST', 1403892395, 2022, 12, 6) + ('1997-02-28 10:30:00', '2023-03-01 06:00:00', 'JST', 1428476400, 2020, 12, 6), + ('2000-01-01 04:30:05', '2020-05-01 12:15:00', 'PST', 1403892395, 2022, 12, 6) AS tab(ts1, ts2, tz, seconds, Y, M, D) """ # +-------------------+-------------------+---+----------+----+---+---+ # | ts1| ts2| tz| seconds| Y| M| D| # +-------------------+-------------------+---+----------+----+---+---+ - # |1997/02/28 10:30:00|2023/03/01 06:00:00|JST|1428476400|2020| 12| 6| - # |2000/01/01 04:30:05|2020/05/01 12:15:00|PST|1403892395|2022| 12| 6| + # |1997-02-28 10:30:00|2023-03-01 06:00:00|JST|1428476400|2020| 12| 6| + # |2000-01-01 04:30:05|2020-05-01 12:15:00|PST|1403892395|2022| 12| 6| # +-------------------+-------------------+---+----------+----+---+---+ cdf = self.connect.sql(query) @@ -2213,14 +2225,14 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S (CF.to_date, SF.to_date), ]: self.assert_eq( - cdf.select(cfunc(cdf.ts1, format="yyyy-MM-dd")).toPandas(), - sdf.select(sfunc(sdf.ts1, format="yyyy-MM-dd")).toPandas(), + cdf.select(cfunc(cdf.ts1, format="yyyy-MM-dd HH:mm:ss")).toPandas(), + sdf.select(sfunc(sdf.ts1, format="yyyy-MM-dd HH:mm:ss")).toPandas(), ) self.compare_by_show( # [left]: datetime64[ns, America/Los_Angeles] # [right]: datetime64[ns] - cdf.select(CF.to_timestamp(cdf.ts1, format="yyyy-MM-dd")), - sdf.select(SF.to_timestamp(sdf.ts1, format="yyyy-MM-dd")), + cdf.select(CF.to_timestamp(cdf.ts1, format="yyyy-MM-dd 
HH:mm:ss")), + sdf.select(SF.to_timestamp(sdf.ts1, format="yyyy-MM-dd HH:mm:ss")), ) # With tz parameter @@ -2590,9 +2602,6 @@ if __name__ == "__main__": import os from pyspark.sql.tests.connect.test_connect_function import * # noqa: F401 - # TODO(SPARK-41547): Enable ANSI mode in this file. - os.environ["SPARK_ANSI_SQL_MODE"] = "false" - try: import xmlrunner # type: ignore --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org