This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 806f8e0466b9 [SPARK-47349][SQL][TESTS] Refactor string function `startsWith` and `endsWith` tests 806f8e0466b9 is described below commit 806f8e0466b968d3fe87c7bbe3326bdf5458677a Author: Stevo Mitric <stevo.mit...@databricks.com> AuthorDate: Tue Mar 12 16:54:55 2024 -0700 [SPARK-47349][SQL][TESTS] Refactor string function `startsWith` and `endsWith` tests ### What changes were proposed in this pull request? Refactored tests inside `CollationSuite` by migrating `startsWith` and `endsWith` tests into a new `UTF8StringWithCollationSuite` suite that performs string-level unit tests. Changes originally proposed in [this PR](https://github.com/apache/spark/pull/45421#discussion_r1519451854). ### Why are the changes needed? Reduces clutter in `CollationSuite`. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Test suite proposed in this PR ### Was this patch authored or co-authored using generative AI tooling? No Closes #45477 from stevomitric/stevomitric/string-function-tests. Authored-by: Stevo Mitric <stevo.mit...@databricks.com> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .../unsafe/types/UTF8StringWithCollationSuite.java | 103 +++++++++++++++++++++ .../org/apache/spark/sql/CollationSuite.scala | 60 +----------- 2 files changed, 105 insertions(+), 58 deletions(-) diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringWithCollationSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringWithCollationSuite.java new file mode 100644 index 000000000000..b60da7b945a4 --- /dev/null +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringWithCollationSuite.java @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.unsafe.types; + +import org.apache.spark.SparkException; +import org.apache.spark.sql.catalyst.util.CollationFactory; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + + +public class UTF8StringWithCollationSuite { + + private void assertStartsWith(String pattern, String prefix, String collationName, boolean value) + throws SparkException { + assertEquals(UTF8String.fromString(pattern).startsWith(UTF8String.fromString(prefix), + CollationFactory.collationNameToId(collationName)), value); + } + + private void assertEndsWith(String pattern, String suffix, String collationName, boolean value) + throws SparkException { + assertEquals(UTF8String.fromString(pattern).endsWith(UTF8String.fromString(suffix), + CollationFactory.collationNameToId(collationName)), value); + } + + @Test + public void startsWithTest() throws SparkException { + assertStartsWith("", "", "UTF8_BINARY", true); + assertStartsWith("c", "", "UTF8_BINARY", true); + assertStartsWith("", "c", "UTF8_BINARY", false); + assertStartsWith("abcde", "a", "UTF8_BINARY", true); + assertStartsWith("abcde", "A", "UTF8_BINARY", false); + assertStartsWith("abcde", "bcd", "UTF8_BINARY", false); + assertStartsWith("abcde", "BCD", "UTF8_BINARY", false); + assertStartsWith("", "", "UNICODE", true); + assertStartsWith("c", "", "UNICODE", true); + assertStartsWith("", "c", 
"UNICODE", false); + assertStartsWith("abcde", "a", "UNICODE", true); + assertStartsWith("abcde", "A", "UNICODE", false); + assertStartsWith("abcde", "bcd", "UNICODE", false); + assertStartsWith("abcde", "BCD", "UNICODE", false); + assertStartsWith("", "", "UTF8_BINARY_LCASE", true); + assertStartsWith("c", "", "UTF8_BINARY_LCASE", true); + assertStartsWith("", "c", "UTF8_BINARY_LCASE", false); + assertStartsWith("abcde", "a", "UTF8_BINARY_LCASE", true); + assertStartsWith("abcde", "A", "UTF8_BINARY_LCASE", true); + assertStartsWith("abcde", "abc", "UTF8_BINARY_LCASE", true); + assertStartsWith("abcde", "BCD", "UTF8_BINARY_LCASE", false); + assertStartsWith("", "", "UNICODE_CI", true); + assertStartsWith("c", "", "UNICODE_CI", true); + assertStartsWith("", "c", "UNICODE_CI", false); + assertStartsWith("abcde", "a", "UNICODE_CI", true); + assertStartsWith("abcde", "A", "UNICODE_CI", true); + assertStartsWith("abcde", "abc", "UNICODE_CI", true); + assertStartsWith("abcde", "BCD", "UNICODE_CI", false); + } + + @Test + public void endsWithTest() throws SparkException { + assertEndsWith("", "", "UTF8_BINARY", true); + assertEndsWith("c", "", "UTF8_BINARY", true); + assertEndsWith("", "c", "UTF8_BINARY", false); + assertEndsWith("abcde", "e", "UTF8_BINARY", true); + assertEndsWith("abcde", "E", "UTF8_BINARY", false); + assertEndsWith("abcde", "bcd", "UTF8_BINARY", false); + assertEndsWith("abcde", "BCD", "UTF8_BINARY", false); + assertEndsWith("", "", "UNICODE", true); + assertEndsWith("c", "", "UNICODE", true); + assertEndsWith("", "c", "UNICODE", false); + assertEndsWith("abcde", "e", "UNICODE", true); + assertEndsWith("abcde", "E", "UNICODE", false); + assertEndsWith("abcde", "bcd", "UNICODE", false); + assertEndsWith("abcde", "BCD", "UNICODE", false); + assertEndsWith("", "", "UTF8_BINARY_LCASE", true); + assertEndsWith("c", "", "UTF8_BINARY_LCASE", true); + assertEndsWith("", "c", "UTF8_BINARY_LCASE", false); + assertEndsWith("abcde", "e", "UTF8_BINARY_LCASE", 
true); + assertEndsWith("abcde", "E", "UTF8_BINARY_LCASE", true); + assertEndsWith("abcde", "cde", "UTF8_BINARY_LCASE", true); + assertEndsWith("abcde", "BCD", "UTF8_BINARY_LCASE", false); + assertEndsWith("", "", "UNICODE_CI", true); + assertEndsWith("c", "", "UNICODE_CI", true); + assertEndsWith("", "c", "UNICODE_CI", false); + assertEndsWith("abcde", "e", "UNICODE_CI", true); + assertEndsWith("abcde", "E", "UNICODE_CI", true); + assertEndsWith("abcde", "cde", "UNICODE_CI", true); + assertEndsWith("abcde", "BCD", "UNICODE_CI", false); + } +} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index 685067513b67..72e72a53c4f6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -329,42 +329,14 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { test("Support startsWith string expression with Collation") { // Supported collations val checks = Seq( - CollationTestCase("", "", "UTF8_BINARY", true), - CollationTestCase("c", "", "UTF8_BINARY", true), - CollationTestCase("", "c", "UTF8_BINARY", false), - CollationTestCase("abcde", "a", "UTF8_BINARY", true), - CollationTestCase("abcde", "A", "UTF8_BINARY", false), CollationTestCase("abcde", "abc", "UTF8_BINARY", true), CollationTestCase("abcde", "ABC", "UTF8_BINARY", false), - CollationTestCase("abcde", "bcd", "UTF8_BINARY", false), - CollationTestCase("abcde", "BCD", "UTF8_BINARY", false), - CollationTestCase("", "", "UNICODE", true), - CollationTestCase("c", "", "UNICODE", true), - CollationTestCase("", "c", "UNICODE", false), - CollationTestCase("abcde", "a", "UNICODE", true), - CollationTestCase("abcde", "A", "UNICODE", false), CollationTestCase("abcde", "abc", "UNICODE", true), CollationTestCase("abcde", "ABC", "UNICODE", false), - CollationTestCase("abcde", "bcd", "UNICODE", false), - 
CollationTestCase("abcde", "BCD", "UNICODE", false), - CollationTestCase("", "", "UTF8_BINARY_LCASE", true), - CollationTestCase("c", "", "UTF8_BINARY_LCASE", true), - CollationTestCase("", "c", "UTF8_BINARY_LCASE", false), - CollationTestCase("abcde", "a", "UTF8_BINARY_LCASE", true), - CollationTestCase("abcde", "A", "UTF8_BINARY_LCASE", true), - CollationTestCase("abcde", "abc", "UTF8_BINARY_LCASE", true), CollationTestCase("abcde", "ABC", "UTF8_BINARY_LCASE", true), CollationTestCase("abcde", "bcd", "UTF8_BINARY_LCASE", false), - CollationTestCase("abcde", "BCD", "UTF8_BINARY_LCASE", false), - CollationTestCase("", "", "UNICODE_CI", true), - CollationTestCase("c", "", "UNICODE_CI", true), - CollationTestCase("", "c", "UNICODE_CI", false), - CollationTestCase("abcde", "a", "UNICODE_CI", true), - CollationTestCase("abcde", "A", "UNICODE_CI", true), - CollationTestCase("abcde", "abc", "UNICODE_CI", true), CollationTestCase("abcde", "ABC", "UNICODE_CI", true), - CollationTestCase("abcde", "bcd", "UNICODE_CI", false), - CollationTestCase("abcde", "BCD", "UNICODE_CI", false) + CollationTestCase("abcde", "bcd", "UNICODE_CI", false) ) checks.foreach(testCase => { checkAnswer(sql(s"SELECT startswith(collate('${testCase.left}', '${testCase.collation}')," + @@ -375,42 +347,14 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { test("Support endsWith string expression with Collation") { // Supported collations val checks = Seq( - CollationTestCase("", "", "UTF8_BINARY", true), - CollationTestCase("c", "", "UTF8_BINARY", true), - CollationTestCase("", "c", "UTF8_BINARY", false), - CollationTestCase("abcde", "e", "UTF8_BINARY", true), - CollationTestCase("abcde", "E", "UTF8_BINARY", false), CollationTestCase("abcde", "cde", "UTF8_BINARY", true), CollationTestCase("abcde", "CDE", "UTF8_BINARY", false), - CollationTestCase("abcde", "bcd", "UTF8_BINARY", false), - CollationTestCase("abcde", "BCD", "UTF8_BINARY", false), - CollationTestCase("", "", 
"UNICODE", true), - CollationTestCase("c", "", "UNICODE", true), - CollationTestCase("", "c", "UNICODE", false), - CollationTestCase("abcde", "e", "UNICODE", true), - CollationTestCase("abcde", "E", "UNICODE", false), CollationTestCase("abcde", "cde", "UNICODE", true), CollationTestCase("abcde", "CDE", "UNICODE", false), - CollationTestCase("abcde", "bcd", "UNICODE", false), - CollationTestCase("abcde", "BCD", "UNICODE", false), - CollationTestCase("", "", "UTF8_BINARY_LCASE", true), - CollationTestCase("c", "", "UTF8_BINARY_LCASE", true), - CollationTestCase("", "c", "UTF8_BINARY_LCASE", false), - CollationTestCase("abcde", "e", "UTF8_BINARY_LCASE", true), - CollationTestCase("abcde", "E", "UTF8_BINARY_LCASE", true), - CollationTestCase("abcde", "cde", "UTF8_BINARY_LCASE", true), CollationTestCase("abcde", "CDE", "UTF8_BINARY_LCASE", true), CollationTestCase("abcde", "bcd", "UTF8_BINARY_LCASE", false), - CollationTestCase("abcde", "BCD", "UTF8_BINARY_LCASE", false), - CollationTestCase("", "", "UNICODE_CI", true), - CollationTestCase("c", "", "UNICODE_CI", true), - CollationTestCase("", "c", "UNICODE_CI", false), - CollationTestCase("abcde", "e", "UNICODE_CI", true), - CollationTestCase("abcde", "E", "UNICODE_CI", true), - CollationTestCase("abcde", "cde", "UNICODE_CI", true), CollationTestCase("abcde", "CDE", "UNICODE_CI", true), - CollationTestCase("abcde", "bcd", "UNICODE_CI", false), - CollationTestCase("abcde", "BCD", "UNICODE_CI", false) + CollationTestCase("abcde", "bcd", "UNICODE_CI", false) ) checks.foreach(testCase => { checkAnswer(sql(s"SELECT endswith(collate('${testCase.left}', '${testCase.collation}')," + --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org