This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 44a88edc995e [MINOR][TESTS] Collation - extending golden file coverage 44a88edc995e is described below commit 44a88edc995e1e09adfab80e63a409f8ced3b131 Author: Aleksandar Tomic <aleksandar.to...@databricks.com> AuthorDate: Mon Mar 18 13:52:48 2024 +0500 [MINOR][TESTS] Collation - extending golden file coverage ### What changes were proposed in this pull request? This PR adds new golden file tests for collation feature: 1) DESCRIBE 2) Basic array operations 3) Removing struct test since the same is already covered in golden files. ### Why are the changes needed? Extending test coverage for collation feature. ### Does this PR introduce _any_ user-facing change? ### How was this patch tested? ### Was this patch authored or co-authored using generative AI tooling? No Closes #45515 from dbatomic/collation_golden_files_update. Authored-by: Aleksandar Tomic <aleksandar.to...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../sql-tests/analyzer-results/collations.sql.out | 38 +++++++++++++++++- .../test/resources/sql-tests/inputs/collations.sql | 15 +++++++- .../resources/sql-tests/results/collations.sql.out | 45 +++++++++++++++++++++- .../org/apache/spark/sql/CollationSuite.scala | 22 ----------- 4 files changed, 92 insertions(+), 28 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out index 6d9bb3470be6..3a0f8eec02ba 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/collations.sql.out @@ -37,6 +37,12 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d +- LocalRelation [col1#x, col2#x] +-- !query +describe table t1 +-- !query analysis +DescribeTableCommand `spark_catalog`.`default`.`t1`, false, [col_name#x, data_type#x, comment#x] + + -- 
!query select count(*) from t1 group by utf8_binary -- !query analysis @@ -207,7 +213,7 @@ CreateDataSourceTableCommand `spark_catalog`.`default`.`t1`, false -- !query -INSERT INTO t1 VALUES (named_struct('utf8_binary', 'aaa', 'utf8_binary_lcase', 'aaa')) +insert into t1 values (named_struct('utf8_binary', 'aaa', 'utf8_binary_lcase', 'aaa')) -- !query analysis InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [c1] +- Project [named_struct(utf8_binary, col1#x.utf8_binary, utf8_binary_lcase, cast(col1#x.utf8_binary_lcase as string collate UTF8_BINARY_LCASE)) AS c1#x] @@ -215,7 +221,7 @@ InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_d -- !query -INSERT INTO t1 VALUES (named_struct('utf8_binary', 'AAA', 'utf8_binary_lcase', 'AAA')) +insert into t1 values (named_struct('utf8_binary', 'AAA', 'utf8_binary_lcase', 'AAA')) -- !query analysis InsertIntoHadoopFsRelationCommand file:[not included in comparison]/{warehouse_dir}/t1, false, Parquet, [path=file:[not included in comparison]/{warehouse_dir}/t1], Append, `spark_catalog`.`default`.`t1`, org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:[not included in comparison]/{warehouse_dir}/t1), [c1] +- Project [named_struct(utf8_binary, col1#x.utf8_binary, utf8_binary_lcase, cast(col1#x.utf8_binary_lcase as string collate UTF8_BINARY_LCASE)) AS c1#x] @@ -243,3 +249,31 @@ drop table t1 -- !query analysis DropTable false, false +- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t1 + + +-- !query +select array_contains(ARRAY('aaa' collate utf8_binary_lcase),'AAA' collate utf8_binary_lcase) +-- !query analysis +Project [array_contains(array(collate(aaa, utf8_binary_lcase)), collate(AAA, utf8_binary_lcase)) 
AS array_contains(array(collate(aaa)), collate(AAA))#x] ++- OneRowRelation + + +-- !query +select array_position(ARRAY('aaa' collate utf8_binary_lcase, 'bbb' collate utf8_binary_lcase),'BBB' collate utf8_binary_lcase) +-- !query analysis +Project [array_position(array(collate(aaa, utf8_binary_lcase), collate(bbb, utf8_binary_lcase)), collate(BBB, utf8_binary_lcase)) AS array_position(array(collate(aaa), collate(bbb)), collate(BBB))#xL] ++- OneRowRelation + + +-- !query +select nullif('aaa' COLLATE utf8_binary_lcase, 'AAA' COLLATE utf8_binary_lcase) +-- !query analysis +Project [nullif(collate(aaa, utf8_binary_lcase), collate(AAA, utf8_binary_lcase)) AS nullif(collate(aaa), collate(AAA))#x] ++- OneRowRelation + + +-- !query +select least('aaa' COLLATE utf8_binary_lcase, 'AAA' collate utf8_binary_lcase, 'a' collate utf8_binary_lcase) +-- !query analysis +Project [least(collate(aaa, utf8_binary_lcase), collate(AAA, utf8_binary_lcase), collate(a, utf8_binary_lcase)) AS least(collate(aaa), collate(AAA), collate(a))#x] ++- OneRowRelation diff --git a/sql/core/src/test/resources/sql-tests/inputs/collations.sql b/sql/core/src/test/resources/sql-tests/inputs/collations.sql index 52ce58b80823..a56d6c9ae1ea 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/collations.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/collations.sql @@ -7,6 +7,9 @@ insert into t1 values('AAA', 'AAA'); insert into t1 values('bbb', 'bbb'); insert into t1 values('BBB', 'BBB'); +-- describe +describe table t1; + -- group by and count utf8_binary select count(*) from t1 group by utf8_binary; @@ -49,8 +52,8 @@ select col1 collate utf8_binary_lcase from values ('aaa'), ('bbb'), ('BBB'), ('z -- create table with struct field create table t1 (c1 struct<utf8_binary: string collate utf8_binary, utf8_binary_lcase: string collate utf8_binary_lcase>) USING PARQUET; -INSERT INTO t1 VALUES (named_struct('utf8_binary', 'aaa', 'utf8_binary_lcase', 'aaa')); -INSERT INTO t1 VALUES 
(named_struct('utf8_binary', 'AAA', 'utf8_binary_lcase', 'AAA')); +insert into t1 values (named_struct('utf8_binary', 'aaa', 'utf8_binary_lcase', 'aaa')); +insert into t1 values (named_struct('utf8_binary', 'AAA', 'utf8_binary_lcase', 'AAA')); -- aggregate against nested field utf8_binary select count(*) from t1 group by c1.utf8_binary; @@ -59,3 +62,11 @@ select count(*) from t1 group by c1.utf8_binary; select count(*) from t1 group by c1.utf8_binary_lcase; drop table t1; + +-- array function tests +select array_contains(ARRAY('aaa' collate utf8_binary_lcase),'AAA' collate utf8_binary_lcase); +select array_position(ARRAY('aaa' collate utf8_binary_lcase, 'bbb' collate utf8_binary_lcase),'BBB' collate utf8_binary_lcase); + +-- utility +select nullif('aaa' COLLATE utf8_binary_lcase, 'AAA' COLLATE utf8_binary_lcase); +select least('aaa' COLLATE utf8_binary_lcase, 'AAA' collate utf8_binary_lcase, 'a' collate utf8_binary_lcase); diff --git a/sql/core/src/test/resources/sql-tests/results/collations.sql.out b/sql/core/src/test/resources/sql-tests/results/collations.sql.out index 7d7c054c2b08..4954196a0ccd 100644 --- a/sql/core/src/test/resources/sql-tests/results/collations.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/collations.sql.out @@ -39,6 +39,15 @@ struct<> +-- !query +describe table t1 +-- !query schema +struct<col_name:string,data_type:string,comment:string> +-- !query output +utf8_binary string +utf8_binary_lcase string collate UTF8_BINARY_LCASE + + -- !query select count(*) from t1 group by utf8_binary -- !query schema @@ -220,7 +229,7 @@ struct<> -- !query -INSERT INTO t1 VALUES (named_struct('utf8_binary', 'aaa', 'utf8_binary_lcase', 'aaa')) +insert into t1 values (named_struct('utf8_binary', 'aaa', 'utf8_binary_lcase', 'aaa')) -- !query schema struct<> -- !query output @@ -228,7 +237,7 @@ struct<> -- !query -INSERT INTO t1 VALUES (named_struct('utf8_binary', 'AAA', 'utf8_binary_lcase', 'AAA')) +insert into t1 values (named_struct('utf8_binary', 
'AAA', 'utf8_binary_lcase', 'AAA')) -- !query schema struct<> -- !query output @@ -258,3 +267,35 @@ drop table t1 struct<> -- !query output + + +-- !query +select array_contains(ARRAY('aaa' collate utf8_binary_lcase),'AAA' collate utf8_binary_lcase) +-- !query schema +struct<array_contains(array(collate(aaa)), collate(AAA)):boolean> +-- !query output +true + + +-- !query +select array_position(ARRAY('aaa' collate utf8_binary_lcase, 'bbb' collate utf8_binary_lcase),'BBB' collate utf8_binary_lcase) +-- !query schema +struct<array_position(array(collate(aaa), collate(bbb)), collate(BBB)):bigint> +-- !query output +2 + + +-- !query +select nullif('aaa' COLLATE utf8_binary_lcase, 'AAA' COLLATE utf8_binary_lcase) +-- !query schema +struct<nullif(collate(aaa), collate(AAA)):string collate UTF8_BINARY_LCASE> +-- !query output +NULL + + +-- !query +select least('aaa' COLLATE utf8_binary_lcase, 'AAA' collate utf8_binary_lcase, 'a' collate utf8_binary_lcase) +-- !query schema +struct<least(collate(aaa), collate(AAA), collate(a)):string collate UTF8_BINARY_LCASE> +-- !query output +a diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala index aaf3e88c9bdb..42506950149d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/CollationSuite.scala @@ -442,28 +442,6 @@ class CollationSuite extends DatasourceV2SQLBase with AdaptiveSparkPlanHelper { } } - test("create table with collations inside a struct") { - val tableName = "struct_collation_tbl" - val collationName = "UTF8_BINARY_LCASE" - val collationId = CollationFactory.collationNameToId(collationName) - - withTable(tableName) { - sql( - s""" - |CREATE TABLE $tableName - |(c1 STRUCT<name: STRING COLLATE $collationName, age: INT>) - |USING PARQUET - |""".stripMargin) - - sql(s"INSERT INTO $tableName VALUES (named_struct('name', 'aaa', 'id', 1))") - sql(s"INSERT 
INTO $tableName VALUES (named_struct('name', 'AAA', 'id', 2))") - - checkAnswer(sql(s"SELECT DISTINCT collation(c1.name) FROM $tableName"), - Seq(Row(collationName))) - assert(sql(s"SELECT c1.name FROM $tableName").schema.head.dataType == StringType(collationId)) - } - } - test("add collated column with alter table") { val tableName = "alter_column_tbl" val defaultCollation = "UTF8_BINARY" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org