uros-db commented on code in PR #47828:
URL: https://github.com/apache/spark/pull/47828#discussion_r1728797420
##########
sql/core/src/test/resources/sql-tests/inputs/collations.sql:
##########
@@ -101,3 +101,226 @@ select str_to_map(text collate utf8_binary, pairDelim
collate utf8_lcase, keyVal
select str_to_map(text collate utf8_binary, pairDelim collate utf8_binary,
keyValueDelim collate utf8_binary) from t4;
drop table t4;
+
+create table t1(utf8_binary string collate utf8_binary, utf8_lcase string
collate utf8_lcase) using parquet;
+insert into t1 values ('Spark', 'SQL');
+insert into t1 values ('aaAaAAaA', 'aaAaAAaA');
+insert into t1 values ('aaAaAAaA', 'aaAaaAaA');
+insert into t1 values ('aaAaAAaA', 'aaAaaAaAaaAaaAaAaaAaaAaA');
+insert into t1 values ('İo', 'İo');
+insert into t1 values ('İo', 'i̇o');
+insert into t1 values ('efd2', 'efd2');
+insert into t1 values ('Hello, world! Nice day.', 'Hello, world! Nice day.');
+insert into t1 values ('Something else. Nothing here.', 'Something else.
Nothing here.');
+insert into t1 values ('kitten', 'sitTing');
+insert into t1 values ('abc', 'abc');
+insert into t1 values ('abcdcba', 'aBcDCbA');
+
+create table t2(ascii long) using parquet;
+insert into t2 values (97);
+insert into t2 values (66);
+
+create table t3(ascii double) using parquet;
+insert into t3 values (97.52143);
+insert into t3 values (66.421);
+
+create table t4(format string collate utf8_binary, utf8_binary string collate
utf8_binary, utf8_lcase string collate utf8_lcase) using parquet;
+insert into t4 values ('%s%s', 'abCdE', 'abCdE');
+
+create table t5(num long) using parquet;
+insert into t5 values (97);
+insert into t5 values (66);
+
+create table t6(utf8_binary string collate utf8_binary, utf8_lcase string
collate utf8_lcase) using parquet;
+insert into t6 values ('aaAaAAaA', 'aaAaaAaA');
+insert into t6 values ('efd2', 'efd2');
+
+-- ConcatWs
+select concat_ws(' ', utf8_binary, utf8_lcase) from t1;
+select concat_ws(' ' collate utf8_binary, utf8_binary, 'SQL' collate
utf8_lcase) from t1;
+select concat_ws(',', utf8_lcase, 'word'), concat_ws(',', utf8_binary, 'word')
from t1;
+select concat_ws(',', utf8_lcase, 'word' collate utf8_binary), concat_ws(',',
utf8_binary, 'word' collate utf8_lcase) from t1;
+
+-- Elt
+select elt(2, utf8_binary, utf8_lcase) from t1;
+select elt(1, utf8_binary collate utf8_lcase, utf8_lcase) from t1;
+select elt(1, utf8_binary, 'word'), elt(1, utf8_lcase, 'word') from t1;
+
+-- SplitPart
+select split_part(utf8_binary, utf8_lcase, 3) from t1;
+select split_part(utf8_binary, 'a', 3), split_part(utf8_lcase, 'a', 3) from t1;
+select split_part(utf8_binary, 'a' collate utf8_lcase, 3),
split_part(utf8_lcase, 'a' collate utf8_binary, 3) from t1;
+
+-- Contains
+select contains(utf8_binary, utf8_lcase) from t1;
+select contains(utf8_binary collate utf8_lcase, utf8_lcase),
contains(utf8_binary, utf8_lcase collate utf8_binary) from t1;
+select contains(utf8_binary, 'AAa'), contains(utf8_lcase, 'AaAA') from t1;
Review Comment:
Let's make sure that we have complete test coverage.
Think: what are all the ways someone can use this expression?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]