This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 47042d25290 [SPARK-40028][SQL] Add binary examples for string
expressions
47042d25290 is described below
commit 47042d25290a8d71b96d829f5a1e7ac4578cb795
Author: Jiaan Geng <[email protected]>
AuthorDate: Wed Aug 10 15:34:18 2022 +0900
[SPARK-40028][SQL] Add binary examples for string expressions
### What changes were proposed in this pull request?
Currently, Spark has many string expressions that support the binary type, but
their documentation is missing binary examples.
This PR adds binary examples for the string expressions shown below.
`Elt`
`Contains`
`StartsWith`
`EndsWith`
`StringLPad`
`StringRPad`
`Substring`
`Left`
`Length`
`BitLength`
`OctetLength`
`Base64`
### Why are the changes needed?
Add binary examples for string expressions
### Does this PR introduce _any_ user-facing change?
'No'.
It just updates the documentation for some string expressions.
### How was this patch tested?
`ExpressionInfoSuite`
Closes #37458 from beliefer/SPARK-40028.
Authored-by: Jiaan Geng <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
.../catalyst/expressions/stringExpressions.scala | 52 ++++++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
index d4504c36e4e..d0f07959cb8 100755
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala
@@ -250,6 +250,8 @@ case class ConcatWs(children: Seq[Expression])
Examples:
> SELECT _FUNC_(1, 'scala', 'java');
scala
+ > SELECT _FUNC_(1, encode('scala', 'utf-8'), encode('java', 'utf-8'));
+ scala
""",
since = "2.0.0",
group = "string_funcs")
@@ -537,10 +539,16 @@ case class BinaryPredicate(override val prettyName:
String, left: Expression, ri
Examples:
> SELECT _FUNC_('Spark SQL', 'Spark');
true
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), encode('Spark', 'utf-8'));
+ true
> SELECT _FUNC_('Spark SQL', 'SPARK');
false
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), encode('SPARK', 'utf-8'));
+ false
> SELECT _FUNC_('Spark SQL', null);
NULL
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), null);
+ NULL
> SELECT _FUNC_(x'537061726b2053514c', x'537061726b');
true
""",
@@ -572,10 +580,16 @@ case class Contains(left: Expression, right: Expression)
extends StringPredicate
Examples:
> SELECT _FUNC_('Spark SQL', 'Spark');
true
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), encode('Spark', 'utf-8'));
+ true
> SELECT _FUNC_('Spark SQL', 'SQL');
false
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), encode('SQL', 'utf-8'));
+ false
> SELECT _FUNC_('Spark SQL', null);
NULL
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), null);
+ NULL
> SELECT _FUNC_(x'537061726b2053514c', x'537061726b');
true
> SELECT _FUNC_(x'537061726b2053514c', x'53514c');
@@ -609,8 +623,12 @@ case class StartsWith(left: Expression, right: Expression)
extends StringPredica
Examples:
> SELECT _FUNC_('Spark SQL', 'SQL');
true
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), encode('SQL', 'utf-8'));
+ true
> SELECT _FUNC_('Spark SQL', 'Spark');
false
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), encode('Spark', 'utf-8'));
+ false
> SELECT _FUNC_('Spark SQL', null);
NULL
> SELECT _FUNC_(x'537061726b2053514c', x'537061726b');
@@ -1502,10 +1520,16 @@ trait PadExpressionBuilderBase extends
ExpressionBuilder {
Examples:
> SELECT _FUNC_('hi', 5, '??');
???hi
+ > SELECT _FUNC_(encode('hi', 'utf-8'), 5, encode('??', 'utf-8'));
+ ???hi
> SELECT _FUNC_('hi', 1, '??');
h
+ > SELECT _FUNC_(encode('hi', 'utf-8'), 1, encode('??', 'utf-8'));
+ h
> SELECT _FUNC_('hi', 5);
hi
+ > SELECT _FUNC_(encode('hi', 'utf-8'), 5);
+ hi
> SELECT hex(_FUNC_(unhex('aabb'), 5));
000000AABB
> SELECT hex(_FUNC_(unhex('aabb'), 5, unhex('1122')));
@@ -1581,10 +1605,16 @@ case class BinaryPad(funcName: String, str: Expression,
len: Expression, pad: Ex
Examples:
> SELECT _FUNC_('hi', 5, '??');
hi???
+ > SELECT _FUNC_(encode('hi', 'utf-8'), 5, encode('??', 'utf-8'));
+ hi???
> SELECT _FUNC_('hi', 1, '??');
h
+ > SELECT _FUNC_(encode('hi', 'utf-8'), 1, encode('??', 'utf-8'));
+ h
> SELECT _FUNC_('hi', 5);
hi
+ > SELECT _FUNC_(encode('hi', 'utf-8'), 5);
+ hi
> SELECT hex(_FUNC_(unhex('aabb'), 5));
AABB000000
> SELECT hex(_FUNC_(unhex('aabb'), 5, unhex('1122')));
@@ -1855,16 +1885,28 @@ case class StringSpace(child: Expression)
Examples:
> SELECT _FUNC_('Spark SQL', 5);
k SQL
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), 5);
+ k SQL
> SELECT _FUNC_('Spark SQL', -3);
SQL
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), -3);
+ SQL
> SELECT _FUNC_('Spark SQL', 5, 1);
k
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), 5, 1);
+ k
> SELECT _FUNC_('Spark SQL' FROM 5);
k SQL
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8') FROM 5);
+ k SQL
> SELECT _FUNC_('Spark SQL' FROM -3);
SQL
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8') FROM -3);
+ SQL
> SELECT _FUNC_('Spark SQL' FROM 5 FOR 1);
k
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8') FROM 5 FOR 1);
+ k
""",
since = "1.5.0",
group = "string_funcs")
@@ -1956,6 +1998,8 @@ case class Right(str: Expression, len: Expression)
extends RuntimeReplaceable
Examples:
> SELECT _FUNC_('Spark SQL', 3);
Spa
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'), 3);
+ Spa
""",
since = "2.3.0",
group = "string_funcs")
@@ -1988,6 +2032,8 @@ case class Left(str: Expression, len: Expression) extends
RuntimeReplaceable
Examples:
> SELECT _FUNC_('Spark SQL ');
10
+ > SELECT _FUNC_(encode('Spark SQL ', 'utf-8'));
+ 10
> SELECT CHAR_LENGTH('Spark SQL ');
10
> SELECT CHARACTER_LENGTH('Spark SQL ');
@@ -2025,6 +2071,8 @@ case class Length(child: Expression)
Examples:
> SELECT _FUNC_('Spark SQL');
72
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'));
+ 72
""",
since = "2.3.0",
group = "string_funcs")
@@ -2061,6 +2109,8 @@ case class BitLength(child: Expression)
Examples:
> SELECT _FUNC_('Spark SQL');
9
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'));
+ 9
""",
since = "2.3.0",
group = "string_funcs")
@@ -2250,6 +2300,8 @@ case class Chr(child: Expression)
Examples:
> SELECT _FUNC_('Spark SQL');
U3BhcmsgU1FM
+ > SELECT _FUNC_(encode('Spark SQL', 'utf-8'));
+ U3BhcmsgU1FM
""",
since = "1.5.0",
group = "string_funcs")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]