This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new a18ba89df9aa [SPARK-49342][SQL] Make TO_AVRO SQL function
'jsonFormatSchema' argument optional
a18ba89df9aa is described below
commit a18ba89df9aaa66cecb8c564a5b3bccdb51a2041
Author: Daniel Tenedorio <[email protected]>
AuthorDate: Fri Aug 23 07:11:34 2024 +0200
[SPARK-49342][SQL] Make TO_AVRO SQL function 'jsonFormatSchema' argument
optional
### What changes were proposed in this pull request?
This PR makes the `TO_AVRO` SQL function `jsonFormatSchema` argument
optional.
For example, now it is possible to just call it with a single input
argument:
```
create table t as
select named_struct('u', named_struct('member0', member0, 'member1',
member1)) as s
from values (1, null), (null, 'a') tab(member0, member1);
select length(to_avro(s)) > 0 from t;
> true
true
```
### Why are the changes needed?
This makes the `TO_AVRO` SQL function easier to use.
### Does this PR introduce _any_ user-facing change?
Yes, see above.
### How was this patch tested?
This PR adds unit test coverage.
### Was this patch authored or co-authored using generative AI tooling?
No.
Closes #47834 from dtenedor/to-avro-schema-optional.
Authored-by: Daniel Tenedorio <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../org/apache/spark/sql/avro/AvroFunctionsSuite.scala | 4 ++++
.../sql/catalyst/expressions/toFromAvroSqlFunctions.scala | 14 +++++++++++---
2 files changed, 15 insertions(+), 3 deletions(-)
diff --git
a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
index c807685db0f0..42ddfd48892c 100644
---
a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
+++
b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
@@ -316,6 +316,10 @@ class AvroFunctionsSuite extends QueryTest with
SharedSparkSession {
spark.sql(s"select from_avro(result, '$jsonFormatSchema', map()).u
from ($toAvroSql)"),
Seq(Row(Row(1, null)),
Row(Row(null, "a"))))
+ // The 'jsonFormatSchema' argument of the 'to_avro' function is optional.
+ checkAnswer(
+ spark.sql(s"select length(to_avro(s)) > 0 from t"),
+ Seq(Row(true), Row(true)))
// Negative tests.
checkError(
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala
index 88a53e459118..46f80cacc4fe 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/toFromAvroSqlFunctions.scala
@@ -131,12 +131,14 @@ case class FromAvro(child: Expression, jsonFormatSchema:
Expression, options: Ex
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = """
- _FUNC_(child, jsonFormatSchema) - Converts a Catalyst binary input value
into its corresponding
- Avro format result.
+ _FUNC_(child[, jsonFormatSchema]) - Converts a Catalyst binary input value
into its
+ corresponding Avro format result.
""",
examples = """
Examples:
- > SELECT _FUNC_(s, '{"type": "record", "name": "struct", "fields": [{
"name": "u", "type": ["int","string"] }]}', MAP()) IS NULL FROM (SELECT NULL AS
s);
+ > SELECT _FUNC_(s, '{"type": "record", "name": "struct", "fields": [{
"name": "u", "type": ["int","string"] }]}') IS NULL FROM (SELECT NULL AS s);
+ [true]
+ > SELECT _FUNC_(s) IS NULL FROM (SELECT NULL AS s);
[true]
""",
group = "misc_funcs",
@@ -145,6 +147,9 @@ case class FromAvro(child: Expression, jsonFormatSchema:
Expression, options: Ex
// scalastyle:on line.size.limit
case class ToAvro(child: Expression, jsonFormatSchema: Expression)
extends BinaryExpression with RuntimeReplaceable {
+
+ def this(child: Expression) = this(child, Literal(null))
+
override def left: Expression = child
override def right: Expression = jsonFormatSchema
@@ -157,6 +162,9 @@ case class ToAvro(child: Expression, jsonFormatSchema:
Expression)
jsonFormatSchema.dataType match {
case _: StringType if jsonFormatSchema.foldable =>
TypeCheckResult.TypeCheckSuccess
+ case _: NullType =>
+ // The 'jsonFormatSchema' argument is optional.
+ TypeCheckResult.TypeCheckSuccess
case _ =>
TypeCheckResult.TypeCheckFailure(
"The second argument of the TO_AVRO SQL function must be a constant
string " +
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]