This is an automated email from the ASF dual-hosted git repository.
beliefer pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new dc02991fa662 [SPARK-43380][SQL][FOLLOWUP] Deprecate
toSqlType(avroSchema: Schema, …useStableIdForUnionType: Boolean): SchemaType
dc02991fa662 is described below
commit dc02991fa662c2f760315f190893cf09545e1b83
Author: Jiaan Geng <[email protected]>
AuthorDate: Mon Oct 30 21:34:30 2023 +0800
[SPARK-43380][SQL][FOLLOWUP] Deprecate toSqlType(avroSchema: Schema,
…useStableIdForUnionType: Boolean): SchemaType
### What changes were proposed in this pull request?
https://github.com/apache/spark/pull/43530 provides a new method:
```
/**
* Converts an Avro schema to a corresponding Spark SQL schema.
*
* since 4.0.0
*/
def toSqlType(avroSchema: Schema, useStableIdForUnionType: Boolean):
SchemaType = {
toSqlTypeHelper(avroSchema, Set.empty, useStableIdForUnionType)
}
```
Because taking `AvroOptions` as a parameter causes a performance regression,
the old `toSqlType` looks useless.
This PR also improves some callers of `toSqlType` by passing
`useStableIdForUnionType` directly.
### Why are the changes needed?
Deprecate toSqlType(avroSchema: Schema, …useStableIdForUnionType: Boolean):
SchemaType
### Does this PR introduce _any_ user-facing change?
'No'.
### How was this patch tested?
Existing test cases.
### Was this patch authored or co-authored using generative AI tooling?
'No'.
Closes #43557 from beliefer/SPARK-43380_followup.
Authored-by: Jiaan Geng <[email protected]>
Signed-off-by: Jiaan Geng <[email protected]>
---
.../src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala | 3 ++-
.../avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala | 2 +-
.../src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala | 2 ++
.../src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala | 2 +-
4 files changed, 6 insertions(+), 3 deletions(-)
diff --git
a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala
b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala
index 2c2a45fc3f14..06388409284a 100644
---
a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala
+++
b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala
@@ -39,7 +39,8 @@ private[sql] case class AvroDataToCatalyst(
override def inputTypes: Seq[AbstractDataType] = Seq(BinaryType)
override lazy val dataType: DataType = {
- val dt = SchemaConverters.toSqlType(expectedSchema, options).dataType
+ val dt = SchemaConverters.toSqlType(
+ expectedSchema, avroOptions.useStableIdForUnionType).dataType
parseMode match {
// With PermissiveMode, the output Catalyst row might contain columns of
null values for
// corrupt records, even if some of the columns are not nullable in the
user-provided schema.
diff --git
a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
index e738f541ca79..0e27e4a604c4 100644
--- a/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
+++ b/connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala
@@ -61,7 +61,7 @@ private[sql] object AvroUtils extends Logging {
new
FileSourceOptions(CaseInsensitiveMap(options)).ignoreCorruptFiles)
}
- SchemaConverters.toSqlType(avroSchema, options).dataType match {
+ SchemaConverters.toSqlType(avroSchema,
parsedOptions.useStableIdForUnionType).dataType match {
case t: StructType => Some(t)
case _ => throw new RuntimeException(
s"""Avro schema cannot be converted to a Spark SQL StructType:
diff --git
a/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala
b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala
index ba01a18d76f7..00fb32794e3a 100644
---
a/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala
+++
b/connector/avro/src/main/scala/org/apache/spark/sql/avro/SchemaConverters.scala
@@ -62,6 +62,8 @@ object SchemaConverters {
def toSqlType(avroSchema: Schema): SchemaType = {
toSqlType(avroSchema, false)
}
+
+ @deprecated("using toSqlType(..., useStableIdForUnionType: Boolean)
instead", "4.0.0")
def toSqlType(avroSchema: Schema, options: Map[String, String]): SchemaType
= {
toSqlTypeHelper(avroSchema, Set.empty,
AvroOptions(options).useStableIdForUnionType)
}
diff --git
a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
index 07865787d287..9095f1c0831a 100644
---
a/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
+++
b/connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroFunctionsSuite.scala
@@ -264,7 +264,7 @@ class AvroFunctionsSuite extends QueryTest with
SharedSparkSession {
val avroOptions = AvroOptions(options)
val avroSchema = avroOptions.schema.get
val sparkSchema = SchemaConverters
- .toSqlType(avroSchema, options)
+ .toSqlType(avroSchema, avroOptions.useStableIdForUnionType)
.dataType
.asInstanceOf[StructType]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]