[
https://issues.apache.org/jira/browse/SPARK-34512?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Apache Spark reassigned SPARK-34512:
------------------------------------
Assignee: Apache Spark
> Disable validate default values when parsing Avro schemas
> ---------------------------------------------------------
>
> Key: SPARK-34512
> URL: https://issues.apache.org/jira/browse/SPARK-34512
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Affects Versions: 3.2.0
> Reporter: Yuming Wang
> Assignee: Apache Spark
> Priority: Major
>
> This is a regression. To reproduce the issue:
> {code:scala}
> // Add this test to HiveSerDeReadWriteSuite
> test("SPARK-34512") {
> withTable("t1") {
> hiveClient.runSqlHive(
> """
> |CREATE TABLE t1
> | ROW FORMAT SERDE
> | 'org.apache.hadoop.hive.serde2.avro.AvroSerDe'
> | STORED AS INPUTFORMAT
> | 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat'
> | OUTPUTFORMAT
> | 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat'
> | TBLPROPERTIES (
> | 'avro.schema.literal'='{
> | "namespace": "org.apache.spark.sql.hive.test",
> | "name": "schema_with_default_value",
> | "type": "record",
> | "fields": [
> | {
> | "name": "ARRAY_WITH_DEFAULT",
> | "type": {"type": "array", "items": "string"},
> | "default": null
> | }
> | ]
> | }')
> |""".stripMargin)
> spark.sql("select * from t1").show
> }
> }
> {code}
> {noformat}
> org.apache.avro.AvroTypeException: Invalid default for field
> ARRAY_WITH_DEFAULT: null not a {"type":"array","items":"string"}
> at org.apache.avro.Schema.validateDefault(Schema.java:1571)
> at org.apache.avro.Schema.access$500(Schema.java:87)
> at org.apache.avro.Schema$Field.<init>(Schema.java:544)
> at org.apache.avro.Schema.parse(Schema.java:1678)
> at org.apache.avro.Schema$Parser.parse(Schema.java:1425)
> at org.apache.avro.Schema$Parser.parse(Schema.java:1413)
> at
> org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.getSchemaFor(AvroSerdeUtils.java:268)
> at
> org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.determineSchemaOrThrowException(AvroSerdeUtils.java:111)
> at
> org.apache.hadoop.hive.serde2.avro.AvroSerDe.determineSchemaOrReturnErrorSchema(AvroSerDe.java:187)
> at
> org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:107)
> at
> org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:83)
> at
> org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:533)
> at
> org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:450)
> at
> org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:437)
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:281)
> at
> org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:263)
> at
> org.apache.hadoop.hive.ql.metadata.Table.getColsInternal(Table.java:641)
> at org.apache.hadoop.hive.ql.metadata.Table.getCols(Table.java:624)
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:831)
> at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:867)
> at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4356)
> at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:354)
> at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199)
> at
> org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100)
> at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2183)
> at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839)
> at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237)
> at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$runHive$1(HiveClientImpl.scala:820)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:291)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:224)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:223)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:273)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.runHive(HiveClientImpl.scala:800)
> at
> org.apache.spark.sql.hive.client.HiveClientImpl.runSqlHive(HiveClientImpl.scala:787)
> {noformat}
--
This message was sent by Atlassian Jira
(v8.3.4#803005)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]