[ https://issues.apache.org/jira/browse/SPARK-34512?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Dongjoon Hyun resolved SPARK-34512. ----------------------------------- Fix Version/s: 3.2.0 Resolution: Fixed Issue resolved by pull request 32750 [https://github.com/apache/spark/pull/32750] > Disable validate default values when parsing Avro schemas > --------------------------------------------------------- > > Key: SPARK-34512 > URL: https://issues.apache.org/jira/browse/SPARK-34512 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 3.2.0 > Reporter: Yuming Wang > Assignee: Yuming Wang > Priority: Major > Fix For: 3.2.0 > > > This is a regression. To reproduce the issue: > {code:scala} > // Add this test to HiveSerDeReadWriteSuite > test("SPARK-34512") { > withTable("t1") { > hiveClient.runSqlHive( > """ > |CREATE TABLE t1 > | ROW FORMAT SERDE > | 'org.apache.hadoop.hive.serde2.avro.AvroSerDe' > | STORED AS INPUTFORMAT > | 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' > | OUTPUTFORMAT > | 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' > | TBLPROPERTIES ( > | 'avro.schema.literal'='{ > | "namespace": "org.apache.spark.sql.hive.test", > | "name": "schema_with_default_value", > | "type": "record", > | "fields": [ > | { > | "name": "ARRAY_WITH_DEFAULT", > | "type": {"type": "array", "items": "string"}, > | "default": null > | } > | ] > | }') > |""".stripMargin) > spark.sql("select * from t1").show > } > } > {code} > {noformat} > org.apache.avro.AvroTypeException: Invalid default for field ARRAY_WITH_DEFAULT: null not a {"type":"array","items":"string"} > at org.apache.avro.Schema.validateDefault(Schema.java:1571) > at org.apache.avro.Schema.access$500(Schema.java:87) > at org.apache.avro.Schema$Field.<init>(Schema.java:544) > at org.apache.avro.Schema.parse(Schema.java:1678) > at org.apache.avro.Schema$Parser.parse(Schema.java:1425) > at org.apache.avro.Schema$Parser.parse(Schema.java:1413) > at org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.getSchemaFor(AvroSerdeUtils.java:268) > at >
org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.determineSchemaOrThrowException(AvroSerdeUtils.java:111) > at org.apache.hadoop.hive.serde2.avro.AvroSerDe.determineSchemaOrReturnErrorSchema(AvroSerDe.java:187) > at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:107) > at org.apache.hadoop.hive.serde2.avro.AvroSerDe.initialize(AvroSerDe.java:83) > at org.apache.hadoop.hive.serde2.SerDeUtils.initializeSerDe(SerDeUtils.java:533) > at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:450) > at org.apache.hadoop.hive.metastore.MetaStoreUtils.getDeserializer(MetaStoreUtils.java:437) > at org.apache.hadoop.hive.ql.metadata.Table.getDeserializerFromMetaStore(Table.java:281) > at org.apache.hadoop.hive.ql.metadata.Table.getDeserializer(Table.java:263) > at org.apache.hadoop.hive.ql.metadata.Table.getColsInternal(Table.java:641) > at org.apache.hadoop.hive.ql.metadata.Table.getCols(Table.java:624) > at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:831) > at org.apache.hadoop.hive.ql.metadata.Hive.createTable(Hive.java:867) > at org.apache.hadoop.hive.ql.exec.DDLTask.createTable(DDLTask.java:4356) > at org.apache.hadoop.hive.ql.exec.DDLTask.execute(DDLTask.java:354) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:199) > at org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:100) > at org.apache.hadoop.hive.ql.Driver.launchTask(Driver.java:2183) > at org.apache.hadoop.hive.ql.Driver.execute(Driver.java:1839) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:1526) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1237) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:1227) > at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$runHive$1(HiveClientImpl.scala:820) > at org.apache.spark.sql.hive.client.HiveClientImpl.$anonfun$withHiveState$1(HiveClientImpl.scala:291) > at >
org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:224) > at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:223) > at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:273) > at org.apache.spark.sql.hive.client.HiveClientImpl.runHive(HiveClientImpl.scala:800) > at org.apache.spark.sql.hive.client.HiveClientImpl.runSqlHive(HiveClientImpl.scala:787) > {noformat} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org