cloud-fan commented on code in PR #42424:
URL: https://github.com/apache/spark/pull/42424#discussion_r1289540318
##########
connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala:
##########
@@ -118,268 +118,225 @@ private[sql] class AvroDeserializer(
val incompatibleMsg = errorPrefix +
s"schema is incompatible (avroType = $avroType, sqlType =
${catalystType.sql})"
+ val realDataType = SchemaConverters.toSqlType(avroType).dataType
val confKey = SQLConf.LEGACY_AVRO_ALLOW_INCOMPATIBLE_SCHEMA
val preventReadingIncorrectType = !SQLConf.get.getConf(confKey)
- val logicalDataType = SchemaConverters.toSqlType(avroType).dataType
- avroType.getType match {
- case NULL =>
- (logicalDataType, catalystType) match {
- case (_, NullType) => (updater, ordinal, _) =>
- updater.setNullAt(ordinal)
- case _ => throw new IncompatibleSchemaException(incompatibleMsg)
- }
+ (avroType.getType, catalystType) match {
+ case (NULL, NullType) => (updater, ordinal, _) =>
+ updater.setNullAt(ordinal)
+
// TODO: we can avoid boxing if future version of avro provide primitive
accessors.
- case BOOLEAN =>
- (logicalDataType, catalystType) match {
- case (_, BooleanType) => (updater, ordinal, value) =>
- updater.setBoolean(ordinal, value.asInstanceOf[Boolean])
- case _ => throw new IncompatibleSchemaException(incompatibleMsg)
- }
+ case (BOOLEAN, BooleanType) => (updater, ordinal, value) =>
+ updater.setBoolean(ordinal, value.asInstanceOf[Boolean])
+
+ case (INT, IntegerType) => (updater, ordinal, value) =>
+ updater.setInt(ordinal, value.asInstanceOf[Int])
+
+ case (LONG, dt: TimestampType)
+ if preventReadingIncorrectType &&
realDataType.isInstanceOf[DayTimeIntervalType] =>
+ throw
QueryCompilationErrors.avroIncorrectTypeError(toFieldStr(avroPath),
+ toFieldStr(catalystPath), realDataType.catalogString,
dt.catalogString, confKey.key)
+
+ case (LONG, dt: TimestampNTZType)
+ if preventReadingIncorrectType &&
realDataType.isInstanceOf[DayTimeIntervalType] =>
+ throw
QueryCompilationErrors.avroIncorrectTypeError(toFieldStr(avroPath),
+ toFieldStr(catalystPath), realDataType.catalogString,
dt.catalogString, confKey.key)
+
+ case (LONG, dt: DateType)
+ if preventReadingIncorrectType &&
realDataType.isInstanceOf[DayTimeIntervalType] =>
+ throw
QueryCompilationErrors.avroIncorrectTypeError(toFieldStr(avroPath),
+ toFieldStr(catalystPath), realDataType.catalogString,
dt.catalogString, confKey.key)
+
+ case (INT, dt: TimestampType)
+ if preventReadingIncorrectType &&
realDataType.isInstanceOf[YearMonthIntervalType] =>
+ throw
QueryCompilationErrors.avroIncorrectTypeError(toFieldStr(avroPath),
+ toFieldStr(catalystPath), realDataType.catalogString,
dt.catalogString, confKey.key)
+
+ case (INT, dt: TimestampNTZType)
+ if preventReadingIncorrectType &&
realDataType.isInstanceOf[YearMonthIntervalType] =>
+ throw
QueryCompilationErrors.avroIncorrectTypeError(toFieldStr(avroPath),
+ toFieldStr(catalystPath), realDataType.catalogString,
dt.catalogString, confKey.key)
+
+ case (INT, dt: DateType)
+ if preventReadingIncorrectType &&
realDataType.isInstanceOf[YearMonthIntervalType] =>
+ throw
QueryCompilationErrors.avroIncorrectTypeError(toFieldStr(avroPath),
+ toFieldStr(catalystPath), realDataType.catalogString,
dt.catalogString, confKey.key)
+
+ case (INT, DateType) => (updater, ordinal, value) =>
Review Comment:
It seems the logic has also changed. Are you sure where the perf regression
comes from: the code structure (pattern match) or some change in the logic?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]