viirya commented on a change in pull request #31319:
URL: https://github.com/apache/spark/pull/31319#discussion_r564066294
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
##########
@@ -167,20 +168,33 @@ object ParquetReadSupport {
}
private def clipParquetType(
- parquetType: Type, catalystType: DataType, caseSensitive: Boolean): Type
= {
+ parquetType: Type,
+ catalystType: DataType,
+ caseSensitive: Boolean,
+ enableVectorizedReader: Boolean): Type = {
catalystType match {
case t: ArrayType if !isPrimitiveCatalystType(t.elementType) =>
// Only clips array types with nested type as element type.
- clipParquetListType(parquetType.asGroupType(), t.elementType,
caseSensitive)
+ clipParquetListType(
+ parquetType.asGroupType(), t.elementType, caseSensitive,
enableVectorizedReader)
case t: MapType
if !isPrimitiveCatalystType(t.keyType) ||
!isPrimitiveCatalystType(t.valueType) =>
// Only clips map types with nested key type or value type
- clipParquetMapType(parquetType.asGroupType(), t.keyType, t.valueType,
caseSensitive)
+ clipParquetMapType(
+ parquetType.asGroupType(), t.keyType, t.valueType, caseSensitive,
enableVectorizedReader)
case t: StructType =>
- clipParquetGroup(parquetType.asGroupType(), t, caseSensitive)
+ clipParquetGroup(parquetType.asGroupType(), t, caseSensitive,
enableVectorizedReader)
+
+ case t: DecimalType if enableVectorizedReader =>
+ val p = parquetType.asPrimitiveType().getDecimalMetadata()
+ if (t.precision == p.getPrecision && t.scale == p.getScale) {
+ parquetType
+ } else {
+ throw new UnsupportedOperationException("Schema evolution not
supported.")
Review comment:
Shall we mention that the MR path is supported for this read? I imagine that
Spark apps might be able to read Parquet, but once the read adds a complex
column, it fails. That may be confusing for end users.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]