Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/22148#discussion_r211136238
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetReadSupport.scala
---
@@ -277,14 +291,38 @@ private[parquet] object ParquetReadSupport {
* @return A list of clipped [[GroupType]] fields, which can be empty.
*/
private def clipParquetGroupFields(
- parquetRecord: GroupType, structType: StructType): Seq[Type] = {
- val parquetFieldMap = parquetRecord.getFields.asScala.map(f =>
f.getName -> f).toMap
+ parquetRecord: GroupType, structType: StructType, caseSensitive:
Boolean): Seq[Type] = {
val toParquet = new
SparkToParquetSchemaConverter(writeLegacyParquetFormat = false)
- structType.map { f =>
- parquetFieldMap
- .get(f.name)
- .map(clipParquetType(_, f.dataType))
- .getOrElse(toParquet.convertField(f))
+ if (caseSensitive) {
+ val caseSensitiveParquetFieldMap =
+ parquetRecord.getFields.asScala.map(f => f.getName -> f).toMap
+ structType.map { f => {
+ caseSensitiveParquetFieldMap
+ .get(f.name)
+ .map(clipParquetType(_, f.dataType, caseSensitive))
+ .getOrElse(toParquet.convertField(f))
+ }
--- End diff --
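nit: I would remove the redundant inner braces around the lambda body
(i.e. write `structType.map { f =>` instead of `structType.map { f => {`,
and drop the matching closing `}`), per
https://github.com/databricks/scala-style-guide#anonymous-methods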
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]