sadikovi commented on a change in pull request #34199:
URL: https://github.com/apache/spark/pull/34199#discussion_r724741180
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetSchemaConverter.scala
##########
@@ -60,40 +58,106 @@ class ParquetToSparkSchemaConverter(
/**
* Converts Parquet [[MessageType]] `parquetSchema` to a Spark SQL
[[StructType]].
*/
- def convert(parquetSchema: MessageType): StructType =
convert(parquetSchema.asGroupType())
+ def convert(parquetSchema: MessageType): StructType = {
+ val column = new ColumnIOFactory().getColumnIO(parquetSchema)
+ val converted = convertInternal(column)
+ converted.sparkType.asInstanceOf[StructType]
+ }
- private def convert(parquetSchema: GroupType): StructType = {
- val fields = parquetSchema.getFields.asScala.map { field =>
- field.getRepetition match {
- case OPTIONAL =>
- StructField(field.getName, convertField(field), nullable = true)
+ /**
+ * Convert `parquetSchema` into a [[ParquetType]] which contains its
corresponding Spark
Review comment:
Isn't that what the column descriptor is for? Can we just use that instead of
introducing another abstraction?
##########
File path:
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetRowConverter.scala
##########
@@ -609,7 +610,9 @@ private[parquet] class ParquetRowConverter(
//
// If the element type does not match the Catalyst type and the
underlying repeated type
// does not belong to the legacy LIST type, then it is case 1;
otherwise, it is case 2.
- val guessedElementType = schemaConverter.convertField(repeatedType)
+ val messageType =
Types.buildMessage().addField(repeatedType).named("foo")
Review comment:
I am a bit confused: why is this conversion required at all?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]