YannByron commented on code in PR #5201: URL: https://github.com/apache/hudi/pull/5201#discussion_r841354324
########## hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala: ########## @@ -209,14 +219,37 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext, val fileSplits = collectFileSplits(partitionFilters, dataFilters) - val partitionSchema = StructType(Nil) - val tableSchema = HoodieTableSchema(tableStructSchema, if (internalSchema.isEmptySchema) tableAvroSchema.toString else AvroInternalSchemaConverter.convert(internalSchema, tableAvroSchema.getName).toString, internalSchema) - val requiredSchema = HoodieTableSchema(requiredStructSchema, requiredAvroSchema.toString, requiredInternalSchema) + val partitionSchema = if (dropPartitionColumnsWhenWrite) { + // when hoodie.datasource.write.drop.partition.columns is true, partition columns can't be persisted in + // data files. + StructType(partitionColumns.map(StructField(_, StringType))) + } else { + StructType(Nil) + } + val tableSchema = HoodieTableSchema(tableStructSchema, if (internalSchema.isEmptySchema) tableAvroSchema.toString else AvroInternalSchemaConverter.convert(internalSchema, tableAvroSchema.getName).toString, internalSchema) + val dataSchema = if (dropPartitionColumnsWhenWrite) { + val dataStructType = StructType(tableStructSchema.filterNot(f => partitionColumns.contains(f.name))) + HoodieTableSchema( + dataStructType, + sparkAdapter.getAvroSchemaConverters.toAvroType(dataStructType, nullable = false, "record").toString() + ) + } else { + tableSchema + } + val requiredSchema = if (dropPartitionColumnsWhenWrite) { + val requiredStructType = StructType(requiredStructSchema.filterNot(f => partitionColumns.contains(f.name))) + HoodieTableSchema( + requiredStructType, + sparkAdapter.getAvroSchemaConverters.toAvroType(requiredStructType, nullable = false, "record").toString() + ) + } else { + HoodieTableSchema(requiredStructSchema, requiredAvroSchema.toString, requiredInternalSchema) + } Review Comment: hard to build UT for `buildScan`. 
And here we just filter out the partition columns from `requiredStructSchema` and `tableStructSchema`, and create the Avro schemas from the filtered results. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscribe@hudi.apache.org For queries about this service, please contact Infrastructure at: users@infra.apache.org