codope commented on code in PR #11710:
URL: https://github.com/apache/hudi/pull/11710#discussion_r1745790366
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/SparkHoodieTableFileIndex.scala:
##########
@@ -111,45 +111,12 @@ class SparkHoodieTableFileIndex(spark: SparkSession,
/**
* Get the partition schema from the hoodie.properties.
*/
- private lazy val _partitionSchemaFromProperties: StructType = {
- val tableConfig = metaClient.getTableConfig
- val partitionColumns = tableConfig.getPartitionFields
- val nameFieldMap = generateFieldMap(schema)
-
- if (partitionColumns.isPresent) {
- // Note that key generator class name could be null
- val keyGeneratorClassName = tableConfig.getKeyGeneratorClassName
- if
(classOf[TimestampBasedKeyGenerator].getName.equalsIgnoreCase(keyGeneratorClassName)
- ||
classOf[TimestampBasedAvroKeyGenerator].getName.equalsIgnoreCase(keyGeneratorClassName))
{
- val partitionFields: Array[StructField] =
partitionColumns.get().map(column => StructField(column, StringType))
- StructType(partitionFields)
- } else {
- val partitionFields: Array[StructField] =
partitionColumns.get().filter(column => nameFieldMap.contains(column))
- .map(column => nameFieldMap.apply(column))
-
- if (partitionFields.length != partitionColumns.get().length) {
- val isBootstrapTable = tableConfig.getBootstrapBasePath.isPresent
- if (isBootstrapTable) {
-          // For bootstrapped tables it's possible the schema does not
contain the partition field when the source table
- // is hive style partitioned. In this case we would like to treat
the table as non-partitioned
- // as opposed to failing
- new StructType()
- } else {
- throw new IllegalArgumentException(s"Cannot find columns: " +
- s"'${partitionColumns.get().filter(col =>
!nameFieldMap.contains(col)).mkString(",")}' " +
- s"in the schema[${schema.fields.mkString(",")}]")
- }
- } else {
- new StructType(partitionFields)
- }
- }
- } else {
-      // If the partition columns have not been stored in hoodie.properties (the
table that was
-      // created earlier), we treat it as a non-partitioned table.
- logWarning("No partition columns available from hoodie.properties." +
- " Partition pruning will not work")
- new StructType()
- }
+ lazy val _partitionSchemaFromProperties: StructType = {
+ getPartitionSchema()
+ }
+
+ def getPartitionSchema(): StructType = {
+ sparkParsePartitionUtil.getPartitionSchema(metaClient.getTableConfig,
schema, handleCustomKeyGenerator = false)
Review Comment:
got it, thanks for the clarification.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]