nsivabalan commented on code in PR #6358:
URL: https://github.com/apache/hudi/pull/6358#discussion_r2425086622
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieSparkSqlWriter.scala:
##########
@@ -429,37 +563,41 @@ object HoodieSparkSqlWriter {
}
}
- /**
- * Checks if schema needs upgrade (if incoming record's write schema is old
while table schema got evolved).
- *
- * @param fs instance of FileSystem.
- * @param basePath base path.
- * @param sparkContext instance of spark context.
- * @param schema incoming record's schema.
- * @return Pair of(boolean, table schema), where first entry will be true
only if schema conversion is required.
- */
- def getLatestTableSchema(fs: FileSystem, basePath: Path, sparkContext:
SparkContext): Option[Schema] = {
- if (FSUtils.isTableExists(basePath.toString, fs)) {
- val tableMetaClient = HoodieTableMetaClient.builder
- .setConf(sparkContext.hadoopConfiguration)
- .setBasePath(basePath.toString)
- .build()
- val tableSchemaResolver = new TableSchemaResolver(tableMetaClient)
-
-
toScalaOption(tableSchemaResolver.getTableAvroSchemaFromLatestCommit(false))
- } else {
- None
+ private def registerAvroSchemasWithKryo(sparkContext: SparkContext,
targetAvroSchemas: Schema*): Unit = {
+ sparkContext.getConf.registerAvroSchemas(targetAvroSchemas: _*)
+ }
+
+ private def getLatestTableSchema(spark: SparkSession,
+ tableBasePath: Path,
+ tableId: TableIdentifier,
+ hadoopConf: Configuration): Option[Schema]
= {
+ val fs = tableBasePath.getFileSystem(hadoopConf)
+ val latestTableSchemaFromCommitMetadata =
+ if (FSUtils.isTableExists(tableBasePath.toString, fs)) {
+ val tableMetaClient = HoodieTableMetaClient.builder
+ .setConf(hadoopConf)
+ .setBasePath(tableBasePath.toString)
+ .build()
+ val tableSchemaResolver = new TableSchemaResolver(tableMetaClient)
+
toScalaOption(tableSchemaResolver.getTableAvroSchemaFromLatestCommit(false))
+ } else {
+ None
+ }
+
+ latestTableSchemaFromCommitMetadata.orElse {
+ getCatalogTable(spark, tableId).map { catalogTable =>
Review Comment:
Not sure why we added this fix to poll the catalog when the table does not
have any valid commits.
We should just remove this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]