jonvex commented on code in PR #12270: URL: https://github.com/apache/hudi/pull/12270#discussion_r1848854643
########## hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala: ########## @@ -56,9 +56,10 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport { }) def getTableSqlSchema(metaClient: HoodieTableMetaClient, Review Comment: actually, getTableSqlSchema is only used in one place, and is set to true. So we can just use the current impl of getTableAvroSchema and add ``` if (timestamp == null) { return getTableAvroSchema(metaClient.getTableConfig().populateMetaFields()); } ``` to the beginning of it ########## hudi-spark-datasource/hudi-spark3.3.x/src/main/scala/org/apache/spark/sql/parser/HoodieSpark3_3ExtendedSqlParser.scala: ########## @@ -121,11 +120,7 @@ class HoodieSpark3_3ExtendedSqlParser(session: SparkSession, delegate: ParserInt private def isHoodieCommand(sqlText: String): Boolean = { val normalized = sqlText.toLowerCase(Locale.ROOT).trim().replaceAll("\\s+", " ") - normalized.contains("system_time as of") || Review Comment: we are ok getting rid of all of these aliases? ########## hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java: ########## @@ -150,6 +150,23 @@ public Schema getTableAvroSchema(String timestamp) throws Exception { .orElseThrow(schemaNotFoundError()); } + /** + * Fetches tables schema in Avro format as of the given instant + * + * @param timestamp as of which table's schema will be fetched + */ + public Schema getTableAvroSchema(String timestamp, boolean includeMetadataFields) throws Exception { Review Comment: I think there is some code reuse we can do with the method above ########## hudi-spark-datasource/hudi-spark3-common/src/main/scala/org/apache/spark/sql/hudi/catalog/HoodieCatalog.scala: ########## @@ -153,6 +155,72 @@ class HoodieCatalog extends DelegatingCatalogExtension } } + override def loadTable(ident: Identifier, timestamp: Long): Table = { + super.loadTable(ident) match { + case V1Table(catalogTable0) if sparkAdapter.isHoodieTable(catalogTable0) => + + // spark passes microseconds + val milliseconds = TimeUnit.MICROSECONDS.toMillis(timestamp) + val targetTimestamp = HoodieInstantTimeGenerator.formatMillis(milliseconds) + + constructTable(ident, catalogTable0, targetTimestamp) + + case t => t + } + } + + override def loadTable(ident: Identifier, version: String): Table = { + super.loadTable(ident) match { + case V1Table(catalogTable0) if sparkAdapter.isHoodieTable(catalogTable0) => + + if (!HoodieInstantTimeGenerator.isValidInstantTime(version)) { + throw new AnalysisException(s"invalid snapshot id: $version") + } + + constructTable(ident, catalogTable0, version); + + case t => t + } + } + + private def constructTable(ident: Identifier, + catalogTable0: CatalogTable, + timestampAsOf: String): Table = { + val catalogTable = catalogTable0.comment match { + case Some(v) => + val newProps = catalogTable0.storage.properties + ( Review Comment: catalogTable0.storage.copy(properties = newProps)) is this creating a new instance so the original catalog table is not modified. ########## hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieInstantTimeGenerator.java: ########## @@ -88,6 +88,18 @@ public static String createNewInstantTime(boolean shouldLock, TimeGenerator time }); } + public static String formatMillis(long milliseconds) { Review Comment: This seems to be a copy of org.apache.hudi.common.table.timeline.HoodieInstantTimeGenerator#createNewInstantTime. can we try to reuse the existing code? If the HoodieInstantTimeGenerator implementation is changed, then we will need to update this too ########## hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/hudi/HoodieSqlCommonUtils.scala: ########## @@ -56,9 +56,10 @@ object HoodieSqlCommonUtils extends SparkAdapterSupport { }) def getTableSqlSchema(metaClient: HoodieTableMetaClient, Review Comment: and get rid of includeMetadataFields: Boolean = false, from getTableSqlSchema -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: commits-unsubscr...@hudi.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org