yihua commented on code in PR #11947:
URL: https://github.com/apache/hudi/pull/11947#discussion_r1802062995
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/IncrementalRelation.scala:
##########
@@ -90,24 +78,24 @@ class IncrementalRelation(val sqlContext: SQLContext,
throw new HoodieException("Incremental queries are not supported when meta
fields are disabled")
}
+ private val queryContext: IncrementalQueryAnalyzer.QueryContext =
+ IncrementalQueryAnalyzer.builder()
+ .metaClient(metaClient)
+ .startTime(optParams(DataSourceReadOptions.BEGIN_INSTANTTIME.key))
+ .endTime(optParams.getOrElse(DataSourceReadOptions.END_INSTANTTIME.key,
null))
+ .rangeType(InstantRange.RangeType.OPEN_CLOSED)
+ .build()
+ .analyze()
+
+ private val commitsToReturn = List.concat(
+ queryContext.getArchivedInstants.asScala,
+ queryContext.getActiveInstants.asScala)
+ // TODO: investigate failure:
TestHoodieIncrSource#testHoodieIncrSourceInflightCommitBeforeCompletedCommit
+ // .sortBy(_.getCompletionTime) // why would sorting by completion time
fail this test?
Review Comment:
This needs investigation.
##########
hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/MergeOnReadIncrementalRelation.scala:
##########
@@ -205,51 +204,46 @@ trait HoodieIncrementalRelationTrait extends
HoodieBaseRelation {
|| affectedFilesInCommits.asScala.exists(fileStatus =>
!metaClient.getStorage.exists(fileStatus.getPath)))
}
- protected lazy val includedCommits: immutable.Seq[HoodieInstant] = {
- if (!startInstantArchived || !endInstantArchived) {
- // If endTimestamp commit is not archived, will filter instants
- // before endTimestamp.
- if (hollowCommitHandling == USE_TRANSITION_TIME) {
- super.timeline.findInstantsInRangeByCompletionTime(startTimestamp,
endTimestamp).getInstants.asScala.toList
- } else {
- super.timeline.findInstantsInRange(startTimestamp,
endTimestamp).getInstants.asScala.toList
- }
- } else {
- super.timeline.getInstants.asScala.toList
- }
- }
+ protected lazy val queryContext: IncrementalQueryAnalyzer.QueryContext =
+ IncrementalQueryAnalyzer.builder()
+ .metaClient(metaClient)
+ .startTime(optParams(DataSourceReadOptions.BEGIN_INSTANTTIME.key))
+ .endTime(optParams.getOrElse(DataSourceReadOptions.END_INSTANTTIME.key,
null))
+ .rangeType(InstantRange.RangeType.OPEN_CLOSED)
+ .limit(optParams.getOrElse(
+ DataSourceReadOptions.INCREMENTAL_LIMIT.key,
+ DataSourceReadOptions.INCREMENTAL_LIMIT.defaultValue).toInt)
+ .build()
+ .analyze()
+
+ protected lazy val includedCommits: immutable.Seq[HoodieInstant] =
List.concat(
+ queryContext.getArchivedInstants.asScala,
+ queryContext.getActiveInstants.asScala)
protected lazy val commitsMetadata =
includedCommits.map(getCommitMetadata(_, super.timeline)).asJava
protected lazy val affectedFilesInCommits: java.util.List[StoragePathInfo] =
{
listAffectedFilesForCommits(conf, metaClient.getBasePath, commitsMetadata)
}
- protected lazy val (includeStartTime, startTs) = if (startInstantArchived) {
- (false, startTimestamp)
Review Comment:
Got it. I think this part of the code was hard to understand, so it's good that
we clean it up and unify the logic without unnecessary flags.
##########
hudi-common/src/test/java/org/apache/hudi/common/testutils/HoodieTestDataGenerator.java:
##########
@@ -681,6 +681,10 @@ public List<HoodieRecord> generateInserts(String
instantTime, Integer n) {
return generateInserts(instantTime, n, false);
}
+ public List<HoodieRecord> generateInserts(Long instantTime, Integer n) {
Review Comment:
Nit: remove this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]