vinothchandar commented on code in PR #10845: URL: https://github.com/apache/hudi/pull/10845#discussion_r1521484287
########## hudi-client/hudi-client-common/src/test/java/org/apache/hudi/common/table/timeline/TestHoodieGlobalTimeline.java: ########## @@ -0,0 +1,153 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.hudi.common.table.timeline; + +import org.apache.hudi.DummyActiveAction; +import org.apache.hudi.client.timeline.LSMTimelineWriter; +import org.apache.hudi.common.engine.HoodieEngineContext; +import org.apache.hudi.common.engine.HoodieLocalEngineContext; +import org.apache.hudi.common.engine.LocalTaskContextSupplier; +import org.apache.hudi.common.model.HoodieCommitMetadata; +import org.apache.hudi.common.model.WriteOperationType; +import org.apache.hudi.common.testutils.HoodieCommonTestHarness; +import org.apache.hudi.common.testutils.HoodieTestTable; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.config.HoodieIndexConfig; +import org.apache.hudi.config.HoodieWriteConfig; +import org.apache.hudi.index.HoodieIndex; + +import org.apache.hadoop.conf.Configuration; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import 
org.junit.jupiter.params.provider.ValueSource; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; + +/** + * Test cases for {@link HoodieGlobalTimeline}. + */ +public class TestHoodieGlobalTimeline extends HoodieCommonTestHarness { + @BeforeEach + public void setUp() throws Exception { + initMetaClient(); + } + + @AfterEach + public void tearDown() throws Exception { + cleanMetaClient(); + } + + /** + * The test for checking whether an instant is archived. + */ + @Test + void testArchivingCheck() throws Exception { + writeArchivedTimeline(10, 10000000, 50); + writeActiveTimeline(10000050, 10); + HoodieGlobalTimeline globalTimeline = new HoodieGlobalTimeline(this.metaClient, Option.empty()); + assertTrue(globalTimeline.isBeforeTimelineStarts("10000049"), "The instant should be active"); Review Comment: I see what you are alluding to. For now, let's rename these to `isBeforeActiveTimelineStarts` to make it explicit. We can then clean up these methods. ########## hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieDefaultTimeline.java: ########## @@ -167,12 +167,13 @@ public HoodieDefaultTimeline getWriteTimeline() { @Override public HoodieTimeline getContiguousCompletedWriteTimeline() { - Option<HoodieInstant> earliestPending = getWriteTimeline().filterInflightsAndRequested().firstInstant(); + HoodieDefaultTimeline writeTimeline = getWriteTimeline(); Review Comment: Is this now changing this method to look for a contiguous timeline across both active + archived? What's the effect of this change? ########## hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieGlobalTimeline.java: ########## @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.table.timeline; + +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +import java.util.List; +import java.util.Objects; +import java.util.function.Function; + +/** + * A global timeline view with both active and archived timeline involved. + */ +public class HoodieGlobalTimeline extends HoodieDefaultTimeline { + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(HoodieGlobalTimeline.class); + private final HoodieTableMetaClient metaClient; + private final HoodieActiveTimeline activeTimeline; + private final HoodieArchivedTimeline archivedTimeline; + + protected HoodieGlobalTimeline(HoodieTableMetaClient metaClient, Option<String> startInstant) { + this.metaClient = metaClient; + this.activeTimeline = new HoodieActiveTimeline(metaClient); + archivedTimeline = startInstant.isPresent() ? 
new HoodieArchivedTimeline(metaClient, startInstant.get()) : new HoodieArchivedTimeline(metaClient); + this.details = FederatedDetails.create(this.activeTimeline, archivedTimeline); + setInstants(mergeInstants(archivedTimeline.getInstants(), activeTimeline.getInstants())); Review Comment: Are we loading all the archived instants here eagerly? Can this be lazily done? ########## hudi-common/src/main/java/org/apache/hudi/common/table/timeline/HoodieGlobalTimeline.java: ########## @@ -0,0 +1,203 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.table.timeline; + +import org.apache.hudi.common.table.HoodieTableMetaClient; +import org.apache.hudi.common.util.Option; +import org.apache.hudi.common.util.ValidationUtils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import javax.annotation.Nullable; + +import java.util.List; +import java.util.Objects; +import java.util.function.Function; + +/** + * A global timeline view with both active and archived timeline involved. 
+ */ +public class HoodieGlobalTimeline extends HoodieDefaultTimeline { + private static final long serialVersionUID = 1L; + private static final Logger LOG = LoggerFactory.getLogger(HoodieGlobalTimeline.class); + private final HoodieTableMetaClient metaClient; + private final HoodieActiveTimeline activeTimeline; + private final HoodieArchivedTimeline archivedTimeline; + + protected HoodieGlobalTimeline(HoodieTableMetaClient metaClient, Option<String> startInstant) { + this.metaClient = metaClient; + this.activeTimeline = new HoodieActiveTimeline(metaClient); + archivedTimeline = startInstant.isPresent() ? new HoodieArchivedTimeline(metaClient, startInstant.get()) : new HoodieArchivedTimeline(metaClient); + this.details = FederatedDetails.create(this.activeTimeline, archivedTimeline); + setInstants(mergeInstants(archivedTimeline.getInstants(), activeTimeline.getInstants())); + } + + protected HoodieGlobalTimeline(HoodieActiveTimeline activeTimeline, HoodieArchivedTimeline archivedTimeline) { + this.metaClient = activeTimeline.metaClient; + this.activeTimeline = activeTimeline; + this.archivedTimeline = archivedTimeline; + this.details = FederatedDetails.create(this.activeTimeline, archivedTimeline); + setInstants(mergeInstants(archivedTimeline.getInstants(), activeTimeline.getInstants())); + } + + /** + * For serialization and de-serialization only. 
+ */ + public HoodieGlobalTimeline() { + this.activeTimeline = null; + this.archivedTimeline = null; + this.metaClient = null; + } + + @Override + public HoodieTimeline filterPendingCompactionTimeline() { + // override for efficiency + return this.activeTimeline.filterPendingCompactionTimeline(); + } + + @Override + public HoodieTimeline filterPendingLogCompactionTimeline() { + // override for efficiency + return this.activeTimeline.filterPendingLogCompactionTimeline(); + } + + @Override + public HoodieTimeline filterPendingMajorOrMinorCompactionTimeline() { + // override for efficiency + return this.activeTimeline.filterPendingMajorOrMinorCompactionTimeline(); + } + + @Override + public HoodieTimeline filterPendingReplaceTimeline() { + // override for efficiency + return this.activeTimeline.filterPendingReplaceTimeline(); + } + + @Override + public HoodieTimeline filterPendingRollbackTimeline() { + // override for efficiency + return this.activeTimeline.filterPendingRollbackTimeline(); + } + + @Override + public HoodieTimeline filterRequestedRollbackTimeline() { + // override for efficiency + return this.activeTimeline.filterRequestedRollbackTimeline(); + } + + @Override + public HoodieTimeline filterPendingIndexTimeline() { + // override for efficiency + return this.activeTimeline.filterPendingIndexTimeline(); + } + + @Override + public boolean empty() { + return this.activeTimeline.empty(); + } + + /** + * Returns whether the active timeline contains the given instant or the instant is archived. + * Needs to rethink the new semantics and rename this method with global timeline introduced. 
+ */ + @Override + public boolean containsOrBeforeTimelineStarts(String ts) { + return this.activeTimeline.containsOrBeforeTimelineStarts(ts); + } + + @Override + public boolean isBeforeTimelineStarts(String ts) { + return this.activeTimeline.isBeforeTimelineStarts(ts); + } + + @Override + public Option<HoodieInstant> getFirstNonSavepointCommit() { + return this.activeTimeline.getFirstNonSavepointCommit(); + } + + @Override + public Option<HoodieInstant> getLastPendingClusterInstant() { + // override for efficiency + return this.activeTimeline.getLastPendingClusterInstant(); + } + + /** + * Needs to fix this method to only check on active timeline. + */ + @Override + protected Option<HoodieInstant> findFirstNonSavepointCommit(List<HoodieInstant> instants) { + return this.activeTimeline.findFirstNonSavepointCommit(instants); + } + + /** + * Reloads all the instants. + */ + public HoodieGlobalTimeline reload() { + return new HoodieGlobalTimeline(this.metaClient, Option.empty()); + } + + /** + * The active timeline is always reloaded, + * the archived timeline is reloaded based on the given start timestamp {@code startTs}. + */ + public HoodieGlobalTimeline reload(String startTs) { + HoodieActiveTimeline reloadedActiveTimeline = this.metaClient.reloadActiveTimeline(); + HoodieInstant oldFirstActiveCommit = this.activeTimeline.firstInstant().orElse(null); + HoodieInstant newFirstActiveCommit = reloadedActiveTimeline.firstInstant().orElse(null); + // reload the archived timeline incrementally if the archiving snapshot does not change. Review Comment: Is there some way to reload only the newer instants in the archived timeline, without reloading the whole tree again? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
