vinothchandar commented on code in PR #13577:
URL: https://github.com/apache/hudi/pull/13577#discussion_r2236578852
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/transaction/TransactionManager.java:
##########
@@ -38,35 +43,122 @@ public class TransactionManager implements Serializable,
AutoCloseable {
protected static final Logger LOG =
LoggerFactory.getLogger(TransactionManager.class);
protected final LockManager lockManager;
protected final boolean isLockRequired;
+ private final transient TimeGenerator timeGenerator;
+ protected boolean hasLock;
protected Option<HoodieInstant> changeActionInstant = Option.empty();
private Option<HoodieInstant> lastCompletedActionInstant = Option.empty();
public TransactionManager(HoodieWriteConfig config, HoodieStorage storage) {
- this(new LockManager(config, storage), config.isLockRequired());
+ this(new LockManager(config, storage), config);
}
- protected TransactionManager(LockManager lockManager, boolean
isLockRequired) {
+ protected TransactionManager(LockManager lockManager, HoodieWriteConfig
writeConfig) {
+ this(lockManager, writeConfig.isLockRequired(),
TimeGenerators.getTimeGenerator(writeConfig.getTimeGeneratorConfig()));
+ }
+
+ public TransactionManager(LockManager lockManager, boolean isLockRequired,
TimeGenerator timeGenerator) {
this.lockManager = lockManager;
this.isLockRequired = isLockRequired;
+ this.timeGenerator = timeGenerator;
+ }
+
+ public String generateInstantTime() {
+ if (!hasLock && isLockRequired) {
+ throw new HoodieLockException("Cannot create instant without acquiring a
lock first.");
+ }
+ return HoodieInstantTimeGenerator.createNewInstantTime(timeGenerator, 0L);
+ }
+
+ /**
+ * Generates an instant time and executes an action that requires that
instant time within a lock.
+ * @param instantTimeConsumingAction a function that takes the generated
instant time and performs some action
+ * @return the result of the action
+ * @param <T> type of the result
+ */
+ public <T> T executeStateChangeWithInstant(Function<String, T>
instantTimeConsumingAction) {
+ return executeStateChangeWithInstant(Option.empty(), Option.empty(),
instantTimeConsumingAction);
+ }
+
+ /**
+ * Uses the provided instant if present or else generates an instant time
and executes an action that requires that instant time within a lock.
+ * @param providedInstantTime an optional instant time provided by the
caller. If not provided, a new instant time will be generated.
+ * @param instantTimeConsumingAction a function that takes the generated
instant time and performs some action
+ * @return the result of the action
+ * @param <T> type of the result
+ */
+ public <T> T executeStateChangeWithInstant(Option<String>
providedInstantTime, Function<String, T> instantTimeConsumingAction) {
+ return executeStateChangeWithInstant(providedInstantTime, Option.empty(),
instantTimeConsumingAction);
+ }
+
+ /**
+ * Uses the provided instant if present or else generates an instant time
and executes an action that requires that instant time within a lock.
+ * @param providedInstantTime an optional instant time provided by the
caller. If not provided, a new instant time will be generated.
+ * @param lastCompletedActionInstant optional input representing the last
completed instant, used for logging purposes.
+ * @param instantTimeConsumingAction a function that takes the generated
instant time and performs some action
+ * @return the result of the action
+ * @param <T> type of the result
+ */
+ public <T> T executeStateChangeWithInstant(Option<String>
providedInstantTime, Option<HoodieInstant> lastCompletedActionInstant,
Function<String, T> instantTimeConsumingAction) {
+ if (isLockRequired()) {
+ acquireLock();
+ }
+ String requestedInstant = providedInstantTime.orElseGet(() ->
HoodieInstantTimeGenerator.createNewInstantTime(timeGenerator, 0L));
+ try {
+ if (lastCompletedActionInstant.isEmpty()) {
+ LOG.info("State change starting for {}", changeActionInstant);
+ } else {
+ LOG.info("State change starting for {} with latest completed action
instant {}", changeActionInstant, lastCompletedActionInstant.get());
+ }
+ return instantTimeConsumingAction.apply(requestedInstant);
+ } finally {
+ if (isLockRequired()) {
+ releaseLock();
+ LOG.info("State change ended for {}", requestedInstant);
+ }
+ }
+ }
+
+ public void beginStateChange() {
+ beginStateChange(Option.empty(), Option.empty());
}
public void beginStateChange(Option<HoodieInstant> changeActionInstant,
Option<HoodieInstant>
lastCompletedActionInstant) {
if (isLockRequired) {
LOG.info("State change starting for {} with latest completed action
instant {}",
changeActionInstant, lastCompletedActionInstant);
- lockManager.lock();
+ acquireLock();
reset(this.changeActionInstant, changeActionInstant,
lastCompletedActionInstant);
LOG.info("State change started for {} with latest completed action
instant {}",
changeActionInstant, lastCompletedActionInstant);
}
}
+ public void endStateChange() {
+ endStateChange(Option.empty());
+ }
+
+ private void acquireLock() {
Review Comment:
Should this be synchronized?
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java:
##########
@@ -1135,9 +1108,8 @@ public void dropIndex(List<String> metadataPartitions) {
}
}
}
- } finally {
- this.txnManager.endStateChange(Option.of(ownerInstant));
- }
+ return null;
Review Comment:
nts: can we avoid the `null` return
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java:
##########
@@ -1809,34 +1810,39 @@ static HoodieActiveTimeline
runPendingTableServicesOperationsAndRefreshTimeline(
* 2. In multi-writer scenario, a parallel operation with a greater
instantTime may have completed creating a
* deltacommit.
*/
- void compactIfNecessary(BaseHoodieWriteClient<?,I,?,O> writeClient,
Option<String> latestDeltaCommitTimeOpt) {
+ void compactIfNecessary(final BaseHoodieWriteClient<?,I,?,O> writeClient,
Option<String> latestDeltaCommitTimeOpt) {
// IMPORTANT: Trigger compaction with max instant time that is smaller
than(or equals) the earliest pending instant from DT.
// The compaction planner will manage to filter out the log files that
finished with greater completion time.
// see BaseHoodieCompactionPlanGenerator.generateCompactionPlan for more
details.
HoodieTimeline metadataCompletedTimeline =
metadataMetaClient.getActiveTimeline().filterCompletedInstants();
- final String compactionInstantTime = dataMetaClient.reloadActiveTimeline()
+ final Option<String> requiredCompactionInstantOpt =
dataMetaClient.reloadActiveTimeline()
// The filtering strategy is kept in line with the rollback premise,
if an instant is pending on DT but completed on MDT,
// generates a compaction time smaller than it so that the instant
could then been rolled back.
.filterInflightsAndRequested().filter(instant ->
metadataCompletedTimeline.containsInstant(instant.requestedTime())).firstInstant()
// minus the pending instant time by 1 millisecond to avoid conflicts
on the MDT.
- .map(instant ->
HoodieInstantTimeGenerator.instantTimeMinusMillis(instant.requestedTime(), 1L))
- .orElse(writeClient.createNewInstantTime(false));
+ .map(instant ->
HoodieInstantTimeGenerator.instantTimeMinusMillis(instant.requestedTime(), 1L));
// we need to avoid checking compaction w/ same instant again.
// let's say we trigger compaction after C5 in MDT and so compaction
completes with C4001. but C5 crashed before completing in MDT.
// and again w/ C6, we will re-attempt compaction at which point latest
delta commit is C4 in MDT.
// and so we try compaction w/ instant C4001. So, we can avoid compaction
if we already have compaction w/ same instant time.
- if
(metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(compactionInstantTime))
{
- LOG.info("Compaction with same {} time is already present in the
timeline.", compactionInstantTime);
- } else if (writeClient.scheduleCompactionAtInstant(compactionInstantTime,
Option.empty())) {
- LOG.info("Compaction is scheduled for timestamp {}",
compactionInstantTime);
- writeClient.compact(compactionInstantTime, true);
- } else if (metadataWriteConfig.isLogCompactionEnabled()) {
- // Schedule and execute log compaction with new instant time.
- Option<String> scheduledLogCompaction =
writeClient.scheduleLogCompaction(Option.empty());
- if (scheduledLogCompaction.isPresent()) {
- LOG.info("Log compaction is scheduled for timestamp {}",
scheduledLogCompaction.get());
- writeClient.logCompact(scheduledLogCompaction.get(), true);
+ if (requiredCompactionInstantOpt.map(compactionInstantTime ->
metadataMetaClient.getActiveTimeline().filterCompletedInstants().containsInstant(compactionInstantTime)).orElse(false))
{
Review Comment:
simplify
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java:
##########
@@ -740,18 +737,13 @@ void rollbackInflightInstant(HoodieInstant
inflightInstant,
*/
public void rollbackInflightLogCompaction(HoodieInstant inflightInstant,
Function<String, Option<HoodiePendingRollbackInfo>>
getPendingRollbackInstantFunc,
TransactionManager
transactionManager) {
- transactionManager.beginStateChange(Option.empty(), Option.empty());
Review Comment:
Same. Review usages where non-empty options are passed in.
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java:
##########
@@ -740,18 +737,13 @@ void rollbackInflightInstant(HoodieInstant
inflightInstant,
*/
public void rollbackInflightLogCompaction(HoodieInstant inflightInstant,
Function<String, Option<HoodiePendingRollbackInfo>>
getPendingRollbackInstantFunc,
TransactionManager
transactionManager) {
- transactionManager.beginStateChange(Option.empty(), Option.empty());
- final String commitTime;
- try {
- commitTime =
getPendingRollbackInstantFunc.apply(inflightInstant.requestedTime()).map(entry
- -> entry.getRollbackInstant().requestedTime())
- .orElseGet(() -> getMetaClient().createNewInstantTime(false));
- scheduleRollback(context, commitTime, inflightInstant, false,
config.shouldRollbackUsingMarkers(),
- false);
- } finally {
- transactionManager.endStateChange(Option.empty());
- }
- rollback(context, commitTime, inflightInstant, true, false);
+ Option<String> existingCommitTime =
getPendingRollbackInstantFunc.apply(inflightInstant.requestedTime()).map(entry
Review Comment:
this is now out of lock.
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java:
##########
@@ -678,11 +680,9 @@ Option<String> scheduleTableServiceInternal(Option<String>
providedInstantTime,
// so it is handled differently to avoid locking for planning.
return scheduleCleaning(createTable(config, storageConf),
providedInstantTime);
}
- txnManager.beginStateChange(Option.empty(), Option.empty());
- try {
+ return txnManager.executeStateChangeWithInstant(providedInstantTime,
instantTime -> {
Option<String> option;
HoodieTable<?, ?, ?, ?> table = createTable(config, storageConf);
- String instantTime = providedInstantTime.orElseGet(() ->
createNewInstantTime(false));
Review Comment:
nts: check for change in behavior: whether we lock or not now, and if
that's ok
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieTableServiceClient.java:
##########
@@ -1160,17 +1147,10 @@ public boolean rollback(final String commitInstantTime,
Option<HoodiePendingRoll
LOG.warn("Cannot find instant {} in the timeline of table {} for
rollback", commitInstantTime, config.getBasePath());
return false;
}
- if (!skipLocking) {
Review Comment:
nts: is `skipLocking` still used/needed?
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/upgrade/UpgradeDowngradeUtils.java:
##########
@@ -176,9 +175,6 @@ static void rollbackFailedWritesAndCompact(HoodieTable
table, HoodieEngineContex
// NOTE: at this stage rollback should use the current writer version
and disable auto upgrade/downgrade
TypedProperties properties = new TypedProperties();
properties.putAll(config.getProps());
- // TimeGenerators are cached and re-used based on table base path. Since
here we are changing the lock configurations, avoiding the cache use
Review Comment:
Why is this an issue?
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/savepoint/SavepointActionExecutor.java:
##########
@@ -137,16 +138,18 @@ public HoodieSavepointMetadata execute() {
}, null);
}
- HoodieSavepointMetadata metadata =
TimelineMetadataUtils.convertSavepointMetadata(user, comment, latestFilesMap);
- // Nothing to save in the savepoint
- table.getActiveTimeline().createNewInstant(
- instantGenerator.createNewInstant(HoodieInstant.State.INFLIGHT,
HoodieTimeline.SAVEPOINT_ACTION, instantTime));
- table.getActiveTimeline()
- .saveAsComplete(
- true,
instantGenerator.createNewInstant(HoodieInstant.State.INFLIGHT,
HoodieTimeline.SAVEPOINT_ACTION, instantTime), Option.of(metadata),
- savepointCompletedInstant ->
table.getMetaClient().getTableFormat().savepoint(savepointCompletedInstant,
table.getContext(), table.getMetaClient(), table.getViewManager()));
- LOG.info("Savepoint " + instantTime + " created");
- return metadata;
+ try (TransactionManager transactionManager = new
TransactionManager(config, table.getStorage())) {
Review Comment:
Would this pose any issues when using the in-process lock provider, since the
txnManager can be different? But we just seem to be using time generation
##########
hudi-common/src/main/java/org/apache/hudi/common/table/timeline/versioning/v2/ActiveTimelineV2.java:
##########
@@ -496,36 +481,36 @@ public <T> HoodieInstant
transitionClusterRequestedToInflight(HoodieInstant requ
@Override
public HoodieInstant transitionReplaceInflightToComplete(
- boolean shouldLock, HoodieInstant inflightInstant,
HoodieReplaceCommitMetadata metadata) {
+ HoodieInstant inflightInstant, HoodieReplaceCommitMetadata metadata,
String completionInstant) {
Review Comment:
Make naming consistent: `completionTime` vs `completionInstant`
##########
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDTableServiceClient.java:
##########
@@ -47,8 +48,9 @@ public class SparkRDDTableServiceClient<T> extends
BaseHoodieTableServiceClient<
private final StreamingMetadataWriteHandler streamingMetadataWriteHandler =
new StreamingMetadataWriteHandler();
protected SparkRDDTableServiceClient(HoodieEngineContext context,
HoodieWriteConfig clientConfig,
- Option<EmbeddedTimelineService>
timelineService) {
- super(context, clientConfig, timelineService);
+ Option<EmbeddedTimelineService>
timelineService,
+ TransactionManager transactionManager) {
Review Comment:
nts: check caller
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/BaseHoodieWriteClient.java:
##########
@@ -1035,15 +1021,12 @@ String startCommit(Option<String> providedInstantTime,
String actionType, Hoodie
}
if (ClusteringUtils.isClusteringOrReplaceCommitAction(actionType)) {
- instant =
metaClient.getActiveTimeline().createRequestedCommitWithReplaceMetadata(instantTime,
actionType);
+
metaClient.getActiveTimeline().createRequestedCommitWithReplaceMetadata(instantTime,
actionType);
} else {
- instant = metaClient.createNewInstant(State.REQUESTED, actionType,
instantTime);
- metaClient.getActiveTimeline().createNewInstant(instant);
+
metaClient.getActiveTimeline().createNewInstant(metaClient.createNewInstant(State.REQUESTED,
actionType, instantTime));
}
- } finally {
- txnManager.endStateChange(Option.ofNullable(instant));
Review Comment:
nts: confirm not passing in the `instant` arg is ok
##########
hudi-common/src/main/java/org/apache/hudi/common/config/HoodieTimeGeneratorConfig.java:
##########
@@ -56,15 +56,6 @@ public class HoodieTimeGeneratorConfig extends HoodieConfig {
.withDocumentation("The max expected clock skew time in ms between two
processes generating time. Used by "
+ TimeGeneratorType.WAIT_TO_ADJUST_SKEW.name() + " time generator to
implement TrueTime semantics.");
- public static final ConfigProperty<Boolean> TIME_GENERATOR_REUSE_ENABLE =
ConfigProperty
Review Comment:
we just killed this for reduced complexity?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]