vinothchandar commented on a change in pull request #3426:
URL: https://github.com/apache/hudi/pull/3426#discussion_r700518633
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
##########
@@ -187,6 +188,7 @@ public boolean commitStats(String instantTime,
List<HoodieWriteStat> stats, Opti
lastCompletedTxnAndMetadata.isPresent() ?
Option.of(lastCompletedTxnAndMetadata.get().getLeft()) : Option.empty());
try {
preCommit(instantTime, metadata);
+ table.getMetadataWriter().ifPresent(w ->
((HoodieTableMetadataWriter)w).update(metadata, instantTime));
Review comment:
nts: first committing to metadata table
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -110,23 +119,31 @@ protected HoodieBackedTableMetadataWriter(Configuration
hadoopConf, HoodieWriteC
ValidationUtils.checkArgument(!this.metadataWriteConfig.useFileListingMetadata(),
"File listing cannot be used for Metadata Table");
initRegistry();
- HoodieTableMetaClient datasetMetaClient =
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(datasetWriteConfig.getBasePath()).build();
- initialize(engineContext, datasetMetaClient);
- if (enabled) {
- // This is always called even in case the table was created for the
first time. This is because
- // initFromFilesystem() does file listing and hence may take a long
time during which some new updates
- // may have occurred on the table. Hence, calling this always ensures
that the metadata is brought in sync
- // with the active timeline.
- HoodieTimer timer = new HoodieTimer().startTimer();
- syncFromInstants(datasetMetaClient);
- metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.SYNC_STR,
timer.endTimer()));
- }
+ this.datasetMetaClient =
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(datasetWriteConfig.getBasePath()).build();
Review comment:
nts: loading this afresh here.
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -396,37 +408,56 @@ private boolean
bootstrapFromFilesystem(HoodieEngineContext engineContext, Hoodi
}
/**
- * Sync the Metadata Table from the instants created on the dataset.
+ * Initialize shards for a partition.
*
- * @param datasetMetaClient {@code HoodieTableMetaClient} for the dataset
+ * Each shard is a single log file with the following format:
+ * <fileIdPrefix>ABCD
+ * where ABCD are digits. This allows up to 9999 shards.
+ *
+ * Example:
+ * fc9f18eb-6049-4f47-bc51-23884bef0001
+ * fc9f18eb-6049-4f47-bc51-23884bef0002
*/
- private void syncFromInstants(HoodieTableMetaClient datasetMetaClient) {
- ValidationUtils.checkState(enabled, "Metadata table cannot be synced as it
is not enabled");
- // (re) init the metadata for reading.
- initTableMetadata();
- try {
- List<HoodieInstant> instantsToSync =
metadata.findInstantsToSyncForWriter();
- if (instantsToSync.isEmpty()) {
- return;
- }
-
- LOG.info("Syncing " + instantsToSync.size() + " instants to metadata
table: " + instantsToSync);
-
- // Read each instant in order and sync it to metadata table
- for (HoodieInstant instant : instantsToSync) {
- LOG.info("Syncing instant " + instant + " to metadata table");
-
- Option<List<HoodieRecord>> records =
HoodieTableMetadataUtil.convertInstantToMetaRecords(datasetMetaClient, instant,
getLatestSyncedInstantTime());
- if (records.isPresent()) {
- commit(records.get(), MetadataPartitionType.FILES.partitionPath(),
instant.getTimestamp());
- }
+ private void initializeShards(HoodieTableMetaClient datasetMetaClient,
String partition, String instantTime,
+ int shardCount) throws IOException {
+ ValidationUtils.checkArgument(shardCount <= 9999, "Maximum 9999 shards are
supported.");
+
+ final String newFileId = FSUtils.createNewFileIdPfx();
+ final String newFileIdPrefix = newFileId.substring(0, 32);
+ final HashMap<HeaderMetadataType, String> blockHeader = new HashMap<>();
+ blockHeader.put(HeaderMetadataType.INSTANT_TIME, instantTime);
+ final HoodieDeleteBlock block = new HoodieDeleteBlock(new HoodieKey[0],
blockHeader);
+
+ LOG.info(String.format("Creating %d shards for partition %s with base
fileId %s at instant time %s",
+ shardCount, partition, newFileId, instantTime));
+ for (int i = 0; i < shardCount; ++i) {
+ // Generate a indexed fileId for each shard and write a log block into
it to create the file.
+ final String shardFileId = String.format("%s%04d", newFileIdPrefix, i +
1);
+ ValidationUtils.checkArgument(newFileId.length() ==
shardFileId.length(), "FileId should be of length " + newFileId.length());
+ try {
+ HoodieLogFormat.Writer writer = HoodieLogFormat.newWriterBuilder()
+
.onParentPath(FSUtils.getPartitionPath(metadataWriteConfig.getBasePath(),
partition))
+ .withFileId(shardFileId).overBaseCommit(instantTime)
+ .withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION)
+ .withFileSize(0L)
+ .withSizeThreshold(metadataWriteConfig.getLogFileMaxSize())
+ .withFs(datasetMetaClient.getFs())
+ .withRolloverLogWriteToken(FSUtils.makeWriteToken(0, 0, 0))
+ .withLogWriteToken(FSUtils.makeWriteToken(0, 0, 0))
+ .withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
+ writer.appendBlock(block);
+ writer.close();
+ } catch (InterruptedException e) {
+ throw new IOException("Failed to created record level index shard " +
shardFileId, e);
}
- initTableMetadata();
- } catch (IOException ioe) {
- throw new HoodieIOException("Unable to sync instants from data to
metadata table.", ioe);
}
}
+ protected String getShardFileName(String fileId, int shardIndex) {
+ ValidationUtils.checkArgument(shardIndex <= 9999, "Maximum 9999 shards are
supported.");
Review comment:
`9999`
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -396,37 +408,56 @@ private boolean
bootstrapFromFilesystem(HoodieEngineContext engineContext, Hoodi
}
/**
- * Sync the Metadata Table from the instants created on the dataset.
+ * Initialize shards for a partition.
*
- * @param datasetMetaClient {@code HoodieTableMetaClient} for the dataset
+ * Each shard is a single log file with the following format:
+ * <fileIdPrefix>ABCD
+ * where ABCD are digits. This allows up to 9999 shards.
+ *
+ * Example:
+ * fc9f18eb-6049-4f47-bc51-23884bef0001
+ * fc9f18eb-6049-4f47-bc51-23884bef0002
*/
- private void syncFromInstants(HoodieTableMetaClient datasetMetaClient) {
- ValidationUtils.checkState(enabled, "Metadata table cannot be synced as it
is not enabled");
- // (re) init the metadata for reading.
- initTableMetadata();
- try {
- List<HoodieInstant> instantsToSync =
metadata.findInstantsToSyncForWriter();
- if (instantsToSync.isEmpty()) {
- return;
- }
-
- LOG.info("Syncing " + instantsToSync.size() + " instants to metadata
table: " + instantsToSync);
-
- // Read each instant in order and sync it to metadata table
- for (HoodieInstant instant : instantsToSync) {
- LOG.info("Syncing instant " + instant + " to metadata table");
-
- Option<List<HoodieRecord>> records =
HoodieTableMetadataUtil.convertInstantToMetaRecords(datasetMetaClient, instant,
getLatestSyncedInstantTime());
- if (records.isPresent()) {
- commit(records.get(), MetadataPartitionType.FILES.partitionPath(),
instant.getTimestamp());
- }
+ private void initializeShards(HoodieTableMetaClient datasetMetaClient,
String partition, String instantTime,
+ int shardCount) throws IOException {
+ ValidationUtils.checkArgument(shardCount <= 9999, "Maximum 9999 shards are
supported.");
+
+ final String newFileId = FSUtils.createNewFileIdPfx();
+ final String newFileIdPrefix = newFileId.substring(0, 32);
+ final HashMap<HeaderMetadataType, String> blockHeader = new HashMap<>();
+ blockHeader.put(HeaderMetadataType.INSTANT_TIME, instantTime);
+ final HoodieDeleteBlock block = new HoodieDeleteBlock(new HoodieKey[0],
blockHeader);
+
+ LOG.info(String.format("Creating %d shards for partition %s with base
fileId %s at instant time %s",
+ shardCount, partition, newFileId, instantTime));
+ for (int i = 0; i < shardCount; ++i) {
+ // Generate a indexed fileId for each shard and write a log block into
it to create the file.
+ final String shardFileId = String.format("%s%04d", newFileIdPrefix, i +
1);
+ ValidationUtils.checkArgument(newFileId.length() ==
shardFileId.length(), "FileId should be of length " + newFileId.length());
+ try {
+ HoodieLogFormat.Writer writer = HoodieLogFormat.newWriterBuilder()
+
.onParentPath(FSUtils.getPartitionPath(metadataWriteConfig.getBasePath(),
partition))
+ .withFileId(shardFileId).overBaseCommit(instantTime)
+ .withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION)
+ .withFileSize(0L)
Review comment:
legit?
##########
File path:
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/HoodieSparkTable.java
##########
@@ -66,4 +70,20 @@ protected HoodieSparkTable(HoodieWriteConfig config,
HoodieEngineContext context
protected HoodieIndex<T, JavaRDD<HoodieRecord<T>>, JavaRDD<HoodieKey>,
JavaRDD<WriteStatus>> getIndex(HoodieWriteConfig config, HoodieEngineContext
context) {
return SparkHoodieIndex.createIndex(config);
}
+
+ @Override
+ public Option<HoodieTableMetadataWriter> getMetadataWriter() {
+ if (!config.useFileListingMetadata()) {
+ return Option.empty();
+ }
+
+ try {
+ if (!metaClient.getFs().exists(new
Path(HoodieTableMetadata.getMetadataTableBasePath(metaClient.getBasePath())))) {
Review comment:
could we avoid this `exists()` somehow?
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
##########
@@ -435,7 +425,6 @@ protected void postCommit(HoodieTable<T, I, K, O> table,
HoodieCommitMetadata me
HoodieTimelineArchiveLog archiveLog = new
HoodieTimelineArchiveLog(config, table);
archiveLog.archiveIfRequired(context);
autoCleanOnCommit();
- syncTableMetadata();
Review comment:
there is no more additional sync process, with this re-design.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
##########
@@ -73,14 +73,13 @@
// Metadata table's timeline and metaclient
private HoodieTableMetaClient metaClient;
private HoodieTableConfig tableConfig;
- private List<FileSlice> latestFileSystemMetadataSlices;
// should we reuse the open file handles, across calls
private final boolean reuse;
-
- // Readers for the base and log file which store the metadata
- private transient HoodieFileReader<GenericRecord> baseFileReader;
- private transient HoodieMetadataMergedLogRecordScanner logRecordScanner;
+ // Shards for each partition
+ private Map<String, List<FileSlice>> partitionToShardsMap;
Review comment:
file groups?
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -86,10 +93,12 @@
protected HoodieBackedTableMetadata metadata;
protected HoodieTableMetaClient metaClient;
+ protected HoodieTableMetaClient datasetMetaClient;
protected Option<HoodieMetadataMetrics> metrics;
protected boolean enabled;
protected SerializableConfiguration hadoopConf;
protected final transient HoodieEngineContext engineContext;
+ protected TransactionManager txnManager;
Review comment:
nts: this is so that metadata table itself can take multiple writers
committing at the same time.
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -464,8 +487,14 @@ public void update(HoodieCleanerPlan cleanerPlan, String
instantTime) {
@Override
public void update(HoodieCleanMetadata cleanMetadata, String instantTime) {
if (enabled) {
- List<HoodieRecord> records =
HoodieTableMetadataUtil.convertMetadataToRecords(cleanMetadata, instantTime);
- commit(records, MetadataPartitionType.FILES.partitionPath(),
instantTime);
+ this.txnManager.beginTransaction(Option.of(new
HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, instantTime)),
Review comment:
can we share this code across these update() method overloads?
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -396,37 +408,56 @@ private boolean
bootstrapFromFilesystem(HoodieEngineContext engineContext, Hoodi
}
/**
- * Sync the Metadata Table from the instants created on the dataset.
+ * Initialize shards for a partition.
*
- * @param datasetMetaClient {@code HoodieTableMetaClient} for the dataset
+ * Each shard is a single log file with the following format:
+ * <fileIdPrefix>ABCD
+ * where ABCD are digits. This allows up to 9999 shards.
+ *
+ * Example:
+ * fc9f18eb-6049-4f47-bc51-23884bef0001
+ * fc9f18eb-6049-4f47-bc51-23884bef0002
*/
- private void syncFromInstants(HoodieTableMetaClient datasetMetaClient) {
- ValidationUtils.checkState(enabled, "Metadata table cannot be synced as it
is not enabled");
- // (re) init the metadata for reading.
- initTableMetadata();
- try {
- List<HoodieInstant> instantsToSync =
metadata.findInstantsToSyncForWriter();
- if (instantsToSync.isEmpty()) {
- return;
- }
-
- LOG.info("Syncing " + instantsToSync.size() + " instants to metadata
table: " + instantsToSync);
-
- // Read each instant in order and sync it to metadata table
- for (HoodieInstant instant : instantsToSync) {
- LOG.info("Syncing instant " + instant + " to metadata table");
-
- Option<List<HoodieRecord>> records =
HoodieTableMetadataUtil.convertInstantToMetaRecords(datasetMetaClient, instant,
getLatestSyncedInstantTime());
- if (records.isPresent()) {
- commit(records.get(), MetadataPartitionType.FILES.partitionPath(),
instant.getTimestamp());
- }
+ private void initializeShards(HoodieTableMetaClient datasetMetaClient,
String partition, String instantTime,
+ int shardCount) throws IOException {
+ ValidationUtils.checkArgument(shardCount <= 9999, "Maximum 9999 shards are
supported.");
Review comment:
add a constant for `9999`
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/client/AbstractHoodieWriteClient.java
##########
@@ -399,14 +397,6 @@ protected void preWrite(String instantTime,
WriteOperationType writeOperationTyp
HoodieTableMetaClient metaClient) {
setOperationType(writeOperationType);
this.lastCompletedTxnAndMetadata =
TransactionUtils.getLastCompletedTxnInstantAndMetadata(metaClient);
- this.txnManager.beginTransaction(Option.of(new
HoodieInstant(State.INFLIGHT, metaClient.getCommitActionType(), instantTime)),
lastCompletedTxnAndMetadata
Review comment:
nts: this lock was only being taken for purposes of syncing. So removing
this is fine.
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -304,6 +315,7 @@ private boolean bootstrapFromFilesystem(HoodieEngineContext
engineContext, Hoodi
.initTable(hadoopConf.get(), metadataWriteConfig.getBasePath());
initTableMetadata();
+ initializeShards(datasetMetaClient,
MetadataPartitionType.FILES.partitionPath(), createInstantTime, 1);
Review comment:
we should try to avoid introducing the new shard terminology. We should
have it for bucketing, if that's what we intend it for.
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -492,8 +527,26 @@ public void update(HoodieRestoreMetadata restoreMetadata,
String instantTime) {
@Override
public void update(HoodieRollbackMetadata rollbackMetadata, String
instantTime) {
if (enabled) {
- List<HoodieRecord> records =
HoodieTableMetadataUtil.convertMetadataToRecords(rollbackMetadata, instantTime,
metadata.getSyncedInstantTime());
- commit(records, MetadataPartitionType.FILES.partitionPath(),
instantTime);
+ this.txnManager.beginTransaction(Option.of(new
HoodieInstant(State.INFLIGHT, HoodieTimeline.DELTA_COMMIT_ACTION, instantTime)),
+ Option.empty());
+ try {
+ // Is this rollback of an instant that has been synced to the metadata
table?
+ String rollbackInstant = rollbackMetadata.getCommitsRollback().get(0);
+ boolean wasSynced = metaClient.getActiveTimeline().containsInstant(new
HoodieInstant(false, HoodieTimeline.DELTA_COMMIT_ACTION, rollbackInstant));
+ if (!wasSynced) {
+ // A compaction may have taken place on metadata table which would
have included this instant being rolled back.
Review comment:
any scope for simplifying this?
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataWriter.java
##########
@@ -32,18 +36,36 @@
*/
public interface HoodieTableMetadataWriter extends Serializable, AutoCloseable
{
- void update(HoodieCommitMetadata commitMetadata, String instantTime);
-
- void update(HoodieCleanerPlan cleanerPlan, String instantTime);
+ // Update the metadata table due to a COMMIT operation
+ void update(HoodieCommitMetadata option, String instantTime);
+ // Update the metadata table due to a CLEAN operation
void update(HoodieCleanMetadata cleanMetadata, String instantTime);
+ // Update the metadata table due to a RESTORE operation
void update(HoodieRestoreMetadata restoreMetadata, String instantTime);
+ // Update the metadata table due to a ROLLBACK operation
void update(HoodieRollbackMetadata rollbackMetadata, String instantTime);
/**
* Return the timestamp of the latest instant synced to the metadata table.
*/
Option<String> getLatestSyncedInstantTime();
+
+ /**
+ * Remove the metadata table for the dataset.
+ *
+ * @param basePath base path of the dataset
+ * @param context
+ */
+ static void removeMetadataTable(String basePath, HoodieEngineContext
context) {
Review comment:
rename `deleteMetadataTable`
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -396,37 +408,56 @@ private boolean
bootstrapFromFilesystem(HoodieEngineContext engineContext, Hoodi
}
/**
- * Sync the Metadata Table from the instants created on the dataset.
+ * Initialize shards for a partition.
*
- * @param datasetMetaClient {@code HoodieTableMetaClient} for the dataset
+ * Each shard is a single log file with the following format:
+ * <fileIdPrefix>ABCD
+ * where ABCD are digits. This allows up to 9999 shards.
+ *
+ * Example:
+ * fc9f18eb-6049-4f47-bc51-23884bef0001
+ * fc9f18eb-6049-4f47-bc51-23884bef0002
*/
- private void syncFromInstants(HoodieTableMetaClient datasetMetaClient) {
- ValidationUtils.checkState(enabled, "Metadata table cannot be synced as it
is not enabled");
- // (re) init the metadata for reading.
- initTableMetadata();
- try {
- List<HoodieInstant> instantsToSync =
metadata.findInstantsToSyncForWriter();
- if (instantsToSync.isEmpty()) {
- return;
- }
-
- LOG.info("Syncing " + instantsToSync.size() + " instants to metadata
table: " + instantsToSync);
-
- // Read each instant in order and sync it to metadata table
- for (HoodieInstant instant : instantsToSync) {
- LOG.info("Syncing instant " + instant + " to metadata table");
-
- Option<List<HoodieRecord>> records =
HoodieTableMetadataUtil.convertInstantToMetaRecords(datasetMetaClient, instant,
getLatestSyncedInstantTime());
- if (records.isPresent()) {
- commit(records.get(), MetadataPartitionType.FILES.partitionPath(),
instant.getTimestamp());
- }
+ private void initializeShards(HoodieTableMetaClient datasetMetaClient,
String partition, String instantTime,
+ int shardCount) throws IOException {
+ ValidationUtils.checkArgument(shardCount <= 9999, "Maximum 9999 shards are
supported.");
+
+ final String newFileId = FSUtils.createNewFileIdPfx();
+ final String newFileIdPrefix = newFileId.substring(0, 32);
+ final HashMap<HeaderMetadataType, String> blockHeader = new HashMap<>();
+ blockHeader.put(HeaderMetadataType.INSTANT_TIME, instantTime);
+ final HoodieDeleteBlock block = new HoodieDeleteBlock(new HoodieKey[0],
blockHeader);
Review comment:
we are writing an empty delete block? why? just to create the file
group/shard upfront?
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataWriter.java
##########
@@ -32,18 +36,36 @@
*/
public interface HoodieTableMetadataWriter extends Serializable, AutoCloseable
{
- void update(HoodieCommitMetadata commitMetadata, String instantTime);
-
- void update(HoodieCleanerPlan cleanerPlan, String instantTime);
+ // Update the metadata table due to a COMMIT operation
+ void update(HoodieCommitMetadata option, String instantTime);
Review comment:
why `option`?
##########
File path:
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java
##########
@@ -299,6 +302,7 @@ protected void completeCompaction(HoodieCommitMetadata
metadata, JavaRDD<WriteSt
this.context.setJobStatus(this.getClass().getSimpleName(), "Collect
compaction write status and commit compaction");
List<HoodieWriteStat> writeStats =
writeStatuses.map(WriteStatus::getStat).collect();
finalizeWrite(table, compactionCommitTime, writeStats);
+ table.getMetadataWriter().ifPresent(w -> w.update(metadata,
compactionCommitTime));
Review comment:
nts: audit all paths that commit to timeline and ensure this is done
everywhere. this may also be an opportunity to streamline such code occurrences.
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTimelineArchiveLog.java
##########
@@ -200,20 +200,19 @@ public boolean archiveIfRequired(HoodieEngineContext
context) throws IOException
.collect(Collectors.groupingBy(i -> Pair.of(i.getTimestamp(),
HoodieInstant.getComparableAction(i.getAction()))));
- // If metadata table is enabled, do not archive instants which are more
recent that the latest synced
- // instant on the metadata table. This is required for metadata table sync.
+ // If metadata table is enabled, do not archive instants which are more
recent that the last compaction on the
+ // metadata table.
if (config.useFileListingMetadata()) {
try (HoodieTableMetadata tableMetadata =
HoodieTableMetadata.create(table.getContext(), config.getMetadataConfig(),
config.getBasePath(),
FileSystemViewStorageConfig.FILESYSTEM_VIEW_SPILLABLE_DIR.defaultValue())) {
- Option<String> lastSyncedInstantTime =
tableMetadata.getSyncedInstantTime();
-
- if (lastSyncedInstantTime.isPresent()) {
- LOG.info("Limiting archiving of instants to last synced instant on
metadata table at " + lastSyncedInstantTime.get());
- instants = instants.filter(i ->
HoodieTimeline.compareTimestamps(i.getTimestamp(), HoodieTimeline.LESSER_THAN,
- lastSyncedInstantTime.get()));
- } else {
- LOG.info("Not archiving as there is no instants yet on the metadata
table");
+ Option<String> latestCompactionTime =
tableMetadata.getLatestCompactionTime();
Review comment:
Need to understand why it matters that the metadata table be compacted
before a commit can be archived on the data timeline
##########
File path:
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java
##########
@@ -152,49 +134,57 @@ protected void commit(List<HoodieRecord> records, String
partitionName, String i
}
/**
- * Tag each record with the location.
+ * Perform a compaction on the Metadata Table.
*
- * Since we only read the latest base file in a partition, we tag the
records with the instant time of the latest
- * base file.
+ * Cases to be handled:
+ * 1. We cannot perform compaction if there are previous inflight
operations on the dataset. This is because
+ * a compacted metadata base file at time Tx should represent all the
actions on the dataset till time Tx.
+ *
+ * 2. In multi-writer scenario, a parallel operation with a greater
instantTime may have completed creating a
+ * deltacommit.
*/
- private JavaRDD<HoodieRecord> prepRecords(List<HoodieRecord> records, String
partitionName) {
- HoodieTable table = HoodieSparkTable.create(metadataWriteConfig,
engineContext);
- TableFileSystemView.SliceView fsView = table.getSliceView();
- List<HoodieBaseFile> baseFiles = fsView.getLatestFileSlices(partitionName)
- .map(FileSlice::getBaseFile)
- .filter(Option::isPresent)
- .map(Option::get)
- .collect(Collectors.toList());
-
- // All the metadata fits within a single base file
- if (partitionName.equals(MetadataPartitionType.FILES.partitionPath())) {
- if (baseFiles.size() > 1) {
- throw new HoodieMetadataException("Multiple base files found in
metadata partition");
- }
+ private void compactIfNecessary(SparkRDDWriteClient writeClient, String
instantTime) {
+ String latestDeltacommitTime =
metaClient.getActiveTimeline().getDeltaCommitTimeline().filterCompletedInstants().lastInstant()
+ .get().getTimestamp();
+ List<HoodieInstant> pendingInstants =
datasetMetaClient.reloadActiveTimeline().filterInflightsAndRequested()
+
.findInstantsBefore(latestDeltacommitTime).getInstants().collect(Collectors.toList());
+
+ if (!pendingInstants.isEmpty()) {
+ LOG.info(String.format("Cannot compact metadata table as there are %d
inflight instants before latest deltacommit %s: %s",
+ pendingInstants.size(), latestDeltacommitTime,
Arrays.toString(pendingInstants.toArray())));
+ return;
}
- JavaSparkContext jsc = ((HoodieSparkEngineContext)
engineContext).getJavaSparkContext();
- String fileId;
- String instantTime;
- if (!baseFiles.isEmpty()) {
- fileId = baseFiles.get(0).getFileId();
- instantTime = baseFiles.get(0).getCommitTime();
- } else {
- // If there is a log file then we can assume that it has the data
- List<HoodieLogFile> logFiles =
fsView.getLatestFileSlices(MetadataPartitionType.FILES.partitionPath())
- .map(FileSlice::getLatestLogFile)
- .filter(Option::isPresent)
- .map(Option::get)
- .collect(Collectors.toList());
- if (logFiles.isEmpty()) {
- // No base and log files. All are new inserts
- return jsc.parallelize(records, 1);
- }
-
- fileId = logFiles.get(0).getFileId();
- instantTime = logFiles.get(0).getBaseCommitTime();
+ // Trigger compaction with suffixes based on the same instant time. This
ensures that any future
+ // delta commits synced over will not have an instant time lesser than the
last completed instant on the
+ // metadata table.
+ final String compactionInstantTime = latestDeltacommitTime + "001";
+ if (writeClient.scheduleCompactionAtInstant(compactionInstantTime,
Option.empty())) {
+ writeClient.compact(compactionInstantTime);
}
+ }
+
+ private void cleanIfNecessary(SparkRDDWriteClient writeClient, String
instantTime) {
Review comment:
why the `IfNecessary` part?
##########
File path:
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/SparkRDDWriteClient.java
##########
@@ -96,6 +94,11 @@ public SparkRDDWriteClient(HoodieEngineContext context,
HoodieWriteConfig writeC
public SparkRDDWriteClient(HoodieEngineContext context, HoodieWriteConfig
writeConfig,
Option<EmbeddedTimelineService> timelineService) {
super(context, writeConfig, timelineService);
+ if (config.useFileListingMetadata()) {
Review comment:
should we use a flag for metadata table enable/disable, not just file
listing?
##########
File path:
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/upgrade/OneToTwoUpgradeHandler.java
##########
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.table.upgrade;
+
+import org.apache.hudi.common.engine.HoodieEngineContext;
+import org.apache.hudi.config.HoodieWriteConfig;
+import org.apache.hudi.metadata.HoodieTableMetadataWriter;
+
+/**
+ * Upgrade handle to assist in upgrading hoodie table from version 1 to 2.
+ */
+public class OneToTwoUpgradeHandler implements UpgradeHandler {
Review comment:
I think this will now be TwoToThree
##########
File path:
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/metadata/SparkHoodieBackedTableMetadataWriter.java
##########
@@ -101,7 +94,7 @@ protected void initialize(HoodieEngineContext engineContext,
HoodieTableMetaClie
@Override
protected void commit(List<HoodieRecord> records, String partitionName,
String instantTime) {
Review comment:
nts: this cannot be a List<> for the record level index
##########
File path:
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/table/upgrade/SparkUpgradeDowngrade.java
##########
@@ -48,7 +48,9 @@ public void run(HoodieTableMetaClient metaClient,
@Override
protected void upgrade(HoodieTableVersion fromVersion, HoodieTableVersion
toVersion, String instantTime) {
- if (fromVersion == HoodieTableVersion.ZERO && toVersion ==
HoodieTableVersion.ONE) {
+ if (fromVersion == HoodieTableVersion.ONE && toVersion ==
HoodieTableVersion.TWO) {
Review comment:
there are some conflicts to be resolved here.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/config/HoodieMetadataConfig.java
##########
@@ -40,17 +40,10 @@
// Enable the internal Metadata Table which saves file listings
public static final ConfigProperty<Boolean> METADATA_ENABLE_PROP =
ConfigProperty
.key(METADATA_PREFIX + ".enable")
- .defaultValue(false)
+ .defaultValue(true)
Review comment:
so all tests pass now?!!!
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java
##########
@@ -229,116 +196,11 @@ protected BaseTableMetadata(HoodieEngineContext
engineContext, HoodieMetadataCon
statuses =
hoodieRecord.get().getData().getFileStatuses(hadoopConf.get(), partitionPath);
}
- if (metadataConfig.validateFileListingMetadata()) {
Review comment:
yay!
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
##########
@@ -331,4 +301,28 @@ public HoodieTableMetaClient getMetaClient() {
public Map<String, String> stats() {
return metrics.map(m -> m.getStats(true, metaClient, this)).orElse(new
HashMap<>());
}
+
+ @Override
+ public Option<String> getSyncedInstantTime() {
Review comment:
these methods are the same!
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
##########
@@ -340,4 +297,77 @@ private static void
processRollbackMetadata(HoodieRollbackMetadata rollbackMetad
return records;
}
+
+ /**
+ * Returns a list of commits which were rolled back as part of a Rollback or
Restore operation.
+ *
+ * @param instant The Rollback operation to read
+ * @param timeline
+ */
+ public static List<String> getCommitsRolledback(HoodieInstant instant,
HoodieActiveTimeline timeline) {
Review comment:
this should belong some place else?
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
##########
@@ -223,11 +174,17 @@
return convertFilesToRecords(partitionToDeletedFiles,
partitionToAppendedFiles, instantTime, "Restore");
}
- public static List<HoodieRecord>
convertMetadataToRecords(HoodieRollbackMetadata rollbackMetadata, String
instantTime, Option<String> lastSyncTs) {
+ public static List<HoodieRecord>
convertMetadataToRecords(HoodieRollbackMetadata rollbackMetadata, String
instantTime,
+ Option<String> lastSyncTs, boolean wasSynced) {
Map<String, Map<String, Long>> partitionToAppendedFiles = new HashMap<>();
Map<String, List<String>> partitionToDeletedFiles = new HashMap<>();
processRollbackMetadata(rollbackMetadata, partitionToDeletedFiles,
partitionToAppendedFiles, lastSyncTs);
+ if (!wasSynced) {
Review comment:
nts: need to revisit again with rollback/restore issues fixed
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
##########
@@ -162,157 +168,121 @@ private void initIfNeeded() {
throw new HoodieIOException("Error merging records from metadata table
for key :" + key, ioe);
} finally {
if (!reuse) {
- closeOrThrow();
+ close(partitionName);
}
}
}
- private void openReadersIfNeededOrThrow() {
- try {
- openReadersIfNeeded();
- } catch (IOException e) {
- throw new HoodieIOException("Error opening readers to the Metadata
Table: ", e);
- }
- }
-
/**
* Returns a new pair of readers to the base and log files.
*/
- private void openReadersIfNeeded() throws IOException {
- if (reuse && (baseFileReader != null || logRecordScanner != null)) {
- // quickly exit out without synchronizing if reusing and readers are
already open
- return;
- }
-
- // we always force synchronization, if reuse=false, to handle concurrent
close() calls as well.
- synchronized (this) {
- if (baseFileReader != null || logRecordScanner != null) {
- return;
- }
-
- final long baseFileOpenMs;
- final long logScannerOpenMs;
-
- // Metadata is in sync till the latest completed instant on the dataset
- HoodieTimer timer = new HoodieTimer().startTimer();
- String latestInstantTime = getLatestDatasetInstantTime();
- ValidationUtils.checkArgument(latestFileSystemMetadataSlices.size() ==
1, "must be at-least one valid metadata file slice");
-
- // If the base file is present then create a reader
- Option<HoodieBaseFile> basefile =
latestFileSystemMetadataSlices.get(0).getBaseFile();
- if (basefile.isPresent()) {
- String basefilePath = basefile.get().getPath();
- baseFileReader =
HoodieFileReaderFactory.getFileReader(hadoopConf.get(), new Path(basefilePath));
- baseFileOpenMs = timer.endTimer();
- LOG.info(String.format("Opened metadata base file from %s at instant
%s in %d ms", basefilePath,
- basefile.get().getCommitTime(), baseFileOpenMs));
- } else {
- baseFileOpenMs = 0;
- timer.endTimer();
- }
-
- // Open the log record scanner using the log files from the latest file
slice
- timer.startTimer();
- List<String> logFilePaths =
latestFileSystemMetadataSlices.get(0).getLogFiles()
- .sorted(HoodieLogFile.getLogFileComparator())
- .map(o -> o.getPath().toString())
- .collect(Collectors.toList());
- Option<HoodieInstant> lastInstant =
metaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
- String latestMetaInstantTimestamp =
lastInstant.map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP);
-
- // Load the schema
- Schema schema =
HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());
- HoodieCommonConfig commonConfig =
HoodieCommonConfig.newBuilder().fromProperties(metadataConfig.getProps()).build();
- logRecordScanner = HoodieMetadataMergedLogRecordScanner.newBuilder()
- .withFileSystem(metaClient.getFs())
- .withBasePath(metadataBasePath)
- .withLogFilePaths(logFilePaths)
- .withReaderSchema(schema)
- .withLatestInstantTime(latestMetaInstantTimestamp)
- .withMaxMemorySizeInBytes(MAX_MEMORY_SIZE_IN_BYTES)
- .withBufferSize(BUFFER_SIZE)
- .withSpillableMapBasePath(spillableMapDirectory)
- .withDiskMapType(commonConfig.getSpillableDiskMapType())
-
.withBitCaskDiskMapCompressionEnabled(commonConfig.isBitCaskDiskMapCompressionEnabled())
- .build();
-
- logScannerOpenMs = timer.endTimer();
- LOG.info(String.format("Opened metadata log files from %s at instant
(dataset instant=%s, metadata instant=%s) in %d ms",
- logFilePaths, latestInstantTime, latestMetaInstantTimestamp,
logScannerOpenMs));
-
- metrics.ifPresent(metrics ->
metrics.updateMetrics(HoodieMetadataMetrics.SCAN_STR, baseFileOpenMs +
logScannerOpenMs));
- }
- }
+ private Pair<HoodieFileReader, HoodieMetadataMergedLogRecordScanner>
openReadersIfNeeded(String key, String partitionName) throws IOException {
Review comment:
nts: ensure that all the reuse of these readers (for the timeline server,
etc.) is working really well
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieTableMetadataUtil.java
##########
@@ -340,4 +297,77 @@ private static void
processRollbackMetadata(HoodieRollbackMetadata rollbackMetad
return records;
}
+
+ /**
+ * Returns a list of commits which were rolled back as part of a Rollback or
Restore operation.
+ *
+ * @param instant The Rollback operation to read
+ * @param timeline
+ */
+ public static List<String> getCommitsRolledback(HoodieInstant instant,
HoodieActiveTimeline timeline) {
+ try {
+ if (instant.getAction().equals(HoodieTimeline.ROLLBACK_ACTION)) {
+ HoodieRollbackMetadata rollbackMetadata =
TimelineMetadataUtils.deserializeHoodieRollbackMetadata(
+ timeline.getInstantDetails(instant).get());
+ return rollbackMetadata.getCommitsRollback();
+ }
+
+ List<String> commitsRolledback = new LinkedList<>();
+
+ if (instant.getAction().equals(HoodieTimeline.RESTORE_ACTION)) {
+ // Restore is made up of several rollbacks
+ HoodieRestoreMetadata restoreMetadata =
TimelineMetadataUtils.deserializeHoodieRestoreMetadata(
+ timeline.getInstantDetails(instant).get());
+ restoreMetadata.getHoodieRestoreMetadata().values().forEach(rms -> {
+ rms.forEach(rm -> commitsRolledback.addAll(rm.getCommitsRollback()));
+ });
+ }
+
+ return commitsRolledback;
+ } catch (IOException e) {
+ throw new HoodieMetadataException("Error retrieving rollback commits for
instant " + instant, e);
+ }
+ }
+
+ /**
+ * Map a key to a shard.
+ *
+ * Note: For hashing, the algorithm is same as String.hashCode() but is
being defined here as hashCode()
+ * implementation is not guaranteed by the JVM to be consistent across JVM
versions and implementations.
+ *
+ * @param str
+ * @return An integer hash of the given string
+ */
+ public static int keyToShard(String str, int numShards) {
+ int h = 0;
+ for (int i = 0; i < str.length(); ++i) {
+ h = 31 * h + str.charAt(i);
+ }
+
+ return Math.abs(Math.abs(h) % numShards);
+ }
+
+ /**
+ * Loads the list of shards for a partition of the Metadata Table.
+ *
+ * The list of shards is returned sorted in the correct order of shard index.
+ * @param metaClient
+ * @param partition The name of the partition whose shards are to be loaded.
+ * @return List of shards
+ */
+ public static List<FileSlice> loadPartitionShards(HoodieTableMetaClient
metaClient, String partition) {
Review comment:
Let's streamline all this naming.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
##########
@@ -162,157 +168,121 @@ private void initIfNeeded() {
throw new HoodieIOException("Error merging records from metadata table
for key :" + key, ioe);
} finally {
if (!reuse) {
- closeOrThrow();
+ close(partitionName);
}
}
}
- private void openReadersIfNeededOrThrow() {
- try {
- openReadersIfNeeded();
- } catch (IOException e) {
- throw new HoodieIOException("Error opening readers to the Metadata
Table: ", e);
- }
- }
-
/**
* Returns a new pair of readers to the base and log files.
*/
- private void openReadersIfNeeded() throws IOException {
- if (reuse && (baseFileReader != null || logRecordScanner != null)) {
- // quickly exit out without synchronizing if reusing and readers are
already open
- return;
- }
-
- // we always force synchronization, if reuse=false, to handle concurrent
close() calls as well.
- synchronized (this) {
- if (baseFileReader != null || logRecordScanner != null) {
- return;
- }
-
- final long baseFileOpenMs;
- final long logScannerOpenMs;
-
- // Metadata is in sync till the latest completed instant on the dataset
- HoodieTimer timer = new HoodieTimer().startTimer();
- String latestInstantTime = getLatestDatasetInstantTime();
- ValidationUtils.checkArgument(latestFileSystemMetadataSlices.size() ==
1, "must be at-least one valid metadata file slice");
-
- // If the base file is present then create a reader
- Option<HoodieBaseFile> basefile =
latestFileSystemMetadataSlices.get(0).getBaseFile();
- if (basefile.isPresent()) {
- String basefilePath = basefile.get().getPath();
- baseFileReader =
HoodieFileReaderFactory.getFileReader(hadoopConf.get(), new Path(basefilePath));
- baseFileOpenMs = timer.endTimer();
- LOG.info(String.format("Opened metadata base file from %s at instant
%s in %d ms", basefilePath,
- basefile.get().getCommitTime(), baseFileOpenMs));
- } else {
- baseFileOpenMs = 0;
- timer.endTimer();
- }
-
- // Open the log record scanner using the log files from the latest file
slice
- timer.startTimer();
- List<String> logFilePaths =
latestFileSystemMetadataSlices.get(0).getLogFiles()
- .sorted(HoodieLogFile.getLogFileComparator())
- .map(o -> o.getPath().toString())
- .collect(Collectors.toList());
- Option<HoodieInstant> lastInstant =
metaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
- String latestMetaInstantTimestamp =
lastInstant.map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP);
-
- // Load the schema
- Schema schema =
HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());
- HoodieCommonConfig commonConfig =
HoodieCommonConfig.newBuilder().fromProperties(metadataConfig.getProps()).build();
- logRecordScanner = HoodieMetadataMergedLogRecordScanner.newBuilder()
- .withFileSystem(metaClient.getFs())
- .withBasePath(metadataBasePath)
- .withLogFilePaths(logFilePaths)
- .withReaderSchema(schema)
- .withLatestInstantTime(latestMetaInstantTimestamp)
- .withMaxMemorySizeInBytes(MAX_MEMORY_SIZE_IN_BYTES)
- .withBufferSize(BUFFER_SIZE)
- .withSpillableMapBasePath(spillableMapDirectory)
- .withDiskMapType(commonConfig.getSpillableDiskMapType())
-
.withBitCaskDiskMapCompressionEnabled(commonConfig.isBitCaskDiskMapCompressionEnabled())
- .build();
-
- logScannerOpenMs = timer.endTimer();
- LOG.info(String.format("Opened metadata log files from %s at instant
(dataset instant=%s, metadata instant=%s) in %d ms",
- logFilePaths, latestInstantTime, latestMetaInstantTimestamp,
logScannerOpenMs));
-
- metrics.ifPresent(metrics ->
metrics.updateMetrics(HoodieMetadataMetrics.SCAN_STR, baseFileOpenMs +
logScannerOpenMs));
- }
- }
+ private Pair<HoodieFileReader, HoodieMetadataMergedLogRecordScanner>
openReadersIfNeeded(String key, String partitionName) throws IOException {
+ return shardReaders.computeIfAbsent(partitionName, k -> {
+ try {
+ final long baseFileOpenMs;
+ final long logScannerOpenMs;
+ HoodieFileReader baseFileReader = null;
+ HoodieMetadataMergedLogRecordScanner logRecordScanner = null;
+
+ // Metadata is in sync till the latest completed instant on the dataset
+ HoodieTimer timer = new HoodieTimer().startTimer();
+ List<FileSlice> shards =
HoodieTableMetadataUtil.loadPartitionShards(metaClient, partitionName);
+ ValidationUtils.checkArgument(shards.size() == 1,
String.format("Invalid number of shards: found=%d, required=%d", shards.size(),
1));
+ final FileSlice slice =
shards.get(HoodieTableMetadataUtil.keyToShard(key, shards.size()));
+
+ // If the base file is present then create a reader
+ Option<HoodieBaseFile> basefile = slice.getBaseFile();
+ if (basefile.isPresent()) {
Review comment:
Do we send the initial data to log files, without any base file? Is this
why we are creating the log files with an empty delete block upfront?
##########
File path:
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/action/commit/BaseFlinkCommitActionExecutor.java
##########
@@ -141,13 +141,14 @@ protected void commit(Option<Map<String, String>>
extraMetadata, HoodieWriteMeta
result.setWriteStats(writeStats);
// Finalize write
finalizeWrite(instantTime, writeStats, result);
- syncTableMetadata();
try {
LOG.info("Committing " + instantTime + ", action Type " +
getCommitActionType());
HoodieActiveTimeline activeTimeline = table.getActiveTimeline();
HoodieCommitMetadata metadata = CommitUtils.buildMetadata(writeStats,
result.getPartitionToReplaceFileIds(),
extraMetadata, operationType, getSchemaToStoreInCommit(),
getCommitActionType());
+ syncTableMetadata(metadata);
Review comment:
For Flink, this code is still executed at the driver, right?
##########
File path:
hudi-client/hudi-flink-client/src/main/java/org/apache/hudi/table/upgrade/FlinkUpgradeDowngrade.java
##########
@@ -43,7 +43,9 @@ public void run(HoodieTableMetaClient metaClient,
HoodieTableVersion toVersion,
@Override
protected void upgrade(HoodieTableVersion fromVersion, HoodieTableVersion
toVersion, String instantTime) {
- if (fromVersion == HoodieTableVersion.ZERO && toVersion ==
HoodieTableVersion.ONE) {
+ if (fromVersion == HoodieTableVersion.ONE && toVersion ==
HoodieTableVersion.TWO) {
+ // TODO:
Review comment:
+1
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -381,6 +387,57 @@ private boolean
bootstrapFromFilesystem(HoodieEngineContext engineContext, Hoodi
return partitionToFileStatus;
}
+ /**
+ * Initialize shards for a partition.
+ *
+ * Each shard is a single log file with the following format:
+ * <fileIdPrefix>ABCD
+ * where ABCD are digits. This allows up to 9999 shards.
+ *
+ * Example:
+ * fc9f18eb-6049-4f47-bc51-23884bef0001
+ * fc9f18eb-6049-4f47-bc51-23884bef0002
Review comment:
if we use the 0000-9999 as a hash partition, then we cannot reuse that?
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/BaseActionExecutor.java
##########
@@ -46,4 +50,24 @@ public BaseActionExecutor(HoodieEngineContext context,
HoodieWriteConfig config,
}
public abstract R execute();
+
+ protected final void syncTableMetadata(HoodieCommitMetadata metadata) {
Review comment:
+1
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
##########
@@ -162,157 +168,121 @@ private void initIfNeeded() {
throw new HoodieIOException("Error merging records from metadata table
for key :" + key, ioe);
} finally {
if (!reuse) {
- closeOrThrow();
+ close(partitionName);
}
}
}
- private void openReadersIfNeededOrThrow() {
- try {
- openReadersIfNeeded();
- } catch (IOException e) {
- throw new HoodieIOException("Error opening readers to the Metadata
Table: ", e);
- }
- }
-
/**
* Returns a new pair of readers to the base and log files.
*/
- private void openReadersIfNeeded() throws IOException {
- if (reuse && (baseFileReader != null || logRecordScanner != null)) {
- // quickly exit out without synchronizing if reusing and readers are
already open
- return;
- }
-
- // we always force synchronization, if reuse=false, to handle concurrent
close() calls as well.
- synchronized (this) {
- if (baseFileReader != null || logRecordScanner != null) {
- return;
- }
-
- final long baseFileOpenMs;
- final long logScannerOpenMs;
-
- // Metadata is in sync till the latest completed instant on the dataset
- HoodieTimer timer = new HoodieTimer().startTimer();
- String latestInstantTime = getLatestDatasetInstantTime();
- ValidationUtils.checkArgument(latestFileSystemMetadataSlices.size() ==
1, "must be at-least one valid metadata file slice");
-
- // If the base file is present then create a reader
- Option<HoodieBaseFile> basefile =
latestFileSystemMetadataSlices.get(0).getBaseFile();
- if (basefile.isPresent()) {
- String basefilePath = basefile.get().getPath();
- baseFileReader =
HoodieFileReaderFactory.getFileReader(hadoopConf.get(), new Path(basefilePath));
- baseFileOpenMs = timer.endTimer();
- LOG.info(String.format("Opened metadata base file from %s at instant
%s in %d ms", basefilePath,
- basefile.get().getCommitTime(), baseFileOpenMs));
- } else {
- baseFileOpenMs = 0;
- timer.endTimer();
- }
-
- // Open the log record scanner using the log files from the latest file
slice
- timer.startTimer();
- List<String> logFilePaths =
latestFileSystemMetadataSlices.get(0).getLogFiles()
- .sorted(HoodieLogFile.getLogFileComparator())
- .map(o -> o.getPath().toString())
- .collect(Collectors.toList());
- Option<HoodieInstant> lastInstant =
metaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
- String latestMetaInstantTimestamp =
lastInstant.map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP);
-
- // Load the schema
- Schema schema =
HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());
- HoodieCommonConfig commonConfig =
HoodieCommonConfig.newBuilder().fromProperties(metadataConfig.getProps()).build();
- logRecordScanner = HoodieMetadataMergedLogRecordScanner.newBuilder()
- .withFileSystem(metaClient.getFs())
- .withBasePath(metadataBasePath)
- .withLogFilePaths(logFilePaths)
- .withReaderSchema(schema)
- .withLatestInstantTime(latestMetaInstantTimestamp)
- .withMaxMemorySizeInBytes(MAX_MEMORY_SIZE_IN_BYTES)
- .withBufferSize(BUFFER_SIZE)
- .withSpillableMapBasePath(spillableMapDirectory)
- .withDiskMapType(commonConfig.getSpillableDiskMapType())
-
.withBitCaskDiskMapCompressionEnabled(commonConfig.isBitCaskDiskMapCompressionEnabled())
- .build();
-
- logScannerOpenMs = timer.endTimer();
- LOG.info(String.format("Opened metadata log files from %s at instant
(dataset instant=%s, metadata instant=%s) in %d ms",
- logFilePaths, latestInstantTime, latestMetaInstantTimestamp,
logScannerOpenMs));
-
- metrics.ifPresent(metrics ->
metrics.updateMetrics(HoodieMetadataMetrics.SCAN_STR, baseFileOpenMs +
logScannerOpenMs));
- }
- }
+ private Pair<HoodieFileReader, HoodieMetadataMergedLogRecordScanner>
openReadersIfNeeded(String key, String partitionName) throws IOException {
+ return shardReaders.computeIfAbsent(partitionName, k -> {
+ try {
+ final long baseFileOpenMs;
+ final long logScannerOpenMs;
+ HoodieFileReader baseFileReader = null;
+ HoodieMetadataMergedLogRecordScanner logRecordScanner = null;
+
+ // Metadata is in sync till the latest completed instant on the dataset
+ HoodieTimer timer = new HoodieTimer().startTimer();
+ List<FileSlice> shards =
HoodieTableMetadataUtil.loadPartitionShards(metaClient, partitionName);
+ ValidationUtils.checkArgument(shards.size() == 1,
String.format("Invalid number of shards: found=%d, required=%d", shards.size(),
1));
+ final FileSlice slice =
shards.get(HoodieTableMetadataUtil.keyToShard(key, shards.size()));
+
+ // If the base file is present then create a reader
+ Option<HoodieBaseFile> basefile = slice.getBaseFile();
+ if (basefile.isPresent()) {
+ String basefilePath = basefile.get().getPath();
+ baseFileReader =
HoodieFileReaderFactory.getFileReader(hadoopConf.get(), new Path(basefilePath));
+ baseFileOpenMs = timer.endTimer();
+ LOG.info(String.format("Opened metadata base file from %s at instant
%s in %d ms", basefilePath,
+ basefile.get().getCommitTime(), baseFileOpenMs));
+ } else {
+ baseFileOpenMs = 0;
+ timer.endTimer();
+ }
- private void close(HoodieFileReader localFileReader,
HoodieMetadataMergedLogRecordScanner localLogScanner) {
- try {
- if (localFileReader != null) {
- localFileReader.close();
- }
- if (localLogScanner != null) {
- localLogScanner.close();
+ // Open the log record scanner using the log files from the latest
file slice
+ timer.startTimer();
+ List<String> logFilePaths = slice.getLogFiles()
+ .sorted(HoodieLogFile.getLogFileComparator())
+ .map(o -> o.getPath().toString())
+ .collect(Collectors.toList());
+
+ // Only those log files which have a corresponding completed instant
on the dataset should be read
+ // This is because the metadata table is updated before the dataset
instants are committed.
+ HoodieActiveTimeline datasetTimeline =
datasetMetaClient.getActiveTimeline();
+ Set<String> validInstantTimestamps =
datasetTimeline.filterCompletedInstants().getInstants()
+ .map(i -> i.getTimestamp()).collect(Collectors.toSet());
+
+ // For any rollbacks and restores, we cannot neglect the instants that
they are rolling back.
+ // The rollback instant should be more recent than the start of the
timeline for it to have rolled back any
+ // instant which we have a log block for.
+ final String minInstantTime = validInstantTimestamps.isEmpty() ?
SOLO_COMMIT_TIMESTAMP : Collections.min(validInstantTimestamps);
+
datasetTimeline.getRollbackAndRestoreTimeline().filterCompletedInstants().getInstants()
+ .filter(instant ->
HoodieTimeline.compareTimestamps(instant.getTimestamp(),
HoodieTimeline.GREATER_THAN, minInstantTime))
+ .forEach(instant -> {
+
validInstantTimestamps.addAll(HoodieTableMetadataUtil.getCommitsRolledback(instant,
datasetTimeline));
+ });
+
+ // SOLO_COMMIT_TIMESTAMP is used during bootstrap so it is a valid
timestamp
+ validInstantTimestamps.add(SOLO_COMMIT_TIMESTAMP);
+
+ Option<HoodieInstant> lastInstant =
metaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
+ String latestMetaInstantTimestamp =
lastInstant.map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP);
+
+ // Load the schema
+ Schema schema =
HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());
+ HoodieCommonConfig commonConfig =
HoodieCommonConfig.newBuilder().fromProperties(metadataConfig.getProps()).build();
+ logRecordScanner = HoodieMetadataMergedLogRecordScanner.newBuilder()
+ .withFileSystem(metaClient.getFs())
+ .withBasePath(metadataBasePath)
+ .withLogFilePaths(logFilePaths)
+ .withReaderSchema(schema)
+ .withLatestInstantTime(latestMetaInstantTimestamp)
+ .withMaxMemorySizeInBytes(MAX_MEMORY_SIZE_IN_BYTES)
+ .withBufferSize(BUFFER_SIZE)
+ .withSpillableMapBasePath(spillableMapDirectory)
+ .withDiskMapType(commonConfig.getSpillableDiskMapType())
+
.withBitCaskDiskMapCompressionEnabled(commonConfig.isBitCaskDiskMapCompressionEnabled())
+ .withLogBlockTimestamps(validInstantTimestamps)
Review comment:
This is what fences all uncommitted data from being read out of the
metadata table.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadata.java
##########
@@ -162,157 +168,121 @@ private void initIfNeeded() {
throw new HoodieIOException("Error merging records from metadata table
for key :" + key, ioe);
} finally {
if (!reuse) {
- closeOrThrow();
+ close(partitionName);
}
}
}
- private void openReadersIfNeededOrThrow() {
- try {
- openReadersIfNeeded();
- } catch (IOException e) {
- throw new HoodieIOException("Error opening readers to the Metadata
Table: ", e);
- }
- }
-
/**
* Returns a new pair of readers to the base and log files.
*/
- private void openReadersIfNeeded() throws IOException {
- if (reuse && (baseFileReader != null || logRecordScanner != null)) {
- // quickly exit out without synchronizing if reusing and readers are
already open
- return;
- }
-
- // we always force synchronization, if reuse=false, to handle concurrent
close() calls as well.
- synchronized (this) {
- if (baseFileReader != null || logRecordScanner != null) {
- return;
- }
-
- final long baseFileOpenMs;
- final long logScannerOpenMs;
-
- // Metadata is in sync till the latest completed instant on the dataset
- HoodieTimer timer = new HoodieTimer().startTimer();
- String latestInstantTime = getLatestDatasetInstantTime();
- ValidationUtils.checkArgument(latestFileSystemMetadataSlices.size() ==
1, "must be at-least one valid metadata file slice");
-
- // If the base file is present then create a reader
- Option<HoodieBaseFile> basefile =
latestFileSystemMetadataSlices.get(0).getBaseFile();
- if (basefile.isPresent()) {
- String basefilePath = basefile.get().getPath();
- baseFileReader =
HoodieFileReaderFactory.getFileReader(hadoopConf.get(), new Path(basefilePath));
- baseFileOpenMs = timer.endTimer();
- LOG.info(String.format("Opened metadata base file from %s at instant
%s in %d ms", basefilePath,
- basefile.get().getCommitTime(), baseFileOpenMs));
- } else {
- baseFileOpenMs = 0;
- timer.endTimer();
- }
-
- // Open the log record scanner using the log files from the latest file
slice
- timer.startTimer();
- List<String> logFilePaths =
latestFileSystemMetadataSlices.get(0).getLogFiles()
- .sorted(HoodieLogFile.getLogFileComparator())
- .map(o -> o.getPath().toString())
- .collect(Collectors.toList());
- Option<HoodieInstant> lastInstant =
metaClient.getActiveTimeline().filterCompletedInstants().lastInstant();
- String latestMetaInstantTimestamp =
lastInstant.map(HoodieInstant::getTimestamp).orElse(SOLO_COMMIT_TIMESTAMP);
-
- // Load the schema
- Schema schema =
HoodieAvroUtils.addMetadataFields(HoodieMetadataRecord.getClassSchema());
- HoodieCommonConfig commonConfig =
HoodieCommonConfig.newBuilder().fromProperties(metadataConfig.getProps()).build();
- logRecordScanner = HoodieMetadataMergedLogRecordScanner.newBuilder()
- .withFileSystem(metaClient.getFs())
- .withBasePath(metadataBasePath)
- .withLogFilePaths(logFilePaths)
- .withReaderSchema(schema)
- .withLatestInstantTime(latestMetaInstantTimestamp)
- .withMaxMemorySizeInBytes(MAX_MEMORY_SIZE_IN_BYTES)
- .withBufferSize(BUFFER_SIZE)
- .withSpillableMapBasePath(spillableMapDirectory)
- .withDiskMapType(commonConfig.getSpillableDiskMapType())
-
.withBitCaskDiskMapCompressionEnabled(commonConfig.isBitCaskDiskMapCompressionEnabled())
- .build();
-
- logScannerOpenMs = timer.endTimer();
- LOG.info(String.format("Opened metadata log files from %s at instant
(dataset instant=%s, metadata instant=%s) in %d ms",
- logFilePaths, latestInstantTime, latestMetaInstantTimestamp,
logScannerOpenMs));
-
- metrics.ifPresent(metrics ->
metrics.updateMetrics(HoodieMetadataMetrics.SCAN_STR, baseFileOpenMs +
logScannerOpenMs));
- }
- }
+ private Pair<HoodieFileReader, HoodieMetadataMergedLogRecordScanner>
openReadersIfNeeded(String key, String partitionName) throws IOException {
Review comment:
this method needs to be broken down
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -110,23 +119,31 @@ protected HoodieBackedTableMetadataWriter(Configuration
hadoopConf, HoodieWriteC
ValidationUtils.checkArgument(!this.metadataWriteConfig.useFileListingMetadata(),
"File listing cannot be used for Metadata Table");
initRegistry();
- HoodieTableMetaClient datasetMetaClient =
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(datasetWriteConfig.getBasePath()).build();
- initialize(engineContext, datasetMetaClient);
- if (enabled) {
- // This is always called even in case the table was created for the
first time. This is because
- // initFromFilesystem() does file listing and hence may take a long
time during which some new updates
- // may have occurred on the table. Hence, calling this always ensures
that the metadata is brought in sync
- // with the active timeline.
- HoodieTimer timer = new HoodieTimer().startTimer();
- syncFromInstants(datasetMetaClient);
- metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.SYNC_STR,
timer.endTimer()));
- }
+ this.datasetMetaClient =
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(datasetWriteConfig.getBasePath()).build();
+ initTransactionManager();
+ initialize(engineContext);
+ initTableMetadata();
} else {
enabled = false;
this.metrics = Option.empty();
}
}
+ /**
+ * Initialize the {@code TransactionManager} to use for metadata table.
+ *
+ * In HUDI multi writer mode, each operation will sync to metadata table
before completion. Metadata table has common
+ * base and log files to update for each operation. So we can only support
serialized operations.
+ */
+ private void initTransactionManager() {
+ // The lock location should be different from the dataset
+ Properties properties = new Properties();
+ properties.putAll(datasetWriteConfig.getProps());
+ properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY,
properties.getProperty(FILESYSTEM_LOCK_PATH_PROP_KEY,
datasetWriteConfig.getBasePath() + "/.hoodie/.locks") + "/metadata");
Review comment:
+1
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -110,23 +119,31 @@ protected HoodieBackedTableMetadataWriter(Configuration
hadoopConf, HoodieWriteC
ValidationUtils.checkArgument(!this.metadataWriteConfig.useFileListingMetadata(),
"File listing cannot be used for Metadata Table");
initRegistry();
- HoodieTableMetaClient datasetMetaClient =
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(datasetWriteConfig.getBasePath()).build();
- initialize(engineContext, datasetMetaClient);
- if (enabled) {
- // This is always called even in case the table was created for the
first time. This is because
- // initFromFilesystem() does file listing and hence may take a long
time during which some new updates
- // may have occurred on the table. Hence, calling this always ensures
that the metadata is brought in sync
- // with the active timeline.
- HoodieTimer timer = new HoodieTimer().startTimer();
- syncFromInstants(datasetMetaClient);
- metrics.ifPresent(m -> m.updateMetrics(HoodieMetadataMetrics.SYNC_STR,
timer.endTimer()));
- }
+ this.datasetMetaClient =
HoodieTableMetaClient.builder().setConf(hadoopConf).setBasePath(datasetWriteConfig.getBasePath()).build();
+ initTransactionManager();
+ initialize(engineContext);
+ initTableMetadata();
} else {
enabled = false;
this.metrics = Option.empty();
}
}
+ /**
+ * Initialize the {@code TransactionManager} to use for metadata table.
+ *
+ * In HUDI multi writer mode, each operation will sync to metadata table
before completion. Metadata table has common
+ * base and log files to update for each operation. So we can only support
serialized operations.
+ */
+ private void initTransactionManager() {
+ // The lock location should be different from the dataset
+ Properties properties = new Properties();
+ properties.putAll(datasetWriteConfig.getProps());
+ properties.setProperty(FILESYSTEM_LOCK_PATH_PROP_KEY,
properties.getProperty(FILESYSTEM_LOCK_PATH_PROP_KEY,
datasetWriteConfig.getBasePath() + "/.hoodie/.locks") + "/metadata");
Review comment:
A filesystem-based lock may not work on cloud storage; I am not sure we can
assume this.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]