leesf commented on a change in pull request #3426:
URL: https://github.com/apache/hudi/pull/3426#discussion_r688548261
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/HoodieBackedTableMetadataWriter.java
##########
@@ -396,37 +408,56 @@ private boolean
bootstrapFromFilesystem(HoodieEngineContext engineContext, Hoodi
}
/**
- * Sync the Metadata Table from the instants created on the dataset.
+ * Initialize shards for a partition.
*
- * @param datasetMetaClient {@code HoodieTableMetaClient} for the dataset
+ * Each shard is a single log file with the following format:
+ * <fileIdPrefix>ABCD
+ * where ABCD are digits. This allows up to 9999 shards.
+ *
+ * Example:
+ * fc9f18eb-6049-4f47-bc51-23884bef0001
+ * fc9f18eb-6049-4f47-bc51-23884bef0002
*/
- private void syncFromInstants(HoodieTableMetaClient datasetMetaClient) {
- ValidationUtils.checkState(enabled, "Metadata table cannot be synced as it
is not enabled");
- // (re) init the metadata for reading.
- initTableMetadata();
- try {
- List<HoodieInstant> instantsToSync =
metadata.findInstantsToSyncForWriter();
- if (instantsToSync.isEmpty()) {
- return;
- }
-
- LOG.info("Syncing " + instantsToSync.size() + " instants to metadata
table: " + instantsToSync);
-
- // Read each instant in order and sync it to metadata table
- for (HoodieInstant instant : instantsToSync) {
- LOG.info("Syncing instant " + instant + " to metadata table");
-
- Option<List<HoodieRecord>> records =
HoodieTableMetadataUtil.convertInstantToMetaRecords(datasetMetaClient, instant,
getLatestSyncedInstantTime());
- if (records.isPresent()) {
- commit(records.get(), MetadataPartitionType.FILES.partitionPath(),
instant.getTimestamp());
- }
+ private void initializeShards(HoodieTableMetaClient datasetMetaClient,
String partition, String instantTime,
+ int shardCount) throws IOException {
+ ValidationUtils.checkArgument(shardCount <= 9999, "Maximum 9999 shards are
supported.");
+
+ final String newFileId = FSUtils.createNewFileIdPfx();
+ final String newFileIdPrefix = newFileId.substring(0, 32);
+ final HashMap<HeaderMetadataType, String> blockHeader = new HashMap<>();
+ blockHeader.put(HeaderMetadataType.INSTANT_TIME, instantTime);
+ final HoodieDeleteBlock block = new HoodieDeleteBlock(new HoodieKey[0],
blockHeader);
+
+ LOG.info(String.format("Creating %d shards for partition %s with base
fileId %s at instant time %s",
+ shardCount, partition, newFileId, instantTime));
+ for (int i = 0; i < shardCount; ++i) {
+ // Generate an indexed fileId for each shard and write a log block into
it to create the file.
+ final String shardFileId = String.format("%s%04d", newFileIdPrefix, i +
1);
+ ValidationUtils.checkArgument(newFileId.length() ==
shardFileId.length(), "FileId should be of length " + newFileId.length());
+ try {
+ HoodieLogFormat.Writer writer = HoodieLogFormat.newWriterBuilder()
+
.onParentPath(FSUtils.getPartitionPath(metadataWriteConfig.getBasePath(),
partition))
+ .withFileId(shardFileId).overBaseCommit(instantTime)
+ .withLogVersion(HoodieLogFile.LOGFILE_BASE_VERSION)
+ .withFileSize(0L)
+ .withSizeThreshold(metadataWriteConfig.getLogFileMaxSize())
+ .withFs(datasetMetaClient.getFs())
+ .withRolloverLogWriteToken(FSUtils.makeWriteToken(0, 0, 0))
+ .withLogWriteToken(FSUtils.makeWriteToken(0, 0, 0))
+ .withFileExtension(HoodieLogFile.DELTA_EXTENSION).build();
+ writer.appendBlock(block);
+ writer.close();
+ } catch (InterruptedException e) {
+ throw new IOException("Failed to created record level index shard " +
shardFileId, e);
}
- initTableMetadata();
- } catch (IOException ioe) {
- throw new HoodieIOException("Unable to sync instants from data to
metadata table.", ioe);
}
}
+ protected String getShardFileName(String fileId, int shardIndex) {
Review comment:
Should this method (getShardFileName) be in FileUtils?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]