xushiyan commented on code in PR #6882:
URL: https://github.com/apache/hudi/pull/6882#discussion_r1037191899
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieCreateHandle.java:
##########
@@ -56,7 +56,7 @@
private static final Logger LOG =
LogManager.getLogger(HoodieCreateHandle.class);
protected final HoodieFileWriter<IndexedRecord> fileWriter;
- protected final Path path;
+ protected final Path physicalPath;
Review Comment:
better align on naming: storagePath
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/action/rollback/ListingBasedRollbackStrategy.java:
##########
@@ -217,6 +218,8 @@ private FileStatus[] listFilesToBeDeleted(String commit,
String basefileExtensio
}
return false;
};
+
+ // TODO: This would not work with HoodieStorageStrategy, need to list via
FSView or commitMetaData
return fs.listStatus(FSUtils.getPartitionPath(config.getBasePath(),
partitionPath), filter);
Review Comment:
sure, better making this class agnostic about the storage strategy - FSView
can proxy all the listing requests
##########
hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java:
##########
@@ -195,8 +196,16 @@ public static long getFileSize(FileSystem fs, Path path)
throws IOException {
return fs.getFileStatus(path).getLen();
}
+ public static long getFileSizeWithStorageStrategy(FileSystem fs,
HoodieStorageStrategy storageStrategy,
+ String partitionPath, String fileId) throws IOException {
+ return fs.getFileStatus(new
Path(storageStrategy.storageLocation(partitionPath, fileId))).getLen();
+ }
+
public static String getFileId(String fullFileName) {
- return fullFileName.split("_")[0];
+ String firstPart = fullFileName.split("_")[0];
+ return firstPart.startsWith(".")
+ ? firstPart.substring(1)
+ : firstPart;
Review Comment:
any side effect of this change?
##########
hudi-common/src/main/java/org/apache/hudi/common/storage/HoodieDefaultStorageStrategy.java:
##########
@@ -0,0 +1,29 @@
+package org.apache.hudi.common.storage;
+
+import org.apache.hudi.common.config.HoodieConfig;
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.util.StringUtils;
+
+public class HoodieDefaultStorageStrategy implements HoodieStorageStrategy {
+
+ private String basePath;
+
+ public HoodieDefaultStorageStrategy(HoodieConfig config) {
+ basePath = config.getString("hoodie.base.path");
+ assert (!StringUtils.isNullOrEmpty(basePath));
Review Comment:
use `ValidationUtils`
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieWriteHandle.java:
##########
@@ -158,27 +161,29 @@ private String makeWriteToken() {
return FSUtils.makeWriteToken(getPartitionId(), getStageId(),
getAttemptId());
}
- public Path makeNewPath(String partitionPath) {
- Path path = FSUtils.getPartitionPath(config.getBasePath(), partitionPath);
+ public Path makeNewPhysicalPath(String partitionPath) {
+ String storageLocation = storageStrategy.storageLocation(partitionPath,
fileId);
+ Path path = new Path(storageLocation);
try {
if (!fs.exists(path)) {
fs.mkdirs(path); // create a new partition as needed.
}
} catch (IOException e) {
throw new HoodieIOException("Failed to make dir " + path, e);
}
+ LOG.info("write handle, physical partition path: " + path);
- return new Path(path.toString(), FSUtils.makeBaseFileName(instantTime,
writeToken, fileId,
+ return new Path(storageLocation, FSUtils.makeBaseFileName(instantTime,
writeToken, fileId,
hoodieTable.getMetaClient().getTableConfig().getBaseFileFormat().getFileExtension()));
}
/**
* Make new file path with given file name.
*/
- protected Path makeNewFilePath(String partitionPath, String fileName) {
- String relativePath = new Path((partitionPath.isEmpty() ? "" :
partitionPath + "/")
- + fileName).toString();
- return new Path(config.getBasePath(), relativePath);
+ protected Path makeNewFilePhysicalPath(String partitionPath, String
fileName) {
Review Comment:
ditto; makeNewFileStoragePath for consistency
##########
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java:
##########
@@ -165,6 +167,16 @@ public class HoodieWriteConfig extends HoodieConfig {
+ "Hudi stores all the main meta-data about commits, savepoints,
cleaning audit logs "
+ "etc in .hoodie directory under this base path directory.");
+ public static final ConfigProperty<String> STORAGE_PATH = ConfigProperty
+ .key(HoodieTableConfig.HOODIE_STORAGE_PATH_KEY)
+ .noDefaultValue()
+ .withDocumentation("Base storage path for Hudi table");
+
+ public static final ConfigProperty<String> STORAGE_STRATEGY_CLASS_NAME =
ConfigProperty
+ .key(HoodieTableConfig.HOODIE_STORAGE_STRATEGY_CLASS_NAME_KEY)
+ .defaultValue(HoodieTableConfig.StorageStrategy.DEFAULT.value)
Review Comment:
why not reference to `HoodieTableConfig.StorageStrategy` ?
##########
hudi-common/src/main/java/org/apache/hudi/common/storage/HoodieObjectStoreStorageStrategy.java:
##########
@@ -0,0 +1,42 @@
+package org.apache.hudi.common.storage;
+
+import org.apache.hudi.common.config.HoodieConfig;
+import org.apache.hudi.common.fs.FSUtils;
+import org.apache.hudi.common.table.HoodieTableConfig;
+import org.apache.hudi.common.util.StringUtils;
+import org.apache.hudi.common.util.hash.HashID;
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+public class HoodieObjectStoreStorageStrategy implements HoodieStorageStrategy
{
+
+ private static final Logger LOG =
LogManager.getLogger(HoodieObjectStoreStorageStrategy.class);
+
+ private static final int HASH_SEED = 0x0e43cd7a;
+
+ private String tableName;
+ private String storagePath;
+
+ public HoodieObjectStoreStorageStrategy(HoodieConfig config) {
+ this.tableName = config.getString(HoodieTableConfig.HOODIE_TABLE_NAME_KEY);
+ this.storagePath =
config.getString(HoodieTableConfig.HOODIE_STORAGE_PATH_KEY);
+ }
+
+ public String storageLocation(String partitionPath, String fileId) {
+ if (StringUtils.isNullOrEmpty(partitionPath) ||
NON_PARTITIONED_NAME.equals(partitionPath)) {
+ // non-partitioned table
+ return String.format("%s/%08x/%s", storagePath, hash(fileId), tableName);
+ } else {
+ String properPartitionPath = FSUtils.stripLeadingSlash(partitionPath);
+ return String.format("%s/%08x/%s/%s", storagePath, hash(fileId),
tableName, properPartitionPath);
Review Comment:
thought the design is to hash partition path + file id for partitioned table?
##########
hudi-common/src/main/java/org/apache/hudi/metadata/BaseTableMetadata.java:
##########
@@ -367,7 +369,9 @@ Map<String, FileStatus[]>
fetchAllFilesInPartitionPaths(List<Path> partitionPath
HoodieMetadataPayload metadataPayload = record.getData();
checkForSpuriousDeletes(metadataPayload, partitionId);
- FileStatus[] files = metadataPayload.getFileStatuses(fs,
partitionPath);
+ FileStatus[] files = metadataPayload.getFileStatuses(fs,
partitionId, dataMetaClient.getStorageStrategy());
+
+ // return Pair<logicalPath, statuses>
Review Comment:
to be cleaned up
##########
hudi-common/src/main/java/org/apache/hudi/common/fs/FSUtils.java:
##########
@@ -533,6 +540,18 @@ public static Stream<HoodieLogFile>
getAllLogFiles(FileSystem fs, Path partition
}
}
+ public static Stream<HoodieLogFile> getAllLogFiles(FileSystem fs, String
partitionPath, final String fileId,
+ final String logFileExtension, final String baseCommitTime,
HoodieStorageStrategy storageStrategy) throws IOException {
Review Comment:
high-level design suggestion: there are quite some API calls that pass
strategy objects around. i think we need a centralized place providing APIs to
retrieve files so that callers do not need to know about the storage strategy.
maybe make use of FSView.
##########
hudi-common/src/main/java/org/apache/hudi/common/table/HoodieTableMetaClient.java:
##########
@@ -127,6 +132,20 @@ protected HoodieTableMetaClient(Configuration conf, String
basePath, boolean loa
this.fs = getFs();
TableNotFoundException.checkTableValidity(fs, this.basePath.get(),
metaPath.get());
this.tableConfig = new HoodieTableConfig(fs, metaPath.toString(),
payloadClassName);
+ this.tableConfig.setValue(HoodieTableConfig.HOODIE_BASE_PATH,
+
CachingPath.getPathWithoutSchemeAndAuthority(this.basePath.get()).toString());
Review Comment:
why do we have to set HOODIE_BASE_PATH explicitly?
##########
hudi-common/src/main/java/org/apache/hudi/common/model/HoodieCommitMetadata.java:
##########
@@ -141,12 +143,32 @@ public HashMap<String, String> getFileIdAndFullPaths(Path
basePath) {
return fullPaths;
}
- public List<String> getFullPathsByPartitionPath(String basePath, String
partitionPath) {
+ public HashMap<String, String> getFileIdAndFullPaths(HoodieStorageStrategy
storageStrategy) {
Review Comment:
return Map to hide implementation
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]