github-actions[bot] commented on code in PR #62023:
URL: https://github.com/apache/doris/pull/62023#discussion_r3037433990
##########
fe/fe-core/src/main/java/org/apache/doris/backup/Repository.java:
##########
@@ -386,45 +512,87 @@ public boolean ping() {
String path = location + "/" + joinPrefix(PREFIX_REPO, name) + "/" +
FILE_REPO_INFO;
try {
URI checkUri = new URI(path);
- Status st = fileSystem.exists(checkUri.normalize().toString());
- if (!st.ok()) {
- errMsg =
TimeUtils.longToTimeString(System.currentTimeMillis()) + ": " + st.getErrMsg();
- return false;
+ org.apache.doris.filesystem.FileSystem fs = acquireSpiFs();
+ try {
+ boolean exists =
fs.exists(Location.of(checkUri.normalize().toString()));
+ if (!exists) {
+ errMsg =
TimeUtils.longToTimeString(System.currentTimeMillis())
+ + ": path does not exist: " + path;
+ return false;
+ }
+ errMsg = null;
+ return true;
+ } finally {
+ releaseSpiFs(fs);
}
- // clear err msg
- errMsg = null;
-
- return true;
} catch (URISyntaxException e) {
errMsg = TimeUtils.longToTimeString(System.currentTimeMillis())
+ ": Invalid path. " + path + ", error: " + e.getMessage();
return false;
+ } catch (IOException e) {
+ errMsg = TimeUtils.longToTimeString(System.currentTimeMillis()) +
": " + e.getMessage();
+ return false;
}
}
// Visit the repository, and list all existing snapshot names
public Status listSnapshots(List<String> snapshotNames) {
- // list with prefix:
- // eg. __palo_repository_repo_name/__ss_*
- String listPath = Joiner.on(PATH_DELIMITER).join(location,
joinPrefix(PREFIX_REPO, name), PREFIX_SNAPSHOT_DIR)
- + "*";
- List<RemoteFile> result = Lists.newArrayList();
- Status st = fileSystem.globList(listPath, result);
- if (!st.ok()) {
- return st;
- }
-
- for (RemoteFile remoteFile : result) {
- if (remoteFile.isFile()) {
- if (LOG.isDebugEnabled()) {
- LOG.debug("get snapshot path{} which is not a dir",
remoteFile);
+ // List repo root directory and extract unique snapshot names.
+ // Object stores (S3/OSS) return a flat list of all objects under the
prefix, so we
+ // identify snapshot directories by finding "/<PREFIX_SNAPSHOT_DIR>"
in each URI and
+ // extracting the name segment that follows. HDFS/Broker return real
directory entries
+ // whose names start with PREFIX_SNAPSHOT_DIR, which is handled by the
same path.
+ //
+ // We do NOT append "*" here because:
+ // - Broker: list() passes the path to Hadoop globStatus and handles
it correctly even
+ // without a trailing wildcard when given a directory path.
+ // - HDFS: listStatusIterator does not support glob; it needs an
actual directory path.
+ // - S3/OSS: list() is prefix-based; a trailing "*" would be treated
as a literal key
+ // character, matching nothing.
+ String repoRootPath = Joiner.on(PATH_DELIMITER).join(getLocation(),
joinPrefix(PREFIX_REPO, name))
+ + PATH_DELIMITER;
+ org.apache.doris.filesystem.FileSystem fs;
+ try {
+ fs = acquireSpiFs();
+ } catch (IOException e) {
+ return new Status(ErrCode.COMMON_ERROR, "Failed to acquire
filesystem: " + e.getMessage());
+ }
+ try {
+ Set<String> ssNameSet = new LinkedHashSet<>();
+ try (FileIterator it = fs.list(Location.of(repoRootPath))) {
+ while (it.hasNext()) {
+ FileEntry entry = it.next();
+ String uri = entry.location().uri();
+ if (entry.isDirectory()) {
+ // HDFS / Broker: real directory entry whose name is
"__ss_<snapshotName>"
+ String entryName = uri.substring(uri.lastIndexOf('/')
+ 1);
+ if (entryName.startsWith(PREFIX_SNAPSHOT_DIR)) {
+ ssNameSet.add(disjoinPrefix(PREFIX_SNAPSHOT_DIR,
entryName));
+ }
+ } else {
+ // S3 / OSS: flat object URI, e.g.
".../repo/__ss_snap1/__meta.xxx"
+ // Extract the snapshot name from the path component
after "/__ss_"
+ int ssIdx = uri.lastIndexOf(PATH_DELIMITER +
PREFIX_SNAPSHOT_DIR);
Review Comment:
For object stores this iterator returns flat object keys like
`.../__ss_snap1/__ss_content/__db...`. Using `lastIndexOf("/__ss_")` will
therefore match the later `/__ss_content` segment, so `snapshotName` becomes
`content` instead of `snap1`. After this change, `SHOW SNAPSHOT` / snapshot
lookup on S3-style repositories will misreport names or fail once a snapshot
contains objects. Please extract the segment immediately after the repo root
(or search for the first `__ss_` segment after `repoRootPath`) instead of the
last occurrence.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]