JNSimba commented on code in PR #56175:
URL: https://github.com/apache/doris/pull/56175#discussion_r2367394009
##########
fe/fe-core/src/main/java/org/apache/doris/fs/obj/S3ObjStorage.java:
##########
@@ -528,12 +529,50 @@ ListObjectsV2Response listObjectsV2(ListObjectsV2Request
request) throws UserExc
* Copy from `AzureObjStorage.GlobList`
*/
public Status globList(String remotePath, List<RemoteFile> result, boolean
fileNameOnly) {
+ GlobListResult globListResult = globListInternal(remotePath, result,
fileNameOnly, null, -1, -1);
+ return globListResult.getStatus();
+ }
+
+ /**
+ * List all files under the given path with glob pattern.
+ * For example, if the path is "s3://bucket/path/to/*.csv",
+ * it will list all files under "s3://bucket/path/to/" with ".csv" suffix.
+ * <p>
+ * Limit: Starting from startFile, until the total file size is greater
than fileSizeLimit,
+ * or the number of files is greater than fileNumLimit.
+ *
+ * @return The largest file name after listObject this time
+ */
+ public String globListWithLimit(String remotePath, List<RemoteFile>
result, String startFile,
+ long fileSizeLimit, long fileNumLimit) {
+ GlobListResult globListResult = globListInternal(remotePath, result,
true, startFile, fileSizeLimit,
+ fileNumLimit);
+ return globListResult.getMaxFile();
+ }
+
+ /**
+ * List all files under the given path with glob pattern.
+ * For example, if the path is "s3://bucket/path/to/*.csv",
+ * it will list all files under "s3://bucket/path/to/" with ".csv" suffix.
+ * <p>
+ * Copy from `AzureObjStorage.GlobList`
+ */
+ private GlobListResult globListInternal(String remotePath,
List<RemoteFile> result, boolean fileNameOnly,
Review Comment:
There is no limit. Partitioning is based on the last common path prefix. For
example, `s3://bucket/*/ab/*` will become `s3://bucket/{xx,xx}`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]