>From Hussain Towaileb <[email protected]>: Hussain Towaileb has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18443 )
Change subject: [NO ISSUE]: Avoid calling cloud to return list of all cached/uncached files ...................................................................... [NO ISSUE]: Avoid calling cloud to return list of all cached/uncached files - user model changes: no - storage format changes: no - interface changes: yes Details: - As listing all file keys from the cloud can take a long time in case we have a huge number of files, we will depend on listing the files from disk + get uncached from our cacher tracker. Ext-ref: MB-62555 Change-Id: I9f1dfa38a1aae986de7adbac283d126260606065 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18443 Reviewed-by: Wail Alkowaileet <[email protected]> Tested-by: Hussain Towaileb <[email protected]> Integration-Tests: Hussain Towaileb <[email protected]> --- M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java M asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java 9 files changed, 130 insertions(+), 2 deletions(-) Approvals: Wail Alkowaileet: Looks good to me, approved Hussain Towaileb: Verified; Verified diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java index 033f135..4913f83 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java @@ -49,6 +49,7 @@ import org.apache.hyracks.api.io.FileReference; import org.apache.hyracks.api.io.IFileHandle; import org.apache.hyracks.api.io.IIOBulkOperation; +import org.apache.hyracks.api.io.IODeviceHandle; import org.apache.hyracks.api.util.IoUtil; import org.apache.hyracks.cloud.io.ICloudIOManager; import org.apache.hyracks.cloud.io.request.ICloudBeforeRetryRequest; @@ -61,6 +62,8 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; public abstract class AbstractCloudIOManager extends IOManager implements IPartitionBootstrapper, ICloudIOManager { private static final Logger LOGGER = LogManager.getLogger(); @@ -169,6 +172,8 @@ protected abstract void downloadPartitions(boolean metadataNode, int metadataPartition) throws HyracksDataException; + protected abstract Set<UncachedFileReference> getUncachedFiles(); + /* * ****************************************************************** * ICloudIOManager functions @@ -362,13 +367,59 @@ } /** - * Returns a list of all stored objects (sorted ASC by path) in the cloud and their sizes + * Returns a list of all stored objects (sorted ASC by path) in the cloud and their sizes. The already cached files + * are retrieved by listing the local disk, while the uncached files are retrieved from uncached files trackers. * * @param objectMapper to create the result {@link JsonNode} * @return {@link JsonNode} with stored objects' information */ public final JsonNode listAsJson(ObjectMapper objectMapper) { - return cloudClient.listAsJson(objectMapper, bucket); + ArrayNode objectsInfo = objectMapper.createArrayNode(); + List<CloudFile> allFiles = new ArrayList<>(); + try { + // get cached files (read from disk) + for (IODeviceHandle deviceHandle : getIODevices()) { + FileReference storageRoot = deviceHandle.createFileRef(STORAGE_ROOT_DIR_NAME); + + Set<FileReference> deviceFiles; + try { + deviceFiles = localIoManager.list(storageRoot, IoUtil.NO_OP_FILTER); + } catch (Throwable th) { + LOGGER.warn("Failed to get local storage files for root {}", storageRoot.getRelativePath(), th); + continue; + } + + for (FileReference fileReference : deviceFiles) { + try { + allFiles.add(CloudFile.of(fileReference.getRelativePath(), fileReference.getFile().length())); + } catch (Throwable th) { + LOGGER.warn("Encountered issue for local storage file {}", fileReference.getRelativePath(), th); + } + } + } + + // get uncached files from uncached files tracker + for (UncachedFileReference uncachedFile : getUncachedFiles()) { + allFiles.add(CloudFile.of(uncachedFile.getRelativePath(), uncachedFile.getSize())); + } + + // combine all and sort + allFiles.sort((x, y) -> String.CASE_INSENSITIVE_ORDER.compare(x.getPath(), y.getPath())); + + for (CloudFile file : allFiles) { + ObjectNode objectInfo = objectsInfo.addObject(); + objectInfo.put("path", file.getPath()); + objectInfo.put("size", file.getSize()); + } + + return objectsInfo; + } catch (Throwable th) { + LOGGER.warn("Failed to retrieve list of all cloud files", th); + objectsInfo.removeAll(); + ObjectNode objectInfo = objectsInfo.addObject(); + objectInfo.put("error", "Failed to retrieve list of all cloud files. " + th.getMessage()); + return objectsInfo; + } } /** diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java index 764d436..1cb6077 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java @@ -67,6 +67,11 @@ } @Override + protected Set<UncachedFileReference> getUncachedFiles() { + return Collections.emptySet(); + } + + @Override protected void onOpen(CloudFileHandle fileHandle) { // NoOp } diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java index 35b7255..1c5efd9 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java @@ -137,6 +137,11 @@ } } + @Override + protected Set<UncachedFileReference> getUncachedFiles() { + return accessor.getUncachedFiles(); + } + private ILazyAccessor createAccessor(ParallelCacher cacher, boolean canReplaceAccessor) { if (canReplaceAccessor) { return new ReplaceableCloudAccessor(cloudClient, bucket, localIoManager, partitions, replacer, cacher); diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java index aebeab2..486074d 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java @@ -23,6 +23,7 @@ import java.util.Collection; import java.util.Set; +import org.apache.asterix.cloud.UncachedFileReference; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.io.FileReference; @@ -38,6 +39,13 @@ /** * Returns a list of all uncached files * + * @return all uncached files + */ + Set<UncachedFileReference> getUncachedFiles(); + + /** + * Returns a list of all uncached files of specified directory + * * @param dir directory to list * @param filter file name filter * @return set of uncached files diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java index c953de5..b1947ec 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java @@ -23,6 +23,7 @@ import java.util.Collections; import java.util.Set; +import org.apache.asterix.cloud.UncachedFileReference; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.io.FileReference; @@ -35,6 +36,11 @@ } @Override + public Set<UncachedFileReference> getUncachedFiles() { + return Collections.emptySet(); + } + + @Override public Set<FileReference> getUncachedFiles(FileReference dir, FilenameFilter filter) { return Collections.emptySet(); } diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java index 56619c8..24f5694 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java @@ -88,6 +88,14 @@ } @Override + public synchronized Set<UncachedFileReference> getUncachedFiles() { + Set<UncachedFileReference> uncached = new HashSet<>(); + uncached.addAll(uncachedDataFiles.values()); + uncached.addAll(uncachedMetadataFiles.values()); + return uncached; + } + + @Override public Set<FileReference> getUncachedFiles(FileReference dir, FilenameFilter filter) { if (dir.getRelativePath().endsWith(StorageConstants.STORAGE_ROOT_DIR_NAME)) { return uncachedDataFiles.keySet().stream() diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java index e6c0692..fc1d98a 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java @@ -22,6 +22,7 @@ import java.util.Set; import org.apache.asterix.cloud.CloudFileHandle; +import org.apache.asterix.cloud.UncachedFileReference; import org.apache.asterix.cloud.bulk.IBulkOperationCallBack; import org.apache.hyracks.api.exceptions.HyracksDataException; import org.apache.hyracks.api.io.FileReference; @@ -111,4 +112,11 @@ * @param directory to evict */ void doEvict(FileReference directory) throws HyracksDataException; + + /** + * Returns all uncached files + * + * @return all uncached files + */ + Set<UncachedFileReference> getUncachedFiles(); } diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java index ae32402..c843074 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java @@ -19,9 +19,11 @@ package org.apache.asterix.cloud.lazy.accessor; import java.io.FilenameFilter; +import java.util.Collections; import java.util.Set; import org.apache.asterix.cloud.CloudFileHandle; +import org.apache.asterix.cloud.UncachedFileReference; import org.apache.asterix.cloud.bulk.IBulkOperationCallBack; import org.apache.asterix.cloud.bulk.NoOpDeleteBulkCallBack; import org.apache.asterix.cloud.clients.ICloudClient; @@ -84,4 +86,9 @@ cloudClient.write(bucket, fileReference.getRelativePath(), bytes); localIoManager.overwrite(fileReference, bytes); } + + @Override + public Set<UncachedFileReference> getUncachedFiles() { + return Collections.emptySet(); + } } diff --git a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java index 1a440e7..91046a1 100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java @@ -23,6 +23,7 @@ import java.util.Set; import org.apache.asterix.cloud.CloudFileHandle; +import org.apache.asterix.cloud.UncachedFileReference; import org.apache.asterix.cloud.bulk.IBulkOperationCallBack; import org.apache.asterix.cloud.clients.CloudFile; import org.apache.asterix.cloud.clients.ICloudClient; @@ -185,4 +186,9 @@ return dir.getRelativePath().startsWith(StorageConstants.METADATA_TXN_NOWAL_DIR_NAME) || dir.getName().equals(StorageConstants.GLOBAL_TXN_DIR_NAME); } + + @Override + public Set<UncachedFileReference> getUncachedFiles() { + return cacher.getUncachedFiles(); + } } -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18443 To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Change-Id: I9f1dfa38a1aae986de7adbac283d126260606065 Gerrit-Change-Number: 18443 Gerrit-PatchSet: 7 Gerrit-Owner: Hussain Towaileb <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Hussain Towaileb <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Wail Alkowaileet <[email protected]> Gerrit-CC: Michael Blow <[email protected]> Gerrit-MessageType: merged
