>From Hussain Towaileb <[email protected]>:
Hussain Towaileb has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18443 )
Change subject: [NO ISSUE]: Avoid calling cloud to return list of all
cached/uncached files
......................................................................
[NO ISSUE]: Avoid calling cloud to return list of all cached/uncached files
- user model changes: no
- storage format changes: no
- interface changes: yes
Details:
- Listing all file keys from the cloud can take a long time when
there is a huge number of files. Instead, list the cached files
from the local disk and get the uncached files from the ones
tracked by our cacher.
Change-Id: I9f1dfa38a1aae986de7adbac283d126260606065
---
M
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
M
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
M
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
M
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
M
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
M
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
M
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
M
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
M
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
9 files changed, 120 insertions(+), 1 deletion(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/43/18443/1
diff --git
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
index 91c24e8..4f32c57 100644
---
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
+++
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
@@ -26,6 +26,8 @@
import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
@@ -49,6 +51,7 @@
import org.apache.hyracks.api.io.FileReference;
import org.apache.hyracks.api.io.IFileHandle;
import org.apache.hyracks.api.io.IIOBulkOperation;
+import org.apache.hyracks.api.io.IODeviceHandle;
import org.apache.hyracks.api.util.IoUtil;
import org.apache.hyracks.cloud.io.ICloudIOManager;
import org.apache.hyracks.cloud.io.request.ICloudBeforeRetryRequest;
@@ -61,6 +64,8 @@
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;
public abstract class AbstractCloudIOManager extends IOManager implements
IPartitionBootstrapper, ICloudIOManager {
private static final Logger LOGGER = LogManager.getLogger();
@@ -169,6 +174,8 @@
protected abstract void downloadPartitions(boolean metadataNode, int
metadataPartition) throws HyracksDataException;
+ protected abstract Set<UncachedFileReference> getUncachedFiles();
+
/*
* ******************************************************************
* ICloudIOManager functions
@@ -351,7 +358,45 @@
* @return {@link JsonNode} with stored objects' information
*/
public final JsonNode listAsJson(ObjectMapper objectMapper) {
- return cloudClient.listAsJson(objectMapper, bucket);
+ ArrayNode objectsInfo = objectMapper.createArrayNode();
+ try {
+ // get cached files (read from disk)
+ Set<CloudFile> cached = new HashSet<>();
+ for (IODeviceHandle deviceHandle : getIODevices()) {
+ FileReference storageRoot =
deviceHandle.createFileRef(STORAGE_ROOT_DIR_NAME);
+ Set<FileReference> deviceFiles =
localIoManager.list(storageRoot, IoUtil.NO_OP_FILTER);
+ for (FileReference fileReference : deviceFiles) {
+ cached.add(CloudFile.of(fileReference.getRelativePath(),
+
Files.size(Path.of(fileReference.getAbsolutePath()))));
+ }
+ }
+
+ // get uncached files
+ Set<CloudFile> uncached = new HashSet<>();
+ for (UncachedFileReference uncachedFile : getUncachedFiles()) {
+ uncached.add(CloudFile.of(uncachedFile.getRelativePath(),
uncachedFile.getSize()));
+ }
+
+ // combine all and sort
+ List<CloudFile> allFiles = new ArrayList<>();
+ allFiles.addAll(cached);
+ allFiles.addAll(uncached);
+ allFiles.sort((x, y) ->
String.CASE_INSENSITIVE_ORDER.compare(x.getPath(), y.getPath()));
+
+ for (CloudFile file : allFiles) {
+ ObjectNode objectInfo = objectsInfo.addObject();
+ objectInfo.put("path", file.getPath());
+ objectInfo.put("size", file.getSize());
+ }
+
+ return objectsInfo;
+ } catch (IOException e) {
+ LOGGER.warn("Failed to retrieve list of all cloud files", e);
+ objectsInfo.removeAll();
+ ObjectNode objectInfo = objectsInfo.addObject();
+ objectInfo.put("error", "Failed to retrieve list of all cloud
files. " + e.getMessage());
+ return objectsInfo;
+ }
}
/**
diff --git
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
index 764d436..1cb6077 100644
---
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
+++
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
@@ -67,6 +67,11 @@
}
@Override
+ protected Set<UncachedFileReference> getUncachedFiles() {
+ return Collections.emptySet();
+ }
+
+ @Override
protected void onOpen(CloudFileHandle fileHandle) {
// NoOp
}
diff --git
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
index 35b7255..1c5efd9 100644
---
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
+++
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
@@ -137,6 +137,11 @@
}
}
+ @Override
+ protected Set<UncachedFileReference> getUncachedFiles() {
+ return accessor.getUncachedFiles();
+ }
+
private ILazyAccessor createAccessor(ParallelCacher cacher, boolean
canReplaceAccessor) {
if (canReplaceAccessor) {
return new ReplaceableCloudAccessor(cloudClient, bucket,
localIoManager, partitions, replacer, cacher);
diff --git
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
index aebeab2..486074d 100644
---
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
+++
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
@@ -23,6 +23,7 @@
import java.util.Collection;
import java.util.Set;
+import org.apache.asterix.cloud.UncachedFileReference;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
@@ -38,6 +39,13 @@
/**
* Returns a list of all uncached files
*
+ * @return all uncached files
+ */
+ Set<UncachedFileReference> getUncachedFiles();
+
+ /**
+ * Returns a list of all uncached files of specified directory
+ *
* @param dir directory to list
* @param filter file name filter
* @return set of uncached files
diff --git
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
index c953de5..b1947ec 100644
---
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
+++
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
@@ -23,6 +23,7 @@
import java.util.Collections;
import java.util.Set;
+import org.apache.asterix.cloud.UncachedFileReference;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
@@ -35,6 +36,11 @@
}
@Override
+ public Set<UncachedFileReference> getUncachedFiles() {
+ return Collections.emptySet();
+ }
+
+ @Override
public Set<FileReference> getUncachedFiles(FileReference dir,
FilenameFilter filter) {
return Collections.emptySet();
}
diff --git
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
index 56619c8..dfcb8e4 100644
---
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
+++
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
@@ -88,6 +88,16 @@
}
@Override
+ public Set<UncachedFileReference> getUncachedFiles() {
+ Set<UncachedFileReference> uncached = new HashSet<>();
+ uncached.addAll(uncachedDataFiles.values());
+ uncached.addAll(uncachedMetadataFiles.values());
+
+ // return a copy to avoid any modifications
+ return new HashSet<>(uncached);
+ }
+
+ @Override
public Set<FileReference> getUncachedFiles(FileReference dir,
FilenameFilter filter) {
if
(dir.getRelativePath().endsWith(StorageConstants.STORAGE_ROOT_DIR_NAME)) {
return uncachedDataFiles.keySet().stream()
diff --git
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
index e6c0692..fc1d98a 100644
---
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
+++
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
@@ -22,6 +22,7 @@
import java.util.Set;
import org.apache.asterix.cloud.CloudFileHandle;
+import org.apache.asterix.cloud.UncachedFileReference;
import org.apache.asterix.cloud.bulk.IBulkOperationCallBack;
import org.apache.hyracks.api.exceptions.HyracksDataException;
import org.apache.hyracks.api.io.FileReference;
@@ -111,4 +112,11 @@
* @param directory to evict
*/
void doEvict(FileReference directory) throws HyracksDataException;
+
+ /**
+ * Returns all uncached files
+ *
+ * @return all uncached files
+ */
+ Set<UncachedFileReference> getUncachedFiles();
}
diff --git
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
index ae32402..c843074 100644
---
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
+++
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
@@ -19,9 +19,11 @@
package org.apache.asterix.cloud.lazy.accessor;
import java.io.FilenameFilter;
+import java.util.Collections;
import java.util.Set;
import org.apache.asterix.cloud.CloudFileHandle;
+import org.apache.asterix.cloud.UncachedFileReference;
import org.apache.asterix.cloud.bulk.IBulkOperationCallBack;
import org.apache.asterix.cloud.bulk.NoOpDeleteBulkCallBack;
import org.apache.asterix.cloud.clients.ICloudClient;
@@ -84,4 +86,9 @@
cloudClient.write(bucket, fileReference.getRelativePath(), bytes);
localIoManager.overwrite(fileReference, bytes);
}
+
+ @Override
+ public Set<UncachedFileReference> getUncachedFiles() {
+ return Collections.emptySet();
+ }
}
diff --git
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
index 1a440e7..91046a1 100644
---
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
+++
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
@@ -23,6 +23,7 @@
import java.util.Set;
import org.apache.asterix.cloud.CloudFileHandle;
+import org.apache.asterix.cloud.UncachedFileReference;
import org.apache.asterix.cloud.bulk.IBulkOperationCallBack;
import org.apache.asterix.cloud.clients.CloudFile;
import org.apache.asterix.cloud.clients.ICloudClient;
@@ -185,4 +186,9 @@
return
dir.getRelativePath().startsWith(StorageConstants.METADATA_TXN_NOWAL_DIR_NAME)
|| dir.getName().equals(StorageConstants.GLOBAL_TXN_DIR_NAME);
}
+
+ @Override
+ public Set<UncachedFileReference> getUncachedFiles() {
+ return cacher.getUncachedFiles();
+ }
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18443
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I9f1dfa38a1aae986de7adbac283d126260606065
Gerrit-Change-Number: 18443
Gerrit-PatchSet: 1
Gerrit-Owner: Hussain Towaileb <[email protected]>
Gerrit-MessageType: newchange