From Hussain Towaileb <[email protected]>:

Hussain Towaileb has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18443 )


Change subject: [NO ISSUE]: Avoid calling cloud to return list of all 
cached/uncached files
......................................................................

[NO ISSUE]: Avoid calling cloud to return list of all cached/uncached files

- user model changes: no
- storage format changes: no
- interface changes: yes

Details:
- Listing all file keys from the cloud can take a long time
  when there is a huge number of files, so we instead list the
  cached files from disk and get the uncached files from the
  cacher, which tracks them.

Change-Id: I9f1dfa38a1aae986de7adbac283d126260606065
---
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
9 files changed, 120 insertions(+), 1 deletion(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/43/18443/1

diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
index 91c24e8..4f32c57 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
@@ -26,6 +26,8 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.ByteBuffer;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.List;
@@ -49,6 +51,7 @@
 import org.apache.hyracks.api.io.FileReference;
 import org.apache.hyracks.api.io.IFileHandle;
 import org.apache.hyracks.api.io.IIOBulkOperation;
+import org.apache.hyracks.api.io.IODeviceHandle;
 import org.apache.hyracks.api.util.IoUtil;
 import org.apache.hyracks.cloud.io.ICloudIOManager;
 import org.apache.hyracks.cloud.io.request.ICloudBeforeRetryRequest;
@@ -61,6 +64,8 @@

 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ArrayNode;
+import com.fasterxml.jackson.databind.node.ObjectNode;

 public abstract class AbstractCloudIOManager extends IOManager implements 
IPartitionBootstrapper, ICloudIOManager {
     private static final Logger LOGGER = LogManager.getLogger();
@@ -169,6 +174,8 @@

     protected abstract void downloadPartitions(boolean metadataNode, int 
metadataPartition) throws HyracksDataException;

+    protected abstract Set<UncachedFileReference> getUncachedFiles();
+
     /*
      * ******************************************************************
      * ICloudIOManager functions
@@ -351,7 +358,45 @@
      * @return {@link JsonNode} with stored objects' information
      */
     public final JsonNode listAsJson(ObjectMapper objectMapper) {
-        return cloudClient.listAsJson(objectMapper, bucket);
+        ArrayNode objectsInfo = objectMapper.createArrayNode();
+        try {
+            // get cached files (read from disk)
+            Set<CloudFile> cached = new HashSet<>();
+            for (IODeviceHandle deviceHandle : getIODevices()) {
+                FileReference storageRoot = 
deviceHandle.createFileRef(STORAGE_ROOT_DIR_NAME);
+                Set<FileReference> deviceFiles = 
localIoManager.list(storageRoot, IoUtil.NO_OP_FILTER);
+                for (FileReference fileReference : deviceFiles) {
+                    cached.add(CloudFile.of(fileReference.getRelativePath(),
+                            
Files.size(Path.of(fileReference.getAbsolutePath()))));
+                }
+            }
+
+            // get uncached files
+            Set<CloudFile> uncached = new HashSet<>();
+            for (UncachedFileReference uncachedFile : getUncachedFiles()) {
+                uncached.add(CloudFile.of(uncachedFile.getRelativePath(), 
uncachedFile.getSize()));
+            }
+
+            // combine all and sort
+            List<CloudFile> allFiles = new ArrayList<>();
+            allFiles.addAll(cached);
+            allFiles.addAll(uncached);
+            allFiles.sort((x, y) -> 
String.CASE_INSENSITIVE_ORDER.compare(x.getPath(), y.getPath()));
+
+            for (CloudFile file : allFiles) {
+                ObjectNode objectInfo = objectsInfo.addObject();
+                objectInfo.put("path", file.getPath());
+                objectInfo.put("size", file.getSize());
+            }
+
+            return objectsInfo;
+        } catch (IOException e) {
+            LOGGER.warn("Failed to retrieve list of all cloud files", e);
+            objectsInfo.removeAll();
+            ObjectNode objectInfo = objectsInfo.addObject();
+            objectInfo.put("error", "Failed to retrieve list of all cloud 
files. " + e.getMessage());
+            return objectsInfo;
+        }
     }

     /**
diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
index 764d436..1cb6077 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/EagerCloudIOManager.java
@@ -67,6 +67,11 @@
     }

     @Override
+    protected Set<UncachedFileReference> getUncachedFiles() {
+        return Collections.emptySet();
+    }
+
+    @Override
     protected void onOpen(CloudFileHandle fileHandle) {
         // NoOp
     }
diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
index 35b7255..1c5efd9 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/LazyCloudIOManager.java
@@ -137,6 +137,11 @@
         }
     }

+    @Override
+    protected Set<UncachedFileReference> getUncachedFiles() {
+        return accessor.getUncachedFiles();
+    }
+
     private ILazyAccessor createAccessor(ParallelCacher cacher, boolean 
canReplaceAccessor) {
         if (canReplaceAccessor) {
             return new ReplaceableCloudAccessor(cloudClient, bucket, 
localIoManager, partitions, replacer, cacher);
diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
index aebeab2..486074d 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/IParallelCacher.java
@@ -23,6 +23,7 @@
 import java.util.Collection;
 import java.util.Set;

+import org.apache.asterix.cloud.UncachedFileReference;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.io.FileReference;

@@ -38,6 +39,13 @@
     /**
      * Returns a list of all uncached files
      *
+     * @return all uncached files
+     */
+    Set<UncachedFileReference> getUncachedFiles();
+
+    /**
+     * Returns a list of all uncached files of specified directory
+     *
      * @param dir    directory to list
      * @param filter file name filter
      * @return set of uncached files
diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
index c953de5..b1947ec 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/NoOpParallelCacher.java
@@ -23,6 +23,7 @@
 import java.util.Collections;
 import java.util.Set;

+import org.apache.asterix.cloud.UncachedFileReference;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.io.FileReference;

@@ -35,6 +36,11 @@
     }

     @Override
+    public Set<UncachedFileReference> getUncachedFiles() {
+        return Collections.emptySet();
+    }
+
+    @Override
     public Set<FileReference> getUncachedFiles(FileReference dir, 
FilenameFilter filter) {
         return Collections.emptySet();
     }
diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
index 56619c8..dfcb8e4 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/ParallelCacher.java
@@ -88,6 +88,16 @@
     }

     @Override
+    public Set<UncachedFileReference> getUncachedFiles() {
+        Set<UncachedFileReference> uncached = new HashSet<>();
+        uncached.addAll(uncachedDataFiles.values());
+        uncached.addAll(uncachedMetadataFiles.values());
+
+        // return a copy to avoid any modifications
+        return new HashSet<>(uncached);
+    }
+
+    @Override
     public Set<FileReference> getUncachedFiles(FileReference dir, 
FilenameFilter filter) {
         if 
(dir.getRelativePath().endsWith(StorageConstants.STORAGE_ROOT_DIR_NAME)) {
             return uncachedDataFiles.keySet().stream()
diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
index e6c0692..fc1d98a 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ILazyAccessor.java
@@ -22,6 +22,7 @@
 import java.util.Set;
 
 import org.apache.asterix.cloud.CloudFileHandle;
+import org.apache.asterix.cloud.UncachedFileReference;
 import org.apache.asterix.cloud.bulk.IBulkOperationCallBack;
 import org.apache.hyracks.api.exceptions.HyracksDataException;
 import org.apache.hyracks.api.io.FileReference;
@@ -111,4 +112,11 @@
      * @param directory to evict
      */
     void doEvict(FileReference directory) throws HyracksDataException;
+
+    /**
+     * Returns all uncached files
+     *
+     * @return all uncached files
+     */
+    Set<UncachedFileReference> getUncachedFiles();
 }
diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
index ae32402..c843074 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/LocalAccessor.java
@@ -19,9 +19,11 @@
 package org.apache.asterix.cloud.lazy.accessor;

 import java.io.FilenameFilter;
+import java.util.Collections;
 import java.util.Set;

 import org.apache.asterix.cloud.CloudFileHandle;
+import org.apache.asterix.cloud.UncachedFileReference;
 import org.apache.asterix.cloud.bulk.IBulkOperationCallBack;
 import org.apache.asterix.cloud.bulk.NoOpDeleteBulkCallBack;
 import org.apache.asterix.cloud.clients.ICloudClient;
@@ -84,4 +86,9 @@
         cloudClient.write(bucket, fileReference.getRelativePath(), bytes);
         localIoManager.overwrite(fileReference, bytes);
     }
+
+    @Override
+    public Set<UncachedFileReference> getUncachedFiles() {
+        return Collections.emptySet();
+    }
 }
diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
index 1a440e7..91046a1 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/lazy/accessor/ReplaceableCloudAccessor.java
@@ -23,6 +23,7 @@
 import java.util.Set;

 import org.apache.asterix.cloud.CloudFileHandle;
+import org.apache.asterix.cloud.UncachedFileReference;
 import org.apache.asterix.cloud.bulk.IBulkOperationCallBack;
 import org.apache.asterix.cloud.clients.CloudFile;
 import org.apache.asterix.cloud.clients.ICloudClient;
@@ -185,4 +186,9 @@
         return 
dir.getRelativePath().startsWith(StorageConstants.METADATA_TXN_NOWAL_DIR_NAME)
                 || dir.getName().equals(StorageConstants.GLOBAL_TXN_DIR_NAME);
     }
+
+    @Override
+    public Set<UncachedFileReference> getUncachedFiles() {
+        return cacher.getUncachedFiles();
+    }
 }

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/18443
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I9f1dfa38a1aae986de7adbac283d126260606065
Gerrit-Change-Number: 18443
Gerrit-PatchSet: 1
Gerrit-Owner: Hussain Towaileb <[email protected]>
Gerrit-MessageType: newchange

Reply via email to