From Michael Blow <[email protected]>:

Michael Blow has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19724 )


Change subject: [NO ISSUE][*DB][STO] Support validating returned objects from 
list op
......................................................................

[NO ISSUE][*DB][STO] Support validating returned objects from list op

Add a new parameter, CLOUD_STORAGE_LIST_INCLUDES_DELETES, which indicates
whether a cloud provider's list result may include deleted objects. This allows
the returned list to be validated before further processing occurs.

Ext-ref: MB-66438
Change-Id: I7520dea29d15110e85ff770d6454cb69cdeaec51
---
M 
asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CloudProperties.java
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
M 
asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
3 files changed, 69 insertions(+), 7 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/24/19724/1

diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
index 5414099..8c8e43d 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3ClientConfig.java
@@ -44,16 +44,18 @@
     private final int requestsMaxHttpConnections;
     private final boolean forcePathStyle;
     private final boolean disableSslVerify;
+    private final boolean storageListIncludesDeletes;

     public S3ClientConfig(String region, String endpoint, String prefix, 
boolean anonymousAuth,
             long profilerLogInterval, int writeBufferSize) {
-        this(region, endpoint, prefix, anonymousAuth, profilerLogInterval, 
writeBufferSize, 1, 0, 0, 0, false, false);
+        this(region, endpoint, prefix, anonymousAuth, profilerLogInterval, 
writeBufferSize, 1, 0, 0, 0, false, false,
+                false);
     }

     private S3ClientConfig(String region, String endpoint, String prefix, 
boolean anonymousAuth,
             long profilerLogInterval, int writeBufferSize, long 
tokenAcquireTimeout, int writeMaxRequestsPerSeconds,
             int readMaxRequestsPerSeconds, int requestsMaxHttpConnections, 
boolean forcePathStyle,
-            boolean disableSslVerify) {
+            boolean disableSslVerify, boolean storageListIncludesDeletes) {
         this.region = Objects.requireNonNull(region, "region");
         this.endpoint = endpoint;
         this.prefix = Objects.requireNonNull(prefix, "prefix");
@@ -66,6 +68,7 @@
         this.requestsMaxHttpConnections = requestsMaxHttpConnections;
         this.forcePathStyle = forcePathStyle;
         this.disableSslVerify = disableSslVerify;
+        this.storageListIncludesDeletes = storageListIncludesDeletes;
     }

     public static S3ClientConfig of(CloudProperties cloudProperties) {
@@ -74,7 +77,8 @@
                 cloudProperties.getProfilerLogInterval(), 
cloudProperties.getWriteBufferSize(),
                 cloudProperties.getTokenAcquireTimeout(), 
cloudProperties.getWriteMaxRequestsPerSecond(),
                 cloudProperties.getReadMaxRequestsPerSecond(), 
cloudProperties.getRequestsMaxHttpConnections(),
-                cloudProperties.isStorageForcePathStyle(), 
cloudProperties.isStorageDisableSSLVerify());
+                cloudProperties.isStorageForcePathStyle(), 
cloudProperties.isStorageDisableSSLVerify(),
+                cloudProperties.isStorageListIncludesDeletes());
     }

     public static S3ClientConfig of(Map<String, String> configuration, int 
writeBufferSize) {
@@ -144,6 +148,10 @@
         return forcePathStyle;
     }

+    public boolean isStorageListIncludesDeletes() {
+        return storageListIncludesDeletes;
+    }
+
     private boolean isS3Mock() {
         return endpoint != null && !endpoint.isEmpty();
     }
diff --git 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
index e4ba4b4..3b272d4 100644
--- 
a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
+++ 
b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/aws/s3/S3CloudClient.java
@@ -33,6 +33,7 @@
 import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
+import java.util.function.Function;

 import org.apache.asterix.cloud.CloudResettableInputStream;
 import org.apache.asterix.cloud.IWriteBufferProvider;
@@ -132,7 +133,28 @@
         guardian.checkReadAccess(bucket, path);
         profiler.objectsList();
         path = config.isLocalS3Provider() ? encodeURI(path) : path;
-        return filterAndGet(listS3Objects(s3Client, bucket, config.getPrefix() 
+ path), filter);
+        return fixupStaleEntries(filterAndGet(listS3Objects(s3Client, bucket, 
config.getPrefix() + path), filter),
+                bucket, CloudFile::getPath);
+    }
+
+    private <T, C extends Collection<T>> C fixupStaleEntries(C cloudFiles, String bucket,
+            Function<T, String> pathExtractor) {
+        if (!config.isStorageListIncludesDeletes()) { // list result cannot contain deletes; nothing to validate
+            return cloudFiles;
+        }
+        Iterator<T> iterator = cloudFiles.iterator();
+        while (iterator.hasNext()) {
+            String path = pathExtractor.apply(iterator.next());
+            try {
+                if (!exists(bucket, path)) { // listed but no longer present: drop the stale entry
+                    LOGGER.warn("Removing non-existent file from list result: {}", path);
+                    iterator.remove();
+                }
+            } catch (HyracksDataException e) { // best effort: an entry that cannot be verified is kept
+                LOGGER.warn("Ignoring exception on exists check on {}", path, e);
+            }
+        }
+        return cloudFiles;
     }

     @Override
@@ -221,7 +243,15 @@
             String destKey = 
destPath.getChildPath(IoUtil.getFileNameFromPath(srcKey));
             CopyObjectRequest copyReq = 
CopyObjectRequest.builder().sourceBucket(bucket).sourceKey(srcKey)
                     
.destinationBucket(bucket).destinationKey(config.getPrefix() + destKey).build();
-            s3Client.copyObject(copyReq);
+            try {
+                s3Client.copyObject(copyReq);
+            } catch (NoSuchKeyException ex) {
+                if (config.isStorageListIncludesDeletes()) {
+                    LOGGER.warn("ignoring failure to copy {} as 
isStorageListIncludesDeletes is true", srcKey);
+                } else {
+                    throw ex;
+                }
+            }
         }
     }

@@ -300,7 +330,8 @@

     @Override
     public JsonNode listAsJson(ObjectMapper objectMapper, String bucket) {
-        List<S3Object> objects = listS3Objects(s3Client, bucket, 
config.getPrefix());
+        List<S3Object> objects =
+                fixupStaleEntries(listS3Objects(s3Client, bucket, 
config.getPrefix()), bucket, S3Object::key);
         ArrayNode objectsInfo = objectMapper.createArrayNode();

         objects.sort((x, y) -> String.CASE_INSENSITIVE_ORDER.compare(x.key(), 
y.key()));
diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CloudProperties.java
 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CloudProperties.java
index 1af4824..a6dbe27 100644
--- 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CloudProperties.java
+++ 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CloudProperties.java
@@ -67,7 +67,8 @@
         CLOUD_EVICTION_PLAN_REEVALUATE_THRESHOLD(POSITIVE_INTEGER, 50),
         CLOUD_REQUESTS_MAX_HTTP_CONNECTIONS(POSITIVE_INTEGER, 1000),
         CLOUD_STORAGE_FORCE_PATH_STYLE(BOOLEAN, false),
-        CLOUD_STORAGE_DISABLE_SSL_VERIFY(BOOLEAN, false);
+        CLOUD_STORAGE_DISABLE_SSL_VERIFY(BOOLEAN, false),
+        CLOUD_STORAGE_LIST_INCLUDES_DELETES(BOOLEAN, false);

         private final IOptionType interpreter;
         private final Object defaultValue;
@@ -102,6 +103,7 @@
                 case CLOUD_REQUESTS_MAX_HTTP_CONNECTIONS:
                 case CLOUD_STORAGE_FORCE_PATH_STYLE:
                 case CLOUD_STORAGE_DISABLE_SSL_VERIFY:
+                case CLOUD_STORAGE_LIST_INCLUDES_DELETES:
                     return Section.COMMON;
                 default:
                     return Section.NC;
@@ -177,6 +179,9 @@
                 case CLOUD_STORAGE_DISABLE_SSL_VERIFY:
                     return "Indicates whether or not to disable SSL 
certificate verification on the cloud storage. "
                             + "(default: false)";
+                case CLOUD_STORAGE_LIST_INCLUDES_DELETES:
+                    return "Indicates whether or not delete markers may be 
contained in list operations. (default:"
+                            + " false)";
                 default:
                     throw new IllegalStateException("NYI: " + this);
             }
@@ -284,4 +289,8 @@
     public boolean isStorageDisableSSLVerify() {
         return accessor.getBoolean(Option.CLOUD_STORAGE_DISABLE_SSL_VERIFY);
     }
+
+    public boolean isStorageListIncludesDeletes() {
+        return accessor.getBoolean(Option.CLOUD_STORAGE_LIST_INCLUDES_DELETES);
+    }
 }

-- 
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19724
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings

Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I7520dea29d15110e85ff770d6454cb69cdeaec51
Gerrit-Change-Number: 19724
Gerrit-PatchSet: 1
Gerrit-Owner: Michael Blow <[email protected]>
Gerrit-MessageType: newchange

Reply via email to