This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git

commit 13f95fc15ffcdddde6556c6f47b62efb92833bc0
Merge: 7a764591f0 8b342cc119
Author: Michael Blow <[email protected]>
AuthorDate: Sun Aug 24 07:19:26 2025 -0400

    Merge branch 'gerrit/trinity' into 'gerrit/ionic', update aws hadoop dep
    
    Merged commits from gerrit/trinity:
    
     * [NO ISSUE][HYR][MISC] Update commons-lang3 to 3.18.0 for CVE-2025-48924
     * [NO ISSUE][OTH] Update parquet version
     * [NO ISSUE][HYR] Update Jackson to 2.19.2 to address CVEs
    
    AWS dependency updates:
    
     - hadoop-awsjavasdk.version: 1.12.779 -> 1.12.788
    
    Update Azurite to 3.35.0
    
    Ext-ref: MB-68123
    Change-Id: I61c069e7537a4bc7535d0347b8104b8a2691f0ba

 asterixdb/asterix-app/pom.xml                      |   2 +-
 .../asterix/common/config/ConfigUsageTest.java     |   2 +-
 .../api/cluster_state_1/cluster_state_1.1.regexadm |   6 +
 .../cluster_state_1_full.1.regexadm                |   6 +
 .../cluster_state_1_less.1.regexadm                |   6 +
 .../src/test/resources/runtimets/sqlpp_queries.xml |   2 +-
 asterixdb/asterix-cloud/pom.xml                    |   2 +-
 .../asterix/cloud/AbstractCloudIOManager.java      |   1 +
 .../apache/asterix/cloud/clients/CloudFile.java    |   7 +-
 .../apache/asterix/cloud/util/CloudFileUtil.java   |   6 +-
 .../asterix/common/api/IPropertiesFactory.java     |   3 +
 .../asterix/common/config/AsterixProperties.java   |  10 +-
 .../asterix/common/config/JacksonProperties.java   | 138 +++++++++++++++++++++
 .../asterix/common/config/PropertiesFactory.java   |   5 +
 asterixdb/asterix-server/pom.xml                   |   4 +-
 asterixdb/pom.xml                                  | 101 ++++++++++++---
 .../org/apache/hyracks/control/cc/CCDriver.java    |   5 +-
 .../control/common/config/ConfigManager.java       |   7 +-
 .../control/common/controllers/NCConfig.java       |   3 +-
 .../java/org/apache/hyracks/util/StorageUtil.java  |  23 ++--
 .../org/apache/hyracks/util/StorageUnitTest.java   |  17 ++-
 hyracks-fullstack/pom.xml                          | 103 ++++++++++++++-
 22 files changed, 403 insertions(+), 56 deletions(-)

diff --cc asterixdb/asterix-app/pom.xml
index 4910e8241f,0c4b94c613..b04c95716f
--- a/asterixdb/asterix-app/pom.xml
+++ b/asterixdb/asterix-app/pom.xml
@@@ -607,65 -500,6 +607,65 @@@
          </plugins>
        </build>
      </profile>
 +    <profile>
 +      <id>asterix-gerrit-cloud-tests</id>
 +      <properties>
 +        <test.includes>
 +          **/CloudStorageTest.java,
 +          **/CloudStorageSparseTest.java,
 +          **/CloudStorageCancellationTest.java,
 +          **/SqlppSinglePointLookupExecutionTest.java, **/AwsS3*.java
 +        </test.includes>
 +        <failIfNoTests>false</failIfNoTests>
 +      </properties>
 +    </profile>
 +    <profile>
 +      <id>asterix-gerrit-cloud-nons3-tests</id>
 +      <properties>
 +        <test.includes>
 +          **/GCSCloudStorageUnstableTest.java,**/CloudStorageAzTest.java,**/AzureBlobStorageExternalDatasetTest.java,
 +          **/AzureBlobStorageExternalDatasetOnePartitionTest.java,**/CloudStorageUnstableTest.java, **/*SqlppHdfs*.java
 +        </test.includes>
 +        <failIfNoTests>false</failIfNoTests>
 +      </properties>
 +    </profile>
 +    <profile>
 +        <id>azurite-tests</id>
 +        <build>
 +            <plugins>
 +                <plugin>
 +                    <groupId>com.github.eirslett</groupId>
 +                    <artifactId>frontend-maven-plugin</artifactId>
 +                    <version>1.13.4</version>
 +                    <configuration>
 +                        <nodeVersion>v14.15.4</nodeVersion>
 +                        <npmVersion>6.14.11</npmVersion>
 +                        <workingDirectory>target/npm</workingDirectory>
 +                        <installDirectory>target/npm</installDirectory>
 +                    </configuration>
 +                    <executions>
 +                        <execution>
 +                            <id>install node and yarn</id>
 +                            <goals>
 +                                <goal>install-node-and-npm</goal>
 +                            </goals>
 +                            <phase>${azurite.npm.install.stage}</phase>
 +                        </execution>
 +                        <execution>
 +                            <id>azurite blob</id>
 +                            <phase>${azurite.install.stage}</phase>
 +                            <goals>
 +                                <goal>npm</goal>
 +                            </goals>
 +                            <configuration>
-                                 <arguments>install [email protected]</arguments>
++                                <arguments>install [email protected]</arguments>
 +                            </configuration>
 +                        </execution>
 +                    </executions>
 +                </plugin>
 +            </plugins>
 +        </build>
 +    </profile>
    </profiles>
    <dependencies>
      <dependency>
diff --cc asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
index 29061ec7c3,7db9ee1dd6..065435b55c
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm
@@@ -62,10 -30,15 +62,16 @@@
      "compiler\.textsearchmemory" : 163840,
      "compiler\.windowmemory" : 196608,
      "default\.dir" : "target/io/dir/asterixdb",
 +    "gcp.impersonate.service.account.duration" : 900,
+     "json\.max\.depth" : 1000,
+     "json\.max\.doc\.length" : -1,
+     "json\.max\.name\.length" : 50000,
+     "json\.max\.number\.length" : 1000,
+     "json\.max\.string\.length" : 2147483647,
+     "json\.max\.token\.count" : -1,
      "library\.deploy\.timeout" : 1800,
      "log\.dir" : "logs/",
 -    "log\.level" : "INFO",
 +    "log\.level" : "DEBUG",
      "max\.wait\.active\.cluster" : 60,
      "max.web.request.size" : 209715200,
      "messaging\.frame\.count" : 512,
diff --cc asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
index f2ea15baad,1c6f3432e5..28fa1f2b6c
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm
@@@ -62,7 -30,12 +62,13 @@@
      "compiler\.textsearchmemory" : 163840,
      "compiler\.windowmemory" : 196608,
      "default\.dir" : "target/io/dir/asterixdb",
 +    "gcp.impersonate.service.account.duration" : 900,
+     "json\.max\.depth" : 1000,
+     "json\.max\.doc\.length" : -1,
+     "json\.max\.name\.length" : 50000,
+     "json\.max\.number\.length" : 1000,
+     "json\.max\.string\.length" : 2147483647,
+     "json\.max\.token\.count" : -1,
      "library\.deploy\.timeout" : 1800,
      "log\.dir" : "logs/",
      "log\.level" : "WARN",
diff --cc asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
index 685d28b7bc,49251a9817..8e0e9a6b00
--- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
+++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm
@@@ -62,7 -30,12 +62,13 @@@
      "compiler\.textsearchmemory" : 163840,
      "compiler\.windowmemory" : 196608,
      "default\.dir" : "target/io/dir/asterixdb",
 +    "gcp.impersonate.service.account.duration" : 900,
+     "json\.max\.depth" : 1000,
+     "json\.max\.doc\.length" : -1,
+     "json\.max\.name\.length" : 50000,
+     "json\.max\.number\.length" : 1000,
+     "json\.max\.string\.length" : 2147483647,
+     "json\.max\.token\.count" : -1,
      "library\.deploy\.timeout" : 1800,
      "log\.dir" : "logs/",
      "log\.level" : "WARN",
diff --cc asterixdb/asterix-cloud/pom.xml
index 419ac4d762,0000000000..06ca83d491
mode 100644,000000..100644
--- a/asterixdb/asterix-cloud/pom.xml
+++ b/asterixdb/asterix-cloud/pom.xml
@@@ -1,277 -1,0 +1,277 @@@
 +<!--
 + ! Licensed to the Apache Software Foundation (ASF) under one
 + ! or more contributor license agreements.  See the NOTICE file
 + ! distributed with this work for additional information
 + ! regarding copyright ownership.  The ASF licenses this file
 + ! to you under the Apache License, Version 2.0 (the
 + ! "License"); you may not use this file except in compliance
 + ! with the License.  You may obtain a copy of the License at
 + !
 + !   http://www.apache.org/licenses/LICENSE-2.0
 + !
 + ! Unless required by applicable law or agreed to in writing,
 + ! software distributed under the License is distributed on an
 + ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + ! KIND, either express or implied.  See the License for the
 + ! specific language governing permissions and limitations
 + ! under the License.
 + !-->
 +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 +         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
 +    <modelVersion>4.0.0</modelVersion>
 +    <parent>
 +        <artifactId>apache-asterixdb</artifactId>
 +        <groupId>org.apache.asterix</groupId>
 +        <version>0.9.10-SNAPSHOT</version>
 +    </parent>
 +    <artifactId>asterix-cloud</artifactId>
 +
 +    <licenses>
 +        <license>
 +            <name>Apache License, Version 2.0</name>
 +            <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
 +            <distribution>repo</distribution>
 +            <comments>A business-friendly OSS license</comments>
 +        </license>
 +    </licenses>
 +
 +    <properties>
 +        <root.dir>${basedir}/..</root.dir>
 +    </properties>
 +
 +    <build>
 +        <plugins>
 +            <plugin>
 +                <groupId>org.apache.rat</groupId>
 +                <artifactId>apache-rat-plugin</artifactId>
 +                <executions>
 +                    <execution>
 +                        <id>default</id>
 +                        <phase>validate</phase>
 +                        <goals>
 +                            <goal>check</goal>
 +                        </goals>
 +                        <configuration>
 +                            <licenses>
 +                                <license implementation="org.apache.rat.analysis.license.ApacheSoftwareLicense20"/>
 +                            </licenses>
 +                            <excludes combine.children="append">
 +                                <exclude>src/test/resources/result/**</exclude>
 +                            </excludes>
 +                        </configuration>
 +                    </execution>
 +                </executions>
 +            </plugin>
 +            <plugin>
 +                <groupId>com.googlecode.maven-download-plugin</groupId>
 +                <artifactId>download-maven-plugin</artifactId>
 +                <version>1.4.2</version>
 +                <executions>
 +                    <execution>
 +                        <id>install-fake-gcs</id>
 +                        <phase>${gcs.download.stage}</phase>
 +                        <goals>
 +                            <goal>wget</goal>
 +                        </goals>
 +                        <configuration>
 +                            <url>
 +                                https://github.com/fsouza/fake-gcs-server/releases/download/v1.48.0/fake-gcs-server_1.48.0_Linux_amd64.tar.gz
 +                            </url>
 +                            <outputFileName>fake-gcs-server_1.48.0_Linux_amd64.tar.gz</outputFileName>
 +                            <outputDirectory>${project.build.directory}</outputDirectory>
 +                        </configuration>
 +                    </execution>
 +                </executions>
 +            </plugin>
 +            <plugin>
 +                <groupId>org.apache.maven.plugins</groupId>
 +                <artifactId>maven-antrun-plugin</artifactId>
 +                <executions>
 +                    <execution>
 +                        <id>extract-gcs</id>
 +                        <phase>${gcs.install.stage}</phase>
 +                        <configuration>
 +                            <target>
 +                                <echo message="Extracting fake-gcs-server"/>
 +                                <mkdir dir="${project.build.directory}/fake-gcs-server"/>
 +                                <gunzip src="${project.build.directory}/fake-gcs-server_1.48.0_Linux_amd64.tar.gz"
 +                                        dest="${project.build.directory}/fake-gcs-server_1.48.0_Linux_amd64.tar"/>
 +                                <untar src="${project.build.directory}/fake-gcs-server_1.48.0_Linux_amd64.tar"
 +                                       dest="${project.build.directory}/fake-gcs-server"/>
 +                                <chmod file="${project.build.directory}/fake-gcs-server/fake-gcs-server" perm="ugo+rx"/>
 +                            </target>
 +                        </configuration>
 +                        <goals>
 +                            <goal>run</goal>
 +                        </goals>
 +                    </execution>
 +                </executions>
 +            </plugin>
 +            <plugin>
 +                <groupId>org.codehaus.mojo</groupId>
 +                <artifactId>exec-maven-plugin</artifactId>
 +                <executions>
 +                    <execution>
 +                        <id>fake-gcs-server</id>
 +                        <phase>${gcs.stage}</phase>
 +                        <goals>
 +                            <goal>exec</goal>
 +                        </goals>
 +                        <configuration>
 +                            <executable>${project.build.directory}/fake-gcs-server/fake-gcs-server</executable>
 +                            <workingDirectory>${project.build.directory}/fake-gcs-server</workingDirectory>
 +                            <arguments>
 +                                <argument>-port</argument>
 +                                <argument>24443</argument>
 +                                <argument>-scheme</argument>
 +                                <argument>http</argument>
 +                                <argument>-host</argument>
 +                                <argument>127.0.0.1</argument>
 +                                <argument>-log-level</argument>
 +                                <argument>error</argument>
 +                                <argument>-filesystem-root</argument>
 +                                <argument>${project.build.directory}/fake-gcs-server/storage</argument>
 +                            </arguments>
 +                            <async>true</async>
 +                        </configuration>
 +                    </execution>
 +                    <execution>
 +                        <id>azurite</id>
 +                        <phase>${azurite.stage}</phase>
 +                        <goals>
 +                            <goal>exec</goal>
 +                        </goals>
 +                        <configuration>
 +                            <!--suppress UnresolvedMavenProperty -->
 +                            <executable>${project.build.directory}/npm/node_modules/.bin/azurite-blob</executable>
 +                            <workingDirectory>${project.build.directory}</workingDirectory>
 +                            <environmentVariables>
 +                                <PATH>${project.build.directory}/npm/node</PATH>
 +                            </environmentVariables>
 +                            <arguments>
 +                                <argument>--blobPort</argument>
 +                                <argument>15055</argument>
 +                                <argument>--location</argument>
 +                                <argument>${project.build.directory}/azurite</argument>
 +                                <argument>--debug</argument>
 +                                <argument>${project.build.directory}/azurite/logs/azurite-debug.log</argument>
 +                            </arguments>
 +                            <async>true</async>
 +                            <outputFile>${project.build.directory}/azurite/logs/azurite.log</outputFile>
 +                        </configuration>
 +                    </execution>
 +                </executions>
 +            </plugin>
 +        </plugins>
 +    </build>
 +
 +    <profiles>
 +        <profile>
 +            <id>azurite-tests</id>
 +            <build>
 +                <plugins>
 +                    <plugin>
 +                        <groupId>com.github.eirslett</groupId>
 +                        <artifactId>frontend-maven-plugin</artifactId>
 +                        <version>1.13.4</version>
 +                        <configuration>
 +                            <nodeVersion>v14.15.4</nodeVersion>
 +                            <npmVersion>6.14.11</npmVersion>
 +                            <workingDirectory>target/npm</workingDirectory>
 +                            <installDirectory>target/npm</installDirectory>
 +                        </configuration>
 +                        <executions>
 +                            <execution>
 +                                <id>install node and yarn</id>
 +                                <goals>
 +                                    <goal>install-node-and-npm</goal>
 +                                </goals>
 +                                <phase>${azurite.npm.install.stage}</phase>
 +                            </execution>
 +                            <execution>
 +                                <id>azurite blob</id>
 +                                <phase>${azurite.install.stage}</phase>
 +                                <goals>
 +                                    <goal>npm</goal>
 +                                </goals>
 +                                <configuration>
-                                     <arguments>install [email protected]</arguments>
++                                    <arguments>install [email protected]</arguments>
 +                                </configuration>
 +                            </execution>
 +                        </executions>
 +                    </plugin>
 +                </plugins>
 +            </build>
 +        </profile>
 +    </profiles>
 +
 +    <dependencies>
 +        <dependency>
 +            <groupId>org.apache.hyracks</groupId>
 +            <artifactId>hyracks-cloud</artifactId>
 +            <version>${hyracks.version}</version>
 +        </dependency>
 +        <dependency>
 +            <groupId>org.apache.asterix</groupId>
 +            <artifactId>asterix-common</artifactId>
 +            <version>${project.version}</version>
 +        </dependency>
 +        <dependency>
 +            <groupId>org.apache.asterix</groupId>
 +            <artifactId>asterix-external-data</artifactId>
 +            <version>${project.version}</version>
 +        </dependency>
 +        <!-- aws s3 start -->
 +        <dependency>
 +            <groupId>software.amazon.awssdk</groupId>
 +            <artifactId>sdk-core</artifactId>
 +        </dependency>
 +        <dependency>
 +            <groupId>software.amazon.awssdk</groupId>
 +            <artifactId>s3</artifactId>
 +        </dependency>
 +        <dependency>
 +            <groupId>software.amazon.awssdk</groupId>
 +            <artifactId>regions</artifactId>
 +        </dependency>
 +        <dependency>
 +            <groupId>software.amazon.awssdk</groupId>
 +            <artifactId>auth</artifactId>
 +        </dependency>
 +        <dependency>
 +            <groupId>software.amazon.awssdk</groupId>
 +            <artifactId>s3-transfer-manager</artifactId>
 +        </dependency>
 +        <dependency>
 +            <groupId>software.amazon.awssdk.crt</groupId>
 +            <artifactId>aws-crt</artifactId>
 +        </dependency>
 +        <dependency>
 +            <groupId>software.amazon.awssdk</groupId>
 +            <artifactId>apache-client</artifactId>
 +        </dependency>
 +        <dependency>
 +            <groupId>junit</groupId>
 +            <artifactId>junit</artifactId>
 +            <scope>test</scope>
 +        </dependency>
 +        <dependency>
 +            <groupId>io.findify</groupId>
 +            <artifactId>s3mock_2.12</artifactId>
 +            <scope>test</scope>
 +        </dependency>
 +        <dependency>
 +            <groupId>com.typesafe.akka</groupId>
 +            <artifactId>akka-http-core_2.12</artifactId>
 +            <scope>test</scope>
 +        </dependency>
 +        <!-- aws s3 end -->
 +
 +        <dependency>
 +            <groupId>com.azure</groupId>
 +            <artifactId>azure-storage-blob-batch</artifactId>
 +            <version>12.23.0</version>
 +        </dependency>
 +
 +    </dependencies>
 +</project>
diff --cc asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
index 8fefcf5bf3,0000000000..d8f682648a
mode 100644,000000..100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java
@@@ -1,498 -1,0 +1,499 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *   http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.asterix.cloud;
 +
 +import static org.apache.asterix.common.utils.StorageConstants.METADATA_PARTITION;
 +import static org.apache.asterix.common.utils.StorageConstants.PARTITION_DIR_PREFIX;
 +import static org.apache.asterix.common.utils.StorageConstants.STORAGE_ROOT_DIR_NAME;
 +
 +import java.io.File;
 +import java.io.IOException;
 +import java.io.InputStream;
 +import java.nio.ByteBuffer;
 +import java.nio.file.FileStore;
 +import java.util.ArrayList;
 +import java.util.HashSet;
 +import java.util.List;
 +import java.util.Set;
 +import java.util.function.Predicate;
 +
 +import org.apache.asterix.cloud.bulk.DeleteBulkCloudOperation;
 +import org.apache.asterix.cloud.bulk.NoOpDeleteBulkCallBack;
 +import org.apache.asterix.cloud.clients.CloudClientProvider;
 +import org.apache.asterix.cloud.clients.CloudFile;
 +import org.apache.asterix.cloud.clients.ICloudClient;
 +import org.apache.asterix.cloud.clients.ICloudGuardian;
 +import org.apache.asterix.cloud.clients.ICloudWriter;
 +import org.apache.asterix.cloud.util.CloudFileUtil;
 +import org.apache.asterix.common.api.INamespacePathResolver;
 +import org.apache.asterix.common.cloud.IPartitionBootstrapper;
 +import org.apache.asterix.common.config.CloudProperties;
 +import org.apache.asterix.common.metadata.MetadataConstants;
 +import org.apache.asterix.common.transactions.IRecoveryManager.SystemState;
 +import org.apache.asterix.common.utils.StoragePathUtil;
 +import org.apache.hyracks.api.exceptions.HyracksDataException;
 +import org.apache.hyracks.api.io.FileReference;
 +import org.apache.hyracks.api.io.IFileHandle;
 +import org.apache.hyracks.api.io.IIOBulkOperation;
 +import org.apache.hyracks.api.io.IODeviceHandle;
 +import org.apache.hyracks.api.util.IoUtil;
 +import org.apache.hyracks.cloud.filesystem.PhysicalDrive;
 +import org.apache.hyracks.cloud.io.ICloudIOManager;
 +import org.apache.hyracks.cloud.io.request.ICloudBeforeRetryRequest;
 +import org.apache.hyracks.cloud.io.request.ICloudRequest;
 +import org.apache.hyracks.cloud.io.stream.CloudInputStream;
 +import org.apache.hyracks.cloud.util.CloudRetryableRequestUtil;
 +import org.apache.hyracks.control.nc.io.IOManager;
 +import org.apache.logging.log4j.LogManager;
 +import org.apache.logging.log4j.Logger;
 +
 +import com.fasterxml.jackson.databind.JsonNode;
 +import com.fasterxml.jackson.databind.ObjectMapper;
 +import com.fasterxml.jackson.databind.node.ArrayNode;
 +import com.fasterxml.jackson.databind.node.ObjectNode;
 +
 +public abstract class AbstractCloudIOManager extends IOManager implements IPartitionBootstrapper, ICloudIOManager {
 +    private static final Logger LOGGER = LogManager.getLogger();
 +    private static final byte[] EMPTY_FILE_BYTES = "empty".getBytes();
 +    private static final Predicate<String> NO_OP_LIST_FILES_FILTER = (path) -> true;
 +
 +    protected final ICloudClient cloudClient;
 +    protected final ICloudGuardian guardian;
 +    protected final IWriteBufferProvider writeBufferProvider;
 +    protected final String bucket;
 +    protected final Set<Integer> partitions;
 +    protected final List<FileReference> partitionPaths;
 +    protected final IOManager localIoManager;
 +    protected final INamespacePathResolver nsPathResolver;
 +    private final List<FileStore> drivePaths;
 +
 +    public AbstractCloudIOManager(IOManager ioManager, CloudProperties cloudProperties,
 +            INamespacePathResolver nsPathResolver, ICloudGuardian guardian) throws HyracksDataException {
 +        super(ioManager.getIODevices(), ioManager.getDeviceComputer(), ioManager.getIOParallelism(),
 +                ioManager.getQueueSize());
 +        this.nsPathResolver = nsPathResolver;
 +        this.bucket = cloudProperties.getStorageBucket();
 +        cloudClient = CloudClientProvider.getClient(cloudProperties, guardian);
 +        this.guardian = guardian;
 +        int numOfThreads = getIODevices().size() * getIOParallelism();
 +        writeBufferProvider = new WriteBufferProvider(numOfThreads, cloudClient.getWriteBufferSize());
 +        partitions = new HashSet<>();
 +        partitionPaths = new ArrayList<>();
 +        this.localIoManager = ioManager;
 +        drivePaths = PhysicalDrive.getDrivePaths(ioDevices);
 +    }
 +
 +    /*
 +     * ******************************************************************
 +     * IPartitionBootstrapper functions
 +     * ******************************************************************
 +     */
 +
 +    @Override
 +    public SystemState getSystemStateOnMissingCheckpoint() throws HyracksDataException {
 +        Set<CloudFile> existingMetadataFiles = getCloudMetadataPartitionFiles();
 +        CloudFile bootstrapMarkerPath = CloudFile.of(StoragePathUtil.getBootstrapMarkerRelativePath(nsPathResolver));
 +        if (existingMetadataFiles.isEmpty() || existingMetadataFiles.contains(bootstrapMarkerPath)) {
 +            LOGGER.info("First time to initialize this cluster: systemState = PERMANENT_DATA_LOSS");
 +            return SystemState.PERMANENT_DATA_LOSS;
 +        } else {
 +            LOGGER.info(
 +                    "Resuming a previously initialized cluster; setting system state to {} to force local recovery if needed",
 +                    SystemState.CORRUPTED);
 +            return SystemState.CORRUPTED;
 +        }
 +    }
 +
 +    @Override
 +    public final void bootstrap(Set<Integer> activePartitions, List<FileReference> currentOnDiskPartitions,
 +            boolean metadataNode, int metadataPartition, boolean ensureCompleteBootstrap) throws HyracksDataException {
 +        partitions.clear();
 +        partitions.addAll(activePartitions);
 +        if (metadataNode) {
 +            partitions.add(metadataPartition);
 +            if (ensureCompleteBootstrap) {
 +                ensureCompleteMetadataBootstrap();
 +            }
 +        }
 +
 +        partitionPaths.clear();
 +        for (Integer partition : activePartitions) {
 +            String partitionDir = PARTITION_DIR_PREFIX + partition;
 +            partitionPaths.add(resolve(STORAGE_ROOT_DIR_NAME + File.separator + partitionDir));
 +        }
 +
 +        LOGGER.info("Initializing cloud manager with ({}) storage partitions: {}", partitions.size(), partitions);
 +        if (!currentOnDiskPartitions.isEmpty()) {
 +            deleteUnkeptPartitionDirs(currentOnDiskPartitions);
 +            cleanupLocalFiles();
 +        }
 +
 +        // Has different implementations depending on the caching policy
 +        downloadPartitions(metadataNode, metadataPartition);
 +    }
 +
 +    private void deleteUnkeptPartitionDirs(List<FileReference> currentOnDiskPartitions) throws HyracksDataException {
 +        for (FileReference partitionDir : currentOnDiskPartitions) {
 +            int partitionNum = StoragePathUtil.getPartitionNumFromRelativePath(partitionDir.getRelativePath());
 +            if (!partitions.contains(partitionNum)) {
 +                LOGGER.warn("Deleting storage partition {} as it does not belong to the current storage partitions {}",
 +                        partitionNum, partitions);
 +                localIoManager.delete(partitionDir);
 +            }
 +        }
 +    }
 +
 +    private void cleanupLocalFiles() throws HyracksDataException {
 +        Set<CloudFile> cloudFiles = cloudClient.listObjects(bucket, STORAGE_ROOT_DIR_NAME, IoUtil.NO_OP_FILTER);
++        LOGGER.debug("+cleanupLocalFiles: cloud files: {}", cloudFiles);
 +        if (cloudFiles.isEmpty()) {
 +            LOGGER.warn("No files in the cloud. Deleting all local files in partitions {}...", partitions);
 +            for (FileReference partitionPath : partitionPaths) {
 +                if (localIoManager.exists(partitionPath)) {
 +                    // Clean local dir from all files
 +                    localIoManager.cleanDirectory(partitionPath);
 +                }
 +            }
 +        } else {
 +            LOGGER.info("Cleaning node partitions...");
 +            for (FileReference partitionPath : partitionPaths) {
 +                CloudFileUtil.cleanDirectoryFiles(localIoManager, cloudFiles, partitionPath);
 +            }
 +        }
 +    }
 +
 +    protected abstract void downloadPartitions(boolean metadataNode, int metadataPartition) throws HyracksDataException;
 +
 +    protected abstract Set<UncachedFileReference> getUncachedFiles();
 +
 +    /*
 +     * ******************************************************************
 +     * ICloudIOManager functions
 +     * ******************************************************************
 +     */
 +
 +    @Override
 +    public final void cloudRead(IFileHandle fHandle, long offset, ByteBuffer data) throws HyracksDataException {
 +        int position = data.position();
 +        ICloudRequest request =
 +                () -> cloudClient.read(bucket, fHandle.getFileReference().getRelativePath(), offset, data);
 +        ICloudBeforeRetryRequest retry = () -> data.position(position);
 +        CloudRetryableRequestUtil.run(request, retry);
 +    }
 +
 +    @Override
 +    public final CloudInputStream cloudRead(IFileHandle fHandle, long offset, long length) throws HyracksDataException {
 +        return CloudRetryableRequestUtil.run(() -> new CloudInputStream(this, fHandle,
 +                cloudClient.getObjectStream(bucket, fHandle.getFileReference().getRelativePath(), offset, length),
 +                offset, length));
 +    }
 +
 +    @Override
 +    public void restoreStream(CloudInputStream cloudStream) {
 +        LOGGER.warn("Restoring stream from cloud, {}", cloudStream);
 +        /*
 +         * This cloud request should not be called using CloudRetryableRequestUtil as it is the responsibility of the
 +         * caller to wrap this request as ICloudRequest or ICloudRetry.
 +         */
 +        InputStream stream = cloudClient.getObjectStream(bucket, cloudStream.getPath(), cloudStream.getOffset(),
 +                cloudStream.getRemaining());
 +        cloudStream.setInputStream(stream);
 +    }
 +
 +    @Override
 +    public final int localWriter(IFileHandle fHandle, long offset, ByteBuffer data) throws HyracksDataException {
 +        // Using syncWrite here to avoid closing the file channel when the thread is interrupted
 +        return localIoManager.syncWrite(fHandle, offset, data);
 +    }
 +
 +    @Override
 +    public final int cloudWrite(IFileHandle fHandle, long offset, ByteBuffer data) throws HyracksDataException {
 +        ICloudWriter cloudWriter = ((CloudFileHandle) fHandle).getCloudWriter();
 +        int writtenBytes;
 +        try {
 +            ensurePosition(fHandle, cloudWriter.position(), offset);
 +            writtenBytes = cloudWriter.write(data);
 +        } catch (HyracksDataException e) {
 +            cloudWriter.abort();
 +            throw e;
 +        }
 +        return writtenBytes;
 +    }
 +
 +    @Override
 +    public final long cloudWrite(IFileHandle fHandle, long offset, ByteBuffer[] data) throws HyracksDataException {
 +        ICloudWriter cloudWriter = ((CloudFileHandle) fHandle).getCloudWriter();
 +        int writtenBytes;
 +        try {
 +            ensurePosition(fHandle, cloudWriter.position(), offset);
 +            writtenBytes = cloudWriter.write(data[0], data[1]);
 +        } catch (HyracksDataException e) {
 +            cloudWriter.abort();
 +            throw e;
 +        }
 +        return writtenBytes;
 +    }
 +
 +    /*
 +     * ******************************************************************
 +     * IIOManager functions
 +     * ******************************************************************
 +     */
 +
 +    @Override
 +    public final IFileHandle open(FileReference fileRef, FileReadWriteMode rwMode, FileSyncMode syncMode)
 +            throws HyracksDataException {
 +        ICloudWriter cloudWriter = cloudClient.createWriter(bucket, fileRef.getRelativePath(), writeBufferProvider);
 +        CloudFileHandle fHandle = new CloudFileHandle(fileRef, cloudWriter);
 +        onOpen(fHandle);
 +        try {
 +            fHandle.open(rwMode, syncMode);
 +        } catch (IOException e) {
 +            throw HyracksDataException.create(e);
 +        }
 +        return fHandle;
 +    }
 +
 +    /**
 +     * Action required to do when opening a file
 +     *
 +     * @param fileHandle file to open
 +     */
 +    protected abstract void onOpen(CloudFileHandle fileHandle) throws HyracksDataException;
 +
 +    @Override
 +    public final long doSyncWrite(IFileHandle fHandle, long offset, ByteBuffer[] dataArray)
 +            throws HyracksDataException {
 +        // Save original position and limit
 +        ByteBuffer buffer1 = dataArray[0];
 +        int position1 = buffer1.position();
 +
 +        ByteBuffer buffer2 = dataArray[1];
 +        int position2 = buffer2.position();
 +
 +        long writtenBytes = localIoManager.doSyncWrite(fHandle, offset, dataArray);
 +
 +        // Restore original position
 +        buffer1.position(position1);
 +        buffer2.position(position2);
 +
 +        cloudWrite(fHandle, offset, dataArray);
 +        return writtenBytes;
 +    }
 +
 +    @Override
 +    public final int doSyncWrite(IFileHandle fHandle, long offset, ByteBuffer data) throws HyracksDataException {
 +        // Save original position and limit
 +        int position = data.position();
 +
 +        int writtenBytes = localIoManager.doSyncWrite(fHandle, offset, data);
 +
 +        // Restore original position
 +        data.position(position);
 +        cloudWrite(fHandle, offset, data);
 +        return writtenBytes;
 +    }
 +
 +    @Override
 +    public IIOBulkOperation createDeleteBulkOperation() {
 +        return new DeleteBulkCloudOperation(localIoManager, bucket, cloudClient, NoOpDeleteBulkCallBack.INSTANCE);
 +    }
 +
 +    @Override
 +    public final void close(IFileHandle fHandle) throws HyracksDataException {
 +        try {
 +            CloudFileHandle cloudFileHandle = (CloudFileHandle) fHandle;
 +            cloudFileHandle.close();
 +        } catch (IOException e) {
 +            throw HyracksDataException.create(e);
 +        }
 +    }
 +
 +    @Override
 +    public final void sync(IFileHandle fileHandle, boolean metadata) throws HyracksDataException {
 +        HyracksDataException savedEx = null;
 +        if (metadata) {
 +            // only finish writing if metadata == true to prevent write limiter from finishing the stream and
 +            // completing the upload.
 +            ICloudWriter cloudWriter = ((CloudFileHandle) fileHandle).getCloudWriter();
 +            try {
 +                cloudWriter.finish();
 +            } catch (HyracksDataException e) {
 +                savedEx = e;
 +            }
 +
 +            if (savedEx != null) {
 +                try {
 +                    cloudWriter.abort();
 +                } catch (HyracksDataException e) {
 +                    savedEx.addSuppressed(e);
 +                }
 +                throw savedEx;
 +            }
 +        }
 +        // Sync only after finalizing the upload to cloud storage
 +        localIoManager.sync(fileHandle, metadata);
 +    }
 +
 +    @Override
 +    public final void create(FileReference fileRef) throws HyracksDataException {
 +        // We need to delete the local file on create as the cloud storage didn't complete the upload
 +        // In other words, both cloud files and the local files are not in sync
 +        overwrite(fileRef, EMPTY_FILE_BYTES);
 +        localIoManager.delete(fileRef);
 +        localIoManager.create(fileRef);
 +    }
 +
 +    @Override
 +    public final void copyDirectory(FileReference srcFileRef, FileReference destFileRef) throws HyracksDataException {
 +        cloudClient.copy(bucket, srcFileRef.getRelativePath(), destFileRef);
 +        localIoManager.copyDirectory(srcFileRef, destFileRef);
 +    }
 +
 +    // TODO(htowaileb): the localIoManager is closed by the node controller service as well, check if we need this
 +    @Override
 +    public final void close() throws IOException {
 +        cloudClient.close();
 +        super.close();
 +        localIoManager.close();
 +    }
 +
 +    /**
 +     * Returns a list of all stored objects (sorted ASC by path) in the cloud and their sizes. The already cached files
 +     * are retrieved by listing the local disk, while the uncached files are retrieved from uncached files trackers.
 +     *
 +     * @param objectMapper to create the result {@link JsonNode}
 +     * @return {@link JsonNode} with stored objects' information
 +     */
 +    public final JsonNode listAsJson(ObjectMapper objectMapper) {
 +        ArrayNode objectsInfo = objectMapper.createArrayNode();
 +        try {
 +            List<CloudFile> allFiles = list();
 +            allFiles.sort((x, y) -> String.CASE_INSENSITIVE_ORDER.compare(x.getPath(), y.getPath()));
 +            for (CloudFile file : allFiles) {
 +                ObjectNode objectInfo = objectsInfo.addObject();
 +                objectInfo.put("path", file.getPath());
 +                objectInfo.put("size", file.getSize());
 +            }
 +            return objectsInfo;
 +        } catch (Throwable th) {
 +            LOGGER.warn("Failed to retrieve list of all cloud files", th);
 +            objectsInfo.removeAll();
 +            ObjectNode objectInfo = objectsInfo.addObject();
 +            objectInfo.put("error", "Failed to retrieve list of all cloud files. " + th.getMessage());
 +            return objectsInfo;
 +        }
 +    }
 +
 +    private List<CloudFile> list() {
 +        List<CloudFile> allFiles = new ArrayList<>();
 +        // get cached files (read from disk)
 +        for (IODeviceHandle deviceHandle : getIODevices()) {
 +            FileReference storageRoot = deviceHandle.createFileRef(STORAGE_ROOT_DIR_NAME);
 +
 +            Set<FileReference> deviceFiles;
 +            try {
 +                deviceFiles = localIoManager.list(storageRoot, IoUtil.NO_OP_FILTER);
 +            } catch (Throwable th) {
 +                LOGGER.warn("Failed to get local storage files for root {}", storageRoot.getRelativePath(), th);
 +                continue;
 +            }
 +
 +            for (FileReference fileReference : deviceFiles) {
 +                try {
 +                    allFiles.add(CloudFile.of(fileReference.getRelativePath(), fileReference.getFile().length()));
 +                } catch (Throwable th) {
 +                    LOGGER.warn("Encountered issue for local storage file {}", fileReference.getRelativePath(), th);
 +                }
 +            }
 +        }
 +
 +        // get uncached files from uncached files tracker
 +        for (UncachedFileReference uncachedFile : getUncachedFiles()) {
 +            allFiles.add(CloudFile.of(uncachedFile.getRelativePath(), uncachedFile.getSize()));
 +        }
 +        return allFiles;
 +    }
 +
 +    /**
 +     * Writes the bytes to the specified key in the bucket
 +     *
 +     * @param key   the key where the bytes will be written
 +     * @param bytes the bytes to write
 +     */
 +    public final void put(String key, byte[] bytes) throws HyracksDataException {
 +        cloudClient.write(bucket, key, bytes);
 +    }
 +
 +    public ICloudClient getCloudClient() {
 +        return cloudClient;
 +    }
 +
 +    private Set<CloudFile> getCloudMetadataPartitionFiles() throws HyracksDataException {
 +        String metadataNamespacePath = StoragePathUtil.getNamespacePath(nsPathResolver,
 +                MetadataConstants.METADATA_NAMESPACE, METADATA_PARTITION);
 +        return cloudClient.listObjects(bucket, metadataNamespacePath, IoUtil.NO_OP_FILTER);
 +    }
 +
 +    private void ensureCompleteMetadataBootstrap() throws HyracksDataException {
 +        Set<CloudFile> metadataPartitionFiles = getCloudMetadataPartitionFiles();
 +        CloudFile marker = CloudFile.of(StoragePathUtil.getBootstrapMarkerRelativePath(nsPathResolver));
 +        boolean foundBootstrapMarker = metadataPartitionFiles.contains(marker);
 +        // if the bootstrap file exists, we failed to bootstrap --> delete all partial files in metadata partition
 +        if (foundBootstrapMarker) {
 +            LOGGER.info(
 +                    "detected failed bootstrap attempt, deleting all existing files in the metadata partition: {}",
 +                    metadataPartitionFiles);
 +            IIOBulkOperation deleteBulkOperation = createDeleteBulkOperation();
 +            for (CloudFile file : metadataPartitionFiles) {
 +                deleteBulkOperation.add(resolve(file.getPath()));
 +            }
 +            performBulkOperation(deleteBulkOperation);
 +        }
 +    }
 +
 +    private void ensurePosition(IFileHandle fileHandle, long cloudOffset, long requestedWriteOffset) {
 +        if (cloudOffset != requestedWriteOffset) {
 +            throw new IllegalStateException("Misaligned positions in " + fileHandle.getFileReference()
 +                    + ", cloudOffset: " + cloudOffset + " != requestedWriteOffset: " + requestedWriteOffset);
 +        }
 +    }
 +
 +    public long getTotalRemoteStorageSizeForNodeBytes() {
 +        return getSize(NO_OP_LIST_FILES_FILTER);
 +    }
 +
 +    @Override
 +    public long getSize(Predicate<String> relativePathFilter) {
 +        long totalSize = localIoManager.getSize(relativePathFilter);
 +
 +        // get uncached files from uncached files tracker
 +        for (UncachedFileReference uncachedFile : getUncachedFiles()) {
 +            if (relativePathFilter.test(uncachedFile.getRelativePath())) {
 +                totalSize += uncachedFile.getSize();
 +            }
 +        }
 +        return totalSize;
 +    }
 +
 +    @Override
 +    public long getTotalDiskUsage() {
 +        return PhysicalDrive.getUsedSpace(drivePaths);
 +    }
 +}
diff --cc asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/CloudFile.java
index 50edd7b99e,0000000000..31f32fd86f
mode 100644,000000..100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/CloudFile.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/CloudFile.java
@@@ -1,84 -1,0 +1,83 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *   http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.asterix.cloud.clients;
 +
 +import java.util.HashMap;
 +import java.util.Map;
 +import java.util.Set;
 +
 +public final class CloudFile {
 +    private static final long IGNORED_SIZE = -1;
 +    private final String path;
 +    private final long size;
 +
 +    private CloudFile(String path, long size) {
 +        this.path = path;
 +        this.size = size;
 +    }
 +
 +    public String getPath() {
 +        return path;
 +    }
 +
 +    public long getSize() {
 +        return size;
 +    }
 +
 +    @Override
 +    public int hashCode() {
 +        return path.hashCode();
 +    }
 +
 +    @Override
 +    public boolean equals(Object obj) {
-         if (!(obj instanceof CloudFile)) {
++        if (!(obj instanceof CloudFile other)) {
 +            return false;
 +        }
 +
-         CloudFile other = (CloudFile) obj;
 +        return path.equals(other.path) && compareSize(other.size);
 +    }
 +
 +    @Override
 +    public String toString() {
-         return path;
++        return path + '[' + size + ']';
 +    }
 +
 +    private boolean compareSize(long otherSize) {
-         // Compare sizes iff both sizes are not ignored
++        // Compare sizes if both sizes are not ignored
 +        return size == otherSize || size == IGNORED_SIZE || otherSize == IGNORED_SIZE;
 +    }
 +
 +    public static CloudFile of(String path, long size) {
 +        return new CloudFile(path, size);
 +    }
 +
 +    public static CloudFile of(String path) {
 +        return new CloudFile(path, IGNORED_SIZE);
 +    }
 +
 +    public static Map<String, CloudFile> toMap(Set<CloudFile> cloudFiles) {
 +        Map<String, CloudFile> map = new HashMap<>();
 +        for (CloudFile cloudFile : cloudFiles) {
 +            map.put(cloudFile.getPath(), cloudFile);
 +        }
 +
 +        return map;
 +    }
 +}
diff --cc asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/util/CloudFileUtil.java
index e4ad05e629,0000000000..7befacf027
mode 100644,000000..100644
--- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/util/CloudFileUtil.java
+++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/util/CloudFileUtil.java
@@@ -1,92 -1,0 +1,92 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *   http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.asterix.cloud.util;
 +
 +import java.io.FilenameFilter;
 +import java.util.Iterator;
 +import java.util.Set;
 +
 +import org.apache.asterix.cloud.clients.CloudFile;
 +import org.apache.asterix.common.utils.StorageConstants;
 +import org.apache.hyracks.api.exceptions.HyracksDataException;
 +import org.apache.hyracks.api.io.FileReference;
 +import org.apache.hyracks.control.nc.io.IOManager;
 +import org.apache.hyracks.storage.am.lsm.common.impls.AbstractLSMIndexFileManager;
 +import org.apache.logging.log4j.LogManager;
 +import org.apache.logging.log4j.Logger;
 +
 +public class CloudFileUtil {
 +    private static final Logger LOGGER = LogManager.getLogger();
 +
 +    // TODO Should we consider bloomfilter and LAF files as metadata files so that they are downloaded on bootstrap?
 +    public static final FilenameFilter METADATA_FILTER =
 +            ((dir, name) -> name.startsWith(StorageConstants.INDEX_NON_DATA_FILES_PREFIX)
 +                    || name.endsWith(AbstractLSMIndexFileManager.LAF_SUFFIX)
 +                    || name.endsWith(AbstractLSMIndexFileManager.BLOOM_FILTER_SUFFIX));
 +    public static final FilenameFilter DATA_FILTER = ((dir, name) -> !METADATA_FILTER.accept(dir, name));
 +
 +    private CloudFileUtil() {
 +    }
 +
 +    public static void cleanDirectoryFiles(IOManager ioManager, Set<CloudFile> cloudFiles, FileReference partitionPath)
 +            throws HyracksDataException {
 +        // First get the set of local files
 +        Set<FileReference> localFiles = ioManager.list(partitionPath);
 +        Iterator<FileReference> localFilesIter = localFiles.iterator();
 +        LOGGER.info("Cleaning partition {}.", partitionPath.getRelativePath());
 +
 +        // Reconcile local files and cloud files
 +        while (localFilesIter.hasNext()) {
 +            FileReference file = localFilesIter.next();
 +            if (file.getFile().isDirectory()) {
 +                continue;
 +            }
 +
 +            CloudFile path = CloudFile.of(file.getRelativePath(), ioManager.getSize(file));
 +            if (!cloudFiles.contains(path)) {
 +                /*
 +                 * Delete local files that do not exist in cloud storage (the ground truth for valid files), or files
 +                 * that have not been downloaded completely.
 +                 */
-                 logDeleteFile(file);
++                logDeleteFile(file, path);
 +                localFilesIter.remove();
 +                ioManager.delete(file);
 +            } else {
 +                // No need to re-add it in the following loop
 +                cloudFiles.remove(path);
 +            }
 +        }
 +
 +        // Add the remaining files that are not stored locally (if any)
 +        for (CloudFile cloudFile : cloudFiles) {
 +            String cloudFilePath = cloudFile.getPath();
 +            if (!cloudFilePath.contains(partitionPath.getRelativePath())) {
 +                continue;
 +            }
 +            localFiles.add(new FileReference(partitionPath.getDeviceHandle(),
 +                    cloudFilePath.substring(cloudFilePath.indexOf(partitionPath.getRelativePath()))));
 +        }
 +    }
 +
-     private static void logDeleteFile(FileReference fileReference) {
++    private static void logDeleteFile(FileReference fileReference, CloudFile path) {
 +        LOGGER.info(
 +                "Deleting {} from the local cache as {} either doesn't exist in the cloud or it wasn't downloaded completely",
-                 fileReference, fileReference.getRelativePath());
++                fileReference, path);
 +    }
 +}
diff --cc asterixdb/asterix-common/src/main/java/org/apache/asterix/common/api/IPropertiesFactory.java
index 1d3f14eaf0,0520fe68f5..24b6642711
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/api/IPropertiesFactory.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/api/IPropertiesFactory.java
@@@ -20,9 -20,9 +20,10 @@@ package org.apache.asterix.common.api
  
  import org.apache.asterix.common.config.ActiveProperties;
  import org.apache.asterix.common.config.BuildProperties;
 +import org.apache.asterix.common.config.CloudProperties;
  import org.apache.asterix.common.config.CompilerProperties;
  import org.apache.asterix.common.config.ExternalProperties;
+ import org.apache.asterix.common.config.JacksonProperties;
  import org.apache.asterix.common.config.MessagingProperties;
  import org.apache.asterix.common.config.MetadataProperties;
  import org.apache.asterix.common.config.NodeProperties;
@@@ -102,5 -102,5 +103,7 @@@ public interface IPropertiesFactory 
       */
      NodeProperties newNodeProperties();
  
+     JacksonProperties newJacksonProperties();
++
 +    CloudProperties newCloudProperties();
  }
diff --cc asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/AsterixProperties.java
index 2b91ded3f5,fab1d1ae61..8220b6e00b
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/AsterixProperties.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/AsterixProperties.java
@@@ -37,7 -37,7 +37,7 @@@ public class AsterixProperties 
          configManager.register(NodeProperties.Option.class, CompilerProperties.Option.class,
                  MetadataProperties.Option.class, ExternalProperties.Option.class, ActiveProperties.Option.class,
                  MessagingProperties.Option.class, ReplicationProperties.Option.class, StorageProperties.Option.class,
-                 TransactionProperties.Option.class, CloudProperties.Option.class);
 -                TransactionProperties.Option.class, JacksonProperties.Option.class);
++                TransactionProperties.Option.class, CloudProperties.Option.class, JacksonProperties.Option.class);
  
          // we need to process the old-style asterix config before we apply defaults!
          configManager.addConfigurator(IConfigManager.ConfiguratorMetric.APPLY_DEFAULTS.metric() - 1, () -> {
diff --cc asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/PropertiesFactory.java
index 87c853da46,88bf908fd5..25446b6ee8
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/PropertiesFactory.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/PropertiesFactory.java
@@@ -78,8 -78,8 +78,13 @@@ public class PropertiesFactory implemen
          return new NodeProperties(propertiesAccessor);
      }
  
+     @Override
+     public JacksonProperties newJacksonProperties() {
+         return new JacksonProperties(propertiesAccessor);
+     }
++
 +    @Override
 +    public CloudProperties newCloudProperties() {
 +        return new CloudProperties(propertiesAccessor);
 +    }
  }
diff --cc asterixdb/asterix-server/pom.xml
index a20659c8ec,e450a18403..0fa467811d
--- a/asterixdb/asterix-server/pom.xml
+++ b/asterixdb/asterix-server/pom.xml
@@@ -489,13 -472,9 +489,13 @@@
               <noticeUrl>https://raw.githubusercontent.com/msgpack/msgpack-java/0.8.20/NOTICE</noticeUrl>
             </override>
             <override>
-               <gav>com.github.luben:zstd-jni:1.5.0-1</gav>
-               <url>https://raw.githubusercontent.com/luben/zstd-jni/v1.5.0-1/LICENSE</url>
+               <gav>com.github.luben:zstd-jni:1.5.6-6</gav>
+               <url>https://raw.githubusercontent.com/luben/zstd-jni/v1.5.6-6/LICENSE</url>
             </override>
 +            <override>
 +              <gav>com.github.luben:zstd-jni:1.5.6-2</gav>
 +              <url>https://raw.githubusercontent.com/luben/zstd-jni/v1.5.6-2/LICENSE</url>
 +            </override>
             <override>
               <gav>org.slf4j:slf4j-reload4j:1.7.36</gav>
               <url>https://raw.githubusercontent.com/qos-ch/slf4j/v_1.7.36/LICENSE.txt</url>
diff --cc asterixdb/pom.xml
index 65157a11f7,a55752b2d9..3d337da4bf
--- a/asterixdb/pom.xml
+++ b/asterixdb/pom.xml
@@@ -94,20 -88,17 +94,24 @@@
      <hadoop.version>3.4.1</hadoop.version>
      <jacoco.version>0.7.6.201602180812</jacoco.version>
      <log4j.version>2.22.1</log4j.version>
 +    <!-- IMPORTANT: please keep the aws-crt version in sync with that defined in the AWS SDK BOM -->
 +    <!-- you can get this by inspecting the aws-sdk-java-pom for the SDK version. e.g.
 +     $ curl -s https://repo1.maven.org/maven2/software/amazon/awssdk/aws-sdk-java-pom/2.31.57/aws-sdk-java-pom-2.31.57.pom | grep awscrt.version
 +        <awscrt.version>0.38.1</awscrt.version>
 +      -->
     <awsjavasdk.version>2.29.27</awsjavasdk.version>
 -    <parquet.version>1.15.2</parquet.version> <!-- NOTICE: please update transitives from parquet below on any change -->
 -    <hadoop-awsjavasdk.version>1.12.779</hadoop-awsjavasdk.version>
 +    <awsjavasdk.crt.version>0.33.3</awsjavasdk.crt.version>
 +
 +    <parquet.version>1.15.2</parquet.version>
-     <hadoop-awsjavasdk.version>1.12.779</hadoop-awsjavasdk.version>
-     <azureblobjavasdk.version>12.25.1</azureblobjavasdk.version>
-     <azurecommonjavasdk.version>12.24.1</azurecommonjavasdk.version>
-     <azureidentity.version>1.13.3</azureidentity.version>
-     <azuredatalakejavasdk.version>12.18.1</azuredatalakejavasdk.version>
++    <hadoop-awsjavasdk.version>1.12.788</hadoop-awsjavasdk.version>
+ 
+     <azureblobjavasdk.version>12.31.1</azureblobjavasdk.version>
+     <azurecommonjavasdk.version>12.30.1</azurecommonjavasdk.version>
+     <azureidentity.version>1.17.0</azureidentity.version>
+     <azuredatalakejavasdk.version>12.24.1</azuredatalakejavasdk.version>
+     <azurecore.version>1.56.0</azurecore.version>
+     <azurecorehttpnetty.version>1.16.0</azurecorehttpnetty.version>
+ 
      <gcsjavasdk.version>2.45.0</gcsjavasdk.version>
      <hadoop-azuresdk.version>8.6.6</hadoop-azuresdk.version>
      <hadoop-gcs.version>hadoop3-2.2.25</hadoop-gcs.version>
@@@ -636,13 -626,7 +640,13 @@@
              -DrunSlowAQLTests=${runSlowAQLTests}
              -Xdebug
              -Xrunjdwp:transport=dt_socket,server=y,address=8000,suspend=${debug.suspend.flag}
-             --add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
-             --add-opens java.management/sun.management=ALL-UNNAMED
-             --add-opens java.base/java.lang=ALL-UNNAMED
-             --add-opens java.base/java.nio=ALL-UNNAMED
-             --add-opens java.base/java.util=ALL-UNNAMED
-             --add-opens java.base/sun.nio.ch=ALL-UNNAMED
 -              --add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED --add-opens=java.management/sun.management=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED
++            --add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED
++            --add-opens java.management/sun.management=ALL-UNNAMED
++            --add-opens java.base/java.lang=ALL-UNNAMED
++            --add-opens java.base/java.nio=ALL-UNNAMED
++            --add-opens java.base/java.util=ALL-UNNAMED
++            --add-opens java.base/sun.nio.ch=ALL-UNNAMED
 +            --add-opens java.base/java.io=ALL-UNNAMED
                ${coverageArgLine}
                ${extraSurefireArgLine}
              </argLine>
@@@ -1702,26 -1725,46 +1740,53 @@@
        </dependency>
        <dependency>
          <groupId>org.apache.parquet</groupId>
-         <artifactId>parquet-common</artifactId>
+         <artifactId>parquet-format-structures</artifactId>
+         <version>${parquet.version}</version>
+       </dependency>
+       <dependency>
+         <groupId>org.apache.parquet</groupId>
+         <artifactId>parquet-encoding</artifactId>
          <version>${parquet.version}</version>
 +        <exclusions>
 +          <exclusion>
 +            <groupId>org.slf4j</groupId>
 +            <artifactId>slf4j-api</artifactId>
 +          </exclusion>
 +        </exclusions>
        </dependency>
        <dependency>
          <groupId>org.apache.parquet</groupId>
-         <artifactId>parquet-encoding</artifactId>
+         <artifactId>parquet-common</artifactId>
          <version>${parquet.version}</version>
 -      </dependency>
 -      <dependency>
 -        <groupId>org.apache.parquet</groupId>
 -        <artifactId>parquet-jackson</artifactId>
 -        <version>${parquet.version}</version>
 +        <exclusions>
 +          <exclusion>
 +            <groupId>org.slf4j</groupId>
 +            <artifactId>slf4j-api</artifactId>
 +          </exclusion>
 +        </exclusions>
        </dependency>
+       <!-- BEGIN: transitive from parquet, please validate with any update of ${parquet.version} -->
+       <dependency>
+         <groupId>io.airlift</groupId>
+         <artifactId>aircompressor</artifactId>
+         <version>2.0.2</version>
+       </dependency>
+       <dependency>
+         <groupId>com.github.luben</groupId>
+         <artifactId>zstd-jni</artifactId>
+         <version>1.5.6-6</version>
+       </dependency>
+       <dependency>
+         <groupId>commons-pool</groupId>
+         <artifactId>commons-pool</artifactId>
+         <version>1.6</version>
+       </dependency>
+       <!-- END: transitive from parquet, please validate with any update of ${parquet.version} -->
+       <dependency>
+         <groupId>org.kitesdk</groupId>
+         <artifactId>kite-data-core</artifactId>
+         <version>1.1.0</version>
+       </dependency>
        <!-- Hadoop AWS start -->
        <dependency>
          <!-- Pick a newer AWS SDK -->
diff --cc hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
index 9a8d9acab6,db8014b8c1..ec07affbdf
--- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
+++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
@@@ -104,11 -102,8 +104,10 @@@ public class NCConfig extends ControllerConfig
          PYTHON_DS_PATH(STRING, (String) null),
          CREDENTIAL_FILE(
                  OptionTypes.STRING,
-                 (Function<IApplicationConfig, String>) appConfig -> FileUtil
-                         .joinPath(appConfig.getString(ControllerConfig.Option.DEFAULT_DIR), "passwd"),
+                 appConfig -> FileUtil.joinPath(appConfig.getString(ControllerConfig.Option.DEFAULT_DIR), "passwd"),
 -                ControllerConfig.Option.DEFAULT_DIR.cmdline() + "/passwd");
 +                ControllerConfig.Option.DEFAULT_DIR.cmdline() + "/passwd"),
 +        STORAGE_MAX_COLUMNS_IN_ZEROTH_SEGMENT(INTEGER_BYTE_UNIT, 5000),
 +        STORAGE_PAGE_ZERO_WRITER(STRING, "default");
  
          private final IOptionType parser;
          private final String defaultValueDescription;
diff --cc hyracks-fullstack/pom.xml
index aa1f8a2bad,241252fa81..fc3950bd93
--- a/hyracks-fullstack/pom.xml
+++ b/hyracks-fullstack/pom.xml
@@@ -73,12 -73,9 +73,12 @@@
      <jacoco.version>0.7.6.201602180812</jacoco.version>
      <log4j.version>2.22.1</log4j.version>
      <snappy.version>1.1.10.5</snappy.version>
-     <jackson.version>2.14.3</jackson.version>
+     <jackson.version>2.19.2</jackson.version>
      <jackson-databind.version>${jackson.version}</jackson-databind.version>
-     <netty.version>4.1.121.Final</netty.version>
+     <netty.version>4.1.124.Final</netty.version>
 +    <asm.version>9.3</asm.version>
 +    <awsjavasdk.version>2.29.27</awsjavasdk.version>
 +    <gcsjavasdk.version>2.45.0</gcsjavasdk.version>
  
     <implementation.title>Apache Hyracks and Algebricks - ${project.name}</implementation.title>
      <implementation.url>https://asterixdb.apache.org/</implementation.url>
@@@ -383,6 -451,21 +454,26 @@@
          <artifactId>jackson-annotations</artifactId>
          <version>${jackson.version}</version>
        </dependency>
+       <dependency>
+         <groupId>com.fasterxml.jackson.dataformat</groupId>
+         <artifactId>jackson-dataformat-cbor</artifactId>
+         <version>${jackson.version}</version>
+       </dependency>
+       <dependency>
+         <groupId>com.fasterxml.jackson.dataformat</groupId>
+         <artifactId>jackson-dataformat-xml</artifactId>
+         <version>${jackson.version}</version>
+       </dependency>
++      <dependency>
++        <groupId>com.fasterxml.jackson.dataformat</groupId>
++        <artifactId>jackson-dataformat-csv</artifactId>
++        <version>${jackson.version}</version>
++      </dependency>
+       <dependency>
+         <groupId>com.fasterxml.jackson.datatype</groupId>
+         <artifactId>jackson-datatype-jsr310</artifactId>
+         <version>${jackson.version}</version>
+       </dependency>
        <dependency>
          <groupId>com.google.guava</groupId>
          <artifactId>guava</artifactId>
