This is an automated email from the ASF dual-hosted git repository.

mblow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
commit 13f95fc15ffcdddde6556c6f47b62efb92833bc0 Merge: 7a764591f0 8b342cc119 Author: Michael Blow <[email protected]> AuthorDate: Sun Aug 24 07:19:26 2025 -0400 Merge branch 'gerrit/trinity' into 'gerrit/ionic', update aws hadoop dep Merged commits from gerrit/trinity: * [NO ISSUE][HYR][MISC] Update commons-lang3 to 3.18.0 for CVE-2025-48924 * [NO ISSUE][OTH] Update parquet version * [NO ISSUE][HYR] Update Jackson to 2.19.2 to address CVEs AWS dependency updates: - hadoop-awsjavasdk.version: 1.12.779 -> 1.12.788 Update Azurite to 3.35.0 Ext-ref: MB-68123 Change-Id: I61c069e7537a4bc7535d0347b8104b8a2691f0ba asterixdb/asterix-app/pom.xml | 2 +- .../asterix/common/config/ConfigUsageTest.java | 2 +- .../api/cluster_state_1/cluster_state_1.1.regexadm | 6 + .../cluster_state_1_full.1.regexadm | 6 + .../cluster_state_1_less.1.regexadm | 6 + .../src/test/resources/runtimets/sqlpp_queries.xml | 2 +- asterixdb/asterix-cloud/pom.xml | 2 +- .../asterix/cloud/AbstractCloudIOManager.java | 1 + .../apache/asterix/cloud/clients/CloudFile.java | 7 +- .../apache/asterix/cloud/util/CloudFileUtil.java | 6 +- .../asterix/common/api/IPropertiesFactory.java | 3 + .../asterix/common/config/AsterixProperties.java | 10 +- .../asterix/common/config/JacksonProperties.java | 138 +++++++++++++++++++++ .../asterix/common/config/PropertiesFactory.java | 5 + asterixdb/asterix-server/pom.xml | 4 +- asterixdb/pom.xml | 101 ++++++++++++--- .../org/apache/hyracks/control/cc/CCDriver.java | 5 +- .../control/common/config/ConfigManager.java | 7 +- .../control/common/controllers/NCConfig.java | 3 +- .../java/org/apache/hyracks/util/StorageUtil.java | 23 ++-- .../org/apache/hyracks/util/StorageUnitTest.java | 17 ++- hyracks-fullstack/pom.xml | 103 ++++++++++++++- 22 files changed, 403 insertions(+), 56 deletions(-) diff --cc asterixdb/asterix-app/pom.xml index 4910e8241f,0c4b94c613..b04c95716f --- a/asterixdb/asterix-app/pom.xml +++ b/asterixdb/asterix-app/pom.xml @@@ -607,65 -500,6 +607,65 @@@ </plugins> </build> </profile> + <profile> + <id>asterix-gerrit-cloud-tests</id> + <properties> + <test.includes> + **/CloudStorageTest.java, + **/CloudStorageSparseTest.java, + **/CloudStorageCancellationTest.java, + **/SqlppSinglePointLookupExecutionTest.java, **/AwsS3*.java + </test.includes> + <failIfNoTests>false</failIfNoTests> + </properties> + </profile> + <profile> + <id>asterix-gerrit-cloud-nons3-tests</id> + <properties> + <test.includes> + **/GCSCloudStorageUnstableTest.java,**/CloudStorageAzTest.java,**/AzureBlobStorageExternalDatasetTest.java, + **/AzureBlobStorageExternalDatasetOnePartitionTest.java,**/CloudStorageUnstableTest.java, **/*SqlppHdfs*.java + </test.includes> + <failIfNoTests>false</failIfNoTests> + </properties> + </profile> + <profile> + <id>azurite-tests</id> + <build> + <plugins> + <plugin> + <groupId>com.github.eirslett</groupId> + <artifactId>frontend-maven-plugin</artifactId> + <version>1.13.4</version> + <configuration> + <nodeVersion>v14.15.4</nodeVersion> + <npmVersion>6.14.11</npmVersion> + <workingDirectory>target/npm</workingDirectory> + <installDirectory>target/npm</installDirectory> + </configuration> + <executions> + <execution> + <id>install node and yarn</id> + <goals> + <goal>install-node-and-npm</goal> + </goals> + <phase>${azurite.npm.install.stage}</phase> + </execution> + <execution> + <id>azurite blob</id> + <phase>${azurite.install.stage}</phase> + <goals> + <goal>npm</goal> + </goals> + <configuration> - <arguments>install [email protected]</arguments> ++ <arguments>install [email 
protected]</arguments> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> </profiles> <dependencies> <dependency> diff --cc asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm index 29061ec7c3,7db9ee1dd6..065435b55c --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.regexadm @@@ -62,10 -30,15 +62,16 @@@ "compiler\.textsearchmemory" : 163840, "compiler\.windowmemory" : 196608, "default\.dir" : "target/io/dir/asterixdb", + "gcp.impersonate.service.account.duration" : 900, + "json\.max\.depth" : 1000, + "json\.max\.doc\.length" : -1, + "json\.max\.name\.length" : 50000, + "json\.max\.number\.length" : 1000, + "json\.max\.string\.length" : 2147483647, + "json\.max\.token\.count" : -1, "library\.deploy\.timeout" : 1800, "log\.dir" : "logs/", - "log\.level" : "INFO", + "log\.level" : "DEBUG", "max\.wait\.active\.cluster" : 60, "max.web.request.size" : 209715200, "messaging\.frame\.count" : 512, diff --cc asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm index f2ea15baad,1c6f3432e5..28fa1f2b6c --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_full/cluster_state_1_full.1.regexadm @@@ -62,7 -30,12 +62,13 @@@ "compiler\.textsearchmemory" : 163840, "compiler\.windowmemory" : 196608, "default\.dir" : "target/io/dir/asterixdb", + "gcp.impersonate.service.account.duration" : 900, + "json\.max\.depth" : 1000, + "json\.max\.doc\.length" : -1, + "json\.max\.name\.length" : 50000, + "json\.max\.number\.length" : 1000, + "json\.max\.string\.length" : 2147483647, + "json\.max\.token\.count" : -1, "library\.deploy\.timeout" : 1800, "log\.dir" : "logs/", "log\.level" : "WARN", diff --cc asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm index 685d28b7bc,49251a9817..8e0e9a6b00 --- a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm +++ b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1_less/cluster_state_1_less.1.regexadm @@@ -62,7 -30,12 +62,13 @@@ "compiler\.textsearchmemory" : 163840, "compiler\.windowmemory" : 196608, "default\.dir" : "target/io/dir/asterixdb", + "gcp.impersonate.service.account.duration" : 900, + "json\.max\.depth" : 1000, + "json\.max\.doc\.length" : -1, + "json\.max\.name\.length" : 50000, + "json\.max\.number\.length" : 1000, + "json\.max\.string\.length" : 2147483647, + "json\.max\.token\.count" : -1, "library\.deploy\.timeout" : 1800, "log\.dir" : "logs/", "log\.level" : "WARN", diff --cc asterixdb/asterix-cloud/pom.xml index 419ac4d762,0000000000..06ca83d491 mode 100644,000000..100644 --- a/asterixdb/asterix-cloud/pom.xml +++ b/asterixdb/asterix-cloud/pom.xml @@@ -1,277 -1,0 +1,277 @@@ +<!-- + ! Licensed to the Apache Software Foundation (ASF) under one + ! or more contributor license agreements. See the NOTICE file + ! distributed with this work for additional information + ! regarding copyright ownership. The ASF licenses this file + ! to you under the Apache License, Version 2.0 (the + ! 
"License"); you may not use this file except in compliance + ! with the License. You may obtain a copy of the License at + ! + ! http://www.apache.org/licenses/LICENSE-2.0 + ! + ! Unless required by applicable law or agreed to in writing, + ! software distributed under the License is distributed on an + ! "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + ! KIND, either express or implied. See the License for the + ! specific language governing permissions and limitations + ! under the License. + !--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <artifactId>apache-asterixdb</artifactId> + <groupId>org.apache.asterix</groupId> + <version>0.9.10-SNAPSHOT</version> + </parent> + <artifactId>asterix-cloud</artifactId> + + <licenses> + <license> + <name>Apache License, Version 2.0</name> + <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url> + <distribution>repo</distribution> + <comments>A business-friendly OSS license</comments> + </license> + </licenses> + + <properties> + <root.dir>${basedir}/..</root.dir> + </properties> + + <build> + <plugins> + <plugin> + <groupId>org.apache.rat</groupId> + <artifactId>apache-rat-plugin</artifactId> + <executions> + <execution> + <id>default</id> + <phase>validate</phase> + <goals> + <goal>check</goal> + </goals> + <configuration> + <licenses> + <license implementation="org.apache.rat.analysis.license.ApacheSoftwareLicense20"/> + </licenses> + <excludes combine.children="append"> + <exclude>src/test/resources/result/**</exclude> + </excludes> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>com.googlecode.maven-download-plugin</groupId> + <artifactId>download-maven-plugin</artifactId> + <version>1.4.2</version> + <executions> + <execution> + <id>install-fake-gcs</id> + <phase>${gcs.download.stage}</phase> + <goals> + <goal>wget</goal> + </goals> + <configuration> + <url> + https://github.com/fsouza/fake-gcs-server/releases/download/v1.48.0/fake-gcs-server_1.48.0_Linux_amd64.tar.gz + </url> + <outputFileName>fake-gcs-server_1.48.0_Linux_amd64.tar.gz</outputFileName> + <outputDirectory>${project.build.directory}</outputDirectory> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-antrun-plugin</artifactId> + <executions> + <execution> + <id>extract-gcs</id> + <phase>${gcs.install.stage}</phase> + <configuration> + <target> + <echo message="Extracting fake-gcs-server"/> + <mkdir dir="${project.build.directory}/fake-gcs-server"/> + <gunzip src="${project.build.directory}/fake-gcs-server_1.48.0_Linux_amd64.tar.gz" + dest="${project.build.directory}/fake-gcs-server_1.48.0_Linux_amd64.tar"/> + <untar src="${project.build.directory}/fake-gcs-server_1.48.0_Linux_amd64.tar" + dest="${project.build.directory}/fake-gcs-server"/> + <chmod file="${project.build.directory}/fake-gcs-server/fake-gcs-server" perm="ugo+rx"/> + </target> + </configuration> + <goals> + <goal>run</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>exec-maven-plugin</artifactId> + <executions> + <execution> + <id>fake-gcs-server</id> + <phase>${gcs.stage}</phase> + <goals> + <goal>exec</goal> + </goals> + <configuration> + 
<executable>${project.build.directory}/fake-gcs-server/fake-gcs-server</executable> + <workingDirectory>${project.build.directory}/fake-gcs-server</workingDirectory> + <arguments> + <argument>-port</argument> + <argument>24443</argument> + <argument>-scheme</argument> + <argument>http</argument> + <argument>-host</argument> + <argument>127.0.0.1</argument> + <argument>-log-level</argument> + <argument>error</argument> + <argument>-filesystem-root</argument> + <argument>${project.build.directory}/fake-gcs-server/storage</argument> + </arguments> + <async>true</async> + </configuration> + </execution> + <execution> + <id>azurite</id> + <phase>${azurite.stage}</phase> + <goals> + <goal>exec</goal> + </goals> + <configuration> + <!--suppress UnresolvedMavenProperty --> + <executable>${project.build.directory}/npm/node_modules/.bin/azurite-blob</executable> + <workingDirectory>${project.build.directory}</workingDirectory> + <environmentVariables> + <PATH>${project.build.directory}/npm/node</PATH> + </environmentVariables> + <arguments> + <argument>--blobPort</argument> + <argument>15055</argument> + <argument>--location</argument> + <argument>${project.build.directory}/azurite</argument> + <argument>--debug</argument> + <argument>${project.build.directory}/azurite/logs/azurite-debug.log</argument> + </arguments> + <async>true</async> + <outputFile>${project.build.directory}/azurite/logs/azurite.log</outputFile> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + + <profiles> + <profile> + <id>azurite-tests</id> + <build> + <plugins> + <plugin> + <groupId>com.github.eirslett</groupId> + <artifactId>frontend-maven-plugin</artifactId> + <version>1.13.4</version> + <configuration> + <nodeVersion>v14.15.4</nodeVersion> + <npmVersion>6.14.11</npmVersion> + <workingDirectory>target/npm</workingDirectory> + <installDirectory>target/npm</installDirectory> + </configuration> + <executions> + <execution> + <id>install node and yarn</id> + <goals> + <goal>install-node-and-npm</goal> + </goals> + <phase>${azurite.npm.install.stage}</phase> + </execution> + <execution> + <id>azurite blob</id> + <phase>${azurite.install.stage}</phase> + <goals> + <goal>npm</goal> + </goals> + <configuration> - <arguments>install [email protected]</arguments> ++ <arguments>install [email protected]</arguments> + </configuration> + </execution> + </executions> + </plugin> + </plugins> + </build> + </profile> + </profiles> + + <dependencies> + <dependency> + <groupId>org.apache.hyracks</groupId> + <artifactId>hyracks-cloud</artifactId> + <version>${hyracks.version}</version> + </dependency> + <dependency> + <groupId>org.apache.asterix</groupId> + <artifactId>asterix-common</artifactId> + <version>${project.version}</version> + </dependency> + <dependency> + <groupId>org.apache.asterix</groupId> + <artifactId>asterix-external-data</artifactId> + <version>${project.version}</version> + </dependency> + <!-- aws s3 start --> + <dependency> + <groupId>software.amazon.awssdk</groupId> + <artifactId>sdk-core</artifactId> + </dependency> + <dependency> + <groupId>software.amazon.awssdk</groupId> + <artifactId>s3</artifactId> + </dependency> + <dependency> + <groupId>software.amazon.awssdk</groupId> + <artifactId>regions</artifactId> + </dependency> + <dependency> + <groupId>software.amazon.awssdk</groupId> + <artifactId>auth</artifactId> + </dependency> + <dependency> + <groupId>software.amazon.awssdk</groupId> + <artifactId>s3-transfer-manager</artifactId> + </dependency> + <dependency> + 
<groupId>software.amazon.awssdk.crt</groupId> + <artifactId>aws-crt</artifactId> + </dependency> + <dependency> + <groupId>software.amazon.awssdk</groupId> + <artifactId>apache-client</artifactId> + </dependency> + <dependency> + <groupId>junit</groupId> + <artifactId>junit</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>io.findify</groupId> + <artifactId>s3mock_2.12</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>com.typesafe.akka</groupId> + <artifactId>akka-http-core_2.12</artifactId> + <scope>test</scope> + </dependency> + <!-- aws s3 end --> + + <dependency> + <groupId>com.azure</groupId> + <artifactId>azure-storage-blob-batch</artifactId> + <version>12.23.0</version> + </dependency> + + </dependencies> +</project> diff --cc asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java index 8fefcf5bf3,0000000000..d8f682648a mode 100644,000000..100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/AbstractCloudIOManager.java @@@ -1,498 -1,0 +1,499 @@@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.asterix.cloud; + +import static org.apache.asterix.common.utils.StorageConstants.METADATA_PARTITION; +import static org.apache.asterix.common.utils.StorageConstants.PARTITION_DIR_PREFIX; +import static org.apache.asterix.common.utils.StorageConstants.STORAGE_ROOT_DIR_NAME; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.file.FileStore; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.function.Predicate; + +import org.apache.asterix.cloud.bulk.DeleteBulkCloudOperation; +import org.apache.asterix.cloud.bulk.NoOpDeleteBulkCallBack; +import org.apache.asterix.cloud.clients.CloudClientProvider; +import org.apache.asterix.cloud.clients.CloudFile; +import org.apache.asterix.cloud.clients.ICloudClient; +import org.apache.asterix.cloud.clients.ICloudGuardian; +import org.apache.asterix.cloud.clients.ICloudWriter; +import org.apache.asterix.cloud.util.CloudFileUtil; +import org.apache.asterix.common.api.INamespacePathResolver; +import org.apache.asterix.common.cloud.IPartitionBootstrapper; +import org.apache.asterix.common.config.CloudProperties; +import org.apache.asterix.common.metadata.MetadataConstants; +import org.apache.asterix.common.transactions.IRecoveryManager.SystemState; +import org.apache.asterix.common.utils.StoragePathUtil; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.api.io.FileReference; +import org.apache.hyracks.api.io.IFileHandle; +import org.apache.hyracks.api.io.IIOBulkOperation; +import org.apache.hyracks.api.io.IODeviceHandle; +import org.apache.hyracks.api.util.IoUtil; +import org.apache.hyracks.cloud.filesystem.PhysicalDrive; +import org.apache.hyracks.cloud.io.ICloudIOManager; +import org.apache.hyracks.cloud.io.request.ICloudBeforeRetryRequest; +import org.apache.hyracks.cloud.io.request.ICloudRequest; +import org.apache.hyracks.cloud.io.stream.CloudInputStream; +import org.apache.hyracks.cloud.util.CloudRetryableRequestUtil; +import org.apache.hyracks.control.nc.io.IOManager; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ObjectNode; + +public abstract class AbstractCloudIOManager extends IOManager implements IPartitionBootstrapper, ICloudIOManager { + private static final Logger LOGGER = LogManager.getLogger(); + private static final byte[] EMPTY_FILE_BYTES = "empty".getBytes(); + private static final Predicate<String> NO_OP_LIST_FILES_FILTER = (path) -> true; + + protected final ICloudClient cloudClient; + protected final ICloudGuardian guardian; + protected final IWriteBufferProvider writeBufferProvider; + protected final String bucket; + protected final Set<Integer> partitions; + protected final List<FileReference> partitionPaths; + protected final IOManager localIoManager; + protected final INamespacePathResolver nsPathResolver; + private final List<FileStore> drivePaths; + + public AbstractCloudIOManager(IOManager ioManager, CloudProperties cloudProperties, + INamespacePathResolver nsPathResolver, ICloudGuardian guardian) throws HyracksDataException { + super(ioManager.getIODevices(), ioManager.getDeviceComputer(), ioManager.getIOParallelism(), + ioManager.getQueueSize()); + this.nsPathResolver = 
nsPathResolver; + this.bucket = cloudProperties.getStorageBucket(); + cloudClient = CloudClientProvider.getClient(cloudProperties, guardian); + this.guardian = guardian; + int numOfThreads = getIODevices().size() * getIOParallelism(); + writeBufferProvider = new WriteBufferProvider(numOfThreads, cloudClient.getWriteBufferSize()); + partitions = new HashSet<>(); + partitionPaths = new ArrayList<>(); + this.localIoManager = ioManager; + drivePaths = PhysicalDrive.getDrivePaths(ioDevices); + } + + /* + * ****************************************************************** + * IPartitionBootstrapper functions + * ****************************************************************** + */ + + @Override + public SystemState getSystemStateOnMissingCheckpoint() throws HyracksDataException { + Set<CloudFile> existingMetadataFiles = getCloudMetadataPartitionFiles(); + CloudFile bootstrapMarkerPath = CloudFile.of(StoragePathUtil.getBootstrapMarkerRelativePath(nsPathResolver)); + if (existingMetadataFiles.isEmpty() || existingMetadataFiles.contains(bootstrapMarkerPath)) { + LOGGER.info("First time to initialize this cluster: systemState = PERMANENT_DATA_LOSS"); + return SystemState.PERMANENT_DATA_LOSS; + } else { + LOGGER.info( + "Resuming a previously initialized cluster; setting system state to {} to force local recovery if needed", + SystemState.CORRUPTED); + return SystemState.CORRUPTED; + } + } + + @Override + public final void bootstrap(Set<Integer> activePartitions, List<FileReference> currentOnDiskPartitions, + boolean metadataNode, int metadataPartition, boolean ensureCompleteBootstrap) throws HyracksDataException { + partitions.clear(); + partitions.addAll(activePartitions); + if (metadataNode) { + partitions.add(metadataPartition); + if (ensureCompleteBootstrap) { + ensureCompleteMetadataBootstrap(); + } + } + + partitionPaths.clear(); + for (Integer partition : activePartitions) { + String partitionDir = PARTITION_DIR_PREFIX + partition; + partitionPaths.add(resolve(STORAGE_ROOT_DIR_NAME + File.separator + partitionDir)); + } + + LOGGER.info("Initializing cloud manager with ({}) storage partitions: {}", partitions.size(), partitions); + if (!currentOnDiskPartitions.isEmpty()) { + deleteUnkeptPartitionDirs(currentOnDiskPartitions); + cleanupLocalFiles(); + } + + // Has different implementations depending on the caching policy + downloadPartitions(metadataNode, metadataPartition); + } + + private void deleteUnkeptPartitionDirs(List<FileReference> currentOnDiskPartitions) throws HyracksDataException { + for (FileReference partitionDir : currentOnDiskPartitions) { + int partitionNum = StoragePathUtil.getPartitionNumFromRelativePath(partitionDir.getRelativePath()); + if (!partitions.contains(partitionNum)) { + LOGGER.warn("Deleting storage partition {} as it does not belong to the current storage partitions {}", + partitionNum, partitions); + localIoManager.delete(partitionDir); + } + } + } + + private void cleanupLocalFiles() throws HyracksDataException { + Set<CloudFile> cloudFiles = cloudClient.listObjects(bucket, STORAGE_ROOT_DIR_NAME, IoUtil.NO_OP_FILTER); ++ LOGGER.debug("+cleanupLocalFiles: cloud files: {}", cloudFiles); + if (cloudFiles.isEmpty()) { + LOGGER.warn("No files in the cloud. 
Deleting all local files in partitions {}...", partitions); + for (FileReference partitionPath : partitionPaths) { + if (localIoManager.exists(partitionPath)) { + // Clean local dir from all files + localIoManager.cleanDirectory(partitionPath); + } + } + } else { + LOGGER.info("Cleaning node partitions..."); + for (FileReference partitionPath : partitionPaths) { + CloudFileUtil.cleanDirectoryFiles(localIoManager, cloudFiles, partitionPath); + } + } + } + + protected abstract void downloadPartitions(boolean metadataNode, int metadataPartition) throws HyracksDataException; + + protected abstract Set<UncachedFileReference> getUncachedFiles(); + + /* + * ****************************************************************** + * ICloudIOManager functions + * ****************************************************************** + */ + + @Override + public final void cloudRead(IFileHandle fHandle, long offset, ByteBuffer data) throws HyracksDataException { + int position = data.position(); + ICloudRequest request = + () -> cloudClient.read(bucket, fHandle.getFileReference().getRelativePath(), offset, data); + ICloudBeforeRetryRequest retry = () -> data.position(position); + CloudRetryableRequestUtil.run(request, retry); + } + + @Override + public final CloudInputStream cloudRead(IFileHandle fHandle, long offset, long length) throws HyracksDataException { + return CloudRetryableRequestUtil.run(() -> new CloudInputStream(this, fHandle, + cloudClient.getObjectStream(bucket, fHandle.getFileReference().getRelativePath(), offset, length), + offset, length)); + } + + @Override + public void restoreStream(CloudInputStream cloudStream) { + LOGGER.warn("Restoring stream from cloud, {}", cloudStream); + /* + * This cloud request should not be called using CloudRetryableRequestUtil as it is the responsibility of the + * caller to warp this request as ICloudRequest or ICloudRetry. 
+ */ + InputStream stream = cloudClient.getObjectStream(bucket, cloudStream.getPath(), cloudStream.getOffset(), + cloudStream.getRemaining()); + cloudStream.setInputStream(stream); + } + + @Override + public final int localWriter(IFileHandle fHandle, long offset, ByteBuffer data) throws HyracksDataException { + // Using syncWrite here to avoid closing the file channel when the thread is interrupted + return localIoManager.syncWrite(fHandle, offset, data); + } + + @Override + public final int cloudWrite(IFileHandle fHandle, long offset, ByteBuffer data) throws HyracksDataException { + ICloudWriter cloudWriter = ((CloudFileHandle) fHandle).getCloudWriter(); + int writtenBytes; + try { + ensurePosition(fHandle, cloudWriter.position(), offset); + writtenBytes = cloudWriter.write(data); + } catch (HyracksDataException e) { + cloudWriter.abort(); + throw e; + } + return writtenBytes; + } + + @Override + public final long cloudWrite(IFileHandle fHandle, long offset, ByteBuffer[] data) throws HyracksDataException { + ICloudWriter cloudWriter = ((CloudFileHandle) fHandle).getCloudWriter(); + int writtenBytes; + try { + ensurePosition(fHandle, cloudWriter.position(), offset); + writtenBytes = cloudWriter.write(data[0], data[1]); + } catch (HyracksDataException e) { + cloudWriter.abort(); + throw e; + } + return writtenBytes; + } + + /* + * ****************************************************************** + * IIOManager functions + * ****************************************************************** + */ + + @Override + public final IFileHandle open(FileReference fileRef, FileReadWriteMode rwMode, FileSyncMode syncMode) + throws HyracksDataException { + ICloudWriter cloudWriter = cloudClient.createWriter(bucket, fileRef.getRelativePath(), writeBufferProvider); + CloudFileHandle fHandle = new CloudFileHandle(fileRef, cloudWriter); + onOpen(fHandle); + try { + fHandle.open(rwMode, syncMode); + } catch (IOException e) { + throw HyracksDataException.create(e); + } + return fHandle; + } + + /** + * Action required to do when opening a file + * + * @param fileHandle file to open + */ + protected abstract void onOpen(CloudFileHandle fileHandle) throws HyracksDataException; + + @Override + public final long doSyncWrite(IFileHandle fHandle, long offset, ByteBuffer[] dataArray) + throws HyracksDataException { + // Save original position and limit + ByteBuffer buffer1 = dataArray[0]; + int position1 = buffer1.position(); + + ByteBuffer buffer2 = dataArray[1]; + int position2 = buffer2.position(); + + long writtenBytes = localIoManager.doSyncWrite(fHandle, offset, dataArray); + + // Restore original position + buffer1.position(position1); + buffer2.position(position2); + + cloudWrite(fHandle, offset, dataArray); + return writtenBytes; + } + + @Override + public final int doSyncWrite(IFileHandle fHandle, long offset, ByteBuffer data) throws HyracksDataException { + // Save original position and limit + int position = data.position(); + + int writtenBytes = localIoManager.doSyncWrite(fHandle, offset, data); + + // Restore original position + data.position(position); + cloudWrite(fHandle, offset, data); + return writtenBytes; + } + + @Override + public IIOBulkOperation createDeleteBulkOperation() { + return new DeleteBulkCloudOperation(localIoManager, bucket, cloudClient, NoOpDeleteBulkCallBack.INSTANCE); + } + + @Override + public final void close(IFileHandle fHandle) throws HyracksDataException { + try { + CloudFileHandle cloudFileHandle = (CloudFileHandle) fHandle; + cloudFileHandle.close(); + } catch 
(IOException e) { + throw HyracksDataException.create(e); + } + } + + @Override + public final void sync(IFileHandle fileHandle, boolean metadata) throws HyracksDataException { + HyracksDataException savedEx = null; + if (metadata) { + // only finish writing if metadata == true to prevent write limiter from finishing the stream and + // completing the upload. + ICloudWriter cloudWriter = ((CloudFileHandle) fileHandle).getCloudWriter(); + try { + cloudWriter.finish(); + } catch (HyracksDataException e) { + savedEx = e; + } + + if (savedEx != null) { + try { + cloudWriter.abort(); + } catch (HyracksDataException e) { + savedEx.addSuppressed(e); + } + throw savedEx; + } + } + // Sync only after finalizing the upload to cloud storage + localIoManager.sync(fileHandle, metadata); + } + + @Override + public final void create(FileReference fileRef) throws HyracksDataException { + // We need to delete the local file on create as the cloud storage didn't complete the upload + // In other words, both cloud files and the local files are not in sync + overwrite(fileRef, EMPTY_FILE_BYTES); + localIoManager.delete(fileRef); + localIoManager.create(fileRef); + } + + @Override + public final void copyDirectory(FileReference srcFileRef, FileReference destFileRef) throws HyracksDataException { + cloudClient.copy(bucket, srcFileRef.getRelativePath(), destFileRef); + localIoManager.copyDirectory(srcFileRef, destFileRef); + } + + // TODO(htowaileb): the localIoManager is closed by the node controller service as well, check if we need this + @Override + public final void close() throws IOException { + cloudClient.close(); + super.close(); + localIoManager.close(); + } + + /** + * Returns a list of all stored objects (sorted ASC by path) in the cloud and their sizes. The already cached files + * are retrieved by listing the local disk, while the uncached files are retrieved from uncached files trackers. + * + * @param objectMapper to create the result {@link JsonNode} + * @return {@link JsonNode} with stored objects' information + */ + public final JsonNode listAsJson(ObjectMapper objectMapper) { + ArrayNode objectsInfo = objectMapper.createArrayNode(); + try { + List<CloudFile> allFiles = list(); + allFiles.sort((x, y) -> String.CASE_INSENSITIVE_ORDER.compare(x.getPath(), y.getPath())); + for (CloudFile file : allFiles) { + ObjectNode objectInfo = objectsInfo.addObject(); + objectInfo.put("path", file.getPath()); + objectInfo.put("size", file.getSize()); + } + return objectsInfo; + } catch (Throwable th) { + LOGGER.warn("Failed to retrieve list of all cloud files", th); + objectsInfo.removeAll(); + ObjectNode objectInfo = objectsInfo.addObject(); + objectInfo.put("error", "Failed to retrieve list of all cloud files. 
" + th.getMessage()); + return objectsInfo; + } + } + + private List<CloudFile> list() { + List<CloudFile> allFiles = new ArrayList<>(); + // get cached files (read from disk) + for (IODeviceHandle deviceHandle : getIODevices()) { + FileReference storageRoot = deviceHandle.createFileRef(STORAGE_ROOT_DIR_NAME); + + Set<FileReference> deviceFiles; + try { + deviceFiles = localIoManager.list(storageRoot, IoUtil.NO_OP_FILTER); + } catch (Throwable th) { + LOGGER.warn("Failed to get local storage files for root {}", storageRoot.getRelativePath(), th); + continue; + } + + for (FileReference fileReference : deviceFiles) { + try { + allFiles.add(CloudFile.of(fileReference.getRelativePath(), fileReference.getFile().length())); + } catch (Throwable th) { + LOGGER.warn("Encountered issue for local storage file {}", fileReference.getRelativePath(), th); + } + } + } + + // get uncached files from uncached files tracker + for (UncachedFileReference uncachedFile : getUncachedFiles()) { + allFiles.add(CloudFile.of(uncachedFile.getRelativePath(), uncachedFile.getSize())); + } + return allFiles; + } + + /** + * Writes the bytes to the specified key in the bucket + * + * @param key the key where the bytes will be written + * @param bytes the bytes to write + */ + public final void put(String key, byte[] bytes) throws HyracksDataException { + cloudClient.write(bucket, key, bytes); + } + + public ICloudClient getCloudClient() { + return cloudClient; + } + + private Set<CloudFile> getCloudMetadataPartitionFiles() throws HyracksDataException { + String metadataNamespacePath = StoragePathUtil.getNamespacePath(nsPathResolver, + MetadataConstants.METADATA_NAMESPACE, METADATA_PARTITION); + return cloudClient.listObjects(bucket, metadataNamespacePath, IoUtil.NO_OP_FILTER); + } + + private void ensureCompleteMetadataBootstrap() throws HyracksDataException { + Set<CloudFile> metadataPartitionFiles = getCloudMetadataPartitionFiles(); + CloudFile marker = CloudFile.of(StoragePathUtil.getBootstrapMarkerRelativePath(nsPathResolver)); + boolean foundBootstrapMarker = metadataPartitionFiles.contains(marker); + // if the bootstrap file exists, we failed to bootstrap --> delete all partial files in metadata partition + if (foundBootstrapMarker) { + LOGGER.info( + "detected failed bootstrap attempted, deleting all existing files in the metadata partition: {}", + metadataPartitionFiles); + IIOBulkOperation deleteBulkOperation = createDeleteBulkOperation(); + for (CloudFile file : metadataPartitionFiles) { + deleteBulkOperation.add(resolve(file.getPath())); + } + performBulkOperation(deleteBulkOperation); + } + } + + private void ensurePosition(IFileHandle fileHandle, long cloudOffset, long requestedWriteOffset) { + if (cloudOffset != requestedWriteOffset) { + throw new IllegalStateException("Misaligned positions in " + fileHandle.getFileReference() + + ", cloudOffset: " + cloudOffset + " != requestedWriteOffset: " + requestedWriteOffset); + } + } + + public long getTotalRemoteStorageSizeForNodeBytes() { + return getSize(NO_OP_LIST_FILES_FILTER); + } + + @Override + public long getSize(Predicate<String> relativePathFilter) { + long totalSize = localIoManager.getSize(relativePathFilter); + + // get uncached files from uncached files tracker + for (UncachedFileReference uncachedFile : getUncachedFiles()) { + if (relativePathFilter.test(uncachedFile.getRelativePath())) { + totalSize += uncachedFile.getSize(); + } + } + return totalSize; + } + + @Override + public long getTotalDiskUsage() { + return 
PhysicalDrive.getUsedSpace(drivePaths); + } +} diff --cc asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/CloudFile.java index 50edd7b99e,0000000000..31f32fd86f mode 100644,000000..100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/CloudFile.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/clients/CloudFile.java @@@ -1,84 -1,0 +1,83 @@@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.cloud.clients; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +public final class CloudFile { + private static final long IGNORED_SIZE = -1; + private final String path; + private final long size; + + private CloudFile(String path, long size) { + this.path = path; + this.size = size; + } + + public String getPath() { + return path; + } + + public long getSize() { + return size; + } + + @Override + public int hashCode() { + return path.hashCode(); + } + + @Override + public boolean equals(Object obj) { - if (!(obj instanceof CloudFile)) { ++ if (!(obj instanceof CloudFile other)) { + return false; + } + - CloudFile other = (CloudFile) obj; + return path.equals(other.path) && compareSize(other.size); + } + + @Override + public String toString() { - return path; ++ return path + '[' + size + ']'; + } + + private boolean compareSize(long otherSize) { - // Compare sizes iff both sizes are not ignored ++ // Compare sizes if both sizes are not ignored + return size == otherSize || size == IGNORED_SIZE || otherSize == IGNORED_SIZE; + } + + public static CloudFile of(String path, long size) { + return new CloudFile(path, size); + } + + public static CloudFile of(String path) { + return new CloudFile(path, IGNORED_SIZE); + } + + public static Map<String, CloudFile> toMap(Set<CloudFile> cloudFiles) { + Map<String, CloudFile> map = new HashMap<>(); + for (CloudFile cloudFile : cloudFiles) { + map.put(cloudFile.getPath(), cloudFile); + } + + return map; + } +} diff --cc asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/util/CloudFileUtil.java index e4ad05e629,0000000000..7befacf027 mode 100644,000000..100644 --- a/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/util/CloudFileUtil.java +++ b/asterixdb/asterix-cloud/src/main/java/org/apache/asterix/cloud/util/CloudFileUtil.java @@@ -1,92 -1,0 +1,92 @@@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.asterix.cloud.util; + +import java.io.FilenameFilter; +import java.util.Iterator; +import java.util.Set; + +import org.apache.asterix.cloud.clients.CloudFile; +import org.apache.asterix.common.utils.StorageConstants; +import org.apache.hyracks.api.exceptions.HyracksDataException; +import org.apache.hyracks.api.io.FileReference; +import org.apache.hyracks.control.nc.io.IOManager; +import org.apache.hyracks.storage.am.lsm.common.impls.AbstractLSMIndexFileManager; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +public class CloudFileUtil { + private static final Logger LOGGER = LogManager.getLogger(); + + // TODO Should we consider bloomfilter and LAF files as metadata files so that they are downloaded on bootstrap? + public static final FilenameFilter METADATA_FILTER = + ((dir, name) -> name.startsWith(StorageConstants.INDEX_NON_DATA_FILES_PREFIX) + || name.endsWith(AbstractLSMIndexFileManager.LAF_SUFFIX) + || name.endsWith(AbstractLSMIndexFileManager.BLOOM_FILTER_SUFFIX)); + public static final FilenameFilter DATA_FILTER = ((dir, name) -> !METADATA_FILTER.accept(dir, name)); + + private CloudFileUtil() { + } + + public static void cleanDirectoryFiles(IOManager ioManager, Set<CloudFile> cloudFiles, FileReference partitionPath) + throws HyracksDataException { + // First get the set of local files + Set<FileReference> localFiles = ioManager.list(partitionPath); + Iterator<FileReference> localFilesIter = localFiles.iterator(); + LOGGER.info("Cleaning partition {}.", partitionPath.getRelativePath()); + + // Reconcile local files and cloud files + while (localFilesIter.hasNext()) { + FileReference file = localFilesIter.next(); + if (file.getFile().isDirectory()) { + continue; + } + + CloudFile path = CloudFile.of(file.getRelativePath(), ioManager.getSize(file)); + if (!cloudFiles.contains(path)) { + /* + * Delete local files that do not exist in cloud storage (the ground truth for valid files), or files + * that has not been downloaded completely. 
+ */ - logDeleteFile(file); ++ logDeleteFile(file, path); + localFilesIter.remove(); + ioManager.delete(file); + } else { + // No need to re-add it in the following loop + cloudFiles.remove(path); + } + } + + // Add the remaining files that are not stored locally (if any) + for (CloudFile cloudFile : cloudFiles) { + String cloudFilePath = cloudFile.getPath(); + if (!cloudFilePath.contains(partitionPath.getRelativePath())) { + continue; + } + localFiles.add(new FileReference(partitionPath.getDeviceHandle(), + cloudFilePath.substring(cloudFilePath.indexOf(partitionPath.getRelativePath())))); + } + } + - private static void logDeleteFile(FileReference fileReference) { ++ private static void logDeleteFile(FileReference fileReference, CloudFile path) { + LOGGER.info( + "Deleting {} from the local cache as {} either doesn't exist in the cloud or it wasn't downloaded completely", - fileReference, fileReference.getRelativePath()); ++ fileReference, path); + } +} diff --cc asterixdb/asterix-common/src/main/java/org/apache/asterix/common/api/IPropertiesFactory.java index 1d3f14eaf0,0520fe68f5..24b6642711 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/api/IPropertiesFactory.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/api/IPropertiesFactory.java @@@ -20,9 -20,9 +20,10 @@@ package org.apache.asterix.common.api import org.apache.asterix.common.config.ActiveProperties; import org.apache.asterix.common.config.BuildProperties; +import org.apache.asterix.common.config.CloudProperties; import org.apache.asterix.common.config.CompilerProperties; import org.apache.asterix.common.config.ExternalProperties; + import org.apache.asterix.common.config.JacksonProperties; import org.apache.asterix.common.config.MessagingProperties; import org.apache.asterix.common.config.MetadataProperties; import org.apache.asterix.common.config.NodeProperties; @@@ -102,5 -102,5 +103,7 @@@ public interface IPropertiesFactory */ NodeProperties newNodeProperties(); + JacksonProperties newJacksonProperties(); ++ + CloudProperties newCloudProperties(); } diff --cc asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/AsterixProperties.java index 2b91ded3f5,fab1d1ae61..8220b6e00b --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/AsterixProperties.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/AsterixProperties.java @@@ -37,7 -37,7 +37,7 @@@ public class AsterixProperties configManager.register(NodeProperties.Option.class, CompilerProperties.Option.class, MetadataProperties.Option.class, ExternalProperties.Option.class, ActiveProperties.Option.class, MessagingProperties.Option.class, ReplicationProperties.Option.class, StorageProperties.Option.class, - TransactionProperties.Option.class, CloudProperties.Option.class); - TransactionProperties.Option.class, JacksonProperties.Option.class); ++ TransactionProperties.Option.class, CloudProperties.Option.class, JacksonProperties.Option.class); // we need to process the old-style asterix config before we apply defaults! 
configManager.addConfigurator(IConfigManager.ConfiguratorMetric.APPLY_DEFAULTS.metric() - 1, () -> { diff --cc asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/PropertiesFactory.java index 87c853da46,88bf908fd5..25446b6ee8 --- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/PropertiesFactory.java +++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/PropertiesFactory.java @@@ -78,8 -78,8 +78,13 @@@ public class PropertiesFactory implemen return new NodeProperties(propertiesAccessor); } + @Override + public JacksonProperties newJacksonProperties() { + return new JacksonProperties(propertiesAccessor); + } ++ + @Override + public CloudProperties newCloudProperties() { + return new CloudProperties(propertiesAccessor); + } } diff --cc asterixdb/asterix-server/pom.xml index a20659c8ec,e450a18403..0fa467811d --- a/asterixdb/asterix-server/pom.xml +++ b/asterixdb/asterix-server/pom.xml @@@ -489,13 -472,9 +489,13 @@@ <noticeUrl>https://raw.githubusercontent.com/msgpack/msgpack-java/0.8.20/NOTICE</noticeUrl> </override> <override> - <gav>com.github.luben:zstd-jni:1.5.0-1</gav> - <url>https://raw.githubusercontent.com/luben/zstd-jni/v1.5.0-1/LICENSE</url> + <gav>com.github.luben:zstd-jni:1.5.6-6</gav> + <url>https://raw.githubusercontent.com/luben/zstd-jni/v1.5.6-6/LICENSE</url> </override> + <override> + <gav>com.github.luben:zstd-jni:1.5.6-2</gav> + <url>https://raw.githubusercontent.com/luben/zstd-jni/v1.5.6-2/LICENSE</url> + </override> <override> <gav>org.slf4j:slf4j-reload4j:1.7.36</gav> <url>https://raw.githubusercontent.com/qos-ch/slf4j/v_1.7.36/LICENSE.txt</url> diff --cc asterixdb/pom.xml index 65157a11f7,a55752b2d9..3d337da4bf --- a/asterixdb/pom.xml +++ b/asterixdb/pom.xml @@@ -94,20 -88,17 +94,24 @@@ <hadoop.version>3.4.1</hadoop.version> <jacoco.version>0.7.6.201602180812</jacoco.version> <log4j.version>2.22.1</log4j.version> + <!-- IMPORTANT: please keep the aws-crt version in sync with that defined in the AWS SDK BOM --> + <!-- you can get this by inspecting the aws-sdk-java-pom for the SDK version. e.g. 
+ $ curl -s https://repo1.maven.org/maven2/software/amazon/awssdk/aws-sdk-java-pom/2.31.57/aws-sdk-java-pom-2.31.57.pom | grep awscrt.version + <awscrt.version>0.38.1</awscrt.version> + --> <awsjavasdk.version>2.29.27</awsjavasdk.version> - <parquet.version>1.15.2</parquet.version> <!-- NOTICE: please update transitives from parquet below on any change --> - <hadoop-awsjavasdk.version>1.12.779</hadoop-awsjavasdk.version> + <awsjavasdk.crt.version>0.33.3</awsjavasdk.crt.version> + + <parquet.version>1.15.2</parquet.version> - <hadoop-awsjavasdk.version>1.12.779</hadoop-awsjavasdk.version> - <azureblobjavasdk.version>12.25.1</azureblobjavasdk.version> - <azurecommonjavasdk.version>12.24.1</azurecommonjavasdk.version> - <azureidentity.version>1.13.3</azureidentity.version> - <azuredatalakejavasdk.version>12.18.1</azuredatalakejavasdk.version> ++ <hadoop-awsjavasdk.version>1.12.788</hadoop-awsjavasdk.version> + + <azureblobjavasdk.version>12.31.1</azureblobjavasdk.version> + <azurecommonjavasdk.version>12.30.1</azurecommonjavasdk.version> + <azureidentity.version>1.17.0</azureidentity.version> + <azuredatalakejavasdk.version>12.24.1</azuredatalakejavasdk.version> + <azurecore.version>1.56.0</azurecore.version> + <azurecorehttpnetty.version>1.16.0</azurecorehttpnetty.version> + <gcsjavasdk.version>2.45.0</gcsjavasdk.version> <hadoop-azuresdk.version>8.6.6</hadoop-azuresdk.version> <hadoop-gcs.version>hadoop3-2.2.25</hadoop-gcs.version> @@@ -636,13 -626,7 +640,13 @@@ -DrunSlowAQLTests=${runSlowAQLTests} -Xdebug -Xrunjdwp:transport=dt_socket,server=y,address=8000,suspend=${debug.suspend.flag} - --add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED - --add-opens java.management/sun.management=ALL-UNNAMED - --add-opens java.base/java.lang=ALL-UNNAMED - --add-opens java.base/java.nio=ALL-UNNAMED - --add-opens java.base/java.util=ALL-UNNAMED - --add-opens java.base/sun.nio.ch=ALL-UNNAMED - --add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED --add-opens=java.management/sun.management=ALL-UNNAMED --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.nio=ALL-UNNAMED --add-opens=java.base/java.util=ALL-UNNAMED ++ --add-opens jdk.management/com.sun.management.internal=ALL-UNNAMED ++ --add-opens java.management/sun.management=ALL-UNNAMED ++ --add-opens java.base/java.lang=ALL-UNNAMED ++ --add-opens java.base/java.nio=ALL-UNNAMED ++ --add-opens java.base/java.util=ALL-UNNAMED ++ --add-opens java.base/sun.nio.ch=ALL-UNNAMED + --add-opens java.base/java.io=ALL-UNNAMED ${coverageArgLine} ${extraSurefireArgLine} </argLine> @@@ -1702,26 -1725,46 +1740,53 @@@ </dependency> <dependency> <groupId>org.apache.parquet</groupId> - <artifactId>parquet-common</artifactId> + <artifactId>parquet-format-structures</artifactId> + <version>${parquet.version}</version> + </dependency> + <dependency> + <groupId>org.apache.parquet</groupId> + <artifactId>parquet-encoding</artifactId> <version>${parquet.version}</version> + <exclusions> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + </exclusion> + </exclusions> </dependency> <dependency> <groupId>org.apache.parquet</groupId> - <artifactId>parquet-encoding</artifactId> + <artifactId>parquet-common</artifactId> <version>${parquet.version}</version> - </dependency> - <dependency> - <groupId>org.apache.parquet</groupId> - <artifactId>parquet-jackson</artifactId> - <version>${parquet.version}</version> + <exclusions> + <exclusion> + <groupId>org.slf4j</groupId> + <artifactId>slf4j-api</artifactId> + 
</exclusion> + </exclusions> </dependency> + <!-- BEGIN: transitive from parquet, please validate with any update of ${parquet.version} --> + <dependency> + <groupId>io.airlift</groupId> + <artifactId>aircompressor</artifactId> + <version>2.0.2</version> + </dependency> + <dependency> + <groupId>com.github.luben</groupId> + <artifactId>zstd-jni</artifactId> + <version>1.5.6-6</version> + </dependency> + <dependency> + <groupId>commons-pool</groupId> + <artifactId>commons-pool</artifactId> + <version>1.6</version> + </dependency> + <!-- END: transitive from parquet, please validate with any update of ${parquet.version} --> + <dependency> + <groupId>org.kitesdk</groupId> + <artifactId>kite-data-core</artifactId> + <version>1.1.0</version> + </dependency> <!-- Hadoop AWS start --> <dependency> <!-- Pick a newer AWS SDK --> diff --cc hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java index 9a8d9acab6,db8014b8c1..ec07affbdf --- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java +++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java @@@ -104,11 -102,8 +104,10 @@@ public class NCConfig extends Controlle PYTHON_DS_PATH(STRING, (String) null), CREDENTIAL_FILE( OptionTypes.STRING, - (Function<IApplicationConfig, String>) appConfig -> FileUtil - .joinPath(appConfig.getString(ControllerConfig.Option.DEFAULT_DIR), "passwd"), + appConfig -> FileUtil.joinPath(appConfig.getString(ControllerConfig.Option.DEFAULT_DIR), "passwd"), - ControllerConfig.Option.DEFAULT_DIR.cmdline() + "/passwd"); + ControllerConfig.Option.DEFAULT_DIR.cmdline() + "/passwd"), + STORAGE_MAX_COLUMNS_IN_ZEROTH_SEGMENT(INTEGER_BYTE_UNIT, 5000), + STORAGE_PAGE_ZERO_WRITER(STRING, "default"); private final IOptionType parser; private final String defaultValueDescription; diff --cc hyracks-fullstack/pom.xml index aa1f8a2bad,241252fa81..fc3950bd93 --- a/hyracks-fullstack/pom.xml +++ b/hyracks-fullstack/pom.xml @@@ -73,12 -73,9 +73,12 @@@ <jacoco.version>0.7.6.201602180812</jacoco.version> <log4j.version>2.22.1</log4j.version> <snappy.version>1.1.10.5</snappy.version> - <jackson.version>2.14.3</jackson.version> + <jackson.version>2.19.2</jackson.version> <jackson-databind.version>${jackson.version}</jackson-databind.version> - <netty.version>4.1.121.Final</netty.version> + <netty.version>4.1.124.Final</netty.version> + <asm.version>9.3</asm.version> + <awsjavasdk.version>2.29.27</awsjavasdk.version> + <gcsjavasdk.version>2.45.0</gcsjavasdk.version> <implementation.title>Apache Hyracks and Algebricks - ${project.name}</implementation.title> <implementation.url>https://asterixdb.apache.org/</implementation.url> @@@ -383,6 -451,21 +454,26 @@@ <artifactId>jackson-annotations</artifactId> <version>${jackson.version}</version> </dependency> + <dependency> + <groupId>com.fasterxml.jackson.dataformat</groupId> + <artifactId>jackson-dataformat-cbor</artifactId> + <version>${jackson.version}</version> + </dependency> + <dependency> + <groupId>com.fasterxml.jackson.dataformat</groupId> + <artifactId>jackson-dataformat-xml</artifactId> + <version>${jackson.version}</version> + </dependency> ++ <dependency> ++ <groupId>com.fasterxml.jackson.dataformat</groupId> ++ <artifactId>jackson-dataformat-csv</artifactId> ++ <version>${jackson.version}</version> ++ </dependency> + <dependency> + 
<groupId>com.fasterxml.jackson.datatype</groupId> + <artifactId>jackson-datatype-jsr310</artifactId> + <version>${jackson.version}</version> + </dependency> <dependency> <groupId>com.google.guava</groupId> <artifactId>guava</artifactId>
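
Note on the new json\.max\.* entries that appear in the cluster_state regexadm results above: they appear to line up with the read limits Jackson exposes through StreamReadConstraints (available since Jackson 2.15, part of the 2.19.2 line this merge moves to). The JacksonProperties class added by this commit is not included in this excerpt, so the snippet below is only a hedged sketch of how such limits are typically applied to a JsonFactory/ObjectMapper; the numeric values are copied from the test output, while the class and method names around them are illustrative assumptions, not the AsterixDB code.

    import com.fasterxml.jackson.core.JsonFactory;
    import com.fasterxml.jackson.core.StreamReadConstraints;
    import com.fasterxml.jackson.databind.ObjectMapper;

    public final class JsonLimitsSketch {
        // Hypothetical helper: builds a mapper using the limits reported by cluster_state_1.
        public static ObjectMapper newLimitedMapper() {
            // json.max.depth=1000, json.max.name.length=50000,
            // json.max.number.length=1000, json.max.string.length=2147483647.
            // json.max.doc.length and json.max.token.count are -1 (unlimited),
            // so no corresponding constraints are set here.
            StreamReadConstraints constraints = StreamReadConstraints.builder()
                    .maxNestingDepth(1000)
                    .maxNameLength(50_000)
                    .maxNumberLength(1000)
                    .maxStringLength(Integer.MAX_VALUE)
                    .build();
            JsonFactory factory = JsonFactory.builder()
                    .streamReadConstraints(constraints)
                    .build();
            return new ObjectMapper(factory);
        }
    }

With these settings, string length is effectively uncapped (matching the 2147483647 shown for json\.max\.string\.length) while nesting depth, name length, and number length stay at Jackson's stock defaults.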
