This is an automated email from the ASF dual-hosted git repository.

imaxon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new d494c31110 [ASTERIXDB-3609][API][RT] Library archive limits
d494c31110 is described below

commit d494c311107da4d21a17dd9d2f6cde5f04b8d969
Author: Ian Maxon <[email protected]>
AuthorDate: Tue May 13 18:52:28 2025 -0700

    [ASTERIXDB-3609][API][RT] Library archive limits
    
    - user model changes: no
    - storage format changes: no
    - interface changes: no
    
    Details:
    Library archives might be highly compressed or have
    a very high number of entries. We should check for
    these conditions when extracting them and make sure
    they conform to reasonable limits for the use cases
    we expect.
    
    Ext-ref: MB-66704
    Change-Id: I524cbfb6685bd3c1e326ad2db48482e5aba1ce6d
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19771
    Integration-Tests: Jenkins <[email protected]>
    Tested-by: Jenkins <[email protected]>
    Reviewed-by: Ian Maxon <[email protected]>
    Reviewed-by: Peeyush Gupta <[email protected]>
---
 .../asterix/app/nc/task/RetrieveLibrariesTask.java |  4 +-
 .../asterix/common/library/ILibraryManager.java    |  6 ++-
 .../external/library/ExternalLibraryManager.java   | 45 ++++++++++++++++++----
 .../control/common/controllers/NCConfig.java       | 22 +++++++++++
 4 files changed, 67 insertions(+), 10 deletions(-)

diff --git 
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
 
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
index 715e62659b..38ad7ac300 100644
--- 
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
+++ 
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
@@ -91,7 +91,9 @@ public class RetrieveLibrariesTask implements 
INCLifecycleTask {
             libraryManager.download(targetFileRef, authToken, libraryURI);
             Path outputDirPath = 
libraryManager.getStorageDir().getFile().toPath().toAbsolutePath().normalize();
             FileReference outPath = 
appContext.getIoManager().resolveAbsolutePath(outputDirPath.toString());
-            libraryManager.unzip(targetFileRef, outPath);
+            //we have to bypass limits here on the unzip, because this archive 
is all libraries, so potentially limit*N
+            //and knowing N here would be difficult since metadata is not 
available.
+            libraryManager.unzip(targetFileRef, outPath, false);
         } catch (IOException e) {
             LOGGER.error("Unable to retrieve UDFs from " + 
libraryURI.toString() + " before timeout");
             throw HyracksDataException.create(e);
diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
index db87c60daa..5c888111d3 100644
--- 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
+++ 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
@@ -73,8 +73,10 @@ public interface ILibraryManager {
 
     void unzip(FileReference sourceFile, FileReference outputDir) throws 
IOException;
 
-    void writeAndForce(FileReference outputFile, InputStream dataStream, 
byte[] copyBuffer, IIOManager localIoManager)
-            throws IOException;
+    void unzip(FileReference sourceFile, FileReference outputDir, boolean 
limited) throws IOException;
+
+    long writeAndForce(FileReference outputFile, InputStream dataStream, 
byte[] copyBuffer, IIOManager localIoManager,
+            boolean limited) throws IOException;
 
     void setUploadClient(Function<ILibraryManager, CloseableHttpClient> f);
 
diff --git 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
index f15e96fa81..706b1f9022 100755
--- 
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
+++ 
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
@@ -111,6 +111,7 @@ import com.fasterxml.jackson.core.JsonProcessingException;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.SerializationFeature;
+import com.google.common.io.ByteStreams;
 
 public class ExternalLibraryManager implements ILibraryManager, 
ILifeCycleComponent {
 
@@ -152,6 +153,9 @@ public class ExternalLibraryManager implements 
ILibraryManager, ILifeCycleCompon
     private final INamespacePathResolver namespacePathResolver;
     private final boolean sslEnabled;
     private final boolean cloudMode;
+    private final long maxFileSize;
+    private final long maxTotalSize;
+    private final int maxEntries;
     private Function<ILibraryManager, CloseableHttpClient> uploadClientSupp;
 
     public ExternalLibraryManager(NodeControllerService ncs, 
IPersistedResourceRegistry reg, FileReference appDir,
@@ -171,6 +175,9 @@ public class ExternalLibraryManager implements 
ILibraryManager, ILifeCycleCompon
         this.ioManager = ioManager;
         uploadClientSupp = ExternalLibraryManager::defaultHttpClient;
         cloudMode = ncs.getConfiguration().isCloudDeployment();
+        maxFileSize = ncs.getConfiguration().getLibraryMaxFileSize();
+        maxTotalSize = ncs.getConfiguration().getLibraryMaxExtractedSize();
+        maxEntries = ncs.getConfiguration().getLibraryMaxArchiveEntries();
     }
 
     public void initialize(boolean resetStorageData) throws 
HyracksDataException {
@@ -655,6 +662,11 @@ public class ExternalLibraryManager implements 
ILibraryManager, ILifeCycleCompon
 
     @Override
     public void unzip(FileReference sourceFile, FileReference outputDir) 
throws IOException {
+        unzip(sourceFile, outputDir, true);
+    }
+
+    @Override
+    public void unzip(FileReference sourceFile, FileReference outputDir, 
boolean limited) throws IOException {
         boolean logTraceEnabled = LOGGER.isTraceEnabled();
         IIOManager localIoManager = ioManager;
         if (ncs.getConfiguration().isCloudDeployment()) {
@@ -665,7 +677,18 @@ public class ExternalLibraryManager implements 
ILibraryManager, ILifeCycleCompon
         try (ZipFile zipFile = new ZipFile(sourceFile.getFile())) {
             Enumeration<? extends ZipEntry> entries = zipFile.entries();
             byte[] writeBuf = new byte[4096];
+            int numEntries = 0;
+            long totalSize = 0;
             while (entries.hasMoreElements()) {
+                if (limited && numEntries >= maxEntries) {
+                    throw new IOException(
+                            "Library archive contains more files and 
directories than configuration permits");
+                }
+                //may exceed the total allowable size by the maximum size of 
one file, because we can't know how
+                //big the file is until we actually attempt to write it.
+                if (limited && totalSize > maxTotalSize) {
+                    throw new IOException("Library archive extracted size 
exceeds maximum configured allowable size");
+                }
                 ZipEntry entry = entries.nextElement();
                 if (entry.isDirectory()) {
                     continue;
@@ -685,8 +708,9 @@ public class ExternalLibraryManager implements 
ILibraryManager, ILifeCycleCompon
                     if (logTraceEnabled) {
                         LOGGER.trace("Extracting file {}", entryOutputFileRef);
                     }
-                    writeAndForce(entryOutputFileRef, in, writeBuf, 
localIoManager);
+                    totalSize += writeAndForce(entryOutputFileRef, in, 
writeBuf, localIoManager, limited);
                 }
+                numEntries++;
             }
         }
         for (Path newDir : newDirs) {
@@ -695,16 +719,23 @@ public class ExternalLibraryManager implements 
ILibraryManager, ILifeCycleCompon
     }
 
     @Override
-    public void writeAndForce(FileReference outputFile, InputStream 
dataStream, byte[] copyBuffer,
-            IIOManager localIoManager) throws IOException {
+    public long writeAndForce(FileReference outputFile, InputStream 
dataStream, byte[] copyBuffer,
+            IIOManager localIoManager, boolean limited) throws IOException {
+        long written;
         outputFile.getFile().createNewFile();
         IFileHandle fHandle = localIoManager.open(outputFile, 
IIOManager.FileReadWriteMode.READ_WRITE,
                 IIOManager.FileSyncMode.METADATA_ASYNC_DATA_ASYNC);
         WritableByteChannel outChannel = 
localIoManager.newWritableChannel(fHandle);
         try (OutputStream outputStream = Channels.newOutputStream(outChannel)) 
{
-            IOUtils.copyLarge(dataStream, outputStream, copyBuffer);
+            InputStream limitedStream = ByteStreams.limit(dataStream, 
maxFileSize);
+            written = ByteStreams.copy(limitedStream, outputStream);
+            //Check if after writing the limited stream, there's still data to 
be written from the entry
+            if (limited && dataStream.available() > 0) {
+                throw new IOException("Library contains file exceeding maximum 
configured allowable size");
+            }
             outputStream.flush();
             localIoManager.sync(fHandle, true);
+            return written;
         } finally {
             localIoManager.close(fHandle);
         }
@@ -750,9 +781,9 @@ public class ExternalLibraryManager implements 
ILibraryManager, ILifeCycleCompon
         }
         try {
             if (ncs.getConfiguration().isCloudDeployment()) {
-                writeAndForce(outputFile, is, copyBuf, ((ICloudIOManager) 
ioManager).getLocalIOManager());
+                writeAndForce(outputFile, is, copyBuf, ((ICloudIOManager) 
ioManager).getLocalIOManager(), true);
             } else {
-                writeAndForce(outputFile, is, copyBuf, ioManager);
+                writeAndForce(outputFile, is, copyBuf, ioManager, true);
             }
         } finally {
             is.close();
@@ -768,7 +799,7 @@ public class ExternalLibraryManager implements 
ILibraryManager, ILifeCycleCompon
             boolean cloud, byte[] copyBuf) throws IOException {
         byte[] bytes = libraryManager.serializeLibraryDescriptor(desc);
         libraryManager.writeAndForce(descFile, new 
ByteArrayInputStream(bytes), copyBuf,
-                libraryManager.getCloudIOManager());
+                libraryManager.getCloudIOManager(), true);
     }
 
 }
diff --git 
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
 
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
index 44b2fdc39b..90bc499966 100644
--- 
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
+++ 
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
@@ -24,6 +24,7 @@ import static 
org.apache.hyracks.control.common.config.OptionTypes.INTEGER_BYTE_
 import static org.apache.hyracks.control.common.config.OptionTypes.LONG;
 import static 
org.apache.hyracks.control.common.config.OptionTypes.NONNEGATIVE_INTEGER;
 import static 
org.apache.hyracks.control.common.config.OptionTypes.POSITIVE_INTEGER;
+import static 
org.apache.hyracks.control.common.config.OptionTypes.POSITIVE_LONG_BYTE_UNIT;
 import static org.apache.hyracks.control.common.config.OptionTypes.STRING;
 import static 
org.apache.hyracks.control.common.config.OptionTypes.STRING_ARRAY;
 
@@ -102,6 +103,9 @@ public class NCConfig extends ControllerConfig {
         PYTHON_ARGS(STRING_ARRAY, (String[]) null),
         PYTHON_ENV(STRING_ARRAY, (String[]) null),
         PYTHON_DS_PATH(STRING, (String) null),
+        LIBRARY_MAX_FILE_SIZE(POSITIVE_LONG_BYTE_UNIT, 250L * 1024 * 1024), 
//250MB
+        LIBRARY_MAX_EXTRACTED_SIZE(POSITIVE_LONG_BYTE_UNIT, 1000L * 1024 * 
1024), //1GB
+        LIBRARY_MAX_ARCHIVE_ENTRIES(INTEGER, 4096),
         CREDENTIAL_FILE(
                 OptionTypes.STRING,
                 (Function<IApplicationConfig, String>) appConfig -> FileUtil
@@ -253,6 +257,12 @@ public class NCConfig extends ControllerConfig {
                     return "List of environment variables to set when invoking 
the Python interpreter for Python UDFs. E.g. FOO=1";
                 case PYTHON_DS_PATH:
                     return "Path to systemd socket for fenced Python UDFs. 
Requires JDK17+, *nix operating system, and ";
+                case LIBRARY_MAX_FILE_SIZE:
+                    return "Maximum file size for any one given file in a zip 
archive";
+                case LIBRARY_MAX_EXTRACTED_SIZE:
+                    return "Maximum overall extracted size for a library";
+                case LIBRARY_MAX_ARCHIVE_ENTRIES:
+                    return "Maximum number of files and directories allowed 
within a library";
                 case CREDENTIAL_FILE:
                     return "Path to HTTP basic credentials";
                 case ABORT_TASKS_TIMEOUT:
@@ -636,4 +646,16 @@ public class NCConfig extends ControllerConfig {
         return appConfig.getInt(Option.ABORT_TASKS_TIMEOUT);
     }
 
+    public long getLibraryMaxFileSize() {
+        return appConfig.getLong(Option.LIBRARY_MAX_FILE_SIZE);
+    }
+
+    public long getLibraryMaxExtractedSize() {
+        return appConfig.getLong(Option.LIBRARY_MAX_EXTRACTED_SIZE);
+    }
+
+    public int getLibraryMaxArchiveEntries() {
+        return appConfig.getInt(Option.LIBRARY_MAX_ARCHIVE_ENTRIES);
+    }
+
 }

Reply via email to