This is an automated email from the ASF dual-hosted git repository.
imaxon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git
The following commit(s) were added to refs/heads/master by this push:
new d494c31110 [ASTERIXDB-3609][API][RT] Library archive limits
d494c31110 is described below
commit d494c311107da4d21a17dd9d2f6cde5f04b8d969
Author: Ian Maxon <[email protected]>
AuthorDate: Tue May 13 18:52:28 2025 -0700
[ASTERIXDB-3609][API][RT] Library archive limits
- user model changes: no
- storage format changes: no
- interface changes: no
Details:
Library archives might be highly compressed or have
a very high number of entries. We should check for
these conditions when extracting them and make sure
they conform to reasonable limits for the use cases
we expect.
Ext-ref: MB-66704
Change-Id: I524cbfb6685bd3c1e326ad2db48482e5aba1ce6d
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/19771
Integration-Tests: Jenkins <[email protected]>
Tested-by: Jenkins <[email protected]>
Reviewed-by: Ian Maxon <[email protected]>
Reviewed-by: Peeyush Gupta <[email protected]>
---
.../asterix/app/nc/task/RetrieveLibrariesTask.java | 4 +-
.../asterix/common/library/ILibraryManager.java | 6 ++-
.../external/library/ExternalLibraryManager.java | 45 ++++++++++++++++++----
.../control/common/controllers/NCConfig.java | 22 +++++++++++
4 files changed, 67 insertions(+), 10 deletions(-)
diff --git
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
index 715e62659b..38ad7ac300 100644
---
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
+++
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/nc/task/RetrieveLibrariesTask.java
@@ -91,7 +91,9 @@ public class RetrieveLibrariesTask implements
INCLifecycleTask {
libraryManager.download(targetFileRef, authToken, libraryURI);
Path outputDirPath =
libraryManager.getStorageDir().getFile().toPath().toAbsolutePath().normalize();
FileReference outPath =
appContext.getIoManager().resolveAbsolutePath(outputDirPath.toString());
- libraryManager.unzip(targetFileRef, outPath);
+ //we have to bypass limits here on the unzip, because this archive
is all libraries, so potentially limit*N
+ //and knowing N here would be difficult since metadata is not
available.
+ libraryManager.unzip(targetFileRef, outPath, false);
} catch (IOException e) {
LOGGER.error("Unable to retrieve UDFs from " +
libraryURI.toString() + " before timeout");
throw HyracksDataException.create(e);
diff --git
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
index db87c60daa..5c888111d3 100644
---
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
+++
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/library/ILibraryManager.java
@@ -73,8 +73,10 @@ public interface ILibraryManager {
void unzip(FileReference sourceFile, FileReference outputDir) throws
IOException;
- void writeAndForce(FileReference outputFile, InputStream dataStream,
byte[] copyBuffer, IIOManager localIoManager)
- throws IOException;
+ void unzip(FileReference sourceFile, FileReference outputDir, boolean
limited) throws IOException;
+
+ long writeAndForce(FileReference outputFile, InputStream dataStream,
byte[] copyBuffer, IIOManager localIoManager,
+ boolean limited) throws IOException;
void setUploadClient(Function<ILibraryManager, CloseableHttpClient> f);
diff --git
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
index f15e96fa81..706b1f9022 100755
---
a/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
+++
b/asterixdb/asterix-external-data/src/main/java/org/apache/asterix/external/library/ExternalLibraryManager.java
@@ -111,6 +111,7 @@ import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
+import com.google.common.io.ByteStreams;
public class ExternalLibraryManager implements ILibraryManager,
ILifeCycleComponent {
@@ -152,6 +153,9 @@ public class ExternalLibraryManager implements
ILibraryManager, ILifeCycleCompon
private final INamespacePathResolver namespacePathResolver;
private final boolean sslEnabled;
private final boolean cloudMode;
+ private final long maxFileSize;
+ private final long maxTotalSize;
+ private final int maxEntries;
private Function<ILibraryManager, CloseableHttpClient> uploadClientSupp;
public ExternalLibraryManager(NodeControllerService ncs,
IPersistedResourceRegistry reg, FileReference appDir,
@@ -171,6 +175,9 @@ public class ExternalLibraryManager implements
ILibraryManager, ILifeCycleCompon
this.ioManager = ioManager;
uploadClientSupp = ExternalLibraryManager::defaultHttpClient;
cloudMode = ncs.getConfiguration().isCloudDeployment();
+ maxFileSize = ncs.getConfiguration().getLibraryMaxFileSize();
+ maxTotalSize = ncs.getConfiguration().getLibraryMaxExtractedSize();
+ maxEntries = ncs.getConfiguration().getLibraryMaxArchiveEntries();
}
public void initialize(boolean resetStorageData) throws
HyracksDataException {
@@ -655,6 +662,11 @@ public class ExternalLibraryManager implements
ILibraryManager, ILifeCycleCompon
@Override
public void unzip(FileReference sourceFile, FileReference outputDir)
throws IOException {
+ unzip(sourceFile, outputDir, true);
+ }
+
+ @Override
+ public void unzip(FileReference sourceFile, FileReference outputDir,
boolean limited) throws IOException {
boolean logTraceEnabled = LOGGER.isTraceEnabled();
IIOManager localIoManager = ioManager;
if (ncs.getConfiguration().isCloudDeployment()) {
@@ -665,7 +677,18 @@ public class ExternalLibraryManager implements
ILibraryManager, ILifeCycleCompon
try (ZipFile zipFile = new ZipFile(sourceFile.getFile())) {
Enumeration<? extends ZipEntry> entries = zipFile.entries();
byte[] writeBuf = new byte[4096];
+ int numEntries = 0;
+ long totalSize = 0;
while (entries.hasMoreElements()) {
+ if (limited && numEntries >= maxEntries) {
+ throw new IOException(
+ "Library archive contains more files and
directories than configuration permits");
+ }
+ //may exceed the total allowable size by the maximum size of
one file, because we can't know how
+ //big the file is until we actually attempt to write it.
+ if (limited && totalSize > maxTotalSize) {
+ throw new IOException("Library archive extracted size
exceeds maximum configured allowable size");
+ }
ZipEntry entry = entries.nextElement();
if (entry.isDirectory()) {
continue;
@@ -685,8 +708,9 @@ public class ExternalLibraryManager implements
ILibraryManager, ILifeCycleCompon
if (logTraceEnabled) {
LOGGER.trace("Extracting file {}", entryOutputFileRef);
}
- writeAndForce(entryOutputFileRef, in, writeBuf,
localIoManager);
+ totalSize += writeAndForce(entryOutputFileRef, in,
writeBuf, localIoManager, limited);
}
+ numEntries++;
}
}
for (Path newDir : newDirs) {
@@ -695,16 +719,23 @@ public class ExternalLibraryManager implements
ILibraryManager, ILifeCycleCompon
}
@Override
- public void writeAndForce(FileReference outputFile, InputStream
dataStream, byte[] copyBuffer,
- IIOManager localIoManager) throws IOException {
+ public long writeAndForce(FileReference outputFile, InputStream
dataStream, byte[] copyBuffer,
+ IIOManager localIoManager, boolean limited) throws IOException {
+ long written;
outputFile.getFile().createNewFile();
IFileHandle fHandle = localIoManager.open(outputFile,
IIOManager.FileReadWriteMode.READ_WRITE,
IIOManager.FileSyncMode.METADATA_ASYNC_DATA_ASYNC);
WritableByteChannel outChannel =
localIoManager.newWritableChannel(fHandle);
try (OutputStream outputStream = Channels.newOutputStream(outChannel))
{
- IOUtils.copyLarge(dataStream, outputStream, copyBuffer);
+ InputStream limitedStream = ByteStreams.limit(dataStream,
maxFileSize);
+ written = ByteStreams.copy(limitedStream, outputStream);
+ //Check if after writing the limited stream, there's still data to
be written from the entry
+ if (limited && dataStream.available() > 0) {
+ throw new IOException("Library contains file exceeding maximum
configured allowable size");
+ }
outputStream.flush();
localIoManager.sync(fHandle, true);
+ return written;
} finally {
localIoManager.close(fHandle);
}
@@ -750,9 +781,9 @@ public class ExternalLibraryManager implements
ILibraryManager, ILifeCycleCompon
}
try {
if (ncs.getConfiguration().isCloudDeployment()) {
- writeAndForce(outputFile, is, copyBuf, ((ICloudIOManager)
ioManager).getLocalIOManager());
+ writeAndForce(outputFile, is, copyBuf, ((ICloudIOManager)
ioManager).getLocalIOManager(), true);
} else {
- writeAndForce(outputFile, is, copyBuf, ioManager);
+ writeAndForce(outputFile, is, copyBuf, ioManager, true);
}
} finally {
is.close();
@@ -768,7 +799,7 @@ public class ExternalLibraryManager implements
ILibraryManager, ILifeCycleCompon
boolean cloud, byte[] copyBuf) throws IOException {
byte[] bytes = libraryManager.serializeLibraryDescriptor(desc);
libraryManager.writeAndForce(descFile, new
ByteArrayInputStream(bytes), copyBuf,
- libraryManager.getCloudIOManager());
+ libraryManager.getCloudIOManager(), true);
}
}
diff --git
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
index 44b2fdc39b..90bc499966 100644
---
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
+++
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
@@ -24,6 +24,7 @@ import static
org.apache.hyracks.control.common.config.OptionTypes.INTEGER_BYTE_
import static org.apache.hyracks.control.common.config.OptionTypes.LONG;
import static
org.apache.hyracks.control.common.config.OptionTypes.NONNEGATIVE_INTEGER;
import static
org.apache.hyracks.control.common.config.OptionTypes.POSITIVE_INTEGER;
+import static
org.apache.hyracks.control.common.config.OptionTypes.POSITIVE_LONG_BYTE_UNIT;
import static org.apache.hyracks.control.common.config.OptionTypes.STRING;
import static
org.apache.hyracks.control.common.config.OptionTypes.STRING_ARRAY;
@@ -102,6 +103,9 @@ public class NCConfig extends ControllerConfig {
PYTHON_ARGS(STRING_ARRAY, (String[]) null),
PYTHON_ENV(STRING_ARRAY, (String[]) null),
PYTHON_DS_PATH(STRING, (String) null),
+ LIBRARY_MAX_FILE_SIZE(POSITIVE_LONG_BYTE_UNIT, 250L * 1024 * 1024),
//250MB
+ LIBRARY_MAX_EXTRACTED_SIZE(POSITIVE_LONG_BYTE_UNIT, 1000L * 1024 *
1024), //1GB
+ LIBRARY_MAX_ARCHIVE_ENTRIES(INTEGER, 4096),
CREDENTIAL_FILE(
OptionTypes.STRING,
(Function<IApplicationConfig, String>) appConfig -> FileUtil
@@ -253,6 +257,12 @@ public class NCConfig extends ControllerConfig {
return "List of environment variables to set when invoking
the Python interpreter for Python UDFs. E.g. FOO=1";
case PYTHON_DS_PATH:
return "Path to systemd socket for fenced Python UDFs.
Requires JDK17+, *nix operating system, and ";
+ case LIBRARY_MAX_FILE_SIZE:
+ return "Maximum file size for any one given file in a zip
archive";
+ case LIBRARY_MAX_EXTRACTED_SIZE:
+ return "Maximum overall extracted size for a library";
+ case LIBRARY_MAX_ARCHIVE_ENTRIES:
+ return "Maximum number of files and directories allowed
within a library";
case CREDENTIAL_FILE:
return "Path to HTTP basic credentials";
case ABORT_TASKS_TIMEOUT:
@@ -636,4 +646,16 @@ public class NCConfig extends ControllerConfig {
return appConfig.getInt(Option.ABORT_TASKS_TIMEOUT);
}
+ public long getLibraryMaxFileSize() {
+ return appConfig.getLong(Option.LIBRARY_MAX_FILE_SIZE);
+ }
+
+ public long getLibraryMaxExtractedSize() {
+ return appConfig.getLong(Option.LIBRARY_MAX_EXTRACTED_SIZE);
+ }
+
+ public int getLibraryMaxArchiveEntries() {
+ return appConfig.getInt(Option.LIBRARY_MAX_ARCHIVE_ENTRIES);
+ }
+
}