This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch opennlp-1.x
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/opennlp-1.x by this push:
     new dfd561a22 [1.x] OPENNLP-1821: Prevent OutOfMemory due to huge array allocation (#1079)
dfd561a22 is described below

commit dfd561a22ecb95cb43f452edd4ad478fa14c97bc
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri Jun 12 16:29:00 2026 +0200

    [1.x] OPENNLP-1821: Prevent OutOfMemory due to huge array allocation (#1079)
    
    Backport of #1022 to opennlp-1.x.
    
    AbstractModelReader read count fields (outcomes, outcome patterns,
    predicates) straight from the model stream and used them as array
    sizes. A crafted model could declare an oversized or negative count
    and trigger OOM / DoS before any data was read.
    
    Guard each count against a configurable upper bound (MAX_ENTRIES,
    default 10_000_000, overridable via the OPENNLP_MAX_ENTRIES system
    property) and reject negative values with IllegalArgumentException
    before allocating.
    
    Adapted for opennlp-1.x: JUnit 4 test; the 2.x ModelParameterChunker
    hunk is omitted (that class does not exist on 1.x and the change was a
    cosmetic blank line).
---
 .../tools/ml/model/AbstractModelReader.java        |  38 ++++++
 .../tools/ml/model/AbstractModelReaderOomTest.java | 133 +++++++++++++++++++++
 2 files changed, 171 insertions(+)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
index 085b85608..433e2c265 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ml/model/AbstractModelReader.java
@@ -27,6 +27,32 @@ import java.util.zip.GZIPInputStream;
 
 public abstract class AbstractModelReader {
 
+  /**
+   * System property for overriding the maximum number of entries (outcomes, predicates,
+   * outcome patterns, chunk counts) that may be read from a model file or training data.
+   * Set at JVM startup, e.g. {@code -DOPENNLP_MAX_ENTRIES=5000000}.
+   * Falls back to {@code 10_000_000} if absent or invalid.
+   */
+  public static final String MAX_ENTRIES_PROPERTY = "OPENNLP_MAX_ENTRIES";
+
+  /**
+   * Upper bound on count fields read from a model file.
+   * Prevents OOM on crafted inputs with oversized array size declarations.
+   * Configurable via the {@link #MAX_ENTRIES_PROPERTY} system property.
+   */
+  static final int MAX_ENTRIES = initMaxEntries();
+
+  private static int initMaxEntries() {
+    String prop = System.getProperty(MAX_ENTRIES_PROPERTY, "").trim();
+    if (!prop.isEmpty()) {
+      try {
+        int val = Integer.parseInt(prop);
+        if (val > 0) return val;
+      } catch (NumberFormatException ignore) { }
+    }
+    return 10_000_000;
+  }
+
   /**
    * The number of predicates contained in the model.
    */
@@ -91,6 +117,10 @@ public abstract class AbstractModelReader {
 
   protected String[] getOutcomes() throws java.io.IOException {
     int numOutcomes = readInt();
+    if (numOutcomes < 0 || numOutcomes > MAX_ENTRIES) {
+      throw new IllegalArgumentException(
+          "Outcome count " + numOutcomes + " exceeds safe limit of " + MAX_ENTRIES);
+    }
     String[] outcomeLabels = new String[numOutcomes];
     for (int i = 0; i < numOutcomes; i++) outcomeLabels[i] = readUTF();
     return outcomeLabels;
@@ -98,6 +128,10 @@ public abstract class AbstractModelReader {
 
   protected int[][] getOutcomePatterns() throws java.io.IOException {
     int numOCTypes = readInt();
+    if (numOCTypes < 0 || numOCTypes > MAX_ENTRIES) {
+      throw new IllegalArgumentException(
+          "Outcome pattern count " + numOCTypes + " exceeds safe limit of " + MAX_ENTRIES);
+    }
     int[][] outcomePatterns = new int[numOCTypes][];
     for (int i = 0; i < numOCTypes; i++) {
       StringTokenizer tok = new StringTokenizer(readUTF(), " ");
@@ -112,6 +146,10 @@ public abstract class AbstractModelReader {
 
   protected String[] getPredicates() throws java.io.IOException {
     NUM_PREDS = readInt();
+    if (NUM_PREDS < 0 || NUM_PREDS > MAX_ENTRIES) {
+      throw new IllegalArgumentException(
+          "Predicate count " + NUM_PREDS + " exceeds safe limit of " + MAX_ENTRIES);
+    }
     String[] predLabels = new String[NUM_PREDS];
     for (int i = 0; i < NUM_PREDS; i++)
         predLabels[i] = readUTF();
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/AbstractModelReaderOomTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/AbstractModelReaderOomTest.java
new file mode 100644
index 000000000..96b09b43d
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/AbstractModelReaderOomTest.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Verifies that crafted model files with oversized count fields are rejected before array
+ * allocation occurs, preventing OOM DoS. See OPENNLP-1821.
+ */
+public class AbstractModelReaderOomTest {
+
+  /**
+   * Minimal concrete subclass that exposes the three protected methods under test.
+   */
+  static class TestableReader extends AbstractModelReader {
+    TestableReader(DataReader dr) {
+      super(dr);
+    }
+
+    @Override
+    public void checkModelType() {
+    }
+
+    @Override
+    public AbstractModel constructModel() {
+      return null;
+    }
+
+    String[] outcomes() throws IOException {
+      return getOutcomes();
+    }
+
+    int[][] outcomePatterns() throws IOException {
+      return getOutcomePatterns();
+    }
+
+    String[] predicates() throws IOException {
+      return getPredicates();
+    }
+  }
+
+  /** Reader whose stream starts with a single int (the count field). */
+  private static TestableReader readerFor(int countValue) throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    dos.writeInt(countValue);
+    dos.flush();
+    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(baos.toByteArray()));
+    return new TestableReader(new BinaryFileDataReader(dis));
+  }
+
+  @Test
+  public void testGetOutcomes_RejectsMaxValue() throws IOException {
+    Assert.assertThrows(IllegalArgumentException.class, readerFor(Integer.MAX_VALUE)::outcomes);
+  }
+
+  @Test
+  public void testGetOutcomePatterns_RejectsMaxValue() throws IOException {
+    Assert.assertThrows(IllegalArgumentException.class, readerFor(Integer.MAX_VALUE)::outcomePatterns);
+  }
+
+  @Test
+  public void testGetPredicates_RejectsMaxValue() throws IOException {
+    Assert.assertThrows(IllegalArgumentException.class, readerFor(Integer.MAX_VALUE)::predicates);
+  }
+
+  @Test
+  public void testGetOutcomes_RejectsNegativeCount() throws IOException {
+    Assert.assertThrows(IllegalArgumentException.class, readerFor(-1)::outcomes);
+  }
+
+  @Test
+  public void testGetOutcomePatterns_RejectsNegativeCount() throws IOException {
+    Assert.assertThrows(IllegalArgumentException.class, readerFor(-1)::outcomePatterns);
+  }
+
+  @Test
+  public void testGetPredicates_RejectsNegativeCount() throws IOException {
+    Assert.assertThrows(IllegalArgumentException.class, readerFor(-1)::predicates);
+  }
+
+  @Test
+  public void testGetOutcomes_ValidCountReturnsLabels() throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    dos.writeInt(2);
+    dos.writeUTF("label-A");
+    dos.writeUTF("label-B");
+    dos.flush();
+
+    TestableReader reader = new TestableReader(
+        new BinaryFileDataReader(new DataInputStream(new ByteArrayInputStream(baos.toByteArray()))));
+    Assert.assertArrayEquals(new String[]{"label-A", "label-B"}, reader.outcomes());
+  }
+
+  @Test
+  public void testGetPredicates_ValidCountReturnsLabels() throws IOException {
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    DataOutputStream dos = new DataOutputStream(baos);
+    dos.writeInt(3);
+    dos.writeUTF("pred-X");
+    dos.writeUTF("pred-Y");
+    dos.writeUTF("pred-Z");
+    dos.flush();
+
+    TestableReader reader = new TestableReader(
+        new BinaryFileDataReader(new DataInputStream(new ByteArrayInputStream(baos.toByteArray()))));
+    Assert.assertArrayEquals(new String[]{"pred-X", "pred-Y", "pred-Z"}, reader.predicates());
+  }
+}

Reply via email to