This is an automated email from the ASF dual-hosted git repository.

psalagnac pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git


The following commit(s) were added to refs/heads/branch_9x by this push:
     new 89b5e93244d SOLR-18237: Do not copy data before noggit parsing (#4415)
89b5e93244d is described below

commit 89b5e93244dbff36069e414284e4f85dc0ab2a75
Author: Pierre Salagnac <[email protected]>
AuthorDate: Wed Jun 17 14:06:15 2026 +0200

    SOLR-18237: Do not copy data before noggit parsing (#4415)
    
    This removes the char[] array buffer when parsing collection states and 
other JSON data, since allocation of this large buffer is a cause of OOM errors.
    Instead, the UTF8 decoding is done on the fly using JVM tooling.
    
    (cherry picked from commit 2c8cb3235e16f3b0c45df50c3ceb218d093095d4)
---
 changelog/unreleased/SOLR-18237-no-buffer.yml           |  7 +++++++
 .../src/java/org/apache/solr/common/util/Utils.java     | 17 ++++-------------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/changelog/unreleased/SOLR-18237-no-buffer.yml 
b/changelog/unreleased/SOLR-18237-no-buffer.yml
new file mode 100644
index 00000000000..e3e9482ebdc
--- /dev/null
+++ b/changelog/unreleased/SOLR-18237-no-buffer.yml
@@ -0,0 +1,7 @@
+title: Avoid OOMs when deserializing collection states by not copying full 
data for UTF8 to Java string conversion.
+type: fixed
+authors:
+  - name: Pierre Salagnac
+links:
+  - name: SOLR-18237
+    url: https://issues.apache.org/jira/browse/SOLR-18237
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/Utils.java 
b/solr/solrj/src/java/org/apache/solr/common/util/Utils.java
index afcb82f392c..903220c74a5 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/Utils.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/Utils.java
@@ -78,12 +78,9 @@ import org.noggit.JSONParser;
 import org.noggit.JSONWriter;
 import org.noggit.ObjectBuilder;
 import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
 
 public class Utils {
 
-  private static final Logger log = 
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
   public static final Random RANDOM;
 
   static {
@@ -275,16 +272,10 @@ public class Utils {
     if (utf8 == null || utf8.length == 0 || length == 0) {
       return Collections.emptyMap();
     }
-    // convert directly from bytes to chars
-    // and parse directly from that instead of going through
-    // intermediate strings or readers
-    CharArr chars = new CharArr();
-    ByteUtils.UTF8toUTF16(utf8, offset, length, chars);
-    JSONParser parser = new JSONParser(chars.getArray(), chars.getStart(), 
chars.length());
-    parser.setFlags(
-        parser.getFlags()
-            | JSONParser.ALLOW_MISSING_COLON_COMMA_BEFORE_OBJECT
-            | JSONParser.OPTIONAL_OUTER_BRACES);
+    // convert from bytes to chars on-the-fly and parse directly
+    // from that instead of going through intermediate buffers
+    Reader reader = new InputStreamReader(new ByteArrayInputStream(utf8, 
offset, length), UTF_8);
+    JSONParser parser = getJSONParser(reader);
     try {
       return fun.apply(parser).getValStrict();
     } catch (IOException e) {

Reply via email to