This is an automated email from the ASF dual-hosted git repository.
psalagnac pushed a commit to branch branch_10x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_10x by this push:
new c4b69c2d92b SOLR-18237: Do not copy data before noggit parsing (#4415)
c4b69c2d92b is described below
commit c4b69c2d92bdbbaffc9a34c8fad335a50179a1dd
Author: Pierre Salagnac <[email protected]>
AuthorDate: Wed Jun 17 14:06:15 2026 +0200
SOLR-18237: Do not copy data before noggit parsing (#4415)
This removes the char[] array buffer when parsing collection states and
other JSON data, since allocation of this large buffer is a cause of OOM errors.
Instead, the UTF8 decoding is done on the fly using JVM tooling.
(cherry picked from commit 2c8cb3235e16f3b0c45df50c3ceb218d093095d4)
---
changelog/unreleased/SOLR-18237-no-buffer.yml | 7 +++++++
.../src/java/org/apache/solr/common/util/Utils.java | 17 ++++-------------
2 files changed, 11 insertions(+), 13 deletions(-)
diff --git a/changelog/unreleased/SOLR-18237-no-buffer.yml
b/changelog/unreleased/SOLR-18237-no-buffer.yml
new file mode 100644
index 00000000000..e3e9482ebdc
--- /dev/null
+++ b/changelog/unreleased/SOLR-18237-no-buffer.yml
@@ -0,0 +1,7 @@
+title: Avoid OOMs when deserializing collection states by not copying full
data for UTF8 to Java string conversion.
+type: fixed
+authors:
+ - name: Pierre Salagnac
+links:
+ - name: SOLR-18237
+ url: https://issues.apache.org/jira/browse/SOLR-18237
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/Utils.java
b/solr/solrj/src/java/org/apache/solr/common/util/Utils.java
index ddae531d3f5..82c76d899a7 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/Utils.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/Utils.java
@@ -83,12 +83,9 @@ import org.noggit.JSONParser;
import org.noggit.JSONWriter;
import org.noggit.ObjectBuilder;
import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
public class Utils {
- private static final Logger log =
LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
public static final Random RANDOM;
static {
@@ -297,16 +294,10 @@ public class Utils {
if (utf8 == null || utf8.length == 0 || length == 0) {
return Map.of();
}
- // convert directly from bytes to chars
- // and parse directly from that instead of going through
- // intermediate strings or readers
- CharArr chars = new CharArr();
- ByteUtils.UTF8toUTF16(utf8, offset, length, chars);
- JSONParser parser = new JSONParser(chars.getArray(), chars.getStart(),
chars.length());
- parser.setFlags(
- parser.getFlags()
- | JSONParser.ALLOW_MISSING_COLON_COMMA_BEFORE_OBJECT
- | JSONParser.OPTIONAL_OUTER_BRACES);
+ // convert from bytes to chars on-the-fly and parse directly
+ // from that instead of going through intermediate buffers
+ Reader reader = new InputStreamReader(new ByteArrayInputStream(utf8,
offset, length), UTF_8);
+ JSONParser parser = getJSONParser(reader);
try {
return fun.apply(parser).getValStrict();
} catch (IOException e) {