This is an automated email from the ASF dual-hosted git repository.
psalagnac pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new f3fe28b872a SOLR-18157: Optimize buffer allocation in JavaBinCodec
(#4208)
f3fe28b872a is described below
commit f3fe28b872aae7a91d2d17d9a94c63e91f643a38
Author: Pierre Salagnac <[email protected]>
AuthorDate: Thu Mar 19 11:12:31 2026 +0100
SOLR-18157: Optimize buffer allocation in JavaBinCodec (#4208)
Implements a smarter allocation strategy using powers of 2 for the
internal buffer of JavaBinCodec.
---
changelog/unreleased/SOLR-18157-javabin-buffer.yml | 8 ++
.../apache/solr/bench/search/RequestWriters.java | 121 +++++++++++++++++++++
.../org/apache/solr/common/util/JavaBinCodec.java | 36 +++++-
.../apache/solr/common/util/TestJavaBinCodec.java | 10 ++
4 files changed, 172 insertions(+), 3 deletions(-)
diff --git a/changelog/unreleased/SOLR-18157-javabin-buffer.yml
b/changelog/unreleased/SOLR-18157-javabin-buffer.yml
new file mode 100644
index 00000000000..dc5999cc7f0
--- /dev/null
+++ b/changelog/unreleased/SOLR-18157-javabin-buffer.yml
@@ -0,0 +1,8 @@
+title: Optimize the size of the internal buffer of JavaBin codec to reduce the number of allocations. This reduces GC pressure on SolrJ client under high indexing load.
+type: changed
+authors:
+ - name: Pierre Salagnac
+links:
+ - name: SOLR-18157
+ url: https://issues.apache.org/jira/browse/SOLR-18157
+
diff --git
a/solr/benchmark/src/java/org/apache/solr/bench/search/RequestWriters.java
b/solr/benchmark/src/java/org/apache/solr/bench/search/RequestWriters.java
new file mode 100644
index 00000000000..d55800ad06e
--- /dev/null
+++ b/solr/benchmark/src/java/org/apache/solr/bench/search/RequestWriters.java
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.bench.search;
+
+import static org.apache.solr.bench.Docs.docs;
+import static org.apache.solr.bench.generators.SourceDSL.integers;
+import static org.apache.solr.bench.generators.SourceDSL.longs;
+import static org.apache.solr.bench.generators.SourceDSL.strings;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Iterator;
+import java.util.function.Supplier;
+import org.apache.commons.io.output.NullOutputStream;
+import org.apache.solr.bench.Docs;
+import org.apache.solr.client.solrj.request.JavaBinRequestWriter;
+import org.apache.solr.client.solrj.request.RequestWriter;
+import org.apache.solr.client.solrj.request.UpdateRequest;
+import org.apache.solr.client.solrj.request.XMLRequestWriter;
+import org.apache.solr.common.SolrInputDocument;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Threads;
+import org.openjdk.jmh.annotations.Warmup;
+
+/**
+ * JMH benchmark for client-side serialization of update requests.
+ *
+ * <p>Only the serialization itself is exercised: the output goes to a {@link NullOutputStream},
+ * so nothing is sent over the network and no server processes the request.
+ */
+@Fork(value = 1)
+@BenchmarkMode(Mode.Throughput)
+@Warmup(time = 2, iterations = 1)
+@Measurement(time = 5, iterations = 5)
+@Threads(value = 1)
+public class RequestWriters {
+
+  /** Benchmark-scoped state: the writer factory under test and the pre-generated documents. */
+  @State(Scope.Benchmark)
+  public static class BenchState {
+
+    /** Request format to benchmark; every value needs a matching case in {@link #setup()}. */
+    @Param({"xml", "binary"})
+    String type;
+
+    /** Number of documents added to each update request. */
+    @Param({"10", "100", "1000", "10000"})
+    int batchSize;
+
+    /** Number of documents generated once per trial by {@link #preGenerateDocs()}. */
+    private final int docCount = 50000;
+
+    private Supplier<RequestWriter> writerSupplier;
+    private Iterator<SolrInputDocument> docIterator;
+
+    /**
+     * Generates the documents and selects the writer factory matching {@link #type}.
+     *
+     * @throws IllegalArgumentException if {@link #type} is not a supported format
+     */
+    @Setup(Level.Trial)
+    public void setup() throws Exception {
+      preGenerateDocs();
+
+      switch (type) {
+        case "xml":
+          writerSupplier = XMLRequestWriter::new;
+          break;
+        case "binary": // the value declared in @Param
+        case "javabin": // alias, accepted when the parameter is overridden on the command line
+          writerSupplier = JavaBinRequestWriter::new;
+          break;
+        default:
+          throw new IllegalArgumentException("Unsupported type: " + type);
+      }
+    }
+
+    /** Pre-generates {@link #docCount} documents and keeps the iterator used by the benchmark. */
+    private void preGenerateDocs() throws Exception {
+      Docs docs =
+          docs()
+              .field("id", integers().incrementing())
+              .field(strings().basicLatinAlphabet().ofLengthBetween(10, 64))
+              .field(strings().basicLatinAlphabet().ofLengthBetween(10, 64))
+              .field(strings().basicLatinAlphabet().multi(312).ofLengthBetween(10, 64))
+              .field(strings().basicLatinAlphabet().multi(312).ofLengthBetween(10, 64))
+              .field(integers().all())
+              .field(integers().all())
+              .field(longs().all());
+
+      docs.preGenerate(docCount);
+      docIterator = docs.generatedDocsCircularIterator();
+    }
+  }
+
+  /**
+   * Builds an update request holding {@code batchSize} documents and serializes it with a new
+   * writer of the configured type, discarding the output.
+   *
+   * <p>Note that building the request is part of the measured code, not only the write.
+   */
+  @Benchmark
+  public void writeUpdate(BenchState state) throws IOException {
+    OutputStream sink = NullOutputStream.INSTANCE;
+
+    UpdateRequest request = new UpdateRequest();
+    for (int i = 0; i < state.batchSize; i++) {
+      request.add(state.docIterator.next());
+    }
+
+    RequestWriter writer = state.writerSupplier.get();
+    writer.write(request, sink);
+  }
+}
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
index a19896ff2b3..9cd5171f125 100644
--- a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
@@ -113,9 +113,10 @@ public class JavaBinCodec implements PushWriter {
NAMED_LST = (byte) (6 << 5), // NamedList
EXTERN_STRING = (byte) (7 << 5);
+ private static final int MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY = 512;
private static final int MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY = 65536;
- private static byte VERSION = 2;
+ private static final byte VERSION = 2;
private final ObjectResolver resolver;
protected FastOutputStream daos;
private StringCache stringCache;
@@ -1072,7 +1073,11 @@ public class JavaBinCodec implements PushWriter {
int maxSize = end * ByteUtils.MAX_UTF8_BYTES_PER_CHAR;
if (maxSize <= MAX_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY) {
- if (bytes == null || bytes.length < maxSize) bytes = new byte[maxSize];
+ if (bytes == null || bytes.length < maxSize) {
+ int bufferSize = getBufferSize(maxSize);
+ bytes = new byte[bufferSize];
+ }
+
int sz = ByteUtils.UTF16toUTF8(s, 0, end, bytes, 0);
writeTag(STR, sz);
daos.write(bytes, 0, sz);
@@ -1105,7 +1110,10 @@ public class JavaBinCodec implements PushWriter {
private CharSequence _readStr(DataInputInputStream dis, StringCache
stringCache, int sz)
throws IOException {
- if (bytes == null || bytes.length < sz) bytes = new byte[sz];
+ if (bytes == null || bytes.length < sz) {
+ int bufferSize = getBufferSize(sz);
+ bytes = new byte[bufferSize];
+ }
dis.readFully(bytes, 0, sz);
if (stringCache != null) {
return stringCache.get(bytesRef.reset(bytes, 0, sz));
@@ -1116,6 +1124,28 @@ public class JavaBinCodec implements PushWriter {
}
}
+ /**
+  * Computes the size to allocate for a buffer that must hold at least {@code required} bytes.
+  *
+  * <p>Sizes below {@code MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY} are raised to that minimum.
+  * Larger sizes are rounded up to the next power of 2 that is greater than or equal to the
+  * given size. This is a trade-off so we don't start with a uselessly big buffer, but we also
+  * don't do too many allocations.
+  *
+  * <p>When {@code required} is above 2^30, no larger power of 2 fits in an {@code int}; the
+  * size is then returned unchanged instead of overflowing to a negative value.
+  *
+  * @param required the minimum number of bytes the buffer must hold
+  * @return the size to allocate, never smaller than {@code required}
+  */
+ static int getBufferSize(int required) {
+
+   if (required < MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY) {
+     return MIN_UTF8_SIZE_FOR_ARRAY_GROW_STRATEGY;
+   }
+
+   int oneBit = Integer.highestOneBit(required);
+
+   if (oneBit == required) {
+     // Already a power of 2.
+     return oneBit;
+   }
+
+   if (oneBit == (1 << 30)) {
+     // Doubling would overflow to Integer.MIN_VALUE and make the array allocation fail.
+     return required;
+   }
+
+   return oneBit << 1;
+ }
+
/////////// code to optimize reading UTF8
static final int MAX_UTF8_SZ = 1024 * 64; // too big strings can cause too
much memory allocation
private Function<ByteArrayUtf8CharSequence, String> stringProvider;
diff --git
a/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java
b/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java
index 52d5cf1e3c9..65a89bbc21b 100644
--- a/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java
+++ b/solr/solrj/src/test/org/apache/solr/common/util/TestJavaBinCodec.java
@@ -496,6 +496,16 @@ public class TestJavaBinCodec extends SolrTestCaseJ4 {
assertSame(l1.get(1), l2.get(1));
}
+ @Test
+ public void testBufferSize() {
+ assertEquals(512, JavaBinCodec.getBufferSize(1));
+ assertEquals(512, JavaBinCodec.getBufferSize(200));
+ assertEquals(512, JavaBinCodec.getBufferSize(500));
+ assertEquals(512, JavaBinCodec.getBufferSize(512));
+ assertEquals(1024, JavaBinCodec.getBufferSize(513));
+ assertEquals(2048, JavaBinCodec.getBufferSize(1500));
+ }
+
public void genBinaryFiles() throws IOException {
Object data = generateAllDataTypes();