This is an automated email from the ASF dual-hosted git repository.
houston pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/main by this push:
new 0c4f7e18864 SOLR-17750: Let S3 File downloads gracefully handle connection issues (#3341)
0c4f7e18864 is described below
commit 0c4f7e18864d502538f741cf1e6fdbb1e1bf1ae3
Author: Houston Putman <[email protected]>
AuthorDate: Tue Jun 10 11:34:28 2025 -0500
SOLR-17750: Let S3 File downloads gracefully handle connection issues (#3341)
---
solr/CHANGES.txt | 2 +
.../java/org/apache/solr/s3/S3StorageClient.java | 23 ++-
.../org/apache/solr/s3/AbstractS3ClientTest.java | 27 ++-
.../test/org/apache/solr/s3/S3ReadWriteTest.java | 82 +++++++++
.../solr/common/util/ResumableInputStream.java | 190 +++++++++++++++++++++
.../src/java/org/apache/solr/util/LogListener.java | 5 +
6 files changed, 324 insertions(+), 5 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 3632528713d..5e82c2b527e 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -243,6 +243,8 @@ Improvements
* SOLR-17187: The polling interval for PULL and TLOG replicas can now be overridden using the `commitPollInterval` setting, which takes a String formatted as "HH:mm:ss" (Torsten Koster via Christine Poerschke, Jason Gerlowski)
+* SOLR-17750: S3 File downloads now handle connection issues more gracefully (Houston Putman, Mark Miller)
+
Optimizations
---------------------
* SOLR-17578: Remove ZkController internal core supplier, for slightly faster reconnection after Zookeeper session loss. (Pierre Salagnac)
diff --git a/solr/modules/s3-repository/src/java/org/apache/solr/s3/S3StorageClient.java b/solr/modules/s3-repository/src/java/org/apache/solr/s3/S3StorageClient.java
index 527a951e71e..d6b52acc4e5 100644
--- a/solr/modules/s3-repository/src/java/org/apache/solr/s3/S3StorageClient.java
+++ b/solr/modules/s3-repository/src/java/org/apache/solr/s3/S3StorageClient.java
@@ -31,6 +31,7 @@ import java.util.Locale;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
+import org.apache.solr.common.util.ResumableInputStream;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.SuppressForbidden;
import org.slf4j.Logger;
@@ -42,6 +43,7 @@ import software.amazon.awssdk.core.exception.SdkException;
import software.amazon.awssdk.core.retry.RetryMode;
import software.amazon.awssdk.core.retry.RetryPolicy;
import software.amazon.awssdk.core.sync.RequestBody;
+import software.amazon.awssdk.core.sync.ResponseTransformer;
import software.amazon.awssdk.http.apache.ApacheHttpClient;
import software.amazon.awssdk.http.apache.ProxyConfiguration;
import software.amazon.awssdk.regions.Region;
@@ -53,6 +55,7 @@ import software.amazon.awssdk.services.s3.model.Delete;
import software.amazon.awssdk.services.s3.model.DeleteObjectsRequest;
import software.amazon.awssdk.services.s3.model.DeleteObjectsResponse;
import software.amazon.awssdk.services.s3.model.DeletedObject;
+import software.amazon.awssdk.services.s3.model.GetObjectRequest;
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
import software.amazon.awssdk.services.s3.model.NoSuchBucketException;
import software.amazon.awssdk.services.s3.model.NoSuchKeyException;
@@ -374,8 +377,26 @@ public class S3StorageClient {
final String s3Path = sanitizedFilePath(path);
try {
+ GetObjectRequest.Builder getBuilder =
+ GetObjectRequest.builder().bucket(bucketName).key(s3Path);
// This InputStream instance needs to be closed by the caller
- return s3Client.getObject(b -> b.bucket(bucketName).key(s3Path));
+ return s3Client.getObject(
+ getBuilder.build(),
+ ResponseTransformer.unmanaged(
+ (response, inputStream) -> {
+ final long contentLength = response.contentLength();
+ return new ResumableInputStream(
+ inputStream,
+ bytesRead -> {
+ if (contentLength > 0 && bytesRead >= contentLength) {
+ // No more bytes to read
+ return null;
+ } else if (bytesRead > 0) {
+                    getBuilder.range(String.format(Locale.ROOT, "bytes=%d-", bytesRead));
+ }
+ return s3Client.getObject(getBuilder.build());
+ });
+ }));
} catch (SdkException sdke) {
throw handleAmazonException(sdke);
}
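For context, an illustrative sketch (not part of the patch itself): when the wrapped stream dies mid-download, the supplier above re-issues the GetObject call with an RFC 9110 Range header so that reading resumes at the first unread byte. Assuming 1048576 bytes were already consumed, and with a hypothetical bucket and key, the resumed request would look roughly like:

    // Hypothetical resumed request after 1048576 bytes have been read
    GetObjectRequest resumed =
        GetObjectRequest.builder()
            .bucket("my-bucket") // hypothetical bucket name
            .key("backups/segment_1") // hypothetical key
            .range("bytes=1048576-") // resume from the first unread byte
            .build();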
diff --git a/solr/modules/s3-repository/src/test/org/apache/solr/s3/AbstractS3ClientTest.java b/solr/modules/s3-repository/src/test/org/apache/solr/s3/AbstractS3ClientTest.java
index 4e97358756b..60ccd0fcace 100644
--- a/solr/modules/s3-repository/src/test/org/apache/solr/s3/AbstractS3ClientTest.java
+++ b/solr/modules/s3-repository/src/test/org/apache/solr/s3/AbstractS3ClientTest.java
@@ -24,6 +24,7 @@ import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.client.solrj.cloud.SocketProxy;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
@@ -32,7 +33,7 @@ import software.amazon.awssdk.profiles.ProfileFileSystemSetting;
/** Abstract class for test with S3Mock. */
public class AbstractS3ClientTest extends SolrTestCaseJ4 {
- private static final String BUCKET_NAME = "test-bucket";
+ protected static final String BUCKET_NAME = "test-bucket";
@ClassRule
@SuppressWarnings("removal")
@@ -40,14 +41,18 @@ public class AbstractS3ClientTest extends SolrTestCaseJ4 {
S3MockRule.builder().withInitialBuckets(BUCKET_NAME).build();
S3StorageClient client;
+ private SocketProxy proxy;
@Before
- public void setUpClient() throws URISyntaxException {
+ public void setUpClient() throws Exception {
System.setProperty("aws.accessKeyId", "foo");
System.setProperty("aws.secretAccessKey", "bar");
setS3ConfFile();
+    // We are using a proxy in front of S3Mock to be able to test connection loss
+ proxy = new SocketProxy();
+ proxy.open(URI.create("http://localhost:" + S3_MOCK_RULE.getHttpPort()));
client =
new S3StorageClient(
BUCKET_NAME,
@@ -55,7 +60,7 @@ public class AbstractS3ClientTest extends SolrTestCaseJ4 {
"us-east-1",
"",
false,
- "http://localhost:" + S3_MOCK_RULE.getHttpPort(),
+ "http://localhost:" + proxy.getListenPort(),
false);
}
@@ -73,6 +78,7 @@ public class AbstractS3ClientTest extends SolrTestCaseJ4 {
@After
public void tearDownClient() {
client.close();
+ proxy.close();
}
/**
@@ -82,10 +88,23 @@ public class AbstractS3ClientTest extends SolrTestCaseJ4 {
* @param content Arbitrary content for the test.
*/
void pushContent(String path, String content) throws S3Exception {
+ pushContent(path, content.getBytes(StandardCharsets.UTF_8));
+ }
+
+ void pushContent(String path, byte[] content) throws S3Exception {
try (OutputStream output = client.pushStream(path)) {
- output.write(content.getBytes(StandardCharsets.UTF_8));
+ output.write(content);
} catch (IOException e) {
throw new S3Exception(e);
}
}
+
+ /**
+   * Test a connection loss in S3. This will close the existing connections' receiving sockets,
+   * while keeping S3 open to new connections. This affects all connections open to S3 at the
+   * time of calling.
+ */
+ void initiateS3ConnectionLoss() {
+ proxy.halfClose();
+ }
}
diff --git a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3ReadWriteTest.java b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3ReadWriteTest.java
index ccfad11a7f2..d4d13c71758 100644
--- a/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3ReadWriteTest.java
+++ b/solr/modules/s3-repository/src/test/org/apache/solr/s3/S3ReadWriteTest.java
@@ -18,8 +18,12 @@ package org.apache.solr.s3;
import static org.hamcrest.Matchers.containsString;
+import com.carrotsearch.randomizedtesting.generators.RandomBytes;
+import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
+import org.apache.solr.common.util.ResumableInputStream;
+import org.apache.solr.util.LogListener;
import org.junit.Test;
/** Basic test that write data and read them through the S3 client. */
@@ -101,4 +105,82 @@ public class S3ReadWriteTest extends AbstractS3ClientTest {
assertThrows(S3NotFoundException.class, () -> client.pullStream("/not-found"));
assertThrows(S3NotFoundException.class, () -> client.length("/not-found"));
}
+
+  /** Check that a read can succeed even with connection loss. */
+ @Test
+ public void testReadWithConnectionLoss() throws IOException {
+ String key = "flush-very-large";
+
+ int numBytes = 20_000_000;
+ pushContent(key, RandomBytes.randomBytesOfLength(random(), numBytes));
+
+ int numExceptions = 20;
+ int bytesPerException = numBytes / numExceptions;
+ // Check we can re-read same content
+
+ int maxBuffer = 100;
+ byte[] buffer = new byte[maxBuffer];
+ boolean done = false;
+    try (LogListener logListener = LogListener.warn(ResumableInputStream.class)) {
+ try (InputStream input = client.pullStream(key)) {
+ long byteCount = 0;
+ while (!done) {
+ // Use the same number of bytes no matter which method we are testing
+ int numBytesToRead = random().nextInt(maxBuffer) + 1;
+ // test both read() and read(buffer, off, len)
+ switch (random().nextInt(3)) {
+ // read()
+ case 0:
+ {
+ for (int i = 0; i < numBytesToRead && !done; i++) {
+ done = input.read() == -1;
+ if (!done) {
+ byteCount++;
+ }
+ }
+ }
+ break;
+ // read(byte, off, len)
+ case 1:
+ {
+ int readLen = input.read(buffer, 0, numBytesToRead);
+ if (readLen > 0) {
+ byteCount += readLen;
+ } else {
+ // We are done when readLen = -1
+ done = true;
+ }
+ }
+ break;
+ // skip(len)
+ case 2:
+ {
+ // We only want to skip 1 because
+ long bytesSkipped = input.skip(numBytesToRead);
+ byteCount += bytesSkipped;
+ if (bytesSkipped < numBytesToRead) {
+ // We are done when no bytes are skipped
+ done = true;
+ }
+ }
+ break;
+ }
+          // Initiate a connection loss at the beginning of every "bytesPerException" cycle.
+          // The input stream will not immediately see an error; it will have pre-loaded some data.
+ if ((byteCount % bytesPerException <= maxBuffer)) {
+ initiateS3ConnectionLoss();
+ }
+ }
+        assertEquals("Wrong amount of data found from InputStream", numBytes, byteCount);
+ }
+ // We just need to ensure we saw at least one IOException
+ assertNotEquals(
+ "There was no logging of an IOException that caused the InputStream
to be resumed",
+ 0,
+ logListener.getCount());
+ // LogListener will fail because we haven't polled for each warning.
+      // Just clear the queue instead; we only care that the queue is not empty.
+ logListener.clearQueue();
+ }
+ }
}
diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ResumableInputStream.java b/solr/solrj/src/java/org/apache/solr/common/util/ResumableInputStream.java
new file mode 100644
index 00000000000..4be449194c8
--- /dev/null
+++ b/solr/solrj/src/java/org/apache/solr/common/util/ResumableInputStream.java
@@ -0,0 +1,190 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.common.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.lang.invoke.MethodHandles;
+import java.util.function.Function;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * An {@link InputStream} that can be resumed when the connection that is driving the input is
+ * interrupted.
+ */
+public class ResumableInputStream extends InputStream {
+
+  private static final Logger log =
+      LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ private long bytesRead;
+ private long markedBytesRead;
+ private final Function<Long, InputStream> nextInputStreamSupplier;
+ private InputStream delegate;
+
+ /**
+ * Create a new ResumableInputStream
+ *
+   * @param delegate The original {@link InputStream} that will be used as a delegate
+   * @param nextInputStreamSupplier A function to create the next InputStream given the number of
+   *     bytes already read. These inputs can, for example, be used to populate the <a
+   *     href="https://www.rfc-editor.org/rfc/rfc9110.html#name-range">HTTP Range header</a>. If
+   *     an unsupported input is provided (more bytes than exist), then a <code>null</code>
+   *     {@link InputStream} should be returned.
+ */
+ public ResumableInputStream(
+      InputStream delegate, Function<Long, InputStream> nextInputStreamSupplier) {
+ this.delegate = delegate;
+ this.nextInputStreamSupplier = nextInputStreamSupplier;
+ bytesRead = 0;
+ markedBytesRead = 0;
+ }
+
+ /**
+   * If an IOException is thrown by the delegate while reading, the delegate will be recreated
+   * and the read will be retried once during this read call.
+ */
+ @Override
+ public int read() throws IOException {
+ return read(false);
+ }
+
+ public int read(boolean isRetry) throws IOException {
+ checkAndRefreshDelegate();
+ int val;
+ try {
+ val = delegate.read();
+ if (val >= 0) {
+ bytesRead += 1;
+ }
+ } catch (IOException e) {
+ // Only retry once on a single read
+ if (isRetry) {
+ throw e;
+ }
+ log.warn(
+ "Exception thrown while consuming InputStream, retrying from byte:
{}", bytesRead, e);
+ closeDelegate();
+ val = read(true);
+ }
+ return val;
+ }
+
+ /**
+   * If an IOException is thrown by the delegate while reading, the delegate will be recreated
+   * and the read will be retried once during this read call.
+ */
+ @Override
+ public int read(byte[] b, int off, int len) throws IOException {
+ return read(b, off, len, false);
+ }
+
+  public int read(byte[] b, int off, int len, boolean isRetry) throws IOException {
+ checkAndRefreshDelegate();
+ int readLen;
+ try {
+ readLen = delegate.read(b, off, len);
+ if (readLen >= 0) {
+ bytesRead += readLen;
+ }
+ } catch (IOException e) {
+ // Only retry once on a single read
+ if (isRetry) {
+ throw e;
+ }
+ log.warn(
+ "Exception thrown while consuming InputStream, retrying from byte:
{}", bytesRead, e);
+ closeDelegate();
+ readLen = read(b, off, len, true);
+ }
+ return readLen;
+ }
+
+ /**
+   * If an IOException is thrown by the delegate while skipping, the delegate will be recreated
+   * from the position being skipped to and the return value will be <code>n</code>. This may be
+   * longer than the remaining number of bytes, and if so the delegate will be set to a
+   * NullInputStream.
+ */
+ @Override
+ public long skip(final long n) throws IOException {
+ checkAndRefreshDelegate();
+ long skippedBytes;
+ try {
+ skippedBytes = delegate.skip(n);
+ bytesRead += skippedBytes;
+ } catch (IOException e) {
+ closeDelegate();
+
+      // Go ahead and skip the bytes before refreshing the delegate. Tell the caller we skipped n
+      // bytes, even though we don't know if that many bytes actually exist.
+      // This might be more than exist, and if so, the delegate will be set to a NullInputStream
+ bytesRead += n;
+ log.warn(
+ "Exception thrown while skipping {} bytes in InputStream, resuming
at byte: {}",
+ n,
+ bytesRead,
+ e);
+ checkAndRefreshDelegate();
+ skippedBytes = n;
+ }
+ return skippedBytes;
+ }
+
+ @Override
+ public boolean markSupported() {
+ return true;
+ }
+
+ @Override
+ public void mark(int readlimit) {
+ markedBytesRead = bytesRead;
+ }
+
+ @Override
+ public int available() throws IOException {
+ checkAndRefreshDelegate();
+ return delegate.available();
+ }
+
+ @Override
+ public void reset() {
+ bytesRead = markedBytesRead;
+ closeDelegate();
+ }
+
+ @Override
+ public void close() throws IOException {
+ if (delegate != null) {
+ delegate.close();
+ }
+ }
+
+ private void closeDelegate() {
+ IOUtils.closeQuietly(delegate);
+ delegate = null;
+ }
+
+ private void checkAndRefreshDelegate() {
+ if (delegate == null) {
+ delegate = nextInputStreamSupplier.apply(bytesRead);
+ // The supplier returning null tells us there is nothing else to read
+ if (delegate == null) {
+ delegate = InputStream.nullInputStream();
+ }
+ }
+ }
+}
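A minimal usage sketch under assumptions (the helper class and URL handling here are hypothetical, not part of the patch): ResumableInputStream is generic, so the same pattern works for any range-capable source. This wraps a plain HTTP download with java.net.HttpURLConnection, assuming the server honors Range requests:

    import java.io.IOException;
    import java.io.InputStream;
    import java.io.UncheckedIOException;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import org.apache.solr.common.util.ResumableInputStream;

    public class ResumableDownload { // hypothetical helper class
      /** Opens a stream over the URL that retries once per read after a connection failure. */
      public static InputStream open(URL url) throws IOException {
        return new ResumableInputStream(
            url.openStream(),
            bytesRead -> {
              try {
                HttpURLConnection conn = (HttpURLConnection) url.openConnection();
                if (bytesRead > 0) {
                  // Request only the bytes we have not consumed yet
                  conn.setRequestProperty("Range", "bytes=" + bytesRead + "-");
                }
                return conn.getInputStream();
              } catch (IOException e) {
                // The supplier is a plain Function, so checked exceptions must be wrapped
                throw new UncheckedIOException(e);
              }
            });
      }
    }

Per the constructor contract, the supplier may also return null to signal that no bytes remain; this sketch instead lets each delegate report end-of-stream on its own.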
diff --git a/solr/test-framework/src/java/org/apache/solr/util/LogListener.java b/solr/test-framework/src/java/org/apache/solr/util/LogListener.java
index a5f46739e72..9bbd08e6e09 100644
--- a/solr/test-framework/src/java/org/apache/solr/util/LogListener.java
+++ b/solr/test-framework/src/java/org/apache/solr/util/LogListener.java
@@ -264,6 +264,11 @@ public final class LogListener implements Closeable, AutoCloseable {
return this;
}
+ /** Clear the queue of any recorded events */
+ public void clearQueue() {
+ loggerAppender.getQueue().clear();
+ }
+
/**
* Modifies this listener to filter the log events that are recorded to events that match the
* specified substring.