This is an automated email from the ASF dual-hosted git repository.
broustant pushed a commit to branch branch_9x
in repository https://gitbox.apache.org/repos/asf/solr.git
The following commit(s) were added to refs/heads/branch_9x by this push:
new 0410368dd13 SOLR-17805: Exception in TransactionLog constructor
deletes the file and does not block subsequent updates. (#3412)
0410368dd13 is described below
commit 0410368dd13ff32b28cd74231596a83733f23da2
Author: Bruno Roustant <[email protected]>
AuthorDate: Wed Jul 2 10:23:04 2025 +0200
SOLR-17805: Exception in TransactionLog constructor deletes the file and
does not block subsequent updates. (#3412)
---
solr/CHANGES.txt | 2 +
.../java/org/apache/solr/core/CoreContainer.java | 15 +-------
.../apache/solr/handler/RequestHandlerBase.java | 33 +++++++++++++---
.../solr/jersey/CatchAllExceptionMapper.java | 2 +-
.../apache/solr/update/DirectUpdateHandler2.java | 11 +++++-
.../org/apache/solr/update/TransactionLog.java | 4 ++
.../src/java/org/apache/solr/update/UpdateLog.java | 44 ++++++++++++++++++++--
.../solr/handler/RequestHandlerBaseTest.java | 6 +--
.../test/org/apache/solr/update/UpdateLogTest.java | 39 +++++++++++++++++++
9 files changed, 129 insertions(+), 27 deletions(-)
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 50b1f67bbbd..4a11d877a6a 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -140,6 +140,8 @@ Bug Fixes
* SOLR-17800: Security Manager should handle symlink on /tmp (Kevin Risden)
+* SOLR-17805: Exception in TransactionLog constructor deletes the file and
does not block subsequent updates. (Bruno Roustant)
+
Dependency Upgrades
---------------------
* SOLR-17471: Upgrade Lucene to 9.12.1. (Pierre Salagnac, Christine Poerschke)
diff --git a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
index 6f62eb26151..d5106edb252 100644
--- a/solr/core/src/java/org/apache/solr/core/CoreContainer.java
+++ b/solr/core/src/java/org/apache/solr/core/CoreContainer.java
@@ -2541,6 +2541,8 @@ public class CoreContainer {
}
/**
+ * Checks whether a tragic exception was thrown during update (including
update log).
+ *
* @param solrCore the core against which we check if there has been a
tragic exception
* @return whether this Solr core has tragic exception
* @see org.apache.lucene.index.IndexWriter#getTragicException()
@@ -2553,19 +2555,6 @@ public class CoreContainer {
// failed to open an indexWriter
tragicException = e;
}
-
- if (tragicException != null && isZooKeeperAware()) {
- getZkController().giveupLeadership(solrCore.getCoreDescriptor());
-
- try {
- // If the error was something like a full file system disconnect, this
probably won't help
- // But if it is a transient disk failure then it's worth a try
- solrCore.getSolrCoreState().newIndexWriter(solrCore, false); // should
we rollback?
- } catch (IOException e) {
- log.warn("Could not roll index writer after tragedy");
- }
- }
-
return tragicException != null;
}
diff --git a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
index 1d7ba554bc7..d584706a972 100644
--- a/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
+++ b/solr/core/src/java/org/apache/solr/handler/RequestHandlerBase.java
@@ -22,6 +22,7 @@ import static
org.apache.solr.response.SolrQueryResponse.haveCompleteResults;
import com.codahale.metrics.Counter;
import com.codahale.metrics.Meter;
import com.codahale.metrics.Timer;
+import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.Collection;
import java.util.List;
@@ -33,9 +34,11 @@ import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SuppressForbidden;
+import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.MetricsConfig;
import org.apache.solr.core.PluginBag;
import org.apache.solr.core.PluginInfo;
+import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoBean;
import org.apache.solr.metrics.SolrDelegateRegistryMetricsContext;
import org.apache.solr.metrics.SolrMetricManager;
@@ -235,7 +238,7 @@ public abstract class RequestHandlerBase
} catch (QueryLimitsExceededException e) {
rsp.setPartialResults(req);
} catch (Exception e) {
- e = normalizeReceivedException(req, e);
+ e = processReceivedException(req, e);
processErrorMetricsOnException(e, metrics);
rsp.setException(e);
} finally {
@@ -265,10 +268,30 @@ public abstract class RequestHandlerBase
}
}
- public static Exception normalizeReceivedException(SolrQueryRequest req,
Exception e) {
- if (req.getCore() != null) {
- assert req.getCoreContainer() != null;
- if (req.getCoreContainer().checkTragicException(req.getCore())) {
+ /**
+ * Processes and normalizes any exceptions that are received from the
request handler. This method
+ * is called before any error metrics are recorded.
+ *
+ * <p>If a tragic exception occurred in the index writer, this method also
gives up leadership of
+ * the shard, and replaces the index writer with a new one to attempt to get
out of a transient
+ * failure (e.g. disk failure).
+ */
+ public static Exception processReceivedException(SolrQueryRequest req,
Exception e) {
+ SolrCore core = req.getCore();
+ if (core != null) {
+ CoreContainer coreContainer = req.getCoreContainer();
+ assert coreContainer != null;
+ if (coreContainer.checkTragicException(core)) {
+ if (coreContainer.isZooKeeperAware()) {
+
coreContainer.getZkController().giveupLeadership(core.getCoreDescriptor());
+ try {
+ // If the error was something like a full file system disconnect,
this probably won't
+ // help, but if it is a transient disk failure then it's worth a
try.
+ core.getSolrCoreState().newIndexWriter(core, false); // should we
rollback?
+ } catch (IOException ioe) {
+ log.warn("Could not roll index writer after tragedy");
+ }
+ }
return SolrException.wrapLuceneTragicExceptionIfNecessary(e);
}
}
diff --git
a/solr/core/src/java/org/apache/solr/jersey/CatchAllExceptionMapper.java
b/solr/core/src/java/org/apache/solr/jersey/CatchAllExceptionMapper.java
index 9f5b106184e..889b8ceb657 100644
--- a/solr/core/src/java/org/apache/solr/jersey/CatchAllExceptionMapper.java
+++ b/solr/core/src/java/org/apache/solr/jersey/CatchAllExceptionMapper.java
@@ -89,7 +89,7 @@ public class CatchAllExceptionMapper implements
ExceptionMapper<Exception> {
ContainerRequestContext containerRequestContext) {
// First, handle any exception-related metrics
final Exception normalizedException =
- RequestHandlerBase.normalizeReceivedException(solrQueryRequest,
exception);
+ RequestHandlerBase.processReceivedException(solrQueryRequest,
exception);
final RequestHandlerBase.HandlerMetrics metrics =
(RequestHandlerBase.HandlerMetrics)
containerRequestContext.getProperty(HANDLER_METRICS);
if (metrics != null) {
diff --git
a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
index 4c55eaab9b5..60dbb9c4f28 100644
--- a/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
+++ b/solr/core/src/java/org/apache/solr/update/DirectUpdateHandler2.java
@@ -336,11 +336,18 @@ public class DirectUpdateHandler2 extends UpdateHandler
+ errorDetails;
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, errorMsg,
iae);
} catch (RuntimeException t) {
+ SolrException.ErrorCode errorCode =
+ core.getCoreContainer().checkTragicException(core)
+ ? SolrException.ErrorCode.SERVER_ERROR
+ : SolrException.ErrorCode.BAD_REQUEST;
String errorMsg =
"Exception writing document id "
+ cmd.getPrintableId()
- + " to the index; possible analysis error.";
- throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, errorMsg,
t);
+ + " to the index"
+ + (errorCode == SolrException.ErrorCode.SERVER_ERROR
+ ? "."
+ : "; possible analysis error.");
+ throw new SolrException(errorCode, errorMsg, t);
}
}
diff --git a/solr/core/src/java/org/apache/solr/update/TransactionLog.java
b/solr/core/src/java/org/apache/solr/update/TransactionLog.java
index f8c0d48731d..77044c5b2fb 100644
--- a/solr/core/src/java/org/apache/solr/update/TransactionLog.java
+++ b/solr/core/src/java/org/apache/solr/update/TransactionLog.java
@@ -37,6 +37,7 @@ import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.SolrInputDocument;
@@ -249,6 +250,9 @@ public class TransactionLog implements Closeable {
} catch (Exception e) {
log.error("Error closing tlog file (after error opening)", e);
}
+ if (!openExisting) {
+ IOUtils.deleteFilesIgnoringExceptions(tlog);
+ }
}
}
assert ObjectReleaseTracker.track(this);
diff --git a/solr/core/src/java/org/apache/solr/update/UpdateLog.java
b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
index 4d148ed0af4..b9862d7f008 100644
--- a/solr/core/src/java/org/apache/solr/update/UpdateLog.java
+++ b/solr/core/src/java/org/apache/solr/update/UpdateLog.java
@@ -743,7 +743,10 @@ public class UpdateLog implements PluginInfoInitialized,
SolrMetricProducer {
}
public long getLastLogId() {
- if (id != -1) return id;
+ return id == -1 ? scanLastLogId(tlogFiles) : id;
+ }
+
+ static long scanLastLogId(String[] tlogFiles) {
if (tlogFiles.length == 0) return -1;
String last = tlogFiles[tlogFiles.length - 1];
return Long.parseLong(last.substring(TLOG_NAME.length() + 1));
@@ -1590,10 +1593,45 @@ public class UpdateLog implements
PluginInfoInitialized, SolrMetricProducer {
}
}
+ /**
+ * Ensures a transaction log is ready. It is either the current one, or a
new one. This method
+ * must be called with the synchronization monitor on this {@link UpdateLog}.
+ */
protected void ensureLog() {
if (tlog == null) {
- String newLogName = String.format(Locale.ROOT, LOG_FILENAME_PATTERN,
TLOG_NAME, id);
- tlog = newTransactionLog(tlogDir.resolve(newLogName), globalStrings,
false);
+ Path newLogPath;
+ int numAttempts = 0;
+ while (true) {
+ String newLogName = String.format(Locale.ROOT, LOG_FILENAME_PATTERN,
TLOG_NAME, id);
+ newLogPath = tlogDir.resolve(newLogName);
+ // We expect that the log file does not exist since id is designed to
give the index of the
+ // next transaction log to create. But in very rare cases, the log
files listed in the
+ // init() method may be stale here (file system delay?), and id may
point to an existing
+ // file.
+ if (!Files.exists(newLogPath)) {
+ break;
+ }
+ // If the "new" log file already exists, refresh the log list and
recompute id.
+ // Ideally we would want to include the missing log in the old logs
tracking (see init()),
+ // but the init() method is not designed to be executed twice. Given
that in the rare cases
+ // we have seen this missing log, the log was always empty (file size
0), then we simply
+ // refresh log list and update the id.
+ try {
+ log.error(
+ "New transaction log already exists {} size={}, skipping it",
+ newLogPath,
+ Files.size(newLogPath));
+ } catch (IOException e) {
+ log.error("New transaction log already exists {} size unknown,
skipping it", newLogPath);
+ }
+ if (++numAttempts >= 2) {
+ throw new SolrException(
+ ErrorCode.SERVER_ERROR, "Cannot recover from already existing
logs");
+ }
+ tlogFiles = getLogList(tlogDir);
+ id = scanLastLogId(tlogFiles) + 1; // add 1 since we create a new log
+ }
+ tlog = newTransactionLog(newLogPath, globalStrings, false);
}
}
diff --git
a/solr/core/src/test/org/apache/solr/handler/RequestHandlerBaseTest.java
b/solr/core/src/test/org/apache/solr/handler/RequestHandlerBaseTest.java
index c3f2f9bd865..ba8fff85468 100644
--- a/solr/core/src/test/org/apache/solr/handler/RequestHandlerBaseTest.java
+++ b/solr/core/src/test/org/apache/solr/handler/RequestHandlerBaseTest.java
@@ -105,7 +105,7 @@ public class RequestHandlerBaseTest extends SolrTestCaseJ4 {
};
final Exception e = new SyntaxError("Some syntax error");
- final Exception normalized =
RequestHandlerBase.normalizeReceivedException(solrQueryRequest, e);
+ final Exception normalized =
RequestHandlerBase.processReceivedException(solrQueryRequest, e);
assertEquals(SolrException.class, normalized.getClass());
final SolrException normalizedSolrException = (SolrException) normalized;
@@ -123,7 +123,7 @@ public class RequestHandlerBaseTest extends SolrTestCaseJ4 {
};
final Exception e = new RuntimeException("Some generic,
non-SolrException");
- final Exception normalized =
RequestHandlerBase.normalizeReceivedException(solrQueryRequest, e);
+ final Exception normalized =
RequestHandlerBase.processReceivedException(solrQueryRequest, e);
assertEquals(normalized, e);
}
@@ -140,7 +140,7 @@ public class RequestHandlerBaseTest extends SolrTestCaseJ4 {
};
final Exception e = new RuntimeException("Some generic,
non-SolrException");
- final Exception normalized =
RequestHandlerBase.normalizeReceivedException(solrQueryRequest, e);
+ final Exception normalized =
RequestHandlerBase.processReceivedException(solrQueryRequest, e);
assertEquals(SolrException.class, normalized.getClass());
final SolrException normalizedSolrException = (SolrException) normalized;
diff --git a/solr/core/src/test/org/apache/solr/update/UpdateLogTest.java
b/solr/core/src/test/org/apache/solr/update/UpdateLogTest.java
index 6a35c3df32c..c951ad9141c 100644
--- a/solr/core/src/test/org/apache/solr/update/UpdateLogTest.java
+++ b/solr/core/src/test/org/apache/solr/update/UpdateLogTest.java
@@ -19,7 +19,12 @@ package org.apache.solr.update;
import static org.apache.solr.common.params.CommonParams.VERSION_FIELD;
import static org.hamcrest.core.StringContains.containsString;
+import java.io.IOException;
+import java.nio.channels.FileChannel;
+import java.nio.file.Path;
+import java.nio.file.StandardOpenOption;
import java.util.List;
+import java.util.Locale;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4;
@@ -218,6 +223,23 @@ public class UpdateLogTest extends SolrTestCaseJ4 {
}
}
+ @Test
+ public void testLogCleanupWhenNewTransactionLogConstructionFails() throws
Exception {
+ // Simulate the appearance of a new log file after UpdateLog.init().
+ Path tlogDir = Path.of(ulog.getTlogDir());
+ long lastLogId = scanLastLogId(tlogDir);
+ String newLogName =
+ String.format(
+ Locale.ROOT, UpdateLog.LOG_FILENAME_PATTERN, UpdateLog.TLOG_NAME,
lastLogId + 1);
+ Path newLogPath = tlogDir.resolve(newLogName);
+ createLogFile(newLogPath);
+
+ // Add a doc and expect no "New transaction log already exists" error when
creating the new
+ // transaction log.
+ ulogAdd(
+ ulog, null, sdoc("id", "1", "title_s", "title1", "val1_i_dvo", "1",
"_version_", "100"));
+ }
+
/** Simulate a commit on a given updateLog */
private static void ulogCommit(UpdateLog ulog) {
try (SolrQueryRequest req = req()) {
@@ -289,4 +311,21 @@ public class UpdateLogTest extends SolrTestCaseJ4 {
cmd.setVersion(Long.parseLong(cmd.solrDoc.getFieldValue(VERSION_FIELD).toString()));
return cmd;
}
+
+ private long scanLastLogId(Path tlogDir) throws IOException {
+ try (UpdateLog uLog = new UpdateLog()) {
+ String[] tlogFiles = uLog.getLogList(tlogDir);
+ return UpdateLog.scanLastLogId(tlogFiles);
+ }
+ }
+
+ private void createLogFile(Path logPath) throws IOException {
+ FileChannel fc =
+ FileChannel.open(
+ logPath,
+ StandardOpenOption.READ,
+ StandardOpenOption.WRITE,
+ StandardOpenOption.CREATE_NEW);
+ fc.close();
+ }
}