Author: kwright
Date: Sat Jan 19 13:30:07 2013
New Revision: 1435542
URL: http://svn.apache.org/viewvc?rev=1435542&view=rev
Log:
Pull up fix for CONNECTORS-616 from trunk.
Modified:
manifoldcf/branches/release-1.1-branch/ (props changed)
manifoldcf/branches/release-1.1-branch/CHANGES.txt
manifoldcf/branches/release-1.1-branch/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
Propchange: manifoldcf/branches/release-1.1-branch/
------------------------------------------------------------------------------
Merged /manifoldcf/trunk:r1435540
Modified: manifoldcf/branches/release-1.1-branch/CHANGES.txt
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/release-1.1-branch/CHANGES.txt?rev=1435542&r1=1435541&r2=1435542&view=diff
==============================================================================
--- manifoldcf/branches/release-1.1-branch/CHANGES.txt (original)
+++ manifoldcf/branches/release-1.1-branch/CHANGES.txt Sat Jan 19 13:30:07 2013
@@ -3,6 +3,12 @@ $Id$
======================= Release 1.1 =====================
+CONNECTORS-616: Work around Solr 4.0 or Jetty bug where connections
+are dropped randomly under multithreaded load. Broken pipe exceptions
+are now retried after a minute, up to three times, before the Solr
+connector concludes that the document cannot be indexed and skips it.
+(Shinichiro Abe, Karl Wright)
+
CONNECTORS-613: Add a way of getting a document's mime type
to Solr, since Tika needs mime type in order to extract content
since Solr 4.0.0.
Modified:
manifoldcf/branches/release-1.1-branch/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
URL:
http://svn.apache.org/viewvc/manifoldcf/branches/release-1.1-branch/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java?rev=1435542&r1=1435541&r2=1435542&view=diff
==============================================================================
---
manifoldcf/branches/release-1.1-branch/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
(original)
+++
manifoldcf/branches/release-1.1-branch/connectors/solr/connector/src/main/java/org/apache/manifoldcf/agents/output/solr/HttpPoster.java
Sat Jan 19 13:30:07 2013
@@ -403,14 +403,24 @@ public class HttpPoster
if (e.getClass().getName().equals("java.net.SocketException"))
{
- // Intercept "broken pipe" exception, since that seems to be what we get
if the ingestion API kills the socket right after a 400 goes out.
- // Basically, we have no choice but to interpret that in the same manner
as a 400, since no matter how we do it, it's a race and the 'broken pipe'
- // result is always possible. So we might as well expect it and treat
it properly.
+ // In the past we would have treated this as a straight document
rejection, and
+ // treated it in the same manner as a 400. The reasoning is that the
server can
+ // perfectly legally send out a 400 and drop the connection immediately
thereafter,
+ // so this is a race condition.
+ // However, Solr 4.0 (or the Jetty version that the example runs on)
seems
+ // to have a bug where it drops the connection when two
documents come in
+ // at the same time. This is the final version of Solr 4.0 so we need
to deal with
+ // this.
if (e.getMessage().toLowerCase().indexOf("broken pipe") != -1)
- // We've seen what looks like the ingestion interface forcibly closing
the socket.
- // We *choose* to interpret this just like a 400 response. However,
we log in the history using a different code,
- // since we really don't know what happened for sure.
- return;
+ // Treat it as a service interruption, but with a limited number of
retries.
+ // In that way we won't burden the user with a huge retry interval; it
should
+ // give up fairly quickly, and yet NOT give up if the error was merely
transient
+ throw new ServiceInterruption("Server dropped connection during
"+context+": "+e.getMessage(),
+ e,
+ currentTime + interruptionRetryTime,
+ -1L,
+ 3,
+ false);
// Other socket exceptions are service interruptions - but if we keep
getting them, it means
// that a socket timeout is probably set too low to accept this
particular document. So
@@ -418,7 +428,7 @@ public class HttpPoster
throw new ServiceInterruption("Socket timeout exception during
"+context+": "+e.getMessage(),
e,
currentTime + interruptionRetryTime,
- currentTime + 1L * 60L * 60000L,
+ currentTime + 20L * 60000L,
-1,
false);
}
@@ -811,10 +821,17 @@ public class HttpPoster
// Log what happened to us
activityStart = new Long(fullStartTime);
activityBytes = new Long(length);
- activityCode = "FAILED";
activityDetails = e.getMessage() +
((e.getCause() != null)?": "+e.getCause().getMessage():"");
+ // Broken pipe exceptions we log specially because they usually
mean
+ // Solr has rejected the document, and the user will want to know
that.
+ if (e.getCause() != null &&
e.getCause().getClass().getName().equals("java.net.SocketException") &&
+ activityDetails.indexOf("broken pipe") != -1)
+ activityCode = "SOLR REJECT";
+ else
+ activityCode = "FAILED";
+
// Rethrow; will interpret at a higher level
throw e;
}