Author: kwright
Date: Tue Aug 14 06:29:38 2012
New Revision: 1372743
URL: http://svn.apache.org/viewvc?rev=1372743&view=rev
Log:
More downstream fixes as a result of CONNECTORS-501. This time fixing handling
of ServiceInterruptions.
Modified:
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Modified:
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1372743&r1=1372742&r2=1372743&view=diff
==============================================================================
---
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
(original)
+++
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Tue Aug 14 06:29:38 2012
@@ -335,6 +335,9 @@ public class WorkerThread extends Thread
job.getID()+" connection
'"+job.getConnectionName()+"': "+
e.getMessage());
+ if (e.isAbortOnFail())
+ abortOnFail = new ManifoldCFException("Repeated
service interruptions - failure getting document
version"+((e.getCause()!=null)?": "+e.getCause().getMessage():""),e.getCause());
+
// Mark the current documents to be recrawled at the
// time specified, with the proper error handling.
List<QueuedDocument> newActiveList = new
ArrayList<QueuedDocument>(activeDocuments.size());
@@ -351,7 +354,6 @@ public class WorkerThread extends Thread
// Treat this as a hard failure.
if (e.isAbortOnFail())
{
- abortOnFail = new ManifoldCFException("Repeated
service interruptions - failure getting document
version"+((e.getCause()!=null)?": "+e.getCause().getMessage():""),e.getCause());
rescanList.add(qd);
}
// We want this particular document to be not
included in the
@@ -517,7 +519,7 @@ public class WorkerThread extends Thread
try
{
- // Fetchlist contains what we need to process.
+ // Finishlist and Fetchlist are parallel. Fetchlist
contains what we need to process.
if (fetchList.size() > 0)
{
// Build a list of id's and flags
@@ -579,34 +581,52 @@ public class WorkerThread extends Thread
deleteList.clear();
ArrayList requeueList = new ArrayList();
+ if (e.isAbortOnFail())
+ abortOnFail = new
ManifoldCFException("Repeated service interruptions - failure processing
document"+((e.getCause()!=null)?":
"+e.getCause().getMessage():""),e.getCause());
+
+ Set<String> fetchDocuments = new
HashSet<String>();
+ for (int i = 0; i < fetchList.size(); i++)
+ {
+
fetchDocuments.add(fetchList.get(i).getDocument().getDocumentDescription().getDocumentIdentifierHash());
+ }
+ List<QueuedDocument> newFinishList = new
ArrayList<QueuedDocument>();
for (int i = 0; i < finishList.size(); i++)
{
QueuedDocument qd = finishList.get(i);
- DocumentDescription dd =
qd.getDocumentDescription();
- if (dd.getFailTime() != -1L &&
dd.getFailTime() < e.getRetryTime() ||
- dd.getFailRetryCount() == 0)
+ if
(fetchDocuments.contains(qd.getDocumentDescription().getDocumentIdentifierHash()))
{
- // Treat this as a hard failure.
- if (e.isAbortOnFail())
+ DocumentDescription dd =
qd.getDocumentDescription();
+ if (dd.getFailTime() != -1L &&
dd.getFailTime() < e.getRetryTime() ||
+ dd.getFailRetryCount() == 0)
{
- abortOnFail = new
ManifoldCFException("Repeated service interruptions - failure processing
document"+((e.getCause()!=null)?":
"+e.getCause().getMessage():""),e.getCause());
- rescanList.add(qd);
+ // Treat this as a hard failure.
+ if (e.isAbortOnFail())
+ {
+ rescanList.add(qd);
+ }
+ else
+ {
+ // We want this particular document to
be not included in the
+ // reprocessing. Therefore, we do the
same thing as we would
+ // if we got back a null version.
+ deleteList.add(qd);
+ }
+ }
+ else
+ {
+ requeueList.add(qd);
}
- // We want this particular document to be
not included in the
- // reprocessing. Therefore, we do the same
thing as we would
- // if we got back a null version.
- deleteList.add(qd);
}
else
- {
- requeueList.add(qd);
- }
+ newFinishList.add(qd);
}
// Requeue the documents we've identified
requeueDocuments(jobManager,requeueList,e.getRetryTime(),e.getFailTime(),
e.getFailRetryCount());
+ // We've disposed of every document that was being fetched, so
only the documents that were not part of the fetch stay on finishList
+ finishList = newFinishList;
}
} // End of fetching