Author: kwright
Date: Fri Aug 14 11:19:04 2015
New Revision: 1695857
URL: http://svn.apache.org/r1695857
Log:
Improve scheduling debug logging. Part of CONNECTORS-1227.
Modified:
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/QueueTracker.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocumentSet.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Modified:
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/QueueTracker.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/QueueTracker.java?rev=1695857&r1=1695856&r2=1695857&view=diff
==============================================================================
---
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/QueueTracker.java
(original)
+++
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/QueueTracker.java
Fri Aug 14 11:19:04 2015
@@ -18,7 +18,6 @@
*/
package org.apache.manifoldcf.crawler.interfaces;
-import org.apache.manifoldcf.crawler.system.Logging;
import org.apache.manifoldcf.core.interfaces.*;
import java.io.*;
import java.util.*;
@@ -80,16 +79,6 @@ public class QueueTracker
*/
public void addRecord(String[] binNames)
{
- if (Logging.scheduling.isDebugEnabled())
- {
- StringBuilder sb = new StringBuilder();
- int j = 0;
- while (j < binNames.length)
- {
- sb.append(binNames[j++]).append(" ");
- }
- Logging.scheduling.debug("Putting document with bins ["+sb.toString()+"]
onto active queue");
- }
int i = 0;
while (i < binNames.length)
{
@@ -114,9 +103,6 @@ public class QueueTracker
*/
public void noteConnectionPerformance(int docCount, String connectionName,
long elapsedTime)
{
- if (Logging.scheduling.isDebugEnabled())
- Logging.scheduling.debug("Worker thread for connection
"+connectionName+" took "+new Long(elapsedTime).toString()+"ms to handle
"+Integer.toString(docCount)+" documents");
-
performanceStatistics.noteDocumentsCompleted(connectionName,docCount,elapsedTime);
}
@@ -131,17 +117,6 @@ public class QueueTracker
*/
public void beginProcessing(String[] binNames)
{
- if (Logging.scheduling.isDebugEnabled())
- {
- StringBuilder sb = new StringBuilder();
- int j = 0;
- while (j < binNames.length)
- {
- sb.append(binNames[j++]).append(" ");
- }
- Logging.scheduling.debug("Handing document with bins ["+sb.toString()+"]
to worker thread");
- }
-
// Effectively, we are moving the document from one status to another, so
we adjust the bin counts of the source and
// the target both.
@@ -181,17 +156,6 @@ public class QueueTracker
*/
public void endProcessing(String[] binNames)
{
- if (Logging.scheduling.isDebugEnabled())
- {
- StringBuilder sb = new StringBuilder();
- int j = 0;
- while (j < binNames.length)
- {
- sb.append(binNames[j++]).append(" ");
- }
- Logging.scheduling.debug("Worker thread done document with bins
["+sb.toString()+"]");
- }
-
// Remove the document from the active queue, by decrementing the
corresponding active bin counts.
int i = 0;
@@ -242,20 +206,7 @@ public class QueueTracker
}
// Take the ith root of the bin rating, and leave it in log form
- double rval = ratingLog/(double)i;
-
- if (false && Logging.scheduling.isDebugEnabled())
- {
- StringBuilder sb = new StringBuilder();
- int j = 0;
- while (j < binNames.length)
- {
- sb.append(binNames[j++]).append(" ");
- }
- Logging.scheduling.debug("Document with bins ["+sb.toString()+"] given
assignment rating "+new Double(rval).toString());
- }
-
- return rval;
+ return ratingLog/(double)i;
}
Modified:
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1695857&r1=1695856&r2=1695857&view=diff
==============================================================================
---
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
(original)
+++
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
Fri Aug 14 11:19:04 2015
@@ -2724,7 +2724,7 @@ public class JobManager implements IJobM
ThrottleLimit vList = new ThrottleLimit(n);
IResultSet jobconnections = jobs.getActiveJobConnections();
- HashMap connectionSet = new HashMap();
+ Set<String> connectionSet = new HashSet<String>();
int i = 0;
while (i < jobconnections.getRowCount())
{
@@ -2732,29 +2732,26 @@ public class JobManager implements IJobM
Long jobid = (Long)row.getValue("jobid");
String connectionName = (String)row.getValue("connectionname");
vList.addJob(jobid,connectionName);
- connectionSet.put(connectionName,connectionName);
+ connectionSet.add(connectionName);
}
// Find the active connection names. We'll load these, and then get
throttling info
// from each one.
String[] activeConnectionNames = new String[connectionSet.size()];
- Iterator iter = connectionSet.keySet().iterator();
i = 0;
- while (iter.hasNext())
+ for (String connectionName : connectionSet)
{
- activeConnectionNames[i++] = (String)iter.next();
+ activeConnectionNames[i++] = connectionName;
}
IRepositoryConnection[] connections =
connectionMgr.loadMultiple(activeConnectionNames);
// Accumulate a sum of the max_connection_count * avg_connection_rate
values, so we can calculate the appropriate adjustment
// factor and set the connection limits.
- HashMap rawFetchCounts = new HashMap();
+ Map<String,Double> rawFetchCounts = new HashMap<String,Double>();
double rawFetchCountTotal = 0.0;
- i = 0;
- while (i < connections.length)
+ for (IRepositoryConnection connection : connections)
{
- IRepositoryConnection connection = connections[i++];
String connectionName = connection.getName();
int maxConnections = connection.getMaxConnections();
double avgFetchRate =
statistics.calculateConnectionFetchRate(connectionName);
@@ -2771,18 +2768,14 @@ public class JobManager implements IJobM
// and also randomly select an extra fetch based on the fractional
probability. (This latter is
// necessary for the case where the maximum fetch rate is specified to be
pretty low.)
//
- i = 0;
- while (i < connections.length)
+ for (IRepositoryConnection connection : connections)
{
- IRepositoryConnection connection = connections[i++];
String connectionName = connection.getName();
// Check if throttled...
String[] throttles = connection.getThrottles();
- int k = 0;
- while (k < throttles.length)
+ for (String throttle : throttles)
{
// The key is the regexp value itself
- String throttle = throttles[k++];
float throttleValue = connection.getThrottleValue(throttle);
// For the given connection, set the fetch limit per bin. This is
calculated using the time interval
// and the desired fetch rate. The fractional remainder is used to
conditionally provide an "extra fetch"
@@ -2801,7 +2794,7 @@ public class JobManager implements IJobM
vList.addLimit(connectionName,throttle,fetches);
}
// For the overall connection, we also have a limit which is based on
the number of connections there are actually available.
- Double weightedRawFetchCount =
(Double)rawFetchCounts.get(connectionName);
+ Double weightedRawFetchCount = rawFetchCounts.get(connectionName);
double adjustedFetchCount = weightedRawFetchCount.doubleValue() *
fetchCountAdjustmentFactor;
// Note well: Queuing starvation that results from there being very few
available documents for high-priority connections is dealt with here by simply
allowing
@@ -2856,7 +2849,7 @@ public class JobManager implements IJobM
// have any chance of getting returned twice.
// Accumulate the answers here
- ArrayList answers = new ArrayList();
+ List<DocumentDescription> answers = new ArrayList<DocumentDescription>();
// The current time value
Long currentTimeValue = new Long(currentTime);
@@ -2887,10 +2880,9 @@ public class JobManager implements IJobM
// Convert the saved answers to an array
DocumentDescription[] rval = new DocumentDescription[answers.size()];
i = 0;
- while (i < rval.length)
+ for (DocumentDescription answer : answers)
{
- rval[i] = (DocumentDescription)answers.get(i);
- i++;
+ rval[i++] = answer;
}
// After we're done pulling stuff from the queue, find the eligible row
with the best priority on the queue, and save the bins for assessment.
@@ -2983,7 +2975,7 @@ public class JobManager implements IJobM
}
/** Fetch and process documents matching the passed-in criteria */
- protected void fetchAndProcessDocuments(ArrayList answers, Long
currentTimeValue, Long currentPriorityValue,
+ protected void fetchAndProcessDocuments(List<DocumentDescription> answers,
Long currentTimeValue, Long currentPriorityValue,
ThrottleLimit vList, IRepositoryConnection[] connections, String processID)
throws ManifoldCFException
{
@@ -3111,8 +3103,7 @@ public class JobManager implements IJobM
Map<String,DocumentDescription> storageMap = new
HashMap<String,DocumentDescription>();
Map<String,Integer> statusMap = new HashMap<String,Integer>();
- int i = 0;
- while (i < set.getRowCount())
+ for (int i = 0; i < set.getRowCount(); i++)
{
IResultRow row = set.getRow(i);
Long id = (Long)row.getValue(jobQueue.idField);
@@ -3142,26 +3133,21 @@ public class JobManager implements IJobM
Double docPriority =
(Double)row.getValue(jobQueue.docPriorityField);
Logging.scheduling.debug("Stuffing document '"+docID+"' that
has priority "+docPriority.toString()+" onto active list");
}
- i++;
}
// No duplicates are possible here
java.util.Arrays.sort(docIDHashes);
- i = 0;
- while (i < docIDHashes.length)
+ for (String docIDHash : docIDHashes)
{
- String docIDHash = docIDHashes[i];
- DocumentDescription dd =
(DocumentDescription)storageMap.get(docIDHash);
+ DocumentDescription dd = storageMap.get(docIDHash);
Long id = dd.getID();
- int status = ((Integer)statusMap.get(docIDHash)).intValue();
+ int status = statusMap.get(docIDHash).intValue();
// Set status to "ACTIVE".
jobQueue.updateActiveRecord(id,status,processID);
answers.add(dd);
-
- i++;
}
TrackerClass.notePrecommit();
database.performCommit();
Modified:
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocumentSet.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocumentSet.java?rev=1695857&r1=1695856&r2=1695857&view=diff
==============================================================================
---
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocumentSet.java
(original)
+++
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocumentSet.java
Fri Aug 14 11:19:04 2015
@@ -73,6 +73,16 @@ public class QueuedDocumentSet
while (l < documents.length)
{
QueuedDocument d = documents[l++];
+ if (Logging.scheduling.isDebugEnabled())
+ {
+ StringBuilder sb = new StringBuilder();
+ for (String binName : d.getBinNames())
+ {
+ sb.append(binName).append(" ");
+ }
+ Logging.scheduling.debug("Handing document '"+d.getDocumentDescription().getDocumentIdentifier()+"' with bins ["+sb.toString()+"] to worker thread");
+ }
+
queueTracker.beginProcessing(d.getBinNames());
}
}
@@ -84,6 +94,16 @@ public class QueuedDocumentSet
while (l < documents.length)
{
QueuedDocument d = documents[l++];
+ if (Logging.scheduling.isDebugEnabled())
+ {
+ StringBuilder sb = new StringBuilder();
+ for (String binName : d.getBinNames())
+ {
+ sb.append(binName).append(" ");
+ }
+ Logging.scheduling.debug("Worker thread done document '"+d.getDocumentDescription().getDocumentIdentifier()+"' with bins ["+sb.toString()+"]");
+ }
+
queueTracker.endProcessing(d.getBinNames());
}
@@ -100,8 +120,19 @@ public class QueuedDocumentSet
int i = 0;
while (i < documents.length)
{
- String[] binNames = documents[i++].getBinNames();
- ratingAccumulator +=
overlapCalculator.calculateAssignmentRating(binNames,connection);
+ QueuedDocument d = documents[i++];
+ double rating = overlapCalculator.calculateAssignmentRating(d.getBinNames(),connection);
+ if (false && Logging.scheduling.isDebugEnabled())
+ {
+ StringBuilder sb = new StringBuilder();
+ for (String binName : d.getBinNames())
+ {
+ sb.append(binName).append(" ");
+ }
+ Logging.scheduling.debug("Document '"+d.getDocumentDescription().getDocumentIdentifier()+"' with bins ["+sb.toString()+"] given assignment rating "+new Double(rating).toString());
+ }
+
+ ratingAccumulator += rating;
}
return ratingAccumulator / (double)documents.length;
Modified:
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java?rev=1695857&r1=1695856&r2=1695857&view=diff
==============================================================================
---
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
(original)
+++
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
Fri Aug 14 11:19:04 2015
@@ -376,6 +376,16 @@ public class StufferThread extends Threa
set.add(qd);
// Note the queuing activity
+ if (Logging.scheduling.isDebugEnabled())
+ {
+ StringBuilder sb = new StringBuilder();
+ int j = 0;
+ while (j < binNames.length)
+ {
+ sb.append(binNames[j++]).append(" ");
+ }
+ Logging.scheduling.debug("Putting document '"+descs[i].getDocumentIdentifier()+"' with bins ["+sb.toString()+"] onto active queue");
+ }
queueTracker.addRecord(binNames);
if (set.size() >= maxDocuments)
Modified:
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL:
http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1695857&r1=1695856&r2=1695857&view=diff
==============================================================================
---
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
(original)
+++
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
Fri Aug 14 11:19:04 2015
@@ -731,7 +731,10 @@ public class WorkerThread extends Thread
// Successful processing of the set
// We count 'get version' time in the average, so even if
we decide not to process a doc
// it still counts.
- queueTracker.noteConnectionPerformance(qds.getCount(),connectionName,System.currentTimeMillis() - processingStartTime);
+ long elapsedTime = System.currentTimeMillis() - processingStartTime;
+ if (Logging.scheduling.isDebugEnabled())
+ Logging.scheduling.debug("Worker thread for connection "+connectionName+" took "+new Long(elapsedTime).toString()+"ms to handle "+Integer.toString(qds.getCount())+" documents");
+
queueTracker.noteConnectionPerformance(qds.getCount(),connectionName,elapsedTime);
}