Author: kwright
Date: Fri Aug 14 11:19:04 2015
New Revision: 1695857

URL: http://svn.apache.org/r1695857
Log:
Improve scheduling debug logging.  Part of CONNECTORS-1227.

Modified:
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/QueueTracker.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocumentSet.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
    manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/QueueTracker.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/QueueTracker.java?rev=1695857&r1=1695856&r2=1695857&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/QueueTracker.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/interfaces/QueueTracker.java Fri Aug 14 11:19:04 2015
@@ -18,7 +18,6 @@
 */
 package org.apache.manifoldcf.crawler.interfaces;
 
-import org.apache.manifoldcf.crawler.system.Logging;
 import org.apache.manifoldcf.core.interfaces.*;
 import java.io.*;
 import java.util.*;
@@ -80,16 +79,6 @@ public class QueueTracker
   */
   public void addRecord(String[] binNames)
   {
-    if (Logging.scheduling.isDebugEnabled())
-    {
-      StringBuilder sb = new StringBuilder();
-      int j = 0;
-      while (j < binNames.length)
-      {
-        sb.append(binNames[j++]).append(" ");
-      }
-      Logging.scheduling.debug("Putting document with bins ["+sb.toString()+"] 
onto active queue");
-    }
     int i = 0;
     while (i < binNames.length)
     {
@@ -114,9 +103,6 @@ public class QueueTracker
   */
   public void noteConnectionPerformance(int docCount, String connectionName, 
long elapsedTime)
   {
-    if (Logging.scheduling.isDebugEnabled())
-      Logging.scheduling.debug("Worker thread for connection 
"+connectionName+" took "+new Long(elapsedTime).toString()+"ms to handle 
"+Integer.toString(docCount)+" documents");
-
     
performanceStatistics.noteDocumentsCompleted(connectionName,docCount,elapsedTime);
   }
 
@@ -131,17 +117,6 @@ public class QueueTracker
   */
   public void beginProcessing(String[] binNames)
   {
-    if (Logging.scheduling.isDebugEnabled())
-    {
-      StringBuilder sb = new StringBuilder();
-      int j = 0;
-      while (j < binNames.length)
-      {
-        sb.append(binNames[j++]).append(" ");
-      }
-      Logging.scheduling.debug("Handing document with bins ["+sb.toString()+"] 
to worker thread");
-    }
-
     // Effectively, we are moving the document from one status to another, so 
we adjust the bin counts of the source and
     // the target both.
 
@@ -181,17 +156,6 @@ public class QueueTracker
   */
   public void endProcessing(String[] binNames)
   {
-    if (Logging.scheduling.isDebugEnabled())
-    {
-      StringBuilder sb = new StringBuilder();
-      int j = 0;
-      while (j < binNames.length)
-      {
-        sb.append(binNames[j++]).append(" ");
-      }
-      Logging.scheduling.debug("Worker thread done document with bins 
["+sb.toString()+"]");
-    }
-
     // Remove the document from the active queue, by decrementing the 
corresponding active bin counts.
 
     int i = 0;
@@ -242,20 +206,7 @@ public class QueueTracker
     }
 
     // Take the ith root of the bin rating, and leave it in log form
-    double rval = ratingLog/(double)i;
-
-    if (false && Logging.scheduling.isDebugEnabled())
-    {
-      StringBuilder sb = new StringBuilder();
-      int j = 0;
-      while (j < binNames.length)
-      {
-        sb.append(binNames[j++]).append(" ");
-      }
-      Logging.scheduling.debug("Document with bins ["+sb.toString()+"] given 
assignment rating "+new Double(rval).toString());
-    }
-
-    return rval;
+    return ratingLog/(double)i;
   }
 
 

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java?rev=1695857&r1=1695856&r2=1695857&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/JobManager.java Fri Aug 14 11:19:04 2015
@@ -2724,7 +2724,7 @@ public class JobManager implements IJobM
     ThrottleLimit vList = new ThrottleLimit(n);
 
     IResultSet jobconnections = jobs.getActiveJobConnections();
-    HashMap connectionSet = new HashMap();
+    Set<String> connectionSet = new HashSet<String>();
     int i = 0;
     while (i < jobconnections.getRowCount())
     {
@@ -2732,29 +2732,26 @@ public class JobManager implements IJobM
       Long jobid = (Long)row.getValue("jobid");
       String connectionName = (String)row.getValue("connectionname");
       vList.addJob(jobid,connectionName);
-      connectionSet.put(connectionName,connectionName);
+      connectionSet.add(connectionName);
     }
 
     // Find the active connection names.  We'll load these, and then get 
throttling info
     // from each one.
     String[] activeConnectionNames = new String[connectionSet.size()];
-    Iterator iter = connectionSet.keySet().iterator();
     i = 0;
-    while (iter.hasNext())
+    for (String connectionName : connectionSet)
     {
-      activeConnectionNames[i++] = (String)iter.next();
+      activeConnectionNames[i++] = connectionName;
     }
     IRepositoryConnection[] connections = 
connectionMgr.loadMultiple(activeConnectionNames);
 
 
     // Accumulate a sum of the max_connection_count * avg_connection_rate 
values, so we can calculate the appropriate adjustment
     // factor and set the connection limits.
-    HashMap rawFetchCounts = new HashMap();
+    Map<String,Double> rawFetchCounts = new HashMap<String,Double>();
     double rawFetchCountTotal = 0.0;
-    i = 0;
-    while (i < connections.length)
+    for (IRepositoryConnection connection : connections)
     {
-      IRepositoryConnection connection = connections[i++];
       String connectionName = connection.getName();
       int maxConnections = connection.getMaxConnections();
       double avgFetchRate = 
statistics.calculateConnectionFetchRate(connectionName);
@@ -2771,18 +2768,14 @@ public class JobManager implements IJobM
     // and also randomly select an extra fetch based on the fractional 
probability.  (This latter is
     // necessary for the case where the maximum fetch rate is specified to be 
pretty low.)
     //
-    i = 0;
-    while (i < connections.length)
+    for (IRepositoryConnection connection : connections)
     {
-      IRepositoryConnection connection = connections[i++];
       String connectionName = connection.getName();
       // Check if throttled...
       String[] throttles = connection.getThrottles();
-      int k = 0;
-      while (k < throttles.length)
+      for (String throttle : throttles)
       {
         // The key is the regexp value itself
-        String throttle = throttles[k++];
         float throttleValue = connection.getThrottleValue(throttle);
         // For the given connection, set the fetch limit per bin.  This is 
calculated using the time interval
         // and the desired fetch rate.  The fractional remainder is used to 
conditionally provide an "extra fetch"
@@ -2801,7 +2794,7 @@ public class JobManager implements IJobM
         vList.addLimit(connectionName,throttle,fetches);
       }
       // For the overall connection, we also have a limit which is based on 
the number of connections there are actually available.
-      Double weightedRawFetchCount = 
(Double)rawFetchCounts.get(connectionName);
+      Double weightedRawFetchCount = rawFetchCounts.get(connectionName);
       double adjustedFetchCount = weightedRawFetchCount.doubleValue() * 
fetchCountAdjustmentFactor;
 
       // Note well: Queuing starvation that results from there being very few 
available documents for high-priority connections is dealt with here by simply 
allowing
@@ -2856,7 +2849,7 @@ public class JobManager implements IJobM
     // have any chance of getting returned twice.
 
     // Accumulate the answers here
-    ArrayList answers = new ArrayList();
+    List<DocumentDescription> answers = new ArrayList<DocumentDescription>();
 
     // The current time value
     Long currentTimeValue = new Long(currentTime);
@@ -2887,10 +2880,9 @@ public class JobManager implements IJobM
     // Convert the saved answers to an array
     DocumentDescription[] rval = new DocumentDescription[answers.size()];
     i = 0;
-    while (i < rval.length)
+    for (DocumentDescription answer : answers)
     {
-      rval[i] = (DocumentDescription)answers.get(i);
-      i++;
+      rval[i++] = answer;
     }
 
     // After we're done pulling stuff from the queue, find the eligible row 
with the best priority on the queue, and save the bins for assessment.
@@ -2983,7 +2975,7 @@ public class JobManager implements IJobM
   }
 
   /** Fetch and process documents matching the passed-in criteria */
-  protected void fetchAndProcessDocuments(ArrayList answers, Long 
currentTimeValue, Long currentPriorityValue,
+  protected void fetchAndProcessDocuments(List<DocumentDescription> answers, 
Long currentTimeValue, Long currentPriorityValue,
     ThrottleLimit vList, IRepositoryConnection[] connections, String processID)
     throws ManifoldCFException
   {
@@ -3111,8 +3103,7 @@ public class JobManager implements IJobM
             Map<String,DocumentDescription> storageMap = new 
HashMap<String,DocumentDescription>();
             Map<String,Integer> statusMap = new HashMap<String,Integer>();
 
-            int i = 0;
-            while (i < set.getRowCount())
+            for (int i = 0; i < set.getRowCount(); i++)
             {
               IResultRow row = set.getRow(i);
               Long id = (Long)row.getValue(jobQueue.idField);
@@ -3142,26 +3133,21 @@ public class JobManager implements IJobM
                 Double docPriority = 
(Double)row.getValue(jobQueue.docPriorityField);
                 Logging.scheduling.debug("Stuffing document '"+docID+"' that 
has priority "+docPriority.toString()+" onto active list");
               }
-              i++;
             }
 
             // No duplicates are possible here
             java.util.Arrays.sort(docIDHashes);
 
-            i = 0;
-            while (i < docIDHashes.length)
+            for (String docIDHash : docIDHashes)
             {
-              String docIDHash = docIDHashes[i];
-              DocumentDescription dd = 
(DocumentDescription)storageMap.get(docIDHash);
+              DocumentDescription dd = storageMap.get(docIDHash);
               Long id = dd.getID();
-              int status = ((Integer)statusMap.get(docIDHash)).intValue();
+              int status = statusMap.get(docIDHash).intValue();
 
               // Set status to "ACTIVE".
               jobQueue.updateActiveRecord(id,status,processID);
 
               answers.add(dd);
-
-              i++;
             }
             TrackerClass.notePrecommit();
             database.performCommit();

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocumentSet.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocumentSet.java?rev=1695857&r1=1695856&r2=1695857&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocumentSet.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/QueuedDocumentSet.java Fri Aug 14 11:19:04 2015
@@ -73,6 +73,16 @@ public class QueuedDocumentSet
     while (l < documents.length)
     {
       QueuedDocument d = documents[l++];
+      if (Logging.scheduling.isDebugEnabled())
+      {
+        StringBuilder sb = new StringBuilder();
+        for (String binName : d.getBinNames())
+        {
+          sb.append(binName).append(" ");
+        }
+        Logging.scheduling.debug("Handing document 
'"+d.getDocumentDescription().getDocumentIdentifier()+"' with bins 
["+sb.toString()+"] to worker thread");
+      }
+
       queueTracker.beginProcessing(d.getBinNames());
     }
   }
@@ -84,6 +94,16 @@ public class QueuedDocumentSet
     while (l < documents.length)
     {
       QueuedDocument d = documents[l++];
+      if (Logging.scheduling.isDebugEnabled())
+      {
+        StringBuilder sb = new StringBuilder();
+        for (String binName : d.getBinNames())
+        {
+          sb.append(binName).append(" ");
+        }
+        Logging.scheduling.debug("Worker thread done document 
'"+d.getDocumentDescription().getDocumentIdentifier()+"' with bins 
["+sb.toString()+"]");
+      }
+
       queueTracker.endProcessing(d.getBinNames());
     }
 
@@ -100,8 +120,19 @@ public class QueuedDocumentSet
     int i = 0;
     while (i < documents.length)
     {
-      String[] binNames = documents[i++].getBinNames();
-      ratingAccumulator += 
overlapCalculator.calculateAssignmentRating(binNames,connection);
+      QueuedDocument d = documents[i++];
+      double rating = 
overlapCalculator.calculateAssignmentRating(d.getBinNames(),connection);
+      if (false && Logging.scheduling.isDebugEnabled())
+      {
+        StringBuilder sb = new StringBuilder();
+        for (String binName : d.getBinNames())
+        {
+          sb.append(binName).append(" ");
+        }
+        Logging.scheduling.debug("Document 
'"+d.getDocumentDescription().getDocumentIdentifier()+"' with bins 
["+sb.toString()+"] given assignment rating "+new Double(rating).toString());
+      }
+
+      ratingAccumulator += rating;
     }
 
     return ratingAccumulator / (double)documents.length;

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java?rev=1695857&r1=1695856&r2=1695857&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/StufferThread.java Fri Aug 14 11:19:04 2015
@@ -376,6 +376,16 @@ public class StufferThread extends Threa
             set.add(qd);
 
             // Note the queuing activity
+            if (Logging.scheduling.isDebugEnabled())
+            {
+              StringBuilder sb = new StringBuilder();
+              int j = 0;
+              while (j < binNames.length)
+              {
+                sb.append(binNames[j++]).append(" ");
+              }
+              Logging.scheduling.debug("Putting document 
'"+descs[i].getDocumentIdentifier()+"' with bins ["+sb.toString()+"] onto 
active queue");
+            }
             queueTracker.addRecord(binNames);
 
             if (set.size() >= maxDocuments)

Modified: manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java?rev=1695857&r1=1695856&r2=1695857&view=diff
==============================================================================
--- manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java (original)
+++ manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/system/WorkerThread.java Fri Aug 14 11:19:04 2015
@@ -731,7 +731,10 @@ public class WorkerThread extends Thread
                     // Successful processing of the set
                     // We count 'get version' time in the average, so even if 
we decide not to process a doc
                     // it still counts.
-                    
queueTracker.noteConnectionPerformance(qds.getCount(),connectionName,System.currentTimeMillis()
 - processingStartTime);
+                    long elapsedTime = System.currentTimeMillis() - 
processingStartTime;
+                    if (Logging.scheduling.isDebugEnabled())
+                      Logging.scheduling.debug("Worker thread for connection 
"+connectionName+" took "+new Long(elapsedTime).toString()+"ms to handle 
"+Integer.toString(qds.getCount())+" documents");
+                    
queueTracker.noteConnectionPerformance(qds.getCount(),connectionName,elapsedTime);
 
                   }
                   


Reply via email to