Author: kwright
Date: Sat Dec 15 17:02:07 2018
New Revision: 1849000

URL: http://svn.apache.org/viewvc?rev=1849000&view=rev
Log:
More debugging and refactoring

Modified:
    
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java

Modified: 
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
URL: 
http://svn.apache.org/viewvc/manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java?rev=1849000&r1=1848999&r2=1849000&view=diff
==============================================================================
--- 
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
 (original)
+++ 
manifoldcf/trunk/framework/pull-agent/src/main/java/org/apache/manifoldcf/crawler/jobs/HopCount.java
 Sat Dec 15 17:02:07 2018
@@ -326,7 +326,7 @@ public class HopCount extends org.apache
     intrinsicLinkManager.restartCluster();
   }
   
-  /** Record a references from a set of documents to the root.  These will be marked as "new" or "existing", and
+  /** Record references from a set of documents to the root.  These will be marked as "new" or "existing", and
   * will have a null linktype.
   */
   public void recordSeedReferences(Long jobID, String[] legalLinkTypes, 
String[] targetDocumentIDHashes, int hopcountMethod, String processID)
@@ -433,6 +433,11 @@ public class HopCount extends org.apache
 
       if (sourceDocumentIDHash == null || sourceDocumentIDHash.length() == 0)
       {
+        // This is a seeding entry!!
+        // The distance we want to record, for all link types, is zero.  But we need to make sure a delete dependency is there for each answer that will match the seeding
+        // doFinish() query; otherwise the number we write is not going to be something we can invalidate if the seed goes away.
+        // This must be added in addToProcessingQueue.  It will be added as a dependency on a specific link type, though, e.g. "link" or "redirect", and not the generic
+        // empty string link type.  Invalidation must therefore be careful for seeds to invalidate all specific link types, and not just a generic empty string.
         for (int i = 0; i < estimates.length; i++)
         {
           estimates[i] = new Answer(0);
@@ -722,6 +727,17 @@ public class HopCount extends org.apache
     //IResultSet set = performQuery("SELECT 
"+parentIDField+","+linkTypeField+" FROM "+getTableName()+" WHERE "+
     //      parentIDField+" IN("+query+") AND 
"+jobIDField+"=?",list,null,null);
     IResultSet set = performQuery("SELECT 
"+parentIDHashField+","+linkTypeField+","+distanceField+" FROM 
"+getTableName()+" WHERE "+query,newList,null,null);
+    if (Logging.hopcount.isDebugEnabled()) {
+      final StringBuilder sb = new StringBuilder();
+      for (int q = 0; q < list.size(); q++) {
+        sb.append(" '").append((String)list.get(q)).append("' ");
+      }
+      final StringBuilder sb2 = new StringBuilder();
+      for (String lt : affectedLinkTypes) {
+        sb2.append(" '").append(lt).append("' ");
+      }
+      Logging.hopcount.debug("Looked for existing records matching link types: 
["+sb2+"] parent hashes: ["+sb+"]; found "+set.getRowCount()+" matches");
+    }
     int i = 0;
     while (i < set.getRowCount())
     {
@@ -748,7 +764,8 @@ public class HopCount extends org.apache
   *@param jobID is the job the documents belong to.
   *@param affectedLinkTypes are the set of affected link types.
   *@param documentIDHashes are the documents to add.
-  *@param startingAnswers are the hopcounts for the documents as they are currently known.
+  *@param startingAnswers are the hopcounts and delete dependencies for the source document as they are currently known.
+  *               The size of this array is the same as the size of the affectedLinkTypes array.
   *@param sourceDocumentIDHash is the source document identifier for the links from source to target documents.
   *@param linkType is the link type for this queue addition.
   *@param hopcountMethod is the desired method of managing hopcounts.
@@ -845,13 +862,13 @@ public class HopCount extends org.apache
         Question q = new Question(documentIDHash,affectedLinkType);
 
         // Calculate what our new answer would be.
-        Answer startingAnswer = (Answer)answerMap.get(affectedLinkType);
+        Answer startingAnswer = answerMap.get(affectedLinkType);
         int newAnswerValue = startingAnswer.getAnswer();
         if (newAnswerValue >= 0 && affectedLinkType.equals(linkType))
           newAnswerValue++;
 
         // Now, see if there's a distance already present.
-        Long currentDistance = (Long)matchMap.get(q);
+        Long currentDistance = matchMap.get(q);
         if (currentDistance == null)
         {
           // Prepare to do an insert.
@@ -876,10 +893,10 @@ public class HopCount extends org.apache
           if (hopcountMethod != IJobDescription.HOPCOUNT_NEVERDELETE)
           {
             deleteDepsManager.writeDependency(hopCountID,jobID,dd);
-            Iterator iter2 = startingAnswer.getDeleteDependencies();
+            Iterator<DeleteDependency> iter2 = 
startingAnswer.getDeleteDependencies();
             while (iter2.hasNext())
             {
-              dd = (DeleteDependency)iter2.next();
+              dd = iter2.next();
               deleteDepsManager.writeDependency(hopCountID,jobID,dd);
             }
           }


Reply via email to