Forgot to attach the patch. Here it is:

--------------------------- patch.txt.20040725 ------------------------------

diff -Nur --exclude='*.txt' --exclude='*.xml' nutch-cvs-20040725/src/java/net/nutch/fetcher/Fetcher.java nutch-cvs-20040725.xing/src/java/net/nutch/fetcher/Fetcher.java
--- nutch-cvs-20040725/src/java/net/nutch/fetcher/Fetcher.java  2004-06-16 10:31:31.000000000 -0700
+++ nutch-cvs-20040725.xing/src/java/net/nutch/fetcher/Fetcher.java     2004-07-25 23:26:28.000000000 -0700
@@ -35,22 +35,30 @@
   private ArrayFile.Writer parseTextWriter;
   private ArrayFile.Writer parseDataWriter;
 
-  private int threadCount =                       // max number of threads
-    NutchConf.getInt("fetcher.threads.fetch", 10);
-
   private long start;                             // start time of fetcher run
   private long bytes;                             // total bytes fetched
   private int pages;                              // total pages fetched
   private int errors;                             // total pages errored
 
-  private ThreadGroup group = new ThreadGroup("fetcher"); // our thread group
+  private int threadCount =                       // max number of threads
+    NutchConf.getInt("fetcher.threads.fetch", 10);
+
+  // All threads (FetcherThread or thread started by it) belong to
+  // group "fetcher". Each FetcherThread is named as "fetcherXX",
+  // where XX is the order it's started.
+  private static final String THREAD_GROUP_NAME = "fetcher";
+
+  private ThreadGroup group = new ThreadGroup(THREAD_GROUP_NAME); // our group
+
+  // count of FetcherThreads that are through the loop and just about to return
+  private int atCompletion = 0;
 
   /********************************************
    * Fetcher thread
    ********************************************/
   private class FetcherThread extends Thread {
 
-    public FetcherThread() { super(group, "starting"); }
+    public FetcherThread(String name) { super(group, name); }
 
     /**
      * This thread keeps looping, grabbing an item off the list
@@ -68,21 +76,19 @@
         String url = null;
         try {
 
-          setName("starting");
-
           if (fetchList.next(fle) == null)
-            return;
+            break;
 
           url = fle.getPage().getURL().toString();
 
           if (!fle.getFetch()) {                  // should we fetch this page?
-            LOG.fine("not fetching " + url);
+            if (LOG.isLoggable(Level.FINE))
+              LOG.fine("not fetching " + url);
             handleNoFetch(fle, FetcherOutput.SUCCESS);
             continue;
           }
 
           LOG.info("fetching " + url);            // fetch the page
-          setName(url);
 
           Protocol protocol = ProtocolFactory.getProtocol(url);
           Content content = protocol.getContent(url);
@@ -119,6 +125,20 @@
           }
         }
       }
+
+      // Explicitly invoke shutDown() for all possible plugins.
+      // Done by the FetcherThread finished the last.
+      synchronized (Fetcher.this) {
+        atCompletion++;
+        if (atCompletion == threadCount) {
+          try {
+            PluginRepository.getInstance().finalize();
+          } catch (java.lang.Throwable t) {
+            // do nothing
+          }
+        }
+      }
+      return;
     }
 
     private void logError(String url, FetchListEntry fle, Throwable t) {
@@ -199,16 +219,41 @@
   public void run() throws IOException, InterruptedException {
     start = System.currentTimeMillis();
     for (int i = 0; i < threadCount; i++) {       // spawn threads
-      FetcherThread thread = new FetcherThread(); 
+      FetcherThread thread = new FetcherThread(THREAD_GROUP_NAME+i); 
       thread.start();
     }
-    do {
+
+    // quit monitoring if all FetcherThreads are gone.
+    // there could still be other threads, just ignore them
+    int pages0 = pages; int errors0 = errors; long bytes0 = bytes;
+  
+    while (true) {
       Thread.sleep(1000);
 
       if (LogFormatter.hasLoggedSevere()) 
         throw new RuntimeException("SEVERE error logged.  Exiting fetcher.");
 
-    } while (group.activeCount() > 0);            // wait for threads to finish
+      int n = group.activeCount();
+      Thread[] list = new Thread[n];
+      group.enumerate(list);
+      boolean noMoreFetcherThread = true; // assumption
+      for (int i=0; i<n; i++) {
+        String name = list[i].getName();
+        if (name.startsWith(THREAD_GROUP_NAME)) // prove it
+          noMoreFetcherThread = false;
+        if (LOG.isLoggable(Level.FINE))
+          LOG.fine(list[i].toString());
+      }
+
+      if (noMoreFetcherThread) {
+        if (LOG.isLoggable(Level.FINE))
+          LOG.fine("number of avtive threads: "+n);
+        if (pages == pages0 && errors == errors0 && bytes == bytes0)
+          break;
+        status();
+        pages0 = pages; errors0 = errors; bytes0 = bytes;
+      }
+    }
 
     fetchList.close();                            // close databases
     fetcherWriter.close();
@@ -216,7 +261,6 @@
     parseTextWriter.close();
     parseDataWriter.close();
 
-    status();                                     // print final status
   }
 
   /** Display the status of the fetcher run. */
@@ -237,10 +281,11 @@
   public static void main(String[] args) throws Exception {
     int threadCount = -1;
     long delay = -1;
+    String logLevel = "info";
     boolean showThreadID = false;
     String directory = null;
 
-    String usage = "Usage: Fetcher [-threads n] dir";
+    String usage = "Usage: Fetcher [-logLevel level] [-showThreadID] [-threads n] dir";
 
     if (args.length == 0) {
       System.err.println(usage);
@@ -248,14 +293,15 @@
     }
       
     for (int i = 0; i < args.length; i++) {       // parse command line
-
       if (args[i].equals("-threads")) {           // found -threads option
         threadCount =  Integer.parseInt(args[++i]);
-
+      } else if (args[i].equals("-logLevel")) {
+        logLevel = args[++i];
+      } else if (args[i].equals("-showThreadID")) {
+        showThreadID = true;
       } else if (i != args.length-1) {
         System.err.println(usage);
         System.exit(-1);
-
       } else                                      // root is required parameter
         directory = args[i];
     }
@@ -264,6 +310,12 @@
     if (threadCount != -1)                        // set threadCount option
       fetcher.setThreadCount(threadCount);
 
+    // set log level
+    fetcher.setLogLevel(Level.parse(logLevel.toUpperCase()));
+
+    if (showThreadID)
+      LogFormatter.setShowThreadIDs(showThreadID);
+
     fetcher.run();                                // run the Fetcher
   }
 }
diff -Nur --exclude='*.txt' --exclude='*.xml' nutch-cvs-20040725/src/java/net/nutch/plugin/Extension.java nutch-cvs-20040725.xing/src/java/net/nutch/plugin/Extension.java
--- nutch-cvs-20040725/src/java/net/nutch/plugin/Extension.java 2004-05-20 11:19:04.000000000 -0700
+++ nutch-cvs-20040725.xing/src/java/net/nutch/plugin/Extension.java    2004-07-25 21:36:17.000000000 -0700
@@ -117,20 +117,22 @@
    * @return Object An instance of the extension implementation
    */
   public Object getExtensionInstance() throws PluginRuntimeException {
-    try {
-      PluginClassLoader loader = fDescriptor.getClassLoader();
-      Class extensionClazz = loader.loadClass(getClazz());
-      // lazy loading of Plugin in case there is no instance of the plugin
-      // already.
-      PluginRepository.getInstance().getPluginInstance(getDiscriptor());
-      Object object = extensionClazz.newInstance();
-      return object;
-    } catch (ClassNotFoundException e) {
-      throw new PluginRuntimeException(e);
-    } catch (InstantiationException e) {
-      throw new PluginRuntimeException(e);
-    } catch (IllegalAccessException e) {
-      throw new PluginRuntimeException(e);
+    synchronized (getId()) {
+      try {
+        PluginClassLoader loader = fDescriptor.getClassLoader();
+        Class extensionClazz = loader.loadClass(getClazz());
+        // lazy loading of Plugin in case there is no instance of the plugin
+        // already.
+        PluginRepository.getInstance().getPluginInstance(getDiscriptor());
+        Object object = extensionClazz.newInstance();
+        return object;
+      } catch (ClassNotFoundException e) {
+        throw new PluginRuntimeException(e);
+      } catch (InstantiationException e) {
+        throw new PluginRuntimeException(e);
+      } catch (IllegalAccessException e) {
+        throw new PluginRuntimeException(e);
+      }
     }
   }
   /**
diff -Nur --exclude='*.txt' --exclude='*.xml' nutch-cvs-20040725/src/java/net/nutch/plugin/PluginRepository.java nutch-cvs-20040725.xing/src/java/net/nutch/plugin/PluginRepository.java
--- nutch-cvs-20040725/src/java/net/nutch/plugin/PluginRepository.java  2004-07-06 10:28:30.000000000 -0700
+++ nutch-cvs-20040725.xing/src/java/net/nutch/plugin/PluginRepository.java     2004-07-25 21:32:37.000000000 -0700
@@ -177,15 +177,17 @@
     if (fActivatedPlugins.containsKey(pDescriptor.getPluginId()))
       return (Plugin) fActivatedPlugins.get(pDescriptor.getPluginId());
     try {
-      PluginClassLoader loader = pDescriptor.getClassLoader();
-      Class pluginClass = loader.loadClass(pDescriptor.getPluginClass());
-      Constructor constructor = pluginClass
-        .getConstructor(new Class[]{PluginDescriptor.class});
-      Plugin plugin = (Plugin) constructor
-        .newInstance(new Object[]{pDescriptor});
-      plugin.startUp();
-      fActivatedPlugins.put(pDescriptor.getPluginId(), plugin);
-      return plugin;
+      synchronized (pDescriptor) {
+        PluginClassLoader loader = pDescriptor.getClassLoader();
+        Class pluginClass = loader.loadClass(pDescriptor.getPluginClass());
+        Constructor constructor = pluginClass
+          .getConstructor(new Class[]{PluginDescriptor.class});
+        Plugin plugin = (Plugin) constructor
+          .newInstance(new Object[]{pDescriptor});
+        plugin.startUp();
+        fActivatedPlugins.put(pDescriptor.getPluginId(), plugin);
+        return plugin;
+      }
     } catch (ClassNotFoundException e) {
       throw new PluginRuntimeException(e);
     } catch (InstantiationException e) {
@@ -203,7 +205,7 @@
    * 
    * @see java.lang.Object#finalize()
    */
-  protected void finalize() throws Throwable {
+  public void finalize() throws Throwable {
     shotDownActivatedPlugins();
   }
   /**

On Mon, Jul 26, 2004 at 12:12:41AM -0700, [EMAIL PROTECTED] wrote:
> Attached is a patch that makes Fetcher.java work better with plugin system.
> 
> Changes are
> (1) critical regions added in
> src/java/net/nutch/plugin/Extension.java
> src/java/net/nutch/plugin/PluginRepository.java
> by Stefan
> 
> (2) make PluginRepository.finalize() public
> 
> (3) Explicitly invoke shutDown() for all possible plugins in Fetcher.java
> 
> (4) Better monitoring of FetcherThreads, trying to solve
> one type of process hang problem (caused by threads other than
> FetcherThreads that have not finished).
> 
> If it looks okay, I will commit in two days.
> 
> John
> 
__________________________________________
http://www.neasys.com - A Good Place to Be
Come to visit us today!


-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=4721&alloc_id=10040&op=click
_______________________________________________
Nutch-developers mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-developers

Reply via email to