Forgot to attach the patch. Here it is:
--------------------------- patch.txt.20040725 ------------------------------
diff -Nur --exclude='*.txt' --exclude='*.xml'
nutch-cvs-20040725/src/java/net/nutch/fetcher/Fetcher.java
nutch-cvs-20040725.xing/src/java/net/nutch/fetcher/Fetcher.java
--- nutch-cvs-20040725/src/java/net/nutch/fetcher/Fetcher.java 2004-06-16
10:31:31.000000000 -0700
+++ nutch-cvs-20040725.xing/src/java/net/nutch/fetcher/Fetcher.java 2004-07-25
23:26:28.000000000 -0700
@@ -35,22 +35,30 @@
private ArrayFile.Writer parseTextWriter;
private ArrayFile.Writer parseDataWriter;
- private int threadCount = // max number of threads
- NutchConf.getInt("fetcher.threads.fetch", 10);
-
private long start; // start time of fetcher run
private long bytes; // total bytes fetched
private int pages; // total pages fetched
private int errors; // total pages errored
- private ThreadGroup group = new ThreadGroup("fetcher"); // our thread group
+ private int threadCount = // max number of threads
+ NutchConf.getInt("fetcher.threads.fetch", 10);
+
+ // All threads (FetcherThread or thread started by it) belong to
+ // group "fetcher". Each FetcherThread is named as "fetcherXX",
+ // where XX is the order it's started.
+ private static final String THREAD_GROUP_NAME = "fetcher";
+
+ private ThreadGroup group = new ThreadGroup(THREAD_GROUP_NAME); // our group
+
+ // count of FetcherThreads that are through the loop and just about to return
+ private int atCompletion = 0;
/********************************************
* Fetcher thread
********************************************/
private class FetcherThread extends Thread {
- public FetcherThread() { super(group, "starting"); }
+ public FetcherThread(String name) { super(group, name); }
/**
* This thread keeps looping, grabbing an item off the list
@@ -68,21 +76,19 @@
String url = null;
try {
- setName("starting");
-
if (fetchList.next(fle) == null)
- return;
+ break;
url = fle.getPage().getURL().toString();
if (!fle.getFetch()) { // should we fetch this page?
- LOG.fine("not fetching " + url);
+ if (LOG.isLoggable(Level.FINE))
+ LOG.fine("not fetching " + url);
handleNoFetch(fle, FetcherOutput.SUCCESS);
continue;
}
LOG.info("fetching " + url); // fetch the page
- setName(url);
Protocol protocol = ProtocolFactory.getProtocol(url);
Content content = protocol.getContent(url);
@@ -119,6 +125,20 @@
}
}
}
+
+ // Explicitly invoke shutDown() for all possible plugins.
+ // Done by the FetcherThread finished the last.
+ synchronized (Fetcher.this) {
+ atCompletion++;
+ if (atCompletion == threadCount) {
+ try {
+ PluginRepository.getInstance().finalize();
+ } catch (java.lang.Throwable t) {
+ // do nothing
+ }
+ }
+ }
+ return;
}
private void logError(String url, FetchListEntry fle, Throwable t) {
@@ -199,16 +219,41 @@
public void run() throws IOException, InterruptedException {
start = System.currentTimeMillis();
for (int i = 0; i < threadCount; i++) { // spawn threads
- FetcherThread thread = new FetcherThread();
+ FetcherThread thread = new FetcherThread(THREAD_GROUP_NAME+i);
thread.start();
}
- do {
+
+ // quit monitoring if all FetcherThreads are gone.
+ // there could still be other threads, just ignore them
+ int pages0 = pages; int errors0 = errors; long bytes0 = bytes;
+
+ while (true) {
Thread.sleep(1000);
if (LogFormatter.hasLoggedSevere())
throw new RuntimeException("SEVERE error logged. Exiting fetcher.");
- } while (group.activeCount() > 0); // wait for threads to finish
+ int n = group.activeCount();
+ Thread[] list = new Thread[n];
+ group.enumerate(list);
+ boolean noMoreFetcherThread = true; // assumption
+ for (int i=0; i<n; i++) {
+ String name = list[i].getName();
+ if (name.startsWith(THREAD_GROUP_NAME)) // prove it
+ noMoreFetcherThread = false;
+ if (LOG.isLoggable(Level.FINE))
+ LOG.fine(list[i].toString());
+ }
+
+ if (noMoreFetcherThread) {
+ if (LOG.isLoggable(Level.FINE))
+ LOG.fine("number of avtive threads: "+n);
+ if (pages == pages0 && errors == errors0 && bytes == bytes0)
+ break;
+ status();
+ pages0 = pages; errors0 = errors; bytes0 = bytes;
+ }
+ }
fetchList.close(); // close databases
fetcherWriter.close();
@@ -216,7 +261,6 @@
parseTextWriter.close();
parseDataWriter.close();
- status(); // print final status
}
/** Display the status of the fetcher run. */
@@ -237,10 +281,11 @@
public static void main(String[] args) throws Exception {
int threadCount = -1;
long delay = -1;
+ String logLevel = "info";
boolean showThreadID = false;
String directory = null;
- String usage = "Usage: Fetcher [-threads n] dir";
+ String usage = "Usage: Fetcher [-logLevel level] [-showThreadID] [-threads n]
dir";
if (args.length == 0) {
System.err.println(usage);
@@ -248,14 +293,15 @@
}
for (int i = 0; i < args.length; i++) { // parse command line
-
if (args[i].equals("-threads")) { // found -threads option
threadCount = Integer.parseInt(args[++i]);
-
+ } else if (args[i].equals("-logLevel")) {
+ logLevel = args[++i];
+ } else if (args[i].equals("-showThreadID")) {
+ showThreadID = true;
} else if (i != args.length-1) {
System.err.println(usage);
System.exit(-1);
-
} else // root is required parameter
directory = args[i];
}
@@ -264,6 +310,12 @@
if (threadCount != -1) // set threadCount option
fetcher.setThreadCount(threadCount);
+ // set log level
+ fetcher.setLogLevel(Level.parse(logLevel.toUpperCase()));
+
+ if (showThreadID)
+ LogFormatter.setShowThreadIDs(showThreadID);
+
fetcher.run(); // run the Fetcher
}
}
diff -Nur --exclude='*.txt' --exclude='*.xml'
nutch-cvs-20040725/src/java/net/nutch/plugin/Extension.java
nutch-cvs-20040725.xing/src/java/net/nutch/plugin/Extension.java
--- nutch-cvs-20040725/src/java/net/nutch/plugin/Extension.java 2004-05-20
11:19:04.000000000 -0700
+++ nutch-cvs-20040725.xing/src/java/net/nutch/plugin/Extension.java 2004-07-25
21:36:17.000000000 -0700
@@ -117,20 +117,22 @@
* @return Object An instance of the extension implementation
*/
public Object getExtensionInstance() throws PluginRuntimeException {
- try {
- PluginClassLoader loader = fDescriptor.getClassLoader();
- Class extensionClazz = loader.loadClass(getClazz());
- // lazy loading of Plugin in case there is no instance of the plugin
- // already.
- PluginRepository.getInstance().getPluginInstance(getDiscriptor());
- Object object = extensionClazz.newInstance();
- return object;
- } catch (ClassNotFoundException e) {
- throw new PluginRuntimeException(e);
- } catch (InstantiationException e) {
- throw new PluginRuntimeException(e);
- } catch (IllegalAccessException e) {
- throw new PluginRuntimeException(e);
+ synchronized (getId()) {
+ try {
+ PluginClassLoader loader = fDescriptor.getClassLoader();
+ Class extensionClazz = loader.loadClass(getClazz());
+ // lazy loading of Plugin in case there is no instance of the plugin
+ // already.
+ PluginRepository.getInstance().getPluginInstance(getDiscriptor());
+ Object object = extensionClazz.newInstance();
+ return object;
+ } catch (ClassNotFoundException e) {
+ throw new PluginRuntimeException(e);
+ } catch (InstantiationException e) {
+ throw new PluginRuntimeException(e);
+ } catch (IllegalAccessException e) {
+ throw new PluginRuntimeException(e);
+ }
}
}
/**
diff -Nur --exclude='*.txt' --exclude='*.xml'
nutch-cvs-20040725/src/java/net/nutch/plugin/PluginRepository.java
nutch-cvs-20040725.xing/src/java/net/nutch/plugin/PluginRepository.java
--- nutch-cvs-20040725/src/java/net/nutch/plugin/PluginRepository.java 2004-07-06
10:28:30.000000000 -0700
+++ nutch-cvs-20040725.xing/src/java/net/nutch/plugin/PluginRepository.java
2004-07-25 21:32:37.000000000 -0700
@@ -177,15 +177,17 @@
if (fActivatedPlugins.containsKey(pDescriptor.getPluginId()))
return (Plugin) fActivatedPlugins.get(pDescriptor.getPluginId());
try {
- PluginClassLoader loader = pDescriptor.getClassLoader();
- Class pluginClass = loader.loadClass(pDescriptor.getPluginClass());
- Constructor constructor = pluginClass
- .getConstructor(new Class[]{PluginDescriptor.class});
- Plugin plugin = (Plugin) constructor
- .newInstance(new Object[]{pDescriptor});
- plugin.startUp();
- fActivatedPlugins.put(pDescriptor.getPluginId(), plugin);
- return plugin;
+ synchronized (pDescriptor) {
+ PluginClassLoader loader = pDescriptor.getClassLoader();
+ Class pluginClass = loader.loadClass(pDescriptor.getPluginClass());
+ Constructor constructor = pluginClass
+ .getConstructor(new Class[]{PluginDescriptor.class});
+ Plugin plugin = (Plugin) constructor
+ .newInstance(new Object[]{pDescriptor});
+ plugin.startUp();
+ fActivatedPlugins.put(pDescriptor.getPluginId(), plugin);
+ return plugin;
+ }
} catch (ClassNotFoundException e) {
throw new PluginRuntimeException(e);
} catch (InstantiationException e) {
@@ -203,7 +205,7 @@
*
* @see java.lang.Object#finalize()
*/
- protected void finalize() throws Throwable {
+ public void finalize() throws Throwable {
shotDownActivatedPlugins();
}
/**
On Mon, Jul 26, 2004 at 12:12:41AM -0700, [EMAIL PROTECTED] wrote:
> Attached is a patch that makes Fetcher.java work better with the plugin system.
>
> Changes are
> (1) critical regions added in
> src/java/net/nutch/plugin/Extension.java
> src/java/net/nutch/plugin/PluginRepository.java
> by Stefan
>
> (2) make PluginRepository.finalize() public
>
> (3) Explicitly invoke shutDown() for all possible plugins in Fetcher.java
>
> (4) Better monitoring of FetcherThreads, trying to solve
> one type of process hang problem (due to unfinished non-FetcherThread
> threads).
>
> If it looks okay, I will commit in two days.
>
> John
>
__________________________________________
http://www.neasys.com - A Good Place to Be
Come to visit us today!
-------------------------------------------------------
This SF.Net email is sponsored by BEA Weblogic Workshop
FREE Java Enterprise J2EE developer tools!
Get your free copy of BEA WebLogic Workshop 8.1 today.
http://ads.osdn.com/?ad_id=4721&alloc_id=10040&op=click
_______________________________________________
Nutch-developers mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-developers