svn commit: r405083 - /lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties

2006-05-08 Thread siren
Author: siren
Date: Mon May  8 09:19:56 2006
New Revision: 405083

URL: http://svn.apache.org/viewcvs?rev=405083&view=rev
Log:
restored props for default locale

Added:

lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties

Added: 
lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties?rev=405083&view=auto
==
--- 
lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties
 (added)
+++ 
lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources.properties
 Mon May  8 09:19:56 2006
@@ -0,0 +1,99 @@
+#This is the default resource file for nutch ui localization.
+#If you create a new localized version of resources, please use
+#this as the base
+
+#language code used in html lang attribute
+lang=en
+
+#anchors page title
+anchors.title=anchors
+anchors.anchors=incoming anchor text:
+anchors.page=page: a href={0}{0}/a
+
+#cached page title
+cached.title=nutch cache
+cached.page=page: a href={0}{0}/a
+cached.noContent=Sorry, no content is cached for this page.
+cached.notHtml=The cached content has mime type {0}, click this a 
href=servlet/cached?{1}link/a to download it directly.
+
+#explain page title
+explain.title=score explanation
+explain.page=page
+explain.scoreForQuery=score for query: tt{0}/tt
+
+#search page title
+search.title=search results
+
+#text in search button
+search.search=Search
+
+#text which describes the search results (nn-nn out of nn)
+search.hits=Results b{0}-{1}/b of about b{2}/b total matching pages 
for b{3}/b.
+
+#text displayed when there are no results
+search.noResults=Your search - b{0}/b - did not match any documents.
+
+#cached page link text  
+search.cached=cached
+
+#explain page link text
+search.explain=explain
+
+#anchors page link text
+search.anchors=anchors
+
+#text in next page button 
+search.next=next page
+
+#link text of
+search.moreFrom=more from
+search.showAllHits=show all hits
+
+search.clustering=clustering
+search.viewAsText=View as Plain Text
+
+#search help link text
+search.help=help
+
+#index more web ui localization
+search.contentType=[span class=contentType{0}/span]
+search.contentLength=({0} bytes)
+search.lastModified={0}
+
+#view as text page title
+text.title=plain text cache
+text.note=This is the plain text version of the file: a href={0}{0}/a.
+text.noText=iSorry, no plain text version is available./i
+
+#title of help page
+help.title=help
+
+#title of preferences page
+preferences.title=preferences
+
+#interface languages
+preferences.ui.language=Interface language
+preferences.ui.language.info=
+ca=Catalan
+de=German
+en=English
+es=Spanish
+fi=Finnish
+fr=French
+hu=Hungarian
+ms=Malay
+nl=Dutch
+pl=Polish
+pt=Portuguese
+sh=Serbo-Croatian
+sr=Serbian
+sv=Swedish
+th=Thai
+zh=Chinese
+
+#number of results
+preferences.numResults=Number of Results
+preferences.numResults.info=
+
+#text on save button
+preferences.submit=Save and return to search
\ No newline at end of file




svn commit: r405089 - /lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties

2006-05-08 Thread siren
Author: siren
Date: Mon May  8 09:27:10 2006
New Revision: 405089

URL: http://svn.apache.org/viewcvs?rev=405089&view=rev
Log:
removed log flooding, removed unnecessary code from PreferencesController, 
integrated displaying information of index-more plugin

Modified:

lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties

Modified: 
lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties?rev=405089&r1=405088&r2=405089&view=diff
==
--- 
lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties
 (original)
+++ 
lucene/nutch/trunk/contrib/web2/src/main/resources/org/nutch/jsp/resources_en.properties
 Mon May  8 09:27:10 2006
@@ -55,6 +55,11 @@
 #search help link text
 search.help=help
 
+#index more web ui localization
+search.contentType=[span class=contentType{0}/span]
+search.contentLength=({0} bytes)
+search.lastModified={0}
+
 #view as text page title
 text.title=plain text cache
 text.note=This is the plain text version of the file: a href={0}{0}/a.




svn commit: r405088 - in /lucene/nutch/trunk/contrib/web2: ./ src/main/java/org/apache/nutch/webapp/common/ src/main/java/org/apache/nutch/webapp/controller/ src/main/webapp/WEB-INF/ src/main/webapp/W

2006-05-08 Thread siren
Author: siren
Date: Mon May  8 09:25:53 2006
New Revision: 405088

URL: http://svn.apache.org/viewcvs?rev=405088&view=rev
Log:
removed log flooding, removed unnecessary code from PreferencesController, 
integrated displaying information of index-more plugin

Added:

lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/MoreController.java
Modified:
lucene/nutch/trunk/contrib/web2/README.txt
lucene/nutch/trunk/contrib/web2/build.xml

lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/NavigationHelper.java

lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java

lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/NutchController.java

lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/controller/PreferencesController.java
lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/more.jsp
lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/preferences.jsp
lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/results.jsp
lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/search.jsp
lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/jsp/template.jsp
lucene/nutch/trunk/contrib/web2/src/main/webapp/WEB-INF/tiles-defs.xml

Modified: lucene/nutch/trunk/contrib/web2/README.txt
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/README.txt?rev=405088&r1=405087&r2=405088&view=diff
==
--- lucene/nutch/trunk/contrib/web2/README.txt (original)
+++ lucene/nutch/trunk/contrib/web2/README.txt Mon May  8 09:25:53 2006
@@ -9,9 +9,9 @@
 (and related) pages. Layout is constructed by using following
 tag libraries:
 
-struts-logic
+jstl-c
+jstl-fmt
 struts-tiles
-struts-bean
 
 These tiles blocks can be extended or overridden by plugins
 implementing org.apache.nutch.webapp.UIExtensionPoint. A
@@ -36,8 +36,10 @@
 
 Todo:
 
--Provide some samples of ui plugins
-
+-provide some samples of ui plugins
+-move more functionality to plugin
+-remove table structures from html to allow more flexible css layouts
+-add mechanism for adding binary items (ie. images)
 
 Directory contents
 

Modified: lucene/nutch/trunk/contrib/web2/build.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/build.xml?rev=405088&r1=405087&r2=405088&view=diff
==
--- lucene/nutch/trunk/contrib/web2/build.xml (original)
+++ lucene/nutch/trunk/contrib/web2/build.xml Mon May  8 09:25:53 2006
@@ -287,6 +287,7 @@
  includes=**/*.html/
 
replace dir=${docs.dir} token=help.html value=help.do 
includes=**/*.html/
+   replace dir=${docs.dir} token=about.html value=about.do 
includes=**/*.html/
replace dir=${docs.dir} token=search.jsp value=search.do 
includes=**/*.html/
replace dir=${docs.dir} token=../ value= includes=**/*.html/


Modified: 
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/NavigationHelper.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/NavigationHelper.java?rev=405088&r1=405087&r2=405088&view=diff
==
--- 
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/NavigationHelper.java
 (original)
+++ 
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/NavigationHelper.java
 Mon May  8 09:25:53 2006
@@ -62,9 +62,6 @@
* @return
*/
   protected boolean hasNext() {
-System.out.println(totalIsExact + totalIsExact);
-System.out.println(end + end);
-System.out.println(totalHits + totalHits);
 return end  totalHits  (!getShowAllHits());
   }
 

Modified: 
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java?rev=405088&r1=405087&r2=405088&view=diff
==
--- 
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java
 (original)
+++ 
lucene/nutch/trunk/contrib/web2/src/main/java/org/apache/nutch/webapp/common/Search.java
 Mon May  8 09:25:53 2006
@@ -89,18 +89,12 @@
   hits = new Hits(0, new Hit[0]);
 }
 
-LOG.info(form:);
-LOG.info(locator.getSearchForm().toString());
-LOG.info(performing search);
-
 int realEnd = (int) Math.min(hits.getLength(), getStartOffset()
 + getMaxHits());
 
 int endOffset=hits.getLength();
 
 show = hits.getHits(getStartOffset(), realEnd - getStartOffset());
-
-
 
 navigationHelper = new NavigationHelper(startOffset, endOffset, 
hitsPerPage, hits

svn commit: r405165 - in /lucene/nutch/trunk: ./ conf/ src/java/org/apache/nutch/searcher/ src/plugin/ src/plugin/nutch-extensionpoints/ src/plugin/summary-basic/ src/plugin/summary-basic/src/ src/plu

2006-05-08 Thread jerome
Author: jerome
Date: Mon May  8 14:04:01 2006
New Revision: 405165

URL: http://svn.apache.org/viewcvs?rev=405165&view=rev
Log:
NUTCH-134 : Added a summarizer extension point and two extensions:
* summary-basic is the current nutch implementation moved into a plugin
* summary-lucene a raw version of a summarizer plugin based on lucene 
highlighter

Added:

lucene/nutch/trunk/src/java/org/apache/nutch/searcher/SummarizerFactory.java   
(with props)
lucene/nutch/trunk/src/plugin/summary-basic/
lucene/nutch/trunk/src/plugin/summary-basic/build.xml   (with props)
lucene/nutch/trunk/src/plugin/summary-basic/plugin.xml   (with props)
lucene/nutch/trunk/src/plugin/summary-basic/src/
lucene/nutch/trunk/src/plugin/summary-basic/src/java/
lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/
lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/
lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/nutch/

lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/nutch/summary/

lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/nutch/summary/basic/

lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/nutch/summary/basic/BasicSummarizer.java
   (with props)

lucene/nutch/trunk/src/plugin/summary-basic/src/java/org/apache/nutch/summary/basic/package.html
   (with props)
lucene/nutch/trunk/src/plugin/summary-lucene/
lucene/nutch/trunk/src/plugin/summary-lucene/build.xml   (with props)
lucene/nutch/trunk/src/plugin/summary-lucene/lib/

lucene/nutch/trunk/src/plugin/summary-lucene/lib/lucene-highlighter-2.0-rc1-dev.jar
   (with props)
lucene/nutch/trunk/src/plugin/summary-lucene/plugin.xml   (with props)
lucene/nutch/trunk/src/plugin/summary-lucene/src/
lucene/nutch/trunk/src/plugin/summary-lucene/src/java/
lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/
lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/
lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/nutch/

lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/nutch/summary/

lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/nutch/summary/lucene/

lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/nutch/summary/lucene/LuceneSummarizer.java
   (with props)

lucene/nutch/trunk/src/plugin/summary-lucene/src/java/org/apache/nutch/summary/lucene/package.html
   (with props)
Modified:
lucene/nutch/trunk/build.xml
lucene/nutch/trunk/conf/nutch-default.xml
lucene/nutch/trunk/default.properties
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/FetchedSegments.java
lucene/nutch/trunk/src/java/org/apache/nutch/searcher/Summarizer.java
lucene/nutch/trunk/src/plugin/build.xml
lucene/nutch/trunk/src/plugin/nutch-extensionpoints/plugin.xml

Modified: lucene/nutch/trunk/build.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/build.xml?rev=405165&r1=405164&r2=405165&view=diff
==
--- lucene/nutch/trunk/build.xml (original)
+++ lucene/nutch/trunk/build.xml Mon May  8 14:04:01 2006
@@ -323,6 +323,8 @@
   packageset dir=${plugins.dir}/query-more/src/java/
   packageset dir=${plugins.dir}/query-site/src/java/
   packageset dir=${plugins.dir}/query-url/src/java/
+  packageset dir=${plugins.dir}/summary-basic/src/java/
+  packageset dir=${plugins.dir}/summary-lucene/src/java/
   packageset dir=${plugins.dir}/urlfilter-automaton/src/java/
   packageset dir=${plugins.dir}/urlfilter-regex/src/java/
   packageset dir=${plugins.dir}/urlfilter-prefix/src/java/
@@ -350,6 +352,7 @@
   group title=Analysis Plugins packages=${plugins.analysis}/
   group title=Indexing Filter Plugins packages=${plugins.index}/
   group title=Query Filter Plugins packages=${plugins.query}/
+  group title=Summary Plugins packages=${plugins.summary}/
   group title=Clustering Plugins packages=${plugins.clustering}/
   group title=Ontology Plugins packages=${plugins.ontology}/
   group title=Misc. Plugins packages=${plugins.misc}/

Modified: lucene/nutch/trunk/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/conf/nutch-default.xml?rev=405165&r1=405164&r2=405165&view=diff
==
--- lucene/nutch/trunk/conf/nutch-default.xml (original)
+++ lucene/nutch/trunk/conf/nutch-default.xml Mon May  8 14:04:01 2006
@@ -564,7 +564,7 @@
 
 property
   nameplugin.includes/name
-  
valueprotocol-http|urlfilter-regex|parse-(text|html|js)|index-basic|query-(basic|site|url)/value
+  
valueprotocol-http|urlfilter-regex|parse-(text|html|js)|index-basic|query-(basic|site|url)|summary-basic/value
   descriptionRegular expression naming plugin directory names to
   include.  Any plugin not matching this expression is excluded.
   In any 

svn commit: r405179 - in /lucene/nutch/trunk/src: java/org/apache/nutch/crawl/MapWritable.java test/org/apache/nutch/crawl/TestMapWritable.java

2006-05-08 Thread ab
Author: ab
Date: Mon May  8 14:48:21 2006
New Revision: 405179

URL: http://svn.apache.org/viewcvs?rev=405179&view=rev
Log:
Fix NUTCH-263.

Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java
lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java?rev=405179&r1=405178&r2=405179&view=diff
==
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java 
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/MapWritable.java Mon May 
 8 14:48:21 2006
@@ -224,16 +224,20 @@
   public boolean equals(Object obj) {
 if (obj instanceof MapWritable) {
   MapWritable map = (MapWritable) obj;
+  if (fSize != map.fSize) return false;
+  HashSet set1 = new HashSet();
   KeyValueEntry e1 = fFirst;
-  KeyValueEntry e2 = map.fFirst;
-  while (e1 != null  e2 != null) {
-if (!e1.equals(e2)) {
-  return false;
-}
+  while (e1 != null) {
+set1.add(e1);
 e1 = e1.fNextEntry;
+  }
+  HashSet set2 = new HashSet();
+  KeyValueEntry e2 = map.fFirst;
+  while (e2 != null) {
+set2.add(e2);
 e2 = e2.fNextEntry;
   }
-  return true;
+  return set1.equals(set2);
 }
 return false;
   }
@@ -451,6 +455,10 @@
 return entry.fKey.equals(fKey)  entry.fValue.equals(fValue);
   }
   return false;
+}
+
+public int hashCode() {
+  return toString().hashCode();
 }
   }
 

Modified: 
lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java?rev=405179&r1=405178&r2=405179&view=diff
==
--- lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java 
(original)
+++ lucene/nutch/trunk/src/test/org/apache/nutch/crawl/TestMapWritable.java Mon 
May  8 14:48:21 2006
@@ -91,6 +91,16 @@
 }
 testWritable(c);
   }
+  
+  public void testEquals() {
+MapWritable map1 = new MapWritable();
+MapWritable map2 = new MapWritable();
+map1.put(new UTF8(key1), new UTF8(val1));
+map1.put(new UTF8(key2), new UTF8(val2));
+map2.put(new UTF8(key2), new UTF8(val2));
+map2.put(new UTF8(key1), new UTF8(val1));
+assertTrue(map1.equals(map2));
+  }
 
   public void testPerformance() throws Exception {
 File file = new File(System.getProperty(java.io.tmpdir), mapTestFile);




svn commit: r405181 - in /lucene/nutch/trunk/src/java/org/apache/nutch/crawl: CrawlDbReader.java LinkDb.java

2006-05-08 Thread ab
Author: ab
Date: Mon May  8 14:52:09 2006
New Revision: 405181

URL: http://svn.apache.org/viewcvs?rev=405181&view=rev
Log:
Refactor to make it easier to use these classes programmatically.

Modified:
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java?rev=405181&r1=405180&r2=405181&view=diff
==
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java 
(original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/CrawlDbReader.java Mon 
May  8 14:52:09 2006
@@ -25,6 +25,7 @@
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.io.Closeable;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.MapFile;
@@ -55,9 +56,28 @@
  * @author Andrzej Bialecki
  * 
  */
-public class CrawlDbReader {
+public class CrawlDbReader implements Closeable {
 
   public static final Logger LOG = 
LogFormatter.getLogger(CrawlDbReader.class.getName());
+  
+  private MapFile.Reader[] readers = null;
+  
+  private void openReaders(String crawlDb, Configuration config) throws 
IOException {
+if (readers != null) return;
+FileSystem fs = FileSystem.get(config);
+readers = MapFileOutputFormat.getReaders(fs, new File(crawlDb, 
CrawlDatum.DB_DIR_NAME), config);
+  }
+  
+  private void closeReaders() {
+if (readers == null) return;
+for (int i = 0; i  readers.length; i++) {
+  try {
+readers[i].close();
+  } catch (Exception e) {
+
+  }
+}
+  }
 
   public static class CrawlDbStatMapper implements Mapper {
 public void configure(JobConf job) {}
@@ -177,6 +197,10 @@
 
 public void close() {}
   }
+
+  public void close() {
+closeReaders();
+  }
   
   public void processStatJob(String crawlDb, Configuration config) throws 
IOException {
 LOG.info(CrawlDb statistics start:  + crawlDb);
@@ -249,16 +273,20 @@
 LOG.info(CrawlDb statistics: done);
 
   }
-
-  public void readUrl(String crawlDb, String url, Configuration config) throws 
IOException {
-FileSystem fs = FileSystem.get(config);
+  
+  public CrawlDatum get(String crawlDb, String url, Configuration config) 
throws IOException {
 UTF8 key = new UTF8(url);
 CrawlDatum val = new CrawlDatum();
-MapFile.Reader[] readers = MapFileOutputFormat.getReaders(fs, new 
File(crawlDb, CrawlDatum.DB_DIR_NAME), config);
-Writable res = MapFileOutputFormat.getEntry(readers, new 
HashPartitioner(), key, val);
+openReaders(crawlDb, config);
+CrawlDatum res = (CrawlDatum)MapFileOutputFormat.getEntry(readers, new 
HashPartitioner(), key, val);
+return res;
+  }
+
+  public void readUrl(String crawlDb, String url, Configuration config) throws 
IOException {
+CrawlDatum res = get(crawlDb, url, config);
 System.out.println(URL:  + url);
 if (res != null) {
-  System.out.println(val);
+  System.out.println(res);
 } else {
   System.out.println(not found);
 }

Modified: lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL: 
http://svn.apache.org/viewcvs/lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=405181&r1=405180&r2=405181&view=diff
==
--- lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ lucene/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Mon May  8 
14:52:09 2006
@@ -28,6 +28,7 @@
 import org.apache.hadoop.util.LogFormatter;
 import org.apache.hadoop.mapred.*;
 
+import org.apache.nutch.net.URLFilters;
 import org.apache.nutch.parse.*;
 import org.apache.nutch.util.NutchConfiguration;
 import org.apache.nutch.util.NutchJob;
@@ -44,15 +45,27 @@
   private int maxInlinks;
   private boolean ignoreInternalLinks;
   
-  public static class LinkDbMerger extends MapReduceBase implements Reducer {
+  public static class Merger extends MapReduceBase implements Reducer {
 private int _maxInlinks;
+private URLFilters filters = null;
 
 public void configure(JobConf job) {
   super.configure(job);
   _maxInlinks = job.getInt(db.max.inlinks, 1);
+  if (job.getBoolean(linkdb.merger.urlfilters, false)) {
+filters = new URLFilters(job);
+  }
 }
 
 public void reduce(WritableComparable key, Iterator values, 
OutputCollector output, Reporter reporter) throws IOException {
+  if (filters != null) {
+try {
+  if (filters.filter(((UTF8)key).toString()) == null)
+return;
+} catch (Exception e) {
+  LOG.fine(Can't filter  + key + :  + e);
+}
+  }