Author: markus
Date: Tue Feb 23 10:23:24 2016
New Revision: 1731831

URL: http://svn.apache.org/viewvc?rev=1731831&view=rev
Log:
NUTCH-2220 Rename db.* options used only by the linkdb to linkdb.*

Modified:
    nutch/trunk/CHANGES.txt
    nutch/trunk/conf/nutch-default.xml
    nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
    nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbMerger.java

Modified: nutch/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/nutch/trunk/CHANGES.txt?rev=1731831&r1=1731830&r2=1731831&view=diff
==============================================================================
--- nutch/trunk/CHANGES.txt (original)
+++ nutch/trunk/CHANGES.txt Tue Feb 23 10:23:24 2016
@@ -1,5 +1,17 @@
+Fellow committers, Nutch 1.12 contains a breaking change (NUTCH-2220). Please 
use the note below
+in the release announcement and keep it at the top of this CHANGES.txt for the 
Nutch 1.12 release.
+
+* replace your old conf/nutch-default.xml with the conf/nutch-default.xml from 
Nutch 1.12 release
+* if you use LinkDB (e.g. invertlinks) and modified parameters db.max.inlinks 
and/or db.max.anchor.length
+  and/or db.ignore.internal.links, rename those parameters to 
linkdb.max.inlinks and
+  linkdb.max.anchor.length and linkdb.ignore.internal.links
+* db.ignore.internal.links and db.ignore.external.links now operate on the 
CrawlDB only
+* linkdb.ignore.internal.links and linkdb.ignore.external.links now operate on 
the LinkDB only
+
 Nutch Change Log
 
+* NUTCH-2220 Rename db.* options used only by the linkdb to linkdb.* (markus)
+
 * NUTCH-2228 Plugin index-replace unit test broken on Java 8 (snagel via 
markus)
 
 * NUTCH-2219 Criteria order to be configurable in DeduplicationJob (Ron van 
der Vegt via markus)

Modified: nutch/trunk/conf/nutch-default.xml
URL: 
http://svn.apache.org/viewvc/nutch/trunk/conf/nutch-default.xml?rev=1731831&r1=1731830&r2=1731831&view=diff
==============================================================================
--- nutch/trunk/conf/nutch-default.xml (original)
+++ nutch/trunk/conf/nutch-default.xml Tue Feb 23 10:23:24 2016
@@ -538,16 +538,6 @@
 </property>
 
 <property>
-  <name>db.ignore.internal.links</name>
-  <value>true</value>
-  <description>If true, when adding new links to a page, links from
-  the same host are ignored.  This is an effective way to limit the
-  size of the link database, keeping only the highest quality
-  links.
-  </description>
-</property>
-
-<property>
   <name>db.ignore.external.links</name>
   <value>false</value>
   <description>If true, outlinks leading from a page to external hosts or 
domain
@@ -616,15 +606,6 @@
 </property>
 
 <property>
-  <name>db.max.inlinks</name>
-  <value>10000</value>
-  <description>Maximum number of Inlinks per URL to be kept in LinkDb.
-  If "invertlinks" finds more inlinks than this number, only the first
-  N inlinks will be stored, and the rest will be discarded.
-  </description>
-</property>
-
-<property>
   <name>db.max.outlinks.per.page</name>
   <value>100</value>
   <description>The maximum number of outlinks that we'll process for a page.
@@ -681,6 +662,35 @@
   </description>
 </property>
 
+<!-- linkdb properties -->
+
+<property>
+  <name>linkdb.max.inlinks</name>
+  <value>10000</value>
+  <description>Maximum number of Inlinks per URL to be kept in LinkDb.
+  If "invertlinks" finds more inlinks than this number, only the first
+  N inlinks will be stored, and the rest will be discarded.
+  </description>
+</property>
+
+<property>
+  <name>linkdb.ignore.internal.links</name>
+  <value>true</value>
+  <description>If true, when adding new links to a page, links from
+  the same host are ignored.  This is an effective way to limit the
+  size of the link database, keeping only the highest quality
+  links.
+  </description>
+</property>
+
+<property>
+  <name>linkdb.ignore.external.links</name>
+  <value>false</value>
+  <description>If true, when adding new links to a page, links from
+  a different host are ignored.
+  </description>
+</property>
+
 <!-- generate properties -->
 
 <property>

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java?rev=1731831&r1=1731830&r2=1731831&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/LinkDb.java Tue Feb 23 10:23:24 
2016
@@ -48,8 +48,8 @@ public class LinkDb extends NutchTool im
 
   public static final Logger LOG = LoggerFactory.getLogger(LinkDb.class);
 
-  public static final String IGNORE_INTERNAL_LINKS = 
"db.ignore.internal.links";
-  public static final String IGNORE_EXTERNAL_LINKS = 
"db.ignore.external.links";
+  public static final String IGNORE_INTERNAL_LINKS = 
"linkdb.ignore.internal.links";
+  public static final String IGNORE_EXTERNAL_LINKS = 
"linkdb.ignore.external.links";
 
   public static final String CURRENT_NAME = "current";
   public static final String LOCK_NAME = ".locked";
@@ -68,7 +68,7 @@ public class LinkDb extends NutchTool im
   }
 
   public void configure(JobConf job) {
-    maxAnchorLength = job.getInt("db.max.anchor.length", 100);
+    maxAnchorLength = job.getInt("linkdb.max.anchor.length", 100);
     ignoreInternalLinks = job.getBoolean(IGNORE_INTERNAL_LINKS, true);
     ignoreExternalLinks = job.getBoolean(IGNORE_EXTERNAL_LINKS, false);
 

Modified: nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbMerger.java
URL: 
http://svn.apache.org/viewvc/nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbMerger.java?rev=1731831&r1=1731830&r2=1731831&view=diff
==============================================================================
--- nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbMerger.java (original)
+++ nutch/trunk/src/java/org/apache/nutch/crawl/LinkDbMerger.java Tue Feb 23 
10:23:24 2016
@@ -55,7 +55,7 @@ import org.apache.nutch.util.TimingUtil;
  * </p>
  * <p>
  * If more than one LinkDb contains information about the same URL, all inlinks
- * are accumulated, but only at most <code>db.max.inlinks</code> inlinks will
+ * are accumulated, but only at most <code>linkdb.max.inlinks</code> inlinks 
will
  * ever be added.
  * </p>
  * <p>
@@ -104,7 +104,7 @@ public class LinkDbMerger extends Config
   }
 
   public void configure(JobConf job) {
-    maxInlinks = job.getInt("db.max.inlinks", 10000);
+    maxInlinks = job.getInt("linkdb.max.inlinks", 10000);
   }
 
   public void close() throws IOException {


Reply via email to