[1/2] nutch git commit: Update mvn.template to add Thamme Gowda as developer

2016-07-01 Thread thammegowda
Repository: nutch
Updated Branches:
  refs/heads/master 34050adae -> 6b141fb10


Update mvn.template to add Thamme Gowda as developer


Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/2b4991a9
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/2b4991a9
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/2b4991a9

Branch: refs/heads/master
Commit: 2b4991a9c87888fd598257d8139af0679b4f2f82
Parents: beb48a8
Author: Thamme Gowda 
Authored: Fri Jul 1 10:51:13 2016 -0700
Committer: Thamme Gowda 
Committed: Fri Jul 1 10:51:13 2016 -0700

--
 ivy/mvn.template | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/nutch/blob/2b4991a9/ivy/mvn.template
--
diff --git a/ivy/mvn.template b/ivy/mvn.template
index 3bda967..68813fc 100644
--- a/ivy/mvn.template
+++ b/ivy/mvn.template
@@ -1,4 +1,4 @@
-
+   
 

[1/2] nutch git commit: NUTCH-2291 - Fix mrunit dependencies - remove classifier from dependency because pom file name on Maven repository does not contain a classifier

2016-07-01 Thread snagel
Repository: nutch
Updated Branches:
  refs/heads/master cb6fbae51 -> 34050adae


NUTCH-2291 - Fix mrunit dependencies
- remove classifier from dependency because pom file name on Maven repository 
does not contain a classifier


Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/c18e19bf
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/c18e19bf
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/c18e19bf

Branch: refs/heads/master
Commit: c18e19bfe63c3ac5221d1a0f454b9e1a037a4386
Parents: cb6fbae
Author: Sebastian Nagel 
Authored: Fri Jul 1 14:45:41 2016 +0200
Committer: Sebastian Nagel 
Committed: Fri Jul 1 14:45:41 2016 +0200

--
 ivy/ivy.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--


http://git-wip-us.apache.org/repos/asf/nutch/blob/c18e19bf/ivy/ivy.xml
--
diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index a4e9481..a9a83ae 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -97,7 +97,7 @@
 


-   
+   






[2/2] nutch git commit: NUTCH-1553 Property 'indexer.delete.robots.noindex' not working when using parser-html - fix broken unit test (fix HTML markup, make test for meta data extraction obligatory) -

2016-07-01 Thread snagel
NUTCH-1553 Property 'indexer.delete.robots.noindex' not working when using 
parser-html
- fix broken unit test (fix HTML markup, make test for meta data extraction 
obligatory)
- add all values of general metadata to parse metadata


Project: http://git-wip-us.apache.org/repos/asf/nutch/repo
Commit: http://git-wip-us.apache.org/repos/asf/nutch/commit/34050ada
Tree: http://git-wip-us.apache.org/repos/asf/nutch/tree/34050ada
Diff: http://git-wip-us.apache.org/repos/asf/nutch/diff/34050ada

Branch: refs/heads/master
Commit: 34050adae0896a6d7ddb254a1622a03af6e07175
Parents: c18e19b
Author: Sebastian Nagel 
Authored: Fri Jul 1 15:07:52 2016 +0200
Committer: Sebastian Nagel 
Committed: Fri Jul 1 15:10:49 2016 +0200

--
 .../org/apache/nutch/metadata/Metadata.java | 25 
 .../org/apache/nutch/parse/html/HtmlParser.java |  4 +---
 .../apache/nutch/parse/html/TestHtmlParser.java | 11 -
 3 files changed, 31 insertions(+), 9 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/nutch/blob/34050ada/src/java/org/apache/nutch/metadata/Metadata.java
--
diff --git a/src/java/org/apache/nutch/metadata/Metadata.java b/src/java/org/apache/nutch/metadata/Metadata.java
index f0bfcd3..8a57ee3 100644
--- a/src/java/org/apache/nutch/metadata/Metadata.java
+++ b/src/java/org/apache/nutch/metadata/Metadata.java
@@ -123,6 +123,31 @@ public class Metadata implements Writable, CreativeCommons, DublinCore,
   }
 
   /**
+   * Add all name/value mappings (merge two metadata mappings). If a name
+   * already exists in current metadata the values are added to existing values.
+   *
+   * @param metadata
+   *  other Metadata to be merged
+   */
+  public void addAll(Metadata metadata) {
+for (String name : metadata.names()) {
+  String[] addValues = metadata.getValues(name);
+  if (addValues == null)
+continue;
+  String[] oldValues = this.metadata.get(name);
+  if (oldValues == null) {
+this.metadata.put(name, addValues);
+  } else {
+String[] newValues = new String[oldValues.length + addValues.length];
+System.arraycopy(oldValues, 0, newValues, 0, oldValues.length);
+System.arraycopy(addValues, 0, newValues, oldValues.length,
+addValues.length);
+this.metadata.put(name, newValues);
+  }
+}
+  }
+
+  /**
* Copy All key-value pairs from properties.
* 
* @param properties

http://git-wip-us.apache.org/repos/asf/nutch/blob/34050ada/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
--
diff --git a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
index baa..4d043ba 100644
--- a/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
+++ b/src/plugin/parse-html/src/java/org/apache/nutch/parse/html/HtmlParser.java
@@ -183,9 +183,7 @@ public class HtmlParser implements Parser {
 HTMLMetaProcessor.getMetaTags(metaTags, root, base);
 
 // populate Nutch metadata with HTML meta directives
-for (String name : metaTags.getGeneralTags().names()) {
-  metadata.add(name, metaTags.getGeneralTags().get(name));
-}
+metadata.addAll(metaTags.getGeneralTags());
 
 if (LOG.isTraceEnabled()) {
   LOG.trace("Meta tags for " + base + ": " + metaTags.toString());

http://git-wip-us.apache.org/repos/asf/nutch/blob/34050ada/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
--
diff --git a/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java b/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
index bcfe9e4..7099f50 100644
--- a/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
+++ b/src/plugin/parse-html/src/test/org/apache/nutch/parse/html/TestHtmlParser.java
@@ -40,8 +40,8 @@ public class TestHtmlParser {
   private static final String encodingTestBody = "\n  français\n  
español\n  русский язык\n  čeština\n  
ελληνικά\n";
   private static final String encodingTestContent = ""
   + encodingTestKeywords + "\n"
-  + "\n" + "\n" + encodingTestBody + 
"\n";
+  + "\n"
+  + "\n" + encodingTestBody + "\n";
 
   private static String[][] encodingTestPages = {
   {
@@ -113,10 +113,9 @@ public class TestHtmlParser {
 Assert.assertTrue(keyword + " not found in text (" + name + ")",
 text.contains(keyword));
   }
-  if (keywords != null) {
-Assert.assertEquals("Keywords not