This is an automated email from the ASF dual-hosted git repository.

markus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git


The following commit(s) were added to refs/heads/master by this push:
     new d7e4046  NUTCH-2444 HostDB CSV dumper to emit field header by default
     new 3c21a6b  Merge branch 'master' of https://gitbox.apache.org/repos/asf/nutch
d7e4046 is described below

commit d7e4046e6e725ed759d0c43e37c51c5c3122e006
Author: Markus Jelsma <mar...@apache.org>
AuthorDate: Mon Oct 23 15:11:17 2017 +0200

    NUTCH-2444 HostDB CSV dumper to emit field header by default
---
 src/java/org/apache/nutch/hostdb/ReadHostDb.java | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/java/org/apache/nutch/hostdb/ReadHostDb.java b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
index 54649e4..28a7eb7 100644
--- a/src/java/org/apache/nutch/hostdb/ReadHostDb.java
+++ b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
@@ -62,6 +62,7 @@ public class ReadHostDb extends Configured implements Tool {
   private static final Logger LOG = LoggerFactory
       .getLogger(MethodHandles.lookup().lookupClass());
 
+  public static final String HOSTDB_DUMP_HEADER = "hostdb.dump.field.header";
   public static final String HOSTDB_DUMP_HOSTNAMES = "hostdb.dump.hostnames";
   public static final String HOSTDB_DUMP_HOMEPAGES = "hostdb.dump.homepages";
   public static final String HOSTDB_FILTER_EXPRESSION = 
"hostdb.filter.expression";
@@ -69,12 +70,14 @@ public class ReadHostDb extends Configured implements Tool {
   static class ReadHostDbMapper extends Mapper<Text, HostDatum, Text, Text> {
     protected boolean dumpHostnames = false;
     protected boolean dumpHomepages = false;
+    protected boolean fieldHeader = true;
     protected Text emptyText = new Text();
     protected Expression expr = null;
 
     public void setup(Context context) {
       dumpHomepages = 
context.getConfiguration().getBoolean(HOSTDB_DUMP_HOMEPAGES, false);
       dumpHostnames = 
context.getConfiguration().getBoolean(HOSTDB_DUMP_HOSTNAMES, false);
+      fieldHeader = context.getConfiguration().getBoolean(HOSTDB_DUMP_HEADER, 
true);
       String expr = context.getConfiguration().get(HOSTDB_FILTER_EXPRESSION);
       if (expr != null) {
         // Create or retrieve a JexlEngine
@@ -89,7 +92,12 @@ public class ReadHostDb extends Configured implements Tool {
       }
     }
 
-    public void map(Text key, HostDatum datum, Context context) throws 
IOException, InterruptedException {     
+    public void map(Text key, HostDatum datum, Context context) throws 
IOException, InterruptedException {
+      if (fieldHeader && !dumpHomepages && !dumpHostnames) {
+        context.write(new Text("hostname"), new 
Text("unfetched\tfetched\tgone\tredirTemp\tredirPerm\tredirSum\tok\tnumRecords\tdnsFail\tcnxFail\tsumFail\tscore\tlastCheck\thomepage\tmetadata"));
+        fieldHeader = false;
+      }
+      
       if (expr != null) {
         // Create a context and add data
         JexlContext jcontext = new MapContext();

-- 
To stop receiving notification emails like this one, please contact
commits@nutch.apache.org.

Reply via email to