This is an automated email from the ASF dual-hosted git repository. markus pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push: new d7e4046 NUTCH-2444 HostDB CSV dumper to emit field header by default new 3c21a6b Merge branch 'master' of https://gitbox.apache.org/repos/asf/nutch d7e4046 is described below commit d7e4046e6e725ed759d0c43e37c51c5c3122e006 Author: Markus Jelsma <mar...@apache.org> AuthorDate: Mon Oct 23 15:11:17 2017 +0200 NUTCH-2444 HostDB CSV dumper to emit field header by default --- src/java/org/apache/nutch/hostdb/ReadHostDb.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/java/org/apache/nutch/hostdb/ReadHostDb.java b/src/java/org/apache/nutch/hostdb/ReadHostDb.java index 54649e4..28a7eb7 100644 --- a/src/java/org/apache/nutch/hostdb/ReadHostDb.java +++ b/src/java/org/apache/nutch/hostdb/ReadHostDb.java @@ -62,6 +62,7 @@ public class ReadHostDb extends Configured implements Tool { private static final Logger LOG = LoggerFactory .getLogger(MethodHandles.lookup().lookupClass()); + public static final String HOSTDB_DUMP_HEADER = "hostdb.dump.field.header"; public static final String HOSTDB_DUMP_HOSTNAMES = "hostdb.dump.hostnames"; public static final String HOSTDB_DUMP_HOMEPAGES = "hostdb.dump.homepages"; public static final String HOSTDB_FILTER_EXPRESSION = "hostdb.filter.expression"; @@ -69,12 +70,14 @@ public class ReadHostDb extends Configured implements Tool { static class ReadHostDbMapper extends Mapper<Text, HostDatum, Text, Text> { protected boolean dumpHostnames = false; protected boolean dumpHomepages = false; + protected boolean fieldHeader = true; protected Text emptyText = new Text(); protected Expression expr = null; public void setup(Context context) { dumpHomepages = context.getConfiguration().getBoolean(HOSTDB_DUMP_HOMEPAGES, false); dumpHostnames = context.getConfiguration().getBoolean(HOSTDB_DUMP_HOSTNAMES, false); + fieldHeader = context.getConfiguration().getBoolean(HOSTDB_DUMP_HEADER, true); String expr = 
context.getConfiguration().get(HOSTDB_FILTER_EXPRESSION); if (expr != null) { // Create or retrieve a JexlEngine @@ -89,7 +92,12 @@ public class ReadHostDb extends Configured implements Tool { } } - public void map(Text key, HostDatum datum, Context context) throws IOException, InterruptedException { + public void map(Text key, HostDatum datum, Context context) throws IOException, InterruptedException { + if (fieldHeader && !dumpHomepages && !dumpHostnames) { + context.write(new Text("hostname"), new Text("unfetched\tfetched\tgone\tredirTemp\tredirPerm\tredirSum\tok\tnumRecords\tdnsFail\tcnxFail\tsumFail\tscore\tlastCheck\thomepage\tmetadata")); + fieldHeader = false; + } + if (expr != null) { // Create a context and add data JexlContext jcontext = new MapContext(); -- To stop receiving notification emails like this one, please contact "commits@nutch.apache.org" <commits@nutch.apache.org>.