This is an automated email from the ASF dual-hosted git repository.
markus pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/nutch.git
The following commit(s) were added to refs/heads/master by this push:
new d7e4046 NUTCH-2444 HostDB CSV dumper to emit field header by default
new 3c21a6b Merge branch 'master' of https://gitbox.apache.org/repos/asf/nutch
d7e4046 is described below
commit d7e4046e6e725ed759d0c43e37c51c5c3122e006
Author: Markus Jelsma <[email protected]>
AuthorDate: Mon Oct 23 15:11:17 2017 +0200
NUTCH-2444 HostDB CSV dumper to emit field header by default
---
src/java/org/apache/nutch/hostdb/ReadHostDb.java | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/src/java/org/apache/nutch/hostdb/ReadHostDb.java b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
index 54649e4..28a7eb7 100644
--- a/src/java/org/apache/nutch/hostdb/ReadHostDb.java
+++ b/src/java/org/apache/nutch/hostdb/ReadHostDb.java
@@ -62,6 +62,7 @@ public class ReadHostDb extends Configured implements Tool {
private static final Logger LOG = LoggerFactory
.getLogger(MethodHandles.lookup().lookupClass());
+ public static final String HOSTDB_DUMP_HEADER = "hostdb.dump.field.header";
public static final String HOSTDB_DUMP_HOSTNAMES = "hostdb.dump.hostnames";
public static final String HOSTDB_DUMP_HOMEPAGES = "hostdb.dump.homepages";
public static final String HOSTDB_FILTER_EXPRESSION =
"hostdb.filter.expression";
@@ -69,12 +70,14 @@ public class ReadHostDb extends Configured implements Tool {
static class ReadHostDbMapper extends Mapper<Text, HostDatum, Text, Text> {
protected boolean dumpHostnames = false;
protected boolean dumpHomepages = false;
+ protected boolean fieldHeader = true;
protected Text emptyText = new Text();
protected Expression expr = null;
public void setup(Context context) {
dumpHomepages =
context.getConfiguration().getBoolean(HOSTDB_DUMP_HOMEPAGES, false);
dumpHostnames =
context.getConfiguration().getBoolean(HOSTDB_DUMP_HOSTNAMES, false);
+ fieldHeader = context.getConfiguration().getBoolean(HOSTDB_DUMP_HEADER,
true);
String expr = context.getConfiguration().get(HOSTDB_FILTER_EXPRESSION);
if (expr != null) {
// Create or retrieve a JexlEngine
@@ -89,7 +92,12 @@ public class ReadHostDb extends Configured implements Tool {
}
}
- public void map(Text key, HostDatum datum, Context context) throws
IOException, InterruptedException {
+ public void map(Text key, HostDatum datum, Context context) throws
IOException, InterruptedException {
+ if (fieldHeader && !dumpHomepages && !dumpHostnames) {
+ context.write(new Text("hostname"), new
Text("unfetched\tfetched\tgone\tredirTemp\tredirPerm\tredirSum\tok\tnumRecords\tdnsFail\tcnxFail\tsumFail\tscore\tlastCheck\thomepage\tmetadata"));
+ fieldHeader = false;
+ }
+
if (expr != null) {
// Create a context and add data
JexlContext jcontext = new MapContext();
--
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].