Author: markus
Date: Tue Aug 16 16:28:43 2011
New Revision: 1158357

URL: http://svn.apache.org/viewvc?rev=1158357&view=rev
Log:
NUTCH-1051 Export WebGraph node scores for Solr.ExternalFileField

Modified: nutch/branches/branch-1.4/CHANGES.txt nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java Modified: nutch/branches/branch-1.4/CHANGES.txt URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1158357&r1=1158356&r2=1158357&view=diff ============================================================================== --- nutch/branches/branch-1.4/CHANGES.txt (original) +++ nutch/branches/branch-1.4/CHANGES.txt Tue Aug 16 16:28:43 2011 @@ -2,6 +2,8 @@ Nutch Change Log Release 1.4 - Current development +* NUTCH-1051 Export WebGraph node scores for Solr.ExternalFileField (markus) + * NUTCH-1083 ParserChecker implements Tools (jnioche) * NUTCH-1082 IndexingFiltersChecker utility does not list multi valued fields (markus) Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java?rev=1158357&r1=1158356&r2=1158357&view=diff ============================================================================== --- nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java (original) +++ nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java Tue Aug 16 16:28:43 2011 @@ -159,7 +159,7 @@ public class NodeDumper * * @throws IOException If an error occurs while dumping the top values. 
*/ - public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output) + public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output, boolean asEff) throws IOException { SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); @@ -186,6 +186,11 @@ public class NodeDumper dumper.setBoolean("scores", type == DumpType.SCORES); dumper.setLong("topn", topN); + // Set equals-sign as separator for Solr's ExternalFileField + if (asEff) { + dumper.set("mapred.textoutputformat.separator", "="); + } + try { LOG.info("NodeDumper: running"); JobClient.runJob(dumper); @@ -226,6 +231,8 @@ public class NodeDumper "show topN scores").create("topn"); Option outputOpts = OptionBuilder.withArgName("output").hasArg().withDescription( "the output directory to use").create("output"); + Option effOpts = OptionBuilder.withArgName("asEff").withDescription( + "Solr ExternalFileField compatible output format").create("asEff"); options.addOption(helpOpts); options.addOption(webGraphDbOpts); options.addOption(inlinkOpts); @@ -233,6 +240,7 @@ public class NodeDumper options.addOption(scoreOpts); options.addOption(topNOpts); options.addOption(outputOpts); + options.addOption(effOpts); CommandLineParser parser = new GnuParser(); try { @@ -256,7 +264,10 @@ public class NodeDumper DumpType type = (inlinks ? DumpType.INLINKS : outlinks ? DumpType.OUTLINKS : DumpType.SCORES); - dumpNodes(new Path(webGraphDb), type, topN, new Path(output)); + // Use ExternalFileField? + boolean asEff = line.hasOption("asEff"); + + dumpNodes(new Path(webGraphDb), type, topN, new Path(output), asEff); return 0; } catch (Exception e) {