Author: markus
Date: Tue Aug 16 16:28:43 2011
New Revision: 1158357

URL: http://svn.apache.org/viewvc?rev=1158357&view=rev
Log:
NUTCH-1051 Export WebGraph node scores for Solr.ExternalFileField

Modified:
    nutch/branches/branch-1.4/CHANGES.txt
    nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java

Modified: nutch/branches/branch-1.4/CHANGES.txt
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1158357&r1=1158356&r2=1158357&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Tue Aug 16 16:28:43 2011
@@ -2,6 +2,8 @@ Nutch Change Log
 
 Release 1.4 - Current development
 
+* NUTCH-1051 Export WebGraph node scores for Solr.ExternalFileField (markus)
+
 * NUTCH-1083 ParserChecker implements Tools (jnioche)
 
 * NUTCH-1082 IndexingFiltersChecker utility does not list multi valued fields (markus)

Modified: nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java
URL: http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java?rev=1158357&r1=1158356&r2=1158357&view=diff
==============================================================================
--- nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java (original)
+++ nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/NodeDumper.java Tue Aug 16 16:28:43 2011
@@ -159,7 +159,7 @@ public class NodeDumper
    * 
    * @throws IOException If an error occurs while dumping the top values.
    */
-  public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output)
+  public void dumpNodes(Path webGraphDb, DumpType type, long topN, Path output, boolean asEff)
     throws IOException {
 
     SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
@@ -186,6 +186,11 @@ public class NodeDumper
     dumper.setBoolean("scores", type == DumpType.SCORES);
     dumper.setLong("topn", topN);
 
+    // Set equals-sign as separator for Solr's ExternalFileField
+    if (asEff) {
+      dumper.set("mapred.textoutputformat.separator", "=");
+    }
+
     try {
       LOG.info("NodeDumper: running");
       JobClient.runJob(dumper);
@@ -226,6 +231,8 @@ public class NodeDumper
       "show topN scores").create("topn");
     Option outputOpts = OptionBuilder.withArgName("output").hasArg().withDescription(
       "the output directory to use").create("output");
+    Option effOpts = OptionBuilder.withArgName("asEff").withDescription(
+      "Solr ExternalFileField compatible output format").create("asEff");
     options.addOption(helpOpts);
     options.addOption(webGraphDbOpts);
     options.addOption(inlinkOpts);
@@ -233,6 +240,7 @@ public class NodeDumper
     options.addOption(scoreOpts);
     options.addOption(topNOpts);
     options.addOption(outputOpts);
+    options.addOption(effOpts);
 
     CommandLineParser parser = new GnuParser();
     try {
@@ -256,7 +264,10 @@ public class NodeDumper
       DumpType type = (inlinks ? DumpType.INLINKS : outlinks
         ? DumpType.OUTLINKS : DumpType.SCORES);
 
-      dumpNodes(new Path(webGraphDb), type, topN, new Path(output));
+      // Use ExternalFileField?
+      boolean asEff = line.hasOption("asEff");
+
+      dumpNodes(new Path(webGraphDb), type, topN, new Path(output), asEff);
       return 0;
     }
     catch (Exception e) {


Reply via email to