Author: markus
Date: Mon Jul 11 10:22:37 2011
New Revision: 1145109
URL: http://svn.apache.org/viewvc?rev=1145109&view=rev
Log:
NUTCH-1030 WebgraphDB program requires manually added directories
Modified:
nutch/branches/branch-1.4/CHANGES.txt
nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
Modified: nutch/branches/branch-1.4/CHANGES.txt
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/CHANGES.txt?rev=1145109&r1=1145108&r2=1145109&view=diff
==============================================================================
--- nutch/branches/branch-1.4/CHANGES.txt (original)
+++ nutch/branches/branch-1.4/CHANGES.txt Mon Jul 11 10:22:37 2011
@@ -2,6 +2,8 @@ Nutch Change Log
Release 1.4 - Current development
+* NUTCH-1030 WebgraphDB program requires manually added directories (markus)
+
* NUTCH-1011 Normalize duplicate slashes in URL's (markus)
* NUTCH-993 NullPointerException at FetcherOutputFormat.checkOutputSpecs
(Christian Guegi via jnioche)
Modified:
nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
URL:
http://svn.apache.org/viewvc/nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java?rev=1145109&r1=1145108&r2=1145109&view=diff
==============================================================================
---
nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
(original)
+++
nutch/branches/branch-1.4/src/java/org/apache/nutch/scoring/webgraph/WebGraph.java
Mon Jul 11 10:22:37 2011
@@ -451,18 +451,19 @@ public class WebGraph
// lock an existing webgraphdb to prevent multiple simultaneous updates
Path lock = new Path(webGraphDb, LOCK_NAME);
- boolean webGraphDbExists = fs.exists(webGraphDb);
- if (webGraphDbExists) {
- LockUtil.createLockFile(fs, lock, false);
- }
- else {
-
- // if the webgraph doesn't exist, create it
+ if (!fs.exists(webGraphDb)) {
fs.mkdirs(webGraphDb);
}
+ LockUtil.createLockFile(fs, lock, false);
+
// outlink and temp outlink database paths
Path outlinkDb = new Path(webGraphDb, OUTLINK_DIR);
+
+ if (!fs.exists(outlinkDb)) {
+ fs.mkdirs(outlinkDb);
+ }
+
Path tempOutlinkDb = new Path(outlinkDb + "-"
+ Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
JobConf outlinkJob = new NutchJob(conf);
@@ -480,10 +481,8 @@ public class WebGraph
}
// add the existing webgraph
- if (webGraphDbExists) {
- LOG.info("OutlinkDb: adding input: " + outlinkDb);
- FileInputFormat.addInputPath(outlinkJob, outlinkDb);
- }
+ LOG.info("OutlinkDb: adding input: " + outlinkDb);
+ FileInputFormat.addInputPath(outlinkJob, outlinkDb);
outlinkJob.setInputFormat(SequenceFileInputFormat.class);
outlinkJob.setMapperClass(OutlinkDb.class);