Author: cutting
Date: Thu Dec 1 11:59:24 2005
New Revision: 350310
URL: http://svn.apache.org/viewcvs?rev=350310&view=rev
Log:
Paul Baclace's code & comment cleanups from NUTCH-116.
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Crawl.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/fs/NDFSFileSystem.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/fs/NutchFileSystem.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobConf.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/ndfs/DatanodeInfo.java
lucene/nutch/branches/mapred/src/java/org/apache/nutch/ndfs/NDFSClient.java
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Crawl.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Crawl.java?rev=350310&r1=350309&r2=350310&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Crawl.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/crawl/Crawl.java Thu Dec 1 11:59:24 2005
@@ -52,7 +52,7 @@
JobConf conf = new JobConf(NutchConf.get());
//conf.addConfResource("crawl-tool.xml");
- File rootUrlFile = null;
+ File rootUrlDir = null;
File dir = new File("crawl-" + getDate());
int threads = conf.getInt("fetcher.threads.fetch", 10);
int depth = 5;
@@ -72,7 +72,7 @@
topN = Integer.parseInt(args[i+1]);
i++;
} else if (args[i] != null) {
- rootUrlFile = new File(args[i]);
+ rootUrlDir = new File(args[i]);
}
}
@@ -82,7 +82,7 @@
}
LOG.info("crawl started in: " + dir);
- LOG.info("rootUrlFile = " + rootUrlFile);
+ LOG.info("rootUrlDir = " + rootUrlDir);
LOG.info("threads = " + threads);
LOG.info("depth = " + depth);
@@ -98,7 +98,7 @@
File tmpDir = conf.getLocalFile("crawl", getDate());
// initialize crawlDb
- new Injector(conf).inject(crawlDb, rootUrlFile);
+ new Injector(conf).inject(crawlDb, rootUrlDir);
for (int i = 0; i < depth; i++) { // generate new segment
File segment =
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/fs/NDFSFileSystem.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/fs/NDFSFileSystem.java?rev=350310&r1=350309&r2=350310&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/fs/NDFSFileSystem.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/fs/NDFSFileSystem.java Thu Dec 1 11:59:24 2005
@@ -25,8 +25,9 @@
import org.apache.nutch.util.NutchConf;
/****************************************************************
- * Implement the NutchFileSystem interface for the NDFS system.
- *
+ * Implementation of the abstract NutchFileSystem for the NDFS system.
+ * This is the distributed file system. It can be distributed over
+ * 1 or more machines
* @author Mike Cafarella
*****************************************************************/
public class NDFSFileSystem extends NutchFileSystem {
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/fs/NutchFileSystem.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/fs/NutchFileSystem.java?rev=350310&r1=350309&r2=350310&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/fs/NutchFileSystem.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/fs/NutchFileSystem.java Thu Dec 1 11:59:24 2005
@@ -24,16 +24,21 @@
import org.apache.nutch.util.*;
/****************************************************************
- * NutchFileSystem is an interface for a fairly simple
- * distributed file system. A Nutch installation might consist
+ * An abstract base class for a fairly simple
+ * distributed file system.
+ * A Nutch installation might consist
* of multiple machines, which should swap files transparently.
* This interface allows other Nutch systems to find and place
* files into the distributed Nutch-controlled file world.
- *
+ * <p>
+ * A local implementation exists for testing and for small Nutch instances.
+ * <p>
* The standard job of NutchFileSystem is to take the location-
* independent NutchFile objects, and resolve them using local
* knowledge and local instances of ShareGroup.
- *
+ * <p>
+ * The local implementation is {@link LocalFileSystem} and distributed
+ * implementation is {@link NDFSFileSystem}.
* @author Mike Cafarella
*****************************************************************/
public abstract class NutchFileSystem {
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobConf.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobConf.java?rev=350310&r1=350309&r2=350310&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobConf.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/mapred/JobConf.java Thu Dec 1 11:59:24 2005
@@ -52,7 +52,7 @@
/** Construct a map/reduce job configuration.
*
- * @param confg a NutchConf whose settings will be inherited.
+ * @param conf a NutchConf whose settings will be inherited.
*/
public JobConf(NutchConf conf) {
super(conf);
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/ndfs/DatanodeInfo.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/ndfs/DatanodeInfo.java?rev=350310&r1=350309&r2=350310&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/ndfs/DatanodeInfo.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/ndfs/DatanodeInfo.java Thu Dec 1 11:59:24 2005
@@ -27,33 +27,36 @@
* @author Mike Cafarella
**************************************************/
public class DatanodeInfo implements Writable, Comparable {
- UTF8 name;
- long capacity, remaining, lastUpdate;
- volatile TreeSet blocks;
+ private UTF8 name;
+ private long capacityBytes, remainingBytes, lastUpdate;
+ private volatile TreeSet blocks;
- /**
+ /** Create an empty DatanodeInfo.
*/
public DatanodeInfo() {
this(new UTF8(), 0, 0);
}
+ /**
+ * @param name hostname:portNumber as UTF8 object.
+ */
public DatanodeInfo(UTF8 name) {
this.name = name;
- int colon = name.toString().indexOf(":");
this.blocks = new TreeSet();
updateHeartbeat(0, 0);
}
- /**
- */
+ /**
+ * @param name hostname:portNumber as UTF8 object.
+ */
public DatanodeInfo(UTF8 name, long capacity, long remaining) {
this.name = name;
this.blocks = new TreeSet();
updateHeartbeat(capacity, remaining);
}
- /**
- */
+ /**
+ */
public void updateBlocks(Block newBlocks[]) {
blocks.clear();
for (int i = 0; i < newBlocks.length; i++) {
@@ -61,8 +64,8 @@
}
}
- /**
- */
+ /**
+ */
public void addBlock(Block b) {
blocks.add(b);
}
@@ -70,13 +73,21 @@
/**
*/
public void updateHeartbeat(long capacity, long remaining) {
- this.capacity = capacity;
- this.remaining = remaining;
+ this.capacityBytes = capacity;
+ this.remainingBytes = remaining;
this.lastUpdate = System.currentTimeMillis();
}
+
+ /**
+ * @return hostname:portNumber as UTF8 object.
+ */
public UTF8 getName() {
return name;
}
+
+ /**
+ * @return hostname and no :portNumber as UTF8 object.
+ */
public UTF8 getHost() {
String nameStr = name.toString();
int colon = nameStr.indexOf(":");
@@ -96,18 +107,20 @@
return blocks.iterator();
}
public long getCapacity() {
- return capacity;
+ return capacityBytes;
}
public long getRemaining() {
- return remaining;
+ return remainingBytes;
}
public long lastUpdate() {
return lastUpdate;
}
- /////////////////////////////////////////////////
- // Comparable
- /////////////////////////////////////////////////
+ /** Comparable.
+ * Basis of compare is the UTF8 name (host:portNumber) only.
+ * @param o
+ * @return as specified by Comparable.
+ */
public int compareTo(Object o) {
DatanodeInfo d = (DatanodeInfo) o;
return name.compareTo(d.getName());
@@ -120,8 +133,8 @@
*/
public void write(DataOutput out) throws IOException {
name.write(out);
- out.writeLong(capacity);
- out.writeLong(remaining);
+ out.writeLong(capacityBytes);
+ out.writeLong(remainingBytes);
out.writeLong(lastUpdate);
/**
@@ -137,8 +150,8 @@
public void readFields(DataInput in) throws IOException {
this.name = new UTF8();
this.name.readFields(in);
- this.capacity = in.readLong();
- this.remaining = in.readLong();
+ this.capacityBytes = in.readLong();
+ this.remainingBytes = in.readLong();
this.lastUpdate = in.readLong();
/**
Modified:
lucene/nutch/branches/mapred/src/java/org/apache/nutch/ndfs/NDFSClient.java
URL:
http://svn.apache.org/viewcvs/lucene/nutch/branches/mapred/src/java/org/apache/nutch/ndfs/NDFSClient.java?rev=350310&r1=350309&r2=350310&view=diff
==============================================================================
--- lucene/nutch/branches/mapred/src/java/org/apache/nutch/ndfs/NDFSClient.java (original)
+++ lucene/nutch/branches/mapred/src/java/org/apache/nutch/ndfs/NDFSClient.java Thu Dec 1 11:59:24 2005
@@ -26,9 +26,8 @@
import java.util.logging.*;
/********************************************************
- * NDFSClient does what's necessary to connect to a Nutch Filesystem
- * and perform basic file tasks.
- *
+ * NDFSClient can connect to a Nutch Filesystem and perform basic file tasks.
+ * Connects to a namenode daemon.
* @author Mike Cafarella, Tessa MacDuff
********************************************************/
public class NDFSClient implements FSConstants {
@@ -41,7 +40,7 @@
Daemon leaseChecker;
- /**
+ /** Create a new NDFSClient connected to the given namenode server.
*/
public NDFSClient(InetSocketAddress nameNodeAddr) {
this.namenode = (ClientProtocol) RPC.getProxy(ClientProtocol.class,
nameNodeAddr);
@@ -154,6 +153,7 @@
}
/**
+ *
*/
public void release(UTF8 src) throws IOException {
boolean hasReleased = false;
@@ -186,8 +186,8 @@
}
/***************************************************************
- * If any leases are outstanding, periodically check in with the
- * namenode and renew all the leases.
+ * Periodically check in with the namenode and renew all the leases
+ * when the lease period is half over.
***************************************************************/
class LeaseChecker implements Runnable {
/**