Update of /cvsroot/nutch/nutch/src/java/net/nutch/util
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv4633/src/java/net/nutch/util
Modified Files:
FileUtil.java
Added Files:
NutchFile.java NutchFileSystem.java
NutchGenericFileSystem.java NutchNFSFileSystem.java
NutchRemoteFileSystem.java ShareGroup.java ShareSet.java
Log Message:
Full commit for Nutch distributed WebDB.
This is a lot of new code that implements the multi-machine
web database. This means we should be able to update the db
with multiple CPUs and disks simultaneously. (This has been
a major bottleneck for us so far.)
This commit also contains files for the NutchFileSystem, which
is a rudimentary distributed file system. The Distributed WebDB
is built on top of NutchFS. There are two implementations of
NutchFS: one for machines mounting NFS (network file system), and
one for machines that need to use a remote SSL connection. The
former is well-tested, but the latter is still a little sketchy.
I've done what little testing I can do on my laptop. I'm putting
code back so that other people can take a look, and so we can put
it on multiple machines.
Note that I've put changes back to the files "DistributedWebDBWriter"
and "DistributedWebDBReader". These are meant to replace "WebDBWriter"
and "WebDBReader," but I didn't want to disturb the source base
until the distributed code is tested further.
--- NEW FILE: NutchFile.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import java.io.*;
/************************************************************
* A class that names a file in the "NutchFileSpace". You can
* convert a NutchFile to a real file with the help of
* an instance of NutchFileSystem.
*
* @author Mike Cafarella
*************************************************************/
public class NutchFile {
    String dbName;
    String shareGroupName;
    File name;
    NutchFileSystem nfs;

    /**
     * A NutchFile contains:
     *   dbName, which labels the cooperating NutchFileSystem it
     *     belongs to.
     *   shareGroupName, which tells the NutchFileSystem which group should get
     *     access to this file.  If the value is null, then no remote
     *     group will get access.
     *   name, which gives the file a unique name.
     *
     * @param nfs the filesystem this file is resolved through
     * @param dbName label of the database the file belongs to
     * @param shareGroupName name of the share group, or null for none
     * @param name relative name of the file
     */
    public NutchFile(NutchFileSystem nfs, String dbName, String shareGroupName, File name) {
        this.nfs = nfs;
        this.dbName = dbName;
        this.shareGroupName = shareGroupName;
        this.name = name;
    }

    /**
     * Create a NutchFile from a previous one that is a directory.
     * The new file shares the directory's filesystem, db name and
     * share group; its name is the given child name resolved
     * against the directory's name.
     *
     * @param dir the NutchFile naming the parent directory
     * @param name the child name inside that directory
     */
    public NutchFile(NutchFile dir, String name) {
        this.nfs = dir.nfs;
        this.dbName = dir.getDBName();
        this.shareGroupName = dir.getShareGroupName();
        this.name = new File(dir.getName(), name);
    }

    /**
     * DB Name the NutchFile lives in.
     */
    public String getDBName() {
        return dbName;
    }

    /**
     * Get the name of the sharegroup this file belongs to.
     * May be null.
     */
    public String getShareGroupName() {
        return shareGroupName;
    }

    /**
     * Terminating filename for the NutchFile.
     */
    public File getName() {
        return name;
    }

    /**
     * Grab a handle to the NutchFileSystem
     */
    public NutchFileSystem getFS() {
        return nfs;
    }

    /**
     * Get the almost-fully-qualified name for this NutchFile,
     * i.e. "dbName/shareGroupName/name".  It is "almost" qualified
     * because it is still relative to a share-group location.
     * When the share group name is null, that path component is
     * left out.
     */
    public String getFilename() {
        return qualify(name.getPath());
    }

    /**
     * Get the almost-fully-qualified name for this NutchFile's
     * 'completed' flag file: the same path as getFilename(), with
     * ".completed" appended to the file's own name.
     */
    public String getCompleteFlagName() {
        return qualify(name.getPath() + ".completed");
    }

    /**
     * Same as getFilename().
     */
    public String toString() {
        return getFilename();
    }

    /**
     * Prefix the given relative path with the db name and, when one
     * is set, the share group name.
     */
    private String qualify(String relativePath) {
        File parent = new File(dbName);
        // java.io.File rejects a null child, so a file without a
        // share group simply has no share-group directory level.
        if (shareGroupName != null) {
            parent = new File(parent, shareGroupName);
        }
        return new File(parent, relativePath).getPath();
    }
}
--- NEW FILE: NutchFileSystem.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import java.io.*;
/****************************************************************
* NutchFileSystem is an interface for a fairly simple
* distributed file system. A Nutch installation might consist
* of multiple machines, which should swap files transparently.
* This interface allows other Nutch systems to find and place
* files into the distributed Nutch-controlled file world.
*
* The standard job of NutchFileSystem is to take the location-
* independent NutchFile objects, and resolve them using local
* knowledge and local instances of ShareGroup.
*
* @author Mike Cafarella
*****************************************************************/
public interface NutchFileSystem {
    /**
     * Get a real File for a name that's not yet under NutchFS control.
     * This may improve performance later on when the
     * File is put() under NutchFS control.  It's also handy for
     * finding a file location where there is a lot of extra room.
     *
     * @return a local File the caller may create and fill
     * @throws IOException if no working file can be allocated
     */
    public File getWorkingFile() throws IOException;

    /**
     * Associates a NutchFile with a given real-fs File.  The
     * real-world File will be moved to a proper location according
     * to its NutchFile representation.  It will be moved locally
     * or remotely, as appropriate.
     *
     * The given "real" File can no longer be assumed to exist at
     * the given location after the call to put().  In the future,
     * the File should only be obtained via its NutchFile identifier.
     *
     * This method does not return anything; in particular it does
     * not hand back a file that was previously stored under the
     * same name.
     *
     * @param nutchFile the location-independent name to store under
     * @param workingFile the real file or directory to store
     * @param overwrite whether an already-present file of the same
     *   name may be replaced (implementations may ignore this)
     * @throws IOException if the file cannot be stored
     */
    public void put(NutchFile nutchFile, File workingFile, boolean overwrite) throws IOException;

    /**
     * Sometimes the NutchFileSystem user constructs a directory of many
     * subparts, often built slowly over time.  However, that highest-level
     * directory might not ever have been put(); instead, its subparts
     * have been put(), one piece at a time.
     *
     * Eventually, though, all the subdirs will be in place, and the
     * entire directory structure will be complete.  That event is
     * signified by calling "completeDir".  This call will mark
     * the given directory as completed.
     *
     * @param nutchFile the directory to mark as completed
     * @throws IOException if the directory cannot be marked
     */
    public void completeDir(NutchFile nutchFile) throws IOException;

    /**
     * Obtains the indicated NutchFile, whether remote or local.
     * The function will block until the file is available.
     *
     * @param nutchFile the file to obtain
     * @return the real File backing the NutchFile
     * @throws IOException if the file cannot be obtained
     */
    public File get(NutchFile nutchFile) throws IOException;

    /**
     * Same as get(NutchFile), but expires after the given number of
     * ms, returning null.
     *
     * @param nutchFile the file to obtain
     * @param timeout how long to wait, in milliseconds
     * @return the real File, or null if it did not become available
     *   in time
     * @throws IOException if the file cannot be obtained
     */
    public File get(NutchFile nutchFile, long timeout) throws IOException;

    /**
     * Obtain a lock with the given NutchFile as the lock object
     *
     * @param lockFile the NutchFile that serves as the lock
     * @param exclusive true for an exclusive lock, false for a
     *   shared one
     * @throws IOException if the lock cannot be obtained
     */
    public void lock(NutchFile lockFile, boolean exclusive) throws IOException;

    /**
     * Release the lock.  Must be in the lock() state.
     *
     * @param lockFile the NutchFile previously passed to lock()
     * @throws IOException if the lock cannot be released
     */
    public void release(NutchFile lockFile) throws IOException;

    /**
     * Delete the given NutchFile and everything below it.  This is
     * propagated to the different appropriate machines, the same
     * way a put() operation is.
     *
     * @param nutchFile the file or directory to delete
     * @throws IOException if the delete fails
     */
    public void delete(NutchFile nutchFile) throws IOException;

    /**
     * Rename the given NutchFile to something new.  Files cannot
     * be moved across share-spaces.  The change is propagated
     * immediately to all participants in the share-space.  The
     * client is responsible for any necessary locking or process
     * synchronization.
     *
     * @param src the existing NutchFile
     * @param dst the new name
     * @throws IOException if the rename fails
     */
    public void renameTo(NutchFile src, NutchFile dst) throws IOException;

    /**
     * Close down the fs.
     *
     * @throws IOException if shutting down fails
     */
    public void close() throws IOException;
}
--- NEW FILE: NutchGenericFileSystem.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import java.io.*;
import java.util.*;
import java.nio.channels.*;
/****************************************************************
* NutchGenericFileSystem implements the NutchFileSystem interface
* and adds some generic utility methods for subclasses to use.
*
* The standard task of any implementor of NutchFileSystem is to
* resolve location-independent NutchFile objects to real files.
*
* @author Mike Cafarella
****************************************************************/
public abstract class NutchGenericFileSystem implements NutchFileSystem {
    /** Name of the lock file kept directly under dbRoot. */
    private static final String LOCK_FILE_NAME = "nutchfslock";

    /** Name prefix of the per-instance tmp directories under dbRoot. */
    private static final String TMP_DIR_PREFIX = "localtmpdir";

    File dbRoot, localTmp, flagFile;
    FileInputStream lockData;
    FileLock lock;
    ShareSet shareSet;
    boolean destructivePut;

    /**
     * Create a Nutch Filesystem at the indicated mounted
     * directory.
     *
     * @param dbRoot root directory of the filesystem; created if absent
     * @param shareSet the share groups to use, or null to build a
     *   ShareSet for dbRoot
     * @param destructivePut stored in the destructivePut field
     * @throws IOException if the root or tmp directory cannot be
     *   set up, or the directory lock cannot be obtained
     */
    public NutchGenericFileSystem(File dbRoot, ShareSet shareSet, boolean destructivePut) throws IOException {
        if (shareSet == null) {
            this.shareSet = new ShareSet(dbRoot);
        } else {
            this.shareSet = shareSet;
        }

        //
        // 1.  Create/find main work area (which will receive files from
        //     other processes and may be shared).
        //
        this.dbRoot = dbRoot;
        if (! dbRoot.exists()) {
            dbRoot.mkdirs();
        }
        if (! dbRoot.isDirectory()) {
            throw new IOException("Directory " + dbRoot + " does not exist.");
        }

        //
        // 2.  Attempt to acquire an exclusive lock on the directory.
        //     If this succeeds, the process should then clear out the
        //     tmp storage area.  If this fails, just continue.
        //
        List staleTmpDirs = new ArrayList();
        File rootFiles[] = dbRoot.listFiles();
        // listFiles() yields null on an I/O error; treat as "nothing to clean".
        if (rootFiles != null) {
            for (int i = 0; i < rootFiles.length; i++) {
                if (rootFiles[i].isDirectory() &&
                    rootFiles[i].getName().startsWith(TMP_DIR_PREFIX)) {
                    staleTmpDirs.add(rootFiles[i]);
                }
            }
        }

        // If there are any tmpDirs for us to delete, try to do it.
        if (staleTmpDirs.size() > 0) {
            File exclusiveLockFile = new File(dbRoot, LOCK_FILE_NAME);
            exclusiveLockFile.createNewFile();
            FileOutputStream exclusiveLockData = new FileOutputStream(exclusiveLockFile);
            try {
                FileLock exclusiveLock = exclusiveLockData.getChannel().tryLock();
                // Once we have the lock, go and delete them
                if (exclusiveLock != null) {
                    try {
                        for (Iterator it = staleTmpDirs.iterator(); it.hasNext(); ) {
                            FileUtil.fullyDelete((File) it.next());
                        }
                    } finally {
                        exclusiveLock.release();
                    }
                }
            } finally {
                // Close the stream whether or not the lock was obtained.
                exclusiveLockData.close();
            }
        }

        //
        // 3.  Acquire a non-exclusive lock on the directory.  Block
        //     until this is acquired.  (The only thing preventing it
        //     would be another process in step 2.)
        //
        File lockFile = new File(dbRoot, LOCK_FILE_NAME);
        lockFile.createNewFile();
        this.lockData = new FileInputStream(lockFile);

        boolean initialized = false;
        try {
            this.lock = lockData.getChannel().lock(0L, Long.MAX_VALUE, true);

            //
            // 4.  Create the tmp directory.  createTempFile() is used
            //     only to pick a unique name; the file itself is
            //     replaced by a directory.
            //
            this.localTmp = File.createTempFile(TMP_DIR_PREFIX, "", dbRoot);
            this.localTmp.delete();
            if (! localTmp.exists()) {
                localTmp.mkdirs();
            }
            if (! localTmp.isDirectory()) {
                throw new IOException("Directory " + localTmp + " does not exist.");
            }

            //
            // 5.  Create the src lock file
            //
            this.flagFile = File.createTempFile("flag", "tmp");

            //
            // 6.  Whether files should be deleted after being copied
            //
            this.destructivePut = destructivePut;
            initialized = true;
        } finally {
            if (! initialized) {
                // Closing the stream closes its channel, which also
                // drops any lock obtained through it.
                lockData.close();
            }
        }
    }

    /**
     * Acquire a real File for a name that's not yet under NutchFS
     * control.  This may improve performance later on when the
     * File is put() under NutchFS control.  It's also handy for
     * finding a file location where there is a lot of extra room.
     *
     * The returned File is a fresh name inside the local tmp
     * directory; the file itself does not exist yet.
     */
    public File getWorkingFile() throws IOException {
        File f = File.createTempFile("tmp", "", localTmp);
        f.delete();
        return f;
    }

    /**
     * Wait for a NutchFile from somewhere in NutchSpace.  Translate
     * it to a regular old filesystem File.
     *
     * The file should already be in place.  So we wait until it is.
     */
    public File get(NutchFile nutchFile) throws IOException {
        return get(nutchFile, -1);
    }

    /**
     * Wait for a NutchFile for the specified amount of time.  Return null
     * if we don't get it before 'timeout' ms have elapsed.  A timeout
     * that is zero or negative means "wait indefinitely".
     *
     * The file counts as available once its 'completed' flag file
     * exists under dbRoot; the flag is polled once per second.
     *
     * @throws InterruptedIOException if the waiting thread is
     *   interrupted; the thread's interrupt status is preserved
     */
    public File get(NutchFile nutchFile, long timeout) throws IOException {
        long startTime = System.currentTimeMillis();
        int numTries = 0;

        File target = new File(dbRoot, nutchFile.getFilename());
        File completeFlag = new File(dbRoot, nutchFile.getCompleteFlagName());

        while (! completeFlag.exists()) {
            if ((numTries > 0) &&
                (timeout > 0) &&
                (System.currentTimeMillis() - startTime > timeout)) {
                return null;
            }

            try {
                Thread.sleep(1000);
            } catch (InterruptedException ie) {
                // Ignoring the interrupt would make this wait
                // impossible to cancel.
                Thread.currentThread().interrupt();
                throw new InterruptedIOException("Interrupted while waiting for file " + completeFlag);
            }

            numTries++;
            if (numTries > 10) {
                System.err.println("NutchGenericFileSystem waiting for file " + completeFlag);
            }
        }
        return target;
    }

    /**
     * Obtain a lock with the given NutchFile.  This might mean obtaining
     * locks across many different machines/filesystems.  That's fine,
     * as long as every machine always obtains the locks in a standard
     * ordering.
     *
     * An empty file is first put() under the lock's name (without
     * overwriting), then locked at every location of its share group.
     */
    public void lock(NutchFile nutchFile, boolean exclusive) throws IOException {
        File lockFile = getWorkingFile();
        lockFile.createNewFile();
        put(nutchFile, lockFile, false);

        String locations[] = shareSet.getShareGroup(nutchFile).getLocations();
        for (int i = 0; i < locations.length; i++) {
            lockFile(extractMachine(locations[i]), extractPath(locations[i]),
                     nutchFile.getFilename(), exclusive);
        }
    }

    /**
     * Release the lock for the given NutchFile at every location of
     * its share group.
     */
    public void release(NutchFile nutchFile) throws IOException {
        String locations[] = shareSet.getShareGroup(nutchFile).getLocations();
        for (int i = 0; i < locations.length; i++) {
            release(extractMachine(locations[i]), extractPath(locations[i]),
                    nutchFile.getFilename());
        }
    }

    /**
     * Add a single file or a directory of files to the filesystem.
     * If the source File is a directory, we want to reproduce
     * the entire directory structure, rooted at the given
     * NutchFile.
     *
     * The working file or directory is deleted afterwards.
     */
    public void put(NutchFile nutchFile, File workingFile, boolean overwrite) throws IOException {
        if (workingFile.isDirectory()) {
            putDir(nutchFile, workingFile, overwrite);
        } else {
            putFile(nutchFile, workingFile, overwrite);
        }

        // NOTE(review): the working copy is removed unconditionally;
        // the destructivePut field is never consulted anywhere in this
        // class.  Confirm whether this delete was meant to depend on it.
        FileUtil.fullyDelete(workingFile);
    }

    /**
     * Add a directory and its contents to the filesystem.  The
     * directory's completion flag is removed first and written
     * again only after every entry has been put().
     */
    void putDir(NutchFile nutchDir, File workingDir, boolean overwrite) throws IOException {
        File workingFiles[] = workingDir.listFiles();
        if (workingFiles == null) {
            throw new IOException("Could not list contents of " + workingDir);
        }
        String locations[] = shareSet.getShareGroup(nutchDir).getLocations();

        //
        // Remove target dir's completion flag
        //
        removeCompleteFlags(nutchDir, locations);

        //
        // Put every contained item to the FS
        //
        for (int i = 0; i < workingFiles.length; i++) {
            NutchFile child = new NutchFile(nutchDir, workingFiles[i].getName());
            put(child, workingFiles[i], overwrite);
        }

        //
        // We've written dir's contents, so write out completion flag
        //
        writeCompleteFlags(nutchDir, locations);
    }

    /**
     * Add a single file to the filesystem.  At each location the
     * completion flag is removed, the file copied, and the flag
     * written again.
     */
    void putFile(NutchFile nutchFile, File workingFile, boolean overwrite) throws IOException {
        String locations[] = shareSet.getShareGroup(nutchFile).getLocations();
        for (int i = 0; i < locations.length; i++) {
            String locMach = extractMachine(locations[i]);
            String locStr = extractPath(locations[i]);

            // Remove 'complete' flag
            deleteFile(locMach, locStr, nutchFile.getCompleteFlagName());

            // Write file, if necessary.
            copyFile(workingFile, locMach, locStr, nutchFile.getFilename(), overwrite);

            // Write 'complete' flag
            copyFile(flagFile, locMach, locStr, nutchFile.getCompleteFlagName(), true);
        }
    }

    /**
     * Complete the given directory by writing its completion flag
     * at every location of its share group.
     */
    public void completeDir(NutchFile nutchFile) throws IOException {
        writeCompleteFlags(nutchFile, shareSet.getShareGroup(nutchFile).getLocations());
    }

    /**
     * Take the file out of the NutchFileSystem, together with its
     * completion flag.
     */
    public void delete(NutchFile nutchFile) throws IOException {
        String locations[] = shareSet.getShareGroup(nutchFile).getLocations();
        for (int i = 0; i < locations.length; i++) {
            String locMach = extractMachine(locations[i]);
            String locStr = extractPath(locations[i]);
            deleteFile(locMach, locStr, nutchFile.getFilename());
            deleteFile(locMach, locStr, nutchFile.getCompleteFlagName());
        }
    }

    /**
     * Rename the thing.  Usually done at close.  Blocks until the
     * source file is complete.  The locations used are those of the
     * source's share group.
     */
    public void renameTo(NutchFile src, NutchFile dst) throws IOException {
        // Make sure src file is complete
        File srcFile = get(src);

        String locations[] = shareSet.getShareGroup(src).getLocations();
        for (int i = 0; i < locations.length; i++) {
            String locMach = extractMachine(locations[i]);
            String locStr = extractPath(locations[i]);

            // Remove src complete flags
            deleteFile(locMach, locStr, src.getCompleteFlagName());

            // Rename contents
            renameFile(srcFile, locMach, locStr, dst.getFilename(), true);

            // Create target flags
            copyFile(flagFile, locMach, locStr, dst.getCompleteFlagName(), true);
        }
    }

    /**
     * Close down the Generic File System.  The directory lock is
     * released even if removing the temporary files fails.
     */
    public void close() throws IOException {
        try {
            // Get rid of the tmp directory
            FileUtil.fullyDelete(localTmp);

            // Get rid of tmp flag file
            FileUtil.fullyDelete(flagFile);
        } finally {
            try {
                this.lock.release();
            } finally {
                this.lockData.close();
            }
        }
    }

    /**
     * To be implemented by subclasses.  Copies srcFile to the name
     * nutchFileName below the given location.
     */
    protected abstract void copyFile(File srcFile, String locationMach, String locationStr, String nutchFileName, boolean overwrite) throws IOException;

    /**
     * To be implemented by subclasses.  Deletes the name
     * nutchFileName below the given location.
     */
    protected abstract void deleteFile(String locationMach, String locationStr, String nutchFileName) throws IOException;

    /**
     * To be implemented by subclasses.  Renames srcFile to the name
     * nutchFileName below the given location.
     */
    protected abstract void renameFile(File srcFile, String locationMach, String locationStr, String nutchFileName, boolean overwrite) throws IOException;

    /**
     * To be implemented by subclasses.  Locks the name filename
     * below the given location.
     */
    protected abstract void lockFile(String locMach, String locStr, String filename, boolean exclusive) throws IOException;

    /**
     * To be implemented by subclasses.  Releases a lock taken with
     * lockFile().
     */
    protected abstract void release(String locMach, String locStr, String filename) throws IOException;

    /**
     * Utility str-processing of location-string.
     * (format "machinename:path")
     *
     * @return the part before the first colon, or null if the
     *   location has no colon
     */
    String extractMachine(String location) {
        int colDex = location.indexOf(":");
        if (colDex < 0) {
            return null;
        }
        return location.substring(0, colDex);
    }

    /**
     * Utility str-processing of location-string.
     * (format "machinename:path")
     *
     * @return the part after the first colon, or the whole location
     *   if it has no colon
     */
    String extractPath(String location) {
        int colDex = location.indexOf(":");
        if (colDex < 0) {
            return location;
        }
        return location.substring(colDex + 1);
    }

    /**
     * Delete the given NutchFile's completion flag at each location.
     */
    private void removeCompleteFlags(NutchFile nutchFile, String locations[]) throws IOException {
        for (int i = 0; i < locations.length; i++) {
            deleteFile(extractMachine(locations[i]), extractPath(locations[i]),
                       nutchFile.getCompleteFlagName());
        }
    }

    /**
     * Write the given NutchFile's completion flag at each location.
     */
    private void writeCompleteFlags(NutchFile nutchFile, String locations[]) throws IOException {
        for (int i = 0; i < locations.length; i++) {
            copyFile(flagFile, extractMachine(locations[i]), extractPath(locations[i]),
                     nutchFile.getCompleteFlagName(), true);
        }
    }
}
--- NEW FILE: NutchNFSFileSystem.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import java.io.*;
import java.util.*;
import java.nio.channels.*;
/****************************************************************
* NutchNFSFileSystem implements NutchFileSystem over the Network File System.
* We assume all participants are mounting the same drive.
*
* @author Mike Cafarella
*****************************************************************/
public class NutchNFSFileSystem extends NutchGenericFileSystem {
    // Both maps are keyed by the locked File: lockDataSet holds the
    // open RandomAccessFile, lockObjSet the FileLock obtained on it.
    TreeMap lockDataSet = new TreeMap(), lockObjSet = new TreeMap();

    /**
     * Create the ShareSet automatically, and then go on to
     * the regular constructor.
     */
    public NutchNFSFileSystem(File dbRoot, boolean destructiveCopy) throws IOException {
        this(dbRoot, null, destructiveCopy);
    }

    /**
     * Create a Nutch Filesystem at the indicated mounted
     * directory.  We're given a ShareSet.
     *
     * @throws IOException if the base filesystem cannot be set up,
     *   or if any share-group location names a machine (contains
     *   a colon)
     */
    public NutchNFSFileSystem(File dbRoot, ShareSet initShareSet, boolean destructiveCopy) throws IOException {
        super(dbRoot, initShareSet, destructiveCopy);

        // Make sure the shareGroups are in good working order
        for (Iterator it = shareSet.getShareGroups().values().iterator(); it.hasNext(); ) {
            ShareGroup sg = (ShareGroup) it.next();
            String locations[] = sg.getLocations();
            for (int i = 0; i < locations.length; i++) {
                if (locations[i].indexOf(":") >= 0) {
                    throw new IOException("Cannot process non-local locations");
                }
            }
        }
    }

    /**
     * Obtain a lock with the given info.  Blocks until the lock is
     * granted.
     *
     * @param exclusive true for an exclusive lock, false for a
     *   shared one
     * @throws IOException if this instance already holds a lock on
     *   the file, or the lock cannot be obtained
     */
    public synchronized void lockFile(String locMach, String locStr, String filename, boolean exclusive) throws IOException {
        // NFSFileSystem ignores the locMach value
        File lockTarget = new File(locStr, filename);
        if (lockObjSet.containsKey(lockTarget)) {
            throw new IOException("Lock already held on " + lockTarget);
        }

        // An exclusive lock needs a channel open for writing, a shared
        // lock one open for reading.
        RandomAccessFile lockData = new RandomAccessFile(lockTarget, exclusive ? "rw" : "r");
        boolean locked = false;
        try {
            // The last argument of FileChannel.lock() means "shared",
            // i.e. the opposite of 'exclusive'.
            FileLock lockObj = lockData.getChannel().lock(0L, Long.MAX_VALUE, ! exclusive);
            lockDataSet.put(lockTarget, lockData);
            lockObjSet.put(lockTarget, lockObj);
            locked = true;
        } finally {
            if (! locked) {
                lockData.close();
            }
        }
    }

    /**
     * Release the lock taken by lockFile() for the same location
     * and filename.
     *
     * @throws IOException if this instance holds no such lock, or
     *   releasing it fails
     */
    public synchronized void release(String locMach, String locStr, String filename) throws IOException {
        // NFSFileSystem ignores the locMach value
        File lockTarget = new File(locStr, filename);
        FileLock lockObj = (FileLock) lockObjSet.remove(lockTarget);
        RandomAccessFile lockData = (RandomAccessFile) lockDataSet.remove(lockTarget);
        if (lockObj == null || lockData == null) {
            throw new IOException("No lock held on " + lockTarget);
        }

        try {
            lockObj.release();
        } finally {
            lockData.close();
        }
    }

    /**
     * Copy a file to the right place in the local dir, which assumes
     * NFS-connectivity.
     */
    protected void copyFile(File srcFile, String locMach, String locStr, String filename, boolean overwrite) throws IOException {
        // NFSFileSystem has no locMachine component.
        File target = new File(locStr, filename);
        FileUtil.copyContents(srcFile, target, overwrite);
    }

    /**
     * Remove a file from its current location.  Assumes an NFS-universe.
     */
    protected void deleteFile(String locMach, String locStr, String filename) throws IOException {
        // NFSFileSystem has no machine component
        FileUtil.fullyDelete(new File(locStr, filename));
    }

    /**
     * Rename the existing file or dir to a new location.
     *
     * @param overwrite whether an existing target may be replaced
     * @throws IOException if the target exists and overwrite is
     *   false, or if the rename does not succeed
     */
    protected void renameFile(File srcFile, String locMach, String locStr, String filename, boolean overwrite) throws IOException {
        // NFSFileSystem has no machine component
        File target = new File(locStr, filename);
        if (! overwrite && target.exists()) {
            throw new IOException("Cannot rename " + srcFile + " to " + target + ": target already exists");
        }

        // File.renameTo() does not create missing parent directories.
        File targetParent = target.getParentFile();
        if (targetParent != null && ! targetParent.exists()) {
            targetParent.mkdirs();
        }

        if (srcFile.renameTo(target)) {
            return;
        }

        // The rename may have failed because the target is in the way
        // (e.g. a non-empty directory); clear it and try once more.
        if (overwrite && target.exists()) {
            FileUtil.fullyDelete(target);
            if (srcFile.renameTo(target)) {
                return;
            }
        }
        throw new IOException("Could not rename " + srcFile + " to " + target);
    }
}
--- NEW FILE: NutchRemoteFileSystem.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import java.io.*;
import java.util.*;
/*****************************************************
* NutchRemoteFileSystem implements the NutchFileSystem over
* machines that can be linked via some set of command-line args.
* (presumably 'scp').
*
* @author Mike Cafarella
*****************************************************/
public class NutchRemoteFileSystem extends NutchGenericFileSystem {
static String SRCPATH_SYMBOL = "%srcpath%";
static String DSTPATH_SYMBOL = "%dstpath%";
static String DSTMACH_SYMBOL = "%dstmach%";
String cpTemplate = null, rmTemplate = null, mkdirTemplate = null;
/**
* Create the ShareSet automatically, then do regular constructor.
*/
public NutchRemoteFileSystem(File dbRoot, String cpTemplate, String rmTemplate,
String mkdirTemplate) throws IOException {
this(dbRoot, new ShareSet(dbRoot), cpTemplate, rmTemplate, mkdirTemplate);
}
/**
* The NutchRemoteFileSystem takes template-strings for
* its various needed commands, which may differ among installations.
* The class will fill in these templates with the necessary args,
* and then invoke them via System.exec().
*
* We're given the ShareSet here.
*/
public NutchRemoteFileSystem(File dbRoot, ShareSet shareSet, String cpTemplate,
String rmTemplate, String mkdirTemplate) throws IOException {
super(dbRoot, shareSet, true);
this.cpTemplate = cpTemplate;
this.rmTemplate = rmTemplate;
this.mkdirTemplate = mkdirTemplate;
// Make sure templates are found
if (cpTemplate == null) {
throw new IOException("No value found for cptemplate");
}
if (rmTemplate == null) {
throw new IOException("No value found for rmtemplate");
}
if (mkdirTemplate == null) {
throw new IOException("No value found for mkdirtemplate");
}
// Make sure the templates have everything they should
if (cpTemplate.indexOf(SRCPATH_SYMBOL) < 0) {
throw new IOException("The cptemplate string does not contain " +
SRCPATH_SYMBOL);
}
if (cpTemplate.indexOf(DSTPATH_SYMBOL) < 0) {
throw new IOException("The cptemplate string does not contain " +
DSTPATH_SYMBOL);
}
if (rmTemplate.indexOf(DSTPATH_SYMBOL) < 0) {
throw new IOException("The rmtemplate string does not contain " +
DSTPATH_SYMBOL);
}
if (mkdirTemplate.indexOf(DSTPATH_SYMBOL) < 0) {
throw new IOException("The mkdirtemplate string does not contain " +
DSTPATH_SYMBOL);
}
}
/**
* Copy a file from one place to another. Requires that
* template-strings be set correctly.
*/
protected void copyFile(File srcFile, String locationMach, String locationStr,
String nutchFileName, boolean overwrite) throws IOException {
//
// Use values to fill in the template strs.
//
String cpCommand = cpTemplate.replaceAll(SRCPATH_SYMBOL, srcFile.getPath());
cpCommand = cpCommand.replaceAll(DSTMACH_SYMBOL, locationMach);
cpCommand = cpCommand.replaceAll(DSTPATH_SYMBOL, new File(new
File(locationStr), nutchFileName).getPath());
String mkdirCommand = mkdirTemplate.replaceAll(DSTPATH_SYMBOL, new File(new
File(locationStr), nutchFileName).getParentFile().getPath());
//
// Make sure the target directory exists
//
invoke(mkdirCommand);
//
// Finally, invoke the newly-built copy command.
//
invoke(cpCommand);
}
/**
* Remove a file the given location. Requires that template-
* strings be set correctly.
*/
protected void deleteFile(String locationMach, String locationStr, String
nutchFileName) throws IOException {
//
// Use values to fill in template strs
//
String rmCommand = rmTemplate.replaceAll(DSTMACH_SYMBOL, locationMach);
rmCommand = rmCommand.replaceAll(DSTPATH_SYMBOL, new File(new
File(locationStr), nutchFileName).getPath());
//
// Finally, invoke newly-built command
//
invoke(rmCommand);
}
/**
* Currently unimplemented
*/
protected void lockFile(String locMach, String locStr, String filename, boolean
exclusive) throws IOException {
}
/**
*/
protected void release(String locMach, String locStr, String filename) throws
IOException {
}
/**
*/
protected void renameFile(File srcFile, String locMach, String locStr, String
filename, boolean overwrite) throws IOException {
}
/**
* Take care of the details of invoking an external process.
* We always assume traditional error-code interpretation
* (0 for success, non-zero for failure).
*/
void invoke(String command) throws IOException {
Process p = Runtime.getRuntime().exec(command);
int returnCode = 0;
try {
returnCode = p.waitFor();
} catch (InterruptedException ie) {
returnCode = -1;
}
if (returnCode != 0) {
throw new IOException("Runtime.exec() failed with code " + returnCode + "
while running " + command);
}
}
}
--- NEW FILE: ShareGroup.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import java.io.*;
import java.util.*;
/****************************************************************
* A ShareGroup combines the name of a group with where the Nutch
* filesystem can find members of that group. Used by NutchFileSystem
* to help resolve NutchFile objects.
*
* @author Mike Cafarella
*****************************************************************/
public class ShareGroup {
    String name;
    String locations[];

    /**
     * Make a named ShareGroup, to be found at the given location.
     * locationDesc is a semicolon-separated list of the form
     * "machinename:dbroot;machinename2:dbroot2...".  The leading
     * "machinename:" part is optional, in which case the location
     * is a locally-(probably NFS)-mounted disk.
     *
     * @param name the group's name
     * @param locationDescs the semicolon-separated location list
     * @throws IllegalArgumentException if locationDescs is null
     */
    public ShareGroup(String name, String locationDescs) {
        if (locationDescs == null) {
            throw new IllegalArgumentException("No locations given for share group " + name);
        }
        this.name = name;

        List found = new ArrayList();
        StringTokenizer toks = new StringTokenizer(locationDescs, ";");
        while (toks.hasMoreTokens()) {
            found.add(toks.nextToken());
        }
        this.locations = (String[]) found.toArray(new String[found.size()]);
    }

    /**
     * Create a ShareGroup as above, but assume the location description
     * can be found via NutchConf, under the key
     * "nutchfs.sharegroup." followed by the group name.
     *
     * @throws IllegalArgumentException if NutchConf has no value
     *   for that key
     */
    public ShareGroup(String name) {
        this(name, configuredLocations(name));
    }

    /**
     * ShareGroup name.
     */
    public String getName() {
        return name;
    }

    /**
     * Locations for the ShareGroup (machinename:path).  The returned
     * array is a copy; changing it does not affect the group.
     */
    public String[] getLocations() {
        return (String[]) locations.clone();
    }

    /**
     * Look up the location description of the named group in
     * NutchConf, failing with a descriptive message when it is
     * missing.
     */
    private static String configuredLocations(String name) {
        String key = "nutchfs.sharegroup." + name;
        String descs = NutchConf.get(key);
        if (descs == null) {
            throw new IllegalArgumentException("No value found for " + key);
        }
        return descs;
    }
}
--- NEW FILE: ShareSet.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.util;
import java.io.*;
import java.util.*;
/****************************************************************
* A ShareSet is a library of ShareGroup objects. It defines
* every other machine in the current NutchFileSystem's universe.
*
* @author Mike Cafarella
*****************************************************************/
public class ShareSet {
    /** Name under which the fallback ShareGroup is registered. */
    private static final String DEFAULT_GROUP = "*";

    // Maps share-group name (String) to ShareGroup.
    TreeMap shareGroups = new TreeMap();

    /**
     * Build a ShareSet out of a Vector of ShareGroup objects.
     * A default group located at dbRoot is added if the list does
     * not provide one.  A null list is treated as empty.
     */
    public ShareSet(File dbRoot, Vector shareList) {
        if (shareList != null) {
            for (Enumeration e = shareList.elements(); e.hasMoreElements(); ) {
                ShareGroup sg = (ShareGroup) e.nextElement();
                shareGroups.put(sg.getName(), sg);
            }
        }
        buildDefault(dbRoot);
    }

    /**
     * Default constructor.  Loads configuration from NutchConf:
     * "nutchfs.sharegroups.names" is read as a comma-separated list
     * of group names, and a ShareGroup is created for each.
     * Whitespace around the names is ignored.
     */
    public ShareSet(File dbRoot) {
        String groupList = NutchConf.get("nutchfs.sharegroups.names");
        if (groupList != null) {
            StringTokenizer toks = new StringTokenizer(groupList, ",");
            while (toks.hasMoreTokens()) {
                String shareName = toks.nextToken().trim();
                if (shareName.length() > 0) {
                    shareGroups.put(shareName, new ShareGroup(shareName));
                }
            }
        }
        buildDefault(dbRoot);
    }

    /**
     * Add a default ShareGroup if necessary.  The default group is
     * named "*" and has dbRoot's path as its location string.
     */
    void buildDefault(File dbRoot) {
        // Create a default shareGroup if necessary
        if (shareGroups.get(DEFAULT_GROUP) == null) {
            ShareGroup defaultSG = new ShareGroup(DEFAULT_GROUP, dbRoot.getPath());
            shareGroups.put(defaultSG.getName(), defaultSG);
        }
    }

    /**
     * Find the relevant ShareGroup object: the group registered
     * under the NutchFile's share-group name, or the default group
     * when the name is null or not registered.
     */
    ShareGroup getShareGroup(NutchFile nutchFile) {
        String groupName = nutchFile.getShareGroupName();

        // Check if there is a registered ShareGroup that matches this
        // NutchFile.  A naturally-ordered TreeMap rejects null keys,
        // so a null name goes straight to the default.
        ShareGroup sg = null;
        if (groupName != null) {
            sg = (ShareGroup) shareGroups.get(groupName);
        }

        // If not, find the default ShareGroup
        if (sg == null) {
            sg = (ShareGroup) shareGroups.get(DEFAULT_GROUP);
        }
        return sg;
    }

    /**
     * Return entire TreeMap of ShareGroups.  This is the set's own
     * map, not a copy.
     */
    TreeMap getShareGroups() {
        return shareGroups;
    }
}
Index: FileUtil.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/util/FileUtil.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** FileUtil.java 23 May 2003 03:13:36 -0000 1.4
--- FileUtil.java 30 Jan 2004 22:11:44 -0000 1.5
***************
*** 10,39 ****
*/
public class FileUtil {
-
/**
* Delete a directory and all its contents. If
! * we throw an exception, the directory may be
! * partially-deleted.
*/
! public static void fullyDelete(File dir) throws IOException {
File contents[] = dir.listFiles();
! for (int i = 0; i < contents.length; i++) {
! if (contents[i].isFile()) {
! if (! contents[i].delete()) {
! throw new IOException("Could not delete " +
contents[i].getPath());
}
- } else {
- fullyDelete(contents[i]);
}
}
! if (! dir.delete()) {
! throw new IOException("Could not delete " + dir.getPath());
! }
}
/**
! * Copy a file's contents to a new location
*/
! public static void copyContents(File src, File dst) throws IOException {
DataInputStream in = new DataInputStream(new FileInputStream(src));
try {
--- 10,46 ----
*/
public class FileUtil {
/**
* Delete a directory and all its contents. If
! * we return false, the directory may be partially-deleted.
*/
! public static boolean fullyDelete(File dir) throws IOException {
File contents[] = dir.listFiles();
! if (contents != null) {
! for (int i = 0; i < contents.length; i++) {
! if (contents[i].isFile()) {
! if (! contents[i].delete()) {
! throw new IOException("Could not delete " +
contents[i].getPath());
! }
! } else {
! fullyDelete(contents[i]);
}
}
}
! return dir.delete();
}
/**
! * Copy a file's contents to a new location.
! * Returns whether a target file was overwritten
*/
! public static boolean copyContents(File src, File dst, boolean overwrite) throws
IOException {
! if (dst.exists() && !overwrite) {
! return false;
! }
!
! File dstParent = dst.getParentFile();
! if (! dstParent.exists()) {
! dstParent.mkdirs();
! }
DataInputStream in = new DataInputStream(new FileInputStream(src));
try {
***************
*** 53,56 ****
--- 60,64 ----
in.close();
}
+ return true;
}
***************
*** 76,80 ****
// If the source is a file, then just copy the contents
//
! copyContents(src, dst);
} else {
//
--- 84,88 ----
// If the source is a file, then just copy the contents
//
! copyContents(src, dst, true);
} else {
//
-------------------------------------------------------
The SF.Net email is sponsored by EclipseCon 2004
Premiere Conference on Open Tools Development and Integration
See the breadth of Eclipse activity. February 3-5 in Anaheim, CA.
http://www.eclipsecon.org/osdn
_______________________________________________
Nutch-cvs mailing list
[EMAIL PROTECTED]
https://lists.sourceforge.net/lists/listinfo/nutch-cvs