Update of /cvsroot/nutch/playground/src/java/net/nutch/indexer
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv10313/src/java/net/nutch/indexer
Added Files:
NutchSimilarity.java DeleteDuplicates.java IndexSegment.java
package.html IndexOptimizer.java IndexMerger.java
HighFreqTerms.java
Log Message:
initial commit
--- NEW FILE: NutchSimilarity.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.indexer;
import org.apache.lucene.search.DefaultSimilarity;
/** Similarity implementation used by Nutch indexing and search. */
public class NutchSimilarity extends DefaultSimilarity {
private static final int MIN_CONTENT_LENGTH = 1000;
/** Normalize field by length. */
public float lengthNorm(String fieldName, int numTokens) {
if ("url".equals(fieldName)) { // URL: prefer short
return 1.0f / numTokens; // use linear normalization
} else if ("content".equals(fieldName)) { // Content: penalize short
return super.lengthNorm(fieldName, // treat short as longer
Math.max(numTokens, MIN_CONTENT_LENGTH));
} else { // Anchor: use default
return super.lengthNorm(fieldName, numTokens);
}
}
}
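A minimal sketch (not part of this commit) of what the override above does, assuming the inherited DefaultSimilarity norm is 1/sqrt(numTokens); the LengthNormDemo class and its token counts are made up for illustration:

import net.nutch.indexer.NutchSimilarity;

/** Hypothetical demo: shows the effect of the lengthNorm override for each field kind. */
public class LengthNormDemo {
  public static void main(String[] args) {
    NutchSimilarity sim = new NutchSimilarity();
    System.out.println(sim.lengthNorm("url", 4));        // 0.25    -- linear: shorter URLs score higher
    System.out.println(sim.lengthNorm("content", 250));  // ~0.0316 -- short content treated as 1000 tokens
    System.out.println(sim.lengthNorm("content", 4000)); // ~0.0158 -- long content gets the default norm
    System.out.println(sim.lengthNorm("anchor", 9));     // ~0.333  -- other fields use the default norm
  }
}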
--- NEW FILE: DeleteDuplicates.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.indexer;
import net.nutch.io.*;
import net.nutch.util.LogFormatter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.document.Document;
import java.io.*;
import java.util.logging.Logger;
import java.security.MessageDigest;
/** Deletes duplicate documents in a set of Lucene indexes.
* Duplicates have either the same contents (via MD5 hash) or the same URL.
*/
public class DeleteDuplicates {
private static final Logger LOG =
LogFormatter.getLogger("net.nutch.indexer.DeleteDuplicates");
/** The key used in sorting for duplicates. */
public static class IndexedDoc implements WritableComparable {
private MD5Hash hash = new MD5Hash();
private float score;
private int index; // the segment index
private int doc; // within the index
public void write(DataOutput out) throws IOException {
hash.write(out);
out.writeFloat(score);
out.writeInt(index);
out.writeInt(doc);
}
public void readFields(DataInput in) throws IOException {
hash.readFields(in);
this.score = in.readFloat();
this.index = in.readInt();
this.doc = in.readInt();
}
public int compareTo(Object o) {
throw new RuntimeException("this is never used");
}
/** Order equal hashes by decreasing score. */
public static class ByHashScore extends WritableComparator {
public ByHashScore() { super(IndexedDoc.class); }
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2){
int c = compareBytes(b1, s1, MD5Hash.MD5_LEN, b2, s2, MD5Hash.MD5_LEN);
if (c != 0)
return c;
float thisScore = readFloat(b1, s1+MD5Hash.MD5_LEN);
float thatScore = readFloat(b2, s2+MD5Hash.MD5_LEN);
return (thisScore<thatScore ? 1 : (thisScore==thatScore ? 0 : -1));
}
}
/** Order equal hashes by decreasing index and document. */
public static class ByHashDoc extends WritableComparator {
public ByHashDoc() { super(IndexedDoc.class); }
public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2){
int c = compareBytes(b1, s1, MD5Hash.MD5_LEN, b2, s2, MD5Hash.MD5_LEN);
if (c != 0)
return c;
int thisIndex = readInt(b1, s1+MD5Hash.MD5_LEN+4);
int thatIndex = readInt(b2, s2+MD5Hash.MD5_LEN+4);
if (thisIndex != thatIndex)
return thatIndex - thisIndex;
int thisDoc = readInt(b1, s1+MD5Hash.MD5_LEN+8);
int thatDoc = readInt(b2, s2+MD5Hash.MD5_LEN+8);
return thatDoc - thisDoc;
}
}
}
private interface Hasher {
void updateHash(MD5Hash hash, Document doc);
}
private IndexReader[] readers;
private String tempFile;
/** Constructs a duplicate detector for the provided indexes. */
public DeleteDuplicates(IndexReader[] readers, String tempFile) {
this.readers = readers;
this.tempFile = tempFile;
}
/** Closes the indexes, saving changes. */
public void close() throws IOException {
for (int i = 0; i < readers.length; i++)
readers[i].close();
}
/** Delete pages with duplicate content hashes. Of those with the same
* content hash, keep the page with the highest score. */
public void deleteContentDuplicates() throws IOException {
LOG.info("Reading content hashes...");
computeHashes(new Hasher() {
public void updateHash(MD5Hash hash, Document doc) {
hash.setDigest(doc.get("digest"));
}
});
LOG.info("Sorting content hashes...");
SequenceFile.Sorter byHashScoreSorter =
new SequenceFile.Sorter(new IndexedDoc.ByHashScore(),NullWritable.class);
byHashScoreSorter.sort(tempFile, tempFile + ".sorted");
LOG.info("Deleting content duplicates...");
int duplicateCount = deleteDuplicates();
LOG.info("Deleted " + duplicateCount + " content duplicates.");
}
/** Delete pages with duplicate URLs. Of those with the same
* URL, keep the most recently fetched page. */
public void deleteUrlDuplicates() throws IOException {
final MessageDigest digest;
try {
digest = MessageDigest.getInstance("MD5");
} catch (Exception e) {
throw new RuntimeException(e.toString());
}
LOG.info("Reading url hashes...");
computeHashes(new Hasher() {
public void updateHash(MD5Hash hash, Document doc) {
try {
digest.update(UTF8.getBytes(doc.get("url")));
digest.digest(hash.getDigest(), 0, MD5Hash.MD5_LEN);
} catch (Exception e) {
throw new RuntimeException(e.toString());
}
}
});
LOG.info("Sorting url hashes...");
SequenceFile.Sorter byHashDocSorter =
new SequenceFile.Sorter(new IndexedDoc.ByHashDoc(), NullWritable.class);
byHashDocSorter.sort(tempFile, tempFile + ".sorted");
LOG.info("Deleting url duplicates...");
int duplicateCount = deleteDuplicates();
LOG.info("Deleted " + duplicateCount + " url duplicates.");
}
private void computeHashes(Hasher hasher) throws IOException {
IndexedDoc indexedDoc = new IndexedDoc();
SequenceFile.Writer writer =
new SequenceFile.Writer(tempFile, IndexedDoc.class, NullWritable.class);
try {
for (int index = 0; index < readers.length; index++) {
IndexReader reader = readers[index];
int readerMax = reader.maxDoc();
indexedDoc.index = index;
for (int doc = 0; doc < readerMax; doc++) {
if (!reader.isDeleted(doc)) {
Document document = reader.document(doc);
hasher.updateHash(indexedDoc.hash, document);
indexedDoc.score = Float.parseFloat(document.get("boost"));
indexedDoc.doc = doc;
writer.append(indexedDoc, NullWritable.get());
}
}
}
} finally {
writer.close();
}
}
private int deleteDuplicates() throws IOException {
if (new File(tempFile).exists())
new File(tempFile).delete();
if (!new File(tempFile + ".sorted").renameTo(new File(tempFile)))
throw new IOException("Couldn't rename!");
IndexedDoc indexedDoc = new IndexedDoc();
SequenceFile.Reader reader = new SequenceFile.Reader(tempFile);
try {
int duplicateCount = 0;
MD5Hash prev = null; // previous hash
while (reader.next(indexedDoc, NullWritable.get())) {
if (prev == null) { // initialize prev
prev = new MD5Hash();
prev.set(indexedDoc.hash);
continue;
}
if (indexedDoc.hash.equals(prev)) { // found a duplicate
readers[indexedDoc.index].delete(indexedDoc.doc); // delete it
duplicateCount++;
} else {
prev.set(indexedDoc.hash); // reset prev
}
}
return duplicateCount;
} finally {
reader.close();
new File(tempFile).delete();
}
}
/** Delete duplicates in the indexes in the named directory. */
public static void main(String[] args) throws Exception {
String usage = "DeleteDuplicates <segmentsDir> <tempFile>";
if (args.length != 2) {
System.err.println("Usage: " + usage);
return;
}
String segmentsDir = args[0];
String tempFile = args[1];
File[] directories = new File(segmentsDir).listFiles();
IndexReader[] readers = new IndexReader[directories.length];
int maxDoc = 0;
for (int i = 0; i < directories.length; i++) {
File indexDir = new File(directories[i], "index");
IndexReader reader = IndexReader.open(indexDir);
if (reader.hasDeletions()) {
LOG.info("Clearing old deletions in " + indexDir);
reader.undeleteAll();
}
maxDoc += reader.maxDoc();
readers[i] = reader;
}
DeleteDuplicates dd = new DeleteDuplicates(readers, tempFile);
dd.deleteUrlDuplicates();
dd.deleteContentDuplicates();
dd.close();
}
}
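A minimal programmatic sketch of driving the class above, mirroring what main() does; the DedupDriver class and the crawl/segments and /tmp/dedup paths are assumptions for illustration:

import java.io.File;
import org.apache.lucene.index.IndexReader;
import net.nutch.indexer.DeleteDuplicates;

public class DedupDriver {
  public static void main(String[] args) throws Exception {
    // Hypothetical layout: each segment directory holds an "index" subdirectory.
    File[] segments = new File("crawl/segments").listFiles();
    IndexReader[] readers = new IndexReader[segments.length];
    for (int i = 0; i < segments.length; i++)
      readers[i] = IndexReader.open(new File(segments[i], "index"));
    DeleteDuplicates dd = new DeleteDuplicates(readers, "/tmp/dedup");
    dd.deleteUrlDuplicates();      // keep the most recently fetched page per URL
    dd.deleteContentDuplicates();  // keep the highest-scoring page per content hash
    dd.close();                    // flush deletions back to the indexes
  }
}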
--- NEW FILE: IndexSegment.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.indexer;
import net.nutch.pagedb.*;
import net.nutch.linkdb.*;
import net.nutch.fetcher.*;
import net.nutch.analysis.NutchDocumentAnalyzer;
import net.nutch.db.*;
import net.nutch.io.*;
import net.nutch.util.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import java.util.logging.Logger;
import java.util.Date;
import java.io.File;
import java.io.EOFException;
/** Creates an index for the output corresponding to a single fetcher run. */
public class IndexSegment {
public static final String DONE_NAME = "index.done";
public static final Logger LOG =
LogFormatter.getLogger("net.nutch.index.IndexSegment");
private float scorePower = NutchConf.getFloat("indexer.score.power", 0.5f);
private int maxTitleLength =
NutchConf.getInt("indexer.max.title.length", 100);
private File directory = null;
private int maxDocs = Integer.MAX_VALUE;
/** Determines the power of link analysis scores. Each page's boost is
* set to <i>score<sup>scorePower</sup></i> where <i>score</i> is its link
* analysis score and <i>scorePower</i> is the value passed to this method.
*/
public void setScorePower(float power) { scorePower = power; }
private void indexPages() throws Exception {
IndexWriter writer
= new IndexWriter(new File(directory, "index"),
new NutchDocumentAnalyzer(), true);
writer.mergeFactor = 50;
writer.infoStream = System.out;
writer.setSimilarity(new NutchSimilarity());
ArrayFile.Reader fetcher =
new ArrayFile.Reader(new File(directory, FetcherOutput.DIR_NAME).toString());
ArrayFile.Reader text =
new ArrayFile.Reader(new File(directory,FetcherText.DIR_NAME).toString());
int count = 0;
try {
String segmentName = directory.getCanonicalFile().getName();
FetcherOutput fetcherOutput = new FetcherOutput();
FetcherText fetcherText = new FetcherText();
while (fetcher.next(fetcherOutput) != null && count++ < maxDocs) {
text.next(fetcherText);
if (!fetcherOutput.getSuccess()) // if the fetch failed
continue; // don't index the page
Document doc = makeDocument(segmentName, fetcher.key(),
fetcherOutput, fetcherText);
writer.addDocument(doc);
}
} catch (EOFException e) {
LOG.warning("Unexpected EOF in: " + directory +
" at entry #" + count + ". Ignoring.");
} finally {
fetcher.close();
text.close();
}
System.out.println("Optimizing index...");
writer.optimize();
writer.close();
}
private Document makeDocument(String segmentName, long docNo,
FetcherOutput fetcherOutput,
FetcherText fetcherText)
throws Exception {
FetchListEntry fle = fetcherOutput.getFetchListEntry();
String url = fle.getPage().getURL().toString();
String title = fetcherOutput.getTitle();
if (title.length() > maxTitleLength) { // truncate title if needed
title = title.substring(0, maxTitleLength);
}
Document doc = new Document();
// url is both stored and indexed, so it's both searchable and returned
doc.add(Field.Text("url", url));
// un-indexed fields: not searchable, but in hits and/or used by dedup
doc.add(Field.UnIndexed("title", title));
doc.add(Field.UnIndexed("digest", fetcherOutput.getMD5Hash().toString()));
doc.add(Field.UnIndexed("docNo", Long.toString(docNo, 16)));
doc.add(Field.UnIndexed("segment", segmentName));
// content is indexed, so that it's searchable, but not stored in index
doc.add(Field.UnStored("content", fetcherText.getText()));
// anchors are indexed, so they're searchable, but not stored in index
String[] anchors = fle.getAnchors();
for (int i = 0; i < anchors.length; i++) {
doc.add(Field.UnStored("anchor", anchors[i]));
}
// add title as anchor so it's searchable. doesn't warrant its own field.
doc.add(Field.UnStored("anchor", title));
// compute boost
float boost = (float)Math.pow(fle.getPage().getScore(), scorePower);
// apply boost to all indexed fields
doc.setBoost(boost);
// store boost for use by explain and dedup
doc.add(Field.UnIndexed("boost", Float.toString(boost)));
return doc;
}
/** Create an index for the input files in the named directory. */
public static void main(String[] args) throws Exception {
String usage = "IndexSegment <segment_directory>";
if (args.length == 0) {
System.err.println("Usage: " + usage);
return;
}
IndexSegment indexer = new IndexSegment();
for (int i = 0; i < args.length; i++) {
if (args[i].equals("-max")) { // parse -max option
indexer.maxDocs = Integer.parseInt(args[++i]);
} else if (i != args.length-1) {
System.err.println("Usage: " + usage);
return;
} else {
indexer.directory = new File(args[i]);
}
}
File fetcherDone = new File(indexer.directory, FetcherOutput.DONE_NAME);
if (!fetcherDone.exists()) // check fetcher done file
throw new RuntimeException("can't index--not yet fetched: " +
fetcherDone + " does not exist");
File doneFile = new File(indexer.directory, DONE_NAME);
if (doneFile.exists()) // check index done file
throw new RuntimeException("already indexed: " + doneFile + " exists");
Date start = new Date();
indexer.indexPages();
Date end = new Date();
System.out.print(end.getTime() - start.getTime());
System.out.println(" total milliseconds");
doneFile.createNewFile(); // create the done file
}
}
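For illustration only (not part of this commit), the per-segment index written above can be inspected with a plain Lucene reader; the stored fields are those added in makeDocument. The DumpSegmentIndex class and the segment path are made up:

import java.io.File;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.document.Document;

public class DumpSegmentIndex {
  public static void main(String[] args) throws Exception {
    // Hypothetical segment path; the index lives in its "index" subdirectory.
    IndexReader reader = IndexReader.open(new File("crawl/segments/20040101/index"));
    for (int i = 0; i < reader.maxDoc(); i++) {
      if (reader.isDeleted(i)) continue;            // skip docs removed by DeleteDuplicates
      Document doc = reader.document(i);
      System.out.println(doc.get("url") + "\t" + doc.get("title")
                         + "\t" + doc.get("boost")); // stored, un-indexed fields
    }
    reader.close();
  }
}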
--- NEW FILE: package.html ---
<html>
<body>
Maintain Lucene full-text indexes.
</body>
</html>
--- NEW FILE: IndexOptimizer.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.indexer;
import java.util.*;
import java.io.*;
import org.apache.lucene.util.*;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
/** Writes a pruned copy of a segment index to "index-opt", keeping only
 * high-frequency terms and, for each, the top-scoring fraction of its postings. */
public class IndexOptimizer {
public static final String DONE_NAME = "optimize.done";
private static final float IDF_THRESHOLD = 6.0f;
private static final float FRACTION = 0.1f;
private static class FilterTermDocs implements TermDocs {
protected TermDocs in;
public FilterTermDocs(TermDocs in) { this.in = in; }
public void seek(Term term) throws IOException { in.seek(term); }
public void seek(TermEnum termEnum) throws IOException { in.seek(termEnum); }
public int doc() { return in.doc(); }
public int freq() { return in.freq(); }
public boolean next() throws IOException { return in.next(); }
public int read(int[] docs, int[] freqs) throws IOException {
return in.read(docs, freqs);
}
public boolean skipTo(int i) throws IOException { return in.skipTo(i); }
public void close() throws IOException { in.close(); }
}
private static class FilterTermPositions
extends FilterTermDocs implements TermPositions {
public FilterTermPositions(TermPositions in) { super(in); }
public int nextPosition() throws IOException {
return ((TermPositions)in).nextPosition();
}
}
private static class FilterTermEnum extends TermEnum {
protected TermEnum in;
public FilterTermEnum(TermEnum in) { this.in = in; }
public boolean next() throws IOException { return in.next(); }
public Term term() { return in.term(); }
public int docFreq() { return in.docFreq(); }
public void close() throws IOException { in.close(); }
}
private static class OptimizingTermEnum extends FilterTermEnum {
private IndexReader reader;
private Similarity similarity;
public OptimizingTermEnum(IndexReader reader, Similarity similarity)
throws IOException {
super(reader.terms());
this.reader = reader;
this.similarity = similarity;
}
public boolean next() throws IOException {
while (in.next()) {
float idf = similarity.idf(in.docFreq(), reader.maxDoc());
if (idf <= IDF_THRESHOLD)
return true;
}
return false;
}
}
private static class ScoreDocQueue extends PriorityQueue {
ScoreDocQueue(int size) {
initialize(size);
}
protected final boolean lessThan(Object a, Object b) {
ScoreDoc hitA = (ScoreDoc)a;
ScoreDoc hitB = (ScoreDoc)b;
if (hitA.score == hitB.score)
return hitA.doc > hitB.doc;
else
return hitA.score < hitB.score;
}
}
private static class OptimizingTermPositions extends FilterTermPositions {
private IndexReader reader;
private TermDocs termDocs;
private int docFreq;
private ScoreDocQueue sdq;
private BitSet docs;
private Similarity similarity;
public OptimizingTermPositions(IndexReader reader, Similarity similarity)
throws IOException {
super(reader.termPositions());
this.reader = reader;
this.termDocs = reader.termDocs();
this.similarity = similarity;
this.sdq = new ScoreDocQueue((int)Math.ceil(reader.maxDoc() * FRACTION));
this.docs = new BitSet(reader.maxDoc());
}
public void seek(TermEnum termEnum) throws IOException {
super.seek(termEnum);
termDocs.seek(termEnum);
byte[] norms = reader.norms(termEnum.term().field());
sdq.clear();
float minScore = 0.0f;
int count = (int)Math.ceil(termEnum.docFreq() * FRACTION);
System.out.println("Optimizing " + termEnum.term()
+ " from " + termEnum.docFreq()
+ " to " + count);
while (termDocs.next()) {
int doc = termDocs.doc();
float score =
similarity.tf(termDocs.freq()) * similarity.decodeNorm(norms[doc]);
if (score > minScore) {
sdq.put(new ScoreDoc(doc, score));
if (sdq.size() > count) { // if sdq overfull
sdq.pop(); // remove lowest in sdq
minScore = ((ScoreDoc)sdq.top()).score; // reset minScore
}
}
}
docs.clear();
while (sdq.size() != 0) {
docs.set(((ScoreDoc)sdq.pop()).doc);
}
}
public boolean next() throws IOException {
while (in.next()) {
if (docs.get(in.doc()))
return true;
}
return false;
}
}
private static class OptimizingReader extends FilterIndexReader {
private Similarity similarity = new NutchSimilarity();
public OptimizingReader(IndexReader reader) {
super(reader);
}
// don't copy any per-document data
public int numDocs() { return 0; }
public int maxDoc() { return 0; }
// filter out low frequency terms
public TermEnum terms() throws IOException {
return new OptimizingTermEnum(in, similarity);
}
// filter out low-scoring postings
public TermPositions termPositions() throws IOException {
return new OptimizingTermPositions(in, similarity);
}
public boolean hasDeletions() { return false; }
}
private File directory;
public IndexOptimizer(File directory) {
this.directory = directory;
}
public void optimize() throws IOException {
IndexReader reader = IndexReader.open(new File(directory, "index"));
OptimizingReader optimizer = new OptimizingReader(reader);
IndexWriter writer = new IndexWriter(new File(directory, "index-opt"),
null, true);
writer.addIndexes(new IndexReader[] { optimizer });
writer.close(); // flush the pruned index and release its write lock
reader.close();
}
/** Optimize the index under the named segment directory. */
public static void main(String[] args) throws Exception {
File directory;
String usage = "IndexOptimizer directory";
if (args.length < 1) {
System.err.println("Usage: " + usage);
return;
}
directory = new File(args[0]);
IndexOptimizer optimizer = new IndexOptimizer(directory);
Date start = new Date();
optimizer.optimize();
Date end = new Date();
System.out.print(end.getTime() - start.getTime());
System.out.println(" total milliseconds");
}
}
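As the constants above suggest, optimize() writes a pruned copy of a segment's index to "index-opt": terms whose idf exceeds IDF_THRESHOLD (6.0) are skipped, and for each kept term only roughly the top FRACTION (10%) of its postings by tf*norm are copied. A minimal invocation sketch; the OptimizeSegment class and segment path are made up:

import java.io.File;
import net.nutch.indexer.IndexOptimizer;

public class OptimizeSegment {
  public static void main(String[] args) throws Exception {
    // Hypothetical segment directory containing an "index" subdirectory;
    // the pruned copy is written next to it as "index-opt".
    IndexOptimizer optimizer = new IndexOptimizer(new File("crawl/segments/20040101"));
    optimizer.optimize();
  }
}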
--- NEW FILE: IndexMerger.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.indexer;
import java.util.Date;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.index.IndexWriter;
/** Merges the indexes of several segments into a single index. */
public class IndexMerger {
public static final String DONE_NAME = "merge.done";
private File indexDirectory;
private File[] segments;
public IndexMerger(File indexDirectory, File[] segments) {
this.indexDirectory = indexDirectory;
this.segments = segments;
}
private void merge() throws IOException {
Directory[] dirs = new Directory[segments.length];
for (int i = 0; i < segments.length; i++)
dirs[i] = FSDirectory.getDirectory(new File(segments[i],"index"), false);
String name =
segments[0].getName() + "_" + segments[segments.length-1].getName();
IndexWriter writer =
new IndexWriter(new File(indexDirectory, name), null, true);
writer.mergeFactor = 50;
writer.infoStream = System.out;
writer.addIndexes(dirs);
writer.close();
}
/** Merge the segment indexes into an index in the named directory. */
public static void main(String[] args) throws Exception {
File indexDirectory;
String usage = "IndexMerger indexDirectory segments...";
if (args.length < 2) {
System.err.println("Usage: " + usage);
return;
}
indexDirectory = new File(args[0]);
File[] segments = new File[args.length - 1];
for (int i = 1; i < args.length; i++) {
segments[i-1] = new File(args[i]);
}
IndexMerger merger = new IndexMerger(indexDirectory, segments);
Date start = new Date();
merger.merge();
Date end = new Date();
System.out.print(end.getTime() - start.getTime());
System.out.println(" total milliseconds");
}
}
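A sketch of invoking the merger programmatically through its public main(); the MergeSegments class and the paths are made up for illustration:

import net.nutch.indexer.IndexMerger;

public class MergeSegments {
  public static void main(String[] args) throws Exception {
    // Hypothetical paths: first argument is the output index directory,
    // the rest are segment directories, each containing an "index" subdirectory.
    IndexMerger.main(new String[] {
      "crawl/index",
      "crawl/segments/20040101",
      "crawl/segments/20040102"
    });
    // The merged index is written to crawl/index/20040101_20040102.
  }
}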
--- NEW FILE: HighFreqTerms.java ---
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
package net.nutch.indexer;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import java.io.OutputStreamWriter;
/** Lists the most frequent terms in an index. */
public class HighFreqTerms {
public static int numTerms = 100;
private static class TermFreq {
TermFreq(Term t, int df) {
term = t;
docFreq = df;
}
int docFreq;
Term term;
}
private static class TermFreqQueue extends PriorityQueue {
TermFreqQueue(int size) {
initialize(size);
}
protected final boolean lessThan(Object a, Object b) {
TermFreq termInfoA = (TermFreq)a;
TermFreq termInfoB = (TermFreq)b;
return termInfoA.docFreq < termInfoB.docFreq;
}
}
public static void main(String[] args) throws Exception {
IndexReader reader = null;
boolean noFreqs = false;
int count = 100;
String usage = "HighFreqTerms [-count <n>] [-nofreqs] <index dir>";
if (args.length == 0) {
System.err.println(usage);
System.exit(-1);
}
for (int i = 0; i < args.length; i++) { // parse command line
if (args[i].equals("-count")) { // found -count option
count = Integer.parseInt(args[++i]);
} else if (args[i].equals("-nofreqs")) { // found -nofreqs option
noFreqs = true;
} else {
reader = IndexReader.open(args[i]);
}
}
TermFreqQueue tiq = new TermFreqQueue(count);
TermEnum terms = reader.terms();
int minFreq = 0;
while (terms.next()) {
if (terms.docFreq() > minFreq) {
tiq.put(new TermFreq(terms.term(), terms.docFreq()));
if (tiq.size() > count) { // if tiq overfull
tiq.pop(); // remove lowest in tiq
minFreq = ((TermFreq)tiq.top()).docFreq; // reset minFreq
}
}
}
OutputStreamWriter out = new OutputStreamWriter(System.out, "UTF-8");
while (tiq.size() != 0) {
TermFreq termInfo = (TermFreq)tiq.pop();
out.write(termInfo.term.toString());
if (!noFreqs) {
out.write(" ");
out.write(Integer.toString(termInfo.docFreq));
}
out.write("\n");
}
out.flush();
reader.close();
}
}