I propose this patch. It moves the logic of constructing a HitDetails
from a Lucene document to the HitDetails constructor. It also removes
useless array copies. The benefit of this patch is that parts of the Nutch
machinery can be reused (obtain a Lucene document by other means and later
construct a HitDetails from it). It also looks cleaner IMO.
Thanks!
Index: src/java/org/apache/nutch/searcher/IndexSearcher.java
===================================================================
--- src/java/org/apache/nutch/searcher/IndexSearcher.java (revisión: 543252)
+++ src/java/org/apache/nutch/searcher/IndexSearcher.java (copia de trabajo)
@@ -21,6 +21,8 @@
import java.util.ArrayList;
import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.List;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -105,20 +107,8 @@
}
public HitDetails getDetails(Hit hit) throws IOException {
- ArrayList fields = new ArrayList();
- ArrayList values = new ArrayList();
-
Document doc = luceneSearcher.doc(hit.getIndexDocNo());
-
- Enumeration e = doc.fields();
- while (e.hasMoreElements()) {
- Field field = (Field)e.nextElement();
- fields.add(field.name());
- values.add(field.stringValue());
- }
-
- return new HitDetails((String[])fields.toArray(new String[fields.size()]),
- (String[])values.toArray(new String[values.size()]));
+ return new HitDetails(doc);
}
public HitDetails[] getDetails(Hit[] hits) throws IOException {
Index: src/java/org/apache/nutch/searcher/HitDetails.java
===================================================================
--- src/java/org/apache/nutch/searcher/HitDetails.java (revisión: 543252)
+++ src/java/org/apache/nutch/searcher/HitDetails.java (copia de trabajo)
@@ -21,8 +21,11 @@
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.List;
import org.apache.hadoop.io.*;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
import org.apache.nutch.html.Entities;
/** Data stored in the index for a hit.
@@ -52,7 +55,20 @@
this.fields[1] = "url";
this.values[1] = url;
}
+
+ /**
+ * Construct from a Lucene document. Records the name and string value of
+ * every field returned by doc.getFields(), in list order.
+ */
+ public HitDetails(Document doc)
+ {
+ List<?> ff = doc.getFields();
+ length = ff.size();
+ // This constructor does not delegate to another one, so the backing
+ // arrays must be allocated here before they are indexed below.
+ fields = new String[length];
+ values = new String[length];
+ for(int i = 0 ; i < length ; i++) {
+ Field field = (Field)ff.get(i);
+ fields[i] = field.name();
+ values[i] = field.stringValue();
+ }
+ }
+
/** Returns the number of fields contained in this. */
public int getLength() { return length; }
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Nutch-developers mailing list
Nutch-developers@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-developers