I propose this patch. It moves the logic of constructing a HitDetails from a Lucene document into the HitDetails constructor. It also removes useless array copies. The benefit of this patch is being able to reuse part of the Nutch machinery (get a Lucene document by other means and later construct a HitDetails from it). It also looks cleaner IMO.

Thanks!

Index: src/java/org/apache/nutch/searcher/IndexSearcher.java
===================================================================
--- src/java/org/apache/nutch/searcher/IndexSearcher.java	(revisión: 543252)
+++ src/java/org/apache/nutch/searcher/IndexSearcher.java	(copia de trabajo)
@@ -21,6 +21,8 @@
 
 import java.util.ArrayList;
 import java.util.Enumeration;
+import java.util.Iterator;
+import java.util.List;
 
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -105,20 +107,8 @@
   }
 
   public HitDetails getDetails(Hit hit) throws IOException {
-    ArrayList fields = new ArrayList();
-    ArrayList values = new ArrayList();
-
     Document doc = luceneSearcher.doc(hit.getIndexDocNo());
-
-    Enumeration e = doc.fields();
-    while (e.hasMoreElements()) {
-      Field field = (Field)e.nextElement();
-      fields.add(field.name());
-      values.add(field.stringValue());
-    }
-
-    return new HitDetails((String[])fields.toArray(new String[fields.size()]),
-                          (String[])values.toArray(new String[values.size()]));
+    return new HitDetails(doc);
   }
 
   public HitDetails[] getDetails(Hit[] hits) throws IOException {
Index: src/java/org/apache/nutch/searcher/HitDetails.java
===================================================================
--- src/java/org/apache/nutch/searcher/HitDetails.java	(revisión: 543252)
+++ src/java/org/apache/nutch/searcher/HitDetails.java	(copia de trabajo)
@@ -21,8 +21,11 @@
 import java.io.DataOutput;
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.List;
 
 import org.apache.hadoop.io.*;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
 import org.apache.nutch.html.Entities;
 
 /** Data stored in the index for a hit.
@@ -52,7 +55,29 @@
     this.fields[1] = "url";
     this.values[1] = url;
   }
 
+  /**
+   * Construct from a Lucene document, copying each field's name and string
+   * value in the order returned by {@code doc.getFields()}.
+   *
+   * @param doc the Lucene document whose fields become this hit's details
+   */
+  public HitDetails(Document doc) {
+    List<?> docFields = doc.getFields();
+    length = docFields.size();
+
+    // Allocate the backing arrays before filling them: nothing else in this
+    // constructor does so, and the loop below would otherwise fail on fields[0].
+    fields = new String[length];
+    values = new String[length];
+
+    for (int i = 0; i < length; i++) {
+      Field field = (Field) docFields.get(i);
+      fields[i] = field.name();
+      values[i] = field.stringValue();
+    }
+  }
+
   /** Returns the number of fields contained in this. */
   public int getLength() { return length; }
 
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Nutch-developers mailing list
Nutch-developers@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/nutch-developers

Reply via email to