Re: Alternative to looping through Hits

Trevor Watson Fri, 02 Oct 2009 11:26:34 -0700

Thank you so far for the help with this. I've been trying the different suggestions that you all posted on here. The Lucene index contains a numeric index (the value I want), 4 text fields (with simple data, i.e. Form, Publication, Email, or people's names) and 1 text field with the OCR of the image that is referenced by the line (very large in some cases). The data is currently stored for the text fields (for testing), and hopefully making that only tokenized and not saving the actual info will speed some things up.


The following is a list of the times the loops are taking.


Any advice on speeding any of them up to better time?

Thanks in advance.

End Fieldable:22 seconds
--------------start code--------------

// Timed variant "Fieldable": collect up to 100000 hits sorted by relevance, read the
// "DocumentId" field of every hit into a string array, then join the array into one string.
// NOTE(review): reader, search, query and LuceneUtilities.MyFieldSelector are defined
// outside this excerpt.
Lucene.Net.Search.TopFieldDocCollector collector =
    new Lucene.Net.Search.TopFieldDocCollector(reader, Lucene.Net.Search.Sort.RELEVANCE, 100000);

search.Search(query, null, collector);
Lucene.Net.Search.TopDocs topDocs = collector.TopDocs();
string[] values = new string[topDocs.scoreDocs.Length];

// NOTE(review): presumably this selector limits document loading to the "DocumentId"
// field; MyFieldSelector is not shown here, so confirm against its implementation.
LuceneUtilities.MyFieldSelector field_selector =
    new LuceneUtilities.MyFieldSelector("DocumentId");

for (int i = 0; i < values.Length; i++)
{
    Lucene.Net.Search.ScoreDoc score_document = topDocs.scoreDocs[i];
    Lucene.Net.Documents.Document document = search.Doc(score_document.doc, field_selector);
    values[i] = document.GetFieldable("DocumentId").StringValue();
}

// NOTE(review): the separator is " ," (space before the comma), kept exactly as posted;
// the other variants in this message use ", ".
string csv = String.Join(" ,", values);
--------------end code--------------


End TopDocs, plus string:30 seconds
--------------start code--------------

// Timed variant "TopDocs, plus string": run the query with a 100000-hit cap, fetch each
// matching document, and append its "DocumentId" value to a single string.
// NOTE(review): totalDocuments, hits, dtStart, dtCurrent, topDocs, search and query are
// declared outside this excerpt.
string docIds = "";totalDocuments = hits.Length();


           // Start timestamp for this variant.
           dtStart = DateTime.Now;
           docIds = "";

//Lucene.Net.Search.TopDocs topDocs = search.Search(query,null, 100000);

           topDocs = search.Search(query, null, 100000);

           foreach (Lucene.Net.Search.ScoreDoc sd in topDocs.scoreDocs)
           {
               // search.Doc is called without a field selector here; presumably that
               // loads every stored field of the document - TODO confirm.
               Lucene.Net.Documents.Document docTest = search.Doc(sd.doc);

// String concatenation allocates a new string on every iteration; the final value
// also ends with a trailing ", ".
docIds += docTest.GetField("DocumentId").StringValue() +", ";

           }
           // End timestamp for this variant.
           dtCurrent = DateTime.Now;
--------------end code--------------

End HitIterator (string array):29 seconds
--------------start code--------------

// Timed variant "HitIterator (string array)": walk the Hits iterator and store the
// "DocumentId" value of every hit in a string array sized to hits.Length().
// NOTE(review): hits and dtStart are declared outside this excerpt.
Lucene.Net.Search.HitIterator hi =(Lucene.Net.Search.HitIterator)hits.Iterator();

           string[] sTest1 = new string[hits.Length()];
           // Next free slot in sTest1.
           int iCount1 = 0;

           // Start timestamp; the iterator and the array are created before timing begins.
           dtStart = DateTime.Now;
           while (hi.MoveNext())
           {

// Each element yielded by the iterator is cast to Hit, its document is fetched, and
// the "DocumentId" field value is stored.
sTest1[iCount1] =((Lucene.Net.Search.Hit)hi.Current).GetDocument().GetField("DocumentId").StringValue();

               iCount1++;

//docIds +=((Lucene.Net.Search.Hit)hi.Current).GetDocument().GetField("DocumentId").StringValue()+ ", ";

           }
--------------end code--------------

End HitIterator (arrayList):30 seconds
--------------start code--------------
// Timed variant "HitIterator (arrayList)": walk a fresh Hits iterator and add the
// "DocumentId" value of every hit to an ArrayList.
// NOTE(review): hi, hits and dtStart are declared outside this excerpt.
hi = (Lucene.Net.Search.HitIterator)hits.Iterator();
           // NOTE(review): sb is not used anywhere in this excerpt.
           StringBuilder sb = new StringBuilder();
           ArrayList alTest = new ArrayList();

           // Start timestamp for this variant.
           dtStart = DateTime.Now;
           // The loop has no braces; the single Add statement below is its whole body.
           while (hi.MoveNext())

alTest.Add(((Lucene.Net.Search.Hit)hi.Current).GetDocument().GetField("DocumentId").StringValue());


--------------end code--------------

End Hits (array):30 seconds
--------------start code--------------
       // Timed variant "Hits (array)": index into Hits directly and store the "DocumentId"
       // value of every hit in a string array with totalDocuments slots.
       // NOTE(review): totalDocuments and hits are declared outside this excerpt.
       string[] sFinalDocs = new string[totalDocuments];
       for (int iCount = 0; iCount < totalDocuments; iCount++)
       {
           Lucene.Net.Documents.Document docHit = hits.Doc(iCount);

           //docIds += docHit.GetField("DocumentId").StringValue()+ ", ";
           // This assignment had been fused onto the end of the comment line above,
           // which left the array unfilled.
           sFinalDocs[iCount] = docHit.GetField("DocumentId").StringValue();
       }
       // NOTE(review): the loop's closing brace was missing from the posted text; it is
       // restored here on the assumption that the loop ends after the assignment.

docIds = String.Join(", ", sFinalDocs);
--------------end code--------------

Re: Alternative to looping through Hits

Reply via email to