Thank you for the help with this so far. I've been trying the different
suggestions that you all posted here.
The Lucene index contains a numeric index (the value I want), 4 text
fields (with simple data, e.g. Form, Publication, Email, or people's
names) and 1 text field with the OCR of the image that is referenced by
the line (very large in some cases). The data is currently stored for
the text fields (for testing); hopefully making them tokenized only,
without storing the actual content, will speed some things up.
The following is a list of the times the loops are taking.
Any advice on how to speed any of them up?
Thanks in advance.
End Fieldable:22 seconds
--------------start code--------------
// Benchmark "Fieldable": collect up to 100000 hits sorted by relevance, load
// each hit through a field selector built for "DocumentId", and join the
// resulting IDs into one comma-separated string.
Lucene.Net.Search.TopFieldDocCollector collector =
    new Lucene.Net.Search.TopFieldDocCollector(reader, Lucene.Net.Search.Sort.RELEVANCE, 100000);
search.Search(query, null, collector);
Lucene.Net.Search.TopDocs topDocs = collector.TopDocs();
string[] values = new string[topDocs.scoreDocs.Length];
// NOTE(review): MyFieldSelector is defined elsewhere; presumably it restricts
// loading to the named field so the large OCR field is skipped - confirm.
LuceneUtilities.MyFieldSelector field_selector =
    new LuceneUtilities.MyFieldSelector("DocumentId");
for (int i = 0; i < values.Length; i++)
{
    Lucene.Net.Search.ScoreDoc score_document = topDocs.scoreDocs[i];
    Lucene.Net.Documents.Document document = search.Doc(score_document.doc, field_selector);
    values[i] = document.GetFieldable("DocumentId").StringValue();
}
// Join with ", " so the list reads "1, 2, 3", the same separator the other
// snippets in this post use.
string csv = String.Join(", ", values);
--------------end code--------------
End TopDocs, plus string:30 seconds
--------------start code--------------
// Benchmark "TopDocs, plus string": run the query for the top 100000 hits,
// load each hit's document, and build a string of DocumentId values where
// every ID is followed by ", " (including the last one).
string docIds = "";
totalDocuments = hits.Length();
dtStart = DateTime.Now;
//Lucene.Net.Search.TopDocs topDocs = search.Search(query, null, 100000);
topDocs = search.Search(query, null, 100000);
// Accumulate in a StringBuilder: "docIds +=" inside the loop re-copies the
// whole accumulated string on every iteration, which is quadratic in the
// number of hits.
System.Text.StringBuilder docIdBuilder = new System.Text.StringBuilder();
foreach (Lucene.Net.Search.ScoreDoc sd in topDocs.scoreDocs)
{
    // NOTE(review): Doc(int) is called without a field selector, so it
    // presumably loads every stored field, including the large OCR text.
    Lucene.Net.Documents.Document docTest = search.Doc(sd.doc);
    docIdBuilder.Append(docTest.GetField("DocumentId").StringValue());
    docIdBuilder.Append(", ");
}
docIds = docIdBuilder.ToString();
dtCurrent = DateTime.Now;
--------------end code--------------
End HitIterator (string array):29 seconds
--------------start code--------------
// Benchmark "HitIterator (string array)": walk the Hits collection through
// its iterator and copy each hit's DocumentId value into a pre-sized array.
Lucene.Net.Search.HitIterator hi = (Lucene.Net.Search.HitIterator)hits.Iterator();
string[] sTest1 = new string[hits.Length()];
int iCount1 = 0;
dtStart = DateTime.Now;
while (hi.MoveNext())
{
    Lucene.Net.Search.Hit currentHit = (Lucene.Net.Search.Hit)hi.Current;
    sTest1[iCount1] = currentHit.GetDocument().GetField("DocumentId").StringValue();
    iCount1++;
    // Disabled string-concatenation variant, kept on a single comment line so
    // that no part of it is compiled:
    //docIds += ((Lucene.Net.Search.Hit)hi.Current).GetDocument().GetField("DocumentId").StringValue() + ", ";
}
--------------end code--------------
End HitIterator (arrayList):30 seconds
--------------start code--------------
// Benchmark "HitIterator (arrayList)": restart the iterator over the Hits
// collection and append each hit's DocumentId value to an ArrayList.
hi = (Lucene.Net.Search.HitIterator)hits.Iterator();
StringBuilder sb = new StringBuilder();
ArrayList alTest = new ArrayList();
dtStart = DateTime.Now;
while (hi.MoveNext())
{
    Lucene.Net.Search.Hit currentHit = (Lucene.Net.Search.Hit)hi.Current;
    Lucene.Net.Documents.Document hitDocument = currentHit.GetDocument();
    string documentId = hitDocument.GetField("DocumentId").StringValue();
    alTest.Add(documentId);
}
--------------end code--------------
End Hits (array):30 seconds
--------------start code--------------
// Benchmark "Hits (array)": index into the Hits collection directly and copy
// each hit's DocumentId value into an array sized to the total hit count.
string[] sFinalDocs = new string[totalDocuments];
for (int iCount = 0; iCount < totalDocuments; iCount++)
{
    Lucene.Net.Documents.Document docHit = hits.Doc(iCount);
    // Disabled string-concatenation variant, kept on a single comment line so
    // that no part of it is compiled:
    //docIds += docHit.GetField("DocumentId").StringValue() + ", ";
    sFinalDocs[iCount] = docHit.GetField("DocumentId").StringValue();
}
docIds = String.Join(", ", sFinalDocs);
--------------end code--------------