I checked out yesterday's trunk of NHibernate Search from nhcontrib's
svn repo. I tried following the batch indexing advice located here:
http://www.hibernate.org/hib_docs/search/reference/en/html_single/#search-batchindex-indexing,
which basically relies on a JDBC scrollable ResultSet, which does not
exist in ADO.NET. Also, I looked through the code and couldn't find
hibernate.search.worker.batch_size being used anywhere. I saw the
constant that declares the parameter, but nothing references it. Right
now, indexing 200K Address POCOs takes around 3.5 hours on a 2.8 GHz
Xeon with 8 GB of RAM. I manually commit them in batches of 500. I looked
at the log4net logs and noticed that the actual index writing is slow,
taking about 25 seconds for each batch of 500.
Anyway, I was wondering if you had any suggestions for making things
faster. I realize I'm playing with bleeding-edge stuff out of trunk
and nothing is guaranteed, but I thought I'd ask anyway. Here are the
code and config, if you are interested:
/// <summary>
/// Imports records from a comma-separated flat file. Each line with exactly
/// 13 fields is parsed with <c>ParseRecord</c> and saved through a full-text
/// session; the transaction is committed every <c>BATCH_SIZE</c> records and
/// once more at the end.
/// </summary>
/// <param name="filePath">Path of the flat file to import.</param>
/// <returns>The number of records that were saved.</returns>
/// <exception cref="Exception">
/// Thrown when saving or committing fails. The message carries the running
/// record count and the original exception is kept as the inner exception.
/// </exception>
public int Import(string filePath) {
    Stopwatch watch = new Stopwatch();
    Debug.WriteLine(string.Format("Importing AT&T Address and Phone Number Flat File {0}", filePath));
    int count = 0;

    // The using block guarantees the reader is released even when opening or
    // closing the session throws.
    using (TextReader reader = new StreamReader(filePath)) {
        ISession session = Search.CreateFullTextSession(SessionFactory.OpenSession());
        try {
            session.CacheMode = CacheMode.Ignore;
            session.BeginTransaction();

            reader.ReadLine(); // advance cursor past header
            reader.ReadLine(); // advance cursor past column titles

            string record;
            while ((record = reader.ReadLine()) != null) {
                string[] fields = record.Split(',');
                if (fields.Length != 13) continue; // skip malformed lines

                count++;
                session.Save(ParseRecord(fields));
                Debug.WriteLine(string.Format("{0}: Wrote record {1}", DateTime.Now.ToShortTimeString(), count));

                if (count % BATCH_SIZE == 0) {
                    watch.Start();
                    Debug.Write(string.Format("Writing batch {0}... ", count));
                    session.Transaction.Commit();
                    // Evict the entities that were just committed. Without this the
                    // session keeps a reference to every saved object, so memory use
                    // and flush time grow with each batch.
                    session.Clear();
                    Debug.WriteLine(string.Format("Completed in {0} milliseconds.", watch.ElapsedMilliseconds));
                    watch.Reset();
                    session.BeginTransaction();
                }
            }

            Debug.Write(string.Format("{0}: Comitting final transaction... ", DateTime.Now.ToShortTimeString()));
            watch.Start();
            session.Transaction.Commit();
            Debug.WriteLine(string.Format("Completed in {0} milliseconds.", watch.ElapsedMilliseconds));
            watch.Reset();
            return count;
        } catch (Exception e) {
            // Only roll back a transaction that is still open; rolling back an
            // inactive one would throw and hide the original failure.
            if (session.Transaction.IsActive) {
                session.Transaction.Rollback();
            }
            throw new Exception(string.Format("Error occurred with record {0}:, {1}", count, e.Message), e);
        } finally {
            Debug.WriteLine("Committing addresses to database");
            session.Close();
            Debug.WriteLine("Done committing addresses to database");
        }
    }
}
Here is my relevant config
<hibernate-configuration xmlns="urn:nhibernate-configuration-2.2">
  <session-factory>
    <property name="dialect">NHibernate.Dialect.MsSql2005Dialect</property>
    <property name="connection.provider">NHibernate.Connection.DriverConnectionProvider</property>
    <property name="connection.connection_string">[xxx]</property>
    <property name="current_session_context_class">thread_static</property>
    <property name="show_sql">true</property>
    <property name="hbm2ddl.auto">create</property>
    <property name="adonet.batch_size">500</property>
    <!-- The same NHibernate.Search listener class is registered for the
         post-insert, post-update and post-delete events. -->
    <listener class="NHibernate.Search.Event.FullTextIndexEventListener, NHibernate.Search" type="post-insert"/>
    <listener class="NHibernate.Search.Event.FullTextIndexEventListener, NHibernate.Search" type="post-update"/>
    <listener class="NHibernate.Search.Event.FullTextIndexEventListener, NHibernate.Search" type="post-delete"/>
    <!-- NHibernate.Validator listeners registered for the pre-insert and
         pre-update events. -->
    <listener class="NHibernate.Validator.Event.ValidatePreInsertEventListener, NHibernate.Validator" type="pre-insert"/>
    <listener class="NHibernate.Validator.Event.ValidatePreUpdateEventListener, NHibernate.Validator" type="pre-update"/>
  </session-factory>
</hibernate-configuration>
<!-- NHibernate Search settings: directory provider, index location,
     analyzer, RAM buffer size and worker batch size. -->
<nhs-configuration xmlns="urn:nhs-configuration-1.0">
  <search-factory>
    <property name="hibernate.search.default.directory_provider">NHibernate.Search.Store.FSDirectoryProvider, NHibernate.Search</property>
    <property name="hibernate.search.default.indexBase">~/App_Data/Index</property>
    <property name="hibernate.search.default.analyzer">Lucene.Net.Analysis.Standard.StandardAnalyzer, Lucene.Net</property>
    <property name="hibernate.search.default.transaction.ram_buffer_size">2048</property>
    <property name="hibernate.search.worker.batch_size">500</property>
  </search-factory>
</nhs-configuration>
Thanks Much
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups
"NHibernate Contrib - Development Group" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to [EMAIL PROTECTED]
For more options, visit this group at
http://groups.google.com.ar/group/nhcdevs?hl=en
-~----------~----~----~----~------~----~------~--~---