I got trunk, as of yesterday, of NHibernate Search from nhcontrib's
svn repo. I tried following the batch indexing advice located here:
http://www.hibernate.org/hib_docs/search/reference/en/html_single/#search-batchindex-indexing,
which basically uses a JDBC Scrollable ResultSet, which does not exist
in ADO. Also, I looked through the code and couldn't find the
hibernate.search.worker.batch_size being used anywhere. I saw the
constant that declares the parameter but nothing referencing it. Right
now, indexing 200K Address POCOs takes around 3.5 hours on a 2.8 GHz
Xeon with 8 GB of RAM. I manually commit them in batches of 500. I
looked at the log4net logs and noticed that the actual index writing is
slow, taking about 25 seconds for each batch of 500.

Anyway, I was wondering if you had any suggestions for making things
faster. I realize I'm playing with bleeding-edge stuff out of trunk
and nothing is guaranteed, but I thought I'd ask anyway. Here are the
code and config, if you are interested:

        /// <summary>
        /// Imports address records from a comma-separated flat file, saving each
        /// record through a full-text session and committing in batches of
        /// <c>BATCH_SIZE</c> records.
        /// </summary>
        /// <param name="filePath">
        /// Path of the flat file. The first two lines (header and column titles)
        /// are skipped.
        /// </param>
        /// <returns>
        /// The number of records saved. Lines that do not split into exactly 13
        /// comma-separated fields are skipped and not counted.
        /// </returns>
        /// <exception cref="Exception">
        /// Wraps any failure raised while reading, saving or committing; the message
        /// carries the running record count and the original exception is the inner
        /// exception. The current transaction is rolled back if it is still active.
        /// </exception>
        public int Import(string filePath) {
            Stopwatch watch = new Stopwatch();

            Debug.WriteLine(string.Format("Importing AT&T Address and Phone Number Flat File {0}", filePath));
            int count = 0;

            // The reader is disposed even if opening the session below fails.
            using (TextReader reader = new StreamReader(filePath)) {
                ISession session = Search.CreateFullTextSession(SessionFactory.OpenSession());

                try {
                    session.CacheMode = CacheMode.Ignore;
                    session.BeginTransaction();

                    reader.ReadLine(); // advance cursor past header
                    reader.ReadLine(); // advance cursor past column titles

                    string record;
                    while ((record = reader.ReadLine()) != null) {
                        string[] fields = record.Split(',');
                        if (fields.Length != 13) {
                            continue;
                        }

                        count++;
                        session.Save(ParseRecord(fields));
                        Debug.WriteLine(string.Format("{0}: Wrote record {1}", DateTime.Now.ToShortTimeString(), count));

                        if (count % BATCH_SIZE == 0) {
                            watch.Start();
                            Debug.Write(string.Format("Writing batch {0}... ", count));
                            session.Transaction.Commit();
                            Debug.WriteLine(string.Format("Completed in {0} milliseconds.", watch.ElapsedMilliseconds));
                            watch.Reset();

                            // Evict the entities that were just committed. Without this the
                            // session keeps every saved object in its first-level cache, so
                            // memory use and per-flush work grow with each batch.
                            session.Clear();

                            session.BeginTransaction();
                        }
                    }

                    Debug.Write(string.Format("{0}: Comitting final transaction... ", DateTime.Now.ToShortTimeString()));
                    watch.Start();
                    session.Transaction.Commit();
                    Debug.WriteLine(string.Format("Completed in {0} milliseconds.", watch.ElapsedMilliseconds));
                    watch.Reset();

                    return count;
                } catch (Exception e) {
                    // Only roll back a transaction that is still open; rolling back an
                    // inactive one (e.g. after a failed Commit) could throw and hide
                    // the original error.
                    if (session.Transaction != null && session.Transaction.IsActive) {
                        session.Transaction.Rollback();
                    }

                    throw new Exception(string.Format("Error occurred with record {0}:, {1}", count, e.Message), e);
                } finally {
                    Debug.WriteLine("Committing addresses to database");
                    session.Close();
                    Debug.WriteLine("Done committing addresses to database");
                }
            }
        }

Here is my relevant config

    <hibernate-configuration xmlns="urn:nhibernate-configuration-2.2">
        <session-factory>
            <property name="dialect">NHibernate.Dialect.MsSql2005Dialect</property>
            <property name="connection.provider">NHibernate.Connection.DriverConnectionProvider</property>
            <property name="connection.connection_string">[xxx]</property>
            <property name="current_session_context_class">thread_static</property>
            <property name="show_sql">true</property>
            <property name="hbm2ddl.auto">create</property>
            <property name="adonet.batch_size">500</property>
            <listener class="NHibernate.Search.Event.FullTextIndexEventListener, NHibernate.Search" type="post-insert"/>
            <listener class="NHibernate.Search.Event.FullTextIndexEventListener, NHibernate.Search" type="post-update"/>
            <listener class="NHibernate.Search.Event.FullTextIndexEventListener, NHibernate.Search" type="post-delete"/>
            <listener class="NHibernate.Validator.Event.ValidatePreInsertEventListener, NHibernate.Validator" type="pre-insert"/>
            <listener class="NHibernate.Validator.Event.ValidatePreUpdateEventListener, NHibernate.Validator" type="pre-update"/>
        </session-factory>
    </hibernate-configuration>

    <nhs-configuration xmlns="urn:nhs-configuration-1.0">
    <search-factory>
      <property name="hibernate.search.default.directory_provider">NHibernate.Search.Store.FSDirectoryProvider, NHibernate.Search</property>
      <property name="hibernate.search.default.indexBase">~/App_Data/Index</property>
      <property name="hibernate.search.default.analyzer">Lucene.Net.Analysis.Standard.StandardAnalyzer, Lucene.Net</property>
      <property name="hibernate.search.default.transaction.ram_buffer_size">2048</property>
      <property name="hibernate.search.worker.batch_size">500</property>
    </search-factory>
    </nhs-configuration>

Thanks Much

--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"NHibernate Contrib - Development Group" group.
To post to this group, send email to [email protected]
To unsubscribe from this group, send email to [EMAIL PROTECTED]
For more options, visit this group at 
http://groups.google.com.ar/group/nhcdevs?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to