[ 
https://issues.apache.org/jira/browse/SOLR-3954?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13477283#comment-13477283
 ] 

Shawn Heisey commented on SOLR-3954:
------------------------------------

Which specific configuration bits would you like to see?  My solrconfig.xml 
file is heavily split into separate files and uses XInclude.  I will go ahead 
and paste my best guesses now.

{code}
<directoryFactory name="DirectoryFactory" 
class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/>

<indexDefaults>
  <useCompoundFile>false</useCompoundFile>
  <mergePolicy class="org.apache.lucene.index.TieredMergePolicy">
    <int name="maxMergeAtOnce">35</int>
    <int name="segmentsPerTier">35</int>
    <int name="maxMergeAtOnceExplicit">105</int>
  </mergePolicy>
  <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler">
    <int name="maxMergeCount">4</int>
    <int name="maxThreadCount">4</int>
  </mergeScheduler>
  <ramBufferSizeMB>128</ramBufferSizeMB>
  <maxFieldLength>32768</maxFieldLength>
  <writeLockTimeout>1000</writeLockTimeout>
  <commitLockTimeout>10000</commitLockTimeout>
  <lockType>native</lockType>
</indexDefaults>

<updateHandler class="solr.DirectUpdateHandler2">
  <autoCommit>
    <maxDocs>0</maxDocs>
    <maxTime>0</maxTime>
  </autoCommit>
<!--
  <updateLog />
-->
</updateHandler>
{code}

My schema has 47 fields defined.  Not all fields in a typical document will be 
there, but at least half of them usually will be present.  I use the ICU 
classes for lowercasing and most of the text fieldTypes are using 
WordDelimiterFilter.

{code}
  <fields>
   <field name="catchall" type="genText" indexed="true" stored="false" 
multiValued="true" termVectors="true"/>
   <field name="doc_date" type="tdate" indexed="true" stored="true"/>
   <field name="pd" type="tdate" indexed="true" stored="true"/>
   <field name="ft_text" type="ignored"/>
   <field name="mime_type" type="mimeText" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="ft_dname" type="genText" indexed="true" stored="true"/>
   <field name="ft_subject" type="genText" indexed="true" stored="true"/>
   <field name="action" type="keyText" indexed="true" stored="true"/>
   <field name="attribute" type="keyText" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="category" type="keyText" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="caption_writer" type="keyText" indexed="true" stored="true"/>
   <field name="doc_id" type="keyText" indexed="true" stored="true"/>
   <field name="ft_owner" type="keyText" indexed="true" stored="true"/>
   <field name="location" type="keyText" indexed="true" stored="true"/>
   <field name="special" type="keyText" indexed="true" stored="true"/>
   <field name="special_cats" type="keyText" indexed="true" stored="true"/>
   <field name="selector" type="keyText" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="scode" type="keyText" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="byline" type="sourceText" indexed="true" stored="true"/>
   <field name="credit" type="sourceText" indexed="true" stored="false"/>
   <field name="keywords" type="sourceText" indexed="true" stored="true"/>
   <field name="source" type="sourceText" indexed="true" stored="true"/>
   <field name="sg" type="lcsemi" indexed="true" stored="false" 
omitTermFreqAndPositions="true"/>
   <field name="aimcode" type="lowercase" indexed="true" stored="false" 
omitTermFreqAndPositions="true"/>
   <field name="nc_lang" type="lowercase" indexed="true" stored="false" 
omitTermFreqAndPositions="true"/>
   <field name="tag_id" type="lowercase" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="collection" type="lowercase" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="feature" type="lowercase" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="ip" type="lowercase" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="longdim" type="lowercase" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="webtable" type="lowercase" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="set_name" type="lowercase" indexed="true" stored="true" 
omitTermFreqAndPositions="true"/>
   <field name="did" type="long" indexed="true" stored="true" 
postingsFormat="BloomFilter"/>
   <field name="doc_size" type="long" indexed="true" stored="true"/>
   <field name="post_date" type="tlong" indexed="true" stored="true"/>
   <field name="post_hour" type="tlong" indexed="true" stored="true"/>
   <field name="set_count" type="int" indexed="false" stored="true"/>
   <field name="set_lead" type="boolean" indexed="true" stored="true" 
default="true"/>
   <field name="format" type="string" indexed="false" stored="true"/>
   <field name="ft_sfname" type="string" indexed="false" stored="true"/>
   <field name="text_preview" type="string" indexed="false" stored="true"/>
   <field name="_version_" type="long" indexed="true" stored="true"/>
   <field name="headline" type="keyText" indexed="true" stored="true"/>
   <field name="mood" type="keyText" indexed="true" stored="true"/>
   <field name="object" type="keyText" indexed="true" stored="true"/>
   <field name="personality" type="keyText" indexed="true" stored="true"/>
   <field name="poster" type="keyText" indexed="true" stored="true"/>
  </fields>
  <uniqueKey>tag_id</uniqueKey>
  <copyField source="ft_subject" dest="catchall"/>
  <copyField source="doc_id" dest="catchall"/>
  <copyField source="ft_dname" dest="catchall"/>
  <copyField source="keywords" dest="catchall"/>
  <copyField source="ft_text" dest="catchall"/>
{code}

                
> Option to have updateHandler and DIH skip updateLog
> ---------------------------------------------------
>
>                 Key: SOLR-3954
>                 URL: https://issues.apache.org/jira/browse/SOLR-3954
>             Project: Solr
>          Issue Type: Improvement
>          Components: update
>    Affects Versions: 4.0
>            Reporter: Shawn Heisey
>             Fix For: 4.1
>
>
> The updateLog feature makes updates take longer, likely because of the I/O 
> time required to write the additional information to disk.  It may take as 
> much as three times as long for the indexing portion of the process.  I'm not 
> sure whether it affects the time to commit, but I would imagine that the 
> difference there is small or zero.  When doing incremental updates/deletes on 
> an existing index, the time lag is probably very small and unimportant.
> When doing a full reindex (which may happen via DIH), especially if this is 
> done in a build core that is then swapped with a live core, this performance 
> hit is unacceptable.  It seems to make the import take about three times as 
> long.
> An option to have an update skip the updateLog would be very useful for these 
> situations.  It should have a method in SolrJ and be exposed in DIH as well.

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators.
For more information on JIRA, see: http://www.atlassian.com/software/jira

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to