Also make sure you don't have any autocommit rules enabled in solrconfig.xml.

How many documents are in the 400MB CSV file, and how long does it
take to index now?

-Yonik
http://www.lucidimagination.com



On Tue, Jul 7, 2009 at 10:03 AM, Anand Kumar
Prabhakar<anand2...@gmail.com> wrote:
>
> Hi Yonik,
>
> Currently our schema has very few fields, and we don't have any copy fields
> either. Please find below the schema.xml we are using:
>
> <?xml version="1.0" encoding="UTF-8" ?>
> <schema name="cmps" version="1.1">
>  <!-- attribute "name" is the name of this schema and is only used for
> display purposes.
>       Applications should change this to reflect the nature of the search
> collection.
>       version="1.1" is Solr's version number for the schema syntax and
> semantics.  It should
>       not normally be changed by applications.
>       1.0: multiValued attribute did not exist, all fields are multiValued
> by nature
>       1.1: multiValued attribute introduced, false by default -->
>  <types>
>
>
>    <fieldType name="string" class="solr.StrField" sortMissingLast="true"
> omitNorms="true"/>
>
>    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"
> omitNorms="true"/>
>
>
>    <fieldType name="integer" class="solr.IntField" omitNorms="true"/>
>    <fieldType name="long" class="solr.LongField" omitNorms="true"/>
>    <fieldType name="float" class="solr.FloatField" omitNorms="true"/>
>    <fieldType name="double" class="solr.DoubleField" omitNorms="true"/>
>
>    <fieldType name="sint" class="solr.SortableIntField"
> sortMissingLast="true" omitNorms="true"/>
>    <fieldType name="slong" class="solr.SortableLongField"
> sortMissingLast="true" omitNorms="true"/>
>    <fieldType name="sfloat" class="solr.SortableFloatField"
> sortMissingLast="true" omitNorms="true"/>
>    <fieldType name="sdouble" class="solr.SortableDoubleField"
> sortMissingLast="true" omitNorms="true"/>
>
>    <fieldType name="date" class="solr.DateField" sortMissingLast="true"
> omitNorms="true"/>
>
>    <fieldType name="random" class="solr.RandomSortField" indexed="true" />
>
>
>
>    <fieldType name="text_ws" class="solr.TextField"
> positionIncrementGap="100">
>      <analyzer>
>        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>      </analyzer>
>    </fieldType>
>    <fieldType name="text" class="solr.TextField"
> positionIncrementGap="100">
>      <analyzer type="index">
>        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>
>
>        <filter class="solr.StopFilterFactory"
>                ignoreCase="true"
>                words="stopwords.txt"
>                enablePositionIncrements="true"
>                />
>        <filter class="solr.WordDelimiterFilterFactory"
> generateWordParts="1" generateNumberParts="1" catenateWords="1"
> catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
>        <filter class="solr.LowerCaseFilterFactory"/>
>        <filter class="solr.EnglishPorterFilterFactory"
> protected="protwords.txt"/>
>        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>      </analyzer>
>      <analyzer type="query">
>        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
> ignoreCase="true" expand="true"/>
>        <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="stopwords.txt"/>
>        <filter class="solr.WordDelimiterFilterFactory"
> generateWordParts="1" generateNumberParts="1" catenateWords="0"
> catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
>        <filter class="solr.LowerCaseFilterFactory"/>
>        <filter class="solr.EnglishPorterFilterFactory"
> protected="protwords.txt"/>
>        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>      </analyzer>
>    </fieldType>
>
>    <fieldType name="textTight" class="solr.TextField"
> positionIncrementGap="100" >
>      <analyzer>
>        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt"
> ignoreCase="true" expand="false"/>
>        <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="stopwords.txt"/>
>        <filter class="solr.WordDelimiterFilterFactory"
> generateWordParts="0" generateNumberParts="0" catenateWords="1"
> catenateNumbers="1" catenateAll="0"/>
>        <filter class="solr.LowerCaseFilterFactory"/>
>        <filter class="solr.EnglishPorterFilterFactory"
> protected="protwords.txt"/>
>        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>      </analyzer>
>    </fieldType>
>
>    <fieldType name="textSpell" class="solr.TextField"
> positionIncrementGap="100" >
>      <analyzer>
>        <tokenizer class="solr.StandardTokenizerFactory"/>
>        <filter class="solr.LowerCaseFilterFactory"/>
>        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>      </analyzer>
>    </fieldType>
>
>    <fieldType name="alphaNumericKeyword" class="solr.TextField"
> sortMissingLast="true" omitNorms="true">
>      <analyzer>
>
>        <tokenizer class="solr.KeywordTokenizerFactory"/>
>
>      </analyzer>
>    </fieldType>
>
>
>    <fieldtype name="ignored" stored="false" indexed="false"
> class="solr.StrField" />
>    <fieldType name="phNo" class="solr.TextField"
> positionIncrementGap="100" sortMissingLast="true" omitNorms="true">
>        <analyzer>
>                <tokenizer class="solr.KeywordTokenizerFactory"/>
>
>        </analyzer>
>    </fieldType>
>    <fieldType name="textStA" class="solr.TextField"
> positionIncrementGap="100" sortMissingLast="true" omitNorms="true">
>        <analyzer>
>                <tokenizer class="solr.StandardTokenizerFactory"/>
>                <filter class="solr.StandardFilterFactory"/>
>
>        </analyzer>
>    </fieldType>
>  </types>
>  <fields>
>   <field name="cugKey" type="textStA" indexed="true" stored="true"/>
>   <field name="bacKey" type="textStA" indexed="true" stored="true"/>
>   <field name="assetKey" type="phNo" indexed="true" stored="true"/>
>   <field name="contactKey" type="phNo" indexed="true" stored="true"/>
>   <field name="sourceSystem" type="textStA" indexed="true" stored="true"/>
>   <field name="parentIdFieldName" type="alphaNumericKeyword" indexed="true"
> stored="true"/>
>   <field name="parentIdFieldValue" type="alphaNumericKeyword"
> indexed="true" stored="true"/>
>   <field name="idFieldName" type="alphaNumericKeyword" indexed="true"
> stored="true"/>
>  </fields>
>
>  <defaultSearchField>cugKey</defaultSearchField>
>
>  <solrQueryParser defaultOperator="OR"/>
>
>
>
> </schema>
>
>
> Yonik Seeley-2 wrote:
>>
>> On Tue, Jul 7, 2009 at 9:14 AM, Anand Kumar
>> Prabhakar<anand2...@gmail.com> wrote:
>>> I want to know whether there is any method to do
>>> it much faster. We have overcome the OutOfMemoryException by increasing
>>> heap space.
>>
>> Optimize your schema - eliminate all unnecessary copyFields and
>> default values.  The current example schema is not good for
>> performance benchmarking.
>>
>> -Yonik
>> http://www.lucidimagination.com
>>
>>
>
> --
> View this message in context: 
> http://www.nabble.com/Loading-Data-into-Solr-without-HTTP-tp24372564p24373870.html
> Sent from the Solr - User mailing list archive at Nabble.com.
>
>

Reply via email to