Hi Jan, thanks for your fast reply. Below is the information you requested:

* I use nutch, using the command "nutch crawl urls -dir crawl-$(date +%FT%H-%M-%S) -solr http://localhost:8983/solr/ -depth 10 -topN 5"
* What do you mean "which RequestHandler"? How can I find that out?
* 3.6.1
* Both schemas are below:

<schema name="nutch" version="1.4">
    <types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true"
            omitNorms="true"/>
        <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>

        <fieldType name="text" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.StopFilterFactory"
                    ignoreCase="true" words="stopwords.txt"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"
                    catenateWords="1" catenateNumbers="1" catenateAll="0"
                    splitOnCaseChange="1"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.EnglishPorterFilterFactory"
                    protected="protwords.txt"/>
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
            </analyzer>
        </fieldType>
        <fieldType name="url" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"/>
            </analyzer>
        </fieldType>
    </types>
    <fields>
        <field name="id" type="string" stored="true" indexed="true"/>

        <!-- core fields -->
        <field name="segment" type="string" stored="true" indexed="false"/>
        <field name="digest" type="string" stored="true" indexed="false"/>
        <field name="boost" type="float" stored="true" indexed="false"/>

        <!-- fields for index-basic plugin -->
        <field name="host" type="string" stored="false" indexed="true"/>
        <field name="url" type="url" stored="true" indexed="true"
            required="true"/>
        <field name="content" type="text" stored="false" indexed="true"/>
        <field name="title" type="text" stored="true" indexed="true"/>
        <field name="cache" type="string" stored="true" indexed="false"/>
        <field name="tstamp" type="date" stored="true" indexed="false"/>

        <!-- fields for index-anchor plugin -->
        <field name="anchor" type="string" stored="true" indexed="true"
            multiValued="true"/>

        <!-- fields for index-more plugin -->
        <field name="type" type="string" stored="true" indexed="true"
            multiValued="true"/>
        <field name="contentLength" type="long" stored="true"
            indexed="false"/>
        <field name="lastModified" type="date" stored="true"
            indexed="false"/>
        <field name="date" type="date" stored="true" indexed="true"/>

        <!-- fields for languageidentifier plugin -->
        <field name="lang" type="string" stored="true" indexed="true"/>

        <!-- fields for subcollection plugin -->
        <field name="subcollection" type="string" stored="true"
            indexed="true" multiValued="true"/>

<!-- fields for feed plugin (tag is also used by microformats-reltag)-->
        <field name="author" type="string" stored="true" indexed="true"/>
<field name="tag" type="string" stored="true" indexed="true" multiValued="true"/>
        <field name="feed" type="string" stored="true" indexed="true"/>
        <field name="publishedDate" type="date" stored="true"
            indexed="true"/>
        <field name="updatedDate" type="date" stored="true"
            indexed="true"/>

        <!-- fields for creativecommons plugin -->
        <field name="cc" type="string" stored="true" indexed="true"
            multiValued="true"/>
    </fields>
    <uniqueKey>id</uniqueKey>
    <defaultSearchField>content</defaultSearchField>
    <solrQueryParser defaultOperator="OR"/>
</schema>

<schema name="nutch" version="1.4">
    <types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true"
            omitNorms="true"/>
        <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>

        <fieldType name="text" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.StopFilterFactory"
                    ignoreCase="true" words="stopwords.txt"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"
                    catenateWords="1" catenateNumbers="1" catenateAll="0"
                    splitOnCaseChange="1"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.EnglishPorterFilterFactory"
                    protected="protwords.txt"/>
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
            </analyzer>
        </fieldType>
        <fieldType name="url" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"/>
            </analyzer>
        </fieldType>
    </types>
    <fields>
        <field name="id" type="string" stored="true" indexed="true"/>

        <!-- core fields -->
        <field name="segment" type="string" stored="true" indexed="false"/>
        <field name="digest" type="string" stored="true" indexed="false"/>
        <field name="boost" type="float" stored="true" indexed="false"/>

        <!-- fields for index-basic plugin -->
        <field name="host" type="string" stored="false" indexed="true"/>
        <field name="url" type="url" stored="true" indexed="true"
            required="true"/>
        <field name="content" type="text" stored="false" indexed="true"/>
        <field name="title" type="text" stored="true" indexed="true"/>
        <field name="cache" type="string" stored="true" indexed="false"/>
        <field name="tstamp" type="date" stored="true" indexed="false"/>

        <!-- fields for index-anchor plugin -->
        <field name="anchor" type="string" stored="true" indexed="true"
            multiValued="true"/>

        <!-- fields for index-more plugin -->
        <field name="type" type="string" stored="true" indexed="true"
            multiValued="true"/>
        <field name="contentLength" type="long" stored="true"
            indexed="false"/>
        <field name="lastModified" type="date" stored="true"
            indexed="false"/>
        <field name="date" type="date" stored="true" indexed="true"/>

        <!-- fields for languageidentifier plugin -->
        <field name="lang" type="string" stored="true" indexed="true"/>

        <!-- fields for subcollection plugin -->
        <field name="subcollection" type="string" stored="true"
            indexed="true" multiValued="true"/>

<!-- fields for feed plugin (tag is also used by microformats-reltag)-->
        <field name="author" type="string" stored="true" indexed="true"/>
<field name="tag" type="string" stored="true" indexed="true" multiValued="true"/>
        <field name="feed" type="string" stored="true" indexed="true"/>
        <field name="text" type="string" stored="true" indexed="true"/>
        <field name="publishedDate" type="date" stored="true"
            indexed="true"/>
        <field name="updatedDate" type="date" stored="true"
            indexed="true"/>

        <!-- fields for creativecommons plugin -->
        <field name="cc" type="string" stored="true" indexed="true"
            multiValued="true"/>
    <copyField source="*" dest="text" indexed="true" stored="true"/>
    </fields>
    <uniqueKey>id</uniqueKey>
    <defaultSearchField>content</defaultSearchField>
    <solrQueryParser defaultOperator="OR"/>
</schema>

These schemas mention Nutch because Nutch tutorial tells me to overwrite Solr's schema with its own.


On 10/08/2012 01:33 PM, Jan Høydahl wrote:
Hi,

Please describe your environemnt better

* How do you "crawl", using which crawler?
* To which RequestHandler do you send the docs?
* Which version of Solr
* Can you share your schema and other relevant config with us?

--
Jan Høydahl, search solution architect
Cominvent AS - www.cominvent.com
Solr Training - www.solrtraining.com

8. okt. 2012 kl. 12:11 skrev Tolga <to...@ozses.net>:

Hi,

There are two servers with the same configuration. I crawl the same URL. One of 
them is giving the following error:

Caused by: org.apache.solr.common.SolrException: ERROR: 
[doc=http://bilgisayarciniz.org/] multiple values encountered for non 
multiValued copy field text: bilgisayarciniz web hizmetleri

I really fail to understand. Why does this happen?

Regards,

PS: Neither server has multiValued=true for title field.

Reply via email to