It worked. Thank you very much for helping.

On Wed, 8 Apr 2015 at 18:28 Alexis Hope <[email protected]> wrote:

> I believe that's because Nutch has a property metatag.keywords that it's
> trying to send to Solr.
> Solr then complains that it doesn't know where to put it, because it's not
> defined in fields.
>
> One option that worked for me was defining a dynamic field, which catches
> all but doesn't store or index them (effectively ignoring fields that aren't
> defined).
>
> Pop this inside the fields tag and see if it helps.
> <dynamicField name="*" type="string" indexed="false" stored="false"
> multiValued="false" />
>
> On Wed, Apr 8, 2015 at 1:27 PM, Anchit Jain <[email protected]>
> wrote:
>
> > I have crawled a website using nutch.
> > When I try to index it with solr I get the following error:
> > org.apache.solr.common.SolrException: ERROR: [doc=http://xyz.htm]
> > unknown field 'metatag.keywords'
> > *unknown field 'metatag.keywords'*
> >
> > I cannot figure out where the error is, as I have not defined any field
> > in schema.xml for metatags. I just copied the schema.xml from nutch into
> > solr.
> > I am using Nutch 1.9 with Solr 4.10
> >
> > My *schema.xml* for *solr*
> >
> > <?xml version="1.0" encoding="UTF-8" ?>
> > <schema name="nutch" version="1.5">
> >     <types>
> >         <fieldType name="string" class="solr.StrField"
> > sortMissingLast="true"
> >             omitNorms="true"/>
> >         <fieldType name="long" class="solr.TrieLongField"
> > precisionStep="0"
> >             omitNorms="true" positionIncrementGap="0"/>
> >         <fieldType name="float" class="solr.TrieFloatField"
> > precisionStep="0"
> >             omitNorms="true" positionIncrementGap="0"/>
> >         <fieldType name="date" class="solr.TrieDateField"
> > precisionStep="0"
> >             omitNorms="true" positionIncrementGap="0"/>
> >
> >         <fieldType name="text" class="solr.TextField"
> >             positionIncrementGap="100">
> >             <analyzer>
> >                 <tokenizer class="solr.WhitespaceTokenizerFactory"/>
> >                 <filter class="solr.StopFilterFactory"
> >                     ignoreCase="true" words="stopwords.txt"/>
> >                 <filter class="solr.WordDelimiterFilterFactory"
> >                     generateWordParts="1" generateNumberParts="1"
> >                     catenateWords="1" catenateNumbers="1" catenateAll="0"
> >                     splitOnCaseChange="1"/>
> >                 <filter class="solr.LowerCaseFilterFactory"/>
> >                 <!--<filter class="solr.EnglishPorterFilterFactory"
> >                     protected="protwords.txt"/>-->
> >                 <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
> >             </analyzer>
> >         </fieldType>
> >         <fieldType name="url" class="solr.TextField"
> >             positionIncrementGap="100">
> >             <analyzer>
> >                 <tokenizer class="solr.StandardTokenizerFactory"/>
> >                 <filter class="solr.LowerCaseFilterFactory"/>
> >                 <filter class="solr.WordDelimiterFilterFactory"
> >                     generateWordParts="1" generateNumberParts="1"/>
> >             </analyzer>
> >         </fieldType>
> >     </types>
> >     <fields>
> >         <field name="id" type="string" stored="true" indexed="true"
> >             required="true"/>
> >         <field name="_version_" type="long" indexed="true"
> >             stored="true"/>
> >         <!-- core fields -->
> >         <field name="segment" type="string" stored="true"
> >             indexed="false"/>
> >         <field name="digest" type="string" stored="true"
> >             indexed="false"/>
> >         <field name="boost" type="float" stored="true" indexed="false"/>
> >
> >         <!-- fields for index-basic plugin -->
> >         <field name="host" type="string" stored="false" indexed="true"/>
> >         <field name="url" type="url" stored="true" indexed="true"/>
> >         <field name="content" type="text" stored="true" indexed="true"/>
> >         <field name="title" type="text" stored="true" indexed="true"/>
> >         <field name="cache" type="string" stored="true" indexed="false"/>
> >         <field name="tstamp" type="date" stored="true" indexed="false"/>
> >
> >         <!-- fields for index-anchor plugin -->
> >         <field name="anchor" type="string" stored="true" indexed="true"
> >             multiValued="true"/>
> >
> >         <!-- fields for index-more plugin -->
> >         <field name="type" type="string" stored="true" indexed="true"
> >             multiValued="true"/>
> >         <field name="contentLength" type="long" stored="true"
> >             indexed="false"/>
> >         <field name="lastModified" type="date" stored="true"
> >             indexed="false"/>
> >         <field name="date" type="date" stored="true" indexed="true"/>
> >
> >         <!-- fields for languageidentifier plugin -->
> >         <field name="lang" type="string" stored="true" indexed="true"/>
> >
> >         <!-- fields for subcollection plugin -->
> >         <field name="subcollection" type="string" stored="true"
> >             indexed="true" multiValued="true"/>
> >
> >         <!-- fields for feed plugin (tag is also used by
> > microformats-reltag)-->
> >         <field name="author" type="string" stored="true" indexed="true"/>
> >         <field name="tag" type="string" stored="true" indexed="true"
> > multiValued="true"/>
> >         <field name="feed" type="string" stored="true" indexed="true"/>
> >         <field name="publishedDate" type="date" stored="true"
> >             indexed="true"/>
> >         <field name="updatedDate" type="date" stored="true"
> >             indexed="true"/>
> >
> >         <!-- fields for creativecommons plugin -->
> >         <field name="cc" type="string" stored="true" indexed="true"
> >             multiValued="true"/>
> >
> >         <!-- fields for tld plugin -->
> >         <field name="tld" type="string" stored="false" indexed="false"/>
> >     </fields>
> >     <uniqueKey>id</uniqueKey>
> >     <defaultSearchField>content</defaultSearchField>
> >     <solrQueryParser defaultOperator="OR"/>
> > </schema>
> >
> > my *solrindex-mapping.xml*
> >
> > <mapping>
> >     <fields>
> >         <field dest="content" source="content"/>
> >         <field dest="title" source="title"/>
> >         <field dest="host" source="host"/>
> >         <field dest="segment" source="segment"/>
> >         <field dest="boost" source="boost"/>
> >         <field dest="digest" source="digest"/>
> >         <field dest="tstamp" source="tstamp"/>
> >     </fields>
> >     <uniqueKey>id</uniqueKey>
> > </mapping>
> >
> >
> >
>

Reply via email to