Hi, everyone:
I have integrated Nutch, Solr, and HBase to build a search engine. It works well,
except that some fields in schema.xml are not indexed into Solr.
The fields under "<!-- core fields -->" and "<!-- fields for index-basic plugin
-->" are indexed into Solr, but other fields, such as those under "<!-- fields
for index-anchor plugin -->" and "<!-- fields for index-more plugin -->", are not.
What is the problem? Is there any other work that needs to be done for that?

The schema.xml looks like this:

<schema name="nutch" version="1.5">
    <types>
        <!-- Primitive (non-analyzed) types. -->
        <fieldType name="string" class="solr.StrField" sortMissingLast="true"
            omitNorms="true"/>
        <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>
        <fieldType name="date" class="solr.TrieDateField" precisionStep="0"
            omitNorms="true" positionIncrementGap="0"/>

        <!-- Analyzed free text: whitespace tokenization, stop-word removal,
             word-delimiter splitting, lowercasing, duplicate-token removal. -->
        <fieldType name="text" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.StopFilterFactory"
                    ignoreCase="true" words="stopwords.txt"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"
                    catenateWords="1" catenateNumbers="1" catenateAll="0"
                    splitOnCaseChange="1"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
            </analyzer>
        </fieldType>

        <!-- Analyzed URL text: standard tokenization, lowercasing,
             word-delimiter splitting.
             NOTE(review): this fieldType was garbled and partially duplicated
             in the pasted message; it is reconstructed here from the intact
             duplicated fragment. Confirm against the deployed schema.xml. -->
        <fieldType name="url" class="solr.TextField"
            positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.WordDelimiterFilterFactory"
                    generateWordParts="1" generateNumberParts="1"/>
            </analyzer>
        </fieldType>
    </types>

    <fields>
        <!-- Unique document key (see <uniqueKey> below). -->
        <field name="id" type="string" stored="true" indexed="true"/>

        <!-- core fields -->
        <field name="batchId" type="string" stored="true" indexed="false"/>
        <field name="digest" type="string" stored="true" indexed="false"/>
        <field name="boost" type="float" stored="true" indexed="false"/>

        <!-- fields for index-basic plugin -->
        <field name="host" type="url" stored="false" indexed="true"/>
        <field name="url" type="url" stored="true" indexed="true"
            required="true"/>
        <field name="content" type="text" stored="true" indexed="true"/>
        <field name="title" type="text" stored="true" indexed="true"/>
        <field name="cache" type="string" stored="true" indexed="false"/>
        <field name="tstamp" type="date" stored="true" indexed="true"/>

        <!-- Version field used by Solr itself (update log / optimistic
             concurrency), not by a Nutch indexing plugin. -->
        <field name="_version_" type="long" indexed="true" stored="true"/>

        <!-- fields for index-anchor plugin -->
        <field name="anchor" type="string" stored="true" indexed="true"
            multiValued="true"/>

        <!-- fields for index-more plugin -->
        <field name="type" type="string" stored="true" indexed="true"
            multiValued="true"/>
        <field name="contentLength" type="long" stored="true"
            indexed="true"/>
        <field name="lastModified" type="date" stored="true"
            indexed="true"/>
        <field name="date" type="date" stored="true" indexed="true"/>

        <!-- fields for languageidentifier plugin -->
        <field name="lang" type="string" stored="true" indexed="true"/>

        <!-- fields for subcollection plugin -->
        <field name="subcollection" type="string" stored="true"
            indexed="true" multiValued="true"/>

        <!-- fields for feed plugin (tag is also used by microformats-reltag) -->
        <field name="author" type="string" stored="true" indexed="true"/>
        <field name="tag" type="string" stored="true" indexed="true"
            multiValued="true"/>
        <field name="feed" type="string" stored="true" indexed="true"/>
        <field name="publishedDate" type="date" stored="true"
            indexed="true"/>
        <field name="updatedDate" type="date" stored="true"
            indexed="true"/>

        <!-- fields for creativecommons plugin -->
        <field name="cc" type="string" stored="true" indexed="true"
            multiValued="true"/>

        <!-- fields for tld plugin -->
        <field name="tld" type="string" stored="false" indexed="false"/>
    </fields>

    <uniqueKey>id</uniqueKey>
    <defaultSearchField>content</defaultSearchField>
    <solrQueryParser defaultOperator="OR"/>
</schema>


thanks,[email protected]

Reply via email to