Hi everyone,
I have integrated Nutch, Solr, and HBase to build a search engine. It works well, except
that some fields in schema.xml are not indexed into Solr.
The fields under "<!-- core fields -->" and "<!-- fields for index-basic plugin
-->" are indexed into Solr, but other fields, such as those under "<!-- fields
for index-anchor plugin -->" and "<!-- fields for index-more plugin -->", are not.
What is the problem? Is there any other work that needs to be done for these fields?
The schema.xml looks like this: <schema name="nutch" version="1.5"> <types>
<fieldType name="string" class="solr.StrField" sortMissingLast="true"
omitNorms="true"/> <fieldType name="long" class="solr.TrieLongField"
precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
omitNorms="true" positionIncrementGap="0"/> <fieldType name="date"
class="solr.TrieDateField" precisionStep="0" omitNorms="true"
positionIncrementGap="0"/> <fieldType name="text" class="solr.TextField"
positionIncrementGap="100"> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <filter
class="solr.StopFilterFactory" ignoreCase="true"
words="stopwords.txt"/> <filter
class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1"
catenateWords="1" catenateNumbers="1" catenateAll="0"
splitOnCaseChange="1"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.RemoveDuplicatesTokenFilterFactory"/> </analyzer>
</fieldType> <fieldType name="url" class="solr.TextField"
positionIncrementGap="100"> <analyzer> <tokenizer
class="solr.StandardTokenizerFactory"/> <filter
class="solr.LowerCaseFilterFactory"/> <filter
class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1"/> </analyzer>
</fieldType> </types> <fields> <field name="id" type="string"
stored="true" indexed="true"/>
<!-- core fields --> <field name="batchId" type="string"
stored="true" indexed="false"/> <field name="digest" type="string"
stored="true" indexed="false"/> <field name="boost" type="float"
stored="true" indexed="false"/>
<!-- fields for index-basic plugin --> <field name="host"
type="url" stored="false" indexed="true"/> <field name="url" type="url"
stored="true" indexed="true" required="true"/> <field
name="content" type="text" stored="true" indexed="true"/> <field
name="title" type="text" stored="true" indexed="true"/> <field
name="cache" type="string" stored="true" indexed="false"/> <field
name="tstamp" type="date" stored="true" indexed="true"/>
<field name="_version_" type="long" indexed="true" stored="true"/>
<!-- fields for index-anchor plugin --> <field name="anchor"
type="string" stored="true" indexed="true" multiValued="true"/>
<!-- fields for index-more plugin --> <field name="type"
type="string" stored="true" indexed="true" multiValued="true"/>
<field name="contentLength" type="long" stored="true"
indexed="true"/> <field name="lastModified" type="date" stored="true"
indexed="true"/> <field name="date" type="date" stored="true"
indexed="true"/>
<!-- fields for languageidentifier plugin --> <field name="lang"
type="string" stored="true" indexed="true"/>
<!-- fields for subcollection plugin --> <field
name="subcollection" type="string" stored="true" indexed="true"
multiValued="true"/>
<!-- fields for feed plugin (tag is also used by
microformats-reltag)--> <field name="author" type="string" stored="true"
indexed="true"/> <field name="tag" type="string" stored="true"
indexed="true" multiValued="true"/> <field name="feed" type="string"
stored="true" indexed="true"/> <field name="publishedDate" type="date"
stored="true" indexed="true"/> <field name="updatedDate"
type="date" stored="true" indexed="true"/>
<!-- fields for creativecommons plugin --> <field name="cc"
type="string" stored="true" indexed="true" multiValued="true"/>
<!-- fields for tld plugin --> <field name="tld" type="string"
stored="false" indexed="false"/> </fields> <uniqueKey>id</uniqueKey>
<defaultSearchField>content</defaultSearchField> <solrQueryParser
defaultOperator="OR"/></schema>
Thanks, [email protected]