The lang field is not the one causing problems; the content field is. See the 
debugQuery output, where the fieldNorm value is shown. You can add 
omitNorms="true" to the content field in your schema. 

After that, restart your Solr server. It's also a good idea to reindex all 
segments.

On Monday 18 April 2011 13:08:35 Klaus Tachtler wrote:
> Hi Markus,
> 
> following is my nutch schema.xml - there is a lang field - can you give
> me advice on a better definition for the lang field in the
> schema.xml?
> 
> <?xml version="1.0" encoding="UTF-8" ?>
>      <!--
>          Licensed to the Apache Software Foundation (ASF) under one or
>          more contributor license agreements. See the NOTICE file
>          distributed with this work for additional information regarding
>          copyright ownership. The ASF licenses this file to You under the
>          Apache License, Version 2.0 (the "License"); you may not use
>          this file except in compliance with the License. You may obtain
>          a copy of the License at
>          http://www.apache.org/licenses/LICENSE-2.0 Unless required by
>          applicable law or agreed to in writing, software distributed
>          under the License is distributed on an "AS IS" BASIS, WITHOUT
>          WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
>          See the License for the specific language governing permissions
>          and limitations under the License.
>      -->
>      <!--
>          Description: This document contains solr schema definition to be
>          used with solr integration currently build into Nutch. See
>          https://issues.apache.org/jira/browse/NUTCH-442
>          https://issues.apache.org/jira/browse/NUTCH-699 for more info.
>      -->
> <schema name="nutch" version="1.1">
>      <types>
>          <fieldType name="string" class="solr.StrField"
>              sortMissingLast="true" omitNorms="true"/>
>          <fieldType name="long" class="solr.LongField"
>              omitNorms="true"/>
>          <fieldType name="float" class="solr.FloatField"
>              omitNorms="true"/>
>          <fieldType name="text" class="solr.TextField"
>              positionIncrementGap="100">
>              <analyzer>
>                  <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>                  <filter class="solr.StopFilterFactory"
>                      ignoreCase="true" words="stopwords.txt"/>
>                  <filter class="solr.WordDelimiterFilterFactory"
>                      generateWordParts="1" generateNumberParts="1"
>                      catenateWords="1" catenateNumbers="1" catenateAll="0"
>                      splitOnCaseChange="1"/>
>                  <filter class="solr.LowerCaseFilterFactory"/>
>                  <filter class="solr.EnglishPorterFilterFactory"
>                      protected="protwords.txt"/>
>                  <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>              </analyzer>
>          </fieldType>
>          <fieldType name="url" class="solr.TextField"
>              positionIncrementGap="100">
>              <analyzer>
>                  <tokenizer class="solr.StandardTokenizerFactory"/>
>                  <filter class="solr.LowerCaseFilterFactory"/>
>                  <filter class="solr.WordDelimiterFilterFactory"
>                      generateWordParts="1" generateNumberParts="1"/>
>                  <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>              </analyzer>
>          </fieldType>
>      </types>
>      <fields>
>          <field name="id" type="string" stored="true" indexed="true"/>
> 
>          <!-- core fields -->
>          <field name="segment" type="string" stored="true"
>              indexed="false"/>
>          <field name="digest" type="string" stored="true"
>              indexed="false"/>
>          <field name="boost" type="float" stored="true"
>              indexed="false"/>
> 
>          <!-- fields for index-basic plugin -->
>          <field name="host" type="url" stored="false" indexed="true"/>
>          <field name="site" type="string" stored="false" indexed="true"/>
>          <field name="url" type="url" stored="true" indexed="true"
>              required="true"/>
>          <field name="content" type="text" stored="false" indexed="true"/>
>          <field name="title" type="text" stored="true" indexed="true"/>
>          <field name="cache" type="string" stored="true" indexed="false"/>
>          <field name="tstamp" type="long" stored="true" indexed="false"/>
> 
>          <!-- fields for index-anchor plugin -->
>          <field name="anchor" type="string" stored="true" indexed="true"
>              multiValued="true"/>
> 
>          <!-- fields for index-more plugin -->
>          <field name="type" type="string" stored="true" indexed="true"
>              multiValued="true"/>
>          <field name="contentLength" type="long" stored="true"
>              indexed="false"/>
>          <field name="lastModified" type="long" stored="true"
>              indexed="false"/>
>          <field name="date" type="string" stored="true" indexed="true"/>
> 
>          <!-- fields for languageidentifier plugin -->
>          <field name="lang" type="string" stored="true" indexed="true"/>
> 
>          <!-- fields for subcollection plugin -->
>          <field name="subcollection" type="string" stored="true"
>              indexed="true" multiValued="true"/>
> 
>          <!-- fields for feed plugin -->
>          <field name="author" type="string" stored="true" indexed="true"/>
>          <field name="tag" type="string" stored="true" indexed="true"/>
>          <field name="feed" type="string" stored="true" indexed="true"/>
>          <field name="publishedDate" type="string" stored="true"
>              indexed="true"/>
>          <field name="updatedDate" type="string" stored="true"
>              indexed="true"/>
>      </fields>
>      <uniqueKey>id</uniqueKey>
>      <defaultSearchField>content</defaultSearchField>
>      <solrQueryParser defaultOperator="OR"/>
> </schema>
> 
> Thank you,
> 
> Klaus.
> 
> 
> --
> 
> ------------------------------------------------
> e-Mail  : [email protected]
> Homepage: http://www.tachtler.net
> DokuWiki: http://www.dokuwiki.tachtler.net
> ------------------------------------------------

-- 
Markus Jelsma - CTO - Openindex
http://www.linkedin.com/in/markus17
050-8536620 / 06-50258350

Reply via email to