Unless you stored all the original fields, I think you're stuck with
re-indexing all your docs.

Best
Erick


On Mon, Nov 19, 2012 at 12:21 PM, Ravi Solr <ravis...@gmail.com> wrote:

> Hello,
>       I have a couple of questions. I need an easy way to clean up a gaffe
> with copyFields (close to a million docs). Is there any way we could remove
> duplicates emitted via copyField while re-indexing? Also, is there a way to
> query multiValued fields to return only docs that have duplicated values?
>
> The fields having the issue are declared as follows:
>
>     <fieldType name="keywordText" class="solr.TextField"
> sortMissingLast="true" omitNorms="true" positionIncrementGap="100">
>       <analyzer type="index">
>         <tokenizer class="solr.KeywordTokenizerFactory"/>
>         <filter class="solr.TrimFilterFactory" />
>         <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="stopwords.txt" enablePositionIncrements="true"/>
>         <filter class="solr.SynonymFilterFactory"
> tokenizerFactory="solr.KeywordTokenizerFactory"
>
> synonyms="person-synonyms.txt,organization-synonyms.txt,location-synonyms.txt,subject-synonyms.txt"
> ignoreCase="true" expand="false" />
>         <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>       </analyzer>
>       <analyzer type="query">
>         <tokenizer class="solr.KeywordTokenizerFactory"/>
>         <filter class="solr.TrimFilterFactory" />
>         <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="stopwords.txt" enablePositionIncrements="true" />
>         <filter class="solr.SynonymFilterFactory"
> tokenizerFactory="solr.KeywordTokenizerFactory"
>
> synonyms="person-synonyms.txt,organization-synonyms.txt,location-synonyms.txt,subject-synonyms.txt"
> ignoreCase="true" expand="false" />
>         <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>       </analyzer>
>     </fieldType>
>
>
>     <fieldType name="text" class="solr.TextField" sortMissingLast="true"
> omitNorms="true" positionIncrementGap="100">
>       <analyzer type="index">
>         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>         <filter class="solr.TrimFilterFactory" />
>         <filter class="solr.LowerCaseFilterFactory"/>
>         <filter class="solr.SynonymFilterFactory"
>
> synonyms="person-synonyms.txt,organization-synonyms.txt,location-synonyms.txt,subject-synonyms.txt"
> ignoreCase="true" expand="true"/>
>         <!-- Case insensitive stop word removal.
> enablePositionIncrements=true ensures that a 'gap' is left to allow for
> accurate phrase queries. -->
>         <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="stopwords.txt" enablePositionIncrements="true" />
>         <filter class="solr.WordDelimiterFilterFactory"
> generateWordParts="0" generateNumberParts="0" catenateWords="0"
> catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"
> protected="protwords.txt"/>
>         <filter class="solr.EnglishPorterFilterFactory"
> protected="protwords.txt"/>
>         <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>       </analyzer>
>       <analyzer type="query">
>         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
>         <filter class="solr.TrimFilterFactory" />
>         <filter class="solr.LowerCaseFilterFactory"/>
>         <filter class="solr.SynonymFilterFactory"
>
> synonyms="person-synonyms.txt,organization-synonyms.txt,location-synonyms.txt,subject-synonyms.txt"
> ignoreCase="true" expand="true"/>
>         <filter class="solr.StopFilterFactory" ignoreCase="true"
> words="stopwords.txt" enablePositionIncrements="true" />
>         <filter class="solr.WordDelimiterFilterFactory"
> generateWordParts="0" generateNumberParts="0" catenateWords="0"
> catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"
> protected="protwords.txt"/>
>         <filter class="solr.EnglishPorterFilterFactory"
> protected="protwords.txt"/>
>         <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
>       </analyzer>
>     </fieldType>
>
> <field name="city" type="keywordText" indexed="true" stored="true"
> multiValued="true" termVectors="true"/>
>
> <field name="cityLower" type="text" indexed="true" stored="true"
> multiValued="true" termVectors="false"/>
>
>
> <copyField source="city" dest="cityLower"/>
>
> Query results look as follows:
>
>
> <arr name="city">
>     <str>No city</str>
> </arr>
> <arr name="cityLower">
>    <str>No city</str>
>    <str>No city</str>
>    <str>No city</str>
>
>    <str>No city</str>
>    <str>No city</str>
>    <str>No city</str>
>    <str>No city</str>
>    <str>No city</str>
>    <str>No city</str>
>
>    <str>No city</str>
> </arr>
>
> Thanks,
>
> Ravi Kiran Bhaskar
>

Reply via email to