wrong query results with wdf and ngtf

Andreas Owen Thu, 20 Mar 2014 02:10:38 -0700

Is there a way to tell NGramFilterFactory while indexing that numbers shall 
never be tokenized? Then the query should be able to find numbers.


Or do I have to change the ngram-min for numbers (not alpha) to 1, if that is 
possible? So to speak, put the whole number as a token and not all possible tokens.

Solr analysis shows only WDF has no underscore in its tokens, the rest have 
it. Can I tell the query to search numbers differently with NGTF, WT, LCF or 
whatever?

I also tried <filter class="solr.WordDelimiterFilterFactory" 
types="at-under-alpha.txt"/>
        @ => ALPHA
        _ => ALPHA

I have gotten nearly everything to work. There are two queries where I don't get 
back what I want.

        "avaloq frage 1"        -> only returns if i set minGramSize=1 while 
indexing
        "yh_cug"                -> query parser doesn't remove "_" but the 
indexer does (WDF) so there is no match

Is there a way to also query the whole term "avaloq frage 1" without tokenizing 
it?

Fieldtype:

<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
  <!-- German full-text field type with separate index-time and query-time
       analyzers. The two chains are not symmetric: they use different
       tokenizers, and only the index chain applies n-grams and a second
       word-delimiter pass. -->

  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- custom character types are read from at-under-alpha.txt -->
    <filter class="solr.WordDelimiterFilterFactory" types="at-under-alpha.txt"/>
    <!-- remove common words -->
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_de.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.GermanNormalizationFilterFactory"/>
    <!-- remove noun/adjective inflections like plural endings -->
    <filter class="solr.SnowballPorterFilterFactory" language="German"/>
    <!-- NOTE(review): with minGramSize="3", tokens shorter than three
         characters (e.g. a lone digit) presumably produce no grams at all;
         confirm on the Solr analysis screen. -->
    <filter class="solr.NGramFilterFactory" minGramSize="3" maxGramSize="15"/>
    <!-- NOTE(review): this second word-delimiter pass has no types attribute,
         unlike the first pass and unlike the query analyzer, and it runs on
         the n-gram output. It may be what strips "_" at index time only;
         confirm before relying on it. -->
    <filter class="solr.WordDelimiterFilterFactory"
            generateWordParts="1"
            generateNumberParts="1"
            catenateWords="1"
            catenateNumbers="1"
            catenateAll="0"
            splitOnCaseChange="1"/>
  </analyzer>

  <analyzer type="query">
    <!-- Class name corrected to its canonical spelling (lowercase "s");
         the index analyzer above uses the standard tokenizer instead. -->
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <!-- custom character types are read from at-under-alpha.txt -->
    <filter class="solr.WordDelimiterFilterFactory" types="at-under-alpha.txt"/>
    <!-- remove common words -->
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="lang/stopwords_de.txt"
            format="snowball"
            enablePositionIncrements="true"/>
    <filter class="solr.GermanNormalizationFilterFactory"/>
    <filter class="solr.SnowballPorterFilterFactory" language="German"/>
  </analyzer>
</fieldType>


Solrconfig:

> <queryParser name="synonym_edismax"
> class="solr.SynonymExpandingExtendedDismaxQParserPlugin">
>   <lst name="synonymAnalyzers">
> <lst name="myCoolAnalyzer">
>   <lst name="tokenizer">
> <str name="class">standard</str>
>   </lst>
>   <lst name="filter">
> <str name="class">shingle</str>
> <str name="outputUnigramsIfNoShingles">true</str>
> <str name="outputUnigrams">true</str>
> <str name="minShingleSize">2</str>
> <str name="maxShingleSize">4</str>
>   </lst>
>   <lst name="filter">
> <str name="class">synonym</str>
> <str name="tokenizerFactory">solr.KeywordTokenizerFactory</str>
> <str name="synonyms">synonyms.txt</str>
> <str name="expand">true</str>
> <str name="ignoreCase">true</str>
>   </lst>
> </lst>
>   </lst>
> </queryParser>
> 
> <requestHandler name="/select2" class="solr.SearchHandler">
>      <lst name="defaults">
>        <str name="echoParams">explicit</str>
>        <int name="rows">10</int>
>        <str name="defType">synonym_edismax</str>
>    <str name="synonyms">true</str>
>    <str name="qf">plain_text^10 editorschoice^200
> title^20 h_*^14
> tags^10 thema^15 inhaltstyp^6 breadcrumb^6 doctype^10
> contentmanager^5 links^5
> last_modified^5 url^5
>    </str>
>    <str name="bq">(expiration:[NOW TO *] OR (*:* 
> -expiration:*))^6</str>
>    <str name="bf">div(clicks,max(displays,1))^8</str> <!-- tested -->
> 
>        <str name="df">text</str>
>    <str name="fl">*,path,score</str>
>    <str name="wt">json</str>
>    <str name="q.op">AND</str>
> 
>    <!-- Highlighting defaults -->
>        <str name="hl">on</str>
>        <str name="hl.fl">plain_text,title</str>
>    <str name="hl.fragSize">200</str>
>    <str name="hl.simple.pre">&lt;b&gt;</str>
>        <str name="hl.simple.post">&lt;/b&gt;</str>
> 
> <!-- <lst name="invariants"> -->
>     <str name="facet">on</str>
> <str name="facet.mincount">1</str>
>         <str name="facet.field">{!ex=inhaltstyp_s}inhaltstyp_s</str>
> <str name="f.inhaltstyp_s.facet.sort">index</str>
> <str name="facet.field">{!ex=doctype}doctype</str>
> <str name="f.doctype.facet.sort">index</str>
> <str name="facet.field">{!ex=thema_f}thema_f</str>
> <str name="f.thema_f.facet.sort">index</str>
> <str name="facet.field">{!ex=author_s}author_s</str>
> <str name="f.author_s.facet.sort">index</str>
> <str
> name="facet.field">{!ex=sachverstaendiger_s}sachverstaendiger_s</str>
> <str name="f.sachverstaendiger_s.facet.sort">index</str>
> <str name="facet.field">{!ex=veranstaltung_s}veranstaltung_s</str>
> <str name="f.veranstaltung_s.facet.sort">index</str>
> <str name="facet.date">{!ex=last_modified}last_modified</str>
> <str name="facet.date.gap">+1MONTH</str>
> <str name="facet.date.end">NOW/MONTH+1MONTH</str>
> <str name="facet.date.start">NOW/MONTH-36MONTHS</str>
> <str name="facet.date.other">after</str>
> 
>        </lst>
> </requestHandler>

wrong query results with wdf and ngtf

Reply via email to