Re: svn commit: r979049 - in /lucene/dev/trunk/solr: ./ example/solr/conf/ src/java/org/apache/solr/schema/ src/java/org/apache/solr/search/ src/test/org/apache/solr/search/ src/test/test-files/solr/conf/

Koji Sekiguchi Sun, 25 Jul 2010 15:35:07 -0700

Don't we need a note about introducing the new attribute
in the schema version notes?


> 1.3: removed optional field compress feature

Koji

--
http://www.rondhuit.com/en/



(10/07/26 0:13), [email protected] wrote:

Author: yonik
Date: Sun Jul 25 15:13:05 2010
New Revision: 979049

URL: http://svn.apache.org/viewvc?rev=979049&view=rev
Log:
SOLR-2015: add boolean attribute autoGeneratePhraseQueries to TextField

Added:
     
lucene/dev/trunk/solr/src/test/org/apache/solr/search/TestSolrQueryParser.java
Modified:
     lucene/dev/trunk/solr/CHANGES.txt
     lucene/dev/trunk/solr/example/solr/conf/schema.xml
     lucene/dev/trunk/solr/src/java/org/apache/solr/schema/TextField.java
     lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java
     lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema12.xml

Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=979049&r1=979048&r2=979049&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Sun Jul 25 15:13:05 2010
@@ -203,6 +203,15 @@ New Features
             http://wiki.apache.org/solr/SpatialSearch and the example.  
Refactored some items in Lucene spatial.
             Removed SpatialTileField as the underlying CartesianTier is broken 
beyond repair and is going to be moved. (gsingers)

+* SOLR-2015: Add a boolean attribute autoGeneratePhraseQueries to TextField.
+  autoGeneratePhraseQueries="true" (the default) causes the query parser to
+  generate phrase queries if multiple tokens are generated from a single
+  non-quoted analysis string.  For example WordDelimiterFilter splitting 
text:pdp-11
+  will cause the parser to generate text:"pdp 11" rather than (text:PDP OR 
text:11).
+  Note that autoGeneratePhraseQueries="true" tends to not work well for non 
whitespace
+  delimited languages. (yonik)
+
+
  Optimizations
  ----------------------


Modified: lucene/dev/trunk/solr/example/solr/conf/schema.xml
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/solr/conf/schema.xml?rev=979049&r1=979048&r2=979049&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/solr/conf/schema.xml Sun Jul 25 15:13:05 2010
@@ -213,8 +213,12 @@
          words on case-change, alpha numeric boundaries, and non-alphanumeric 
chars,
          so that a query of "wifi" or "wi fi" could match a document containing 
"Wi-Fi".
          Synonyms and stopwords are customized by external files, and stemming 
is enabled.
+        The attribute autoGeneratePhraseQueries="true" (the default) causes 
words that get split to
+        form phrase queries. For example, WordDelimiterFilter splitting 
text:pdp-11 will cause the parser
+        to generate text:"pdp 11" rather than (text:PDP OR text:11).
+        NOTE: autoGeneratePhraseQueries="true" tends to not work well for non 
whitespace delimited languages.
          -->
-<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
+<fieldType name="text" class="solr.TextField" positionIncrementGap="100" 
autoGeneratePhraseQueries="true">
        <analyzer type="index">
          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
          <!-- in this example, we will only use synonyms at query time

Modified: lucene/dev/trunk/solr/src/java/org/apache/solr/schema/TextField.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/schema/TextField.java?rev=979049&r1=979048&r2=979049&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/schema/TextField.java 
(original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/schema/TextField.java Sun 
Jul 25 15:13:05 2010
@@ -46,13 +46,21 @@ import java.io.StringReader;
   * @version $Id$
   */
  public class TextField extends FieldType {
+  protected boolean autoGeneratePhraseQueries = true;
+
    protected void init(IndexSchema schema, Map<String,String>  args) {
      properties |= TOKENIZED;
      if (schema.getVersion()>  1.1f) properties&= ~OMIT_TF_POSITIONS;
-
+    String autoGeneratePhraseQueriesStr = 
args.remove("autoGeneratePhraseQueries");
+    if (autoGeneratePhraseQueriesStr != null)
+      autoGeneratePhraseQueries = 
Boolean.parseBoolean(autoGeneratePhraseQueriesStr);
      super.init(schema, args);
    }

+  public boolean getAutoGeneratePhraseQueries() {
+    return autoGeneratePhraseQueries;
+  }
+
    public SortField getSortField(SchemaField field, boolean reverse) {
      return getStringSort(field, reverse);
    }

Modified: 
lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java?rev=979049&r1=979048&r2=979049&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java 
(original)
+++ lucene/dev/trunk/solr/src/java/org/apache/solr/search/SolrQueryParser.java 
Sun Jul 25 15:13:05 2010
@@ -142,11 +142,15 @@ public class SolrQueryParser extends Que
          return parser.subQuery(queryText, null).getQuery();
        }
      }
-    //Intercept poly fields, as they get expanded by default to an OR clause of
-    SchemaField sf = schema.getField(field);
-    //TODO: is there anyway to avoid this instance of check?
-    if (sf != null&&  !(sf.getType() instanceof TextField)){//we have a poly 
field, deal with it specially by delegating to the FieldType
-      return sf.getType().getFieldQuery(parser, sf, queryText);
+    SchemaField sf = schema.getFieldOrNull(field);
+    if (sf != null) {
+      FieldType ft = sf.getType();
+      // delegate to type for everything except TextField
+      if (ft instanceof TextField) {
+        return super.getFieldQuery(field, queryText, quoted || 
((TextField)ft).getAutoGeneratePhraseQueries());
+      } else {
+        return sf.getType().getFieldQuery(parser, sf, queryText);
+      }
      }

      // default to a normal field query

Added: 
lucene/dev/trunk/solr/src/test/org/apache/solr/search/TestSolrQueryParser.java
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/org/apache/solr/search/TestSolrQueryParser.java?rev=979049&view=auto
==============================================================================
--- 
lucene/dev/trunk/solr/src/test/org/apache/solr/search/TestSolrQueryParser.java 
(added)
+++ 
lucene/dev/trunk/solr/src/test/org/apache/solr/search/TestSolrQueryParser.java 
Sun Jul 25 15:13:05 2010
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.search;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.junit.BeforeClass;
+import org.junit.After;
+import org.junit.Test;
+
+
+public class TestSolrQueryParser extends SolrTestCaseJ4 {
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    initCore("solrconfig.xml", "schema12.xml");
+    createIndex();
+  }
+
+  public static void createIndex() {
+    String v;
+    v="how now brown cow";
+    assertU(adoc("id","1", "text",v,  "text_np",v));
+    v="now cow";
+    assertU(adoc("id","2", "text",v,  "text_np",v));
+    assertU(commit());
+  }
+
+  @Test
+  public void testPhrase() {
+    // should generate a phrase of "now cow" and match only one doc
+    assertQ(req("q","text:now-cow", "indent","true")
+        ,"//*[@numFound='1']"
+    );
+    // should generate a query of (now OR cow) and match both docs
+    assertQ(req("q","text_np:now-cow", "indent","true")
+        ,"//*[@numFound='2']"
+    );
+  }
+
+}

Modified: lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema12.xml
URL: 
http://svn.apache.org/viewvc/lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema12.xml?rev=979049&r1=979048&r2=979049&view=diff
==============================================================================
--- lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema12.xml (original)
+++ lucene/dev/trunk/solr/src/test/test-files/solr/conf/schema12.xml Sun Jul 25 
15:13:05 2010
@@ -145,6 +145,35 @@
      </fieldType>


+<!-- field type that doesn't generate phrases from unquoted multiple tokens per 
analysis unit -->
+<fieldType name="text_np" class="solr.TextField" positionIncrementGap="100" 
autoGeneratePhraseQueries="false">
+<analyzer type="index">
+<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+<filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" 
catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+<filter class="solr.LowerCaseFilterFactory"/>
+<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+<filter class="solr.PorterStemFilterFactory"/>
+</analyzer>
+<analyzer type="query">
+<tokenizer class="solr.WhitespaceTokenizerFactory"/>
+<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" 
expand="true"/>
+<filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords.txt"
+                enablePositionIncrements="true"
+                />
+<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" 
catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+<filter class="solr.LowerCaseFilterFactory"/>
+<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+<filter class="solr.PorterStemFilterFactory"/>
+</analyzer>
+</fieldType>
+
      <fieldtype name="nametext" class="solr.TextField">
        <analyzer class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>
      </fieldtype>
@@ -403,6 +432,8 @@
     <field name="weight" type="float" indexed="true" stored="true"/>
     <field name="bday" type="date" indexed="true" stored="true"/>

+<field name="text_np" type="text_np" indexed="true" stored="false"/>
+
     <field name="title_stemmed" type="text" indexed="true" stored="false"/>
     <field name="title_lettertok" type="lettertok" indexed="true" 
stored="false"/>




---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Re: svn commit: r979049 - in /lucene/dev/trunk/solr: ./ example/solr/conf/ src/java/org/apache/solr/schema/ src/java/org/apache/solr/search/ src/test/org/apache/solr/search/ src/test/test-files/solr/conf/

Reply via email to