Author: koji
Date: Sat Nov 29 20:46:20 2008
New Revision: 721758

URL: http://svn.apache.org/viewvc?rev=721758&view=rev
Log:
SOLR-538: added maxChars attribute for copyField

Added:
    lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java   (with 
props)
    lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java   
(with props)
    lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml   
(with props)
Modified:
    lucene/solr/trunk/CHANGES.txt
    lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java
    lucene/solr/trunk/src/java/org/apache/solr/update/DocumentBuilder.java

Modified: lucene/solr/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=721758&r1=721757&r2=721758&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Sat Nov 29 20:46:20 2008
@@ -94,6 +94,10 @@
 18. SOLR-877: Added TermsComponent for accessing Lucene's TermEnum 
capabilities.
     Useful for auto suggest and possibly distributed search.  Not distributed 
search compliant.  (gsingers)
 
+19. SOLR-538: Add maxChars attribute for copyField function so that the length 
limit for destination
+    can be specified.
+    (Georgios Stamatis, Lars Kotthoff, Chris Harris via koji)
+
 Optimizations
 ----------------------
  1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the

Added: lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java?rev=721758&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java Sat Nov 29 
20:46:20 2008
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.schema;
+
+/**
+ * <code>CopyField</code> contains all the information of a valid copy field 
in an index.
+ * 
+ * @since solr 1.4
+ */
+public class CopyField {
+
+  private final SchemaField source;
+  private final SchemaField destination;
+  private final int maxChars;
+  public static final int UNLIMITED = 0;
+
+  public CopyField(final SchemaField source, final SchemaField destination) {
+    this(source, destination, UNLIMITED);
+  }
+
+  /**
+   * @param source The SchemaField of the source field.
+   * @param destination The SchemaField of the destination field.
+   * @param maxChars Maximum number of chars in source field to copy to 
destination field.
+   * If equal to 0, there is no limit.
+   */
+  public CopyField(final SchemaField source, final SchemaField destination,
+      final int maxChars) {
+    if (source == null || destination == null) {
+      throw new IllegalArgumentException(
+          "Source or Destination SchemaField can't be NULL.");
+    }
+    if (maxChars < 0) {
+      throw new IllegalArgumentException(
+          "Attribute maxChars can't have a negative value.");
+    }
+    this.source = source;
+    this.destination = destination;
+    this.maxChars = maxChars;
+  }
+  
+  public String getLimitedValue( final String val ){
+    return maxChars == UNLIMITED || val.length() < maxChars ?
+        val : val.substring( 0, maxChars );
+  }
+
+  /**
+   * @return source SchemaField
+   */
+  public SchemaField getSource() {
+    return source;
+  }
+
+  /**
+   * @return destination SchemaField
+   */
+  public SchemaField getDestination() {
+    return destination;
+  }
+
+  /**
+   * @return the maximum number of chars in source field to copy to 
destination field.
+   */
+  public int getMaxChars() {
+    return maxChars;
+  }
+}

Propchange: lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Modified: lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java?rev=721758&r1=721757&r2=721758&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java 
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java Sat Nov 
29 20:46:20 2008
@@ -621,8 +621,18 @@
 
         String source = DOMUtil.getAttr(attrs,"source","copyField definition");
         String dest   = DOMUtil.getAttr(attrs,"dest",  "copyField definition");
+        String maxChars = DOMUtil.getAttr(attrs, "maxChars");
+        int maxCharsInt = CopyField.UNLIMITED;
+        if (maxChars != null) {
+          try {
+            maxCharsInt = Integer.parseInt(maxChars);
+          } catch (NumberFormatException e) {
+            log.warn("Couldn't parse maxChars attribute for copyField from "
+                    + source + " to " + dest + " as integer. The whole field 
will be copied.");
+          }
+        }
 
-        registerCopyField(source, dest);
+        registerCopyField(source, dest, maxCharsInt);
      }
       
       for (Map.Entry<SchemaField, Integer> entry : 
copyFieldTargetCounts.entrySet())    {
@@ -646,6 +656,11 @@
     refreshAnalyzers();
   }
 
+  public void registerCopyField( String source, String dest )
+  {
+    registerCopyField(source, dest, CopyField.UNLIMITED);
+  }
+
   /**
    * <p>
    * NOTE: this function is not thread safe.  However, it is safe to use 
within the standard
@@ -655,12 +670,12 @@
    * 
    * @see SolrCoreAware
    */
-  public void registerCopyField( String source, String dest )
+  public void registerCopyField( String source, String dest, int maxChars )
   {
     boolean sourceIsPattern = isWildCard(source);
     boolean destIsPattern   = isWildCard(dest);
 
-    log.debug("copyField source='"+source+"' dest='"+dest+"'");
+    log.debug("copyField source='"+source+"' dest='"+dest+"' 
maxChars='"+maxChars);
     SchemaField d = getFieldOrNull(dest);
     if(d == null){
       throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, 
"copyField destination :'"+dest+"' does not exist" );
@@ -678,10 +693,10 @@
         if( df == null ) {
           throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, 
"copyField dynamic destination must match a dynamicField." );
         }
-        registerDynamicCopyField(new DynamicDestCopy(source, df ));
+        registerDynamicCopyField(new DynamicDestCopy(source, df, maxChars ));
       }
       else {
-        registerDynamicCopyField(new DynamicCopy(source, d));
+        registerDynamicCopyField(new DynamicCopy(source, d, maxChars));
       }
     } 
     else if( destIsPattern ) {
@@ -692,13 +707,12 @@
       // retrieve the field to force an exception if it doesn't exist
       SchemaField f = getField(source);
 
-      SchemaField[] destArr = copyFields.get(source);
-      if (destArr==null) {
-        destArr=new SchemaField[]{d};
-      } else {
-        destArr = (SchemaField[])append(destArr,d);
+      List<CopyField> copyFieldList = copyFieldsMap.get(source);
+      if (copyFieldList == null) {
+        copyFieldList = new ArrayList<CopyField>();
+        copyFieldsMap.put(source, copyFieldList);
       }
-      copyFields.put(source,destArr);
+      copyFieldList.add(new CopyField(f, d, maxChars));
 
       copyFieldTargetCounts.put(d, (copyFieldTargetCounts.containsKey(d) ? 
copyFieldTargetCounts.get(d) + 1 : 1));
     }
@@ -894,9 +908,16 @@
 
   static class DynamicCopy extends DynamicReplacement {
     final SchemaField targetField;
+    final int maxChars;
+
     DynamicCopy(String regex, SchemaField targetField) {
+      this(regex, targetField, CopyField.UNLIMITED);
+    }
+
+    DynamicCopy(String regex, SchemaField targetField, int maxChars) {
       super(regex);
       this.targetField = targetField;
+      this.maxChars = maxChars;
     }
     
     public SchemaField getTargetField( String sourceField )
@@ -918,7 +939,11 @@
     final String dstr;
     
     DynamicDestCopy(String source, DynamicField dynamic) {
-      super(source, dynamic.prototype );
+      this(source, dynamic, CopyField.UNLIMITED);
+    }
+      
+    DynamicDestCopy(String source, DynamicField dynamic, int maxChars) {
+      super(source, dynamic.prototype, maxChars);
       this.dynamic = dynamic;
       
       String dest = dynamic.regex;
@@ -1098,7 +1123,7 @@
   };
 
 
-  private final Map<String, SchemaField[]> copyFields = new 
HashMap<String,SchemaField[]>();
+  private final Map<String, List<CopyField>> copyFieldsMap = new 
HashMap<String, List<CopyField>>();
   private DynamicCopy[] dynamicCopyFields;
   /**
    * keys are all fields copied to, count is num of copyField
@@ -1119,46 +1144,69 @@
       return new SchemaField[0];
     }
     List<SchemaField> sf = new ArrayList<SchemaField>();
-    for (Map.Entry<String, SchemaField[]> cfs : copyFields.entrySet()) {
-      for (SchemaField cf : cfs.getValue()) {
-        if (cf.getName().equals(destField)) {
-          sf.add(getField(cfs.getKey()));
+    for (Map.Entry<String, List<CopyField>> cfs : copyFieldsMap.entrySet()) {
+      for (CopyField copyField : cfs.getValue()) {
+        if (copyField.getDestination().getName().equals(destField)) {
+          sf.add(copyField.getSource());
         }
       }
     }
-    return sf.toArray(new SchemaField[1]);
+    return sf.toArray(new SchemaField[sf.size()]);
   }
   /**
    * Get all copy fields, both the static and the dynamic ones.
+   * 
    * @param sourceField
    * @return Array of fields to copy to.
+   * @deprecated Use {@link #getCopyFieldsList(String)} instead.
    */
+  @Deprecated
   public SchemaField[] getCopyFields(String sourceField) {
-    // Get the dynamic ones into a list.
+    // This is the List that holds all the results, dynamic or not.
     List<SchemaField> matchCopyFields = new ArrayList<SchemaField>();
 
+    // Get the dynamic results into the list.
     for(DynamicCopy dynamicCopy : dynamicCopyFields) {
       if(dynamicCopy.matches(sourceField)) {
         matchCopyFields.add(dynamicCopy.getTargetField(sourceField));
       }
     }
 
-    // Get the fixed ones, if there are any.
-    SchemaField[] fixedCopyFields = copyFields.get(sourceField);
-
-    boolean appendFixed = copyFields.containsKey(sourceField);
-
-    // Construct the results by concatenating dynamic and fixed into a results 
array.
-
-    SchemaField[] results = new SchemaField[matchCopyFields.size() + 
(appendFixed ? fixedCopyFields.length : 0)];
+    // Get the fixed ones, if there are any and add them.
+    final List<CopyField> copyFields = copyFieldsMap.get(sourceField);
+    if (copyFields!=null) {
+      final Iterator<CopyField> it = copyFields.iterator();
+      while (it.hasNext()) {
+        matchCopyFields.add(it.next().getDestination());
+      }
+    }
 
-    matchCopyFields.toArray(results);
+    // Construct the results by transforming the list into an array.
+    return matchCopyFields.toArray(new SchemaField[matchCopyFields.size()]);
+  }
 
-    if(appendFixed) {
-      System.arraycopy(fixedCopyFields, 0, results, matchCopyFields.size(), 
fixedCopyFields.length);
+  /**
+   * Get all copy fields for a specified source field, both static
+   * and dynamic ones.
+   * @param sourceField
+   * @return List of CopyFields to copy to.
+   * @since solr 1.4
+   */
+  // This is useful when we need the maxChars param of each CopyField
+  public List<CopyField> getCopyFieldsList(final String sourceField){
+    final List<CopyField> result = new ArrayList<CopyField>();
+    for (DynamicCopy dynamicCopy : dynamicCopyFields) {
+      if (dynamicCopy.matches(sourceField)) {
+        result.add(new CopyField(getField(sourceField), 
dynamicCopy.getTargetField(sourceField), dynamicCopy.maxChars));
+      }
+    }
+    List<CopyField> fixedCopyFields = copyFieldsMap.get(sourceField);
+    if (fixedCopyFields != null)
+    {
+      result.addAll(fixedCopyFields);
     }
 
-    return results;
+    return result;
   }
   
   /**

Modified: lucene/solr/trunk/src/java/org/apache/solr/update/DocumentBuilder.java
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/update/DocumentBuilder.java?rev=721758&r1=721757&r2=721758&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/update/DocumentBuilder.java 
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/update/DocumentBuilder.java Sat 
Nov 29 20:46:20 2008
@@ -29,6 +29,7 @@
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.SolrInputField;
+import org.apache.solr.schema.CopyField;
 import org.apache.solr.schema.DateField;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
@@ -120,15 +121,15 @@
 
     // Check if we should copy this field to any other fields.
     // This could happen whether it is explicit or not.
-    SchemaField[] destArr = schema.getCopyFields(name);
-    if (destArr != null) {
-      for (SchemaField destField : destArr) {
-        addSingleField(destField,val,boost);
+    final List<CopyField> copyFields = schema.getCopyFieldsList(name);
+    if (copyFields != null) {
+      for(CopyField cf : copyFields) {
+        addSingleField(cf.getDestination(), cf.getLimitedValue( val ), boost);
       }
     }
 
     // error if this field name doesn't match anything
-    if (sfield==null && (destArr==null || destArr.length==0)) {
+    if (sfield==null && (copyFields==null || copyFields.size()==0)) {
       throw new SolrException( 
SolrException.ErrorCode.BAD_REQUEST,"ERROR:unknown field '" + name + "'");
     }
   }
@@ -218,7 +219,7 @@
               sfield.getName() + ": " +field.getValue() );
       }
       
-      SchemaField[] destArr = schema.getCopyFields(name);
+      final List<CopyField> copyFields = schema.getCopyFieldsList(name);
       
       // load each field value
       boolean hasField = false;
@@ -246,8 +247,10 @@
           }
         }
         
-        // Add the copy fields
-        for( SchemaField sf : destArr ) {
+        // Check if we should copy this field to any other fields.
+        // This could happen whether it is explicit or not.
+        for( CopyField cf : copyFields ) {
+          SchemaField sf = cf.getDestination();
           // check if the copy field is a multivalued or not
           if( !sf.multiValued() && out.get( sf.getName() ) != null ) {
             throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
@@ -256,7 +259,7 @@
           }
           
           used = true;
-          Field f = sf.createField( val, boost );
+          Field f = sf.createField( cf.getLimitedValue( val ), boost );
           if( f != null ) { // null fields are not added
             out.add( f );
           }

Added: lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java?rev=721758&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java (added)
+++ lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java Sat 
Nov 29 20:46:20 2008
@@ -0,0 +1,180 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.schema;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.MapSolrParams;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.util.AbstractSolrTestCase;
+import org.junit.Test;
+
+/**
+ * This is a simple test to make sure the <code>CopyField</code> works.
+ * It uses its own special schema file.
+ *
+ * @since solr 1.4
+ */
+public class CopyFieldTest extends AbstractSolrTestCase {
+
+  @Override
+  public String getSchemaFile() {
+    return "schema-copyfield-test.xml";
+  }
+
+  @Override
+  public String getSolrConfigFile() {
+    return "solrconfig.xml";
+  }
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    super.tearDown();
+  }
+
+  @Test
+  public void testCopyFieldSchemaFieldSchemaField() {
+    try {
+      new CopyField(new SchemaField("source", new TextField()), null);
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+    try {
+      new CopyField(null, new SchemaField("destination", new TextField()));
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+    try {
+      new CopyField(null, null);
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+  }
+
+  @Test
+  public void testCopyFieldSchemaFieldSchemaFieldInt() {
+    try {
+      new CopyField(null,
+          new SchemaField("destination", new TextField()), 1000);
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+    try {
+      new CopyField(new SchemaField("source", new TextField()), null,
+          1000);
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+    try {
+      new CopyField(null, null, 1000);
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+    try {
+      new CopyField(new SchemaField("source", new TextField()),
+          new SchemaField("destination", new TextField()), -1000);
+      fail("CopyField failed with negative length argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains(
+          "can't have a negative value"));
+    }
+    new CopyField(new SchemaField("source", new TextField()),
+        new SchemaField("destination", new TextField()), CopyField.UNLIMITED);
+  }
+
+  @Test
+  public void testGetSource() {
+    final CopyField copyField = new CopyField(new SchemaField("source",
+        new TextField()), new SchemaField("destination",
+        new TextField()), 1000);
+    assertEquals("source", copyField.getSource().name);
+  }
+
+  @Test
+  public void testGetDestination() {
+    final CopyField copyField = new CopyField(new SchemaField("source",
+        new TextField()), new SchemaField("destination",
+        new TextField()), 1000);
+    assertEquals("destination", copyField.getDestination().name);
+  }
+
+  @Test
+  public void testGetMaxChars() {
+    final CopyField copyField = new CopyField(new SchemaField("source",
+        new TextField()), new SchemaField("destination",
+        new TextField()), 1000);
+    assertEquals(1000, copyField.getMaxChars());
+  }
+
+  @Test
+  public void testCopyFieldFunctionality() 
+    {
+      SolrCore core = h.getCore();
+      assertU(adoc("id", "10", "title", "test copy field", "text_en", "this is 
a simple test of the copy field functionality"));
+      assertU(commit());
+      
+      Map<String,String> args = new HashMap<String, String>();
+      args.put( CommonParams.Q, "text_en:simple" );
+      args.put( "indent", "true" );
+      SolrQueryRequest req = new LocalSolrQueryRequest( core, new 
MapSolrParams( args) );
+      
+      assertQ("Make sure they got in", req
+              ,"//*[@numFound='1']"
+              ,"//result/doc[1]/int[@name='id'][.='10']"
+              );
+      
+      args = new HashMap<String, String>();
+      args.put( CommonParams.Q, "highlight:simple" );
+      args.put( "indent", "true" );
+      req = new LocalSolrQueryRequest( core, new MapSolrParams( args) );
+      assertQ("dynamic source", req
+              ,"//*[@numFound='1']"
+              ,"//result/doc[1]/int[@name='id'][.='10']"
+              ,"//result/doc[1]/arr[@name='highlight']/str[.='this is a 
simple test of ']"
+              );
+
+      args = new HashMap<String, String>();
+      args.put( CommonParams.Q, "text_en:functionality" );
+      args.put( "indent", "true" );
+      req = new LocalSolrQueryRequest( core, new MapSolrParams( args) );
+      assertQ("Make sure they got in", req
+              ,"//*[@numFound='1']");
+      
+      args = new HashMap<String, String>();
+      args.put( CommonParams.Q, "highlight:functionality" );
+      args.put( "indent", "true" );
+      req = new LocalSolrQueryRequest( core, new MapSolrParams( args) );
+      assertQ("dynamic source", req
+              ,"//*[@numFound='0']");
+    }
+}

Propchange: lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL

Added: lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml
URL: 
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml?rev=721758&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml 
(added)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml 
Sat Nov 29 20:46:20 2008
@@ -0,0 +1,468 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- The Solr schema file. This file should be named "schema.xml" and
+     should be located where the classloader for the Solr webapp can find it.
+
+     This schema is used for testing, and as such has everything and the 
+     kitchen sink thrown in. See example/solr/conf/schema.xml for a 
+     more concise example.
+
+     $Id$
+     $Source: 
/cvs/main/searching/solr-configs/test/WEB-INF/classes/schema.xml,v $
+     $Name:  $
+  -->
+
+<schema name="test" version="1.0">
+  <types>
+
+    <!-- field type definitions... note that the "name" attribute is
+         just a label to be used by field definitions.  The "class"
+         attribute and any other attributes determine the real type and
+         behavior of the fieldtype.
+      -->
+
+    <!-- numeric field types that store and index the text
+         value verbatim (and hence don't sort correctly or support range 
queries.)
+         These are provided more for backward compatibility, allowing one
+         to create a schema that matches an existing lucene index.
+    -->
+    <fieldType name="integer" class="solr.IntField"/>
+    <fieldType name="long" class="solr.LongField"/>
+    <fieldtype name="float" class="solr.FloatField"/>
+    <fieldType name="double" class="solr.DoubleField"/>
+
+    <!-- numeric field types that manipulate the value into
+       a string value that isn't human readable in its internal form,
+       but sorts correctly and supports range queries.
+
+         If sortMissingLast="true" then a sort on this field will cause 
documents
+       without the field to come after documents with the field,
+       regardless of the requested sort order.
+         If sortMissingFirst="true" then a sort on this field will cause 
documents
+       without the field to come before documents with the field,
+       regardless of the requested sort order.
+         If sortMissingLast="false" and sortMissingFirst="false" (the default),
+       then default lucene sorting will be used which places docs without the 
field
+       first in an ascending sort and last in a descending sort.
+    -->
+    <fieldtype name="sint" class="solr.SortableIntField" 
sortMissingLast="true"/>
+    <fieldtype name="slong" class="solr.SortableLongField" 
sortMissingLast="true"/>
+    <fieldtype name="sfloat" class="solr.SortableFloatField" 
sortMissingLast="true"/>
+    <fieldtype name="sdouble" class="solr.SortableDoubleField" 
sortMissingLast="true"/>
+
+    <!-- bcd versions of sortable numeric type may provide smaller
+         storage space and support very large numbers.
+    -->
+    <fieldtype name="bcdint" class="solr.BCDIntField" sortMissingLast="true"/>
+    <fieldtype name="bcdlong" class="solr.BCDLongField" 
sortMissingLast="true"/>
+    <fieldtype name="bcdstr" class="solr.BCDStrField" sortMissingLast="true"/>
+
+    <!-- Field type demonstrating an Analyzer failure -->
+    <fieldtype name="failtype1" class="solr.TextField">
+      <analyzer type="index">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" 
generateWordParts="1" generateNumberParts="0" catenateWords="0" 
catenateNumbers="0" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- Demonstrating ignoreCaseChange -->
+    <fieldtype name="wdf_nocase" class="solr.TextField">
+      <analyzer>
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" 
generateWordParts="1" generateNumberParts="0" catenateWords="0" 
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+
+    <!-- HighlitText optimizes storage for (long) columns which will be 
highlit -->
+    <fieldtype name="highlittext" class="solr.TextField" 
compressThreshold="345" />
+
+    <fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
+
+    <!-- format for date is 1995-12-31T23:59:59.999Z and only the fractional
+         seconds part (.999) is optional.
+      -->
+    <fieldtype name="date" class="solr.DateField" sortMissingLast="true"/>
+
+    <!-- solr.TextField allows the specification of custom
+         text analyzers specified as a tokenizer and a list
+         of token filters.
+      -->
+    <fieldtype name="text" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StandardFilterFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory"/>
+        <!-- lucene PorterStemFilterFactory deprecated
+          <filter class="solr.PorterStemFilterFactory"/>
+        -->
+        <filter class="solr.EnglishPorterFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+
+    <fieldtype name="nametext" class="solr.TextField">
+      <analyzer class="org.apache.lucene.analysis.WhitespaceAnalyzer"/>
+    </fieldtype>
+
+    <fieldtype name="teststop" class="solr.TextField">
+       <analyzer>
+        <tokenizer class="solr.LowerCaseTokenizerFactory"/>
+        <filter class="solr.StandardFilterFactory"/>
+        <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- fieldtypes in this section isolate tokenizers and tokenfilters for 
testing -->
+    <fieldtype name="lowertok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="keywordtok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="standardtok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="lettertok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="whitetok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="HTMLstandardtok" class="solr.TextField">
+      <analyzer><tokenizer 
class="solr.HTMLStripStandardTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="HTMLwhitetok" class="solr.TextField">
+      <analyzer><tokenizer 
class="solr.HTMLStripWhitespaceTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="standardtokfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StandardFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="standardfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StandardFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="lowerfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="patternreplacefilt" class="solr.TextField">
+      <analyzer type="index">
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.PatternReplaceFilterFactory"
+                pattern="([^a-zA-Z])" replacement="_" replace="all"
+        />
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="porterfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <!-- fieldtype name="snowballfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory"/>
+      </analyzer>
+    </fieldtype -->
+    <fieldtype name="engporterfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.EnglishPorterFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="custengporterfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.EnglishPorterFilterFactory" 
protected="protwords.txt"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="stopfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="custstopfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="lengthfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.LengthFilterFactory" min="2" max="5"/>
+      </analyzer>
+    </fieldtype>
+
+    <fieldtype name="subword" class="solr.TextField" multiValued="true" 
positionIncrementGap="100">
+      <analyzer type="index">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" 
generateWordParts="1" generateNumberParts="1" catenateWords="1" 
catenateNumbers="1" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+          <filter class="solr.StopFilterFactory"/>
+          <filter class="solr.EnglishPorterFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" 
generateWordParts="1" generateNumberParts="1" catenateWords="0" 
catenateNumbers="0" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+          <filter class="solr.StopFilterFactory"/>
+          <filter class="solr.EnglishPorterFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- more flexible in matching skus, but more chance of a false match -->
+    <fieldtype name="skutype1" class="solr.TextField">
+      <analyzer type="index">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" 
generateWordParts="1" generateNumberParts="1" catenateWords="1" 
catenateNumbers="1" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" 
generateWordParts="0" generateNumberParts="0" catenateWords="1" 
catenateNumbers="1" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- less flexible in matching skus, but less chance of a false match -->
+    <fieldtype name="skutype2" class="solr.TextField">
+      <analyzer type="index">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" 
generateWordParts="0" generateNumberParts="0" catenateWords="1" 
catenateNumbers="1" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" 
generateWordParts="0" generateNumberParts="0" catenateWords="1" 
catenateNumbers="1" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- whitespace-tokenized fieldtype that applies SynonymFilterFactory (synonyms.txt) -->
+    <fieldtype name="syn" class="solr.TextField">
+      <analyzer>
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter name="syn" class="solr.SynonymFilterFactory" 
synonyms="synonyms.txt"/>
+      </analyzer>
+    </fieldtype>
+    
+    <!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
+         synonyms "better"
+      -->
+    <fieldtype name="dedup" class="solr.TextField">
+      <analyzer>
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.SynonymFilterFactory"
+                  synonyms="synonyms.txt" expand="true" />
+          <filter class="solr.EnglishPorterFilterFactory"/>
+          <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
+      </analyzer>
+    </fieldtype>
+
+    <fieldtype  name="unstored" class="solr.StrField" indexed="true" 
stored="false"/>
+
+
+  <fieldtype name="textgap" class="solr.TextField" multiValued="true" 
positionIncrementGap="100">
+      <analyzer>
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+  </fieldtype>
+
+ </types>
+
+
+ <fields>
+   <field name="id" type="integer" indexed="true" stored="true" 
multiValued="false" required="false"/>
+   <field name="name" type="nametext" indexed="true" stored="true"/>
+   <field name="text" type="text" indexed="true" stored="false"/>
+   <field name="subject" type="text" indexed="true" stored="true"/>
+   <field name="title" type="nametext" indexed="true" stored="true"/>
+   <field name="weight" type="float" indexed="true" stored="true"/>
+   <field name="bday" type="date" indexed="true" stored="true"/>
+
+   <field name="title_stemmed" type="text" indexed="true" stored="false"/>
+   <field name="title_lettertok" type="lettertok" indexed="true" 
stored="false"/>
+
+   <field name="syn" type="syn" indexed="true" stored="true"/>
+
+   <!-- to test property inheritance and overriding -->
+   <field name="shouldbeunstored" type="unstored" />
+   <field name="shouldbestored" type="unstored" stored="true"/>
+   <field name="shouldbeunindexed" type="unstored" indexed="false" 
stored="true"/>
+
+
+   <!-- test different combinations of indexed and stored -->
+   <field name="bind" type="boolean" indexed="true" stored="false"/>
+   <field name="bsto" type="boolean" indexed="false" stored="true"/>
+   <field name="bindsto" type="boolean" indexed="true" stored="true"/>
+   <field name="isto" type="integer" indexed="false" stored="true"/>
+   <field name="iind" type="integer" indexed="true" stored="false"/>
+   <field name="ssto" type="string" indexed="false" stored="true"/>
+   <field name="sind" type="string" indexed="true" stored="false"/>
+   <field name="sindsto" type="string" indexed="true" stored="true"/>
+
+   <!-- test combinations of term vector settings -->
+   <field name="test_basictv" type="text" termVectors="true"/>
+   <field name="test_notv" type="text" termVectors="false"/>
+   <field name="test_postv" type="text" termVectors="true" 
termPositions="true"/>
+   <field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
+   <field name="test_posofftv" type="text" termVectors="true" 
+     termPositions="true" termOffsets="true"/>
+
+   <!-- test highlit field settings -->
+   <field name="test_hlt" type="highlittext" indexed="true" compressed="true"/>
+   <field name="test_hlt_off" type="highlittext" indexed="true" 
compressed="false"/>
+
+   <!-- fields to test individual tokenizers and tokenfilters -->
+   <field name="teststop" type="teststop" indexed="true" stored="true"/>
+   <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
+   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
+   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
+   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" 
stored="true"/>
+   <field name="lettertok" type="lettertok" indexed="true" stored="true"/>
+   <field name="whitetok" type="whitetok" indexed="true" stored="true"/>
+   <field name="HTMLwhitetok" type="HTMLwhitetok" indexed="true" 
stored="true"/>
+   <field name="standardtokfilt" type="standardtokfilt" indexed="true" 
stored="true"/>
+   <field name="standardfilt" type="standardfilt" indexed="true" 
stored="true"/>
+   <field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
+   <field name="patternreplacefilt" type="patternreplacefilt" indexed="true" 
stored="true"/>
+   <field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
+   <field name="engporterfilt" type="engporterfilt" indexed="true" 
stored="true"/>
+   <field name="custengporterfilt" type="custengporterfilt" indexed="true" 
stored="true"/>
+   <field name="stopfilt" type="stopfilt" indexed="true" stored="true"/>
+   <field name="custstopfilt" type="custstopfilt" indexed="true" 
stored="true"/>
+   <field name="lengthfilt" type="lengthfilt" indexed="true" stored="true"/>
+   <field name="dedup" type="dedup" indexed="true" stored="true"/>
+   <field name="wdf_nocase" type="wdf_nocase" indexed="true" stored="true"/>
+
+   <field name="numberpartfail" type="failtype1" indexed="true" stored="true"/>
+
+   <field name="nullfirst" type="string" indexed="true" stored="true" 
sortMissingFirst="true"/>
+
+   <field name="subword" type="subword" indexed="true" stored="true"/>
+   <field name="sku1" type="skutype1" indexed="true" stored="true"/>
+   <field name="sku2" type="skutype2" indexed="true" stored="true"/>
+
+   <field name="textgap" type="textgap" indexed="true" stored="true"/>
+   
+   <field name="timestamp" type="date" indexed="true" stored="true" 
default="NOW" multiValued="false"/>
+   <field name="multiDefault" type="string" indexed="true" stored="true" 
default="muLti-Default" multiValued="true"/>
+   <field name="intDefault" type="sint" indexed="true" stored="true" 
default="42" multiValued="false"/>
+   
+   <!-- test maxChars copyField attribute -->
+   <field name="text_fr"  type="text" indexed="true"  stored="true" 
+      termVectors="true" termPositions="true" termOffsets="true"/>
+   <field name="text_en"  type="text" indexed="true"  stored="true" 
+      termVectors="true" termPositions="true" termOffsets="true"/>
+   <field name="highlight"  type="text" indexed="true"  stored="true" 
+      termVectors="true" termPositions="true" termOffsets="true"/>
+   
+
+   
+   <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
+        will be used if the name matches any of the patterns.
+        RESTRICTION: the glob-like pattern in the name attribute must have
+        a "*" only at the start or the end.
+        EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, 
z_i)
+        Longer patterns will be matched first.  If equal-size patterns
+        both match, the first appearing in the schema will be used.
+   -->
+   <dynamicField name="*_i"  type="sint"    indexed="true"  stored="true"/>
+   <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
+   <dynamicField name="*_s1"  type="string"  indexed="true"  stored="true" 
multiValued="false"/>
+   <dynamicField name="*_l"  type="slong"   indexed="true"  stored="true"/>
+   <dynamicField name="*_t"  type="text"    indexed="true"  stored="true"/>
+   <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
+   <dynamicField name="*_f"  type="sfloat"  indexed="true"  stored="true"/>
+   <dynamicField name="*_d"  type="sdouble" indexed="true"  stored="true"/>
+   <dynamicField name="*_dt" type="date"    indexed="true"  stored="true"/>
+   <dynamicField name="*_bcd" type="bcdstr" indexed="true"  stored="true"/>
+
+   <dynamicField name="*_sI" type="string"  indexed="true"  stored="false"/>
+   <dynamicField name="*_sS" type="string"  indexed="false" stored="true"/>
+   <dynamicField name="t_*"  type="text"    indexed="true"  stored="true"/>
+   <dynamicField name="tv_*"  type="text" indexed="true"  stored="true" 
+      termVectors="true" termPositions="true" termOffsets="true"/>
+
+   <!-- special fields for dynamic copyField test -->
+   <dynamicField name="dynamic_*" type="string" indexed="true" stored="true"/>
+   <dynamicField name="*_dynamic" type="string" indexed="true" stored="true"/>
+  
+   <!-- for testing to ensure that longer patterns are matched first -->
+   <dynamicField name="*aa"  type="string"  indexed="true" stored="true"/>
+   <dynamicField name="*aaa" type="integer" indexed="false" stored="true"/>
+
+   <!-- ignored because not stored or indexed -->
+   <dynamicField name="*_ignored" type="text" indexed="false" stored="false"/>
+   
+   <!-- test maxChars copyField attribute -->
+   <dynamicField name="text_*"  type="text" indexed="true"  stored="true" 
+      termVectors="true" termPositions="true" termOffsets="true"/>
+
+ </fields>
+
+ <defaultSearchField>text</defaultSearchField>
+ <uniqueKey>id</uniqueKey>
+
+  <!-- copyField commands copy one field to another at the time a document
+        is added to the index.  It's used either to index the same field 
different
+        ways, or to add multiple fields to the same field for easier/faster 
searching.
+   -->
+   <copyField source="title" dest="title_stemmed"/>
+   <copyField source="title" dest="title_lettertok"/>
+
+   <copyField source="title" dest="text"/>
+   <copyField source="subject" dest="text"/>
+ 
+   <copyField source="*_t" dest="text"/>
+   
+   <!-- dynamic destination -->
+   <copyField source="*_dynamic" dest="dynamic_*"/>
+   
+   <!-- test maxChars copyField attribute -->
+   <copyField source="text_fr" dest="highlight" maxChars="25" />
+   <copyField source="text_en" dest="highlight" maxChars="25" />
+   <copyField source="text_*" dest="highlight" maxChars="25" />
+   
+ <!-- Similarity is the scoring routine for each document vs a query.
+      A custom similarity may be specified here, but the default is fine
+      for most applications.
+ -->
+ <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
+
+</schema>

Propchange: 
lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: 
lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml
------------------------------------------------------------------------------
    svn:keywords = Date Author Id Revision HeadURL


Reply via email to