Author: koji
Date: Sat Nov 29 20:46:20 2008
New Revision: 721758
URL: http://svn.apache.org/viewvc?rev=721758&view=rev
Log:
SOLR-538: added maxChars attribute for copyField
Added:
lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java (with
props)
lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java
(with props)
lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml
(with props)
Modified:
lucene/solr/trunk/CHANGES.txt
lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java
lucene/solr/trunk/src/java/org/apache/solr/update/DocumentBuilder.java
Modified: lucene/solr/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/CHANGES.txt?rev=721758&r1=721757&r2=721758&view=diff
==============================================================================
--- lucene/solr/trunk/CHANGES.txt (original)
+++ lucene/solr/trunk/CHANGES.txt Sat Nov 29 20:46:20 2008
@@ -94,6 +94,10 @@
18. SOLR-877: Added TermsComponent for accessing Lucene's TermEnum
capabilities.
Useful for auto suggest and possibly distributed search. Not distributed
search compliant. (gsingers)
+19. SOLR-538: Add maxChars attribute for copyField function so that the length limit for destination
+    can be specified.
+ (Georgios Stamatis, Lars Kotthoff, Chris Harris via koji)
+
Optimizations
----------------------
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the
Added: lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java?rev=721758&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java Sat Nov 29
20:46:20 2008
@@ -0,0 +1,82 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.schema;
+
+/**
+ * <code>CopyField</code> contains all the information of a valid copy field in an index.
+ *
+ * @since solr 1.4
+ */
+public class CopyField {
+
+  /** Value of <code>maxChars</code> meaning that the whole source value is copied. */
+  public static final int UNLIMITED = 0;
+
+  private final SchemaField source;
+  private final SchemaField destination;
+  private final int maxChars;
+
+  /**
+   * Creates a copy field without a length limit.
+   *
+   * @param source The SchemaField of the source field.
+   * @param destination The SchemaField of the destination field.
+   * @throws IllegalArgumentException if source or destination is null.
+   */
+  public CopyField(final SchemaField source, final SchemaField destination) {
+    this(source, destination, UNLIMITED);
+  }
+
+  /**
+   * @param source The SchemaField of the source field.
+   * @param destination The SchemaField of the destination field.
+   * @param maxChars Maximum number of chars in source field to copy to destination field.
+   * If equal to 0, there is no limit.
+   * @throws IllegalArgumentException if source or destination is null,
+   * or if maxChars is negative.
+   */
+  public CopyField(final SchemaField source, final SchemaField destination,
+      final int maxChars) {
+    if (source == null || destination == null) {
+      throw new IllegalArgumentException(
+          "Source or Destination SchemaField can't be NULL.");
+    }
+    if (maxChars < 0) {
+      throw new IllegalArgumentException(
+          "Attribute maxChars can't have a negative value.");
+    }
+    this.source = source;
+    this.destination = destination;
+    this.maxChars = maxChars;
+  }
+
+  /**
+   * Applies the <code>maxChars</code> limit to a value that is about to be copied.
+   *
+   * @param val the source value; may be null.
+   * @return <code>val</code> itself when it is null, when no limit is set, or when
+   * it is not longer than the limit; otherwise its first <code>maxChars</code> chars.
+   */
+  public String getLimitedValue( final String val ){
+    // A null value has nothing to truncate. Return it unchanged, as the
+    // unlimited case already does, instead of failing on val.length().
+    if (val == null || maxChars == UNLIMITED || val.length() <= maxChars) {
+      return val;
+    }
+    // The limit counts Java chars, so the cut is made at a char index.
+    return val.substring( 0, maxChars );
+  }
+
+  /**
+   * @return source SchemaField
+   */
+  public SchemaField getSource() {
+    return source;
+  }
+
+  /**
+   * @return destination SchemaField
+   */
+  public SchemaField getDestination() {
+    return destination;
+  }
+
+  /**
+   * @return the maximum number of chars in source field to copy to destination field.
+   */
+  public int getMaxChars() {
+    return maxChars;
+  }
+}
Propchange: lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/java/org/apache/solr/schema/CopyField.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Modified: lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java?rev=721758&r1=721757&r2=721758&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/schema/IndexSchema.java Sat Nov
29 20:46:20 2008
@@ -621,8 +621,18 @@
String source = DOMUtil.getAttr(attrs,"source","copyField definition");
String dest = DOMUtil.getAttr(attrs,"dest", "copyField definition");
+ String maxChars = DOMUtil.getAttr(attrs, "maxChars");
+ int maxCharsInt = CopyField.UNLIMITED;
+ if (maxChars != null) {
+ try {
+ maxCharsInt = Integer.parseInt(maxChars);
+ } catch (NumberFormatException e) {
+ log.warn("Couldn't parse maxChars attribute for copyField from "
+ + source + " to " + dest + " as integer. The whole field
will be copied.");
+ }
+ }
- registerCopyField(source, dest);
+ registerCopyField(source, dest, maxCharsInt);
}
for (Map.Entry<SchemaField, Integer> entry :
copyFieldTargetCounts.entrySet()) {
@@ -646,6 +656,11 @@
refreshAnalyzers();
}
+ /**
+  * Registers a copy from <code>source</code> to <code>dest</code> without a
+  * length limit, by delegating to the three-argument overload with
+  * <code>CopyField.UNLIMITED</code>.
+  *
+  * @param source name (or pattern) of the field to copy from
+  * @param dest name (or pattern) of the field to copy to
+  */
+ public void registerCopyField( String source, String dest ) {
+   final int noLimit = CopyField.UNLIMITED;
+   registerCopyField(source, dest, noLimit);
+ }
+
/**
* <p>
* NOTE: this function is not thread safe. However, it is safe to use
within the standard
@@ -655,12 +670,12 @@
*
* @see SolrCoreAware
*/
- public void registerCopyField( String source, String dest )
+ public void registerCopyField( String source, String dest, int maxChars )
{
boolean sourceIsPattern = isWildCard(source);
boolean destIsPattern = isWildCard(dest);
- log.debug("copyField source='"+source+"' dest='"+dest+"'");
+ log.debug("copyField source='"+source+"' dest='"+dest+"'
maxChars='"+maxChars);
SchemaField d = getFieldOrNull(dest);
if(d == null){
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
"copyField destination :'"+dest+"' does not exist" );
@@ -678,10 +693,10 @@
if( df == null ) {
throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
"copyField dynamic destination must match a dynamicField." );
}
- registerDynamicCopyField(new DynamicDestCopy(source, df ));
+ registerDynamicCopyField(new DynamicDestCopy(source, df, maxChars ));
}
else {
- registerDynamicCopyField(new DynamicCopy(source, d));
+ registerDynamicCopyField(new DynamicCopy(source, d, maxChars));
}
}
else if( destIsPattern ) {
@@ -692,13 +707,12 @@
// retrieve the field to force an exception if it doesn't exist
SchemaField f = getField(source);
- SchemaField[] destArr = copyFields.get(source);
- if (destArr==null) {
- destArr=new SchemaField[]{d};
- } else {
- destArr = (SchemaField[])append(destArr,d);
+ List<CopyField> copyFieldList = copyFieldsMap.get(source);
+ if (copyFieldList == null) {
+ copyFieldList = new ArrayList<CopyField>();
+ copyFieldsMap.put(source, copyFieldList);
}
- copyFields.put(source,destArr);
+ copyFieldList.add(new CopyField(f, d, maxChars));
copyFieldTargetCounts.put(d, (copyFieldTargetCounts.containsKey(d) ?
copyFieldTargetCounts.get(d) + 1 : 1));
}
@@ -894,9 +908,16 @@
static class DynamicCopy extends DynamicReplacement {
final SchemaField targetField;
+ final int maxChars;
+
DynamicCopy(String regex, SchemaField targetField) {
+ this(regex, targetField, CopyField.UNLIMITED);
+ }
+
+ DynamicCopy(String regex, SchemaField targetField, int maxChars) {
super(regex);
this.targetField = targetField;
+ this.maxChars = maxChars;
}
public SchemaField getTargetField( String sourceField )
@@ -918,7 +939,11 @@
final String dstr;
DynamicDestCopy(String source, DynamicField dynamic) {
- super(source, dynamic.prototype );
+ this(source, dynamic, CopyField.UNLIMITED);
+ }
+
+ DynamicDestCopy(String source, DynamicField dynamic, int maxChars) {
+ super(source, dynamic.prototype, maxChars);
this.dynamic = dynamic;
String dest = dynamic.regex;
@@ -1098,7 +1123,7 @@
};
- private final Map<String, SchemaField[]> copyFields = new
HashMap<String,SchemaField[]>();
+ private final Map<String, List<CopyField>> copyFieldsMap = new
HashMap<String, List<CopyField>>();
private DynamicCopy[] dynamicCopyFields;
/**
* keys are all fields copied to, count is num of copyField
@@ -1119,46 +1144,69 @@
return new SchemaField[0];
}
List<SchemaField> sf = new ArrayList<SchemaField>();
- for (Map.Entry<String, SchemaField[]> cfs : copyFields.entrySet()) {
- for (SchemaField cf : cfs.getValue()) {
- if (cf.getName().equals(destField)) {
- sf.add(getField(cfs.getKey()));
+ for (Map.Entry<String, List<CopyField>> cfs : copyFieldsMap.entrySet()) {
+ for (CopyField copyField : cfs.getValue()) {
+ if (copyField.getDestination().getName().equals(destField)) {
+ sf.add(copyField.getSource());
}
}
}
- return sf.toArray(new SchemaField[1]);
+ return sf.toArray(new SchemaField[sf.size()]);
}
/**
* Get all copy fields, both the static and the dynamic ones.
+ *
* @param sourceField
* @return Array of fields to copy to.
+ * @deprecated Use {@link #getCopyFieldsList(String)} instead.
*/
+ @Deprecated
public SchemaField[] getCopyFields(String sourceField) {
- // Get the dynamic ones into a list.
+ // This is the List that holds all the results, dynamic or not.
List<SchemaField> matchCopyFields = new ArrayList<SchemaField>();
+ // Get the dynamic results into the list.
for(DynamicCopy dynamicCopy : dynamicCopyFields) {
if(dynamicCopy.matches(sourceField)) {
matchCopyFields.add(dynamicCopy.getTargetField(sourceField));
}
}
- // Get the fixed ones, if there are any.
- SchemaField[] fixedCopyFields = copyFields.get(sourceField);
-
- boolean appendFixed = copyFields.containsKey(sourceField);
-
- // Construct the results by concatenating dynamic and fixed into a results
array.
-
- SchemaField[] results = new SchemaField[matchCopyFields.size() +
(appendFixed ? fixedCopyFields.length : 0)];
+ // Get the fixed ones, if there are any and add them.
+ final List<CopyField> copyFields = copyFieldsMap.get(sourceField);
+ if (copyFields!=null) {
+ final Iterator<CopyField> it = copyFields.iterator();
+ while (it.hasNext()) {
+ matchCopyFields.add(it.next().getDestination());
+ }
+ }
- matchCopyFields.toArray(results);
+ // Construct the results by transforming the list into an array.
+ return matchCopyFields.toArray(new SchemaField[matchCopyFields.size()]);
+ }
- if(appendFixed) {
- System.arraycopy(fixedCopyFields, 0, results, matchCopyFields.size(),
fixedCopyFields.length);
+ /**
+ * Get all copy fields for a specified source field, both static
+ * and dynamic ones.
+ * @param sourceField
+ * @return List of CopyFields to copy to.
+ * @since solr 1.4
+ */
+ // This is useful when we need the maxChars param of each CopyField
+ public List<CopyField> getCopyFieldsList(final String sourceField){
+ final List<CopyField> result = new ArrayList<CopyField>();
+ for (DynamicCopy dynamicCopy : dynamicCopyFields) {
+ if (dynamicCopy.matches(sourceField)) {
+ result.add(new CopyField(getField(sourceField),
dynamicCopy.getTargetField(sourceField), dynamicCopy.maxChars));
+ }
+ }
+ List<CopyField> fixedCopyFields = copyFieldsMap.get(sourceField);
+ if (fixedCopyFields != null)
+ {
+ result.addAll(fixedCopyFields);
}
- return results;
+ return result;
}
/**
Modified: lucene/solr/trunk/src/java/org/apache/solr/update/DocumentBuilder.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/update/DocumentBuilder.java?rev=721758&r1=721757&r2=721758&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/update/DocumentBuilder.java
(original)
+++ lucene/solr/trunk/src/java/org/apache/solr/update/DocumentBuilder.java Sat
Nov 29 20:46:20 2008
@@ -29,6 +29,7 @@
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
+import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.DateField;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
@@ -120,15 +121,15 @@
// Check if we should copy this field to any other fields.
// This could happen whether it is explicit or not.
- SchemaField[] destArr = schema.getCopyFields(name);
- if (destArr != null) {
- for (SchemaField destField : destArr) {
- addSingleField(destField,val,boost);
+ final List<CopyField> copyFields = schema.getCopyFieldsList(name);
+ if (copyFields != null) {
+ for(CopyField cf : copyFields) {
+ addSingleField(cf.getDestination(), cf.getLimitedValue( val ), boost);
}
}
// error if this field name doesn't match anything
- if (sfield==null && (destArr==null || destArr.length==0)) {
+ if (sfield==null && (copyFields==null || copyFields.size()==0)) {
throw new SolrException(
SolrException.ErrorCode.BAD_REQUEST,"ERROR:unknown field '" + name + "'");
}
}
@@ -218,7 +219,7 @@
sfield.getName() + ": " +field.getValue() );
}
- SchemaField[] destArr = schema.getCopyFields(name);
+ final List<CopyField> copyFields = schema.getCopyFieldsList(name);
// load each field value
boolean hasField = false;
@@ -246,8 +247,10 @@
}
}
- // Add the copy fields
- for( SchemaField sf : destArr ) {
+ // Check if we should copy this field to any other fields.
+ // This could happen whether it is explicit or not.
+ for( CopyField cf : copyFields ) {
+ SchemaField sf = cf.getDestination();
// check if the copy field is a multivalued or not
if( !sf.multiValued() && out.get( sf.getName() ) != null ) {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,
@@ -256,7 +259,7 @@
}
used = true;
- Field f = sf.createField( val, boost );
+ Field f = sf.createField( cf.getLimitedValue( val ), boost );
if( f != null ) { // null fields are not added
out.add( f );
}
Added: lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java?rev=721758&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java (added)
+++ lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java Sat
Nov 29 20:46:20 2008
@@ -0,0 +1,180 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.schema;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.solr.common.params.CommonParams;
+import org.apache.solr.common.params.MapSolrParams;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.util.AbstractSolrTestCase;
+import org.junit.Test;
+
+/**
+ * This is a simple test to make sure the <code>CopyField</code> works.
+ * It uses its own special schema file.
+ *
+ * @since solr 1.4
+ */
+public class CopyFieldTest extends AbstractSolrTestCase {
+
+  /** Names the schema file this test runs against. */
+  @Override
+  public String getSchemaFile() {
+    return "schema-copyfield-test.xml";
+  }
+
+  /** Names the solrconfig file this test runs against. */
+  @Override
+  public String getSolrConfigFile() {
+    return "solrconfig.xml";
+  }
+
+  @Override
+  public void setUp() throws Exception {
+    super.setUp();
+  }
+
+  @Override
+  public void tearDown() throws Exception {
+    super.tearDown();
+  }
+
+  /**
+   * The two-argument constructor must reject a null source and/or a null
+   * destination with an IllegalArgumentException.
+   */
+  @Test
+  public void testCopyFieldSchemaFieldSchemaField() {
+    try {
+      new CopyField(new SchemaField("source", new TextField()), null);
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+    try {
+      new CopyField(null, new SchemaField("destination", new TextField()));
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+    try {
+      new CopyField(null, null);
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+  }
+
+  /**
+   * The three-argument constructor must reject null fields and a negative
+   * maxChars, and must accept <code>CopyField.UNLIMITED</code>.
+   */
+  @Test
+  public void testCopyFieldSchemaFieldSchemaFieldInt() {
+    try {
+      new CopyField(null,
+          new SchemaField("destination", new TextField()), 1000);
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+    try {
+      new CopyField(new SchemaField("source", new TextField()), null,
+          1000);
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+    try {
+      new CopyField(null, null, 1000);
+      fail("CopyField failed with null SchemaField argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains("can't be NULL"));
+    }
+    try {
+      new CopyField(new SchemaField("source", new TextField()),
+          new SchemaField("destination", new TextField()), -1000);
+      fail("CopyField failed with negative length argument.");
+    } catch (IllegalArgumentException e) {
+      assertTrue(e.getLocalizedMessage().contains(
+          "can't have a negative value"));
+    }
+    // Must not throw: UNLIMITED is a legal maxChars value.
+    new CopyField(new SchemaField("source", new TextField()),
+        new SchemaField("destination", new TextField()), CopyField.UNLIMITED);
+  }
+
+  /** getSource() returns the field passed as the first constructor argument. */
+  @Test
+  public void testGetSource() {
+    final CopyField copyField = new CopyField(new SchemaField("source",
+        new TextField()), new SchemaField("destination",
+        new TextField()), 1000);
+    assertEquals("source", copyField.getSource().name);
+  }
+
+  /** getDestination() returns the field passed as the second constructor argument. */
+  @Test
+  public void testGetDestination() {
+    final CopyField copyField = new CopyField(new SchemaField("source",
+        new TextField()), new SchemaField("destination",
+        new TextField()), 1000);
+    assertEquals("destination", copyField.getDestination().name);
+  }
+
+  /** getMaxChars() returns the limit passed to the constructor. */
+  @Test
+  public void testGetMaxChars() {
+    final CopyField copyField = new CopyField(new SchemaField("source",
+        new TextField()), new SchemaField("destination",
+        new TextField()), 1000);
+    assertEquals(1000, copyField.getMaxChars());
+  }
+
+  /**
+   * Indexes one document and checks that <code>text_en</code> matches both
+   * "simple" and "functionality", while <code>highlight</code> matches
+   * "simple" but not "functionality".
+   */
+  // NOTE(review): presumably the schema copies text_en into highlight with a
+  // maxChars limit; the copyField definitions are not visible here - confirm.
+  // NOTE(review): the XPath expressions were mangled by e-mail obfuscation in
+  // the archived copy. "*[@numFound=" is unambiguous; the "int" and "arr"
+  // element names are reconstructed - confirm against the repository.
+  @Test
+  public void testCopyFieldFunctionality()
+  {
+    SolrCore core = h.getCore();
+    assertU(adoc("id", "10", "title", "test copy field", "text_en", "this is a simple test of the copy field functionality"));
+    assertU(commit());
+
+    Map<String,String> args = new HashMap<String, String>();
+    args.put( CommonParams.Q, "text_en:simple" );
+    args.put( "indent", "true" );
+    SolrQueryRequest req = new LocalSolrQueryRequest( core, new MapSolrParams( args) );
+
+    assertQ("Make sure they got in", req
+        ,"//*[@numFound='1']"
+        ,"//result/doc[1]/int[@name='id'][.='10']"
+        );
+
+    args = new HashMap<String, String>();
+    args.put( CommonParams.Q, "highlight:simple" );
+    args.put( "indent", "true" );
+    req = new LocalSolrQueryRequest( core, new MapSolrParams( args) );
+    assertQ("dynamic source", req
+        ,"//*[@numFound='1']"
+        ,"//result/doc[1]/int[@name='id'][.='10']"
+        ,"//result/doc[1]/arr[@name='highlight']/str[.='this is a simple test of ']"
+        );
+
+    args = new HashMap<String, String>();
+    args.put( CommonParams.Q, "text_en:functionality" );
+    args.put( "indent", "true" );
+    req = new LocalSolrQueryRequest( core, new MapSolrParams( args) );
+    assertQ("Make sure they got in", req
+        ,"//*[@numFound='1']");
+
+    args = new HashMap<String, String>();
+    args.put( CommonParams.Q, "highlight:functionality" );
+    args.put( "indent", "true" );
+    req = new LocalSolrQueryRequest( core, new MapSolrParams( args) );
+    assertQ("dynamic source", req
+        ,"//*[@numFound='0']");
+  }
+}
Propchange: lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/solr/trunk/src/test/org/apache/solr/schema/CopyFieldTest.java
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL
Added: lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml
URL:
http://svn.apache.org/viewvc/lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml?rev=721758&view=auto
==============================================================================
--- lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml
(added)
+++ lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml
Sat Nov 29 20:46:20 2008
@@ -0,0 +1,468 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- The Solr schema file. This file should be named "schema.xml" and
+ should be located where the classloader for the Solr webapp can find it.
+
+ This schema is used for testing, and as such has everything and the
+ kitchen sink thrown in. See example/solr/conf/schema.xml for a
+ more concise example.
+
+ $Id$
+ $Source:
/cvs/main/searching/solr-configs/test/WEB-INF/classes/schema.xml,v $
+ $Name: $
+ -->
+
+<schema name="test" version="1.0">
+ <types>
+
+ <!-- field type definitions... note that the "name" attribute is
+ just a label to be used by field definitions. The "class"
+ attribute and any other attributes determine the real type and
+ behavior of the fieldtype.
+ -->
+
+ <!-- numeric field types that store and index the text
+ value verbatim (and hence don't sort correctly or support range
queries.)
+ These are provided more for backward compatibility, allowing one
+ to create a schema that matches an existing lucene index.
+ -->
+ <fieldType name="integer" class="solr.IntField"/>
+ <fieldType name="long" class="solr.LongField"/>
+ <fieldtype name="float" class="solr.FloatField"/>
+ <fieldType name="double" class="solr.DoubleField"/>
+
+ <!-- numeric field types that manipulate the value into
+ a string value that isn't human readable in its internal form,
+ but sorts correctly and supports range queries.
+
+ If sortMissingLast="true" then a sort on this field will cause
documents
+ without the field to come after documents with the field,
+ regardless of the requested sort order.
+ If sortMissingFirst="true" then a sort on this field will cause
documents
+ without the field to come before documents with the field,
+ regardless of the requested sort order.
+ If sortMissingLast="false" and sortMissingFirst="false" (the default),
+ then default lucene sorting will be used which places docs without the
field
+ first in an ascending sort and last in a descending sort.
+ -->
+ <fieldtype name="sint" class="solr.SortableIntField"
sortMissingLast="true"/>
+ <fieldtype name="slong" class="solr.SortableLongField"
sortMissingLast="true"/>
+ <fieldtype name="sfloat" class="solr.SortableFloatField"
sortMissingLast="true"/>
+ <fieldtype name="sdouble" class="solr.SortableDoubleField"
sortMissingLast="true"/>
+
+ <!-- bcd versions of sortable numeric type may provide smaller
+ storage space and support very large numbers.
+ -->
+ <fieldtype name="bcdint" class="solr.BCDIntField" sortMissingLast="true"/>
+ <fieldtype name="bcdlong" class="solr.BCDLongField"
sortMissingLast="true"/>
+ <fieldtype name="bcdstr" class="solr.BCDStrField" sortMissingLast="true"/>
+
+ <!-- Field type demonstrating an Analyzer failure -->
+ <fieldtype name="failtype1" class="solr.TextField">
+ <analyzer type="index">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="0" catenateWords="0"
catenateNumbers="0" catenateAll="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+
+ <!-- Demonstrating ignoreCaseChange -->
+ <fieldtype name="wdf_nocase" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="0" catenateWords="0"
catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+
+
+ <!-- HighlitText optimizes storage for (long) columns which will be
highlit -->
+ <fieldtype name="highlittext" class="solr.TextField"
compressThreshold="345" />
+
+ <fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+ <fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
+
+ <!-- format for date is 1995-12-31T23:59:59.999Z and only the fractional
+ seconds part (.999) is optional.
+ -->
+ <fieldtype name="date" class="solr.DateField" sortMissingLast="true"/>
+
+ <!-- solr.TextField allows the specification of custom
+ text analyzers specified as a tokenizer and a list
+ of token filters.
+ -->
+ <fieldtype name="text" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StandardFilterFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory"/>
+ <!-- lucene PorterStemFilterFactory deprecated
+ <filter class="solr.PorterStemFilterFactory"/>
+ -->
+ <filter class="solr.EnglishPorterFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+
+
+ <fieldtype name="nametext" class="solr.TextField">
+ <analyzer class="org.apache.lucene.analysis.WhitespaceAnalyzer"/>
+ </fieldtype>
+
+ <fieldtype name="teststop" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.LowerCaseTokenizerFactory"/>
+ <filter class="solr.StandardFilterFactory"/>
+ <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
+ </analyzer>
+ </fieldtype>
+
+ <!-- fieldtypes in this section isolate tokenizers and tokenfilters for
testing -->
+ <fieldtype name="lowertok" class="solr.TextField">
+ <analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
+ </fieldtype>
+ <fieldtype name="keywordtok" class="solr.TextField">
+ <analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
+ </fieldtype>
+ <fieldtype name="standardtok" class="solr.TextField">
+ <analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
+ </fieldtype>
+ <fieldtype name="lettertok" class="solr.TextField">
+ <analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
+ </fieldtype>
+ <fieldtype name="whitetok" class="solr.TextField">
+ <analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
+ </fieldtype>
+ <fieldtype name="HTMLstandardtok" class="solr.TextField">
+ <analyzer><tokenizer
class="solr.HTMLStripStandardTokenizerFactory"/></analyzer>
+ </fieldtype>
+ <fieldtype name="HTMLwhitetok" class="solr.TextField">
+ <analyzer><tokenizer
class="solr.HTMLStripWhitespaceTokenizerFactory"/></analyzer>
+ </fieldtype>
+ <fieldtype name="standardtokfilt" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.StandardFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+ <fieldtype name="standardfilt" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.StandardFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+ <fieldtype name="lowerfilt" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+ <fieldtype name="patternreplacefilt" class="solr.TextField">
+ <analyzer type="index">
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
+ <filter class="solr.PatternReplaceFilterFactory"
+ pattern="([^a-zA-Z])" replacement="_" replace="all"
+ />
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
+ </analyzer>
+ </fieldtype>
+ <fieldtype name="porterfilt" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+ <!-- fieldtype name="snowballfilt" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.SnowballPorterFilterFactory"/>
+ </analyzer>
+ </fieldtype -->
+ <fieldtype name="engporterfilt" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.EnglishPorterFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+ <fieldtype name="custengporterfilt" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.EnglishPorterFilterFactory"
protected="protwords.txt"/>
+ </analyzer>
+ </fieldtype>
+ <fieldtype name="stopfilt" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true"/>
+ </analyzer>
+ </fieldtype>
+ <fieldtype name="custstopfilt" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
+ </analyzer>
+ </fieldtype>
+ <fieldtype name="lengthfilt" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.LengthFilterFactory" min="2" max="5"/>
+ </analyzer>
+ </fieldtype>
+
+ <fieldtype name="subword" class="solr.TextField" multiValued="true"
positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory"/>
+ <filter class="solr.EnglishPorterFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="0"
catenateNumbers="0" catenateAll="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.StopFilterFactory"/>
+ <filter class="solr.EnglishPorterFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+
+ <!-- more flexible in matching skus, but more chance of a false match -->
+ <fieldtype name="skutype1" class="solr.TextField">
+ <analyzer type="index">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="1" generateNumberParts="1" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="0" generateNumberParts="0" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+
+ <!-- less flexible in matching skus, but less chance of a false match -->
+ <fieldtype name="skutype2" class="solr.TextField">
+ <analyzer type="index">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="0" generateNumberParts="0" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.WordDelimiterFilterFactory"
generateWordParts="0" generateNumberParts="0" catenateWords="1"
catenateNumbers="1" catenateAll="0"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+
+ <!-- field type for testing SynonymFilterFactory with synonyms.txt -->
+ <fieldtype name="syn" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter name="syn" class="solr.SynonymFilterFactory"
synonyms="synonyms.txt"/>
+ </analyzer>
+ </fieldtype>
+
+ <!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
+ synonyms "better"
+ -->
+ <fieldtype name="dedup" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory"
+ synonyms="synonyms.txt" expand="true" />
+ <filter class="solr.EnglishPorterFilterFactory"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
+ </analyzer>
+ </fieldtype>
+
+ <fieldtype name="unstored" class="solr.StrField" indexed="true"
stored="false"/>
+
+
+ <fieldtype name="textgap" class="solr.TextField" multiValued="true"
positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+
+ </types>
+
+
+ <fields>
+ <field name="id" type="integer" indexed="true" stored="true"
multiValued="false" required="false"/>
+ <field name="name" type="nametext" indexed="true" stored="true"/>
+ <field name="text" type="text" indexed="true" stored="false"/>
+ <field name="subject" type="text" indexed="true" stored="true"/>
+ <field name="title" type="nametext" indexed="true" stored="true"/>
+ <field name="weight" type="float" indexed="true" stored="true"/>
+ <field name="bday" type="date" indexed="true" stored="true"/>
+
+ <field name="title_stemmed" type="text" indexed="true" stored="false"/>
+ <field name="title_lettertok" type="lettertok" indexed="true"
stored="false"/>
+
+ <field name="syn" type="syn" indexed="true" stored="true"/>
+
+ <!-- to test property inheritance and overriding -->
+ <field name="shouldbeunstored" type="unstored" />
+ <field name="shouldbestored" type="unstored" stored="true"/>
+ <field name="shouldbeunindexed" type="unstored" indexed="false"
stored="true"/>
+
+
+ <!-- test different combinations of indexed and stored -->
+ <field name="bind" type="boolean" indexed="true" stored="false"/>
+ <field name="bsto" type="boolean" indexed="false" stored="true"/>
+ <field name="bindsto" type="boolean" indexed="true" stored="true"/>
+ <field name="isto" type="integer" indexed="false" stored="true"/>
+ <field name="iind" type="integer" indexed="true" stored="false"/>
+ <field name="ssto" type="string" indexed="false" stored="true"/>
+ <field name="sind" type="string" indexed="true" stored="false"/>
+ <field name="sindsto" type="string" indexed="true" stored="true"/>
+
+ <!-- test combinations of term vector settings -->
+ <field name="test_basictv" type="text" termVectors="true"/>
+ <field name="test_notv" type="text" termVectors="false"/>
+ <field name="test_postv" type="text" termVectors="true"
termPositions="true"/>
+ <field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
+ <field name="test_posofftv" type="text" termVectors="true"
+ termPositions="true" termOffsets="true"/>
+
+ <!-- test highlit field settings -->
+ <field name="test_hlt" type="highlittext" indexed="true" compressed="true"/>
+ <field name="test_hlt_off" type="highlittext" indexed="true"
compressed="false"/>
+
+ <!-- fields to test individual tokenizers and tokenfilters -->
+ <field name="teststop" type="teststop" indexed="true" stored="true"/>
+ <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
+ <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
+ <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
+ <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true"
stored="true"/>
+ <field name="lettertok" type="lettertok" indexed="true" stored="true"/>
+ <field name="whitetok" type="whitetok" indexed="true" stored="true"/>
+ <field name="HTMLwhitetok" type="HTMLwhitetok" indexed="true"
stored="true"/>
+ <field name="standardtokfilt" type="standardtokfilt" indexed="true"
stored="true"/>
+ <field name="standardfilt" type="standardfilt" indexed="true"
stored="true"/>
+ <field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
+ <field name="patternreplacefilt" type="patternreplacefilt" indexed="true"
stored="true"/>
+ <field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
+ <field name="engporterfilt" type="engporterfilt" indexed="true"
stored="true"/>
+ <field name="custengporterfilt" type="custengporterfilt" indexed="true"
stored="true"/>
+ <field name="stopfilt" type="stopfilt" indexed="true" stored="true"/>
+ <field name="custstopfilt" type="custstopfilt" indexed="true"
stored="true"/>
+ <field name="lengthfilt" type="lengthfilt" indexed="true" stored="true"/>
+ <field name="dedup" type="dedup" indexed="true" stored="true"/>
+ <field name="wdf_nocase" type="wdf_nocase" indexed="true" stored="true"/>
+
+ <field name="numberpartfail" type="failtype1" indexed="true" stored="true"/>
+
+ <field name="nullfirst" type="string" indexed="true" stored="true"
sortMissingFirst="true"/>
+
+ <field name="subword" type="subword" indexed="true" stored="true"/>
+ <field name="sku1" type="skutype1" indexed="true" stored="true"/>
+ <field name="sku2" type="skutype2" indexed="true" stored="true"/>
+
+ <field name="textgap" type="textgap" indexed="true" stored="true"/>
+
+ <field name="timestamp" type="date" indexed="true" stored="true"
default="NOW" multiValued="false"/>
+ <field name="multiDefault" type="string" indexed="true" stored="true"
default="muLti-Default" multiValued="true"/>
+ <field name="intDefault" type="sint" indexed="true" stored="true"
default="42" multiValued="false"/>
+
+ <!-- test maxChars copyField attribute -->
+ <field name="text_fr" type="text" indexed="true" stored="true"
+ termVectors="true" termPositions="true" termOffsets="true"/>
+ <field name="text_en" type="text" indexed="true" stored="true"
+ termVectors="true" termPositions="true" termOffsets="true"/>
+ <field name="highlight" type="text" indexed="true" stored="true"
+ termVectors="true" termPositions="true" termOffsets="true"/>
+
+
+
+ <!-- Dynamic field definitions. If a field name is not found, dynamicFields
+ will be used if the name matches any of the patterns.
+ RESTRICTION: the glob-like pattern in the name attribute must have
+ a "*" only at the start or the end.
+ EXAMPLE: name="*_i" will match any field ending in _i (like myid_i,
z_i)
+ Longer patterns will be matched first. If equal-size patterns
+ both match, the first appearing in the schema will be used.
+ -->
+ <dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
+ <dynamicField name="*_s" type="string" indexed="true" stored="true"/>
+ <dynamicField name="*_s1" type="string" indexed="true" stored="true"
multiValued="false"/>
+ <dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
+ <dynamicField name="*_t" type="text" indexed="true" stored="true"/>
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
+ <dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>
+ <dynamicField name="*_d" type="sdouble" indexed="true" stored="true"/>
+ <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
+ <dynamicField name="*_bcd" type="bcdstr" indexed="true" stored="true"/>
+
+ <dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
+ <dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
+ <dynamicField name="t_*" type="text" indexed="true" stored="true"/>
+ <dynamicField name="tv_*" type="text" indexed="true" stored="true"
+ termVectors="true" termPositions="true" termOffsets="true"/>
+
+ <!-- special fields for dynamic copyField test -->
+ <dynamicField name="dynamic_*" type="string" indexed="true" stored="true"/>
+ <dynamicField name="*_dynamic" type="string" indexed="true" stored="true"/>
+
+ <!-- for testing to ensure that longer patterns are matched first -->
+ <dynamicField name="*aa" type="string" indexed="true" stored="true"/>
+ <dynamicField name="*aaa" type="integer" indexed="false" stored="true"/>
+
+ <!-- ignored because not stored or indexed -->
+ <dynamicField name="*_ignored" type="text" indexed="false" stored="false"/>
+
+ <!-- test maxChars copyField attribute -->
+ <dynamicField name="text_*" type="text" indexed="true" stored="true"
+ termVectors="true" termPositions="true" termOffsets="true"/>
+
+ </fields>
+
+ <defaultSearchField>text</defaultSearchField>
+ <uniqueKey>id</uniqueKey>
+
+ <!-- copyField commands copy one field to another at the time a document
+ is added to the index. It's used either to index the same field
different
+ ways, or to add multiple fields to the same field for easier/faster
searching.
+ -->
+ <copyField source="title" dest="title_stemmed"/>
+ <copyField source="title" dest="title_lettertok"/>
+
+ <copyField source="title" dest="text"/>
+ <copyField source="subject" dest="text"/>
+
+ <copyField source="*_t" dest="text"/>
+
+ <!-- dynamic destination -->
+ <copyField source="*_dynamic" dest="dynamic_*"/>
+
+ <!-- test maxChars copyField attribute -->
+ <copyField source="text_fr" dest="highlight" maxChars="25" />
+ <copyField source="text_en" dest="highlight" maxChars="25" />
+ <copyField source="text_*" dest="highlight" maxChars="25" />
+
+ <!-- Similarity is the scoring routine for each document vs a query.
+ A custom similarity may be specified here, but the default is fine
+ for most applications.
+ -->
+ <!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
+
+</schema>
Propchange:
lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange:
lucene/solr/trunk/src/test/test-files/solr/conf/schema-copyfield-test.xml
------------------------------------------------------------------------------
svn:keywords = Date Author Id Revision HeadURL