Author: ivol37 at gmail.com
Date: Wed Jan 26 13:55:27 2011
New Revision: 710

Log:


Added:
   
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_schema.xml
   
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_solrconfig.xml
   
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_schema.xml
   
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_solrconfig.xml
   sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/solr.xml
Removed:
   
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/NutchIndex.java
   sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/default_schema.xml
   
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/default_solrconfig.xml
   sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/solr.xml
Modified:
   sandbox/ivol/amdatu-searchandindex/solr/pom.xml
   
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrApi.java
   
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrDaemonServiceImpl.java
   
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrTest.java
   
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/osgi/Activator.java

Modified: sandbox/ivol/amdatu-searchandindex/solr/pom.xml
==============================================================================
--- sandbox/ivol/amdatu-searchandindex/solr/pom.xml     (original)
+++ sandbox/ivol/amdatu-searchandindex/solr/pom.xml     Wed Jan 26 13:55:27 2011
@@ -101,24 +101,8 @@
       <type>jar</type>
       <scope>compile</scope>
     </dependency>
-    <dependency>
-      <groupId>org.apache.nutch</groupId>
-      <artifactId>nutch</artifactId>
-      <version>2.0-dev</version>
-      <scope>compile</scope>
-    </dependency>
   </dependencies>
 
-
-
-  <repositories>
-    <repository>
-      <id>apacherepo</id>
-      <name>Apache Repository</name>
-      <url>https://repository.apache.org/content/groups/public</url>
-    </repository>
-  </repositories>
-
   <build>
     <plugins>
       <plugin>
@@ -147,6 +131,7 @@
               *
             </Import-Package>
             <Export-Package>
+              org.amdatu.searchandindex.solr
             </Export-Package>
           </instructions>
         </configuration>

Modified: 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrApi.java
==============================================================================
--- 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrApi.java
      (original)
+++ 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrApi.java
      Wed Jan 26 13:55:27 2011
@@ -46,8 +46,8 @@
 
 public class SolrApi {
     // Statics
-    private static final String DEFAULT_SCHEMA = "default_schema.xml";
-    private static final String DEFAULT_SOLRCONFIG = "default_solrconfig.xml";
+    private static final String DEFAULT_SCHEMA = "nutch_schema.xml";
+    private static final String DEFAULT_SOLRCONFIG = "nutch_solrconfig.xml";
 
     // Service dependencies injected by the dependency manager
     private volatile LogService m_logService;

Modified: 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrDaemonServiceImpl.java
==============================================================================
--- 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrDaemonServiceImpl.java
        (original)
+++ 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrDaemonServiceImpl.java
        Wed Jan 26 13:55:27 2011
@@ -20,7 +20,6 @@
 import java.io.IOException;
 import java.net.URL;
 import java.util.Dictionary;
-import java.util.Enumeration;
 
 import org.amdatu.core.config.templates.ConfigTemplateManager;
 import org.amdatu.core.tenant.TenantManagementService;
@@ -33,7 +32,6 @@
 import org.apache.solr.core.CoreContainer;
 import org.apache.solr.core.SolrConfig;
 import org.apache.solr.core.SolrCore;
-import org.osgi.framework.Bundle;
 import org.osgi.framework.BundleContext;
 import org.osgi.service.cm.ConfigurationException;
 import org.osgi.service.cm.ManagedService;
@@ -44,7 +42,9 @@
  */
 public class SolrDaemonServiceImpl implements SolrService, ManagedService {
     // Statics
-    public static final String SOLR_CONFIG_XML = "solr.xml";
+    private static final String DEFAULT_SOLAR_CONFIG = 
"default_solrconfig.xml";
+    private static final String DEFAULT_SCHEMA = "default_schema.xml";
+    private static final String SOLR = "solr.xml";
     private static final String CONFIG_DIR = "conf";
 
     // Services injected by the Felix dependency manager
@@ -57,45 +57,30 @@
     private CoreContainer m_coreContainer;
     private File m_workDir;
 
-    /**
-     * The init() method is invoked by the Felix dependency manager.
-     */
-    @SuppressWarnings("unchecked")
     public void init() {
         try {
             m_logService.log(LogService.LOG_INFO, "Initializing Solr 
configuration");
 
             // Initialize storage configuration
-            // Load the URL of the storage-conf.xml and write it file using 
the config template
+            // Load the URL of the solr.xml and write it to file using the config 
template
             // manager, which automatically replaces configuration entries in 
that file
-            File storageConfigFile = new File(m_workDir, SOLR_CONFIG_XML);
-            if (!storageConfigFile.exists()) {
+            File solrFile = new File(m_workDir, SOLR);
+            if (!solrFile.exists()) {
                 m_workDir.mkdirs();
-                Bundle bundle = m_bundleContext.getBundle();
-                URL url = bundle.getResource(SOLR_CONFIG_XML);
                 try {
-                    // Replace placeholders in the solr.xml config file
-                    m_configTemplateManager.writeConfiguration(url, 
storageConfigFile);
+                    // Write the solr.xml file to the solr root directory
+                    copyConfig("conf/" + SOLR, solrFile);
 
                     // Solr uses this system property to find its storage 
location.
-                    System.setProperty("solr.solr.home", 
storageConfigFile.getParentFile().getAbsolutePath());
+                    System.setProperty("solr.solr.home", 
solrFile.getParentFile().getAbsolutePath());
 
                     // Update the main config
                     File mainConfigDir = new File(m_workDir, "/" + CONFIG_DIR);
                     mainConfigDir.mkdirs();
 
-                    // Find all entries in our 'conf' directory.
-                    final Enumeration<URL> resources = 
bundle.findEntries(CONFIG_DIR, "*.*", true);
-                    if (resources != null) {
-                        while (resources.hasMoreElements()) {
-                            final URL resource = resources.nextElement();
-                            File coreConfFile = new File(mainConfigDir, 
resource.getFile().replace(CONFIG_DIR + "/", ""));
-                            if (!coreConfFile.exists()) {
-                                // Only write this file if it does not yet 
exist
-                                
m_configTemplateManager.writeConfiguration(resource, coreConfFile);
-                            }
-                        }
-                    }
+                    // Write default schema and solr config to /conf
+                    copyConfig("conf/" + DEFAULT_SOLAR_CONFIG, new 
File(mainConfigDir, "solrconfig.xml"));
+                    copyConfig("conf/" + DEFAULT_SCHEMA, new 
File(mainConfigDir, "schema.xml"));
                 } catch (IOException e) {
                     m_logService.log(LogService.LOG_ERROR, "Could not replace 
configuration entries in storage-conf.xml", e);
                 }
@@ -195,4 +180,21 @@
     private ServiceDependency createServiceDependency(Class<?> clazz) {
         return 
m_dependencyManager.createServiceDependency().setService(clazz).setRequired(true);
     }
+
+    /**
+     * Copies a configuration resource from this bundle to the given target file, using the
+     * config template manager to write it.
+     *
+     * @param source bundle-relative path of the resource, including the "conf/" directory
+     * @param target file to write the configuration to
+     * @throws IOException if the resource is not found in the bundle or writing it fails
+     */
+    private void copyConfig(String source, File target) throws IOException {
+        // Callers already pass the "conf/" prefix; prepending it again would look up "conf/conf/...".
+        URL solrConfig = m_bundleContext.getBundle().getResource(source);
+        if (solrConfig == null) {
+            throw new IOException("Configuration resource not found in bundle: " + source);
+        }
+        m_configTemplateManager.writeConfiguration(solrConfig, target);
+    }
 }

Modified: 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrTest.java
==============================================================================
--- 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrTest.java
     (original)
+++ 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrTest.java
     Wed Jan 26 13:55:27 2011
@@ -31,6 +31,7 @@
  */
 public class SolrTest {
     private volatile TenantManagementService m_tenantService;
+    private static int ID = 0;
 
     public void start() {
         try {
@@ -82,7 +83,11 @@
                 document.addField( "name", "doc1", 1.0f );
                 document.addField( "price", 10 );
 
-                streamPUT(baseUrl1 + "/course", 
SolrUtil.toXMLStream(document));
+                // Add 10,000 Solr documents to each of the two indices
+                for (int i=0; i<10000; i++) {
+                    streamPUT(baseUrl1 + "/course", 
SolrUtil.toXMLStream(generateSolrDocument()));
+                    streamPUT(baseUrl2 + "/course", 
SolrUtil.toXMLStream(generateSolrDocument()));
+                }
             }
             catch (InterruptedException e) {
                 // TODO Auto-generated catch block
@@ -95,6 +100,24 @@
         }
     }
 
+    /**
+     * Generates a Solr input document for test purposes, consuming the next value of the
+     * static ID counter.
+     *
+     * @return a new document with "id", "name" and "price" fields; the price is a random
+     *         value in the range [0, 100)
+     */
+    private SolrInputDocument generateSolrDocument() {
+        // Take the id once, so the "name" field refers to the same number as the "id" field
+        // (using ID again after ID++ would name document 0 "Solr document 1").
+        final int id = ID++;
+        SolrInputDocument document = new SolrInputDocument();
+        document.addField( "id", id, 0.5f);
+        document.addField( "name", "Solr document " + id, 1.0f );
+        document.addField( "price", 100*Math.random(), 0.5f);
+        return document;
+    }
+
     private RESTResult invokeGET(String url) {
         return invoke(url, javax.ws.rs.HttpMethod.GET);
     }

Modified: 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/osgi/Activator.java
==============================================================================
--- 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/osgi/Activator.java
    (original)
+++ 
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/osgi/Activator.java
    Wed Jan 26 13:55:27 2011
@@ -20,7 +20,6 @@
 import org.amdatu.core.tenant.TenantManagementService;
 import org.amdatu.searchandindex.solr.SolrRestService;
 import org.amdatu.searchandindex.solr.SolrService;
-import org.amdatu.searchandindex.solr.impl.NutchIndex;
 import org.amdatu.searchandindex.solr.impl.SolrDaemonServiceImpl;
 import org.amdatu.searchandindex.solr.impl.SolrTest;
 import org.apache.felix.dm.DependencyActivatorBase;
@@ -49,12 +48,6 @@
 
         // Create and register the Solr service.
         manager.add(createComponent()
-            .setImplementation(NutchIndex.class)
-            
.add(createServiceDependency().setService(SolrRestService.class).setRequired(true))
-            
.add(createServiceDependency().setService(TenantManagementService.class).setRequired(true)));
-
-        // Create and register the Solr service.
-        manager.add(createComponent()
             .setInterface(SolrService.class.getName(), null)
             .setImplementation(SolrDaemonServiceImpl.class)
             
.add(createServiceDependency().setService(LogService.class).setRequired(true))

Added: 
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_schema.xml
==============================================================================
--- (empty file)
+++ 
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_schema.xml
  Wed Jan 26 13:55:27 2011
@@ -0,0 +1,312 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ This is the Solr schema file. This file should be named "schema.xml" and
+ should be in the conf directory under the solr home
+ (i.e. ./solr/conf/schema.xml by default)
+ or located where the classloader for the Solr webapp can find it.
+
+ This example schema is the recommended starting point for users.
+ It should be kept correct and concise, usable out-of-the-box.
+
+ For more information, on how to customize this file, please see
+ http://wiki.apache.org/solr/SchemaXml
+
+ PERFORMANCE NOTE: this schema includes many optional features and should not
+ be used for benchmarking.  To improve performance one could
+  - set stored="false" for all fields possible (esp large fields) when you
+    only need to search on the field but don't need to return the original
+    value.
+  - set indexed="false" if you don't need to search on the field, but only
+    return the field as a result of searching on other indexed fields.
+  - remove all unneeded copyField statements
+  - for best index size and searching performance, set "indexed" to false
+    for all general text fields, use copyField to copy them to the
+    catchall "text" field, and use that for searching.
+  - For maximum indexing performance, use the StreamingUpdateSolrServer
+    java client.
+  - Remember to run the JVM in server mode, and use a higher logging level
+    that avoids logging every request
+-->
+
+<schema name="amdatu" version="1.2">
+  <!-- attribute "name" is the name of this schema and is only used for 
display purposes.
+       Applications should change this to reflect the nature of the search 
collection.
+       version="1.2" is Solr's version number for the schema syntax and 
semantics.  It should
+       not normally be changed by applications.
+       1.0: multiValued attribute did not exist, all fields are multiValued by 
nature
+       1.1: multiValued attribute introduced, false by default
+       1.2: omitTermFreqAndPositions attribute introduced, true by default 
except for text fields.
+     -->
+
+  <types>
+    <!-- field type definitions. The "name" attribute is
+       just a label to be used by field definitions.  The "class"
+       attribute and any other attributes determine the real
+       behavior of the fieldType.
+         Class names starting with "solr" refer to java classes in the
+       org.apache.solr.analysis package.
+    -->
+
+    <!-- The StrField type is not analyzed, but indexed/stored verbatim.
+       - StrField and TextField support an optional compressThreshold which
+       limits compression (if enabled in the derived fields) to values which
+       exceed a certain size (in characters).
+    -->
+    <fieldType name="string" class="solr.StrField" sortMissingLast="true" 
omitNorms="true"/>
+
+    <!-- boolean type: "true" or "false" -->
+    <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true" 
omitNorms="true"/>
+    <!--Binary data type. The data should be sent/retrieved as Base64 
encoded Strings -->
+    <fieldtype name="binary" class="solr.BinaryField"/>
+
+    <!-- The optional sortMissingLast and sortMissingFirst attributes are
+         currently supported on types that are sorted internally as strings.
+         This includes 
"string","boolean","sint","slong","sfloat","sdouble","pdate"
+       - If sortMissingLast="true", then a sort on this field will cause 
documents
+         without the field to come after documents with the field,
+         regardless of the requested sort order (asc or desc).
+       - If sortMissingFirst="true", then a sort on this field will cause 
documents
+         without the field to come before documents with the field,
+         regardless of the requested sort order.
+       - If sortMissingLast="false" and sortMissingFirst="false" (the default),
+         then default lucene sorting will be used which places docs without the
+         field first in an ascending sort and last in a descending sort.
+    -->
+
+    <!--
+      Default numeric field types. For faster range queries, consider the 
tint/tfloat/tlong/tdouble types.
+    -->
+    <fieldType name="int" class="solr.TrieIntField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="float" class="solr.TrieFloatField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="long" class="solr.TrieLongField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0" 
omitNorms="true" positionIncrementGap="0"/>
+
+    <!--
+     Numeric field types that index each value at various levels of precision
+     to accelerate range queries when the number of values between the range
+     endpoints is large. See the javadoc for NumericRangeQuery for internal
+     implementation details.
+
+     Smaller precisionStep values (specified in bits) will lead to more tokens
+     indexed per value, slightly larger index size, and faster range queries.
+     A precisionStep of 0 disables indexing at different precision levels.
+    -->
+    <fieldType name="tint" class="solr.TrieIntField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
+    <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8" 
omitNorms="true" positionIncrementGap="0"/>
+
+    <!-- The format for this date field is of the form 1995-12-31T23:59:59Z, 
and
+         is a more restricted form of the canonical representation of dateTime
+         http://www.w3.org/TR/xmlschema-2/#dateTime
+         The trailing "Z" designates UTC time and is mandatory.
+         Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
+         All other components are mandatory.
+
+         Expressions can also be used to denote calculations that should be
+         performed relative to "NOW" to determine the value, ie...
+
+               NOW/HOUR
+                  ... Round to the start of the current hour
+               NOW-1DAY
+                  ... Exactly 1 day prior to now
+               NOW/DAY+6MONTHS+3DAYS
+                  ... 6 months and 3 days in the future from the start of
+                      the current day
+
+         Consult the DateField javadocs for more information.
+
+         Note: For faster range queries, consider the tdate type
+      -->
+    <fieldType name="date" class="solr.TrieDateField" omitNorms="true" 
precisionStep="0" positionIncrementGap="0"/>
+
+    <!-- A Trie based date field for faster date range queries and date 
faceting. -->
+    <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true" 
precisionStep="6" positionIncrementGap="0"/>
+
+
+    <!--
+      Note:
+      These should only be used for compatibility with existing indexes 
(created with older Solr versions)
+      or if "sortMissingFirst" or "sortMissingLast" functionality is needed. 
Use Trie based fields instead.
+
+      Plain numeric field types that store and index the text
+      value verbatim (and hence don't support range queries, since the
+      lexicographic ordering isn't equal to the numeric ordering)
+    -->
+    <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
+    <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
+    <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
+    <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
+    <fieldType name="pdate" class="solr.DateField" sortMissingLast="true" 
omitNorms="true"/>
+
+
+    <!--
+      Note:
+      These should only be used for compatibility with existing indexes 
(created with older Solr versions)
+      or if "sortMissingFirst" or "sortMissingLast" functionality is needed. 
Use Trie based fields instead.
+
+      Numeric field types that manipulate the value into
+      a string value that isn't human-readable in its internal form,
+      but with a lexicographic ordering the same as the numeric ordering,
+      so that range queries work correctly.
+    -->
+    <fieldType name="sint" class="solr.SortableIntField" 
sortMissingLast="true" omitNorms="true"/>
+    <fieldType name="slong" class="solr.SortableLongField" 
sortMissingLast="true" omitNorms="true"/>
+    <fieldType name="sfloat" class="solr.SortableFloatField" 
sortMissingLast="true" omitNorms="true"/>
+    <fieldType name="sdouble" class="solr.SortableDoubleField" 
sortMissingLast="true" omitNorms="true"/>
+
+
+    <!-- The "RandomSortField" is not used to store or search any
+         data.  You can declare fields of this type in your schema
+         to generate pseudo-random orderings of your docs for sorting
+         purposes.  The ordering is generated based on the field name
+         and the version of the index. As long as the index version
+         remains unchanged, and the same field name is reused,
+         the ordering of the docs will be consistent.
+         If you want different pseudo-random orderings of documents,
+         for the same version of the index, use a dynamicField and
+         change the name
+     -->
+    <fieldType name="random" class="solr.RandomSortField" indexed="true" />
+
+    <!-- solr.TextField allows the specification of custom text analyzers
+         specified as a tokenizer and a list of token filters. Different
+         analyzers may be specified for indexing and querying.
+
+         The optional positionIncrementGap puts space between multiple fields 
of
+         this type on the same document, with the purpose of preventing false 
phrase
+         matching across fields.
+
+         For more info on customizing your analyzer chain, please see
+         http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
+     -->
+
+    <!-- One can also specify an existing Analyzer class that has a
+         default constructor via the class attribute on the analyzer element
+    <fieldType name="text_greek" class="solr.TextField">
+      <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
+    </fieldType>
+    -->
+
+    <!-- A text field that only splits on whitespace for exact matching of 
words -->
+    <fieldType name="text_ws" class="solr.TextField" 
positionIncrementGap="100">
+    </fieldType>
+
+    <!-- A text field that uses WordDelimiterFilter to enable splitting and 
matching of
+        words on case-change, alpha numeric boundaries, and non-alphanumeric 
chars,
+        so that a query of "wifi" or "wi fi" could match a document containing 
"Wi-Fi".
+        Synonyms and stopwords are customized by external files, and stemming 
is enabled.
+        -->
+    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
+    </fieldType>
+
+    <!-- A general unstemmed text field - good if one does not know the 
language of the field -->
+    <fieldType name="textgen" class="solr.TextField" 
positionIncrementGap="100">
+    </fieldType>
+
+    <!-- A general unstemmed text field that indexes tokens normally and also
+         reversed (via ReversedWildcardFilterFactory), to enable more efficient
+   leading wildcard queries. -->
+    <fieldType name="text_rev" class="solr.TextField" 
positionIncrementGap="100">
+    </fieldType>
+
+    <!-- since fields of this type are by default not stored or indexed,
+         any data added to them will be ignored outright.  -->
+    <fieldtype name="ignored" stored="false" indexed="false" 
multiValued="true" class="solr.StrField" />
+ </types>
+
+
+ <fields>
+   <!-- Valid attributes for fields:
+     name: mandatory - the name for the field
+     type: mandatory - the name of a previously defined type from the
+       <types> section
+     indexed: true if this field should be indexed (searchable or sortable)
+     stored: true if this field should be retrievable
+     compressed: [false] if this field should be stored using gzip compression
+       (this will only apply if the field type is compressible; among
+       the standard field types, only TextField and StrField are)
+     multiValued: true if this field may contain multiple values per document
+     omitNorms: (expert) set to true to omit the norms associated with
+       this field (this disables length normalization and index-time
+       boosting for the field, and saves some memory).  Only full-text
+       fields or fields that need an index-time boost need norms.
+     termVectors: [false] set to true to store the term vector for a
+       given field.
+       When using MoreLikeThis, fields used for similarity should be
+       stored for best performance.
+     termPositions: Store position information with the term vector.
+       This will increase storage costs.
+     termOffsets: Store offset information with the term vector. This
+       will increase storage costs.
+     default: a value that should be used if no value is specified
+       when adding a document.
+   -->
+
+   <field name="id" type="string" indexed="true" stored="true" required="true" 
/>
+
+   <!-- catchall field, containing all other searchable text fields 
(implemented
+        via copyField further on in this schema) -->
+   <field name="text" type="text" indexed="true" stored="false" 
multiValued="true"/>
+
+   <field name="timestamp" type="date" indexed="true" stored="false" 
default="NOW" multiValued="false"/>
+
+   <dynamicField name="*_i"  type="sint" indexed="true" stored="false" 
omitNorms="true" multiValued="true"/>
+   <dynamicField name="*_s"  type="string"  indexed="true"  stored="false" 
multiValued="true"/>
+   <dynamicField name="*_l"  type="long"   indexed="true"  stored="false"/>
+   <dynamicField name="*_t"  type="text"    indexed="true"  stored="false"/>
+   <dynamicField name="*_b"  type="boolean" indexed="true"  stored="false"/>
+   <dynamicField name="*_f"  type="float"  indexed="true"  stored="false"/>
+   <dynamicField name="*_d"  type="double" indexed="true"  stored="false"/>
+   <dynamicField name="*_dt" type="date"    indexed="true"  stored="false"/>
+
+   <!-- some trie-coded dynamic fields for faster range queries -->
+   <dynamicField name="*_ti" type="tint"    indexed="true"  stored="false"/>
+   <dynamicField name="*_tl" type="tlong"   indexed="true"  stored="false"/>
+   <dynamicField name="*_tf" type="tfloat"  indexed="true"  stored="false"/>
+   <dynamicField name="*_td" type="tdouble" indexed="true"  stored="false"/>
+   <dynamicField name="*_tdt" type="tdate"  indexed="true"  stored="false"/>
+
+   <dynamicField name="*_pi"  type="pint"    indexed="true"  stored="false"/>
+
+   <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
+   <dynamicField name="attr_*" type="textgen" indexed="true" stored="false" 
multiValued="true"/>
+
+   <dynamicField name="random_*" type="random" />
+
+   <!-- uncomment the following to ignore any fields that don't already match 
an existing
+        field name or dynamic field, rather than reporting them as an error.
+        alternately, change the type="ignored" to some other type e.g. "text" 
if you want
+        unknown fields indexed and/or stored by default -->
+   <!--dynamicField name="*" type="ignored" multiValued="true" /-->
+
+ </fields>
+
+ <!-- Field to use to determine and enforce document uniqueness.
+      Unless this field is marked with required="false", it will be a required 
field
+   -->
+ <uniqueKey>id</uniqueKey>
+
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
+ <defaultSearchField>text</defaultSearchField>
+
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
+ <solrQueryParser defaultOperator="OR"/>
+</schema>

Added: 
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_solrconfig.xml
==============================================================================
--- (empty file)
+++ 
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_solrconfig.xml
      Wed Jan 26 13:55:27 2011
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ 
+ http://wiki.apache.org/solr/SolrConfigXml
+ 
+-->
+
+<config>
+  <indexDefaults>
+    <!-- Sets the amount of RAM that may be used by Lucene indexing
+      for buffering added documents and deletions before they are
+      flushed to the Directory.  -->
+    <ramBufferSizeMB>32</ramBufferSizeMB>
+    <lockType>simple</lockType>
+    <!--
+     Expert:
+    Controls how often Lucene loads terms into memory -->
+    <!--<termIndexInterval>256</termIndexInterval>-->
+  </indexDefaults>
+  
+   <jmx />
+   
+     <query>
+       <maxBooleanClauses>102400</maxBooleanClauses>
+     </query>
+    
+  <updateHandler class="solr.DirectUpdateHandler2" />
+
+  <requestDispatcher handleSelect="true" >
+    <requestParsers enableRemoteStreaming="false" 
multipartUploadLimitInKB="2048" />
+  </requestDispatcher>
+  
+  <requestHandler name="standard" class="solr.StandardRequestHandler" 
default="true">
+    <arr name="last-components">
+    </arr>
+  </requestHandler>
+  
+  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
+  <requestHandler name="/admin/" 
class="org.apache.solr.handler.admin.AdminHandlers" />
+        
+  <!-- config for the admin interface --> 
+  <admin>
+    <defaultQuery>solr</defaultQuery>
+  </admin>
+
+</config>
+

Added: 
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_schema.xml
==============================================================================
--- (empty file)
+++ 
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_schema.xml
    Wed Jan 26 13:55:27 2011
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+    <!--
+        Licensed to the Apache Software Foundation (ASF) under one or
+        more contributor license agreements. See the NOTICE file
+        distributed with this work for additional information regarding
+        copyright ownership. The ASF licenses this file to You under the
+        Apache License, Version 2.0 (the "License"); you may not use
+        this file except in compliance with the License. You may obtain
+        a copy of the License at
+        http://www.apache.org/licenses/LICENSE-2.0 Unless required by
+        applicable law or agreed to in writing, software distributed
+        under the License is distributed on an "AS IS" BASIS, WITHOUT
+        WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+        See the License for the specific language governing permissions
+        and limitations under the License.
+    -->
+    <!--
+        Description: This document contains the Solr schema definition to be
+        used with the Solr integration currently built into Nutch. See
+        https://issues.apache.org/jira/browse/NUTCH-442
+        https://issues.apache.org/jira/browse/NUTCH-699 for more info.
+    -->
+<schema name="nutch" version="1.1"> <!-- index schema for documents produced by the Nutch indexing plugins listed below -->
+    <types>
+        <fieldType name="string" class="solr.StrField"
+            sortMissingLast="true" omitNorms="true"/>
+        <fieldType name="long" class="solr.LongField"
+            omitNorms="true"/>
+        <fieldType name="float" class="solr.FloatField"
+            omitNorms="true"/>
+        <fieldType name="text" class="solr.TextField"
+            positionIncrementGap="100"> <!-- full text: one analyzer chain, applied in the order listed -->
+            <analyzer>
+                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+                <filter class="solr.StopFilterFactory"
+                    ignoreCase="true" words="stopwords.txt"/>
+                <filter class="solr.WordDelimiterFilterFactory"
+                    generateWordParts="1" generateNumberParts="1"
+                    catenateWords="1" catenateNumbers="1" catenateAll="0"
+                    splitOnCaseChange="1"/>
+                <filter class="solr.LowerCaseFilterFactory"/>
+                <filter class="solr.EnglishPorterFilterFactory"
+                    protected="protwords.txt"/>
+                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+            </analyzer>
+        </fieldType>
+        <fieldType name="url" class="solr.TextField"
+            positionIncrementGap="100"> <!-- used by the host and url fields: tokenized and lower-cased, no stop words or stemming -->
+            <analyzer>
+                <tokenizer class="solr.StandardTokenizerFactory"/>
+                <filter class="solr.LowerCaseFilterFactory"/>
+                <filter class="solr.WordDelimiterFilterFactory"
+                    generateWordParts="1" generateNumberParts="1"/>
+                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+            </analyzer>
+        </fieldType>
+    </types>
+    <fields>
+        <field name="id" type="string" stored="true" indexed="true"/> <!-- document key, see uniqueKey at the end of this schema -->
+
+        <!-- core fields -->
+        <field name="segment" type="string" stored="true" indexed="false"/>
+        <field name="digest" type="string" stored="true" indexed="false"/>
+        <field name="boost" type="float" stored="true" indexed="false"/>
+
+        <!-- fields for index-basic plugin -->
+        <field name="host" type="url" stored="false" indexed="true"/>
+        <field name="site" type="string" stored="false" indexed="true"/>
+        <field name="url" type="url" stored="true" indexed="true"
+            required="true"/> <!-- the only field declared as required -->
+        <field name="content" type="text" stored="true" indexed="true"/> <!-- also the default search field, see defaultSearchField -->
+        <field name="title" type="text" stored="true" indexed="true"/>
+        <field name="cache" type="string" stored="true" indexed="false"/>
+        <field name="tstamp" type="long" stored="true" indexed="false"/>
+
+        <!-- fields for index-anchor plugin -->
+        <field name="anchor" type="string" stored="true" indexed="true"
+            multiValued="true"/>
+
+        <!-- fields for index-more plugin -->
+        <field name="type" type="string" stored="true" indexed="true"
+            multiValued="true"/>
+        <field name="contentLength" type="long" stored="true"
+            indexed="false"/>
+        <field name="lastModified" type="long" stored="true"
+            indexed="false"/>
+        <field name="date" type="string" stored="true" indexed="true"/>
+
+        <!-- fields for languageidentifier plugin -->
+        <field name="lang" type="string" stored="true" indexed="true"/>
+
+        <!-- fields for subcollection plugin -->
+        <field name="subcollection" type="string" stored="true"
+            indexed="true" multiValued="true"/>
+
+        <!-- fields for feed plugin -->
+        <field name="author" type="string" stored="true" indexed="true"/>
+        <field name="tag" type="string" stored="true" indexed="true"/>
+        <field name="feed" type="string" stored="true" indexed="true"/>
+        <field name="publishedDate" type="string" stored="true"
+            indexed="true"/>
+        <field name="updatedDate" type="string" stored="true"
+            indexed="true"/>
+    </fields>
+    <uniqueKey>id</uniqueKey>
+    <defaultSearchField>content</defaultSearchField> <!-- field searched when a query term names no field -->
+    <solrQueryParser defaultOperator="OR"/> <!-- terms without an explicit operator are combined with OR -->
+</schema>

Added: 
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_solrconfig.xml
==============================================================================
--- (empty file)
+++ 
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_solrconfig.xml
        Wed Jan 26 13:55:27 2011
@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ http://wiki.apache.org/solr/SolrConfigXml
+
+-->
+
+<config> <!-- Solr core configuration for the Nutch index; adds the /nutch search handler to the default setup -->
+  <indexDefaults>
+    <!-- Sets the amount of RAM that may be used by Lucene indexing
+      for buffering added documents and deletions before they are
+      flushed to the Directory.  -->
+    <ramBufferSizeMB>32</ramBufferSizeMB>
+    <lockType>simple</lockType>
+    <!--
+     Expert:
+    Controls how often Lucene loads terms into memory -->
+    <!--<termIndexInterval>256</termIndexInterval>-->
+  </indexDefaults>
+
+   <jmx /> <!-- JMX element without attributes -->
+
+     <query>
+       <maxBooleanClauses>102400</maxBooleanClauses> <!-- upper bound on clauses in one boolean query -->
+     </query>
+
+  <updateHandler class="solr.DirectUpdateHandler2" />
+
+  <requestDispatcher handleSelect="true" >
+    <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" /> <!-- remote streaming off; multipart uploads limited to 2048 KB -->
+  </requestDispatcher>
+
+  <requestHandler name="standard" class="solr.StandardRequestHandler" default="true"> <!-- handler used when a request names none -->
+    <arr name="last-components"> <!-- intentionally empty: no extra components are appended -->
+    </arr>
+  </requestHandler>
+
+  <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
+  <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
+
+  <requestHandler name="/nutch" class="solr.SearchHandler" > <!-- DisMax search over the Nutch fields; values below are defaults a request may override -->
+    <lst name="defaults">
+      <str name="defType">dismax</str>
+      <str name="echoParams">explicit</str>
+      <float name="tie">0.01</float>
+      <str name="qf">
+        content^0.5 anchor^1.0 title^1.2
+      </str> <!-- qf: fields that are searched, each with its boost -->
+      <str name="pf">
+        content^0.5 anchor^1.5 title^1.2 site^1.5
+      </str> <!-- pf: fields and boosts used for phrase matches -->
+      <str name="fl">
+        url
+      </str> <!-- fl: only the url field is returned unless the request asks for more -->
+      <str name="mm">
+        2&lt;-1 5&lt;-2 6&lt;90%
+      </str> <!-- mm: minimum number of optional clauses that must match -->
+      <int name="ps">100</int>
+      <bool name="hl">true</bool> <!-- highlighting on by default; the name goes in name= and the value in the element text -->
+      <str name="q.alt">*:*</str> <!-- match all documents when the request carries no query -->
+      <str name="hl.fl">title url content</str> <!-- fields that are highlighted -->
+      <str name="f.title.hl.fragsize">0</str>
+      <str name="f.title.hl.alternateField">title</str>
+      <str name="f.url.hl.fragsize">0</str>
+      <str name="f.url.hl.alternateField">url</str>
+      <str name="f.content.hl.fragmenter">regex</str>
+    </lst>
+  </requestHandler>
+
+  <!-- config for the admin interface -->
+  <admin>
+    <defaultQuery>solr</defaultQuery> <!-- query pre-filled in the admin search form -->
+  </admin>
+
+</config>
+

Added: sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/solr.xml
==============================================================================
--- (empty file)
+++ sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/solr.xml    
Wed Jan 26 13:55:27 2011
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+  <!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements. See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version
+    2.0 (the "License"); you may not use this file except in compliance
+    with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0 Unless required by
+    applicable law or agreed to in writing, software distributed under the
+    License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+    CONDITIONS OF ANY KIND, either express or implied. See the License for
+    the specific language governing permissions and limitations under the
+    License.
+  -->
+
+  <!--
+    All (relative) paths are relative to the installation path.
+    persistent: save changes made via the API to this file.
+    sharedLib: path to a lib directory that will be shared across all cores.
+  -->
+
+<solr persistent="true"> <!-- multi-core container definition; persistent is set, sharedLib is not -->
+  <!--
+    adminPath: RequestHandler path used to manage cores. If 'null' (or
+    absent), cores will not be manageable via a request handler.
+  -->
+  <cores adminPath="/solr/admin/cores"> <!-- no core entries are declared here; presumably cores are registered at runtime (TODO confirm against SolrDaemonServiceImpl) -->
+  </cores>
+</solr>
\ No newline at end of file

Reply via email to