Author: ivol37 at gmail.com
Date: Wed Jan 26 13:55:27 2011
New Revision: 710
Log:
Added:
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_schema.xml
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_solrconfig.xml
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_schema.xml
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_solrconfig.xml
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/solr.xml
Removed:
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/NutchIndex.java
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/default_schema.xml
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/default_solrconfig.xml
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/solr.xml
Modified:
sandbox/ivol/amdatu-searchandindex/solr/pom.xml
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrApi.java
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrDaemonServiceImpl.java
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrTest.java
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/osgi/Activator.java
Modified: sandbox/ivol/amdatu-searchandindex/solr/pom.xml
==============================================================================
--- sandbox/ivol/amdatu-searchandindex/solr/pom.xml (original)
+++ sandbox/ivol/amdatu-searchandindex/solr/pom.xml Wed Jan 26 13:55:27 2011
@@ -101,24 +101,8 @@
<type>jar</type>
<scope>compile</scope>
</dependency>
- <dependency>
- <groupId>org.apache.nutch</groupId>
- <artifactId>nutch</artifactId>
- <version>2.0-dev</version>
- <scope>compile</scope>
- </dependency>
</dependencies>
-
-
- <repositories>
- <repository>
- <id>apacherepo</id>
- <name>Apache Repository</name>
- <url>https://repository.apache.org/content/groups/public</url>
- </repository>
- </repositories>
-
<build>
<plugins>
<plugin>
@@ -147,6 +131,7 @@
*
</Import-Package>
<Export-Package>
+ org.amdatu.searchandindex.solr
</Export-Package>
</instructions>
</configuration>
Modified:
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrApi.java
==============================================================================
---
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrApi.java
(original)
+++
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrApi.java
Wed Jan 26 13:55:27 2011
@@ -46,8 +46,8 @@
public class SolrApi {
// Statics
- private static final String DEFAULT_SCHEMA = "default_schema.xml";
- private static final String DEFAULT_SOLRCONFIG = "default_solrconfig.xml";
+ private static final String DEFAULT_SCHEMA = "nutch_schema.xml";
+ private static final String DEFAULT_SOLRCONFIG = "nutch_solrconfig.xml";
// Service dependencies injected by the dependency manager
private volatile LogService m_logService;
Modified:
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrDaemonServiceImpl.java
==============================================================================
---
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrDaemonServiceImpl.java
(original)
+++
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrDaemonServiceImpl.java
Wed Jan 26 13:55:27 2011
@@ -20,7 +20,6 @@
import java.io.IOException;
import java.net.URL;
import java.util.Dictionary;
-import java.util.Enumeration;
import org.amdatu.core.config.templates.ConfigTemplateManager;
import org.amdatu.core.tenant.TenantManagementService;
@@ -33,7 +32,6 @@
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.SolrCore;
-import org.osgi.framework.Bundle;
import org.osgi.framework.BundleContext;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.cm.ManagedService;
@@ -44,7 +42,9 @@
*/
public class SolrDaemonServiceImpl implements SolrService, ManagedService {
// Statics
- public static final String SOLR_CONFIG_XML = "solr.xml";
+ private static final String DEFAULT_SOLAR_CONFIG =
"default_solrconfig.xml";
+ private static final String DEFAULT_SCHEMA = "default_schema.xml";
+ private static final String SOLR = "solr.xml";
private static final String CONFIG_DIR = "conf";
// Services injected by the Felix dependency manager
@@ -57,45 +57,30 @@
private CoreContainer m_coreContainer;
private File m_workDir;
- /**
- * The init() method is invoked by the Felix dependency manager.
- */
- @SuppressWarnings("unchecked")
public void init() {
try {
m_logService.log(LogService.LOG_INFO, "Initializing Solr
configuration");
// Initialize storage configuration
- // Load the URL of the storage-conf.xml and write it file using
the config template
+ // Load the URL of the solr.xml and write it to a file using the config
template
// manager, which automatically replaces configuration entries in
that file
- File storageConfigFile = new File(m_workDir, SOLR_CONFIG_XML);
- if (!storageConfigFile.exists()) {
+ File solrFile = new File(m_workDir, SOLR);
+ if (!solrFile.exists()) {
m_workDir.mkdirs();
- Bundle bundle = m_bundleContext.getBundle();
- URL url = bundle.getResource(SOLR_CONFIG_XML);
try {
- // Replace placeholders in the solr.xml config file
- m_configTemplateManager.writeConfiguration(url,
storageConfigFile);
+ // Write the solr.xml file to the solr root directory
+ copyConfig("conf/" + SOLR, solrFile);
// Solr uses this system property to find its storage
location.
- System.setProperty("solr.solr.home",
storageConfigFile.getParentFile().getAbsolutePath());
+ System.setProperty("solr.solr.home",
solrFile.getParentFile().getAbsolutePath());
// Update the main config
File mainConfigDir = new File(m_workDir, "/" + CONFIG_DIR);
mainConfigDir.mkdirs();
- // Find all entries in our 'conf' directory.
- final Enumeration<URL> resources =
bundle.findEntries(CONFIG_DIR, "*.*", true);
- if (resources != null) {
- while (resources.hasMoreElements()) {
- final URL resource = resources.nextElement();
- File coreConfFile = new File(mainConfigDir,
resource.getFile().replace(CONFIG_DIR + "/", ""));
- if (!coreConfFile.exists()) {
- // Only write this file if it does not yet
exist
-
m_configTemplateManager.writeConfiguration(resource, coreConfFile);
- }
- }
- }
+ // Write default schema and solr config to /conf
+ copyConfig("conf/" + DEFAULT_SOLAR_CONFIG, new
File(mainConfigDir, "solrconfig.xml"));
+ copyConfig("conf/" + DEFAULT_SCHEMA, new
File(mainConfigDir, "schema.xml"));
} catch (IOException e) {
m_logService.log(LogService.LOG_ERROR, "Could not replace
configuration entries in storage-conf.xml", e);
}
@@ -195,4 +180,24 @@
private ServiceDependency createServiceDependency(Class<?> clazz) {
return
m_dependencyManager.createServiceDependency().setService(clazz).setRequired(true);
}
+
+ /**
+ * Writes a configuration resource of this bundle to the given file via the
+ * config template manager.
+ *
+ * @param source path of the resource inside the bundle, including its
+ * directory (for example "conf/solr.xml")
+ * @param target the file to write the configuration to
+ * @throws IOException if the resource is not present in the bundle or
+ * writing the configuration fails
+ */
+ private void copyConfig(String source, File target) throws IOException {
+ // Callers pass the complete resource path; prepending "conf/" here
+ // again would look up "conf/conf/..." and find nothing.
+ URL resource = m_bundleContext.getBundle().getResource(source);
+ if (resource == null) {
+ throw new IOException("Configuration resource not found in bundle: " + source);
+ }
+ m_configTemplateManager.writeConfiguration(resource, target);
+ }
}
Modified:
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrTest.java
==============================================================================
---
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrTest.java
(original)
+++
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/impl/SolrTest.java
Wed Jan 26 13:55:27 2011
@@ -31,6 +31,7 @@
*/
public class SolrTest {
private volatile TenantManagementService m_tenantService;
+ private static int ID = 0;
public void start() {
try {
@@ -82,7 +83,11 @@
document.addField( "name", "doc1", 1.0f );
document.addField( "price", 10 );
- streamPUT(baseUrl1 + "/course",
SolrUtil.toXMLStream(document));
+ // Add 10.000 Solr documents to the indices
+ for (int i=0; i<10000; i++) {
+ streamPUT(baseUrl1 + "/course",
SolrUtil.toXMLStream(generateSolrDocument()));
+ streamPUT(baseUrl2 + "/course",
SolrUtil.toXMLStream(generateSolrDocument()));
+ }
}
catch (InterruptedException e) {
// TODO Auto-generated catch block
@@ -95,6 +100,14 @@
}
}
+ private SolrInputDocument generateSolrDocument() {
+ SolrInputDocument document = new SolrInputDocument();
+ document.addField( "id", ID++, 0.5f);
+ document.addField( "name", "Solr document " + ID, 1.0f );
+ document.addField( "price", 100*Math.random(), 0.5f);
+ return document;
+ }
+
private RESTResult invokeGET(String url) {
return invoke(url, javax.ws.rs.HttpMethod.GET);
}
Modified:
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/osgi/Activator.java
==============================================================================
---
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/osgi/Activator.java
(original)
+++
sandbox/ivol/amdatu-searchandindex/solr/src/main/java/org/amdatu/searchandindex/solr/osgi/Activator.java
Wed Jan 26 13:55:27 2011
@@ -20,7 +20,6 @@
import org.amdatu.core.tenant.TenantManagementService;
import org.amdatu.searchandindex.solr.SolrRestService;
import org.amdatu.searchandindex.solr.SolrService;
-import org.amdatu.searchandindex.solr.impl.NutchIndex;
import org.amdatu.searchandindex.solr.impl.SolrDaemonServiceImpl;
import org.amdatu.searchandindex.solr.impl.SolrTest;
import org.apache.felix.dm.DependencyActivatorBase;
@@ -49,12 +48,6 @@
// Create and register the Solr service.
manager.add(createComponent()
- .setImplementation(NutchIndex.class)
-
.add(createServiceDependency().setService(SolrRestService.class).setRequired(true))
-
.add(createServiceDependency().setService(TenantManagementService.class).setRequired(true)));
-
- // Create and register the Solr service.
- manager.add(createComponent()
.setInterface(SolrService.class.getName(), null)
.setImplementation(SolrDaemonServiceImpl.class)
.add(createServiceDependency().setService(LogService.class).setRequired(true))
Added:
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_schema.xml
==============================================================================
--- (empty file)
+++
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_schema.xml
Wed Jan 26 13:55:27 2011
@@ -0,0 +1,312 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ This is the Solr schema file. This file should be named "schema.xml" and
+ should be in the conf directory under the solr home
+ (i.e. ./solr/conf/schema.xml by default)
+ or located where the classloader for the Solr webapp can find it.
+
+ This example schema is the recommended starting point for users.
+ It should be kept correct and concise, usable out-of-the-box.
+
+ For more information, on how to customize this file, please see
+ http://wiki.apache.org/solr/SchemaXml
+
+ PERFORMANCE NOTE: this schema includes many optional features and should not
+ be used for benchmarking. To improve performance one could
+ - set stored="false" for all fields possible (esp large fields) when you
+ only need to search on the field but don't need to return the original
+ value.
+ - set indexed="false" if you don't need to search on the field, but only
+ return the field as a result of searching on other indexed fields.
+ - remove all unneeded copyField statements
+ - for best index size and searching performance, set indexed="false"
+ for all general text fields, use copyField to copy them to the
+ catchall "text" field, and use that for searching.
+ - For maximum indexing performance, use the StreamingUpdateSolrServer
+ java client.
+ - Remember to run the JVM in server mode, and use a higher logging level
+ that avoids logging every request
+-->
+
+<schema name="amdatu" version="1.2">
+ <!-- attribute "name" is the name of this schema and is only used for
display purposes.
+ Applications should change this to reflect the nature of the search
collection.
+ version="1.2" is Solr's version number for the schema syntax and
semantics. It should
+ not normally be changed by applications.
+ 1.0: multiValued attribute did not exist, all fields are multiValued by
nature
+ 1.1: multiValued attribute introduced, false by default
+ 1.2: omitTermFreqAndPositions attribute introduced, true by default
except for text fields.
+ -->
+
+ <types>
+ <!-- field type definitions. The "name" attribute is
+ just a label to be used by field definitions. The "class"
+ attribute and any other attributes determine the real
+ behavior of the fieldType.
+ Class names starting with "solr" refer to java classes in the
+ org.apache.solr.analysis package.
+ -->
+
+ <!-- The StrField type is not analyzed, but indexed/stored verbatim.
+ - StrField and TextField support an optional compressThreshold which
+ limits compression (if enabled in the derived fields) to values which
+ exceed a certain size (in characters).
+ -->
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true"
omitNorms="true"/>
+
+ <!-- boolean type: "true" or "false" -->
+ <fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"
omitNorms="true"/>
+ <!--Binary data type. The data should be sent/retrieved in as Base64
encoded Strings -->
+ <fieldtype name="binary" class="solr.BinaryField"/>
+
+ <!-- The optional sortMissingLast and sortMissingFirst attributes are
+ currently supported on types that are sorted internally as strings.
+ This includes
"string","boolean","sint","slong","sfloat","sdouble","pdate"
+ - If sortMissingLast="true", then a sort on this field will cause
documents
+ without the field to come after documents with the field,
+ regardless of the requested sort order (asc or desc).
+ - If sortMissingFirst="true", then a sort on this field will cause
documents
+ without the field to come before documents with the field,
+ regardless of the requested sort order.
+ - If sortMissingLast="false" and sortMissingFirst="false" (the default),
+ then default lucene sorting will be used which places docs without the
+ field first in an ascending sort and last in a descending sort.
+ -->
+
+ <!--
+ Default numeric field types. For faster range queries, consider the
tint/tfloat/tlong/tdouble types.
+ -->
+ <fieldType name="int" class="solr.TrieIntField" precisionStep="0"
omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="float" class="solr.TrieFloatField" precisionStep="0"
omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="long" class="solr.TrieLongField" precisionStep="0"
omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="double" class="solr.TrieDoubleField" precisionStep="0"
omitNorms="true" positionIncrementGap="0"/>
+
+ <!--
+ Numeric field types that index each value at various levels of precision
+ to accelerate range queries when the number of values between the range
+ endpoints is large. See the javadoc for NumericRangeQuery for internal
+ implementation details.
+
+ Smaller precisionStep values (specified in bits) will lead to more tokens
+ indexed per value, slightly larger index size, and faster range queries.
+ A precisionStep of 0 disables indexing at different precision levels.
+ -->
+ <fieldType name="tint" class="solr.TrieIntField" precisionStep="8"
omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="tfloat" class="solr.TrieFloatField" precisionStep="8"
omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="tlong" class="solr.TrieLongField" precisionStep="8"
omitNorms="true" positionIncrementGap="0"/>
+ <fieldType name="tdouble" class="solr.TrieDoubleField" precisionStep="8"
omitNorms="true" positionIncrementGap="0"/>
+
+ <!-- The format for this date field is of the form 1995-12-31T23:59:59Z,
and
+ is a more restricted form of the canonical representation of dateTime
+ http://www.w3.org/TR/xmlschema-2/#dateTime
+ The trailing "Z" designates UTC time and is mandatory.
+ Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
+ All other components are mandatory.
+
+ Expressions can also be used to denote calculations that should be
+ performed relative to "NOW" to determine the value, ie...
+
+ NOW/HOUR
+ ... Round to the start of the current hour
+ NOW-1DAY
+ ... Exactly 1 day prior to now
+ NOW/DAY+6MONTHS+3DAYS
+ ... 6 months and 3 days in the future from the start of
+ the current day
+
+ Consult the DateField javadocs for more information.
+
+ Note: For faster range queries, consider the tdate type
+ -->
+ <fieldType name="date" class="solr.TrieDateField" omitNorms="true"
precisionStep="0" positionIncrementGap="0"/>
+
+ <!-- A Trie based date field for faster date range queries and date
faceting. -->
+ <fieldType name="tdate" class="solr.TrieDateField" omitNorms="true"
precisionStep="6" positionIncrementGap="0"/>
+
+
+ <!--
+ Note:
+ These should only be used for compatibility with existing indexes
(created with older Solr versions)
+ or if "sortMissingFirst" or "sortMissingLast" functionality is needed.
Use Trie based fields instead.
+
+ Plain numeric field types that store and index the text
+ value verbatim (and hence don't support range queries, since the
+ lexicographic ordering isn't equal to the numeric ordering)
+ -->
+ <fieldType name="pint" class="solr.IntField" omitNorms="true"/>
+ <fieldType name="plong" class="solr.LongField" omitNorms="true"/>
+ <fieldType name="pfloat" class="solr.FloatField" omitNorms="true"/>
+ <fieldType name="pdouble" class="solr.DoubleField" omitNorms="true"/>
+ <fieldType name="pdate" class="solr.DateField" sortMissingLast="true"
omitNorms="true"/>
+
+
+ <!--
+ Note:
+ These should only be used for compatibility with existing indexes
(created with older Solr versions)
+ or if "sortMissingFirst" or "sortMissingLast" functionality is needed.
Use Trie based fields instead.
+
+ Numeric field types that manipulate the value into
+ a string value that isn't human-readable in its internal form,
+ but with a lexicographic ordering the same as the numeric ordering,
+ so that range queries work correctly.
+ -->
+ <fieldType name="sint" class="solr.SortableIntField"
sortMissingLast="true" omitNorms="true"/>
+ <fieldType name="slong" class="solr.SortableLongField"
sortMissingLast="true" omitNorms="true"/>
+ <fieldType name="sfloat" class="solr.SortableFloatField"
sortMissingLast="true" omitNorms="true"/>
+ <fieldType name="sdouble" class="solr.SortableDoubleField"
sortMissingLast="true" omitNorms="true"/>
+
+
+ <!-- The "RandomSortField" is not used to store or search any
+ data. You can declare fields of this type in your schema
+ to generate pseudo-random orderings of your docs for sorting
+ purposes. The ordering is generated based on the field name
+ and the version of the index. As long as the index version
+ remains unchanged, and the same field name is reused,
+ the ordering of the docs will be consistent.
+ If you want different pseudo-random orderings of documents,
+ for the same version of the index, use a dynamicField and
+ change the name
+ -->
+ <fieldType name="random" class="solr.RandomSortField" indexed="true" />
+
+ <!-- solr.TextField allows the specification of custom text analyzers
+ specified as a tokenizer and a list of token filters. Different
+ analyzers may be specified for indexing and querying.
+
+ The optional positionIncrementGap puts space between multiple fields
of
+ this type on the same document, with the purpose of preventing false
phrase
+ matching across fields.
+
+ For more info on customizing your analyzer chain, please see
+ http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters
+ -->
+
+ <!-- One can also specify an existing Analyzer class that has a
+ default constructor via the class attribute on the analyzer element
+ <fieldType name="text_greek" class="solr.TextField">
+ <analyzer class="org.apache.lucene.analysis.el.GreekAnalyzer"/>
+ </fieldType>
+ -->
+
+ <!-- A text field that only splits on whitespace for exact matching of
words -->
+ <fieldType name="text_ws" class="solr.TextField"
positionIncrementGap="100">
+ </fieldType>
+
+ <!-- A text field that uses WordDelimiterFilter to enable splitting and
matching of
+ words on case-change, alpha numeric boundaries, and non-alphanumeric
chars,
+ so that a query of "wifi" or "wi fi" could match a document containing
"Wi-Fi".
+ Synonyms and stopwords are customized by external files, and stemming
is enabled.
+ -->
+ <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
+ </fieldType>
+
+ <!-- A general unstemmed text field - good if one does not know the
language of the field -->
+ <fieldType name="textgen" class="solr.TextField"
positionIncrementGap="100">
+ </fieldType>
+
+ <!-- A general unstemmed text field that indexes tokens normally and also
+ reversed (via ReversedWildcardFilterFactory), to enable more efficient
+ leading wildcard queries. -->
+ <fieldType name="text_rev" class="solr.TextField"
positionIncrementGap="100">
+ </fieldType>
+
+ <!-- since fields of this type are by default not stored or indexed,
+ any data added to them will be ignored outright. -->
+ <fieldtype name="ignored" stored="false" indexed="false"
multiValued="true" class="solr.StrField" />
+ </types>
+
+
+ <fields>
+ <!-- Valid attributes for fields:
+ name: mandatory - the name for the field
+ type: mandatory - the name of a previously defined type from the
+ <types> section
+ indexed: true if this field should be indexed (searchable or sortable)
+ stored: true if this field should be retrievable
+ compressed: [false] if this field should be stored using gzip compression
+ (this will only apply if the field type is compressable; among
+ the standard field types, only TextField and StrField are)
+ multiValued: true if this field may contain multiple values per document
+ omitNorms: (expert) set to true to omit the norms associated with
+ this field (this disables length normalization and index-time
+ boosting for the field, and saves some memory). Only full-text
+ fields or fields that need an index-time boost need norms.
+ termVectors: [false] set to true to store the term vector for a
+ given field.
+ When using MoreLikeThis, fields used for similarity should be
+ stored for best performance.
+ termPositions: Store position information with the term vector.
+ This will increase storage costs.
+ termOffsets: Store offset information with the term vector. This
+ will increase storage costs.
+ default: a value that should be used if no value is specified
+ when adding a document.
+ -->
+
+ <field name="id" type="string" indexed="true" stored="true" required="true"
/>
+
+ <!-- catchall field, containing all other searchable text fields
(implemented
+ via copyField further on in this schema) -->
+ <field name="text" type="text" indexed="true" stored="false"
multiValued="true"/>
+
+ <field name="timestamp" type="date" indexed="true" stored="false"
default="NOW" multiValued="false"/>
+
+ <dynamicField name="*_i" type="sint" indexed="true" stored="false"
omitNorms="true" multiValued="true"/>
+ <dynamicField name="*_s" type="string" indexed="true" stored="false"
multiValued="true"/>
+ <dynamicField name="*_l" type="long" indexed="true" stored="false"/>
+ <dynamicField name="*_t" type="text" indexed="true" stored="false"/>
+ <dynamicField name="*_b" type="boolean" indexed="true" stored="false"/>
+ <dynamicField name="*_f" type="float" indexed="true" stored="false"/>
+ <dynamicField name="*_d" type="double" indexed="true" stored="false"/>
+ <dynamicField name="*_dt" type="date" indexed="true" stored="false"/>
+
+ <!-- some trie-coded dynamic fields for faster range queries -->
+ <dynamicField name="*_ti" type="tint" indexed="true" stored="false"/>
+ <dynamicField name="*_tl" type="tlong" indexed="true" stored="false"/>
+ <dynamicField name="*_tf" type="tfloat" indexed="true" stored="false"/>
+ <dynamicField name="*_td" type="tdouble" indexed="true" stored="false"/>
+ <dynamicField name="*_tdt" type="tdate" indexed="true" stored="false"/>
+
+ <dynamicField name="*_pi" type="pint" indexed="true" stored="false"/>
+
+ <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
+ <dynamicField name="attr_*" type="textgen" indexed="true" stored="false"
multiValued="true"/>
+
+ <dynamicField name="random_*" type="random" />
+
+ <!-- uncomment the following to ignore any fields that don't already match
an existing
+ field name or dynamic field, rather than reporting them as an error.
+ alternately, change the type="ignored" to some other type e.g. "text"
if you want
+ unknown fields indexed and/or stored by default -->
+ <!--dynamicField name="*" type="ignored" multiValued="true" /-->
+
+ </fields>
+
+ <!-- Field to use to determine and enforce document uniqueness.
+ Unless this field is marked with required="false", it will be a required
field
+ -->
+ <uniqueKey>id</uniqueKey>
+
+ <!-- field for the QueryParser to use when an explicit fieldname is absent -->
+ <defaultSearchField>text</defaultSearchField>
+
+ <!-- SolrQueryParser configuration: defaultOperator="AND|OR" -->
+ <solrQueryParser defaultOperator="OR"/>
+</schema>
Added:
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_solrconfig.xml
==============================================================================
--- (empty file)
+++
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/default_solrconfig.xml
Wed Jan 26 13:55:27 2011
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ http://wiki.apache.org/solr/SolrConfigXml
+
+-->
+
+<config>
+ <indexDefaults>
+ <!-- Sets the amount of RAM that may be used by Lucene indexing
+ for buffering added documents and deletions before they are
+ flushed to the Directory. -->
+ <ramBufferSizeMB>32</ramBufferSizeMB>
+ <lockType>simple</lockType>
+ <!--
+ Expert:
+ Controls how often Lucene loads terms into memory -->
+ <!--<termIndexInterval>256</termIndexInterval>-->
+ </indexDefaults>
+
+ <jmx />
+
+ <query>
+ <maxBooleanClauses>102400</maxBooleanClauses>
+ </query>
+
+ <updateHandler class="solr.DirectUpdateHandler2" />
+
+ <requestDispatcher handleSelect="true" >
+ <requestParsers enableRemoteStreaming="false"
multipartUploadLimitInKB="2048" />
+ </requestDispatcher>
+
+ <requestHandler name="standard" class="solr.StandardRequestHandler"
default="true">
+ <arr name="last-components">
+ </arr>
+ </requestHandler>
+
+ <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
+ <requestHandler name="/admin/"
class="org.apache.solr.handler.admin.AdminHandlers" />
+
+ <!-- config for the admin interface -->
+ <admin>
+ <defaultQuery>solr</defaultQuery>
+ </admin>
+
+</config>
+
Added:
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_schema.xml
==============================================================================
--- (empty file)
+++
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_schema.xml
Wed Jan 26 13:55:27 2011
@@ -0,0 +1,108 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+ <!--
+ Licensed to the Apache Software Foundation (ASF) under one or
+ more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information regarding
+ copyright ownership. The ASF licenses this file to You under the
+ Apache License, Version 2.0 (the "License"); you may not use
+ this file except in compliance with the License. You may obtain
+ a copy of the License at
+ http://www.apache.org/licenses/LICENSE-2.0 Unless required by
+ applicable law or agreed to in writing, software distributed
+ under the License is distributed on an "AS IS" BASIS, WITHOUT
+ WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions
+ and limitations under the License.
+ -->
+ <!--
+ Description: This document contains solr schema definition to be
+ used with solr integration currently built into Nutch. See
+ https://issues.apache.org/jira/browse/NUTCH-442
+ https://issues.apache.org/jira/browse/NUTCH-699 for more info.
+ -->
+<schema name="nutch" version="1.1">
+ <types>
+ <fieldType name="string" class="solr.StrField"
+ sortMissingLast="true" omitNorms="true"/>
+ <fieldType name="long" class="solr.LongField"
+ omitNorms="true"/>
+ <fieldType name="float" class="solr.FloatField"
+ omitNorms="true"/>
+ <fieldType name="text" class="solr.TextField"
+ positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true" words="stopwords.txt"/>
+ <filter class="solr.WordDelimiterFilterFactory"
+ generateWordParts="1" generateNumberParts="1"
+ catenateWords="1" catenateNumbers="1" catenateAll="0"
+ splitOnCaseChange="1"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.EnglishPorterFilterFactory"
+ protected="protwords.txt"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ </analyzer>
+ </fieldType>
+ <fieldType name="url" class="solr.TextField"
+ positionIncrementGap="100">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.WordDelimiterFilterFactory"
+ generateWordParts="1" generateNumberParts="1"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ </analyzer>
+ </fieldType>
+ </types>
+ <fields>
+ <field name="id" type="string" stored="true" indexed="true"/>
+
+ <!-- core fields -->
+ <field name="segment" type="string" stored="true" indexed="false"/>
+ <field name="digest" type="string" stored="true" indexed="false"/>
+ <field name="boost" type="float" stored="true" indexed="false"/>
+
+ <!-- fields for index-basic plugin -->
+ <field name="host" type="url" stored="false" indexed="true"/>
+ <field name="site" type="string" stored="false" indexed="true"/>
+ <field name="url" type="url" stored="true" indexed="true"
+ required="true"/>
+ <field name="content" type="text" stored="true" indexed="true"/>
+ <field name="title" type="text" stored="true" indexed="true"/>
+ <field name="cache" type="string" stored="true" indexed="false"/>
+ <field name="tstamp" type="long" stored="true" indexed="false"/>
+
+ <!-- fields for index-anchor plugin -->
+ <field name="anchor" type="string" stored="true" indexed="true"
+ multiValued="true"/>
+
+ <!-- fields for index-more plugin -->
+ <field name="type" type="string" stored="true" indexed="true"
+ multiValued="true"/>
+ <field name="contentLength" type="long" stored="true"
+ indexed="false"/>
+ <field name="lastModified" type="long" stored="true"
+ indexed="false"/>
+ <field name="date" type="string" stored="true" indexed="true"/>
+
+ <!-- fields for languageidentifier plugin -->
+ <field name="lang" type="string" stored="true" indexed="true"/>
+
+ <!-- fields for subcollection plugin -->
+ <field name="subcollection" type="string" stored="true"
+ indexed="true" multiValued="true"/>
+
+ <!-- fields for feed plugin -->
+ <field name="author" type="string" stored="true" indexed="true"/>
+ <field name="tag" type="string" stored="true" indexed="true"/>
+ <field name="feed" type="string" stored="true" indexed="true"/>
+ <field name="publishedDate" type="string" stored="true"
+ indexed="true"/>
+ <field name="updatedDate" type="string" stored="true"
+ indexed="true"/>
+ </fields>
+ <uniqueKey>id</uniqueKey>
+ <defaultSearchField>content</defaultSearchField>
+ <solrQueryParser defaultOperator="OR"/>
+</schema>
Added:
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_solrconfig.xml
==============================================================================
--- (empty file)
+++
sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/nutch_solrconfig.xml
Wed Jan 26 13:55:27 2011
@@ -0,0 +1,90 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ http://wiki.apache.org/solr/SolrConfigXml
+
+-->
+
+<config>
+ <indexDefaults>
+ <!-- Sets the amount of RAM that may be used by Lucene indexing
+ for buffering added documents and deletions before they are
+ flushed to the Directory. -->
+ <ramBufferSizeMB>32</ramBufferSizeMB>
+ <lockType>simple</lockType>
+ <!--
+ Expert:
+ Controls how often Lucene loads terms into memory -->
+ <!--<termIndexInterval>256</termIndexInterval>-->
+ </indexDefaults>
+
+ <jmx />
+
+ <query>
+ <maxBooleanClauses>102400</maxBooleanClauses>
+ </query>
+
+ <updateHandler class="solr.DirectUpdateHandler2" />
+
+ <requestDispatcher handleSelect="true" >
+ <requestParsers enableRemoteStreaming="false" multipartUploadLimitInKB="2048" />
+ </requestDispatcher>
+
+ <requestHandler name="standard" class="solr.StandardRequestHandler" default="true">
+ <arr name="last-components">
+ </arr>
+ </requestHandler>
+
+ <requestHandler name="/update" class="solr.XmlUpdateRequestHandler" />
+ <requestHandler name="/admin/" class="org.apache.solr.handler.admin.AdminHandlers" />
+ <!-- /nutch: dismax search over content, anchor and title (qf); only the url field is returned (fl) -->
+ <requestHandler name="/nutch" class="solr.SearchHandler" >
+ <lst name="defaults">
+ <str name="defType">dismax</str>
+ <str name="echoParams">explicit</str>
+ <float name="tie">0.01</float>
+ <str name="qf">
+ content^0.5 anchor^1.0 title^1.2
+ </str>
+ <str name="pf">
+ content^0.5 anchor^1.5 title^1.2 site^1.5
+ </str>
+ <str name="fl">
+ url
+ </str>
+ <str name="mm">
+ 2&lt;-1 5&lt;-2 6&lt;90%
+ </str> <!-- the less-than signs in the mm value must stay escaped as &lt; to keep this file well-formed -->
+ <int name="ps">100</int>
+ <bool hl="true"/> <!-- NOTE(review): no name attribute, so this likely does not set the hl default; usual form is <bool name="hl">true</bool>. Confirm a "regex" fragmenter is registered before enabling. -->
+ <str name="q.alt">*:*</str>
+ <str name="hl.fl">title url content</str>
+ <str name="f.title.hl.fragsize">0</str>
+ <str name="f.title.hl.alternateField">title</str>
+ <str name="f.url.hl.fragsize">0</str>
+ <str name="f.url.hl.alternateField">url</str>
+ <str name="f.content.hl.fragmenter">regex</str>
+ </lst>
+ </requestHandler>
+
+ <!-- config for the admin interface -->
+ <admin>
+ <defaultQuery>solr</defaultQuery>
+ </admin>
+
+</config>
+
Added: sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/solr.xml
==============================================================================
--- (empty file)
+++ sandbox/ivol/amdatu-searchandindex/solr/src/main/resources/conf/solr.xml Wed Jan 26 13:55:27 2011
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+ <!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version
+ 2.0 (the "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0 Unless required by
+ applicable law or agreed to in writing, software distributed under the
+ License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+ CONDITIONS OF ANY KIND, either express or implied. See the License for
+ the specific language governing permissions and limitations under the
+ License.
+ -->
+
+ <!--
+ All (relative) paths are relative to the installation path.
+ persistent: save changes made via the API to this file.
+ sharedLib: path to a lib directory that will be shared across all cores.
+ -->
+
+<solr persistent="true">
+ <!--
+ adminPath: RequestHandler path used to manage cores; if 'null' (or absent),
+ cores cannot be managed via a request handler. No <core> is declared here.
+ -->
+ <cores adminPath="/solr/admin/cores">
+ </cores>
+</solr>
\ No newline at end of file