Modified: stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-model/conf/solrconfig.xml URL: http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-model/conf/solrconfig.xml?rev=1489728&r1=1489727&r2=1489728&view=diff ============================================================================== --- stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-model/conf/solrconfig.xml (original) +++ stanbol/trunk/enhancement-engines/topic/engine/src/main/resources/default-topic-model/conf/solrconfig.xml Wed Jun 5 07:23:15 2013 @@ -47,466 +47,69 @@ that you fully re-index after changing this setting as it can affect both how text is indexed and queried. --> - <luceneMatchVersion>LUCENE_32</luceneMatchVersion> + <luceneMatchVersion>LUCENE_42</luceneMatchVersion> - <!-- lib directives can be used to instruct Solr to load an Jars - identified and use them to resolve any "plugins" specified in - your solrconfig.xml or schema.xml (ie: Analyzers, Request - Handlers, etc...). - - All directories and paths are resolved relative to the - instanceDir. - - If a "./lib" directory exists in your instanceDir, all files - found in it are included as if you had used the following - syntax... - - <lib dir="./lib" /> - --> - <!-- A dir option by itself adds any files found in the directory to - the classpath, this is useful for including all jars in a - directory. - --> - <lib dir="../../contrib/extraction/lib" /> - <!-- When a regex is specified in addition to a directory, only the - files in that directory which completely match the regex - (anchored on both ends) will be included. 
- --> - <lib dir="../../dist/" regex="apache-solr-cell-\d.*\.jar" /> - <lib dir="../../dist/" regex="apache-solr-clustering-\d.*\.jar" /> - <lib dir="../../dist/" regex="apache-solr-dataimporthandler-\d.*\.jar" /> - - <!-- If a dir option (with or without a regex) is used and nothing - is found that matches, it will be ignored - --> - <lib dir="../../contrib/clustering/lib/" /> - <lib dir="/total/crap/dir/ignored" /> - <!-- an exact path can be used to specify a specific file. This - will cause a serious error to be logged if it can't be loaded. - --> - <!-- - <lib path="../a-jar-that-does-not-exist.jar" /> - --> - - <!-- Data Directory - - Used to specify an alternate directory to hold all index data - other than the default ./data under the Solr home. If - replication is in use, this should match the replication - configuration. - --> <dataDir>${solr.data.dir:}</dataDir> - - <!-- The DirectoryFactory to use for indexes. - - solr.StandardDirectoryFactory, the default, is filesystem - based. solr.RAMDirectoryFactory is memory based, not - persistent, and doesn't work with replication. - --> <directoryFactory name="DirectoryFactory" - class="${solr.directoryFactory:solr.StandardDirectoryFactory}"/> - - - <!-- Index Defaults - - Values here affect all index writers and act as a default - unless overridden. - - WARNING: See also the <mainIndex> section below for parameters - that overfor Solr's main Lucene index. - --> - <indexDefaults> - - <useCompoundFile>false</useCompoundFile> - - <mergeFactor>10</mergeFactor> - <!-- Sets the amount of RAM that may be used by Lucene indexing - for buffering added documents and deletions before they are - flushed to the Directory. --> - <ramBufferSizeMB>32</ramBufferSizeMB> - <!-- If both ramBufferSizeMB and maxBufferedDocs is set, then - Lucene will flush based on whichever limit is hit first. 
- --> - <!-- <maxBufferedDocs>1000</maxBufferedDocs> --> - - <maxFieldLength>100000</maxFieldLength> - <writeLockTimeout>1000</writeLockTimeout> - <commitLockTimeout>10000</commitLockTimeout> - - <!-- Expert: Merge Policy - - The Merge Policy in Lucene controls how merging is handled by - Lucene. The default in Solr 3.3 is TieredMergePolicy. - - The default in 2.3 was the LogByteSizeMergePolicy, - previous versions used LogDocMergePolicy. - - LogByteSizeMergePolicy chooses segments to merge based on - their size. The Lucene 2.2 default, LogDocMergePolicy chose - when to merge based on number of documents - - Other implementations of MergePolicy must have a no-argument - constructor - --> - <!-- - <mergePolicy class="org.apache.lucene.index.TieredMergePolicy"/> - --> - - <!-- Expert: Merge Scheduler - - The Merge Scheduler in Lucene controls how merges are - performed. The ConcurrentMergeScheduler (Lucene 2.3 default) - can perform merges in the background using separate threads. - The SerialMergeScheduler (Lucene 2.2 default) does not. - --> - <!-- - <mergeScheduler class="org.apache.lucene.index.ConcurrentMergeScheduler"/> - --> - - <!-- LockFactory - - This option specifies which Lucene LockFactory implementation - to use. - - single = SingleInstanceLockFactory - suggested for a - read-only index or when there is no possibility of - another process trying to modify the index. - native = NativeFSLockFactory - uses OS native file locking. - Do not use when multiple solr webapps in the same - JVM are attempting to share a single index. - simple = SimpleFSLockFactory - uses a plain file for locking - - (For backwards compatibility with Solr 1.2, 'simple' is the - default if not specified.) - - More details on the nuances of each LockFactory... - http://wiki.apache.org/lucene-java/AvailableLockFactories - --> - <lockType>native</lockType> - - <!-- Expert: Controls how often Lucene loads terms into memory - Default is 128 and is likely good for most everyone. 
- --> - <!-- <termIndexInterval>256</termIndexInterval> --> - </indexDefaults> - - <!-- Main Index + class="${solr.directoryFactory:solr.NRTCachingDirectoryFactory}"/> - Values here override the values in the <indexDefaults> section - for the main on disk index. - --> - <mainIndex> - - <useCompoundFile>false</useCompoundFile> - <ramBufferSizeMB>32</ramBufferSizeMB> - <mergeFactor>10</mergeFactor> - - <!-- Unlock On Startup - - If true, unlock any held write or commit locks on startup. - This defeats the locking mechanism that allows multiple - processes to safely access a lucene index, and should be used - with care. - - This is not needed if lock type is 'none' or 'single' - --> - <unlockOnStartup>false</unlockOnStartup> - - <!-- If true, IndexReaders will be reopened (often more efficient) - instead of closed and then opened. - --> - <reopenReaders>true</reopenReaders> - - <!-- Commit Deletion Policy - - Custom deletion policies can specified here. The class must - implement org.apache.lucene.index.IndexDeletionPolicy. - - http://lucene.apache.org/java/2_9_1/api/all/org/apache/lucene/index/IndexDeletionPolicy.html - - The standard Solr IndexDeletionPolicy implementation supports - deleting index commit points on number of commits, age of - commit point and optimized status. - - The latest commit point should always be preserved regardless - of the criteria. - --> - <deletionPolicy class="solr.SolrDeletionPolicy"> - <!-- The number of commit points to be kept --> - <str name="maxCommitsToKeep">1</str> - <!-- The number of optimized commit points to be kept --> - <str name="maxOptimizedCommitsToKeep">0</str> - <!-- - Delete all commit points once they have reached the given age. - Supports DateMathParser syntax e.g. - --> - <!-- - <str name="maxCommitAge">30MINUTES</str> - <str name="maxCommitAge">1DAY</str> - --> - </deletionPolicy> - - <!-- Lucene Infostream - - To aid in advanced debugging, Lucene provides an "InfoStream" - of detailed information when indexing. 
- - Setting The value to true will instruct the underlying Lucene - IndexWriter to write its debugging info the specified file - --> - <infoStream file="INFOSTREAM.txt">false</infoStream> - </mainIndex> + <indexConfig> + <!-- maxFieldLength was removed in 4.0. To get similar behavior, include a + LimitTokenCountFilterFactory in your fieldType definition. E.g. + <filter class="solr.LimitTokenCountFilterFactory" maxTokenCount="10000"/> + --> + <!-- Maximum time to wait for a write lock (ms) for an IndexWriter. Default: 1000 --> + <writeLockTimeout>5000</writeLockTimeout> + </indexConfig> + + <jmx /> + <!-- The default high-performance update handler --> - <!-- JMX - - This example enables JMX if and only if an existing MBeanServer - is found, use this if you want to configure JMX through JVM - parameters. Remove this to disable exposing Solr configuration - and statistics to JMX. - - For more details see http://wiki.apache.org/solr/SolrJmx - --> - <jmx /> - <!-- If you want to connect to a particular server, specify the - agentId - --> - <!-- <jmx agentId="myAgent" /> --> - <!-- If you want to start a new MBeanServer, specify the serviceUrl --> - <!-- <jmx serviceUrl="service:jmx:rmi:///jndi/rmi://localhost:9999/solr"/> - --> - - <!-- The default high-performance update handler --> <updateHandler class="solr.DirectUpdateHandler2"> - - <!-- AutoCommit - - Perform a <commit/> automatically under certain conditions. - Instead of enabling autoCommit, consider using "commitWithin" - when adding documents. - - http://wiki.apache.org/solr/UpdateXmlMessages - - maxDocs - Maximum number of documents to add since the last - commit before automatically triggering a new commit. - - maxTime - Maximum amount of time that is allowed to pass - since a document was added before automaticly - triggering a new commit. 
- --> - <!-- - <autoCommit> - <maxDocs>10000</maxDocs> + <!-- Deactivate transaction log + <updateLog> + <str name="dir">${solr.ulog.dir:}</str> + </updateLog > --> + + <!-- no auto commit + <autoCommit> + <maxTime>15000</maxTime> + <openSearcher>false</openSearcher> + </autoCommit> + --> + <!-- + <autoSoftCommit> <maxTime>1000</maxTime> - </autoCommit> - --> - - <!-- Update Related Event Listeners - - Various IndexWriter related events can trigger Listeners to - take actions. - - postCommit - fired after every commit or optimize command - postOptimize - fired after every optimize command - --> - <!-- The RunExecutableListener executes an external command from a - hook such as postCommit or postOptimize. - - exe - the name of the executable to run - dir - dir to use as the current working directory. (default=".") - wait - the calling thread waits until the executable returns. - (default="true") - args - the arguments to pass to the program. (default is none) - env - environment variables to set. (default is none) - --> - <!-- This example shows how RunExecutableListener could be used - with the script based replication... - http://wiki.apache.org/solr/CollectionDistribution - --> - <!-- - <listener event="postCommit" class="solr.RunExecutableListener"> - <str name="exe">solr/bin/snapshooter</str> - <str name="dir">.</str> - <bool name="wait">true</bool> - <arr name="args"> <str>arg1</str> <str>arg2</str> </arr> - <arr name="env"> <str>MYVAR=val1</str> </arr> - </listener> + </autoSoftCommit> --> </updateHandler> - - <!-- IndexReaderFactory - - Use the following format to specify a custom IndexReaderFactory, - which allows for alternate IndexReader implementations. - - ** Experimental Feature ** - - Please note - Using a custom IndexReaderFactory may prevent - certain other features from working. The API to - IndexReaderFactory may change without warning or may even be - removed from future releases if the problems cannot be - resolved. 
- - - ** Features that may not work with custom IndexReaderFactory ** - - The ReplicationHandler assumes a disk-resident index. Using a - custom IndexReader implementation may cause incompatibility - with ReplicationHandler and may cause replication to not work - correctly. See SOLR-1366 for details. - - --> - <!-- - <indexReaderFactory name="IndexReaderFactory" class="package.class"> - <str name="someArg">Some Value</str> - </indexReaderFactory > - --> - <!-- By explicitly declaring the Factory, the termIndexDivisor can - be specified. - --> - <!-- - <indexReaderFactory name="IndexReaderFactory" - class="solr.StandardIndexReaderFactory"> - <int name="setTermIndexDivisor">12</int> - </indexReaderFactory > - --> - - + <query> - <!-- Max Boolean Clauses - - Maximum number of clauses in each BooleanQuery, an exception - is thrown if exceeded. - - ** WARNING ** - - This option actually modifies a global Lucene property that - will affect all SolrCores. If multiple solrconfig.xml files - disagree on this property, the value at any given moment will - be based on the last SolrCore to be initialized. - - --> <maxBooleanClauses>1024</maxBooleanClauses> - - <!-- Solr Internal Query Caches - - There are two implementations of cache available for Solr, - LRUCache, based on a synchronized LinkedHashMap, and - FastLRUCache, based on a ConcurrentHashMap. - - FastLRUCache has faster gets and slower puts in single - threaded operation and thus is generally faster than LRUCache - when the hit ratio of the cache is high (> 75%), and may be - faster under other scenarios on multi-cpu systems. - --> - - <!-- Filter Cache - - Cache used by SolrIndexSearcher for filters (DocSets), - unordered sets of *all* documents that match a query. When a - new searcher is opened, its caches may be prepopulated or - "autowarmed" using data from caches in the old searcher. - autowarmCount is the number of items to prepopulate. 
For - LRUCache, the autowarmed items will be the most recently - accessed items. - - Parameters: - class - the SolrCache implementation LRUCache or - (LRUCache or FastLRUCache) - size - the maximum number of entries in the cache - initialSize - the initial capacity (number of entries) of - the cache. (see java.util.HashMap) - autowarmCount - the number of entries to prepopulate from - and old cache. - --> <filterCache class="solr.FastLRUCache" - size="512" - initialSize="512" - autowarmCount="0"/> - - <!-- Query Result Cache - - Caches results of searches - ordered lists of document ids - (DocList) based on a query, a sort, and the range of documents requested. - --> + size="2048" + initialSize="1024" + autowarmCount="512"/> <queryResultCache class="solr.LRUCache" - size="512" - initialSize="512" - autowarmCount="0"/> - - <!-- Document Cache - - Caches Lucene Document objects (the stored fields for each - document). Since Lucene internal document ids are transient, - this cache will not be autowarmed. - --> + size="2048" + initialSize="1024" + autowarmCount="512"/> <documentCache class="solr.LRUCache" - size="512" - initialSize="512" + size="4096" + initialSize="1024" autowarmCount="0"/> - <!-- Field Value Cache - - Cache used to hold field values that are quickly accessible - by document id. The fieldValueCache is created by default - even if not configured here. - --> <!-- <fieldValueCache class="solr.FastLRUCache" size="512" autowarmCount="128" showItems="32" /> --> - - <!-- Custom Cache - - Example of a generic cache. These caches may be accessed by - name through SolrIndexSearcher.getCache(),cacheLookup(), and - cacheInsert(). The purpose is to enable easy caching of - user/application level data. The regenerator argument should - be specified as an implementation of solr.CacheRegenerator - if autowarming is desired. 
- --> - <!-- - <cache name="myUserCache" - class="solr.LRUCache" - size="4096" - initialSize="1024" - autowarmCount="1024" - regenerator="com.mycompany.MyRegenerator" - /> - --> - - - <!-- Lazy Field Loading - - If true, stored fields that are not requested will be loaded - lazily. This can result in a significant speed improvement - if the usual case is to not load all stored fields, - especially if the skipped fields are large compressed text - fields. - --> <enableLazyFieldLoading>true</enableLazyFieldLoading> - <!-- Use Filter For Sorted Query - - A possible optimization that attempts to use a filter to - satisfy a search. If the requested sort does not include - score, then the filterCache will be checked for a filter - matching the query. If found, the filter will be used as the - source of document ids, and then the sort will be applied to - that. - - For most situations, this will not be useful unless you - frequently get the same search repeatedly with different sort - options, and none of them ever use "score" - --> - <!-- - <useFilterForSortedQuery>true</useFilterForSortedQuery> - --> - <!-- Result Window Size An optimization for use with the queryResultCache. When a search @@ -523,22 +126,6 @@ --> <queryResultMaxDocsCached>200</queryResultMaxDocsCached> - <!-- Query Related Event Listeners - - Various IndexSearcher related events can trigger Listeners to - take actions. - - newSearcher - fired whenever a new searcher is being prepared - and there is a current searcher handling requests (aka - registered). It can be used to prime certain caches to - prevent long request times for certain requests. - - firstSearcher - fired whenever a new searcher is being - prepared but there is no current registered searcher to handle - requests or to gain autowarming data from. - - - --> <!-- QuerySenderListener takes an array of NamedList and executes a local query request for each NamedList in sequence. 
--> @@ -580,233 +167,75 @@ </query> - - <!-- Request Dispatcher - - This section contains instructions for how the SolrDispatchFilter - should behave when processing requests for this SolrCore. - - handleSelect affects the behavior of requests such as /select?qt=XXX - - handleSelect="true" will cause the SolrDispatchFilter to process - the request and will result in consistent error handling and - formatting for all types of requests. - - handleSelect="false" will cause the SolrDispatchFilter to - ignore "/select" requests and fallback to using the legacy - SolrServlet and it's Solr 1.1 style error formatting - --> - <requestDispatcher handleSelect="true" > - <!-- Request Parsing - - These settings indicate how Solr Requests may be parsed, and - what restrictions may be placed on the ContentStreams from - those requests - - enableRemoteStreaming - enables use of the stream.file - and stream.url parameters for specifying remote streams. - - multipartUploadLimitInKB - specifies the max size of - Multipart File Uploads that Solr will allow in a Request. - - *** WARNING *** - The settings below authorize Solr to fetch remote files, You - should make sure your system has some authentication before - using enableRemoteStreaming="true" - - --> + <requestDispatcher handleSelect="false" > <requestParsers enableRemoteStreaming="true" - multipartUploadLimitInKB="2048000" /> - - <!-- HTTP Caching - - Set HTTP caching related parameters (for proxy caches and clients). - - The options below instruct Solr not to output any HTTP Caching - related headers - --> + multipartUploadLimitInKB="2048000" + formdataUploadLimitInKB="2048"/> <httpCaching never304="true" /> - <!-- If you include a <cacheControl> directive, it will be used to - generate a Cache-Control header (as well as an Expires header - if the value contains "max-age=") - - By default, no Cache-Control header is generated. 
- - You can use the <cacheControl> option even if you have set - never304="true" - --> - <!-- - <httpCaching never304="true" > - <cacheControl>max-age=30, public</cacheControl> - </httpCaching> - --> - <!-- To enable Solr to respond with automatically generated HTTP - Caching headers, and to response to Cache Validation requests - correctly, set the value of never304="false" - - This will cause Solr to generate Last-Modified and ETag - headers based on the properties of the Index. - - The following options can also be specified to affect the - values of these headers... - - lastModFrom - the default value is "openTime" which means the - Last-Modified value (and validation against If-Modified-Since - requests) will all be relative to when the current Searcher - was opened. You can change it to lastModFrom="dirLastMod" if - you want the value to exactly correspond to when the physical - index was last modified. - - etagSeed="..." is an option you can change to force the ETag - header (and validation against If-None-Match requests) to be - different even if the index has not changed (ie: when making - significant changes to your config file) - - (lastModifiedFrom and etagSeed are both ignored if you use - the never304="true" option) - --> - <!-- - <httpCaching lastModifiedFrom="openTime" - etagSeed="Solr"> - <cacheControl>max-age=30, public</cacheControl> - </httpCaching> - --> </requestDispatcher> - <requestHandler name="/mlt" class="solr.MoreLikeThisHandler" startup="lazy" /> - <!-- Request Handlers http://wiki.apache.org/solr/SolrRequestHandler - - incoming queries will be dispatched to the correct handler - based on the path or the qt (query type) param. - - Names starting with a '/' are accessed with the a path equal to - the registered name. Names without a leading '/' are accessed - with: http://host/app/[core/]select?qt=name - - If a /select request is processed with out a qt param - specified, the requestHandler that declares default="true" will - be used. 
- - If a Request Handler is declared with startup="lazy", then it will - not be initialized until the first request that uses it. - --> <!-- SearchHandler http://wiki.apache.org/solr/SearchHandler - - For processing Search Queries, the primary Request Handler - provided with Solr is "SearchHandler" It delegates to a sequent - of SearchComponents (see below) and supports distributed - queries across multiple shards --> - <requestHandler name="search" class="solr.SearchHandler" default="true"> + <requestHandler name="/select" class="solr.SearchHandler"> <!-- default values for query parameters can be specified, these will be overridden by parameters in the request --> <lst name="defaults"> <str name="echoParams">explicit</str> <int name="rows">10</int> - </lst> - <!-- In addition to defaults, "appends" params can be specified - to identify values which should be appended to the list of - multi-val params from the query (or the existing "defaults"). - --> - <!-- In this example, the param "fq=instock:true" would be appended to - any query time fq params the user may specify, as a mechanism for - partitioning the index, independent of any user selected filtering - that may also be desired (perhaps as a result of faceted searching). - - NOTE: there is *absolutely* nothing a client can do to prevent these - "appends" values from being used, so don't use this mechanism - unless you are sure you always want it. - --> - <!-- - <lst name="appends"> - <str name="fq">inStock:true</str> - </lst> - --> - <!-- "invariants" are a way of letting the Solr maintainer lock down - the options available to Solr clients. Any params values - specified here are used regardless of what values may be specified - in either the query, the "defaults", or the "appends" params. - - In this example, the facet.field and facet.query params would - be fixed, limiting the facets clients can use. 
Faceting is - not turned on by default - but if the client does specify - facet=true in the request, these are the only facets they - will be able to see counts for; regardless of what other - facet.field or facet.query params they may specify. - - NOTE: there is *absolutely* nothing a client can do to prevent these - "invariants" values from being used, so don't use this mechanism - unless you are sure you always want it. - --> - <!-- - <lst name="invariants"> - <str name="facet.field">cat</str> - <str name="facet.field">manu_exact</str> - <str name="facet.query">price:[* TO 500]</str> - <str name="facet.query">price:[500 TO *]</str> - </lst> - --> - <!-- If the default list of SearchComponents is not desired, that - list can either be overridden completely, or components can be - prepended or appended to the default list. (see below) - --> - <!-- - <arr name="components"> - <str>nameOfCustomComponent1</str> - <str>nameOfCustomComponent2</str> - </arr> - --> + </lst> </requestHandler> - <!-- XML Update Request Handler. - - http://wiki.apache.org/solr/UpdateXmlMessages + <!-- Request Handler for similarity queries and topic classification --> + <requestHandler name="/mlt" class="solr.MoreLikeThisHandler" startup="lazy" /> - The canonical Request Handler for Modifying the Index through - commands specified using XML. + <!-- A request handler that returns indented JSON by default --> + <requestHandler name="/query" class="solr.SearchHandler"> + <lst name="defaults"> + <str name="echoParams">explicit</str> + <str name="wt">json</str> + <str name="indent">true</str> + <str name="df">text</str> + </lst> + </requestHandler> - Note: Since solr1.1 requestHandlers requires a valid content - type header if posted in the body. 
For example, curl now - requires: -H 'Content-type:text/xml; charset=utf-8' - --> - <requestHandler name="/update" - class="solr.XmlUpdateRequestHandler"> - <!-- See below for information on defining - updateRequestProcessorChains that can be used by name - on each Update Request - --> - <!-- - <lst name="defaults"> - <str name="update.chain">dedupe</str> - </lst> - --> - </requestHandler> - <!-- Binary Update Request Handler - http://wiki.apache.org/solr/javabin - --> - <requestHandler name="/update/javabin" - class="solr.BinaryUpdateRequestHandler" /> - <!-- CSV Update Request Handler - http://wiki.apache.org/solr/UpdateCSV - --> - <requestHandler name="/update/csv" - class="solr.CSVRequestHandler" - startup="lazy" /> + <!-- realtime get handler, guaranteed to return the latest stored fields of + any document, without the need to commit or open a new searcher. The + current implementation relies on the updateLog feature being enabled. --> + <requestHandler name="/get" class="solr.RealTimeGetHandler"> + <lst name="defaults"> + <str name="omitHeader">true</str> + <str name="wt">json</str> + <str name="indent">true</str> + </lst> + </requestHandler> + + + <!-- Update Request Handler. 
+ + http://wiki.apache.org/solr/UpdateXmlMessages - <!-- JSON Update Request Handler - http://wiki.apache.org/solr/UpdateJSON --> - <requestHandler name="/update/json" - class="solr.JsonUpdateRequestHandler" - startup="lazy" /> + <requestHandler name="/update" class="solr.UpdateRequestHandler" /> + + <!-- for back compat with clients using /update/json and /update/csv --> + <requestHandler name="/update/json" class="solr.JsonUpdateRequestHandler"> + <lst name="defaults"> + <str name="stream.contentType">application/json</str> + </lst> + </requestHandler> + <requestHandler name="/update/csv" class="solr.CSVRequestHandler"> + <lst name="defaults"> + <str name="stream.contentType">application/csv</str> + </lst> + </requestHandler> <!-- Solr Cell Update Request Handler @@ -817,9 +246,6 @@ startup="lazy" class="solr.extraction.ExtractingRequestHandler" > <lst name="defaults"> - <!-- All the main content goes into "text"... if you need to return - the extracted text or do highlighting, use a stored field. --> - <str name="fmap.content">text</str> <str name="lowernames">true</str> <str name="uprefix">ignored_</str> @@ -830,6 +256,7 @@ </lst> </requestHandler> + <!-- Field Analysis Request Handler RequestHandler that provides much the same functionality as @@ -858,7 +285,7 @@ http://wiki.apache.org/solr/AnalysisRequestHandler An analysis handler that provides a breakdown of the analysis - process of provided docuemnts. This handler expects a (single) + process of provided documents. 
This handler expects a (single) content stream with the following format: <docs> @@ -897,11 +324,18 @@ <!-- ping/healthcheck --> <requestHandler name="/admin/ping" class="solr.PingRequestHandler"> - <lst name="defaults"> - <str name="qt">search</str> + <lst name="invariants"> <str name="q">solrpingquery</str> + </lst> + <lst name="defaults"> <str name="echoParams">all</str> </lst> + <!-- An optional feature of the PingRequestHandler is to configure the + handler with a "healthcheckFile" which can be used to enable/disable + the PingRequestHandler. + relative paths are resolved against the data dir + --> + <!-- <str name="healthcheckFile">server-enabled.txt</str> --> </requestHandler> <!-- Echo the request contents back to the client --> @@ -911,18 +345,347 @@ <str name="echoHandler">true</str> </lst> </requestHandler> + + <!-- Solr Replication + The SolrReplicationHandler supports replicating indexes from a + "master" used for indexing and "slaves" used for queries. - <!-- Legacy config for the admin interface --> - <admin> - <defaultQuery>*:*</defaultQuery> + http://wiki.apache.org/solr/SolrReplication - <!-- configure a healthcheck file for servers behind a - loadbalancer + It is also necessary for SolrCloud to function (in Cloud mode, the + replication handler is used to bulk transfer segments when nodes + are added or need to recover). + + https://wiki.apache.org/solr/SolrCloud/ + --> + <requestHandler name="/replication" class="solr.ReplicationHandler" > + <!-- + To enable simple master/slave replication, uncomment one of the + sections below, depending on whether this solr instance should be + the "master" or a "slave". If this instance is a "slave" you will + also need to fill in the masterUrl to point to a real machine.
+ --> + <!-- + <lst name="master"> + <str name="replicateAfter">commit</str> + <str name="replicateAfter">startup</str> + <str name="confFiles">schema.xml,stopwords.txt</str> + </lst> + --> + <!-- + <lst name="slave"> + <str name="masterUrl">http://your-master-hostname:8983/solr</str> + <str name="pollInterval">00:00:60</str> + </lst> + --> + </requestHandler> + <!-- Spell Check + + The spell check component can return a list of alternative spelling + suggestions. + + http://wiki.apache.org/solr/SpellCheckComponent + --> + <searchComponent name="spellcheck" class="solr.SpellCheckComponent"> + + <str name="queryAnalyzerFieldType">textSpell</str> + + <!-- Multiple "Spell Checkers" can be declared and used by this + component + --> + + <!-- a spellchecker built from a field of the main index --> + <lst name="spellchecker"> + <str name="name">default</str> + <str name="field">name</str> + <str name="classname">solr.DirectSolrSpellChecker</str> + <!-- the spellcheck distance measure used, the default is the internal levenshtein --> + <str name="distanceMeasure">internal</str> + <!-- minimum accuracy needed to be considered a valid spellcheck suggestion --> + <float name="accuracy">0.5</float> + <!-- the maximum #edits we consider when enumerating terms: can be 1 or 2 --> + <int name="maxEdits">2</int> + <!-- the minimum shared prefix when enumerating terms --> + <int name="minPrefix">1</int> + <!-- maximum number of inspections per result. --> + <int name="maxInspections">5</int> + <!-- minimum length of a query term to be considered for correction --> + <int name="minQueryLength">4</int> + <!-- maximum threshold of documents a query term can appear to be considered for correction --> + <float name="maxQueryFrequency">0.01</float> + <!-- uncomment this to require suggestions to occur in 1% of the documents + <float name="thresholdTokenFrequency">.01</float> --> + </lst> + + <!-- a spellchecker that can break or combine words. 
See "/spell" handler below for usage --> + <lst name="spellchecker"> + <str name="name">wordbreak</str> + <str name="classname">solr.WordBreakSolrSpellChecker</str> + <str name="field">name</str> + <str name="combineWords">true</str> + <str name="breakWords">true</str> + <int name="maxChanges">10</int> + </lst> + + <!-- a spellchecker that uses a different distance measure --> <!-- - <healthcheck type="file">server-enabled</healthcheck> + <lst name="spellchecker"> + <str name="name">jarowinkler</str> + <str name="field">spell</str> + <str name="classname">solr.DirectSolrSpellChecker</str> + <str name="distanceMeasure"> + org.apache.lucene.search.spell.JaroWinklerDistance + </str> + </lst> + --> + + <!-- a spellchecker that uses an alternate comparator + + comparatorClass can be one of: + 1. score (default) + 2. freq (Frequency first, then score) + 3. A fully qualified class name + --> + <!-- + <lst name="spellchecker"> + <str name="name">freq</str> + <str name="field">lowerfilt</str> + <str name="classname">solr.DirectSolrSpellChecker</str> + <str name="comparatorClass">freq</str> + --> + + <!-- A spellchecker that reads the list of words from a file --> + <!-- + <lst name="spellchecker"> + <str name="classname">solr.FileBasedSpellChecker</str> + <str name="name">file</str> + <str name="sourceLocation">spellings.txt</str> + <str name="characterEncoding">UTF-8</str> + <str name="spellcheckIndexDir">spellcheckerFile</str> + </lst> --> + </searchComponent> + + <!-- A request handler for demonstrating the spellcheck component. + + NOTE: This is purely an example. The whole purpose of the + SpellCheckComponent is to hook it into the request handler that + handles your normal user queries so that a separate request is + not needed to get suggestions. + + IN OTHER WORDS, THERE IS A REALLY GOOD CHANCE THE SETUP BELOW IS + NOT WHAT YOU WANT FOR YOUR PRODUCTION SYSTEM! + + See http://wiki.apache.org/solr/SpellCheckComponent for details + on the request parameters.
+ --> + <requestHandler name="/spell" class="solr.SearchHandler" startup="lazy"> + <lst name="defaults"> + <str name="df">text</str> + <!-- Solr will use suggestions from both the 'default' spellchecker + and from the 'wordbreak' spellchecker and combine them. + collations (re-written queries) can include a combination of + corrections from both spellcheckers --> + <str name="spellcheck.dictionary">default</str> + <str name="spellcheck.dictionary">wordbreak</str> + <str name="spellcheck">on</str> + <str name="spellcheck.extendedResults">true</str> + <str name="spellcheck.count">10</str> + <str name="spellcheck.alternativeTermCount">5</str> + <str name="spellcheck.maxResultsForSuggest">5</str> + <str name="spellcheck.collate">true</str> + <str name="spellcheck.collateExtendedResults">true</str> + <str name="spellcheck.maxCollationTries">10</str> + <str name="spellcheck.maxCollations">5</str> + </lst> + <arr name="last-components"> + <str>spellcheck</str> + </arr> + </requestHandler> + + <!-- Term Vector Component + + http://wiki.apache.org/solr/TermVectorComponent + --> + <searchComponent name="tvComponent" class="solr.TermVectorComponent"/> + + + <!-- Clustering Component + + http://wiki.apache.org/solr/ClusteringComponent + + You'll need to set the solr.clustering.enabled system property + when running solr to run with clustering enabled: + + java -Dsolr.clustering.enabled=true -jar start.jar + + --> + <searchComponent name="clustering" + enable="${solr.clustering.enabled:false}" + class="solr.clustering.ClusteringComponent" > + <!-- Declare an engine --> + <lst name="engine"> + <!-- The name, only one can be named "default" --> + <str name="name">default</str> + + <!-- Class name of Carrot2 clustering algorithm. 
+ + Currently available algorithms are: + + * org.carrot2.clustering.lingo.LingoClusteringAlgorithm + * org.carrot2.clustering.stc.STCClusteringAlgorithm + * org.carrot2.clustering.kmeans.BisectingKMeansClusteringAlgorithm + + See http://project.carrot2.org/algorithms.html for the + algorithm's characteristics. + --> + <str name="carrot.algorithm">org.carrot2.clustering.lingo.LingoClusteringAlgorithm</str> + + <!-- Overriding values for Carrot2 default algorithm attributes. + + For a description of all available attributes, see: + http://download.carrot2.org/stable/manual/#chapter.components. + Use attribute key as name attribute of str elements + below. These can be further overridden for individual + requests by specifying attribute key as request parameter + name and attribute value as parameter value. + --> + <str name="LingoClusteringAlgorithm.desiredClusterCountBase">20</str> + + <!-- Location of Carrot2 lexical resources. + + A directory from which to load Carrot2-specific stop words + and stop labels. Absolute or relative to Solr config directory. + If a specific resource (e.g. stopwords.en) is present in the + specified dir, it will completely override the corresponding + default one that ships with Carrot2. + + For an overview of Carrot2 lexical resources, see: + http://download.carrot2.org/head/manual/#chapter.lexical-resources + --> + <str name="carrot.lexicalResourcesDir">clustering/carrot2</str> + + <!-- The language to assume for the documents. + + For a list of allowed values, see: + http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage + --> + <str name="MultilingualClustering.defaultLanguage">ENGLISH</str> + </lst> + <lst name="engine"> + <str name="name">stc</str> + <str name="carrot.algorithm">org.carrot2.clustering.stc.STCClusteringAlgorithm</str> + </lst> + </searchComponent> + + <!-- A request handler for demonstrating the clustering component + + This is purely as an example. 
+ + In reality you will likely want to add the component to your + already specified request handlers. + --> + <requestHandler name="/clustering" + startup="lazy" + enable="${solr.clustering.enabled:false}" + class="solr.SearchHandler"> + <lst name="defaults"> + <bool name="clustering">true</bool> + <str name="clustering.engine">default</str> + <bool name="clustering.results">true</bool> + <!-- The title field --> + <str name="carrot.title">name</str> + <str name="carrot.url">id</str> + <!-- The field to cluster on --> + <str name="carrot.snippet">features</str> + <!-- produce summaries --> + <bool name="carrot.produceSummary">true</bool> + <!-- the maximum number of labels per cluster --> + <!--<int name="carrot.numDescriptions">5</int>--> + <!-- produce sub clusters --> + <bool name="carrot.outputSubClusters">false</bool> + + <str name="defType">edismax</str> + <str name="qf"> + text^0.5 features^1.0 name^1.2 sku^1.5 id^10.0 manu^1.1 cat^1.4 + </str> + <str name="q.alt">*:*</str> + <str name="rows">10</str> + <str name="fl">*,score</str> + </lst> + <arr name="last-components"> + <str>clustering</str> + </arr> + </requestHandler> + + <!-- Terms Component + + http://wiki.apache.org/solr/TermsComponent + + A component to return terms and document frequency of those + terms + --> + <searchComponent name="terms" class="solr.TermsComponent"/> + + <!-- A request handler for demonstrating the terms component --> + <requestHandler name="/terms" class="solr.SearchHandler" startup="lazy"> + <lst name="defaults"> + <bool name="terms">true</bool> + <bool name="distrib">false</bool> + </lst> + <arr name="components"> + <str>terms</str> + </arr> + </requestHandler> + + + <!-- Update Processors + + Chains of Update Processor Factories for dealing with Update + Requests can be declared, and then used by name in Update + Request Processors + + http://wiki.apache.org/solr/UpdateRequestProcessor + + --> + + <queryResponseWriter name="json" class="solr.JSONResponseWriter"> + <!-- For 
the purposes of the tutorial, JSON responses are written as + plain text so that they are easy to read in *any* browser. + If you expect a MIME type of "application/json" just remove this override. + --> + <str name="content-type">text/plain; charset=UTF-8</str> + </queryResponseWriter> + + <!-- + Custom response writers can be declared as needed... + --> + <queryResponseWriter name="velocity" class="solr.VelocityResponseWriter" startup="lazy"/> + + + <!-- XSLT response writer transforms the XML output by any xslt file found + in Solr's conf/xslt directory. Changes to xslt files are checked for + every xsltCacheLifetimeSeconds. + --> + <queryResponseWriter name="xslt" class="solr.XSLTResponseWriter"> + <int name="xsltCacheLifetimeSeconds">5</int> + </queryResponseWriter> + + <!-- Query Parsers + + http://wiki.apache.org/solr/SolrQuerySyntax + + Multiple QParserPlugins can be registered by name, and then + used in either the "defType" param for the QueryComponent (used + by SearchHandler) or in LocalParams + --> + + <!-- Legacy config for the admin interface --> + <admin> + <defaultQuery>*:*</defaultQuery> </admin> + </config>
