luetzkendorf 2004/11/08 01:46:13
Modified: src/stores/org/apache/slide/index/lucene Index.java
IndexConfiguration.java
Added: src/stores/org/apache/slide/index/lucene defaultConfig.xml
Removed: src/stores/org/apache/slide/index/lucene properties.xml
Log:
configuration improved
Revision Changes Path
1.6 +146 -45
jakarta-slide/src/stores/org/apache/slide/index/lucene/Index.java
Index: Index.java
===================================================================
RCS file:
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/Index.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- Index.java 1 Nov 2004 17:47:21 -0000 1.5
+++ Index.java 8 Nov 2004 09:46:13 -0000 1.6
@@ -24,12 +24,15 @@
import java.io.File;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.LinkedList;
+import java.util.List;
import java.util.Locale;
import java.util.Set;
import java.util.StringTokenizer;
@@ -48,6 +51,9 @@
import org.apache.slide.content.NodeProperty;
import org.apache.slide.content.NodeRevisionDescriptor;
import org.apache.slide.content.NodeRevisionNumber;
+import org.apache.slide.extractor.ContentExtractor;
+import org.apache.slide.extractor.ExtractorException;
+import org.apache.slide.extractor.ExtractorManager;
import org.apache.slide.search.IndexException;
import org.apache.slide.util.logger.Logger;
@@ -62,6 +68,7 @@
public static final String DEPTH_FIELD_NAME = "SLIDE_DEPTH";
public static final String VERSION_FIELD_NAME = "SLIDE_VERSION";
public static final String IS_DEFINED_FIELD_NAME = "SLIDE_ISDEFINED";
+ public static final String CONTENT_FIELD_NAME = "SLIDE_CONTENT";
protected static final SimpleDateFormat DATE_INDEX_FORMAT =
new SimpleDateFormat("yyyy-MM-dd HH:mm", Locale.UK);
@@ -79,7 +86,7 @@
protected IndexConfiguration configuration;
protected JobRunner indexThread;
-
+ protected String indexName;
protected Logger logger;
protected LinkedList txnQueue = new LinkedList();
@@ -91,10 +98,12 @@
private int jobCounter = 0;
- public Index(IndexConfiguration configuration, Logger logger) throws
IndexException
+ public Index(IndexConfiguration configuration, Logger logger, String
name)
+ throws IndexException
{
this.logger = logger;
this.configuration = configuration;
+ this.indexName = name;
File file = new File(this.configuration.getIndexPath());
if (!file.exists() && !file.mkdirs()) {
@@ -119,11 +128,7 @@
this.configuration.getIndexPath(), e);
}
- if (configuration.indexAsynchron) {
- this.indexThread = new JobRunner();
- this.indexThread.setName("Indexing Thread");
- this.indexThread.start();
- }
+
// TODO make configurable
BooleanQuery.setMaxClauseCount(10000);
@@ -137,25 +142,33 @@
return this.logger;
}
- public void close() {
+ public void start() {
+ if (configuration.indexAsynchron) {
+ this.indexThread = new JobRunner();
+ this.indexThread.setName("Indexing Thread (" + this.indexName +
")");
+ this.indexThread.setPriority(configuration.getPriority());
+ this.indexThread.start();
+ }
+ }
+ public void stop() {
if (this.indexThread != null) {
- if (this.txnQueue.size() > 0) {
- try {
- this.indexThread.interrupt();
- this.txnQueue.notify();
- this.indexThread.join();
- } catch (InterruptedException e) {
- //
- }
- try {
- for(Iterator i = this.txnQueue.iterator(); i.hasNext();)
{
- IndexTransaction txn = (IndexTransaction)i.next();
- executeIndexTransaction(txn);
- }
- } catch (IndexException e1) {
- // TODO Auto-generated catch block
- e1.printStackTrace();
+ // stop the indexing thread
+ try {
+ this.indexThread.interrupt();
+ this.txnQueue.notify();
+ this.indexThread.join();
+ } catch (InterruptedException e) {
+ //
+ }
+ // execute remaining indexing jobs
+ try {
+ for(Iterator i = this.txnQueue.iterator(); i.hasNext();) {
+ IndexTransaction txn = (IndexTransaction)i.next();
+ executeIndexTransaction(txn);
}
+ } catch (IndexException e) {
+ logger.log("Error while executing job", e, LOG_CHANNEL,
+ Logger.ERROR);
}
}
}
@@ -181,6 +194,16 @@
private Field textField(String fieldName, String value) {
return new Field(fieldName, value, false, true, true);
}
+ private Field textField(String fieldName, Reader value) {
+ return Field.Text(fieldName, value);
+ }
+
+
+ /**
+ * Creates a lucene index document for a properties indexer.
+ * @param uri resource
+ * @param descriptor properties to be indexed
+ */
private Document createLuceneDocument(String uri, NodeRevisionDescriptor
descriptor) {
Document doc = new Document();
@@ -202,9 +225,6 @@
doc.add(unstoredString(Index.DEPTH_FIELD_NAME,
configuration.intToIndexString(depth)));
- //doc.add(unstoredString(Index.VERSION_FIELD_NAME,
- // descriptor.getRevisionNumber().toString()));
-
// resource type
String rtype = descriptor.getResourceType();
if (rtype.indexOf("collection") != -1) {
@@ -218,6 +238,7 @@
}
}
+ // all other properties
for(Enumeration e = descriptor.enumerateProperties();
e.hasMoreElements();) {
NodeProperty property = (NodeProperty)e.nextElement();
String name = property.getName();
@@ -264,8 +285,59 @@
return doc;
}
+
+ /**
+ * Creates a lucene document for content indexing.
+ */
+ private Document createLuceneDocument(String uri,
+ NodeRevisionDescriptor descriptor, InputStream content)
+ throws IndexException
+ {
+ Document doc = new Document();
+
+ doc.add(unstoredString(Index.KEY_FIELD_NAME,
+ configuration.generateKey(uri,
descriptor.getRevisionNumber())));
+ doc.add(storedString(Index.URI_FIELD_NAME, uri.toString()));
+
+ // scopes
+ StringTokenizer tokenizer = new StringTokenizer(uri, "/");
+ StringBuffer buffer = new StringBuffer(uri.length());
+ doc.add(unstoredString(Index.SCOPE_FIELD_NAME, "/"));
+ int depth = 0;
+ for(; tokenizer.hasMoreTokens();) {
+ buffer.append("/").append(tokenizer.nextToken());
+ doc.add(unstoredString(Index.SCOPE_FIELD_NAME,
buffer.toString()));
+ depth++;
+ }
+ doc.add(unstoredString(Index.DEPTH_FIELD_NAME,
+ configuration.intToIndexString(depth)));
+
+
+ List extractors =
ExtractorManager.getInstance().getContentExtractors(
+ configuration.getNamespaceName(), uri, descriptor);
+
+ try {
+ for(Iterator i = extractors.iterator(); i.hasNext();) {
+ ContentExtractor extractor = (ContentExtractor)i.next();
+ doc.add(textField(Index.CONTENT_FIELD_NAME,
+ extractor.extract(content)));
+ }
+ } catch (ExtractorException e) {
+ throw new IndexException(e);
+ }
+
+ return doc;
+ }
- public void scheduleIndexTransaction (Set removeJobs, Set addJobs)
+ /**
+ * Schedules an index transaction. If asynchron indexing is enabled, this
+ * adds the jobs to the indexing queue, otherwise the indexing is
executed
+ * immediately.
+ * @param removeJobs Set of jobs for deleting objects from the index.
+ * @param addJobs Set of Jobs for adding new objects to the index.
+ * @throws IndexException
+ */
+ public void scheduleIndexTransaction(Set removeJobs, Set addJobs)
throws IndexException
{
if (configuration.isIndexAsynchron()) {
@@ -287,8 +359,8 @@
synchronized void executeIndexTransaction(Set removeJobs, Set addJobs)
throws IndexException {
- // TODO make async by option
try {
+ // execute delete jobs
if (removeJobs.size() > 0) {
IndexReader reader = IndexReader.open(getDirectory());
for(Iterator i = removeJobs.iterator(); i.hasNext();) {
@@ -300,14 +372,21 @@
reader.close();
}
- if (addJobs.size() > 0) {
+ // execute index jobs
+ if (addJobs.size() > 0 || this.jobCounter >=
configuration.getOptimizeThreshold()) {
IndexWriter writer = new IndexWriter(getDirectory(),
configuration.getAnalyzer(), false);
for(Iterator i = addJobs.iterator(); i.hasNext(); ) {
IndexJob job = (IndexJob)i.next();
- logger.log("index: " + job.key, LOG_CHANNEL,
Logger.DEBUG);
- Document doc = createLuceneDocument(job.uri,
job.descriptor);
+ Document doc;
+ if (job.content != null) {
+ logger.log("index content: " + job.key, LOG_CHANNEL,
Logger.DEBUG);
+ doc = createLuceneDocument(job.uri, job.descriptor,
job.content);
+ } else {
+ logger.log("index properties: " + job.key,
LOG_CHANNEL, Logger.DEBUG);
+ doc = createLuceneDocument(job.uri, job.descriptor);
+ }
writer.addDocument(doc);
this.jobCounter++;
}
@@ -323,11 +402,23 @@
}
}
-
+ /**
+ * Creates an IndexJob for <em>property indexing</em>.
+ */
+ public IndexJob createIndexJob(Uri uri, NodeRevisionDescriptor
descriptor) {
+ return new IndexJob(uri, descriptor);
+ }
+
+ /**
+ * Creates an IndexJob for <em>content indexing</em>.
+ */
public IndexJob createIndexJob(Uri uri,
- NodeRevisionNumber number, NodeRevisionDescriptor descriptor) {
- return new IndexJob(uri, number, descriptor);
+ NodeRevisionDescriptor descriptor, InputStream content) {
+ return new IndexJob(uri, descriptor, content);
}
+ /**
+ * Creates an IndexJob for <em>removing</em> a resource.
+ */
public IndexJob createDeleteJob(Uri uri, NodeRevisionNumber number) {
return new IndexJob(uri, number);
}
@@ -335,23 +426,29 @@
public class IndexJob {
protected String key;
protected String uri;
- protected String number;
- protected NodeRevisionDescriptor descriptor;
+ protected NodeRevisionDescriptor descriptor;
+ protected InputStream content;
protected String getKey() {
return key;
}
protected IndexJob(Uri uri, NodeRevisionNumber number) {
this.uri = uri.toString();
- this.number = number.toString();
this.descriptor = null;
+ this.content = null;
this.key = configuration.generateKey(this.uri, number);
}
- protected IndexJob(Uri uri, NodeRevisionNumber number,
NodeRevisionDescriptor descriptor) {
+ protected IndexJob(Uri uri, NodeRevisionDescriptor descriptor) {
this.uri = uri.toString();
- this.number = number.toString();
this.descriptor = descriptor;
- this.key = configuration.generateKey(this.uri, number);
+ this.content = null;
+ this.key = configuration.generateKey(this.uri,
descriptor.getRevisionNumber());
+ }
+ protected IndexJob(Uri uri, NodeRevisionDescriptor descriptor,
InputStream content) {
+ this.uri = uri.toString();
+ this.descriptor = descriptor;
+ this.content = content;
+ this.key = configuration.generateKey(this.uri,
descriptor.getRevisionNumber());
}
public boolean equals(Object obj)
{
@@ -380,6 +477,9 @@
}
}
+ /**
+ * Thread for executing index jobs scheduled in the queue.
+ */
private class JobRunner extends Thread {
public void run()
{
@@ -409,7 +509,8 @@
executeIndexTransaction(txn);
}
catch(Exception e) {
- logger.log("Error while executing job", e, LOG_CHANNEL,
Logger.ERROR);
+ logger.log("Error while executing job", e, LOG_CHANNEL,
+ Logger.ERROR);
}
}
}
1.5 +122 -67
jakarta-slide/src/stores/org/apache/slide/index/lucene/IndexConfiguration.java
Index: IndexConfiguration.java
===================================================================
RCS file:
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/IndexConfiguration.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- IndexConfiguration.java 28 Oct 2004 16:00:14 -0000 1.4
+++ IndexConfiguration.java 8 Nov 2004 09:46:13 -0000 1.5
@@ -22,30 +22,32 @@
*/
package org.apache.slide.index.lucene;
-import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
+import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
+import javax.xml.parsers.SAXParser;
+import javax.xml.parsers.SAXParserFactory;
+
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.slide.content.NodeRevisionNumber;
import org.apache.slide.search.IndexException;
-import org.jdom.Document;
-import org.jdom.Element;
-import org.jdom.JDOMException;
-import org.jdom.input.SAXBuilder;
+import org.apache.slide.util.conf.Configuration;
+import org.apache.slide.util.conf.ConfigurationElement;
+import org.apache.slide.util.conf.ConfigurationException;
+import org.apache.slide.util.conf.Populate;
+import org.xml.sax.InputSource;
/**
* Holds all configuration infos about indexing.
@@ -54,15 +56,18 @@
{
protected Set stringProperties = new HashSet();
+ /** maps field names (properties) to analyzers. */
protected Map textProperties = new HashMap();
protected Set dateProperties = new HashSet();
protected Set intProperties = new HashSet();
protected Set supportsIsdefinedProperties = new HashSet();
protected Set indexedProperties = new HashSet();
protected int optimizeThreshold = 100;
- protected Analyzer analyzer = new IndexAnalyzer();
+ protected AnalyzerImpl analyzer = new AnalyzerImpl();
protected String indexPath = null;
protected boolean indexAsynchron = false;
+ protected String namespaceName = null;
+ protected int priority = Thread.NORM_PRIORITY;
public void addStringProperty(String namespace, String name) {
String key = namespace + name;
@@ -153,83 +158,126 @@
{
this.indexAsynchron = indexAsynchron;
}
+ public int getPriority() {
+ return this.priority;
+ }
+ public void setPriority(int priority) {
+ this.priority = priority;
+ }
+ public String getNamespaceName() {
+ return this.namespaceName;
+ }
+ public void setNamespaceName(String name) {
+ this.namespaceName = name;
+ }
public Analyzer getAnalyzer() {
return this.analyzer;
}
+
+ public void setContentAnalyzer(Analyzer analyzer) {
+ if (analyzer == null) throw new NullPointerException();
+ this.analyzer.contentAnalyzer = analyzer;
+ }
void initDefaultConfiguration() throws IndexException {
loadConfigurationFromResource(
- "org/apache/slide/index/lucene/properties.xml");
+ "org/apache/slide/index/lucene/defaultConfig.xml");
}
- void loadConfigurationFromResource(String resourceName) throws
IndexException {
+ private void loadConfigurationFromResource(String resourceName) throws
IndexException {
InputStream is =
this.getClass().getClassLoader().getResourceAsStream(
resourceName);
+
if (is != null) {
- importConfiguration(is);
+ try {
+ SAXParserFactory factory = SAXParserFactory.newInstance();
+ factory.setNamespaceAware(false);
+ factory.setValidating(false);
+ SAXParser parser = factory.newSAXParser();
+
+ Populate pop = new Populate();
+ Configuration conf = new ConfigurationElement(
+ pop.load(new InputSource(is),
parser.getXMLReader()));
+
+ readPropertyConfiguration(conf.getConfiguration(
+ "indexed-properties"));
+ } catch (Exception e) {
+ throw new IndexException
+ ("error while loading configuration from " +
resourceName, e);
+ }
} else {
throw new IndexException("Can't find index configuration at: " +
resourceName);
}
}
- private void importConfiguration(InputStream is) throws IndexException {
- try {
- Document doc = new SAXBuilder().build(is);
- List properties = doc.getRootElement().getChildren();
+ void readPropertyConfiguration(Configuration conf)
+ throws IndexException
+ {
+ for(Enumeration e =
conf.getConfigurations("property");e.hasMoreElements();) {
+ Configuration property = (Configuration)e.nextElement();
+
+ String n, ns;
+ try {
+ n = property.getAttribute("name");
+ ns = property.getAttribute("namespace");
+ } catch (ConfigurationException ex) {
+ continue;
+ }
- for(Iterator i = properties.iterator(); i.hasNext();) {
- Element e = (Element)i.next();
- String n = e.getName(); String ns = e.getNamespaceURI();
-
- Element child;
-
- child = e.getChild("string");
- if (child != null) {
- addStringProperty(ns, n);
- addSupportsIsdefinedProperty(ns, n);
- }
- child = e.getChild("integer");
- if (child != null) {
- addIntProperty(ns, n);
- addSupportsIsdefinedProperty(ns, n);
- }
- child = e.getChild("date");
- if (child != null) {
- addDateProperty(ns, n);
- addSupportsIsdefinedProperty(ns, n);
- }
- child = e.getChild("text");
- if (child != null) {
- String clsName = child.getAttributeValue("analyzer");
- if (clsName == null) {
- clsName =
"org.apache.lucene.analysis.SimpleAnalyzer";
- }
- Analyzer analyzer;
- try {
- Class cls = Class.forName(clsName);
- analyzer = (Analyzer)cls.newInstance();
- } catch (ClassNotFoundException ex) {
- throw new IndexException("Analyzer class not found
(" + ns + ", " + n + ")", ex);
- } catch (InstantiationException ex) {
- throw new IndexException("Can't instanciate analyzer
(" + ns + ", " + n + ")", ex);
- } catch (IllegalAccessException ex) {
- throw new IndexException("Can't instanciate analyzer
(" + ns + ", " + n + ")", ex);
- } catch (ClassCastException ex) {
- throw new IndexException("Analyzer does not extend
Analyzer (" + ns + ", " + n + ")", ex);
- }
- addTextProperty(ns, n, analyzer);
+ Configuration child;
+ try {
+ child = property.getConfiguration("string");
+ addStringProperty(ns, n);
+ addSupportsIsdefinedProperty(ns, n);
+ }
+ catch (ConfigurationException ex) {}
+
+ try {
+ child = property.getConfiguration("integer");
+ addIntProperty(ns, n);
+ addSupportsIsdefinedProperty(ns, n);
+ }
+ catch (ConfigurationException ex) {}
+
+ try {
+ child = property.getConfiguration("date");
+ addDateProperty(ns, n);
+ addSupportsIsdefinedProperty(ns, n);
+ }
+ catch (ConfigurationException ex) {}
+
+ try {
+ child = property.getConfiguration("text");
+ String clsName;
+ try {
+ clsName = child.getAttribute("analyzer");
+ } catch (ConfigurationException ex) {
+ clsName = "org.apache.lucene.analysis.SimpleAnalyzer";
}
- child = e.getChild("is-defined");
- if (child != null) {
- addSupportsIsdefinedProperty(ns, n);
+
+ Analyzer analyzer;
+ try {
+ Class cls = Class.forName(clsName);
+ analyzer = (Analyzer)cls.newInstance();
+ } catch (ClassNotFoundException ex) {
+ throw new IndexException("Analyzer class not found (" +
ns + ", " + n + ")", ex);
+ } catch (InstantiationException ex) {
+ throw new IndexException("Can't instanciate analyzer ("
+ ns + ", " + n + ")", ex);
+ } catch (IllegalAccessException ex) {
+ throw new IndexException("Can't instanciate analyzer ("
+ ns + ", " + n + ")", ex);
+ } catch (ClassCastException ex) {
+ throw new IndexException("Analyzer does not extend
Analyzer (" + ns + ", " + n + ")", ex);
}
+ addTextProperty(ns, n, analyzer);
}
+ catch (ConfigurationException ex) {}
- } catch (JDOMException e) {
- throw new IndexException(e);
- } catch (IOException e) {
- throw new IndexException(e);
+ try {
+ child = property.getConfiguration("is-defined");
+ addSupportsIsdefinedProperty(ns, n);
+ }
+ catch (ConfigurationException ex) {}
}
}
@@ -294,11 +342,16 @@
}
- class IndexAnalyzer extends Analyzer {
+ class AnalyzerImpl extends Analyzer {
Analyzer defaultAnalyzer = new SimpleAnalyzer();
+ Analyzer contentAnalyzer = null;
public TokenStream tokenStream(String fieldName, Reader reader)
{
+ if (fieldName.equals(Index.CONTENT_FIELD_NAME)) {
+ return contentAnalyzer.tokenStream(fieldName, reader);
+ }
+
Analyzer analyzer = (Analyzer)textProperties.get(fieldName);
if (analyzer != null) {
return analyzer.tokenStream(fieldName, reader);
@@ -308,4 +361,6 @@
}
}
}
+
+
}
1.1
jakarta-slide/src/stores/org/apache/slide/index/lucene/defaultConfig.xml
Index: defaultConfig.xml
===================================================================
<index-configuration>
<indexed-properties>
<property name="displayname" namespace="DAV:">
<string/><is-defined/>
</property>
<property name="getcontenttype" namespace="DAV:">
<string/><is-defined/>
</property>
<property name="getcontentlanguage" namespace="DAV:">
<string/><is-defined/>
</property>
<property name="getcontentlength" namespace="DAV:">
<integer/>
</property>
<property name="getlastmodified" namespace="DAV:">
<date/>
</property>
<property name="creationdate" namespace="DAV:">
<date/>
</property>
<property name="owner" namespace="DAV:">
<string/><is-defined/>
</property>
<!-- they are updated quite often, e.g. for each collection where a member
is added or removed
<d:modificationdate>
<date/>
</d:modificationdate>
<d:modificationuser>
<string/><is-defined/>
</d:modificationuser>
-->
<!-- TODO what about href properties -->
<property name="checked-in" namespace="DAV:">
<string/><is-defined/>
</property>
<property name="checked-out" namespace="DAV:">
<string/><is-defined/>
</property>
</indexed-properties>
</index-configuration>
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]