luetzkendorf 2004/11/08 01:45:48
Added: src/stores/org/apache/slide/index/lucene/expressions
ContainsExpression.java
src/stores/org/apache/slide/index/lucene
LuceneContentIndexer.java
Log:
content indexing added
Revision Changes Path
1.1
jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/ContainsExpression.java
Index: ContainsExpression.java
===================================================================
/*
* $Header:
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/lucene/expressions/ContainsExpression.java,v
1.1 2004/11/08 09:45:47 luetzkendorf Exp $
* $Revision: 1.1 $
* $Date: 2004/11/08 09:45:47 $
*
* ====================================================================
*
* Copyright 1999-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.slide.index.lucene.expressions;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Locale;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.slide.index.lucene.Index;
import org.apache.slide.index.lucene.IndexConfiguration;
import org.apache.slide.search.BadQueryException;
import org.jdom.Element;
/**
 * Implements the <code>contains</code> expression.
 *
 * <p>The text of the expression element is turned into a Lucene query on
 * {@link Index#CONTENT_FIELD_NAME}:
 * <ul>
 * <li>a literal whose only wildcard is a single trailing <code>*</code>
 *     becomes a {@link PrefixQuery},</li>
 * <li>any other literal containing <code>*</code> or <code>?</code> becomes
 *     a {@link WildcardQuery},</li>
 * <li>everything else is run through the configured analyzer and becomes a
 *     {@link TermQuery}, a {@link PhraseQuery} or an empty
 *     {@link BooleanQuery}, depending on the number of tokens produced.</li>
 * </ul>
 */
public class ContainsExpression extends AbstractExpression
{
    /**
     * Builds the query for a <code>contains</code> element.
     *
     * @param index   the index the expression is evaluated against; its
     *                configuration supplies the analyzer
     * @param element the element whose text is the search literal
     * @param negated if <code>true</code> the resulting query is replaced by
     *                the result of <code>negateQuery(...)</code>
     * @throws BadQueryException declared by the signature; nothing in this
     *                constructor's own code throws it directly
     */
    public ContainsExpression(Index index, Element element, boolean negated)
        throws BadQueryException
    {
        super(index);
        IndexConfiguration config = index.getConfiguration();
        String literal = element.getText();
        int starPos = literal.indexOf('*');
        int qmPos = literal.indexOf('?');

        if (starPos != -1 || qmPos != -1) {
            // Wildcard literals bypass the analyzer, so they are lower-cased
            // by hand. A fixed locale is used so that the result does not
            // depend on the JVM default locale (e.g. "I" -> dotless "i"
            // under a Turkish default locale).
            // TODO the lower-casing should depend on the Analyzer
            if (starPos == literal.length() - 1 && qmPos == -1) {
                // something like "word*": the first '*' is the last
                // character and there is no '?'
                String prefix = literal.substring(0, literal.length() - 1);
                setQuery(new PrefixQuery(new Term(Index.CONTENT_FIELD_NAME,
                        prefix.toLowerCase(Locale.ENGLISH))));
            } else {
                // TODO ditto
                setQuery(new WildcardQuery(new Term(Index.CONTENT_FIELD_NAME,
                        literal.toLowerCase(Locale.ENGLISH))));
            }
        } else {
            termOrPhraseQuery(config, Index.CONTENT_FIELD_NAME, literal);
        }

        if (negated) {
            setQuery(negateQuery(getQuery()));
        }
    }

    /**
     * Tokenizes <code>text</code> with the configured analyzer and sets the
     * query accordingly: a {@link PhraseQuery} for more than one token, a
     * {@link TermQuery} for exactly one token and an empty
     * {@link BooleanQuery} when the analyzer yields no token at all.
     *
     * @param config supplies the analyzer used for tokenizing
     * @param field  name of the field the terms are created for
     * @param text   the literal to search for
     */
    private void termOrPhraseQuery(IndexConfiguration config, String field,
            String text)
    {
        TokenStream ts = config.getAnalyzer().tokenStream(field,
                new StringReader(text));
        ArrayList tokens = new ArrayList(20);
        try {
            for (Token t = ts.next(); t != null; t = ts.next()) {
                tokens.add(t.termText());
            }
        } catch (IOException ignored) {
            // should not happen, because we are reading from a StringReader;
            // the tokens collected so far are used
        } finally {
            // release the token stream (and the reader behind it)
            try {
                ts.close();
            } catch (IOException ignored) {
                // nothing sensible can be done if closing fails
            }
        }

        if (tokens.size() > 1) {
            PhraseQuery phraseQuery = new PhraseQuery();
            for (int i = 0, l = tokens.size(); i < l; i++) {
                phraseQuery.add(new Term(field, (String)tokens.get(i)));
            }
            setQuery(phraseQuery);
        } else if (tokens.size() == 1) {
            setQuery(new TermQuery(new Term(field, (String)tokens.get(0))));
        } else {
            // TODO NOP query???
            setQuery(new BooleanQuery());
        }
    }
}
1.1
jakarta-slide/src/stores/org/apache/slide/index/lucene/LuceneContentIndexer.java
Index: LuceneContentIndexer.java
===================================================================
/*
*
* ====================================================================
*
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.slide.index.lucene;
import java.io.ByteArrayInputStream;
import java.util.Hashtable;
import javax.transaction.xa.XAException;
import javax.transaction.xa.Xid;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.slide.common.NamespaceAccessToken;
import org.apache.slide.common.ServiceInitializationFailedException;
import org.apache.slide.common.ServiceParameterErrorException;
import org.apache.slide.common.ServiceParameterMissingException;
import org.apache.slide.common.Uri;
import org.apache.slide.content.NodeRevisionContent;
import org.apache.slide.content.NodeRevisionDescriptor;
import org.apache.slide.content.NodeRevisionNumber;
import org.apache.slide.extractor.ExtractorManager;
import org.apache.slide.search.IndexException;
/**
 * IndexStore implementation for indexing content based on Jakarta Lucene.
 *
 * <p>The analyzer used for the content field can be configured with the
 * <code>analyzer</code> parameter (a fully qualified class name); without it
 * a {@link SimpleAnalyzer} is used.
 */
public class LuceneContentIndexer extends AbstractLuceneIndexer
{
    /** Name of the parameter holding the analyzer class name. */
    private static final String ANALYZER_PARAM = "analyzer";

    /** Analyzer class name read in {@link #setParameters(Hashtable)}; may be null. */
    private String analyserClassName;

    /**
     * Initializes the default index configuration, installs the content
     * analyzer and creates the index named <code>"content " + scope</code>.
     *
     * @param token the namespace access token, passed on to the super class
     * @throws ServiceInitializationFailedException if the analyzer cannot be
     *         created or an {@link IndexException} occurs during set-up
     */
    public void initialize(NamespaceAccessToken token)
        throws ServiceInitializationFailedException
    {
        super.initialize(token);
        try {
            indexConfiguration.initDefaultConfiguration();
            indexConfiguration.setContentAnalyzer(
                    createAnalyzer(this.analyserClassName));
            this.index = new Index(indexConfiguration, getLogger(),
                    "content " + this.scope);
        }
        catch (IndexException e) {
            throw new ServiceInitializationFailedException(this, e);
        }
    }

    /**
     * Passes the parameters to the super class and remembers the value of
     * the <code>analyzer</code> parameter.
     *
     * @param parameters the service parameters
     */
    public void setParameters(Hashtable parameters)
        throws ServiceParameterErrorException,
               ServiceParameterMissingException
    {
        super.setParameters(parameters);
        analyserClassName = (String)parameters.get(ANALYZER_PARAM);
    }

    /**
     * Calls the super implementation and logs, instead of propagating, any
     * {@link XAException} it throws, so that a failure of the content index
     * does not make the commit fail. Other (unchecked) exceptions are not
     * caught here.
     */
    public void commit(Xid xid, boolean onePhase) throws XAException
    {
        try {
            super.commit(xid, onePhase);
        } catch (XAException e) {
            error("Error while committing to content index ({0})", e);
        }
    }

    /**
     * Schedules an index job for the content of the given resource in the
     * current transaction. Nothing is scheduled if the URI is not included
     * or no content extractor is available for the resource.
     *
     * @see org.apache.slide.search.Indexer#createIndex(org.apache.slide.common.Uri,
     *      org.apache.slide.content.NodeRevisionDescriptor,
     *      org.apache.slide.content.NodeRevisionContent)
     */
    public void createIndex(Uri uri, NodeRevisionDescriptor revisionDescriptor,
            NodeRevisionContent revisionContent) throws IndexException
    {
        if (isIncluded(uri.toString())
                && ExtractorManager.getInstance().hasContentExtractor(
                        uri.getNamespace().getName(), uri.toString(),
                        revisionDescriptor))
        {
            TransactionalIndexResource indexResource = getCurrentTxn();
            indexResource.addIndexJob(uri, revisionDescriptor,
                    new ByteArrayInputStream(revisionContent.getContentBytes()));
        }
    }

    /**
     * Schedules an update job for the content of the given resource in the
     * current transaction. Nothing is scheduled if the URI is not included
     * or no content extractor is available for the resource.
     *
     * @see org.apache.slide.search.Indexer#updateIndex(org.apache.slide.common.Uri,
     *      org.apache.slide.content.NodeRevisionDescriptor,
     *      org.apache.slide.content.NodeRevisionContent)
     */
    public void updateIndex(Uri uri, NodeRevisionDescriptor revisionDescriptor,
            NodeRevisionContent revisionContent) throws IndexException
    {
        if (isIncluded(uri.toString())
                && ExtractorManager.getInstance().hasContentExtractor(
                        uri.getNamespace().getName(), uri.toString(),
                        revisionDescriptor))
        {
            TransactionalIndexResource indexResource = getCurrentTxn();
            indexResource.addUpdateJob(uri, revisionDescriptor,
                    new ByteArrayInputStream(revisionContent.getContentBytes()));
        }
    }

    /**
     * Schedules a remove job for the given revision in the current
     * transaction. Nothing is scheduled if the URI is not included or no
     * content extractor is available (queried without a descriptor).
     *
     * @see org.apache.slide.search.Indexer#dropIndex(org.apache.slide.common.Uri,
     *      org.apache.slide.content.NodeRevisionNumber)
     */
    public void dropIndex(Uri uri, NodeRevisionNumber number)
        throws IndexException
    {
        if (isIncluded(uri.toString())
                && ExtractorManager.getInstance().hasContentExtractor(
                        uri.getNamespace().getName(), uri.toString(), null))
        {
            TransactionalIndexResource indexResource = getCurrentTxn();
            indexResource.addRemoveJob(uri, number);
        }
    }

    /**
     * Creates the analyzer to be used for content indexing.
     *
     * @param clsName fully qualified name of an {@link Analyzer} class with
     *        a public no-argument constructor; if <code>null</code> or empty
     *        a {@link SimpleAnalyzer} is created
     * @return the analyzer instance
     * @throws ServiceInitializationFailedException if the class cannot be
     *         found or instantiated, or is not an {@link Analyzer}
     */
    protected Analyzer createAnalyzer(String clsName)
        throws ServiceInitializationFailedException
    {
        Analyzer analyzer;
        if (clsName == null || clsName.length() == 0) {
            analyzer = new SimpleAnalyzer();
        } else {
            try {
                Class analyzerClazz = Class.forName(clsName);
                analyzer = (Analyzer)analyzerClazz.newInstance();
            } catch (ClassNotFoundException e) {
                throw analyzerInstantiationFailed(clsName, e);
            } catch (InstantiationException e) {
                throw analyzerInstantiationFailed(clsName, e);
            } catch (IllegalAccessException e) {
                throw analyzerInstantiationFailed(clsName, e);
            } catch (ClassCastException e) {
                // the configured class exists but is not an Analyzer
                throw analyzerInstantiationFailed(clsName, e);
            }
        }
        info("using analyzer: {0}", analyzer.getClass().getName());
        return analyzer;
    }

    /**
     * Logs a failed analyzer instantiation and wraps its cause.
     *
     * NOTE(review): the placeholders are assumed to be zero-based, as in the
     * other log calls of this class (which use {0}) - confirm against the
     * inherited error(...) helper.
     *
     * @param clsName the analyzer class name that was configured
     * @param cause   the exception raised while loading or instantiating it
     * @return the exception to be thrown by the caller
     */
    private ServiceInitializationFailedException analyzerInstantiationFailed(
            String clsName, Exception cause)
    {
        error("Error while instantiating analyzer {0} {1}",
                clsName, cause.getMessage());
        return new ServiceInitializationFailedException(this, cause);
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]