wam 2004/02/25 02:23:52
Added: src/stores/org/apache/slide/index
BasicExpressionFactoryTxtContainsSample.java
BasicExpressionTxtContainsSample.java
LuceneIndexer.java SampleTxtContainsIndexer.java
Log:
an example for indexing with Lucene
Revision Changes Path
1.1
jakarta-slide/src/stores/org/apache/slide/index/BasicExpressionFactoryTxtContainsSample.java
Index: BasicExpressionFactoryTxtContainsSample.java
===================================================================
/*
* $Header:
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/BasicExpressionFactoryTxtContainsSample.java,v
1.1 2004/02/25 10:23:52 wam Exp $
* $Revision: 1.1 $
* $Date: 2004/02/25 10:23:52 $
*
* ====================================================================
*
* Copyright 1999-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.slide.index;
import java.util.Collection;
import org.apache.slide.content.NodeProperty.NamespaceCache;
import org.apache.slide.search.BadQueryException;
import org.apache.slide.search.PropertyProvider;
import org.apache.slide.search.basic.IBasicExpression;
import org.apache.slide.search.basic.IBasicExpressionFactory;
import org.apache.slide.search.basic.IBasicQuery;
import org.jdom.Element;
/**
 * Factory for the executable expressions of the text "contains" sample.
 * An instance is created for each SEARCH request.
 */
public class BasicExpressionFactoryTxtContainsSample
    implements IBasicExpressionFactory
{
    /** the query handed over in init() */
    private IBasicQuery query;

    /** the property provider handed over in init() */
    protected PropertyProvider propertyProvider;

    /** path that is passed on to every expression created by this factory */
    private String rootPath;

    /**
     * Constructor
     *
     * @param    rootPath   path handed to each created expression
     *                      (originally documented as "path to the content
     *                      files")
     */
    public BasicExpressionFactoryTxtContainsSample (String rootPath)
    {
        this.rootPath = rootPath;
    }

    /**
     * Called for merge expressions (or, and). Not defined in this sample,
     * so <code>null</code> is always returned.
     *
     * @param    mergeOperator       and, or
     * @param    namespace           the namespace of this expression
     * @param    expressionsToMerge  all expressions, that shall be merged
     *
     * @return   always <code>null</code>
     *
     * @throws   BadQueryException   never thrown by this implementation
     */
    public IBasicExpression createMergeExpression (String mergeOperator,
                                                   String namespace,
                                                   Collection expressionsToMerge)
        throws BadQueryException
    {
        return null;
    }

    /**
     * Called by the expression compiler for each leaf expression.
     *
     * @param    element   an Element describing the expression
     *
     * @return   the expression for <code>element</code>, or
     *           <code>null</code> if this factory has no expression for it
     *           (element outside the DAV: namespace or not a
     *           <code>contains</code> element)
     *
     * @throws   BadQueryException   if <code>element</code> is
     *                               <code>null</code>
     */
    public IBasicExpression createExpression (Element element)
        throws BadQueryException
    {
        if (element == null)
        {
            throw new BadQueryException ("expected a where criteria");
        }

        BasicExpressionTxtContainsSample result = null;
        String namespace = element.getNamespace().getURI();
        if (namespace.equals (NamespaceCache.DEFAULT_URI))
        {
            result = createDAVExpression (element);
        }
        // allow store specific extensions
        //  else if (namespace.equals (MyNamespace))
        //      result = createMyExpression (element);

        // Only wire up the back pointer when an expression was actually
        // created; an unsupported element is reported by returning null
        // (as createMergeExpression does) instead of failing with a
        // NullPointerException.
        if (result != null)
        {
            result.setFactory (this);
        }
        return result;
    }

    /**
     * Called, when the expression is in the default (DAV:) namespace.
     * Only the <code>contains</code> element is supported.
     *
     * @param    e   an Element
     *
     * @return   an expression searching for the trimmed text of
     *           <code>e</code>, or <code>null</code> if <code>e</code> is
     *           not a <code>contains</code> element
     */
    private BasicExpressionTxtContainsSample createDAVExpression (Element e)
    {
        String name = e.getName();
        BasicExpressionTxtContainsSample result = null;

        if (name.equals ("contains"))
        {
            String searchedText = e.getTextTrim();
            result = new BasicExpressionTxtContainsSample (searchedText, rootPath);
        }
        return result;
    }

    /**
     * Called by BasicExpressionCompiler after construction.
     *
     * @param    query               the associated BasicQuery
     * @param    propertyProvider    the PropertyProvider for this expression.
     *
     * @throws   BadQueryException   never thrown by this implementation
     */
    public void init (IBasicQuery query, PropertyProvider propertyProvider)
        throws BadQueryException
    {
        this.query = query;
        this.propertyProvider = propertyProvider;
    }

    /**
     * Method getPropertyProvider
     *
     * @return   the PropertyProvider passed to init()
     */
    public PropertyProvider getPropertyProvider()
    {
        return propertyProvider;
    }

    /**
     * Method getQuery
     *
     * @return   the IBasicQuery passed to init()
     */
    public IBasicQuery getQuery()
    {
        return query;
    }

    /**
     * Returns the name of the first child of the <code>prop</code> child
     * of <code>e</code>. Not used by this sample.
     * NOTE(review): fails with a NullPointerException / index error when
     * <code>e</code> has no <code>prop</code> child or that child is empty.
     *
     * @param    e   an Element containing a <code>prop</code> child in its
     *               own namespace
     *
     * @return   the local name of the first element below <code>prop</code>
     */
    private String propName (Element e)
    {
        Element propElem = e.getChild ("prop", e.getNamespace());
        Element el = (Element) propElem.getChildren().get(0);
        return el.getName();
    }
}
1.1
jakarta-slide/src/stores/org/apache/slide/index/BasicExpressionTxtContainsSample.java
Index: BasicExpressionTxtContainsSample.java
===================================================================
/*
* $Header:
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/BasicExpressionTxtContainsSample.java,v
1.1 2004/02/25 10:23:52 wam Exp $
* $Revision: 1.1 $
* $Date: 2004/02/25 10:23:52 $
*
* ====================================================================
*
* Copyright 1999-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.slide.index;
import org.apache.slide.search.basic.*;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.slide.common.SlideException;
import org.apache.slide.search.BadQueryException;
import org.apache.slide.search.RequestedResource;
import org.apache.slide.search.SearchException;
import org.apache.slide.structure.ObjectNode;
import org.apache.slide.structure.SubjectNode;
/**
* A very basic sample for a store specific Expression. Depending on the
* complexity of the concrete store specific implementation, iut might be
* a good idea to have an Expression class for each DAV: expression
* (SQLEqExpression, SQLOrExpression, ...)
*
* @author <a href="mailto:[EMAIL PROTECTED]">Martin Wallmer</a>
* @version $Revision: 1.1 $
*/
public class BasicExpressionTxtContainsSample implements IBasicExpression
{
/** an example for an executable command */
String searchedText;
String indexPath;
/** backptr to the factory */
IBasicExpressionFactory factory;
/**
* constructor for a compare expression like gt, eq, ...
* For your concrete implementation you are free, which parameters have to
* be passed, let the factory give you everything you need.
*/
BasicExpressionTxtContainsSample (String searchedText, String rootPath)
{
this.searchedText = searchedText;
this.indexPath = rootPath;
}
/**
* constructor for a merge expression
*/
BasicExpressionTxtContainsSample (String mergeOperator,
Collection children,
IBasicExpressionFactory factory)
throws BadQueryException
{
// this.factory = factory;
// Iterator it = children.iterator();
// BasicExpressionTxtContainsSample firstChild =
(BasicExpressionTxtContainsSample)it.next();
//
// if (firstChild == null)
// throw new BadQueryException (mergeOperator + " needs at least
one nested element");
//
// theExecutableCommand = firstChild.theExecutableCommand;
//
// // create the executable command
// while (it.hasNext()) {
// BasicExpressionTxtContainsSample exp =
(BasicExpressionTxtContainsSample)it.next();
// theExecutableCommand += " " + mergeOperator + " " +
exp.theExecutableCommand;
// }
}
/**
* fake executer. The executable command is printed and a fake result is created.
*
* @return an IBasicResultSet
*
* @throws SearchException
*
*/
public IBasicResultSet execute() throws SearchException
{
IBasicResultSet result = new BasicResultSetImpl (false);
try
{
Searcher searcher = new IndexSearcher(indexPath);
Analyzer analyzer = new StandardAnalyzer();
Query query = QueryParser.parse(searchedText, "contents", analyzer);
Hits hits = searcher.search (query);
int noOfHits = hits.length();
for (int i = 0; i < noOfHits; i++)
{
Document doc = hits.doc(i);
String uri = doc.get("documentId");
System.out.println(uri);
RequestedResource resource = createResource(uri);
result.add (resource);
}
}
catch (Exception e)
{
throw new SearchException (e);
}
return result;
}
private RequestedResource createResource(String uri) throws SearchException
{
ObjectNode node = new SubjectNode(uri); // this will return the root folder
RequestedResource resource = null;
IBasicQuery query = factory.getQuery();
try
{
resource = new ComparableResourceImpl
(node, query.getSearchToken(), query.getScope(),
factory.getPropertyProvider());
}
catch (SlideException e)
{
throw new SearchException (e);
}
return resource;
}
public void setFactory (IBasicExpressionFactory factory)
{
this.factory = factory;
}
public IBasicExpressionFactory getFactory()
{
return this.factory;
}
}
1.1
jakarta-slide/src/stores/org/apache/slide/index/LuceneIndexer.java
Index: LuceneIndexer.java
===================================================================
/*
* $Header:
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/LuceneIndexer.java,v 1.1
2004/02/25 10:23:52 wam Exp $
* $Revision: 1.1 $
* $Date: 2004/02/25 10:23:52 $
*
* ====================================================================
*
* Copyright 1999-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.slide.index;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Small helper around a Lucene index that maps document ids to their
 * indexed text content. Not intended for production.
 *
 * The document id is indexed as one untokenized term, because
 * removeIndex() deletes by exact term match on that field.
 * NOTE(review): the original version tokenized the id and reported that
 * inside Slide no index entry was deleted when deleting resources; the
 * tokenizing is the likely cause - confirm in a Slide context.
 */
public class LuceneIndexer
{
    /** name of the field holding the document id */
    private static final String DOC_ID = "documentId";

    /** name of the field holding the indexed text */
    private static final String CONTENTS = "contents";

    /** directory of the Lucene index */
    private String indexDb;

    /**
     * Constructor
     *
     * @param    indexDb   directory of the Lucene index
     */
    public LuceneIndexer (String indexDb)
    {
        this.indexDb = indexDb;
    }

    /**
     * Removes all index entries whose document id equals
     * <code>docId</code>.
     *
     * @param    docId   the id used when the document was indexed
     *
     * @throws   IOException   if the index cannot be opened or modified
     */
    public void removeIndex (String docId) throws IOException
    {
        Directory directory = FSDirectory.getDirectory (indexDb, false);
        try
        {
            IndexReader reader = IndexReader.open (directory);
            try
            {
                reader.delete (new Term (DOC_ID, docId));
            }
            finally
            {
                // closing the reader also on failure releases its lock
                reader.close();
            }
        }
        finally
        {
            directory.close();
        }
    }

    /**
     * Adds a document to the (already existing) index.
     *
     * @param    docId    the id under which the document can be found and
     *                    removed again
     * @param    reader   supplies the text to index; it is not closed by
     *                    this method
     *
     * @throws   Exception   if the index cannot be opened or written
     */
    public void index (String docId, Reader reader)
        throws Exception
    {
        IndexWriter writer =
            new IndexWriter (indexDb, new StandardAnalyzer(), false);
        try
        {
            Document doc = new Document();
            doc.add (Field.Text (CONTENTS, reader));

            // stored and indexed, but NOT tokenized: removeIndex() looks
            // the id up as a single exact term, which a tokenized id
            // (e.g. a path with several segments) would never match
            doc.add (new Field (DOC_ID, docId, true, true, false));

            writer.addDocument (doc);
            writer.optimize();
        }
        finally
        {
            // always release the write lock
            writer.close();
        }
    }

    /**
     * Opens a file for reading.
     *
     * @param    file   name of the file
     *
     * @return   a Reader on the file; the caller has to close it
     *
     * @throws   IOException   if the file cannot be opened
     */
    private static Reader getReader (String file) throws IOException
    {
        return new FileReader (file);
    }

    /**
     * Searches the index.
     *
     * @param    stringToFind   a Lucene query, evaluated against the
     *                          contents field
     *
     * @return   the document ids (Strings) of all hits
     *
     * @throws   Exception   if the query cannot be parsed or the index
     *                       cannot be read
     */
    private Set contains (String stringToFind) throws Exception
    {
        Set resultSet = new HashSet();
        Searcher searcher = new IndexSearcher (indexDb);
        try
        {
            Analyzer analyzer = new StandardAnalyzer();
            Query query = QueryParser.parse (stringToFind, CONTENTS, analyzer);
            Hits hits = searcher.search (query);

            int noOfHits = hits.length();
            for (int i = 0; i < noOfHits; i++)
            {
                resultSet.add (hits.doc (i).get (DOC_ID));
            }
        }
        finally
        {
            searcher.close();
        }
        return resultSet;
    }

    /**
     * Indexes one file under its file name and closes the file afterwards.
     *
     * @param    indexer   the indexer to use
     * @param    file      name of the file, also used as document id
     *
     * @throws   Exception   if reading or indexing fails
     */
    private static void indexFile (LuceneIndexer indexer, String file)
        throws Exception
    {
        Reader reader = getReader (file);
        try
        {
            indexer.index (file, reader);
        }
        finally
        {
            reader.close();
        }
    }

    /**
     * Test. To run, put two text files in the current directory,
     * otto.txt and fritz.txt containing the strings
     * "hallo otto" and "hallo fritz". The index directory is created
     * from scratch, any existing index in it is overwritten.
     *
     * @param    args   optional: args[0] is the index directory; without
     *                  arguments D:\projects\tmp\index is used
     *
     * @throws   Exception   if indexing or searching fails
     */
    public static void main (String[] args) throws Exception
    {
        String indexDb = args.length > 0 ? args[0] : "D:\\projects\\tmp\\index";
        LuceneIndexer indexer = new LuceneIndexer (indexDb);

        // create an empty index (create == true)
        IndexWriter writer =
            new IndexWriter (indexDb, new StandardAnalyzer(), true);
        writer.close();

        indexFile (indexer, "otto.txt");
        System.out.println("expect one element otto.txt");
        displayResult (indexer.contains ("otto"));

        indexFile (indexer, "fritz.txt");
        System.out.println("expect fritz.txt");
        displayResult (indexer.contains ("fritz"));

        System.out.println("expect fritz.txt and otto.txt");
        displayResult (indexer.contains ("Hallo"));

        indexer.removeIndex ("otto.txt");
        System.out.println("expect fritz.txt");
        displayResult (indexer.contains ("Hallo"));

        System.out.println("expect null");
        displayResult (indexer.contains ("otto"));

        indexer.removeIndex ("fritz.txt");
        System.out.println("expect null");
        displayResult (indexer.contains ("fritz"));
    }

    /**
     * Prints every element of a search result on its own line.
     *
     * @param    result   a Set of Strings
     */
    private static void displayResult (Set result)
    {
        for (Iterator iter = result.iterator(); iter.hasNext();)
        {
            String element = (String) iter.next();
            System.out.println(element);
        }
    }
}
1.1
jakarta-slide/src/stores/org/apache/slide/index/SampleTxtContainsIndexer.java
Index: SampleTxtContainsIndexer.java
===================================================================
/*
* $Header:
/home/cvs/jakarta-slide/src/stores/org/apache/slide/index/SampleTxtContainsIndexer.java,v
1.1 2004/02/25 10:23:52 wam Exp $
* $Revision: 1.1 $
* $Date: 2004/02/25 10:23:52 $
*
* ====================================================================
*
* Copyright 1999-2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package org.apache.slide.index;
import java.io.CharArrayReader;
import java.io.IOException;
import java.util.Hashtable;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.slide.common.AbstractService;
import org.apache.slide.common.NamespaceAccessToken;
import org.apache.slide.common.ServiceAccessException;
import org.apache.slide.common.ServiceConnectionFailedException;
import org.apache.slide.common.ServiceDisconnectionFailedException;
import org.apache.slide.common.ServiceInitializationFailedException;
import org.apache.slide.common.ServiceParameterErrorException;
import org.apache.slide.common.ServiceParameterMissingException;
import org.apache.slide.common.ServiceResetFailedException;
import org.apache.slide.common.Uri;
import org.apache.slide.content.NodeRevisionContent;
import org.apache.slide.content.NodeRevisionDescriptor;
import org.apache.slide.content.NodeRevisionNumber;
import org.apache.slide.search.IndexException;
import org.apache.slide.search.basic.IBasicExpressionFactory;
import org.apache.slide.store.IndexStore;
/**
 * Sample IndexStore that keeps a Lucene full text index of the resource
 * contents. Take this as a starting point for your own Indexer
 * implementation.
 *
 * @version $Revision: 1.1 $
 */
public class SampleTxtContainsIndexer extends AbstractService implements IndexStore
{
    /** name of the service parameter holding the index directory */
    private static final String INDEX_PATH = "indexpath";

    private static final String DOC_ID = "documentId";

    /** directory of the Lucene index, set by setParameters() */
    private String indexpath;

    /** true between connect() and disconnect() */
    private boolean started = false;

    /**
     * Create Index, if not yet done. The index directory is the one
     * stored by setParameters(); <code>token</code> is not evaluated.
     *
     * @param    token   a NamespaceAccessToken (unused)
     *
     * @throws   ServiceInitializationFailedException   if the index can
     *           neither be opened nor created, or closing it fails
     */
    public void initialize (NamespaceAccessToken token)
        throws ServiceInitializationFailedException
    {
        IndexWriter indexWriter;
        try
        {
            // will fail, if not yet exists
            indexWriter = new IndexWriter (indexpath, new StandardAnalyzer(), false);
        }
        catch (IOException e)
        {
            // NOTE(review): any IOException is taken as "no index yet";
            // confirm that an existing index cannot be overwritten here
            try
            {
                // create index
                indexWriter = new IndexWriter (indexpath, new StandardAnalyzer(),
                                               true);
            }
            catch (IOException ex)
            {
                throw new ServiceInitializationFailedException (this, ex);
            }
        }

        // the writer was only needed to make sure that the index exists
        try
        {
            indexWriter.close();
        }
        catch (IOException e)
        {
            throw new ServiceInitializationFailedException (this, e);
        }
    }

    /**
     * Method getBasicExpressionFactory
     *
     * @return   a new BasicExpressionFactoryTxtContainsSample working on
     *           the index directory of this store
     */
    public IBasicExpressionFactory getBasicExpressionFactory()
    {
        return new BasicExpressionFactoryTxtContainsSample (indexpath);
    }

    /**
     * Index an object content.
     *
     * @param    uri                  Uri, its string form is the document id
     * @param    revisionDescriptor   a NodeRevisionDescriptor (unused)
     * @param    revisionContent      supplies the characters to index
     *
     * @exception IndexException   wraps any exception raised while indexing
     */
    public synchronized void createIndex (Uri uri,
                                          NodeRevisionDescriptor revisionDescriptor,
                                          NodeRevisionContent revisionContent)
        throws IndexException
    {
        try
        {
            LuceneIndexer indexer = new LuceneIndexer (indexpath);
            indexer.index (uri.toString(),
                           new CharArrayReader (revisionContent.getContent()));
        }
        catch (Exception e)
        {
            throw new IndexException (e);
        }
    }

    /**
     * Replaces the index entry of an object: the old entry is removed,
     * then the new content is indexed.
     *
     * @param    uri                 an Uri, its string form is the
     *                               document id
     * @param    revisionDescriptor  a NodeRevisionDescriptor (unused)
     * @param    revisionContent     supplies the characters to index
     *
     * @throws   IndexException   wraps any exception raised while removing
     *                            or indexing
     */
    public synchronized void updateIndex (Uri uri,
                                          NodeRevisionDescriptor revisionDescriptor,
                                          NodeRevisionContent revisionContent)
        throws IndexException
    {
        try
        {
            String docId = uri.toString();
            LuceneIndexer indexer = new LuceneIndexer (indexpath);
            indexer.removeIndex (docId);
            indexer.index (docId,
                           new CharArrayReader (revisionContent.getContent()));
        }
        catch (Exception e)
        {
            throw new IndexException (e);
        }
    }

    /**
     * Drop an object from the index. The entry is looked up by uri only,
     * the revision number is not evaluated.
     *
     * @param    uri      Uri, its string form is the document id
     * @param    number   a NodeRevisionNumber (unused)
     *
     * @exception IndexException   wraps any exception raised while removing
     */
    public synchronized void dropIndex (Uri uri, NodeRevisionNumber number)
        throws IndexException
    {
        try
        {
            LuceneIndexer indexer = new LuceneIndexer (indexpath);
            indexer.removeIndex (uri.toString());
        }
        catch (Exception e)
        {
            throw new IndexException (e);
        }
    }

    /**
     * Marks the service as connected; no real connection is needed.
     *
     * @exception ServiceConnectionFailedException   never thrown by this
     *            implementation
     */
    public void connect() throws ServiceConnectionFailedException
    {
        System.out.println("SampleIndexer: connect");
        started = true;
    }

    /**
     * This function tells whether or not the service is connected.
     *
     * @return boolean true after connect(), false after disconnect()
     * @exception ServiceAccessException   never thrown by this
     *            implementation
     */
    public boolean isConnected() throws ServiceAccessException
    {
        return started;
    }

    /**
     * Initializes the service with a set of parameters. The only
     * parameter evaluated is <code>indexpath</code>, the directory of the
     * Lucene index.
     *
     * @param parameters Hashtable containing the parameters' names
     * and associated values
     * @exception ServiceParameterErrorException   never thrown by this
     *            implementation
     * @exception ServiceParameterMissingException   if
     *            <code>indexpath</code> is missing or empty
     */
    public void setParameters (Hashtable parameters) throws
        ServiceParameterErrorException, ServiceParameterMissingException
    {
        indexpath = (String) parameters.get (INDEX_PATH);
        if (indexpath == null || indexpath.length() == 0)
        {
            throw new ServiceParameterMissingException (this, INDEX_PATH);
        }
    }

    /**
     * Marks the service as disconnected.
     *
     * @exception ServiceDisconnectionFailedException   never thrown by
     *            this implementation
     */
    public void disconnect() throws ServiceDisconnectionFailedException
    {
        System.out.println("SampleIndexer: disconnect");
        started = false;
    }

    /**
     * Deletes service underlying data source, if possible (and meaningful).
     * This implementation only prints a message, the index is left
     * untouched.
     *
     * @exception ServiceResetFailedException   never thrown by this
     *            implementation
     */
    public void reset() throws ServiceResetFailedException
    {
        System.out.println("SampleIndexer: reset");
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]