Full text indexing patch

Andy Armstrong 11 Mar 2004 11:48:47 -0000

Here's a patch, to be applied against the current CVS (as of 2004/03/11), which adds Lucene-based full-text indexing. In addition to applying the patch, you need to add lucene-1.4-rc1-dev.jar (or a similar version, I guess) to /java/lib.

To add a full-text index to a collection, use an index config like this:

  <index
    class="org.apache.xindice.core.indexer.LuceneIndexer"
    name="text-index"
    pattern="[EMAIL PROTECTED]"
    analyzer="org.apache.lucene.analysis.SimpleAnalyzer"
  />

If omitted, the analyzer attribute defaults to the value shown above. To find out about other analyzers, you'll need to check the Lucene documentation.

To query the full-text index, do something like this:

    String query = "some lucene query";
    TextQueryService tqs =
        (TextQueryService) col.getService("TextQueryService", "1.0");
    ResourceSet resultSet = tqs.query(query);

At the moment the implementation is pretty much devoid of any kind of XML:DB loveliness - it just lets you fire regular Lucene queries at the index and returns whole matching documents. Comments and criticism welcome.

--
Andy Armstrong, Tagish

diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: 
xml-xindice/config/system.xml xindice/config/system.xml
*** xml-xindice/config/system.xml       Thu Feb 12 13:12:01 2004
--- xindice/config/system.xml   Thu Mar 11 11:07:37 2004
***************
*** 58,63 ****
--- 58,64 ----
                - XUpdate engine. Has no configuration parameters.
                -->
              <resolver 
class="org.apache.xindice.core.xupdate.XUpdateQueryResolver"/>
+             <resolver 
class="org.apache.xindice.core.query.TextQueryResolver"/>
          </queryengine>
      </root-collection>
  
diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: 
xml-xindice/java/src/org/apache/xindice/client/TextQueryService.java 
xindice/java/src/org/apache/xindice/client/TextQueryService.java
*** xml-xindice/java/src/org/apache/xindice/client/TextQueryService.java        
Thu Jan  1 00:00:00 1970
--- xindice/java/src/org/apache/xindice/client/TextQueryService.java    Thu Mar 
11 11:07:37 2004
***************
*** 0 ****
--- 1,68 ----
+ /*
+  * The Apache Software License, Version 1.1
+  *
+  *
+  * Copyright (c) 1999 The Apache Software Foundation.  All rights
+  * reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  *
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  *
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in
+  *    the documentation and/or other materials provided with the
+  *    distribution.
+  *
+  * 3. The end-user documentation included with the redistribution,
+  *    if any, must include the following acknowledgment:
+  *       "This product includes software developed by the
+  *        Apache Software Foundation (http://www.apache.org/)."
+  *    Alternately, this acknowledgment may appear in the software itself,
+  *    if and wherever such third-party acknowledgments normally appear.
+  *
+  * 4. The names "Xindice" and "Apache Software Foundation" must
+  *    not be used to endorse or promote products derived from this
+  *    software without prior written permission. For written
+  *    permission, please contact [EMAIL PROTECTED]
+  *
+  * 5. Products derived from this software may not be called "Apache",
+  *    nor may "Apache" appear in their name, without prior written
+  *    permission of the Apache Software Foundation.
+  *
+  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  * ====================================================================
+  *
+  * This software consists of voluntary contributions made by many
+  * individuals on behalf of the Apache Software Foundation and was
+  * originally based on software copyright (c) 1999-2001, The dbXML
+  * Group, L.L.C., http://www.dbxmlgroup.com.  For more
+  * information on the Apache Software Foundation, please see
+  * <http://www.apache.org/>.
+  *
+  * CVS $Id: TextQueryService.java,v 1.1 2004/02/24 00:04:24 andy Exp $
+  */
+ package org.apache.xindice.client;
+ 
+ import org.xmldb.api.base.Service;
+ import org.xmldb.api.base.ResourceSet;
+ import org.xmldb.api.base.XMLDBException;
+ 
+ public interface TextQueryService extends Service {
+   ResourceSet query(String string) throws XMLDBException;
+   ResourceSet queryResource(String string, String string1) throws 
XMLDBException;
+ }
diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: 
xml-xindice/java/src/org/apache/xindice/client/xmldb/XindiceCollection.java 
xindice/java/src/org/apache/xindice/client/xmldb/XindiceCollection.java
*** xml-xindice/java/src/org/apache/xindice/client/xmldb/XindiceCollection.java 
Thu Feb 19 02:46:28 2004
--- xindice/java/src/org/apache/xindice/client/xmldb/XindiceCollection.java     
Thu Mar 11 11:10:37 2004
***************
*** 22,27 ****
--- 22,28 ----
  import org.apache.xindice.client.xmldb.resources.BinaryResourceImpl;
  import 
org.apache.xindice.client.xmldb.services.CollectionManagementServiceImpl;
  import org.apache.xindice.client.xmldb.services.MetaService;
+ import org.apache.xindice.client.xmldb.services.TextQueryServiceImpl;
  import org.apache.xindice.client.xmldb.services.XPathQueryServiceImpl;
  import org.apache.xindice.client.xmldb.services.XUpdateQueryServiceImpl;
  import org.apache.xindice.core.FaultCodes;
***************
*** 87,92 ****
--- 88,97 ----
          final XUpdateQueryServiceImpl xupdate = new XUpdateQueryServiceImpl();
          xupdate.setCollection(this);
          registerService(xupdate);
+ 
+         final TextQueryServiceImpl text = new TextQueryServiceImpl();
+         text.setCollection(this); // this seems to be unnecessary - 
setCollection() is called in registerService()
+         registerService(text);
  
          // TODO  if (this.col.isMetaEnabled()) {
          final MetaService meta = new MetaService();
diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: 
xml-xindice/java/src/org/apache/xindice/client/xmldb/services/TextQueryServiceImpl.java
 
xindice/java/src/org/apache/xindice/client/xmldb/services/TextQueryServiceImpl.java
*** 
xml-xindice/java/src/org/apache/xindice/client/xmldb/services/TextQueryServiceImpl.java
     Thu Jan  1 00:00:00 1970
--- 
xindice/java/src/org/apache/xindice/client/xmldb/services/TextQueryServiceImpl.java
 Thu Mar 11 11:07:37 2004
***************
*** 0 ****
--- 1,81 ----
+ /*
+  * The Apache Software License, Version 1.1
+  *
+  *
+  * Copyright (c) 1999 The Apache Software Foundation.  All rights
+  * reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  *
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  *
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in
+  *    the documentation and/or other materials provided with the
+  *    distribution.
+  *
+  * 3. The end-user documentation included with the redistribution,
+  *    if any, must include the following acknowledgment:
+  *       "This product includes software developed by the
+  *        Apache Software Foundation (http://www.apache.org/)."
+  *    Alternately, this acknowledgment may appear in the software itself,
+  *    if and wherever such third-party acknowledgments normally appear.
+  *
+  * 4. The names "Xindice" and "Apache Software Foundation" must
+  *    not be used to endorse or promote products derived from this
+  *    software without prior written permission. For written
+  *    permission, please contact [EMAIL PROTECTED]
+  *
+  * 5. Products derived from this software may not be called "Apache",
+  *    nor may "Apache" appear in their name, without prior written
+  *    permission of the Apache Software Foundation.
+  *
+  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  * ====================================================================
+  *
+  * This software consists of voluntary contributions made by many
+  * individuals on behalf of the Apache Software Foundation and was
+  * originally based on software copyright (c) 1999-2001, The dbXML
+  * Group, L.L.C., http://www.dbxmlgroup.com.  For more
+  * information on the Apache Software Foundation, please see
+  * <http://www.apache.org/>.
+  *
+  * CVS $Id: TextQueryServiceImpl.java,v 1.1 2004/02/24 00:04:24 andy Exp $
+  */
+ 
+ package org.apache.xindice.client.xmldb.services;
+ 
+ import org.apache.xindice.client.TextQueryService;
+ 
+ /**
+  * XML:DB TextQueryService implementation that uses XML-RPC communication
+  * with the server.
+  *
+  * @author <a href="mailto:[EMAIL PROTECTED]">James Bates</a>
+  * @version CVS $Revision: 1.1 $, $Date: 2004/02/24 00:04:24 $
+  */
+ public class TextQueryServiceImpl extends QueryService implements 
TextQueryService {
+ 
+     /**
+      * Creates new TextQueryService
+      */
+     public TextQueryServiceImpl() {
+ 
+         super();
+         queryLang = "Text";
+     }
+ }
diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: 
xml-xindice/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java 
xindice/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java
*** xml-xindice/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java     
Thu Jan  1 00:00:00 1970
--- xindice/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java Thu Mar 
11 11:07:37 2004
***************
*** 0 ****
--- 1,499 ----
+ /*
+  * The Apache Software License, Version 1.1
+  *
+  *
+  * Copyright (c) 1999 The Apache Software Foundation.  All rights
+  * reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  *
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  *
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in
+  *    the documentation and/or other materials provided with the
+  *    distribution.
+  *
+  * 3. The end-user documentation included with the redistribution,
+  *    if any, must include the following acknowledgment:
+  *       "This product includes software developed by the
+  *        Apache Software Foundation (http://www.apache.org/)."
+  *    Alternately, this acknowledgment may appear in the software itself,
+  *    if and wherever such third-party acknowledgments normally appear.
+  *
+  * 4. The names "Xindice" and "Apache Software Foundation" must
+  *    not be used to endorse or promote products derived from this
+  *    software without prior written permission. For written
+  *    permission, please contact [EMAIL PROTECTED]
+  *
+  * 5. Products derived from this software may not be called "Apache",
+  *    nor may "Apache" appear in their name, without prior written
+  *    permission of the Apache Software Foundation.
+  *
+  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  * ====================================================================
+  *
+  * This software consists of voluntary contributions made by many
+  * individuals on behalf of the Apache Software Foundation and was
+  * originally based on software copyright (c) 1999-2001, The dbXML
+  * Group, L.L.C., http://www.dbxmlgroup.com.  For more
+  * information on the Apache Software Foundation, please see
+  * <http://www.apache.org/>.
+  *
+  * CVS $Id: LuceneIndexer.java,v 1.5 2004/03/10 16:21:33 andy Exp $
+  */
+ 
+ package org.apache.xindice.core.indexer;
+ 
+ // Xindice stuff
+ import org.apache.commons.logging.Log;
+ import org.apache.commons.logging.LogFactory;
+ import org.apache.xindice.core.Collection;
+ import org.apache.xindice.core.DBObject;
+ import org.apache.xindice.core.DBException;
+ import org.apache.xindice.core.data.Key;
+ import org.apache.xindice.core.data.Value;
+ import org.apache.xindice.core.FaultCodes;
+ import org.apache.xindice.core.filer.BTree;
+ import org.apache.xindice.core.filer.BTreeCallback;
+ import org.apache.xindice.core.filer.BTreeCorruptException;
+ import org.apache.xindice.core.filer.BTreeNotFoundException;
+ import org.apache.xindice.core.indexer.*;
+ import org.apache.xindice.core.query.QueryEngine;
+ import org.apache.xindice.util.Configuration;
+ import org.apache.xindice.xml.SymbolTable;
+ 
+ // Lucene stuff
+ import org.apache.lucene.analysis.Analyzer;
+ //import org.apache.lucene.analysis.standard.StandardAnalyzer;
+ import org.apache.lucene.document.Document;
+ import org.apache.lucene.document.Field;
+ import org.apache.lucene.index.IndexReader;
+ import org.apache.lucene.index.IndexWriter;
+ import org.apache.lucene.index.Term;
+ import org.apache.lucene.search.IndexSearcher;
+ 
+ import java.io.File;
+ import java.io.IOException;
+ import java.util.ArrayList;
+ import java.util.List;
+ import java.util.StringTokenizer;
+ 
+ /**
+  * LuceneIndexer is a basic implementation of the Indexer interface.
+  * It is used for maintaining full text
+  * indexes.
+  *
+  * @version CVS $Revision: 1.5 $, $Date: 2004/03/10 16:21:33 $
+  */
+ public final class LuceneIndexer implements Indexer, DBObject {
+ 
+       private static final Log log = LogFactory.getLog(LuceneIndexer.class);
+ 
+       //private static final IndexMatch[] EmptyMatches = new IndexMatch[0];
+       //private static final Value EmptyValue = new Value(new byte[0]);
+ 
+       private static final String NAME     = "name";
+       private static final String PATTERN  = "pattern";
+       private static final String TYPE     = "type";
+       private static final String ANALYZER = "analyzer";
+ 
+       public static final String KEYNAME   = "key";
+       public static final String TEXTNAME  = "text";
+ 
+       // Default analyzer to use
+       private static final String DEFANALYZER = 
"org.apache.lucene.analysis.SimpleAnalyzer";
+ 
+       private File          idxFile;
+       private IndexWriter   iw;
+       private IndexReader   ir;
+       private IndexSearcher is;
+       private Analyzer      an;
+ 
+       private Configuration config;
+       private Collection    collection;
+       //private SymbolTable symbols;
+ 
+       private String        name;
+       private String        pattern;
+       private String        analyzer;
+ 
+       // Keep a count of changes to the index
+       private int           docsAdded;
+       private int           docsDeleted;
+ 
+       public LuceneIndexer() {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + 
".LuceneIndexer()");
+       }
+ 
+       private void setFile(File f) {
+               idxFile = f;
+       }
+ 
+       private File getFile() {
+               if (null == idxFile) {
+                       throw new java.lang.IllegalStateException("Not bound to 
a file");
+               }
+               return idxFile;
+       }
+ 
+       public synchronized boolean isOpened() throws DBException {
+               boolean o = (null != iw) || (null != ir);
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + ".isOpened() - 
returning " + o);
+               return o;
+       }
+ 
+       private void closeWrite() throws DBException {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + 
".closeWrite()");
+               if (null != iw) {
+                       try {
+                               int nDocs = iw.docCount();
+                               /* Fairly arbitrary rules for triggering index 
optimisation. Need to
+                                * play with these.
+                                */
+                               if (docsAdded > nDocs / 10 || docsAdded > 50 || 
docsDeleted > 10) {
+                                       //System.out.println("Optimizing 
index...");
+                                       iw.optimize();
+                                       docsAdded = 0;
+                                       docsDeleted = 0;
+                               }
+                               iw.close();
+                               iw = null;
+                       } catch (IOException e) {
+                               // Fixme: less than ideal fault code
+                               throw new DBException(FaultCodes.IDX_CORRUPTED, 
"", e);
+                       }
+               }
+       }
+ 
+       private void closeSearch() throws DBException {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + 
".closeSearch()");
+               if (null != is) {
+                       try {
+                               is.close();
+                               is = null;
+                       } catch (IOException e) {
+                               // Fixme: less than ideal fault code
+                               throw new DBException(FaultCodes.IDX_CORRUPTED, 
"", e);
+                       }
+               }
+       }
+ 
+       private void closeRead() throws DBException {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + 
".closeRead()");
+               if (null != ir) {
+                       closeSearch();
+                       try {
+                               ir.close();
+                               ir = null;
+                       } catch (IOException e) {
+                               // Fixme: less than ideal fault code
+                               throw new DBException(FaultCodes.IDX_CORRUPTED, 
"", e);
+                       }
+               }
+       }
+ 
+       private void openRead() throws DBException {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + ".openRead()");
+               if (null == ir) {
+                       closeWrite();
+                       try {
+                               ir = IndexReader.open(getFile());
+                       } catch (IOException e) {
+                               // Fixme: less than ideal fault code
+                               throw new 
DBException(FaultCodes.IDX_INDEX_NOT_FOUND, "", e);
+                       }
+               }
+       }
+ 
+       private void openSearch() throws DBException {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + 
".openSearch()");
+               if (null == is) {
+                       openRead();
+                       is = new IndexSearcher(ir);
+               }
+       }
+ 
+       private static boolean isBlank(String n) {
+               return null == n || n.length() == 0;
+       }
+ 
+       public synchronized Analyzer getAnalyzer() throws DBException {
+               try {
+                       if (null == an) {
+                               String anc = isBlank(analyzer) ? DEFANALYZER : 
analyzer;
+                               Class c = Class.forName(anc);
+                               an = (Analyzer) c.newInstance();
+                       }
+               } catch (Exception e) {
+                       throw new DBException(FaultCodes.IDX_NOT_SUPPORTED, "", 
e);
+               }
+               return an;
+       }
+ 
+       public synchronized IndexSearcher getSearcher() throws DBException {
+               openSearch();
+               return is;
+       }
+ 
+       public synchronized IndexReader getReader() throws DBException {
+               openRead();
+               return ir;
+       }
+ 
+       private void openWrite(boolean create) throws DBException {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + ".openWrite(" 
+ create + ")");
+               if (null == iw) {
+                       closeRead();
+                       try {
+                               iw = new IndexWriter(getFile(), getAnalyzer(), 
create);
+                       } catch (IOException e) {
+                               // Fixme: less than ideal fault code
+                               throw new DBException(create ? 
FaultCodes.IDX_CANNOT_CREATE : FaultCodes.IDX_INDEX_NOT_FOUND, "", e);
+                       } catch (Exception e) {
+                               throw new 
DBException(FaultCodes.IDX_NOT_SUPPORTED, "", e);
+                       }
+               }
+       }
+ 
+       public synchronized boolean close() throws DBException {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + ".close()");
+               closeWrite();
+               closeRead();
+               return true;
+       }
+ 
+       public synchronized boolean create() throws DBException {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + ".create()");
+               drop();
+               openWrite(true);
+               return true;
+       }
+ 
+       public synchronized boolean open() throws DBException {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + ".open()");
+               openWrite(false);
+               return true;
+       }
+ 
+       public synchronized boolean exists() throws DBException {
+               boolean e = getFile().exists();
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + ".exists() - 
returning " + e);
+               return e;
+       }
+ 
+       private boolean deepDelete(File f) throws IOException {
+               if (f.isDirectory()) {
+                       File fl[] = f.listFiles();
+                       for (int i = 0; i < fl.length; i++) {
+                               //System.out.println(fl[i].getCanonicalPath());
+                               if (!deepDelete(fl[i])) {
+                                       return false;
+                               }
+                       }
+               }
+               return f.delete();
+       }
+ 
+       public synchronized boolean drop() throws DBException {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + ".drop()");
+               try {
+                       if (exists()) {
+                               close();
+                               return deepDelete(getFile());
+                       } else {
+                               return false;
+                       }
+               } catch (IOException e) {
+                       // Fixme: less than ideal fault code
+                       throw new DBException(FaultCodes.IDX_CORRUPTED, "", e);
+               }
+       }
+ 
+       private void assertOpen() throws DBException {
+               if (!isOpened()) {
+                       throw new IllegalStateException("Index has not been 
opened");
+               }
+       }
+ 
+       private void assertWrite() throws DBException {
+               assertOpen();
+               openWrite(false);
+       }
+ 
+       private void assertRead() throws DBException {
+               assertOpen();
+               openRead();
+       }
+ 
+       public void setConfig(Configuration config) {
+               this.config = config;
+               try {
+                       name     = config.getAttribute(NAME);
+                       pattern  = config.getAttribute(PATTERN);
+                       analyzer = config.getAttribute(ANALYZER);
+ 
+                       //System.out.println("setConfig(), name=" + name + ", 
pattern=" + pattern + ", analyzer=" + analyzer);
+ 
+                       // Destroy any cached information that's based on the 
config
+                       an = null;
+ 
+                       setLocation(name);
+               } catch (Exception e) {
+                       if (log.isWarnEnabled()) {
+                               log.warn("ignored exception", e);
+                       }
+               }
+       }
+ 
+       public Configuration getConfig() {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + 
".getConfig()\n");
+               return config;
+       }
+ 
+       public String getName() {
+               return name;
+       }
+ 
+       private void setLocation(String location) {
+               setFile(new File(collection.getCollectionRoot(), location));
+       }
+ 
+       public void setCollection(Collection collection) {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + 
".setCollection(" + collection + ")");
+               try {
+                       this.collection = collection;
+                       //symbols = collection.getSymbols();
+               } catch (Exception e) {
+                       if (log.isWarnEnabled()) {
+                               log.warn("ignored exception", e);
+                       }
+               }
+       }
+ 
+       public String getIndexStyle() {
+               return STYLE_FULLTEXT;
+       }
+ 
+       public String getPattern() {
+               return pattern;
+       }
+ 
+       private static void quoteBytes(StringBuffer buf, byte b[], int l) {
+               for (int i = 0; i < l; i++) {
+                       int bv = b[i] & 0xFF;
+                       if (bv < 0x20 || bv >= 0x7F || bv == '%' || bv == ',') {
+                               
buf.append('%').append(Integer.toHexString(0x100 | bv).substring(1));
+                       } else {
+                               buf.append((char) bv);
+                       }
+               }
+       }
+ 
+       private static byte[] unquoteBytes(String s) {
+               int sp = 0, sl = s.length();
+               byte b[] = new byte[sl];  // worst case
+               int bp = 0;
+               while (sp < sl) {
+                       char c = s.charAt(sp++);
+                       if (c == '%') {
+                               int hi = Character.digit(s.charAt(sp++), 16);
+                               int lo = Character.digit(s.charAt(sp++), 16);
+                               b[bp++] = (byte) ((hi << 4) | lo);
+                       } else {
+                               b[bp++] = (byte) c;
+                       }
+               }
+ 
+               if (bp < b.length) {
+                       byte nb[] = new byte[bp];
+                       System.arraycopy(b, 0, nb, 0, bp);
+                       return nb;
+               } else {
+                       return b;
+               }
+       }
+ 
+       private static String packMatch(Key key, int pos, int len, short 
elemID, short attrID) {
+               StringBuffer buf = new StringBuffer();
+               quoteBytes(buf, key.getData(), key.getLength());
+               buf.append(',').append(pos);
+               buf.append(',').append(len);
+               buf.append(',').append(elemID);
+               buf.append(',').append(attrID);
+               return buf.toString();
+       }
+ 
+       public static IndexMatch unpackMatch(String match) {
+               Key key = null;
+               int pos = -1;
+               int len = -1;
+               short elemID = -1;
+               short attrID = -1;
+               String m[] = match.split(",");
+               try {
+                       key = new Key(unquoteBytes(m[0]));
+                       pos = Integer.parseInt(m[1]);
+                       len = Integer.parseInt(m[2]);
+                       elemID = Short.parseShort(m[3]);
+                       attrID = Short.parseShort(m[4]);
+               } catch (IndexOutOfBoundsException e) {
+                       // run out of data: ignore
+               }
+               return new IndexMatch(key, pos, len, elemID, attrID);
+       }
+ 
+       public synchronized void remove(String value, Key key, int pos, int 
len, short elemID, short attrID) throws DBException {
+               //System.out.println("[" + this + ", " + 
Thread.currentThread().getName() + "] " + getClass().getName() + ".remove(" + 
value + ", " + key + ", " + pos + ", " + len + ", " + elemID + ", " + attrID + 
")");
+               assertRead();
+ 
+               try {
+                       ir.delete(new Term(KEYNAME, packMatch(key, pos, len, 
elemID, attrID)));
+                       docsDeleted++;
+               } catch (IOException e) {
+                       throw new DBException(FaultCodes.IDX_CORRUPTED, "", e);
+               }
+       }
+ 
+       /**
+        * Adds one match to the index as a two field Lucene document: the
+        * packed match under KEYNAME and the text under TEXTNAME.
+        *
+        * @param value  the text to index
+        * @param key    key of the document the match belongs to
+        * @param pos    position of the match
+        * @param len    length of the match
+        * @param elemID element symbol id of the match
+        * @param attrID attribute symbol id of the match
+        * @throws DBException with FaultCodes.IDX_CORRUPTED when the writer
+        *         fails with an IOException
+        */
+       public synchronized void add(String value, Key key, int pos, int len, short elemID, short attrID) throws DBException {
+               assertWrite();
+ 
+               Document entry = new Document();
+               Field keyField = new Field(KEYNAME, packMatch(key, pos, len, elemID, attrID), true, true, false);
+               entry.add(keyField);
+               Field textField = new Field(TEXTNAME, value, false, true, true);
+               entry.add(textField);
+ 
+               try {
+                       iw.addDocument(entry);
+                       docsAdded++;
+               } catch (IOException e) {
+                       throw new DBException(FaultCodes.IDX_CORRUPTED, "", e);
+               }
+       }
+ 
+       /**
+        * Does nothing in this indexer.
+        *
+        * @throws DBException never thrown by this implementation
+        */
+       public synchronized void flush() throws DBException {
+               // Intentionally empty: the call to super.flush() is disabled.
+       }
+ 
+       /**
+        * Always answers null; this indexer does not resolve IndexQuery
+        * requests.
+        *
+        * @param query the index query (ignored)
+        * @return null, unconditionally
+        * @throws DBException never thrown by this implementation
+        */
+       public synchronized IndexMatch[] queryMatches(final IndexQuery query) throws DBException {
+               return null;
+       }
+ 
+       public String toString() {
+               return getName() + "(" + getIndexStyle() + ", " + getPattern() 
+ ")";
+       }
+ }
diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: 
xml-xindice/java/src/org/apache/xindice/core/query/TextQueryResolver.java 
xindice/java/src/org/apache/xindice/core/query/TextQueryResolver.java
*** xml-xindice/java/src/org/apache/xindice/core/query/TextQueryResolver.java   
Thu Jan  1 00:00:00 1970
--- xindice/java/src/org/apache/xindice/core/query/TextQueryResolver.java       
Thu Mar 11 11:07:38 2004
***************
*** 0 ****
--- 1,288 ----
+ /*
+  * The Apache Software License, Version 1.1
+  *
+  *
+  * Copyright (c) 1999 The Apache Software Foundation.  All rights
+  * reserved.
+  *
+  * Redistribution and use in source and binary forms, with or without
+  * modification, are permitted provided that the following conditions
+  * are met:
+  *
+  * 1. Redistributions of source code must retain the above copyright
+  *    notice, this list of conditions and the following disclaimer.
+  *
+  * 2. Redistributions in binary form must reproduce the above copyright
+  *    notice, this list of conditions and the following disclaimer in
+  *    the documentation and/or other materials provided with the
+  *    distribution.
+  *
+  * 3. The end-user documentation included with the redistribution,
+  *    if any, must include the following acknowledgment:
+  *       "This product includes software developed by the
+  *        Apache Software Foundation (http://www.apache.org/)."
+  *    Alternately, this acknowledgment may appear in the software itself,
+  *    if and wherever such third-party acknowledgments normally appear.
+  *
+  * 4. The names "Xindice" and "Apache Software Foundation" must
+  *    not be used to endorse or promote products derived from this
+  *    software without prior written permission. For written
+  *    permission, please contact [EMAIL PROTECTED]
+  *
+  * 5. Products derived from this software may not be called "Apache",
+  *    nor may "Apache" appear in their name, without prior written
+  *    permission of the Apache Software Foundation.
+  *
+  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+  * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+  * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+  * SUCH DAMAGE.
+  * ====================================================================
+  *
+  * This software consists of voluntary contributions made by many
+  * individuals on behalf of the Apache Software Foundation and was
+  * originally based on software copyright (c) 1999-2001, The dbXML
+  * Group, L.L.C., http://www.dbxmlgroup.com.  For more
+  * information on the Apache Software Foundation, please see
+  * <http://www.apache.org/>.
+  *
+  * CVS $Id: TextQueryResolver.java,v 1.3 2004/03/10 16:21:41 andy Exp $
+  */
+ 
+ package org.apache.xindice.core.query;
+ 
+ import java.io.IOException;
+ import java.util.HashSet;
+ import javax.xml.transform.ErrorListener;
+ import javax.xml.transform.TransformerException;
+ 
+ import org.apache.commons.logging.Log;
+ import org.apache.commons.logging.LogFactory;
+ 
+ import org.apache.lucene.analysis.Analyzer;
+ import org.apache.lucene.document.Document;
+ import org.apache.lucene.index.IndexReader;
+ import org.apache.lucene.search.Hits;
+ import org.apache.lucene.search.IndexSearcher;
+ import org.apache.lucene.queryParser.QueryParser;
+ import org.apache.lucene.queryParser.ParseException;
+ 
+ import org.apache.xindice.core.Collection;
+ import org.apache.xindice.core.data.Key;
+ import org.apache.xindice.core.data.NodeSet;
+ import org.apache.xindice.core.DBException;
+ import org.apache.xindice.core.FaultCodes;
+ import org.apache.xindice.core.indexer.Indexer;
+ import org.apache.xindice.core.indexer.IndexManager;
+ import org.apache.xindice.core.indexer.IndexMatch;
+ import org.apache.xindice.core.indexer.LuceneIndexer;
+ import org.apache.xindice.util.Configuration;
+ import org.apache.xindice.util.SimpleConfigurable;
+ import org.apache.xindice.util.XindiceException;
+ import org.apache.xindice.util.XindiceRuntimeException;
+ import org.apache.xindice.xml.dom.DBDocument;
+ import org.apache.xindice.xml.NamespaceMap;
+ 
+ import org.apache.xml.utils.PrefixResolver;
+ 
+ import org.w3c.dom.Node;
+ import org.xmldb.api.base.XMLDBException;
+ 
+ /**
+  * Query resolver for the "Text" query style.
+  *
+  * A query string is parsed with the Lucene QueryParser and run against a
+  * LuceneIndexer found in the target collection. The result is the set of
+  * document elements of every document that has at least one hit.
+  */
+ public class TextQueryResolver extends SimpleConfigurable implements QueryResolver {
+ 
+       public final static String STYLE_FT = "Text";
+       private static final Log log = LogFactory.getLog(TextQueryResolver.class);
+ 
+       /**
+        * A text query bound to a collection, plus an optional array of keys
+        * that restricts which documents may appear in the result.
+        */
+       private class TextQuery implements Query {
+               private Collection   context;
+               private String       query;
+               private NamespaceMap nsMap;
+               private Key          keys[];
+ 
+               /**
+                * NodeSet over the document elements of the matched keys.
+                * Documents are loaded one step ahead of the caller, so
+                * hasMoreNodes() only has to test the prefetched node.
+                */
+               private class ResultSet implements NodeSet {
+                       private Key[] keySet;
+                       private int keySetSize;
+                       private String query;
+ 
+                       private int keyPos = 0;
+                       private Node nextNode;
+ 
+                       /**
+                        * @param keySet     array holding the matched keys; it may be
+                        *                   longer than the number of matches
+                        * @param keySetSize number of leading entries of keySet in use
+                        * @param query      the query string that produced the keys
+                        */
+                       public ResultSet(Key[] keySet, int keySetSize, String query) {
+                               this.keySet = keySet;
+                               this.query = query;
+                               this.keySetSize = keySetSize;
+ 
+                               try {
+                                       prepareNextNode();
+                               } catch (Exception e) {
+                                       throw new XindiceRuntimeException(e.getMessage());
+                               }
+                       }
+ 
+                       /**
+                        * Loads the document element for the next key whose
+                        * document exists, or leaves nextNode null when the
+                        * keys are exhausted.
+                        */
+                       private void prepareNextNode() throws XMLDBException, TransformerException, DBException {
+                               nextNode = null;
+ 
+                               // Stop at keySetSize, not keySet.length: slots past
+                               // keySetSize were never filled and hold null.
+                               while (nextNode == null && keyPos < keySetSize) {
+                                       DBDocument d = (DBDocument) context.getDocument(keySet[keyPos++]);
+                                       if (d != null) {
+                                               nextNode = d.getDocumentElement();
+                                       }
+                               }
+                       }
+ 
+                       public boolean hasMoreNodes() {
+                               return nextNode != null;
+                       }
+ 
+                       /**
+                        * Returns the prefetched node and prefetches the one
+                        * after it.
+                        *
+                        * @return the next node, or null when none is left
+                        */
+                       public Object getNextNode() {
+                               Node n = nextNode;
+ 
+                               try {
+                                       prepareNextNode();
+                               } catch (Exception e) {
+                                       throw new XindiceRuntimeException(e.getMessage());
+                               }
+ 
+                               return n;
+                       }
+               }
+ 
+               private TextQuery(Collection context, String query, NamespaceMap nsMap, Key[] keys) {
+                       this.context   = context;
+                       this.query     = query;
+                       this.nsMap     = nsMap;
+                       this.keys      = keys;
+               }
+ 
+               public String getQueryStyle() {
+                       return STYLE_FT;
+               }
+ 
+               public Collection getQueryContext() {
+                       return context;
+               }
+ 
+               public String getQueryString() {
+                       return query;
+               }
+ 
+               public NamespaceMap getNamespaceMap() {
+                       return nsMap;
+               }
+ 
+               public Key[] getKeySet() {
+                       return keys;
+               }
+ 
+               /**
+                * Not very clever: just find the LuceneIndexer with the
+                * shortest pattern.
+                *
+                * @param c the collection whose indexers are inspected
+                * @return the chosen indexer, or null if the collection has
+                *         no LuceneIndexer
+                */
+               private LuceneIndexer findIndex(Collection c) throws DBException {
+                       IndexManager im = c.getIndexManager();
+                       LuceneIndexer best = null;
+                       int bestPattern = -1;
+                       String list[] = im.list();
+                       for (int i = 0; i < list.length; i++) {
+                               Indexer idx = im.get(list[i]);
+                               if (idx instanceof LuceneIndexer) {
+                                       int pl = idx.getPattern().length();
+                                       if (bestPattern == -1 || pl < bestPattern) {
+                                               best = (LuceneIndexer) idx;
+                                               bestPattern = pl;
+                                       }
+                               }
+                       }
+ 
+                       return best;
+               }
+ 
+               /**
+                * Runs the query and collects the distinct document keys of
+                * all hits, in hit order. When a key filter was supplied,
+                * only keys contained in it are kept.
+                *
+                * @return a NodeSet over the matching documents' root elements
+                * @throws QueryException if the collection has no text
+                *         indexer, the query cannot be parsed, or the index
+                *         cannot be read
+                */
+               public NodeSet execute() throws QueryException {
+                       try {
+                               LuceneIndexer idx = findIndex(context);
+                               if (null == idx) {
+                                       throw new QueryException(FaultCodes.QRY_STYLE_NOT_FOUND, "No text indexer in this collection");
+                               }
+                               Analyzer an = idx.getAnalyzer();
+                               IndexReader ir = idx.getReader();
+                               IndexSearcher is = idx.getSearcher();
+                               Hits hits = is.search(QueryParser.parse(query, idx.TEXTNAME, an));
+ 
+                               int hl = hits.length();
+                               Key rk[] = new Key[hl];
+                               int rkused = 0;
+                               HashSet filter = null;
+                               HashSet done   = new HashSet(hl);
+                               if (keys != null) {
+                                       filter = new HashSet(keys.length);
+                                       for (int k = 0; k < keys.length; k++) {
+                                               filter.add(keys[k]);
+                                       }
+                               }
+                               for (int i = 0; i < hl; i++) {
+                                       int id = hits.id(i);
+                                       Document d = ir.document(id);
+                                       IndexMatch im = LuceneIndexer.unpackMatch(d.getField(idx.KEYNAME).stringValue());
+                                       Key k = im.getKey();
+                                       // Several hits may belong to one document; report it once.
+                                       if (!done.contains(k)) {
+                                               if (filter == null || filter.contains(k)) {
+                                                       rk[rkused++] = k;
+                                               }
+                                               done.add(k);
+                                       }
+                               }
+ 
+                               return new ResultSet(rk, rkused, query);
+ 
+                       } catch (QueryException e) {
+                               // Already the right type: pass it on with its message intact.
+                               throw e;
+                       } catch (DBException e) {
+                               throw new QueryException(e.faultCode, e.getMessage(), e);
+                       } catch (ParseException e) {
+                               throw new QueryException(FaultCodes.QRY_COMPILATION_ERROR, e.getMessage(), e);
+                       } catch (IOException e) {
+                               throw new QueryException(FaultCodes.QRY_PROCESSING_ERROR, e.getMessage(), e);
+                       }
+               }
+       }
+ 
+       /**
+        * Accepts the owning query engine; this resolver does not keep it.
+        */
+       public void setQueryEngine(QueryEngine engine) {
+               if (log.isDebugEnabled()) {
+                       log.debug("setQueryEngine(" + engine + ")");
+               }
+               // do nothing
+       }
+ 
+       /**
+        * @return the query style handled by this resolver, STYLE_FT
+        */
+       public String getQueryStyle() {
+               if (log.isDebugEnabled()) {
+                       log.debug("getQueryStyle()");
+               }
+               return STYLE_FT;
+       }
+ 
+       /**
+        * Wraps the arguments in a query object without touching the index;
+        * parsing and searching happen when the query is executed.
+        */
+       public Query compileQuery(Collection context, String query, NamespaceMap nsMap, Key[] keys) throws QueryException {
+               if (log.isDebugEnabled()) {
+                       log.debug("compileQuery(" + context + ", " + query + ")");
+               }
+               return new TextQuery(context, query, nsMap, keys);
+       }
+ 
+       /**
+        * Builds a query from the arguments and executes it immediately.
+        *
+        * @param context the collection to search
+        * @param query   the Lucene query string
+        * @param nsMap   namespace map, stored on the query but not used by the search
+        * @param keys    optional keys limiting the result; null means no limit
+        * @return the matching documents' root elements
+        * @throws QueryException if execution fails
+        */
+       public NodeSet query(Collection context, String query, NamespaceMap nsMap, Key[] keys) throws QueryException {
+               if (log.isDebugEnabled()) {
+                       log.debug("query(" + context + ", " + query + ")");
+               }
+               TextQuery tq = new TextQuery(context, query, nsMap, keys);
+               return tq.execute();
+       }
+ 
+       // setConfig()/getConfig() are not overridden here; the class relies on
+       // whatever SimpleConfigurable provides.
+ }
diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: 
xml-xindice/java/src/org/apache/xindice/server/Xindice.java 
xindice/java/src/org/apache/xindice/server/Xindice.java
*** xml-xindice/java/src/org/apache/xindice/server/Xindice.java Sun Feb  8 
02:54:25 2004
--- xindice/java/src/org/apache/xindice/server/Xindice.java     Thu Mar 11 
11:07:38 2004
***************
*** 63,68 ****
--- 63,69 ----
              + "        <queryengine>"
              + "            <resolver autoindex=\"false\" 
class=\"org.apache.xindice.core.query.XPathQueryResolver\" />"
              + "            <resolver 
class=\"org.apache.xindice.core.xupdate.XUpdateQueryResolver\" />"
+             + "            <resolver 
class=\"org.apache.xindice.core.query.TextQueryResolver\" />"
              + "        </queryengine>"
              + "    </root-collection>"
              + "    <xml-rpc>"

Full text indexing patch

Reply via email to