To add a full-text index to a collection, use an index configuration like this:
<index class="org.apache.xindice.core.indexer.LuceneIndexer" name="text-index" pattern="[EMAIL PROTECTED]" analyzer="org.apache.lucene.analysis.SimpleAnalyzer" />
If the analyzer attribute is omitted, it defaults to the value shown above. To find out about other analyzers, check the Lucene documentation.
To query the full-text index, do something like this:
// Example: look up the "TextQueryService" (version "1.0") on a collection and run a Lucene query string through it.
// NOTE(review): `col` is not declared in this snippet; presumably an already-opened XML:DB Collection - confirm.
String query = "some lucene query"; TextQueryService tqs = (TextQueryService) col.getService("TextQueryService", "1.0"); ResourceSet resultSet = tqs.query(query);
At the moment, the implementation is pretty much devoid of any kind of XML:DB loveliness - it just lets you fire regular Lucene queries at the index and returns whole matching documents. Comments and criticism are welcome.
-- Andy Armstrong, Tagish
diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: xml-xindice/config/system.xml xindice/config/system.xml *** xml-xindice/config/system.xml Thu Feb 12 13:12:01 2004 --- xindice/config/system.xml Thu Mar 11 11:07:37 2004 *************** *** 58,63 **** --- 58,64 ---- - XUpdate engine. Has no configuration parameters. --> <resolver class="org.apache.xindice.core.xupdate.XUpdateQueryResolver"/> + <resolver class="org.apache.xindice.core.query.TextQueryResolver"/> </queryengine> </root-collection> diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: xml-xindice/java/src/org/apache/xindice/client/TextQueryService.java xindice/java/src/org/apache/xindice/client/TextQueryService.java *** xml-xindice/java/src/org/apache/xindice/client/TextQueryService.java Thu Jan 1 00:00:00 1970 --- xindice/java/src/org/apache/xindice/client/TextQueryService.java Thu Mar 11 11:07:37 2004 *************** *** 0 **** --- 1,68 ---- + /* + * The Apache Software License, Version 1.1 + * + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." 
+ * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xindice" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation and was + * originally based on software copyright (c) 1999-2001, The dbXML + * Group, L.L.C., http://www.dbxmlgroup.com. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ * + * CVS $Id: TextQueryService.java,v 1.1 2004/02/24 00:04:24 andy Exp $ + */ + package org.apache.xindice.client; + + import org.xmldb.api.base.Service; + import org.xmldb.api.base.ResourceSet; + import org.xmldb.api.base.XMLDBException; + + public interface TextQueryService extends Service { + ResourceSet query(String string) throws XMLDBException; + ResourceSet queryResource(String string, String string1) throws XMLDBException; + } diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: xml-xindice/java/src/org/apache/xindice/client/xmldb/XindiceCollection.java xindice/java/src/org/apache/xindice/client/xmldb/XindiceCollection.java *** xml-xindice/java/src/org/apache/xindice/client/xmldb/XindiceCollection.java Thu Feb 19 02:46:28 2004 --- xindice/java/src/org/apache/xindice/client/xmldb/XindiceCollection.java Thu Mar 11 11:10:37 2004 *************** *** 22,27 **** --- 22,28 ---- import org.apache.xindice.client.xmldb.resources.BinaryResourceImpl; import org.apache.xindice.client.xmldb.services.CollectionManagementServiceImpl; import org.apache.xindice.client.xmldb.services.MetaService; + import org.apache.xindice.client.xmldb.services.TextQueryServiceImpl; import org.apache.xindice.client.xmldb.services.XPathQueryServiceImpl; import org.apache.xindice.client.xmldb.services.XUpdateQueryServiceImpl; import org.apache.xindice.core.FaultCodes; *************** *** 87,92 **** --- 88,97 ---- final XUpdateQueryServiceImpl xupdate = new XUpdateQueryServiceImpl(); xupdate.setCollection(this); registerService(xupdate); + + final TextQueryServiceImpl text = new TextQueryServiceImpl(); + text.setCollection(this); // this seems to be unneccesary - setCollection() is called in registerService() + registerService(text); // TODO if (this.col.isMetaEnabled()) { final MetaService meta = new MetaService(); diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: 
xml-xindice/java/src/org/apache/xindice/client/xmldb/services/TextQueryServiceImpl.java xindice/java/src/org/apache/xindice/client/xmldb/services/TextQueryServiceImpl.java *** xml-xindice/java/src/org/apache/xindice/client/xmldb/services/TextQueryServiceImpl.java Thu Jan 1 00:00:00 1970 --- xindice/java/src/org/apache/xindice/client/xmldb/services/TextQueryServiceImpl.java Thu Mar 11 11:07:37 2004 *************** *** 0 **** --- 1,81 ---- + /* + * The Apache Software License, Version 1.1 + * + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xindice" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation and was + * originally based on software copyright (c) 1999-2001, The dbXML + * Group, L.L.C., http://www.dbxmlgroup.com. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ * + * CVS $Id: TextQueryServiceImpl.java,v 1.1 2004/02/24 00:04:24 andy Exp $ + */ + + package org.apache.xindice.client.xmldb.services; + + import org.apache.xindice.client.TextQueryService; + + /** + * XML:DB TextQueryService implementation that uses XML-RPC communication + * with server + * + * @author <a href="mailto:[EMAIL PROTECTED]">James Bates</a> + * @version CVS $Revision: 1.1 $, $Date: 2004/02/24 00:04:24 $ + */ + public class TextQueryServiceImpl extends QueryService implements TextQueryService { + + /** + * Creates new TextQueryService + */ + public TextQueryServiceImpl() { + + super(); + queryLang = "Text"; + } + } diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: xml-xindice/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java xindice/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java *** xml-xindice/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java Thu Jan 1 00:00:00 1970 --- xindice/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java Thu Mar 11 11:07:37 2004 *************** *** 0 **** --- 1,499 ---- + /* + * The Apache Software License, Version 1.1 + * + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." 
+ * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xindice" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation and was + * originally based on software copyright (c) 1999-2001, The dbXML + * Group, L.L.C., http://www.dbxmlgroup.com. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ * + * CVS $Id: LuceneIndexer.java,v 1.5 2004/03/10 16:21:33 andy Exp $ + */ + + package org.apache.xindice.core.indexer; + + // Xindice stuff + import org.apache.commons.logging.Log; + import org.apache.commons.logging.LogFactory; + import org.apache.xindice.core.Collection; + import org.apache.xindice.core.DBObject; + import org.apache.xindice.core.DBException; + import org.apache.xindice.core.data.Key; + import org.apache.xindice.core.data.Value; + import org.apache.xindice.core.FaultCodes; + import org.apache.xindice.core.filer.BTree; + import org.apache.xindice.core.filer.BTreeCallback; + import org.apache.xindice.core.filer.BTreeCorruptException; + import org.apache.xindice.core.filer.BTreeNotFoundException; + import org.apache.xindice.core.indexer.*; + import org.apache.xindice.core.query.QueryEngine; + import org.apache.xindice.util.Configuration; + import org.apache.xindice.xml.SymbolTable; + + // Lucene stuff + import org.apache.lucene.analysis.Analyzer; + //import org.apache.lucene.analysis.standard.StandardAnalyzer; + import org.apache.lucene.document.Document; + import org.apache.lucene.document.Field; + import org.apache.lucene.index.IndexReader; + import org.apache.lucene.index.IndexWriter; + import org.apache.lucene.index.Term; + import org.apache.lucene.search.IndexSearcher; + + import java.io.File; + import java.io.IOException; + import java.util.ArrayList; + import java.util.List; + import java.util.StringTokenizer; + + /** + * LuceneIndexer is a basic implementation of the Indexer interface. + * It is used for maintaining full text indexes + * indexes. 
+ * + * @version CVS $Revision: 1.5 $, $Date: 2004/03/10 16:21:33 $ + */ + public final class LuceneIndexer implements Indexer, DBObject { + + private static final Log log = LogFactory.getLog(LuceneIndexer.class); + + //private static final IndexMatch[] EmptyMatches = new IndexMatch[0]; + //private static final Value EmptyValue = new Value(new byte[0]); + + private static final String NAME = "name"; + private static final String PATTERN = "pattern"; + private static final String TYPE = "type"; + private static final String ANALYZER = "analyzer"; + + public static final String KEYNAME = "key"; + public static final String TEXTNAME = "text"; + + // Default analyzer to use + private static final String DEFANALYZER = "org.apache.lucene.analysis.SimpleAnalyzer"; + + private File idxFile; + private IndexWriter iw; + private IndexReader ir; + private IndexSearcher is; + private Analyzer an; + + private Configuration config; + private Collection collection; + //private SymbolTable symbols; + + private String name; + private String pattern; + private String analyzer; + + // Keep a count of changes to the index + private int docsAdded; + private int docsDeleted; + + public LuceneIndexer() { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".LuceneIndexer()"); + } + + private void setFile(File f) { + idxFile = f; + } + + private File getFile() { + if (null == idxFile) { + throw new java.lang.IllegalStateException("Not bound to a file"); + } + return idxFile; + } + + public synchronized boolean isOpened() throws DBException { + boolean o = (null != iw) || (null != ir); + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".isOpened() - returning " + o); + return o; + } + + private void closeWrite() throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".closeWrite()"); + if (null != iw) { + 
try { + int nDocs = iw.docCount(); + /* Fairly arbitrary rules for triggering index optimisation. Need to + * play with these. + */ + if (docsAdded > nDocs / 10 || docsAdded > 50 || docsDeleted > 10) { + //System.out.println("Optimizing index..."); + iw.optimize(); + docsAdded = 0; + docsDeleted = 0; + } + iw.close(); + iw = null; + } catch (IOException e) { + // Fixme: less than ideal fault code + throw new DBException(FaultCodes.IDX_CORRUPTED, "", e); + } + } + } + + private void closeSearch() throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".closeSearch()"); + if (null != is) { + try { + is.close(); + is = null; + } catch (IOException e) { + // Fixme: less than ideal fault code + throw new DBException(FaultCodes.IDX_CORRUPTED, "", e); + } + } + } + + private void closeRead() throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".closeRead()"); + if (null != ir) { + closeSearch(); + try { + ir.close(); + ir = null; + } catch (IOException e) { + // Fixme: less than ideal fault code + throw new DBException(FaultCodes.IDX_CORRUPTED, "", e); + } + } + } + + private void openRead() throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".openRead()"); + if (null == ir) { + closeWrite(); + try { + ir = IndexReader.open(getFile()); + } catch (IOException e) { + // Fixme: less than ideal fault code + throw new DBException(FaultCodes.IDX_INDEX_NOT_FOUND, "", e); + } + } + } + + private void openSearch() throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".openSearch()"); + if (null == is) { + openRead(); + is = new IndexSearcher(ir); + } + } + + private static boolean isBlank(String n) { + return null == n || n.length() == 0; + } + + public synchronized Analyzer 
getAnalyzer() throws DBException { + try { + if (null == an) { + String anc = isBlank(analyzer) ? DEFANALYZER : analyzer; + Class c = Class.forName(anc); + an = (Analyzer) c.newInstance(); + } + } catch (Exception e) { + throw new DBException(FaultCodes.IDX_NOT_SUPPORTED, "", e); + } + return an; + } + + public synchronized IndexSearcher getSearcher() throws DBException { + openSearch(); + return is; + } + + public synchronized IndexReader getReader() throws DBException { + openRead(); + return ir; + } + + private void openWrite(boolean create) throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".openWrite(" + create + ")"); + if (null == iw) { + closeRead(); + try { + iw = new IndexWriter(getFile(), getAnalyzer(), create); + } catch (IOException e) { + // Fixme: less than ideal fault code + throw new DBException(create ? FaultCodes.IDX_CANNOT_CREATE : FaultCodes.IDX_INDEX_NOT_FOUND, "", e); + } catch (Exception e) { + throw new DBException(FaultCodes.IDX_NOT_SUPPORTED, "", e); + } + } + } + + public synchronized boolean close() throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".close()"); + closeWrite(); + closeRead(); + return true; + } + + public synchronized boolean create() throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".create()"); + drop(); + openWrite(true); + return true; + } + + public synchronized boolean open() throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".open()"); + openWrite(false); + return true; + } + + public synchronized boolean exists() throws DBException { + boolean e = getFile().exists(); + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".exists() - returning " + 
e); + return e; + } + + private boolean deepDelete(File f) throws IOException { + if (f.isDirectory()) { + File fl[] = f.listFiles(); + for (int i = 0; i < fl.length; i++) { + //System.out.println(fl[i].getCanonicalPath()); + if (!deepDelete(fl[i])) { + return false; + } + } + } + return f.delete(); + } + + public synchronized boolean drop() throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".drop()"); + try { + if (exists()) { + close(); + return deepDelete(getFile()); + } else { + return false; + } + } catch (IOException e) { + // Fixme: less than ideal fault code + throw new DBException(FaultCodes.IDX_CORRUPTED, "", e); + } + } + + private void assertOpen() throws DBException { + if (!isOpened()) { + throw new IllegalStateException("Index has not been opened"); + } + } + + private void assertWrite() throws DBException { + assertOpen(); + openWrite(false); + } + + private void assertRead() throws DBException { + assertOpen(); + openRead(); + } + + public void setConfig(Configuration config) { + this.config = config; + try { + name = config.getAttribute(NAME); + pattern = config.getAttribute(PATTERN); + analyzer = config.getAttribute(ANALYZER); + + //System.out.println("setConfig(), name=" + name + ", pattern=" + pattern + ", analyzer=" + analyzer); + + // Destroy any cached information that's based on the config + an = null; + + setLocation(name); + } catch (Exception e) { + if (log.isWarnEnabled()) { + log.warn("ignored exception", e); + } + } + } + + public Configuration getConfig() { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".getConfig()\n"); + return config; + } + + public String getName() { + return name; + } + + private void setLocation(String location) { + setFile(new File(collection.getCollectionRoot(), location)); + } + + public void setCollection(Collection collection) { + //System.out.println("[" + this + ", 
" + Thread.currentThread().getName() + "] " + getClass().getName() + ".setCollection(" + collection + ")"); + try { + this.collection = collection; + //symbols = collection.getSymbols(); + } catch (Exception e) { + if (log.isWarnEnabled()) { + log.warn("ignored exception", e); + } + } + } + + public String getIndexStyle() { + return STYLE_FULLTEXT; + } + + public String getPattern() { + return pattern; + } + + private static void quoteBytes(StringBuffer buf, byte b[], int l) { + for (int i = 0; i < l; i++) { + int bv = b[i] & 0xFF; + if (bv < 0x20 || bv >= 0x7F || bv == '%' || bv == ',') { + buf.append('%').append(Integer.toHexString(0x100 | bv).substring(1)); + } else { + buf.append((char) bv); + } + } + } + + private static byte[] unquoteBytes(String s) { + int sp = 0, sl = s.length(); + byte b[] = new byte[sl]; // worst case + int bp = 0; + while (sp < sl) { + char c = s.charAt(sp++); + if (c == '%') { + int hi = Character.digit(s.charAt(sp++), 16); + int lo = Character.digit(s.charAt(sp++), 16); + b[bp++] = (byte) ((hi << 4) | lo); + } else { + b[bp++] = (byte) c; + } + } + + if (bp < b.length) { + byte nb[] = new byte[bp]; + System.arraycopy(b, 0, nb, 0, bp); + return nb; + } else { + return b; + } + } + + private static String packMatch(Key key, int pos, int len, short elemID, short attrID) { + StringBuffer buf = new StringBuffer(); + quoteBytes(buf, key.getData(), key.getLength()); + buf.append(',').append(pos); + buf.append(',').append(len); + buf.append(',').append(elemID); + buf.append(',').append(attrID); + return buf.toString(); + } + + public static IndexMatch unpackMatch(String match) { + Key key = null; + int pos = -1; + int len = -1; + short elemID = -1; + short attrID = -1; + String m[] = match.split(","); + try { + key = new Key(unquoteBytes(m[0])); + pos = Integer.parseInt(m[1]); + len = Integer.parseInt(m[2]); + elemID = Short.parseShort(m[3]); + attrID = Short.parseShort(m[4]); + } catch (IndexOutOfBoundsException e) { + // run out of data: 
ignore + } + return new IndexMatch(key, pos, len, elemID, attrID); + } + + public synchronized void remove(String value, Key key, int pos, int len, short elemID, short attrID) throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".remove(" + value + ", " + key + ", " + pos + ", " + len + ", " + elemID + ", " + attrID + ")"); + assertRead(); + + try { + ir.delete(new Term(KEYNAME, packMatch(key, pos, len, elemID, attrID))); + docsDeleted++; + } catch (IOException e) { + throw new DBException(FaultCodes.IDX_CORRUPTED, "", e); + } + } + + public synchronized void add(String value, Key key, int pos, int len, short elemID, short attrID) throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".add(" + value + ", " + key + ", " + pos + ", " + len + ", " + elemID + ", " + attrID + ")"); + assertWrite(); + Document doc = new Document(); + doc.add(new Field(KEYNAME, packMatch(key, pos, len, elemID, attrID), true, true, false)); + doc.add(new Field(TEXTNAME, value, false, true, true)); + try { + iw.addDocument(doc); + docsAdded++; + } catch (IOException e) { + throw new DBException(FaultCodes.IDX_CORRUPTED, "", e); + } + } + + public synchronized void flush() throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".flush()"); + //super.flush(); + } + + public synchronized IndexMatch[] queryMatches(final IndexQuery query) throws DBException { + //System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".queryMatches(" + query + ")"); + return null; + } + + public String toString() { + return getName() + "(" + getIndexStyle() + ", " + getPattern() + ")"; + } + } diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: 
xml-xindice/java/src/org/apache/xindice/core/query/TextQueryResolver.java xindice/java/src/org/apache/xindice/core/query/TextQueryResolver.java *** xml-xindice/java/src/org/apache/xindice/core/query/TextQueryResolver.java Thu Jan 1 00:00:00 1970 --- xindice/java/src/org/apache/xindice/core/query/TextQueryResolver.java Thu Mar 11 11:07:38 2004 *************** *** 0 **** --- 1,288 ---- + /* + * The Apache Software License, Version 1.1 + * + * + * Copyright (c) 1999 The Apache Software Foundation. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. The end-user documentation included with the redistribution, + * if any, must include the following acknowledgment: + * "This product includes software developed by the + * Apache Software Foundation (http://www.apache.org/)." + * Alternately, this acknowledgment may appear in the software itself, + * if and wherever such third-party acknowledgments normally appear. + * + * 4. The names "Xindice" and "Apache Software Foundation" must + * not be used to endorse or promote products derived from this + * software without prior written permission. For written + * permission, please contact [EMAIL PROTECTED] + * + * 5. Products derived from this software may not be called "Apache", + * nor may "Apache" appear in their name, without prior written + * permission of the Apache Software Foundation. 
+ * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF + * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT + * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation and was + * originally based on software copyright (c) 1999-2001, The dbXML + * Group, L.L.C., http://www.dbxmlgroup.com. For more + * information on the Apache Software Foundation, please see + * <http://www.apache.org/>. 
+ * + * CVS $Id: TextQueryResolver.java,v 1.3 2004/03/10 16:21:41 andy Exp $ + */ + + package org.apache.xindice.core.query; + + import java.io.IOException; + import java.util.HashSet; + import javax.xml.transform.ErrorListener; + import javax.xml.transform.TransformerException; + + import org.apache.commons.logging.Log; + import org.apache.commons.logging.LogFactory; + + import org.apache.lucene.analysis.Analyzer; + import org.apache.lucene.document.Document; + import org.apache.lucene.index.IndexReader; + import org.apache.lucene.search.Hits; + import org.apache.lucene.search.IndexSearcher; + import org.apache.lucene.queryParser.QueryParser; + import org.apache.lucene.queryParser.ParseException; + + import org.apache.xindice.core.Collection; + import org.apache.xindice.core.data.Key; + import org.apache.xindice.core.data.NodeSet; + import org.apache.xindice.core.DBException; + import org.apache.xindice.core.FaultCodes; + import org.apache.xindice.core.indexer.Indexer; + import org.apache.xindice.core.indexer.IndexManager; + import org.apache.xindice.core.indexer.IndexMatch; + import org.apache.xindice.core.indexer.LuceneIndexer; + import org.apache.xindice.util.Configuration; + import org.apache.xindice.util.SimpleConfigurable; + import org.apache.xindice.util.XindiceException; + import org.apache.xindice.util.XindiceRuntimeException; + import org.apache.xindice.xml.dom.DBDocument; + import org.apache.xindice.xml.NamespaceMap; + + import org.apache.xml.utils.PrefixResolver; + + import org.w3c.dom.Node; + import org.xmldb.api.base.XMLDBException; + + public class TextQueryResolver extends SimpleConfigurable implements QueryResolver { + + public final static String STYLE_FT = "Text"; + private static final Log log = LogFactory.getLog(TextQueryResolver.class); + + private class TextQuery implements Query { + private Collection context; + private String query; + private NamespaceMap nsMap; + private Key keys[]; + + /** + * ResultSet + */ + private class ResultSet 
implements NodeSet { + private Key[] keySet; + private int keySetSize; + private String query; + + private int keyPos = 0; + private Node nextNode; + + public ResultSet(Key[] keySet, int keySetSize, String query) { + this.keySet = keySet; + this.query = query; + this.keySetSize = keySetSize; + + try { + prepareNextNode(); + } catch (Exception e) { + throw new XindiceRuntimeException(e.getMessage()); + } + } + + private void prepareNextNode() throws XMLDBException, TransformerException, DBException { + nextNode = null; + + while (nextNode == null && keyPos < keySet.length) { + DBDocument d = (DBDocument) context.getDocument(keySet[keyPos++]); + if (d != null) { + nextNode = d.getDocumentElement(); + } + + } + } + + public boolean hasMoreNodes() { + return nextNode != null; + } + + public Object getNextNode() { + Node n = nextNode; + + try { + prepareNextNode(); + } catch (Exception e) { + throw new XindiceRuntimeException(e.getMessage()); + } + + return n; + } + } + + private TextQuery(Collection context, String query, NamespaceMap nsMap, Key[] keys) { + this.context = context; + this.query = query; + this.nsMap = nsMap; + this.keys = keys; + } + + public String getQueryStyle() { + return STYLE_FT; + } + + public Collection getQueryContext() { + return context; + } + + public String getQueryString() { + return query; + } + + public NamespaceMap getNamespaceMap() { + return nsMap; + } + + public Key[] getKeySet() { + return keys; + } + + /** + * Not very clever: just find the LuceneIndexer with the shortest pattern + */ + private LuceneIndexer findIndex(Collection c) throws DBException { + IndexManager im = c.getIndexManager(); + LuceneIndexer best = null; + int bestPattern = -1; + String list[] = im.list(); + for (int i = 0; i < list.length; i++) { + Indexer idx = im.get(list[i]); + if (idx instanceof LuceneIndexer) { + int pl = idx.getPattern().length(); + if (bestPattern == -1 || pl < bestPattern) { + best = (LuceneIndexer) idx; + bestPattern = pl; + } + } + } + + 
return best; + } + + public NodeSet execute() throws QueryException { + try { + LuceneIndexer idx = findIndex(context); + if (null == idx) { + throw new QueryException(FaultCodes.QRY_STYLE_NOT_FOUND, "No text indexer in this collection"); + } + Analyzer an = idx.getAnalyzer(); + IndexReader ir = idx.getReader(); + IndexSearcher is = idx.getSearcher(); + Hits hits = is.search(QueryParser.parse(query, idx.TEXTNAME, an)); + + int hl = hits.length(); + Key rk[] = new Key[hl]; + int rkused = 0; + HashSet filter = null; + HashSet done = new HashSet(hits.length()); + if (keys != null) { + filter = new HashSet(keys.length); + for (int k = 0; k < keys.length; k++) { + filter.add(keys[k]); + } + } + for (int i = 0; i < hits.length(); i++) { + int id = hits.id(i); + Document d = ir.document(id); + IndexMatch im = LuceneIndexer.unpackMatch(d.getField(idx.KEYNAME).stringValue()); + Key k = im.getKey(); + if (!done.contains(k)) { + if (filter == null || filter.contains(k)) { + rk[rkused++] = k; + } + done.add(k); + } + } + + return new ResultSet(rk, rkused, query); + + } catch (DBException e) { + throw new QueryException(e.faultCode); + } catch (ParseException e) { + throw new QueryException(FaultCodes.QRY_COMPILATION_ERROR, e.getMessage(), e); + } catch (IOException e) { + throw new QueryException(FaultCodes.QRY_PROCESSING_ERROR, e.getMessage(), e); + } + } + } + + public void setQueryEngine(QueryEngine engine) { + System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".setQueryEngine(" + engine + ")"); + // do nothing + } + + public String getQueryStyle() { + System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".getQueryStyle()"); + return STYLE_FT; + } + + public Query compileQuery(Collection context, String query, NamespaceMap nsMap, Key[] keys) throws QueryException { + System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + 
".compileQuery(" + context + ", " + query + ")"); + return new TextQuery(context, query, nsMap, keys); + } + + public NodeSet query(Collection context, String query, NamespaceMap nsMap, Key[] keys) throws QueryException { + System.out.println("[" + this + ", " + Thread.currentThread().getName() + "] " + getClass().getName() + ".query(" + context + ", " + query + ")"); + TextQuery tq = new TextQuery(context, query, nsMap, keys); + return tq.execute(); + } + + //public void setConfig(Configuration config) throws XindiceException { + // /[EMAIL PROTECTED] Implement this org.apache.xindice.util.Configurable method*/ + // throw new java.lang.UnsupportedOperationException("Method setConfig() not yet implemented."); + //} + + //public Configuration getConfig() { + // /[EMAIL PROTECTED] Implement this org.apache.xindice.util.Configurable method*/ + // throw new java.lang.UnsupportedOperationException("Method getConfig() not yet implemented."); + //} + } diff -brcN --exclude=CVS --exclude=andy -I$Id: -I$Revision: -I$Date: -I$Header: xml-xindice/java/src/org/apache/xindice/server/Xindice.java xindice/java/src/org/apache/xindice/server/Xindice.java *** xml-xindice/java/src/org/apache/xindice/server/Xindice.java Sun Feb 8 02:54:25 2004 --- xindice/java/src/org/apache/xindice/server/Xindice.java Thu Mar 11 11:07:38 2004 *************** *** 63,68 **** --- 63,69 ---- + " <queryengine>" + " <resolver autoindex=\"false\" class=\"org.apache.xindice.core.query.XPathQueryResolver\" />" + " <resolver class=\"org.apache.xindice.core.xupdate.XUpdateQueryResolver\" />" + + " <resolver class=\"org.apache.xindice.core.query.TextQueryResolver\" />" + " </queryengine>" + " </root-collection>" + " <xml-rpc>"