Author: natalia Date: Fri Oct 19 17:32:43 2007 New Revision: 586647 URL: http://svn.apache.org/viewvc?rev=586647&view=rev Log: Added extension XPath function 'ftcontains' for full text search
Added: xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java (with props) xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java (with props) Modified: xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java xml/xindice/trunk/java/src/org/apache/xindice/core/query/XPathQueryResolver.java Modified: xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java?rev=586647&r1=586646&r2=586647&view=diff ============================================================================== --- xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java (original) +++ xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java Fri Oct 19 17:32:43 2007 @@ -91,10 +91,9 @@ private static final String PATTERN_ALIAS = "alias"; public static final String KEYNAME = "key"; - public static final String TEXTNAME = "text"; // Default analyzer to use - private static final String DEFANALYZER = "org.apache.lucene.analysis.SimpleAnalyzer"; + public static final String DEFANALYZER = "org.apache.lucene.analysis.SimpleAnalyzer"; private static final IndexMatch[] EMPTY_MATCHES = new IndexMatch[0]; private File idxFile; @@ -147,6 +146,31 @@ */ public IndexPattern[] getPatterns() { return (IndexPattern[]) patterns.keySet().toArray(new IndexPattern[0]); + } + + /** + * Return alias for the given pattern. If this exact pattern is not indexed, + * method will look for matching indexed pattern. 
+ * @param pattern IndexPattern + * @return Alias for the closest matching pattern or null, if there is none + */ + public String getPatternAlias(IndexPattern pattern) { + if (patterns.containsKey(pattern)) { + return (String) patterns.get(pattern); + } + + int match = 0; + IndexPattern matchPattern = null; + for (Iterator i = patterns.keySet().iterator(); i.hasNext(); ) { + IndexPattern p = (IndexPattern) i.next(); + int cMatch = pattern.getMatchLevel(p); + if (cMatch > match) { + match = cMatch; + matchPattern = p; + } + } + + return (String) patterns.get(matchPattern); } /** Added: xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java?rev=586647&view=auto ============================================================================== --- xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java (added) +++ xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java Fri Oct 19 17:32:43 2007 @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * $Id$ + */ + +package org.apache.xindice.core.query; + +import org.apache.xpath.XPathContext; +import org.apache.xpath.functions.FunctionOneArg; +import org.apache.xpath.objects.XObject; +import org.apache.xpath.objects.XBoolean; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.analysis.Analyzer; +import org.apache.xindice.core.query.ftsearch.Searcher; +import org.apache.xindice.core.data.NodeSet; +import org.apache.xindice.core.Collection; +import org.apache.xindice.core.indexer.Indexer; +import org.apache.xindice.core.indexer.LuceneIndexer; +import org.apache.xindice.xml.dom.NodeListImpl; +import org.w3c.dom.Node; + +import javax.xml.transform.TransformerException; + +/** + * Full text search extention function for XPath. + * + * @version $Revision$, $Date$ + */ +public class FuncFTContains extends FunctionOneArg { + + /** + * Execute text search function. The function returns XBoolean.S_TRUE + * if the current node text matches the query, XBoolean.S_FALSE otherwise. + * @param xctxt The current execution context. + * @return XBoolean.S_TRUE or XBoolean.S_FALSE. 
+ * + * @throws javax.xml.transform.TransformerException + */ + public XObject execute(XPathContext xctxt) throws TransformerException { + + XPathQueryResolver.XPathResolverContext ctxt = (XPathQueryResolver.XPathResolverContext) xctxt; + + // it is not guaranteed that analyzer will be set at this time, even if there is + // a suitable index in the collection, for various reasons XPathQueryResolver may + // decide not to run optimization for the function + Analyzer analyzer = (Analyzer) ctxt.getParameter(XPathQueryResolver.PARAM_ANALYZER); + if (analyzer == null) { + try { + Collection collection = (Collection) ctxt.getParameter(XPathQueryResolver.PARAM_COLLECTION); + Indexer idx = collection.getIndexManager().getBestIndexer(Indexer.STYLE_FULLTEXT, null); + if (idx instanceof LuceneIndexer) { + analyzer = ((LuceneIndexer) idx).getAnalyzer(); + } else { + analyzer = (Analyzer) Class.forName(LuceneIndexer.DEFANALYZER).newInstance(); + } + + // parameters are intentionally altered so next node or next document won't need + // to do an analyzer lookup + ctxt.setParameter(XPathQueryResolver.PARAM_ANALYZER, analyzer); + } catch (Exception e) { + throw new TransformerException("Could not get text analyzer"); + } + } + + String query = getArg0().execute(xctxt).str(); + try { + int ctxtNode = xctxt.getCurrentNode(); + Node node = xctxt.getDTM(ctxtNode).getNode(ctxtNode); + NodeListImpl list = new NodeListImpl(null); + list.add(node); + + Searcher searcher = new Searcher(list, analyzer); + NodeSet nodes = searcher.search(query); + + return nodes.hasMoreNodes() ? 
XBoolean.S_TRUE : XBoolean.S_FALSE; + } catch (ParseException e) { + throw new TransformerException("Error in text query", e); + } + } +} Propchange: xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java ------------------------------------------------------------------------------ svn:keywords = Id Revision Author Date Modified: xml/xindice/trunk/java/src/org/apache/xindice/core/query/XPathQueryResolver.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/query/XPathQueryResolver.java?rev=586647&r1=586646&r2=586647&view=diff ============================================================================== --- xml/xindice/trunk/java/src/org/apache/xindice/core/query/XPathQueryResolver.java (original) +++ xml/xindice/trunk/java/src/org/apache/xindice/core/query/XPathQueryResolver.java Fri Oct 19 17:32:43 2007 @@ -23,6 +23,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.xindice.core.Collection; import org.apache.xindice.core.DBException; +import org.apache.xindice.core.query.ftsearch.SpecialQueryParser; import org.apache.xindice.core.data.Entry; import org.apache.xindice.core.data.Key; import org.apache.xindice.core.data.NodeSet; @@ -33,6 +34,7 @@ import org.apache.xindice.core.indexer.IndexPattern; import org.apache.xindice.core.indexer.IndexQuery; import org.apache.xindice.core.indexer.Indexer; +import org.apache.xindice.core.indexer.LuceneIndexer; import org.apache.xindice.core.indexer.helpers.IndexQueryANY; import org.apache.xindice.core.indexer.helpers.IndexQueryEQ; import org.apache.xindice.core.indexer.helpers.IndexQueryGEQ; @@ -65,6 +67,7 @@ import org.apache.xpath.objects.XNumber; import org.apache.xpath.objects.XObject; import org.apache.xpath.objects.XString; +import 
org.apache.lucene.analysis.Analyzer; import org.w3c.dom.DOMException; import org.w3c.dom.Element; @@ -79,11 +82,14 @@ import javax.xml.transform.TransformerException; import java.lang.reflect.Constructor; +import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.SortedSet; import java.util.TreeSet; +import java.util.HashMap; +import java.util.Map; /** * XPathQueryResolver @@ -101,6 +107,9 @@ public static final String STYLE_XPATH = "XPath"; + static final String PARAM_COLLECTION = "collection"; + static final String PARAM_ANALYZER = "analyzer"; + // Maps Xalan Comparisons To IndexQuery private static final int[] OPMAP = { IndexQuery.NEQ, IndexQuery.EQ, IndexQuery.LEQ, IndexQuery.LT, IndexQuery.GEQ, IndexQuery.GT @@ -111,18 +120,31 @@ private static final boolean XCOMPILER3; // Xalan Compiler constructor private static final Constructor XCOMPILER; + // XPath constructor + private static final Constructor XPATH; static { boolean c3; Constructor c; + Constructor x; try { c = Compiler.class.getConstructor( new Class[] { ErrorListener.class, SourceLocator.class, FunctionTable.class }); + + x = XPath.class.getConstructor( + new Class[] { String.class, SourceLocator.class, PrefixResolver.class, + int.class, ErrorListener.class, FunctionTable.class }); + c3 = true; } catch (NoSuchMethodException nsme) { try { c = Compiler.class.getConstructor( new Class[] { ErrorListener.class, SourceLocator.class }); + + x = XPath.class.getConstructor( + new Class[] { String.class, SourceLocator.class, PrefixResolver.class, + int.class, ErrorListener.class }); + c3 = false; } catch (NoSuchMethodException e) { // Should not happen @@ -133,20 +155,47 @@ XCOMPILER3 = c3; XCOMPILER = c; + XPATH = x; } private DefaultErrorHandler errorListener; private FunctionTable functionTable; private boolean autoIndex; + private int funcFTContainsId; public XPathQueryResolver() { super(); errorListener = new DefaultErrorHandler(); - if 
(XCOMPILER3) { - functionTable = new FunctionTable(); + + Object num = null; + try { + Method install; + Object function; + + if (XCOMPILER3) { + functionTable = new FunctionTable(); + function = FuncFTContains.class; + install = functionTable.getClass().getMethod("installFunction", + new Class[] {String.class, Class.class}); + num = install.invoke(functionTable, new Object[] {"ftcontains", function}); + } else { + function = new FuncFTContains(); + install = FunctionTable.class.getMethod("installFunction", + new Class[] {String.class, Expression.class}); + num = install.invoke(FunctionTable.class, new Object[] {"ftcontains", function}); + } + } catch (Exception e) { + // no extentions will be available + log.error("Could not invoke installFunction method. Incompatible Xalan version?"); + } + + if (num != null) { + funcFTContainsId = ((Integer) num).intValue(); + } else { + log.error("Could not install ftcontains function."); } } @@ -201,6 +250,8 @@ public Compiler cmp; public XPath xp; public Key[] keys; + public Analyzer analyzer; + private HashMap parameters; public XPathQuery(Collection context, String query, NamespaceMap nsMap, Key[] keys) throws QueryException { @@ -209,6 +260,9 @@ this.nsMap = nsMap; this.keys = keys; + parameters = new HashMap(); + parameters.put(PARAM_COLLECTION, context); + Expression ex; try { if (nsMap != null) { @@ -283,7 +337,7 @@ keySet = (Key[]) set.toArray(EMPTY_KEYS); } - return new ResultSet(context, pr, keySet, query); + return new ResultSet(context, pr, keySet, query, parameters); } catch (Exception e) { if (e instanceof QueryException) { throw (QueryException) e.fillInStackTrace(); @@ -728,6 +782,10 @@ case FunctionTable.FUNC_TRUE: return XBoolean.S_TRUE; default: + // custom extention + if (id == funcFTContainsId) { + return funcFTContains(owner, args); + } return null; } } @@ -860,6 +918,26 @@ return null; } + private Object funcFTContains(String owner, List args) throws Exception { + if (args.size() != 1) { + return null; + 
} + + if (parameters == null) { + parameters = new HashMap(); + } + + Object o = args.get(0); + + if (o instanceof XString) { + // extract text query + String query = ((XString) o).str(); + return queryTextIndex(owner, query); + } + + return null; + } + private Object funcFloor(List args) throws Exception { if (args.size() == 1) { Object o = args.get(0); @@ -1115,6 +1193,46 @@ return null; } + private Object queryTextIndex(String ps, String query) throws Exception { + IndexPattern pattern = new IndexPattern(symbols, ps, nsMap); + + // check if there is full text indexer for this collection + Indexer idx = context.getIndexManager().getBestIndexer(Indexer.STYLE_FULLTEXT, pattern); + if (idx instanceof LuceneIndexer) { + LuceneIndexer textInd = ((LuceneIndexer) idx); + analyzer = textInd.getAnalyzer(); + parameters.put(PARAM_ANALYZER, analyzer); + + // see if index has matching pattern + String alias = textInd.getPatternAlias(pattern); + + if (alias != null) { + // Queries that contain 'NOT', '!', '-' operators cannot be used here + // because LuceneIndexer searches for documents, in that context + // "NOT term" query means to find documents where 'term' does not + // appear in certain field at all. For XPath, however, it means that + // 'term' must not appear in text of the element that currently under + // evaluation, but may appear in the other elements that match the + // same IndexPattern. + // + // To make sure that all potentially matching documents are returned + // by the search, all subqueries with these operators are ignored + // on this step. 
+ org.apache.lucene.search.Query parsedQuery = new SpecialQueryParser(alias, analyzer).parse(query); + IndexMatch[] matches = textInd.queryMatches(parsedQuery); + Key[] keys = QueryEngine.getUniqueKeys(matches); + + return new NamedKeys(ps, ps.indexOf('@') != -1, keys); + } + } else { + // there is no Lucene indexer, fall back to default analyzer + analyzer = (Analyzer) Class.forName(LuceneIndexer.DEFANALYZER).newInstance(); + parameters.put(PARAM_ANALYZER, analyzer); + } + + return null; + } + /** * queryComparison performs a comparison query use the operands that are passed to it, and returns the resulting * Keys. @@ -1220,12 +1338,14 @@ public int keyPos = 0; public NodeIterator ni; public Object node; + private Map parameters; - public ResultSet(Collection context, PrefixResolver pr, Key[] keySet, String query) { + public ResultSet(Collection context, PrefixResolver pr, Key[] keySet, String query, Map parameters) { this.context = context; this.pr = pr; this.keySet = keySet; this.query = query; + this.parameters = parameters; errors = new ErrorListener() { public void fatalError(TransformerException te) { @@ -1254,7 +1374,21 @@ } } - private void prepareNextNode() throws XMLDBException, TransformerException, DBException { + private XPath createXPath(PrefixResolver pfx) { + try { + if (XCOMPILER3) { + return (XPath) XPATH.newInstance( + new Object[] {query, null, pfx, new Integer(XPath.SELECT), errors, functionTable}); + } else { + return (XPath) XPATH.newInstance( + new Object[] {query, null, pfx, new Integer(XPath.SELECT), errors}); + } + } catch (Exception e) { + throw new RuntimeException("Could not instantiate Compiler: " + e); + } + } + + private void prepareNextNode() throws XMLDBException, TransformerException, DBException { node = null; while (keyPos < keySet.length) { @@ -1275,15 +1409,15 @@ continue; } - XPathContext xpc = new XPathContext(); + XPathResolverContext xpc = new XPathResolverContext(parameters); PrefixResolver pfx; if (pr == null) { pfx = 
new PrefixResolverDefault(d.getDocumentElement()); - xp = new XPath(query, null, pfx, XPath.SELECT, errors); + xp = createXPath(pfx); } else { pfx = pr; if (xp == null) { - xp = new XPath(query, null, pfx, XPath.SELECT, errors); + xp = createXPath(pfx); } } @@ -1407,4 +1541,23 @@ } private final static NodeIterator EMPTY_NODE_ITERATOR = new EmptyNodeIterator(); + + /** + * XPathContext with optional parameters + */ + static class XPathResolverContext extends XPathContext { + private Map parameters; + + public XPathResolverContext(Map parameters) { + this.parameters = parameters; + } + + public Object getParameter(String name) { + return parameters.get(name); + } + + public void setParameter(String name, Object object) { + parameters.put(name, object); + } + } } Added: xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java URL: http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java?rev=586647&view=auto ============================================================================== --- xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java (added) +++ xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java Fri Oct 19 17:32:43 2007 @@ -0,0 +1,110 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * $Id$ + */ + +package org.apache.xindice.core.query.ftsearch; + +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.queryParser.CharStream; +import org.apache.lucene.queryParser.QueryParserTokenManager; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanClause; + +import java.util.Vector; + +/** + * Specialized parser for text queries that ignores query clauses that have + * 'prohibited' modifier. This is only used for XPath full text search + * extention function to search existing text index, if any. + * + * @version $Revision$, $Date$ + */ +public class SpecialQueryParser extends QueryParser { + private static final int CONJ_AND = 1; + private static final int CONJ_OR = 2; + + private static final int MOD_NOT = 10; + private static final int MOD_REQ = 11; + + public SpecialQueryParser(String f, Analyzer a) { + super(f, a); + } + + public SpecialQueryParser(CharStream stream) { + super(stream); + } + + public SpecialQueryParser(QueryParserTokenManager tm) { + super(tm); + } + + /** + * This method is slightly modified copy of superclass method, where it ignores + * boolean clauses that have 'prohibited' modifier. 
+ * @see QueryParser#addClause(java.util.Vector, int, int, org.apache.lucene.search.Query) + */ + protected void addClause(Vector clauses, int conj, int mods, Query q) { + + // If this term is introduced by AND, make the preceding term required, + // unless it's already prohibited + if (clauses.size() > 0 && conj == CONJ_AND) { + BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.MUST); + } + + if (clauses.size() > 0 && getDefaultOperator() == AND_OPERATOR && conj == CONJ_OR) { + // If this term is introduced by OR, make the preceding term optional, + // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) + // notice if the input is a OR b, first term is parsed as required; without + // this modification a OR b would parsed as +a OR b + BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.SHOULD); + } + + // We might have been passed a null query; the term might have been + // filtered away by the analyzer. + if (q == null) + return; + + boolean required, prohibited; + if (getDefaultOperator() == OR_OPERATOR) { + // We set REQUIRED if we're introduced by AND or +; PROHIBITED if + // introduced by NOT or -; make sure not to set both. 
+ prohibited = (mods == MOD_NOT); + required = (mods == MOD_REQ); + if (conj == CONJ_AND && !prohibited) { + required = true; + } + } else { + // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED + // if not PROHIBITED and not introduced by OR + prohibited = (mods == MOD_NOT); + required = (!prohibited && conj != CONJ_OR); + } + if (required && !prohibited) { + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST)); + } else if (!required && !prohibited) { + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + } else if (required && prohibited) { + throw new RuntimeException("Clause cannot be both required and prohibited"); + } + } +} Propchange: xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java ------------------------------------------------------------------------------ svn:keywords = Id Revision Author Date