Author: natalia
Date: Fri Oct 19 17:32:43 2007
New Revision: 586647
URL: http://svn.apache.org/viewvc?rev=586647&view=rev
Log:
Added extension XPath function 'ftcontains' for full text search
Added:
xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java
(with props)
xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java
(with props)
Modified:
xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java
xml/xindice/trunk/java/src/org/apache/xindice/core/query/XPathQueryResolver.java
Modified:
xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java
URL:
http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java?rev=586647&r1=586646&r2=586647&view=diff
==============================================================================
---
xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java
(original)
+++
xml/xindice/trunk/java/src/org/apache/xindice/core/indexer/LuceneIndexer.java
Fri Oct 19 17:32:43 2007
@@ -91,10 +91,9 @@
private static final String PATTERN_ALIAS = "alias";
public static final String KEYNAME = "key";
- public static final String TEXTNAME = "text";
// Default analyzer to use
- private static final String DEFANALYZER =
"org.apache.lucene.analysis.SimpleAnalyzer";
+ public static final String DEFANALYZER =
"org.apache.lucene.analysis.SimpleAnalyzer";
private static final IndexMatch[] EMPTY_MATCHES = new IndexMatch[0];
private File idxFile;
@@ -147,6 +146,31 @@
*/
public IndexPattern[] getPatterns() {
return (IndexPattern[]) patterns.keySet().toArray(new IndexPattern[0]);
+ }
+
+    /**
+     * Return the alias under which the given pattern is indexed. If this exact
+     * pattern is not indexed, the indexed pattern with the highest match level
+     * (as reported by IndexPattern.getMatchLevel) is used instead.
+     * @param pattern IndexPattern to look up
+     * @return Alias for the closest matching pattern or null, if there is none
+     */
+    public String getPatternAlias(IndexPattern pattern) {
+        if (patterns.containsKey(pattern)) {
+            return (String) patterns.get(pattern);
+        }
+
+        // No exact entry: pick the indexed pattern with the highest positive match level
+        int match = 0;
+        IndexPattern matchPattern = null;
+        for (Iterator i = patterns.keySet().iterator(); i.hasNext(); ) {
+            IndexPattern p = (IndexPattern) i.next();
+            int cMatch = pattern.getMatchLevel(p);
+            if (cMatch > match) {
+                match = cMatch;
+                matchPattern = p;
+            }
+        }
+
+        // Nothing matched: answer null directly instead of looking up a null key
+        return matchPattern == null ? null : (String) patterns.get(matchPattern);
}
/**
Added:
xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java
URL:
http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java?rev=586647&view=auto
==============================================================================
---
xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java
(added)
+++
xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java
Fri Oct 19 17:32:43 2007
@@ -0,0 +1,94 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * $Id$
+ */
+
+package org.apache.xindice.core.query;
+
+import org.apache.xpath.XPathContext;
+import org.apache.xpath.functions.FunctionOneArg;
+import org.apache.xpath.objects.XObject;
+import org.apache.xpath.objects.XBoolean;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.xindice.core.query.ftsearch.Searcher;
+import org.apache.xindice.core.data.NodeSet;
+import org.apache.xindice.core.Collection;
+import org.apache.xindice.core.indexer.Indexer;
+import org.apache.xindice.core.indexer.LuceneIndexer;
+import org.apache.xindice.xml.dom.NodeListImpl;
+import org.w3c.dom.Node;
+
+import javax.xml.transform.TransformerException;
+
+/**
+ * Full text search extension function for XPath.
+ *
+ * @version $Revision$, $Date$
+ */
+public class FuncFTContains extends FunctionOneArg {
+
+    /**
+     * Execute text search function. The function returns XBoolean.S_TRUE
+     * if the current node text matches the query, XBoolean.S_FALSE otherwise.
+     * @param xctxt The current execution context; it is cast to
+     *        XPathQueryResolver.XPathResolverContext to access the query parameters.
+     * @return XBoolean.S_TRUE or XBoolean.S_FALSE.
+     *
+     * @throws javax.xml.transform.TransformerException if the text analyzer cannot
+     *         be obtained or the text query cannot be parsed
+     */
+    public XObject execute(XPathContext xctxt) throws TransformerException {
+
+        XPathQueryResolver.XPathResolverContext ctxt = (XPathQueryResolver.XPathResolverContext) xctxt;
+
+        // it is not guaranteed that analyzer will be set at this time, even if there is
+        // a suitable index in the collection, for various reasons XPathQueryResolver may
+        // decide not to run optimization for the function
+        Analyzer analyzer = (Analyzer) ctxt.getParameter(XPathQueryResolver.PARAM_ANALYZER);
+        if (analyzer == null) {
+            try {
+                Collection collection = (Collection) ctxt.getParameter(XPathQueryResolver.PARAM_COLLECTION);
+                Indexer idx = collection.getIndexManager().getBestIndexer(Indexer.STYLE_FULLTEXT, null);
+                if (idx instanceof LuceneIndexer) {
+                    analyzer = ((LuceneIndexer) idx).getAnalyzer();
+                } else {
+                    analyzer = (Analyzer) Class.forName(LuceneIndexer.DEFANALYZER).newInstance();
+                }
+
+                // parameters are intentionally altered so next node or next document won't need
+                // to do an analyzer lookup
+                ctxt.setParameter(XPathQueryResolver.PARAM_ANALYZER, analyzer);
+            } catch (Exception e) {
+                // keep the underlying failure as the cause so it is not lost
+                throw new TransformerException("Could not get text analyzer", e);
+            }
+        }
+
+        String query = getArg0().execute(xctxt).str();
+        try {
+            // search only the current context node, wrapped in a single-element list
+            int ctxtNode = xctxt.getCurrentNode();
+            Node node = xctxt.getDTM(ctxtNode).getNode(ctxtNode);
+            NodeListImpl list = new NodeListImpl(null);
+            list.add(node);
+
+            Searcher searcher = new Searcher(list, analyzer);
+            NodeSet nodes = searcher.search(query);
+
+            return nodes.hasMoreNodes() ? XBoolean.S_TRUE : XBoolean.S_FALSE;
+        } catch (ParseException e) {
+            throw new TransformerException("Error in text query", e);
+        }
+    }
+}
Propchange:
xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange:
xml/xindice/trunk/java/src/org/apache/xindice/core/query/FuncFTContains.java
------------------------------------------------------------------------------
svn:keywords = Id Revision Author Date
Modified:
xml/xindice/trunk/java/src/org/apache/xindice/core/query/XPathQueryResolver.java
URL:
http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/query/XPathQueryResolver.java?rev=586647&r1=586646&r2=586647&view=diff
==============================================================================
---
xml/xindice/trunk/java/src/org/apache/xindice/core/query/XPathQueryResolver.java
(original)
+++
xml/xindice/trunk/java/src/org/apache/xindice/core/query/XPathQueryResolver.java
Fri Oct 19 17:32:43 2007
@@ -23,6 +23,7 @@
import org.apache.commons.logging.LogFactory;
import org.apache.xindice.core.Collection;
import org.apache.xindice.core.DBException;
+import org.apache.xindice.core.query.ftsearch.SpecialQueryParser;
import org.apache.xindice.core.data.Entry;
import org.apache.xindice.core.data.Key;
import org.apache.xindice.core.data.NodeSet;
@@ -33,6 +34,7 @@
import org.apache.xindice.core.indexer.IndexPattern;
import org.apache.xindice.core.indexer.IndexQuery;
import org.apache.xindice.core.indexer.Indexer;
+import org.apache.xindice.core.indexer.LuceneIndexer;
import org.apache.xindice.core.indexer.helpers.IndexQueryANY;
import org.apache.xindice.core.indexer.helpers.IndexQueryEQ;
import org.apache.xindice.core.indexer.helpers.IndexQueryGEQ;
@@ -65,6 +67,7 @@
import org.apache.xpath.objects.XNumber;
import org.apache.xpath.objects.XObject;
import org.apache.xpath.objects.XString;
+import org.apache.lucene.analysis.Analyzer;
import org.w3c.dom.DOMException;
import org.w3c.dom.Element;
@@ -79,11 +82,14 @@
import javax.xml.transform.TransformerException;
import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
+import java.util.HashMap;
+import java.util.Map;
/**
* XPathQueryResolver
@@ -101,6 +107,9 @@
public static final String STYLE_XPATH = "XPath";
+ static final String PARAM_COLLECTION = "collection";
+ static final String PARAM_ANALYZER = "analyzer";
+
// Maps Xalan Comparisons To IndexQuery
private static final int[] OPMAP = {
IndexQuery.NEQ, IndexQuery.EQ, IndexQuery.LEQ, IndexQuery.LT,
IndexQuery.GEQ, IndexQuery.GT
@@ -111,18 +120,31 @@
private static final boolean XCOMPILER3;
// Xalan Compiler constructor
private static final Constructor XCOMPILER;
+ // XPath constructor
+ private static final Constructor XPATH;
static {
boolean c3;
Constructor c;
+ Constructor x;
try {
c = Compiler.class.getConstructor(
new Class[] { ErrorListener.class, SourceLocator.class,
FunctionTable.class });
+
+ x = XPath.class.getConstructor(
+ new Class[] { String.class, SourceLocator.class,
PrefixResolver.class,
+ int.class, ErrorListener.class,
FunctionTable.class });
+
c3 = true;
} catch (NoSuchMethodException nsme) {
try {
c = Compiler.class.getConstructor(
new Class[] { ErrorListener.class, SourceLocator.class
});
+
+ x = XPath.class.getConstructor(
+ new Class[] { String.class, SourceLocator.class,
PrefixResolver.class,
+ int.class, ErrorListener.class });
+
c3 = false;
} catch (NoSuchMethodException e) {
// Should not happen
@@ -133,20 +155,47 @@
XCOMPILER3 = c3;
XCOMPILER = c;
+ XPATH = x;
}
private DefaultErrorHandler errorListener;
private FunctionTable functionTable;
private boolean autoIndex;
+ private int funcFTContainsId;
public XPathQueryResolver() {
super();
errorListener = new DefaultErrorHandler();
- if (XCOMPILER3) {
- functionTable = new FunctionTable();
+
+        // Register the 'ftcontains' extension function. installFunction is looked up
+        // reflectively because its signature differs between the supported Xalan
+        // variants: invoked on the FunctionTable instance with a Class argument when
+        // XCOMPILER3 is set, otherwise via FunctionTable.class with an Expression argument.
+        Object num = null;
+        try {
+            Method install;
+            Object function;
+
+            if (XCOMPILER3) {
+                functionTable = new FunctionTable();
+                function = FuncFTContains.class;
+                install = functionTable.getClass().getMethod("installFunction",
+                                                             new Class[] {String.class, Class.class});
+                num = install.invoke(functionTable, new Object[] {"ftcontains", function});
+            } else {
+                function = new FuncFTContains();
+                install = FunctionTable.class.getMethod("installFunction",
+                                                        new Class[] {String.class, Expression.class});
+                num = install.invoke(FunctionTable.class, new Object[] {"ftcontains", function});
+            }
+        } catch (Exception e) {
+            // no extensions will be available; log the cause so the failure can be diagnosed
+            log.error("Could not invoke installFunction method. Incompatible Xalan version?", e);
+        }
+
+        // installFunction answers the id assigned to the function; remember it for later dispatch
+        if (num != null) {
+            funcFTContainsId = ((Integer) num).intValue();
+        } else {
+            log.error("Could not install ftcontains function.");
}
}
@@ -201,6 +250,8 @@
public Compiler cmp;
public XPath xp;
public Key[] keys;
+ public Analyzer analyzer;
+ private HashMap parameters;
public XPathQuery(Collection context, String query, NamespaceMap
nsMap, Key[] keys)
throws QueryException {
@@ -209,6 +260,9 @@
this.nsMap = nsMap;
this.keys = keys;
+ parameters = new HashMap();
+ parameters.put(PARAM_COLLECTION, context);
+
Expression ex;
try {
if (nsMap != null) {
@@ -283,7 +337,7 @@
keySet = (Key[]) set.toArray(EMPTY_KEYS);
}
- return new ResultSet(context, pr, keySet, query);
+ return new ResultSet(context, pr, keySet, query, parameters);
} catch (Exception e) {
if (e instanceof QueryException) {
throw (QueryException) e.fillInStackTrace();
@@ -728,6 +782,10 @@
case FunctionTable.FUNC_TRUE:
return XBoolean.S_TRUE;
default:
+ // custom extension
+ if (id == funcFTContainsId) {
+ return funcFTContains(owner, args);
+ }
return null;
}
}
@@ -860,6 +918,26 @@
return null;
}
+        /**
+         * Handles the 'ftcontains' extension function: when called with exactly one
+         * string argument, the argument is used as a text query against the text index.
+         *
+         * @param owner pattern string the function applies to, passed on to queryTextIndex
+         * @param args evaluated function arguments
+         * @return result of queryTextIndex, or null if the arguments are not a single XString
+         * @throws Exception propagated from queryTextIndex
+         */
+        private Object funcFTContains(String owner, List args) throws Exception {
+            if (args.size() != 1) {
+                return null;
+            }
+
+            // make sure the parameter map exists before any further processing
+            if (parameters == null) {
+                parameters = new HashMap();
+            }
+
+            Object arg = args.get(0);
+            if (!(arg instanceof XString)) {
+                return null;
+            }
+
+            // the string argument is the text query
+            return queryTextIndex(owner, ((XString) arg).str());
+        }
+
private Object funcFloor(List args) throws Exception {
if (args.size() == 1) {
Object o = args.get(0);
@@ -1115,6 +1193,46 @@
return null;
}
+        /**
+         * Tries to answer a text query from the collection's full text index. In every
+         * case the analyzer chosen for the query is stored in the 'analyzer' field and
+         * in the query parameters under PARAM_ANALYZER.
+         *
+         * @param ps pattern string the query applies to
+         * @param query text query
+         * @return NamedKeys built from the index matches, or null if the best full text
+         *         indexer is not a LuceneIndexer or it has no alias for the pattern
+         * @throws Exception if the default analyzer cannot be instantiated, or propagated
+         *         from parsing the query or querying the index
+         */
+        private Object queryTextIndex(String ps, String query) throws Exception {
+            IndexPattern pattern = new IndexPattern(symbols, ps, nsMap);
+
+            // check if there is full text indexer for this collection
+            Indexer idx = context.getIndexManager().getBestIndexer(Indexer.STYLE_FULLTEXT, pattern);
+            if (!(idx instanceof LuceneIndexer)) {
+                // there is no Lucene indexer, fall back to default analyzer
+                analyzer = (Analyzer) Class.forName(LuceneIndexer.DEFANALYZER).newInstance();
+                parameters.put(PARAM_ANALYZER, analyzer);
+                return null;
+            }
+
+            LuceneIndexer textInd = (LuceneIndexer) idx;
+            analyzer = textInd.getAnalyzer();
+            parameters.put(PARAM_ANALYZER, analyzer);
+
+            // see if index has matching pattern
+            String alias = textInd.getPatternAlias(pattern);
+            if (alias == null) {
+                return null;
+            }
+
+            // Queries that contain 'NOT', '!', '-' operators cannot be used as they are,
+            // because LuceneIndexer searches for documents: in that context a "NOT term"
+            // query finds documents where 'term' does not appear in the field at all.
+            // For XPath, however, it means that 'term' must not appear in the text of the
+            // element currently under evaluation, while it may still appear in other
+            // elements that match the same IndexPattern.
+            //
+            // To make sure that all potentially matching documents are returned by the
+            // search, all subqueries with these operators are ignored at this step.
+            org.apache.lucene.search.Query parsedQuery = new SpecialQueryParser(alias, analyzer).parse(query);
+            IndexMatch[] matches = textInd.queryMatches(parsedQuery);
+            Key[] matchedKeys = QueryEngine.getUniqueKeys(matches);
+
+            boolean hasAttributeMarker = ps.indexOf('@') != -1;
+            return new NamedKeys(ps, hasAttributeMarker, matchedKeys);
+        }
+
/**
* queryComparison performs a comparison query use the operands that
are passed to it, and returns the resulting
* Keys.
@@ -1220,12 +1338,14 @@
public int keyPos = 0;
public NodeIterator ni;
public Object node;
+ private Map parameters;
- public ResultSet(Collection context, PrefixResolver pr, Key[]
keySet, String query) {
+ public ResultSet(Collection context, PrefixResolver pr, Key[] keySet,
String query, Map parameters) {
this.context = context;
this.pr = pr;
this.keySet = keySet;
this.query = query;
+ this.parameters = parameters;
errors = new ErrorListener() {
public void fatalError(TransformerException te) {
@@ -1254,7 +1374,21 @@
}
}
- private void prepareNextNode() throws XMLDBException,
TransformerException, DBException {
+        /**
+         * Creates the XPath object for this result set's query by reflectively invoking
+         * the XPath constructor held in XPATH. When XCOMPILER3 is set, the function table
+         * is passed as an additional constructor argument.
+         *
+         * @param pfx prefix resolver to create the XPath with
+         * @return new XPath for the query
+         * @throws RuntimeException if the XPath cannot be instantiated
+         */
+        private XPath createXPath(PrefixResolver pfx) {
+            try {
+                if (XCOMPILER3) {
+                    return (XPath) XPATH.newInstance(
+                            new Object[] {query, null, pfx, new Integer(XPath.SELECT), errors, functionTable});
+                } else {
+                    return (XPath) XPATH.newInstance(
+                            new Object[] {query, null, pfx, new Integer(XPath.SELECT), errors});
+                }
+            } catch (Exception e) {
+                // an XPath (not a Compiler) is created here; chain the cause for diagnosis
+                throw new RuntimeException("Could not instantiate XPath: " + e, e);
+            }
+        }
+
+ private void prepareNextNode() throws XMLDBException,
TransformerException, DBException {
node = null;
while (keyPos < keySet.length) {
@@ -1275,15 +1409,15 @@
continue;
}
- XPathContext xpc = new XPathContext();
+ XPathResolverContext xpc = new
XPathResolverContext(parameters);
PrefixResolver pfx;
if (pr == null) {
pfx = new PrefixResolverDefault(d.getDocumentElement());
- xp = new XPath(query, null, pfx, XPath.SELECT, errors);
+ xp = createXPath(pfx);
} else {
pfx = pr;
if (xp == null) {
- xp = new XPath(query, null, pfx, XPath.SELECT, errors);
+ xp = createXPath(pfx);
}
}
@@ -1407,4 +1541,23 @@
}
private final static NodeIterator EMPTY_NODE_ITERATOR = new
EmptyNodeIterator();
+
+    /**
+     * XPathContext that additionally carries a map of named parameters.
+     * The map passed to the constructor is used as is (not copied), so values
+     * set through this context are visible to every holder of the same map.
+     */
+    static class XPathResolverContext extends XPathContext {
+        private final Map params;
+
+        /**
+         * @param parameters map that backs getParameter and setParameter
+         */
+        public XPathResolverContext(Map parameters) {
+            params = parameters;
+        }
+
+        /**
+         * @param name parameter name
+         * @return value stored in the map under the name
+         */
+        public Object getParameter(String name) {
+            Object value = params.get(name);
+            return value;
+        }
+
+        /**
+         * Stores the value in the map under the given name.
+         * @param name parameter name
+         * @param object parameter value
+         */
+        public void setParameter(String name, Object object) {
+            params.put(name, object);
+        }
+    }
}
Added:
xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java
URL:
http://svn.apache.org/viewvc/xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java?rev=586647&view=auto
==============================================================================
---
xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java
(added)
+++
xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java
Fri Oct 19 17:32:43 2007
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ * $Id$
+ */
+
+package org.apache.xindice.core.query.ftsearch;
+
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.queryParser.CharStream;
+import org.apache.lucene.queryParser.QueryParserTokenManager;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.BooleanClause;
+
+import java.util.Vector;
+
+/**
+ * Specialized parser for text queries that ignores query clauses that have
+ * 'prohibited' modifier. This is only used for XPath full text search
+ * extension function to search existing text index, if any.
+ *
+ * @version $Revision$, $Date$
+ */
+public class SpecialQueryParser extends QueryParser {
+    private static final int CONJ_AND = 1;
+    private static final int CONJ_OR = 2;
+
+    private static final int MOD_NOT = 10;
+    private static final int MOD_REQ = 11;
+
+    /**
+     * Clause lists (compared by identity) whose most recently parsed clause was
+     * prohibited and therefore dropped. Entries are kept for the lifetime of the
+     * parser instance.
+     */
+    private final java.util.Map droppedLast = new java.util.IdentityHashMap();
+
+    /**
+     * @param f default field for query terms
+     * @param a analyzer used for the query text
+     */
+    public SpecialQueryParser(String f, Analyzer a) {
+        super(f, a);
+    }
+
+    /**
+     * @param stream character stream passed to the superclass constructor
+     */
+    public SpecialQueryParser(CharStream stream) {
+        super(stream);
+    }
+
+    /**
+     * @param tm token manager passed to the superclass constructor
+     */
+    public SpecialQueryParser(QueryParserTokenManager tm) {
+        super(tm);
+    }
+
+    /**
+     * This method is slightly modified copy of superclass method, where it ignores
+     * boolean clauses that have 'prohibited' modifier.
+     * <p>
+     * Because prohibited clauses are not added to the list, a conjunction that follows
+     * such a clause must not alter the last clause of the list: that clause is an
+     * earlier, unrelated one. Making it required would narrow the query and could
+     * exclude documents that match the original query.
+     *
+     * @param clauses clause list under construction
+     * @param conj conjunction that introduced the clause
+     * @param mods modifier of the clause
+     * @param q query of the clause, may be null
+     * @see QueryParser#addClause(java.util.Vector, int, int, org.apache.lucene.search.Query)
+     */
+    protected void addClause(Vector clauses, int conj, int mods, Query q) {
+
+        // The conjunction refers to the clause parsed right before this one; it can be
+        // adjusted only if that clause was actually kept in the list
+        boolean precedingKept = clauses.size() > 0 && !droppedLast.containsKey(clauses);
+
+        // If this term is introduced by AND, make the preceding term required,
+        // unless it's already prohibited
+        if (precedingKept && conj == CONJ_AND) {
+            BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size() - 1);
+            if (!c.isProhibited()) {
+                c.setOccur(BooleanClause.Occur.MUST);
+            }
+        }
+
+        if (precedingKept && getDefaultOperator() == AND_OPERATOR && conj == CONJ_OR) {
+            // If this term is introduced by OR, make the preceding term optional,
+            // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
+            // notice if the input is a OR b, first term is parsed as required; without
+            // this modification a OR b would parsed as +a OR b
+            BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size() - 1);
+            if (!c.isProhibited()) {
+                c.setOccur(BooleanClause.Occur.SHOULD);
+            }
+        }
+
+        // We might have been passed a null query; the term might have been
+        // filtered away by the analyzer. Nothing is parsed into the list in that
+        // case, so the dropped/kept state of the preceding clause stays as it is.
+        if (q == null) {
+            return;
+        }
+
+        boolean required, prohibited;
+        if (getDefaultOperator() == OR_OPERATOR) {
+            // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
+            // introduced by NOT or -; make sure not to set both.
+            prohibited = (mods == MOD_NOT);
+            required = (mods == MOD_REQ);
+            if (conj == CONJ_AND && !prohibited) {
+                required = true;
+            }
+        } else {
+            // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED
+            // if not PROHIBITED and not introduced by OR
+            prohibited = (mods == MOD_NOT);
+            required = (!prohibited && conj != CONJ_OR);
+        }
+
+        if (prohibited) {
+            if (required) {
+                throw new RuntimeException("Clause cannot be both required and prohibited");
+            }
+            // prohibited clause is ignored; remember that so a following conjunction
+            // does not modify the clause before it
+            droppedLast.put(clauses, Boolean.TRUE);
+            return;
+        }
+
+        droppedLast.remove(clauses);
+        if (required) {
+            clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST));
+        } else {
+            clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD));
+        }
+    }
+}
Propchange:
xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange:
xml/xindice/trunk/java/src/org/apache/xindice/core/query/ftsearch/SpecialQueryParser.java
------------------------------------------------------------------------------
svn:keywords = Id Revision Author Date