Author: tommaso
Date: Sat Mar 8 16:17:14 2014
New Revision: 1575557
URL: http://svn.apache.org/r1575557
Log:
OAK-1507 - added MLT feature for Lucene index
Added:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java
(with props)
Modified:
jackrabbit/oak/trunk/oak-lucene/pom.xml
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
Modified: jackrabbit/oak/trunk/oak-lucene/pom.xml
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/pom.xml?rev=1575557&r1=1575556&r2=1575557&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/pom.xml (original)
+++ jackrabbit/oak/trunk/oak-lucene/pom.xml Sat Mar 8 16:17:14 2014
@@ -34,7 +34,7 @@
<properties>
<tika.version>1.3</tika.version>
- <lucene.version>4.6.1</lucene.version>
+ <lucene.version>4.7.0</lucene.version>
<known.issues>
<!-- Jackrabbit query tests -->
org.apache.jackrabbit.core.query.ExcerptTest#testMoreTextDotsAtEnd
<!-- OAK-318 -->
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java?rev=1575557&r1=1575556&r2=1575557&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndex.java
Sat Mar 8 16:17:14 2014
@@ -54,8 +54,8 @@ import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
-
import org.apache.jackrabbit.oak.plugins.index.aggregate.NodeAggregator;
+import org.apache.jackrabbit.oak.plugins.index.lucene.util.MoreLikeThisHelper;
import org.apache.jackrabbit.oak.query.fulltext.FullTextAnd;
import org.apache.jackrabbit.oak.query.fulltext.FullTextExpression;
import org.apache.jackrabbit.oak.query.fulltext.FullTextOr;
@@ -79,6 +79,7 @@ import org.apache.lucene.index.MultiFiel
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
@@ -104,11 +105,11 @@ import org.slf4j.LoggerFactory;
/**
* Provides a QueryIndex that does lookups against a Lucene-based index
- *
+ *
* <p>
* To define a lucene index on a subtree you have to add an
* <code>oak:index<code> node.
- *
+ *
* Under it follows the index definition node that:
* <ul>
* <li>must be of type <code>oak:QueryIndexDefinition</code></li>
@@ -137,9 +138,9 @@ import org.slf4j.LoggerFactory;
* }
* </code>
* </pre>
- *
+ *
* @see QueryIndex
- *
+ *
*/
public class LuceneIndex implements FulltextQueryIndex {
@@ -200,7 +201,7 @@ public class LuceneIndex implements Full
* { "a", "c" } is returned. If there are no relative properties, then one
* entry is returned (the empty string). If there is no expression, then an
* empty set is returned.
- *
+ *
* @param ft the full-text expression
* @return the set of relative paths (possibly empty)
*/
@@ -409,7 +410,7 @@ public class LuceneIndex implements Full
/**
* Get the Lucene query for the given filter.
- *
+ *
* @param filter the filter, including full-text constraint
* @param reader the Lucene reader
* @param nonFullTextConstraints whether non-full-text constraints (such a
@@ -431,12 +432,23 @@ public class LuceneIndex implements Full
}
PropertyRestriction pr =
filter.getPropertyRestriction(NATIVE_QUERY_FUNCTION);
if (pr != null) {
- QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
String query =
String.valueOf(pr.first.getValue(pr.first.getType()));
- try {
- qs.add(queryParser.parse(query));
- } catch (ParseException e) {
- throw new RuntimeException(e);
+ QueryParser queryParser = new QueryParser(VERSION, "", analyzer);
+ if (query.startsWith("mlt?")) {
+ String mltQueryString = query.replace("mlt?", "");
+ if (reader != null) {
+ Query moreLikeThis =
MoreLikeThisHelper.getMoreLikeThis(reader, analyzer, mltQueryString);
+ if (moreLikeThis != null) {
+ qs.add(moreLikeThis);
+ }
+ }
+ }
+ else {
+ try {
+ qs.add(queryParser.parse(query));
+ } catch (ParseException e) {
+ throw new RuntimeException(e);
+ }
}
}
else if (nonFullTextConstraints) {
@@ -773,8 +785,8 @@ public class LuceneIndex implements Full
/**
* Tries to merge back tokens that are split on relevant fulltext query
* wildcards ('*' or '?')
- *
- *
+ *
+ *
* @param text
* @param analyzer
* @return
Added:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java?rev=1575557&view=auto
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java
(added)
+++
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java
Sat Mar 8 16:17:14 2014
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.jackrabbit.oak.plugins.index.lucene.util;
+
+import java.io.StringReader;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.queries.mlt.MoreLikeThis;
+import org.apache.lucene.search.Query;
+
+/**
+ * Helper class for generating a Lucene {@link Query} from the native
+ * "more like this" query <code>String</code>, by configuring a
+ * {@link MoreLikeThis} instance with the parameters found in that string.
+ */
+public class MoreLikeThisHelper {
+
+    /**
+     * Builds a "more like this" query from a string of <code>key=value</code>
+     * pairs separated by <code>&amp;</code>, for example
+     * <code>stream.body=World&amp;mlt.fl=name&amp;mlt.mindf=0&amp;mlt.mintf=0</code>.
+     * <p>
+     * Recognized keys and the {@link MoreLikeThis} setting each one drives:
+     * <ul>
+     * <li><code>stream.body</code> - the text to find similar documents for</li>
+     * <li><code>mlt.fl</code> - comma separated field names (<code>setFieldNames</code>)</li>
+     * <li><code>mlt.mindf</code> - <code>setMinDocFreq</code></li>
+     * <li><code>mlt.mintf</code> - <code>setMinTermFreq</code></li>
+     * <li><code>mlt.boost</code> - <code>setBoost</code></li>
+     * <li><code>mlt.qf</code> - <code>setBoostFactor</code></li>
+     * <li><code>mlt.maxdf</code> - <code>setMaxDocFreq</code></li>
+     * <li><code>mlt.maxdfp</code> - <code>setMaxDocFreqPct</code></li>
+     * <li><code>mlt.maxntp</code> - <code>setMaxNumTokensParsed</code></li>
+     * <li><code>mlt.maxqt</code> - <code>setMaxQueryTerms</code></li>
+     * <li><code>mlt.maxwl</code> - <code>setMaxWordLen</code></li>
+     * <li><code>mlt.minwl</code> - <code>setMinWordLen</code></li>
+     * </ul>
+     * Keys that are not listed above are ignored.
+     *
+     * @param reader         the index reader handed to {@link MoreLikeThis}
+     * @param analyzer       the analyzer set on the {@link MoreLikeThis} instance
+     * @param mltQueryString the parameter string (without the leading <code>mlt?</code>)
+     * @return the generated query, or <code>null</code> if the string carries
+     *         no <code>stream.body</code> parameter
+     * @throws RuntimeException if a parameter has no <code>=</code> or an empty value,
+     *         a numeric value cannot be parsed, <code>stream.body</code> is given
+     *         without a usable <code>mlt.fl</code>, or Lucene fails to build the
+     *         query; the underlying failure is attached as the cause
+     */
+    public static Query getMoreLikeThis(IndexReader reader, Analyzer analyzer, String mltQueryString) {
+        MoreLikeThis mlt = new MoreLikeThis(reader);
+        mlt.setAnalyzer(analyzer);
+        try {
+            String text = null;
+            for (String param : mltQueryString.split("&")) {
+                // Split on the first '=' only, so that a value (typically the
+                // stream.body text) may itself contain '=' characters.
+                int separator = param.indexOf('=');
+                if (separator < 0 || separator == param.length() - 1) {
+                    throw new RuntimeException("Unparsable native Lucene MLT query: " + mltQueryString);
+                }
+                String key = param.substring(0, separator);
+                String value = param.substring(separator + 1);
+                if ("stream.body".equals(key)) {
+                    text = value;
+                } else if ("mlt.fl".equals(key)) {
+                    mlt.setFieldNames(value.split(","));
+                } else if ("mlt.mindf".equals(key)) {
+                    mlt.setMinDocFreq(Integer.parseInt(value));
+                } else if ("mlt.mintf".equals(key)) {
+                    mlt.setMinTermFreq(Integer.parseInt(value));
+                } else if ("mlt.boost".equals(key)) {
+                    mlt.setBoost(Boolean.parseBoolean(value));
+                } else if ("mlt.qf".equals(key)) {
+                    mlt.setBoostFactor(Float.parseFloat(value));
+                } else if ("mlt.maxdf".equals(key)) {
+                    mlt.setMaxDocFreq(Integer.parseInt(value));
+                } else if ("mlt.maxdfp".equals(key)) {
+                    mlt.setMaxDocFreqPct(Integer.parseInt(value));
+                } else if ("mlt.maxntp".equals(key)) {
+                    mlt.setMaxNumTokensParsed(Integer.parseInt(value));
+                } else if ("mlt.maxqt".equals(key)) {
+                    mlt.setMaxQueryTerms(Integer.parseInt(value));
+                } else if ("mlt.maxwl".equals(key)) {
+                    mlt.setMaxWordLen(Integer.parseInt(value));
+                } else if ("mlt.minwl".equals(key)) {
+                    mlt.setMinWordLen(Integer.parseInt(value));
+                }
+            }
+            if (text == null) {
+                // nothing to compare against, so there is no query to build
+                return null;
+            }
+            // The text is matched against the first configured field; fail with
+            // a descriptive message instead of a bare NPE / index error when
+            // mlt.fl is missing or names no field at all (e.g. "mlt.fl=,").
+            String[] fieldNames = mlt.getFieldNames();
+            if (fieldNames == null || fieldNames.length == 0) {
+                throw new RuntimeException("Native Lucene MLT query requires at least one field in mlt.fl: "
+                        + mltQueryString);
+            }
+            return mlt.like(new StringReader(text), fieldNames[0]);
+        } catch (Exception e) {
+            // keep the original failure as the cause so it is not lost
+            throw new RuntimeException("could not handle MLT query " + mltQueryString, e);
+        }
+    }
+}
Propchange:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/util/MoreLikeThisHelper.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified:
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java?rev=1575557&r1=1575556&r2=1575557&view=diff
==============================================================================
---
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
(original)
+++
jackrabbit/oak/trunk/oak-lucene/src/test/java/org/apache/jackrabbit/oak/plugins/index/lucene/LuceneIndexQueryTest.java
Sat Mar 8 16:17:14 2014
@@ -265,4 +265,23 @@ public class LuceneIndexQueryTest extend
assertFalse(result.hasNext());
}
+ @Test
+ public void testNativeMLTQuery() throws Exception {
+ String nativeQueryString = "select [jcr:path] from [nt:base] where
native('lucene', 'mlt?stream.body=World&mlt.fl=name&mlt.mindf=0&mlt.mintf=0')";
+
+ Tree tree = root.getTree("/");
+ Tree test = tree.addChild("test");
+ test.addChild("a").setProperty("name", "Hello World, today weather is
nice");
+ test.addChild("b").setProperty("name", "Cheers World, today weather is
quite nice");
+ tree.addChild("c");
+ root.commit();
+
+ Iterator<String> strings = executeQuery(nativeQueryString,
"JCR-SQL2").iterator();
+ assertTrue(strings.hasNext());
+ assertEquals("/test/a", strings.next());
+ assertTrue(strings.hasNext());
+ assertEquals("/test/b", strings.next());
+ assertFalse(strings.hasNext());
+ }
+
}