Author: rwesten
Date: Thu Jun 13 09:17:51 2013
New Revision: 1492591
URL: http://svn.apache.org/r1492591
Log:
STANBOL-1104: Implementation of STANBOL-1105 (proximity ranking) and
STANBOL-1106 (constraint boosts) for the SolrYard
Added:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/defaults/QueryConst.java
Modified:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrQueryFactory.java
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/AssignmentEncoder.java
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/GeEncoder.java
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/GtEncoder.java
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LeEncoder.java
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LtEncoder.java
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/RegexEncoder.java
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/WildcardEncoder.java
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java
Added:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/defaults/QueryConst.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/defaults/QueryConst.java?rev=1492591&view=auto
==============================================================================
---
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/defaults/QueryConst.java
(added)
+++
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/defaults/QueryConst.java
Thu Jun 13 09:17:51 2013
@@ -0,0 +1,33 @@
+package org.apache.stanbol.entityhub.yard.solr.defaults;
+
+import org.apache.stanbol.entityhub.servicesapi.query.FieldQuery;
+import org.apache.stanbol.entityhub.yard.solr.impl.SolrYard;
+
+
+/**
+ * Defines parameters used by the {@link FieldQuery} implementation of the
+ * SolrYard. Some of those might also be supported by the {@link SolrYard}
+ * configuration to set default values.
+ *
+ * @author Rupert Westenthaler
+ *
+ */
+public class QueryConst {
+ private QueryConst(){/*do not allow instances*/}
+
+ /**
+ * Property allowing to enable/disable the generation of Phrase queries for
+ * optional query terms (without wildcards). Values are expected to be
+ * {@link Boolean}
+ */
+ public static final String PHRASE_QUERY_STATE =
"stanbol.entityhub.yard.solr.query.phraseQuery";
+ /**
+ * The default state for the {@link #PHRASE_QUERY_STATE} (default: false)
+ */
+ public static final Boolean DEFAULT_PHRASE_QUERY_STATE = Boolean.FALSE;
+ /**
+ * Property allowing to set a query time boost for certain query terms.
+ * Values are expected to be floating point values greater than zero.
+ */
+ public static final String QUERY_BOOST =
"stanbol.entityhub.yard.solr.query.boost";
+}
Modified:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrQueryFactory.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrQueryFactory.java?rev=1492591&r1=1492590&r2=1492591&view=diff
==============================================================================
---
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrQueryFactory.java
(original)
+++
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/SolrQueryFactory.java
Thu Jun 13 09:17:51 2013
@@ -29,6 +29,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
@@ -60,6 +61,7 @@ import org.apache.stanbol.entityhub.serv
import
org.apache.stanbol.entityhub.servicesapi.query.Constraint.ConstraintType;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint.MODE;
import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;
+import org.apache.stanbol.entityhub.yard.solr.defaults.QueryConst;
import
org.apache.stanbol.entityhub.yard.solr.impl.queryencoders.AssignmentEncoder;
import
org.apache.stanbol.entityhub.yard.solr.impl.queryencoders.DataTypeEncoder;
import org.apache.stanbol.entityhub.yard.solr.impl.queryencoders.FieldEncoder;
@@ -264,7 +266,7 @@ public class SolrQueryFactory {
if (queryString.length() > 0) {
String qs = queryString.toString();
log.debug("QueryString: {}", qs);
- if (MLT_QUERY_TYPE.equals(query.getQueryType())) {
+ if (MLT_QUERY_TYPE.equals(query.getRequestHandler())) {
query.set(CommonParams.FQ, qs);
} else {
query.setQuery(qs);
@@ -349,12 +351,21 @@ public class SolrQueryFactory {
RangeConstraint rangeConstraint =
(RangeConstraint)indexConstraint.getConstraint();
// we need to find the Index DataType for the range query
IndexDataType dataType = null;
+ ConstraintValue lowerBound = new ConstraintValue();
+ ConstraintValue upperBound = new ConstraintValue();
+ //init the boosts
+ addBoost(lowerBound, rangeConstraint);
+ addBoost(upperBound, rangeConstraint);
+ //init IndexValues and check for the dataType
if (rangeConstraint.getLowerBound() != null) {
- dataType =
indexValueFactory.createIndexValue(rangeConstraint.getLowerBound()).getType();
+ IndexValue value =
indexValueFactory.createIndexValue(rangeConstraint.getLowerBound());
+ lowerBound.getValues().add(value);
+ dataType = value.getType();
}
if (rangeConstraint.getUpperBound() != null) {
- IndexDataType upperDataType =
indexValueFactory.createIndexValue(rangeConstraint.getUpperBound())
- .getType();
+ IndexValue value =
indexValueFactory.createIndexValue(rangeConstraint.getUpperBound());
+ upperBound.getValues().add(value);
+ IndexDataType upperDataType = value.getType();
if (dataType == null) {
dataType = upperDataType;
} else {
@@ -365,7 +376,7 @@ public class SolrQueryFactory {
"upper:[value=%s|datatype=%s])",
rangeConstraint.getLowerBound(), dataType,
rangeConstraint.getUpperBound(), upperDataType));
- }
+ }
}
}
if (dataType == null) {
@@ -377,11 +388,11 @@ public class SolrQueryFactory {
}
//set the value range
if (rangeConstraint.isInclusive()) {
- indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.LE,
rangeConstraint.getUpperBound());
- indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.GE,
rangeConstraint.getLowerBound());
+ indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.LE,
upperBound);
+ indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.GE,
lowerBound);
} else {
- indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.LT,
rangeConstraint.getUpperBound());
- indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.GT,
rangeConstraint.getLowerBound());
+ indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.LT,
upperBound);
+ indexConstraint.setFieldConstraint(IndexConstraintTypeEnum.GT,
lowerBound);
}
}
@@ -392,6 +403,15 @@ public class SolrQueryFactory {
private void initTextConstraint(IndexConstraint indexConstraint) {
TextConstraint textConstraint =
(TextConstraint)indexConstraint.getConstraint();
ConstraintValue constraintValue = new ConstraintValue();
+ //init the boost
+ addBoost(constraintValue, textConstraint);
+ //init the Phrase Query based on the ProximityRanking state
+ if(textConstraint.isProximityRanking() != null){
+ constraintValue.setProperty(QueryConst.PHRASE_QUERY_STATE,
textConstraint.isProximityRanking());
+ } else {
+ //TODO: maybe make the default configurable for the SolrYard
+ constraintValue.setProperty(QueryConst.PHRASE_QUERY_STATE,
QueryConst.DEFAULT_PHRASE_QUERY_STATE);
+ }
for(String text : textConstraint.getTexts()){
constraintValue.getValues().add(indexValueFactory.createIndexValue(
valueFactory.createText(text)));
@@ -417,7 +437,16 @@ public class SolrQueryFactory {
"PatterType %s not supported for Solr Index Queries!",
textConstraint.getPatternType()));
}
}
-
+ /**
+ * Utility method that copies over the {@link Constraint#getBoost()} value
+ * to the {@link ConstraintValue}
+ */
+ private void addBoost(ConstraintValue constraintValue, Constraint
constraint){
+ Double boost = constraint.getBoost();
+ if(boost != null && boost != 1.0){
+ constraintValue.setProperty(QueryConst.QUERY_BOOST,
constraint.getBoost());
+ }
+ }
/**
* @param indexConstraint
* @param refConstraint
@@ -461,6 +490,7 @@ public class SolrQueryFactory {
}
} //else empty we will initialise based on the first parsed value!
ConstraintValue constraintValue = new
ConstraintValue(valueConstraint.getMode());
+ addBoost(constraintValue, valueConstraint); //init the boost
for(Object value : valueConstraint.getValues()){
IndexValue indexValue;
if(indexDataType == null){ // if no supported types are present
@@ -628,11 +658,13 @@ public class SolrQueryFactory {
* @author Rupert Westenthaler
*
*/
- public static class ConstraintValue implements Iterable<IndexValue>{
+ public static class ConstraintValue implements Iterable<IndexValue> {
private final MODE mode;
private final Set<IndexValue> values = new LinkedHashSet<IndexValue>();
+ private Map<String,Object> properties;
+
public ConstraintValue() {
this(null);
}
@@ -653,6 +685,35 @@ public class SolrQueryFactory {
public Iterator<IndexValue> iterator() {
return values.iterator();
}
+ /**
+ * Sets a property
+ * @param key the key
+ * @param value the value
+ * @return the old value or <code>null</code> if none.
+ */
+ public Object setProperty(String key, Object value){
+ if(key == null){
+ return null;
+ }
+ if(properties == null){
+ if(value != null){
+ properties = new HashMap<String,Object>();
+ } else {
+ return null;
+ }
+ }
+ return properties.put(key, value);
+ }
+
+ public Object getProperty(String key){
+ return properties == null ? null : properties.get(key);
+ }
+
+ public Double getBoost() {
+ return properties == null ? null :
+ (Double)properties.get(QueryConst.QUERY_BOOST);
+ }
+
}
/**
* Class internally used to process FieldConstraint. This class accesses
the
Modified:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/AssignmentEncoder.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/AssignmentEncoder.java?rev=1492591&r1=1492590&r2=1492591&view=diff
==============================================================================
---
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/AssignmentEncoder.java
(original)
+++
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/AssignmentEncoder.java
Thu Jun 13 09:17:51 2013
@@ -16,12 +16,17 @@
*/
package org.apache.stanbol.entityhub.yard.solr.impl.queryencoders;
+import static
org.apache.stanbol.entityhub.yard.solr.query.QueryUtils.encodePhraseQuery;
+
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
+import org.apache.commons.lang.StringUtils;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint.MODE;
+import org.apache.stanbol.entityhub.yard.solr.defaults.QueryConst;
import
org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.ConstraintValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValueFactory;
@@ -31,6 +36,7 @@ import org.apache.stanbol.entityhub.yard
import org.apache.stanbol.entityhub.yard.solr.query.IndexConstraintTypeEncoder;
import org.apache.stanbol.entityhub.yard.solr.query.IndexConstraintTypeEnum;
import org.apache.stanbol.entityhub.yard.solr.query.QueryUtils;
+import org.apache.stanbol.entityhub.yard.solr.query.QueryUtils.QueryTerm;
/**
* Encodes the Assignment of the field to an value. If a value is parsed, than
it encodes that the field must
@@ -40,8 +46,8 @@ import org.apache.stanbol.entityhub.yard
*/
public class AssignmentEncoder implements
IndexConstraintTypeEncoder<ConstraintValue> {
- private static final ConstraintTypePosition POS = new
ConstraintTypePosition(PositionType.assignment);
- private static final String EQ = ":";
+ public static final ConstraintTypePosition POS = new
ConstraintTypePosition(PositionType.assignment);
+ public static final String EQ = ":";
// private final IndexValueFactory indexValueFactory;
public AssignmentEncoder(IndexValueFactory indexValueFactory) {
@@ -58,11 +64,26 @@ public class AssignmentEncoder implement
return; //and return
} //else encode the values and add them depending on the MODE
Set<String> queryConstraints = new HashSet<String>();
+ Collection<String> phraseTerms = new ArrayList<String>();
for(IndexValue indexValue : value){
- String[] valueConstraints =
QueryUtils.encodeQueryValue(indexValue, true);
- if (valueConstraints != null) {
- for (String stringConstraint : valueConstraints) {
- queryConstraints.add(EQ + stringConstraint);
+ QueryTerm[] qts = QueryUtils.encodeQueryValue(indexValue, true);
+ if (qts != null) {
+ for (QueryTerm qt : qts) {
+ StringBuilder sb = new StringBuilder(qt.term.length() +
+ (qt.needsQuotes ? 3 : 1));
+ sb.append(EQ);
+ if(qt.needsQuotes){
+ sb.append('"').append(qt.term).append('"');
+ } else {
+ sb.append(qt.term);
+ }
+ if(value.getBoost() != null){
+ sb.append("^").append(value.getBoost());
+ }
+ queryConstraints.add(sb.toString());
+ if(!qt.hasWildcard && qt.isText) {
+ phraseTerms.add(qt.term);
+ }
}
} else {
queryConstraints.add(EQ);
@@ -77,11 +98,24 @@ public class AssignmentEncoder implement
if(value.getMode() == MODE.all){
//in all mode we need to add all values in a single call
constraint.addEncoded(POS, queryConstraints);
+ //NOTE also that for ALL mode Phrase queries do not make sense, as
+ // they would weaken the selection criteria
+ } else {
+ if(phraseTerms.size() > 1){
+ Boolean state = (Boolean)
value.getProperty(QueryConst.PHRASE_QUERY_STATE);
+ if(state != null && state.booleanValue()){
+ StringBuilder sb = encodePhraseQuery(phraseTerms);
+ sb.insert(0, EQ);
+ if(value.getBoost() != null){
+ sb.append("^").append(value.getBoost());
+ }
+ constraint.addEncoded(POS, sb.toString());
+ }//phrase query deactivated
+ } //else for less than two terms we can not build a phrase query
}
}
-
@Override
public boolean supportsDefault() {
return true;
Modified:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/GeEncoder.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/GeEncoder.java?rev=1492591&r1=1492590&r2=1492591&view=diff
==============================================================================
---
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/GeEncoder.java
(original)
+++
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/GeEncoder.java
Thu Jun 13 09:17:51 2013
@@ -19,6 +19,7 @@ package org.apache.stanbol.entityhub.yar
import java.util.Arrays;
import java.util.Collection;
+import
org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.ConstraintValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValueFactory;
import org.apache.stanbol.entityhub.yard.solr.query.ConstraintTypePosition;
@@ -47,6 +48,10 @@ public class GeEncoder implements IndexC
indexValue = null; // default value
} else if (value instanceof IndexValue) {
indexValue = (IndexValue) value;
+ } else if (value instanceof ConstraintValue){
+ ConstraintValue cv = (ConstraintValue) value;
+ indexValue = cv.getValues() == null || cv.getValues().isEmpty() ?
null :
+ cv.getValues().iterator().next();
} else {
indexValue = indexValueFactory.createIndexValue(value);
}
Modified:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/GtEncoder.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/GtEncoder.java?rev=1492591&r1=1492590&r2=1492591&view=diff
==============================================================================
---
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/GtEncoder.java
(original)
+++
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/GtEncoder.java
Thu Jun 13 09:17:51 2013
@@ -19,6 +19,7 @@ package org.apache.stanbol.entityhub.yar
import java.util.Arrays;
import java.util.Collection;
+import
org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.ConstraintValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValueFactory;
import org.apache.stanbol.entityhub.yard.solr.query.ConstraintTypePosition;
@@ -47,6 +48,10 @@ public class GtEncoder implements IndexC
indexValue = null; // default value
} else if (value instanceof IndexValue) {
indexValue = (IndexValue) value;
+ } else if (value instanceof ConstraintValue){
+ ConstraintValue cv = (ConstraintValue) value;
+ indexValue = cv.getValues() == null || cv.getValues().isEmpty() ?
null :
+ cv.getValues().iterator().next();
} else {
indexValue = indexValueFactory.createIndexValue(value);
}
Modified:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LeEncoder.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LeEncoder.java?rev=1492591&r1=1492590&r2=1492591&view=diff
==============================================================================
---
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LeEncoder.java
(original)
+++
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LeEncoder.java
Thu Jun 13 09:17:51 2013
@@ -19,6 +19,7 @@ package org.apache.stanbol.entityhub.yar
import java.util.Arrays;
import java.util.Collection;
+import
org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.ConstraintValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValueFactory;
import org.apache.stanbol.entityhub.yard.solr.query.ConstraintTypePosition;
@@ -43,17 +44,31 @@ public class LeEncoder implements IndexC
@Override
public void encode(EncodedConstraintParts constraint, Object value) {
IndexValue indexValue;
+ Double boost = null;
if (value == null) {
indexValue = null; // default value
} else if (value instanceof IndexValue) {
indexValue = (IndexValue) value;
+ } else if (value instanceof ConstraintValue){
+ ConstraintValue cv = (ConstraintValue) value;
+ indexValue = cv.getValues() == null || cv.getValues().isEmpty() ?
null :
+ cv.getValues().iterator().next();
+ boost = cv.getBoost();
} else {
indexValue = indexValueFactory.createIndexValue(value);
}
- String geConstraint = String
- .format("TO %s]", indexValue != null && indexValue.getValue()
!= null
- && !indexValue.getValue().isEmpty() ?
indexValue.getValue() : DEFAULT);
- constraint.addEncoded(POS, geConstraint);
+ StringBuilder leConstraint = new StringBuilder("TO ");
+ if(indexValue != null && indexValue.getValue() != null
+ && !indexValue.getValue().isEmpty()){
+ leConstraint.append(indexValue.getValue());
+ } else {
+ leConstraint.append(DEFAULT);
+ }
+ leConstraint.append(']');
+ if(boost != null){
+ leConstraint.append("^").append(boost);
+ }
+ constraint.addEncoded(POS, leConstraint.toString());
}
@Override
Modified:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LtEncoder.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LtEncoder.java?rev=1492591&r1=1492590&r2=1492591&view=diff
==============================================================================
---
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LtEncoder.java
(original)
+++
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/LtEncoder.java
Thu Jun 13 09:17:51 2013
@@ -19,6 +19,7 @@ package org.apache.stanbol.entityhub.yar
import java.util.Arrays;
import java.util.Collection;
+import
org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.ConstraintValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValueFactory;
import org.apache.stanbol.entityhub.yard.solr.query.ConstraintTypePosition;
@@ -42,18 +43,32 @@ public class LtEncoder implements IndexC
@Override
public void encode(EncodedConstraintParts constraint, Object value) {
+ Double boost = null;
IndexValue indexValue;
if (value == null) {
indexValue = null; // default value
} else if (value instanceof IndexValue) {
indexValue = (IndexValue) value;
+ } else if (value instanceof ConstraintValue){
+ ConstraintValue cv = (ConstraintValue) value;
+ indexValue = cv.getValues() == null || cv.getValues().isEmpty() ?
null :
+ cv.getValues().iterator().next();
+ boost = cv.getBoost();
} else {
indexValue = indexValueFactory.createIndexValue(value);
}
- String geConstraint = String
- .format("TO %s}", indexValue != null && indexValue.getValue()
!= null
- && !indexValue.getValue().isEmpty() ?
indexValue.getValue() : DEFAULT);
- constraint.addEncoded(POS, geConstraint);
+ StringBuilder ltConstraint = new StringBuilder("TO ");
+ if(indexValue != null && indexValue.getValue() != null
+ && !indexValue.getValue().isEmpty()){
+ ltConstraint.append(indexValue.getValue());
+ } else {
+ ltConstraint.append(DEFAULT);
+ }
+ ltConstraint.append('}');
+ if(boost != null){
+ ltConstraint.append("^").append(boost);
+ }
+ constraint.addEncoded(POS, ltConstraint.toString());
}
@Override
Modified:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/RegexEncoder.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/RegexEncoder.java?rev=1492591&r1=1492590&r2=1492591&view=diff
==============================================================================
---
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/RegexEncoder.java
(original)
+++
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/RegexEncoder.java
Thu Jun 13 09:17:51 2013
@@ -76,8 +76,14 @@ public class RegexEncoder implements Ind
"This encoder does not support the IndexDataType %s
(supported: %s)", indexValue.getType(),
SUPPORTED_TYPES));
} else {
- // TODO: Implement some REGEX to WILDCard conversion for
Solr
- queryConstraints.add(indexValue.getValue().toLowerCase());
+ // NOTE that not all regex queries can be supported by Solr
+ // see https://issues.apache.org/jira/browse/LUCENE-2604
+ StringBuilder sb = new
StringBuilder(indexValue.getValue().length()+2);
+ sb.append('/').append(indexValue.getValue()).append('/');
+ if(value.getBoost() != null){
+ sb.append('^').append(value.getBoost());
+ }
+ queryConstraints.add(sb.toString());
}
if(value.getMode() == MODE.any){ //in any mode
//we need to add constraints separately (to connect them
with OR)
Modified:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/WildcardEncoder.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/WildcardEncoder.java?rev=1492591&r1=1492590&r2=1492591&view=diff
==============================================================================
---
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/WildcardEncoder.java
(original)
+++
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/impl/queryencoders/WildcardEncoder.java
Thu Jun 13 09:17:51 2013
@@ -16,6 +16,9 @@
*/
package org.apache.stanbol.entityhub.yard.solr.impl.queryencoders;
+import static
org.apache.stanbol.entityhub.yard.solr.query.QueryUtils.encodePhraseQuery;
+
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
@@ -24,6 +27,7 @@ import java.util.Set;
import org.apache.stanbol.entityhub.servicesapi.query.ValueConstraint.MODE;
import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;
+import org.apache.stanbol.entityhub.yard.solr.defaults.QueryConst;
import
org.apache.stanbol.entityhub.yard.solr.impl.SolrQueryFactory.ConstraintValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexDataType;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValue;
@@ -34,6 +38,7 @@ import org.apache.stanbol.entityhub.yard
import org.apache.stanbol.entityhub.yard.solr.query.IndexConstraintTypeEncoder;
import org.apache.stanbol.entityhub.yard.solr.query.IndexConstraintTypeEnum;
import org.apache.stanbol.entityhub.yard.solr.query.QueryUtils;
+import org.apache.stanbol.entityhub.yard.solr.query.QueryUtils.QueryTerm;
public class WildcardEncoder implements
IndexConstraintTypeEncoder<ConstraintValue> {
@@ -62,6 +67,8 @@ public class WildcardEncoder implements
}
// encode the value based on the type
Set<String> queryConstraints = new HashSet<String>();
+ //the query constraints used for the phrase constraint
+ Collection<String> phraseTerms = new ArrayList<String>();
for(IndexValue indexValue : value){
if (indexValue != null) {
if (!SUPPORTED_TYPES.contains(indexValue.getType())) {
@@ -69,7 +76,24 @@ public class WildcardEncoder implements
"This encoder does not support the IndexDataType %s
(supported: %s)", indexValue.getType(),
SUPPORTED_TYPES));
} else {
-
queryConstraints.addAll(Arrays.asList(QueryUtils.encodeQueryValue(indexValue,
false)));
+ for(QueryTerm qt : QueryUtils.encodeQueryValue(indexValue,
false)){
+ StringBuilder sb = new StringBuilder(qt.needsQuotes ?
+ qt.term.length()+2 : 0);
+ if(qt.needsQuotes){
+ sb.append('"').append(qt.term).append('"');
+ queryConstraints.add(sb.toString());
+ } else {
+ queryConstraints.add(qt.term);
+ }
+ if(value.getBoost() != null){
+ sb.append("^").append(value.getBoost());
+ }
+ if(!qt.hasWildcard && qt.isText) {
+ //phrases do not work with wildcard and are only
+ //relevant for texts
+ phraseTerms.add(qt.term);
+ }
+ }
}
if(value.getMode() == MODE.any){ //in any mode
//we need to add constraints separately (to connect them
with OR)
@@ -81,6 +105,17 @@ public class WildcardEncoder implements
if(value.getMode() == MODE.all){ // an all mode we need to add all
//constraint in a single call (to connect them with AND)
constraint.addEncoded(POS, queryConstraints);
+ } else {
+ if(phraseTerms.size() > 1){
+ Boolean state = (Boolean)
value.getProperty(QueryConst.PHRASE_QUERY_STATE);
+ if(state != null && state.booleanValue()){
+ StringBuilder sb = encodePhraseQuery(phraseTerms);
+ if(value.getBoost() != null){
+ sb.append("^").append(value.getBoost());
+ }
+ constraint.addEncoded(POS, sb.toString());
+ }
+ }
}
}
Modified:
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java?rev=1492591&r1=1492590&r2=1492591&view=diff
==============================================================================
---
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java
(original)
+++
stanbol/trunk/entityhub/yard/solr/src/main/java/org/apache/stanbol/entityhub/yard/solr/query/QueryUtils.java
Thu Jun 13 09:17:51 2013
@@ -20,12 +20,14 @@ import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
@@ -35,6 +37,7 @@ import org.apache.lucene.analysis.tokena
import org.apache.lucene.analysis.util.TokenizerFactory;
import org.apache.stanbol.commons.solr.utils.SolrUtil;
import org.apache.stanbol.entityhub.yard.solr.defaults.IndexDataTypeEnum;
+import
org.apache.stanbol.entityhub.yard.solr.impl.queryencoders.AssignmentEncoder;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValue;
import org.apache.stanbol.entityhub.yard.solr.model.IndexValueFactory;
@@ -82,11 +85,11 @@ public final class QueryUtils {
* not escaped.
* @return the (possible multiple) values that need to be connected with
AND
*/
- public static String[] encodeQueryValue(IndexValue indexValue, boolean
escape) {
+ public static QueryTerm[] encodeQueryValue(IndexValue indexValue, boolean
escape) {
if (indexValue == null) {
return null;
}
- String[] queryConstraints;
+ QueryTerm[] queryConstraints;
String value = indexValue.getValue();
if (escape) {
value = SolrUtil.escapeSolrSpecialChars(value);
@@ -96,27 +99,24 @@ public final class QueryUtils {
if (IndexDataTypeEnum.TXT.getIndexType().equals(indexValue.getType()))
{
if(escape) {
//value does not contain '*' and '?' as they would be escaped.
- queryConstraints = new String[] {
- new StringBuilder(value).insert(0,
'"').append('"').toString()
- };
+ queryConstraints = new QueryTerm[] {new
QueryTerm(value,false,true, true)};
} else { //non escaped strings might contain wildcard chars '*',
'?'
//those need to be treated specially (STANBOL-607)
- //Change to 2nd param to false after switching to Solr 3.6+
(see SOLR-2438)
- queryConstraints = parseWildcardQueryTerms(value, true);
+ //Changed 2nd param to false as Stanbol now uses Solr 3.6+
(see SOLR-2438)
+ queryConstraints = parseWildcardQueryTerms(value, false);
}
} else if
(IndexDataTypeEnum.STR.getIndexType().equals(indexValue.getType())) {
if(escape){
//rw: 20120314: respect case sensitivity for escaped (non
wildcard)
- queryConstraints = new String[] { value.indexOf(' ')>=0 ?
- '"'+value+'"' : value
- };
+ queryConstraints = new QueryTerm[] { new QueryTerm(value,
false,
+ value.indexOf(' ') >= 0 ? true : false, true)};
} else { //encode non
//rw: 20120314: respect case sensitivity for escaped (non
wildcard)
//Change to 2nd param to false after switching to Solr 3.6+
(see SOLR-2438)
queryConstraints = parseWildcardQueryTerms(value, true);
}
} else {
- queryConstraints = new String[] {value};
+ queryConstraints = new QueryTerm[] {new
QueryTerm(value,false,false,false)};
}
return queryConstraints;
}
@@ -161,6 +161,25 @@ public final class QueryUtils {
}
/**
+ * Represents a term within a SolrQuery.
+ * @author Rupert Westenthaler
+ *
+ */
+ public static class QueryTerm {
+ public final boolean hasWildcard;
+ public final boolean needsQuotes;
+ public final String term;
+ public final boolean isText;
+
+ private QueryTerm(String term, boolean hasWildcard, boolean
needsQuotes, boolean isText){
+ this.term = term;
+ this.hasWildcard = hasWildcard;
+ this.needsQuotes = needsQuotes;
+ this.isText = isText;
+ }
+ }
+
+ /**
* Parses query terms for Wildcard queries as described in the first
* comment of STANBOL-607. <p>
* As an example the String:
@@ -178,16 +197,16 @@ public final class QueryUtils {
* @return the query terms
* @throws IOException
*/
- private static String[] parseWildcardQueryTerms(String value,boolean
loewercaseWildcardTokens) {
+ private static QueryTerm[] parseWildcardQueryTerms(String value,boolean
loewercaseWildcardTokens) {
//This assumes that the Tokenizer does tokenize '*' and '?',
//what makes it a little bit tricky.
Tokenizer tokenizer = new ICUTokenizer(new
StringReader(value),tokenizerConfig);
Matcher m = WILDCARD_QUERY_CHAR_PATTERN.matcher(value);
int next = m.find()?m.start()+1:-1;
if(next < 0){ //No wildcard
- return new String[]{'"'+value+'"'};
+ return new QueryTerm[]{new QueryTerm(value, false, true, true)};
}
- ArrayList<String> queryElements = new ArrayList<String>(5);
+ ArrayList<QueryTerm> queryElements = new ArrayList<QueryTerm>(5);
int lastAdded = -1;
int lastOffset = 0;
boolean foundWildcard = false;
@@ -214,7 +233,7 @@ public final class QueryUtils {
queryElement = queryElement.toLowerCase();
}
- queryElements.add(queryElement);
+ queryElements.add(new QueryTerm(queryElement, true,
false, true));
lastAdded = offset.startOffset(); //previous token
consumed
//set to the start of the current token
foundWildcard = false;
@@ -223,7 +242,7 @@ public final class QueryUtils {
if(loewercaseWildcardTokens){
queryElement = queryElement.toLowerCase();
}
- queryElements.add(queryElement);
+ queryElements.add(new
QueryTerm(queryElement,true,false, true));
lastAdded = -1; //consume the current token
foundWildcard = false;
}
@@ -235,7 +254,7 @@ public final class QueryUtils {
// a single word
if(!foundWildcard && lastAdded<lastOffset){
String queryElement =
value.substring(lastAdded,lastOffset);
- queryElements.add('"'+queryElement+'"');
+ queryElements.add(new
QueryTerm(queryElement,false,true, true));
lastAdded = offset.startOffset();
}//else multiple wildcards in a single token
foundWildcard = true;
@@ -252,12 +271,24 @@ public final class QueryUtils {
queryElement = queryElement.toLowerCase();
}
if(foundWildcard){
- queryElements.add(queryElement);
+ queryElements.add(new QueryTerm(queryElement,true,false,
true));
} else {
- queryElements.add('"'+queryElement+'"');
+ queryElements.add(new QueryTerm(queryElement,false,true,
true));
}
}
- return queryElements.toArray(new String[queryElements.size()]);
+ return queryElements.toArray(new QueryTerm[queryElements.size()]);
+ }
+ /**
+ * Creates a Phrase query over the passed constraints
+ * @param phraseConstraints
+ */
+ public static StringBuilder encodePhraseQuery(Collection<String>
phraseConstraints) {
+ StringBuilder sb = new
StringBuilder(StringUtils.join(phraseConstraints, ' '));
+ sb.insert(0, '"');
+ sb.append("\"~");
+ //the span is 5+3*numTokens (11 ... 2 Tokens, 14 ... 3 Tokens ...)
+ sb.append(5+(3*phraseConstraints.size()));
+ return sb;
}