Author: rwesten
Date: Fri May 10 09:31:40 2013
New Revision: 1480961
URL: http://svn.apache.org/r1480961
Log:
STANBOL-723, STANBOL-1054, STANBOL-1055: The MLT constraint now allows
defining the type of the parsed context. The namespace enum now defines the
disambiguation namespace.
Modified:
stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/query/SimilarityConstraint.java
Modified:
stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java?rev=1480961&r1=1480960&r2=1480961&view=diff
==============================================================================
---
stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
(original)
+++
stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/defaults/NamespaceEnum.java
Fri May 10 09:31:40 2013
@@ -397,13 +397,11 @@ public enum NamespaceEnum {
frbr("http://purl.org/vocab/frbr/core#"),
/**
- * Special namespace used for disambiguation metadata
- * @deprecated All none core namespaces where deprecated. Users should use
- * the NamespacePrefixService (module:
- * org.apache.stanbol.commons.namespaceprefixservice) instead (see also
- * <a
href="https://issues.apache.org/jira/browse/STANBOL-824">STANBOL-824)</a>
+ * Special namespace used for disambiguation metadata. NOTE that with STANBOL-1053
+ * the URI used for disambiguation changed.
+ *
*/
- disambiguation("dis","urn:stanbol.entityhub:disambiguation:"),
+
disambiguation("dis","http://stanbol.apache.org/ontology/disambiguation/disambiguation#"),
/*
* Old namespaces still kept for historical reasons
*/
Modified:
stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/query/SimilarityConstraint.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/query/SimilarityConstraint.java?rev=1480961&r1=1480960&r2=1480961&view=diff
==============================================================================
---
stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/query/SimilarityConstraint.java
(original)
+++
stanbol/trunk/entityhub/generic/servicesapi/src/main/java/org/apache/stanbol/entityhub/servicesapi/query/SimilarityConstraint.java
Fri May 10 09:31:40 2013
@@ -17,30 +17,81 @@
package org.apache.stanbol.entityhub.servicesapi.query;
import java.util.ArrayList;
+import java.util.Collection;
import java.util.Collections;
import java.util.List;
+import org.apache.stanbol.entityhub.servicesapi.defaults.DataTypeEnum;
+import org.apache.stanbol.entityhub.servicesapi.model.Reference;
+import org.apache.stanbol.entityhub.servicesapi.model.Text;
+
/**
* Ensure that results have fields that is contextually similar. The
implementation is typically based on a
* cosine similarity score a normalized vector space of term frequencies -
inverse document frequencies as
* done by the MoreLikeThis feature of Solr for instance.
- *
+ * <p>
* This type of constraint might not be supported by all the yard
implementations. If it is not supported it
* is just ignored.
+ * <p>
+ * With version <code>0.12.0</code> support for {@link Text} and {@link Reference}
+ * contexts was added. The {@link #getContextType()} can be used to determine the
+ * type of the parsed context and the {@link #getStringContext()},
+ * {@link #getTextContext()} and {@link #getReferenceContext()} methods can be
+ * used to get the typed context versions; {@link #getContext()} returns the string representation.
*/
public class SimilarityConstraint extends Constraint {
protected final String context;
+ protected final DataTypeEnum contextType;
+
protected final List<String> additionalFields;
+ private final Collection<String> languages;
+
+ /**
+ * Constructs a Similarity Constraint with a given context. The value is
+ * interpreted as {@link DataTypeEnum#Text} with unknown language.
+ * @param context the context
+ * @deprecated use one of the constructors explicitly parsing the
+ * {@link DataTypeEnum} or the languages (assuming {@link DataTypeEnum#Text})
+ */
public SimilarityConstraint(String context) {
- this(context, null);
+ this(context, DataTypeEnum.Text, null,null);
+ }
+ /**
+ * Constructs a Similarity Constraint with a given context. The value is
+ * interpreted as {@link DataTypeEnum#Text} with unknown language.
+ * @param context the context
+ * @param additionalFields additional fields to include in the similarity search
+ * @deprecated use one of the constructors explicitly parsing the
+ * {@link DataTypeEnum} or the languages (assuming {@link DataTypeEnum#Text})
+ */
+ public SimilarityConstraint(String context,List<String> additionalFields) {
+ this(context, DataTypeEnum.Text, null,null);
+ }
+ public SimilarityConstraint(Collection<String> context, Collection<String>
languages) {
+ this(getCollectionContext(context), DataTypeEnum.Text, languages,
null);
+ }
+ public SimilarityConstraint(Collection<String> context,DataTypeEnum
contextType) {
+ this(getCollectionContext(context), contextType, null, null);
+ }
+ public SimilarityConstraint(Collection<String> context, Collection<String>
languages,List<String> additionalFields) {
+ this(getCollectionContext(context), DataTypeEnum.Text, languages,
additionalFields);
+ }
+ public SimilarityConstraint(Collection<String> context,DataTypeEnum
contextType,List<String> additionalFields) {
+ this(getCollectionContext(context), contextType, null,
additionalFields);
}
- public SimilarityConstraint(String context, List<String> additionalFields)
{
+ private SimilarityConstraint(String context, DataTypeEnum contextType,
+ Collection<String> languages, List<String> additionalFields){
super(ConstraintType.similarity);
+ if(context == null){
+ throw new IllegalArgumentException("The parsed Context MUST NOT be
NULL nor empty");
+ }
this.context = context;
+ this.contextType = contextType;
+ this.languages = languages;
if(additionalFields == null || additionalFields.isEmpty()){
this.additionalFields = Collections.emptyList();
} else {
@@ -53,6 +104,7 @@ public class SimilarityConstraint extend
this.additionalFields = Collections.unmodifiableList(fields);
}
}
+
/**
* Additional fields used for similarity calculations
* @return
@@ -61,11 +113,45 @@ public class SimilarityConstraint extend
return additionalFields;
}
/**
- * The context used for checking the similarity
- * @return
+ * The languages for the Context, or <code>null</code> if none are defined
+ * @return the languages or <code>null</code> if none. NOTE that the
+ * returned collection may contain the <code>null</code> value
+ * as it represents the default language
+ */
+ public Collection<String> getLanguages() {
+ return languages;
+ }
+ /**
+ * Getter for the context
+ * @return the string representation of the context
*/
public String getContext() {
return context;
}
-
+
+ /**
+ * The type of the Context. Can be {@link DataTypeEnum#String String},
+ * {@link DataTypeEnum#Text Text} or {@link DataTypeEnum#Reference Reference}
+ * @return the type of the context.
+ */
+ public DataTypeEnum getContextType() {
+ return contextType;
+ }
+
+ private static String getCollectionContext(Collection<String> context) {
+ if(context == null || context.isEmpty()){
+ return null;
+ }
+ StringBuilder refContext = new StringBuilder();
+ boolean first = true;
+ for(String string : context){
+ if(!first){
+ refContext.append(' ');
+ } else {
+ first = false;
+ }
+ refContext.append(string);
+ }
+ return refContext.toString();
+ }
}