Author: rwesten
Date: Mon Apr 15 07:09:12 2013
New Revision: 1467873
URL: http://svn.apache.org/r1467873
Log:
STANBOL-1017: The FieldValueFilter now also supports filtering out Entities by
values of the configured Property: entries prefixed with '!' are excluded and '*' includes all other values.
Added:
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
(with props)
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
Modified:
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java?rev=1467873&r1=1467872&r2=1467873&view=diff
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
(original)
+++
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
Mon Apr 15 07:09:12 2013
@@ -16,14 +16,17 @@
*/
package org.apache.stanbol.entityhub.indexing.core.processor;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.stanbol.commons.namespaceprefix.NamespaceMappingUtils;
+import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
import org.apache.stanbol.entityhub.indexing.core.EntityProcessor;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
@@ -50,29 +53,42 @@ public class FieldValueFilter implements
public static final String DEFAULT_FIELD = "rdf:type";
- public String field;
- public Collection<String> values;
+ protected String field;
+ protected boolean includeAll = false;
+ protected Collection<String> included;
+ protected Collection<String> exclude;
+ //now represented by adding "" to included and exclude
+ //boolean includeEmpty;
+
+ private NamespacePrefixProvider nsPrefixProvider;
+
+ public FieldValueFilter() {}
+
/**
- * Parsing 'null' or '' as value can be used to include entities that do
not
- * define any values for the configured {@link #field}
+ * Only for unit testing
*/
- boolean includeEmpty;
-
- private NamespacePrefixService nsPrefixService;
+ protected FieldValueFilter(NamespacePrefixProvider nsPrefixProvider,
String field, Object filterConfig){
+ this.nsPrefixProvider = nsPrefixProvider;
+ this.field = getUri(field);
+ parseFilterConfig(filterConfig);
+ }
@Override
public Representation process(Representation source) {
- if(includeEmpty && values.isEmpty()){ //no filter set
- return source;
+ if(includeAll && exclude.isEmpty()){
+ return source; //filter inactive
}
Iterator<Reference> refs = source.getReferences(field);
- if(includeEmpty && !refs.hasNext()){ //no values and includeNull
- return source;
+ if(!refs.hasNext()){ //no values and includeNull
+ return (includeAll && !exclude.contains("")) || //include and
empty not excluded
+ (!includeAll && included.contains("")) ? //empty is
included
+ source : null;
}
while(refs.hasNext()){
- //NOTE: if !includeEmpty values may be NULL (any value accepted)
- if(values == null || values.contains(refs.next().getReference())){
- return source;
+ String value = refs.next().getReference();
+ if((includeAll && !exclude.contains(value)) || //include and empty
not excluded
+ (!includeAll && included.contains(value))){ //empty is
included
+ return source;
}
}
//not found -> filter
@@ -95,64 +111,96 @@ public class FieldValueFilter implements
@Override
public void setConfiguration(Map<String,Object> config) {
IndexingConfig indexingConfig =
(IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
- nsPrefixService = indexingConfig.getNamespacePrefixService();
+ nsPrefixProvider = indexingConfig.getNamespacePrefixService();
Object value = config.get(PARAM_FIELD);
if(value == null || value.toString().isEmpty()){
- this.field =
NamespaceMappingUtils.getConfiguredUri(nsPrefixService, DEFAULT_FIELD);
+ this.field = getUri(DEFAULT_FIELD);
log.info("Using default Field {}",field);
} else {
- this.field =
NamespaceMappingUtils.getConfiguredUri(nsPrefixService, value.toString());
+ this.field = getUri(value.toString());
log.info("configured Field: {}",field);
}
value = config.get(PARAM_VALUES);
+ parseFilterConfig(value);
+ }
+
+ /**
+ * @param value
+ */
+ private void parseFilterConfig(Object value) {
+ Collection<String> values;
if(value instanceof String){
- String stringValue = value.toString().trim();
- if(stringValue.equals("*")){ // * -> deactivate Filtering
- this.values = Collections.emptySet();
- this.includeEmpty = true;
- } else {
- Set<String> values = new HashSet<String>();
- for(String fieldValue : stringValue.split(";")){
- if(fieldValue != null){
- if(fieldValue.isEmpty() ||
fieldValue.equalsIgnoreCase("null")){
- this.includeEmpty = true;
- } else {
-
values.add(NamespaceMappingUtils.getConfiguredUri(nsPrefixService, fieldValue));
- }
- }
+ values = Arrays.asList(value.toString().split(";"));
+ } else if (value instanceof String[]){
+ values = Arrays.asList((String[])value);
+ } else if(value == null){ // no values (accept all entities with any
value)
+ values = Collections.emptySet();
+ } else if(value instanceof Collection<?>){
+ values = (Collection<String>)value;
+ } else {
+ throw new IllegalArgumentException("Parameter '" + PARAM_VALUES
+ + "' must be of type String, String[] or Collection<String>
(present: "
+ + value.getClass()+")!");
+ }
+ if(values.isEmpty()){
+ includeAll = true;
+ this.included = values;
+ this.exclude = Collections.emptySet();
+ } else {
+ this.included = new HashSet<String>();
+ this.exclude = new HashSet<String>();
+ for(String entry : values) {
+ if(entry == null){ //NULL is a valid option, but we use ""
instead
+ entry = "";
}
- if(values.isEmpty() && !includeEmpty){
- throw new IllegalArgumentException("Parameter
"+PARAM_VALUES+'='+value+" does not contain a valid field value!");
- } else {
- this.values = values;
+ entry = entry.trim();
+ if(entry.equalsIgnoreCase("null")){
+ entry = "";
}
- }
- } else if (value instanceof String[]){
- String[] typeArray = (String[])value;
- if(typeArray.length == 0 || //if an empty array or
- typeArray.length == 1 && typeArray[0].equals("*")){ //only
a * is parsed
- this.values = Collections.emptySet(); // than deactivate
filtering
- this.includeEmpty = true;
- } else {
- Set<String> values = new HashSet<String>();
- for(String filterString : typeArray){
- if(filterString != null){
- if(filterString.isEmpty() ||
filterString.equalsIgnoreCase("null")){
- this.includeEmpty = true;
- } else {
-
values.add(NamespaceMappingUtils.getConfiguredUri(nsPrefixService,
filterString));
- }
+ if(!includeAll && entry.equals("*")){
+ includeAll = true;
+ continue;
+ }
+ boolean exclude = !entry.isEmpty() && entry.charAt(0) == '!';
+ if(exclude){
+ entry = entry.substring(1);
+ if(entry.equalsIgnoreCase("null")){
+ entry = "";
+ }
+ if(entry.equals("*")){
+ throw new IllegalArgumentException("'!*' is not
allowed in the config ("
+ + "it is the default if '*' is not present)!");
}
}
- if(values.isEmpty() && !this.includeEmpty){
- throw new IllegalArgumentException("Parameter
"+PARAM_VALUES+'='+value+" does not contain a valid field value!");
- } else {
- this.values = values;
+ String uri = getUri(entry);
+ if((exclude ? this.included : this.exclude).contains(uri)){
+ throw new IllegalArgumentException("'"+entry+"' both
included and excluded by the"
+ + "parsed configuration!");
}
+ //if exclude add to this.exclude otherwise to this.values
+ (exclude ? this.exclude : this.included).add(uri);
}
- } else {// no values (accept all entities with any value)
- values = Collections.emptySet();
}
}
+ /**
+ * @param entry
+ * @return
+ */
+ private String getUri(String entry) {
+ String uri;
+ String nsPrefix = NamespaceMappingUtils.getPrefix(entry);
+ if(nsPrefix != null){
+ String ns = nsPrefixProvider.getNamespace(nsPrefix);
+ if(ns == null){
+ throw new IllegalArgumentException("Unable to resolve
namesoace prefix used by '"
+ +entry+"' by using the NamespacePrefixService!");
+ }
+ uri = new StringBuilder(ns).append(entry,nsPrefix.length()+1,
entry.length()).toString();
+ } else {
+ uri = entry;
+ }
+ return uri;
+ }
+
}
Added:
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java?rev=1467873&view=auto
==============================================================================
---
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
(added)
+++
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
Mon Apr 15 07:09:12 2013
@@ -0,0 +1,114 @@
+package org.apache.stanbol.entityhub.indexing.core.processor;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider;
+import
org.apache.stanbol.commons.namespaceprefix.impl.NamespacePrefixProviderImpl;
+import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
+import org.apache.stanbol.entityhub.indexing.core.EntityProcessor;
+import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class FieldValueFilterTest {
+ private static final String FB = "http://rdf.freebase.com/ns/";
+
+ private static final String TEST_CONFIG = "prefix.config";
+
+ private static ValueFactory vf = InMemoryValueFactory.getInstance();
+
+ private static NamespacePrefixProvider nsPrefixProvider;
+
+ private static final Map<String,String> nsMappings = new
HashMap<String,String>();
+ static {
+ nsMappings.put("fb", FB);
+ nsMappings.put("rdf", NamespaceEnum.rdf.getNamespace());
+ nsMappings.put("rdfs", NamespaceEnum.rdfs.getNamespace());
+ nsMappings.put("skos", NamespaceEnum.skos.getNamespace());
+ nsMappings.put("foaf", NamespaceEnum.foaf.getNamespace());
+ }
+
+
+
+ @BeforeClass
+ public static void init() throws IOException{
+ nsPrefixProvider = new NamespacePrefixProviderImpl(nsMappings);
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testIncludeExcludeConfig1(){
+ new
FieldValueFilter(nsPrefixProvider,"rdf:type","foaf:Person;skos:Concept;!skos:Concept");
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testIncludeExcludeConfig2(){
+ new
FieldValueFilter(nsPrefixProvider,"rdf:type","foaf:Person;!skos:Concept;skos:Concept");
+ }
+
+
+
+ @Test
+ public void testIncludeConfig(){
+ EntityProcessor filter = new
FieldValueFilter(nsPrefixProvider,"rdf:type","foaf:Person");
+
+ Representation r = getRepresentation(NamespaceEnum.foaf+"Person");
+ Assert.assertNotNull(filter.process(r));
+
+ r = getRepresentation(NamespaceEnum.skos+"Concept");
+ Assert.assertNull(filter.process(r));
+
+ r = getRepresentation(NamespaceEnum.skos+"Concept",
NamespaceEnum.foaf+"Person");
+ Assert.assertNotNull(filter.process(r));
+
+ //test empty value
+ filter = new FieldValueFilter(nsPrefixProvider,"skos:releated","");
+ Assert.assertNotNull(filter.process(r));
+
+ filter = new FieldValueFilter(nsPrefixProvider,"skos:releated","null");
+ Assert.assertNotNull(filter.process(r));
+
+ filter = new FieldValueFilter(nsPrefixProvider,"skos:releated", null);
+ Assert.assertNotNull(filter.process(r));
+ }
+
+ @Test
+ public void testExcludeConfig(){
+ EntityProcessor filter = new
FieldValueFilter(nsPrefixProvider,"rdf:type","*;!foaf:Person");
+
+ Representation r = getRepresentation(NamespaceEnum.foaf+"Person");
+ Assert.assertNull(filter.process(r));
+
+ r = getRepresentation(NamespaceEnum.skos+"Concept");
+ Assert.assertNotNull(filter.process(r));
+
+ r = getRepresentation(NamespaceEnum.skos+"Concept",
NamespaceEnum.foaf+"Person");
+ Assert.assertNotNull(filter.process(r));
+
+ //test empty value
+ filter = new
FieldValueFilter(nsPrefixProvider,"skos:releated","*;!null");
+ Assert.assertNull(filter.process(r));
+
+ filter = new FieldValueFilter(nsPrefixProvider,"skos:releated","*;!");
+ Assert.assertNull(filter.process(r));
+
+ filter = new FieldValueFilter(nsPrefixProvider,"skos:releated",
"*;!;!foaf:Person");
+ Assert.assertNull(filter.process(r));
+ }
+
+ private Representation getRepresentation(String...types){
+ Representation r = vf.createRepresentation("urn:test");
+ for(String type : types){
+ r.add(NamespaceEnum.rdf+"type", vf.createReference(type));
+ }
+ return r;
+ }
+
+
+}
+
Propchange:
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
------------------------------------------------------------------------------
svn:mime-type = text/plain