Author: rwesten
Date: Mon Apr 15 07:09:12 2013
New Revision: 1467873

URL: http://svn.apache.org/r1467873
Log:
STANBOL-1017: The FieldValueFilter now also supports excluding Entities based 
on the values of the configured property (exclude entries are prefixed with 
'!' and may be combined with '*' to include everything else).

Added:
    stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/
    stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java   (with props)
Modified:
    stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java

Modified: 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java?rev=1467873&r1=1467872&r2=1467873&view=diff
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
 (original)
+++ 
stanbol/trunk/entityhub/indexing/core/src/main/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilter.java
 Mon Apr 15 07:09:12 2013
@@ -16,14 +16,17 @@
 */
 package org.apache.stanbol.entityhub.indexing.core.processor;
 
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
 import org.apache.stanbol.commons.namespaceprefix.NamespaceMappingUtils;
+import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider;
 import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
 import org.apache.stanbol.entityhub.indexing.core.EntityProcessor;
 import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
@@ -50,29 +53,42 @@ public class FieldValueFilter implements
     
     public static final String DEFAULT_FIELD = "rdf:type";
 
-    public String field;
-    public Collection<String> values;
+    protected String field;
+    protected boolean includeAll = false;
+    protected Collection<String> included;
+    protected Collection<String> exclude;
+    //now represented by adding "" to included and exclude
+    //boolean includeEmpty;
+
+    private NamespacePrefixProvider nsPrefixProvider;
+    
+    public FieldValueFilter() {}
+    
     /**
-     * Parsing 'null' or '' as value can be used to include entities that do 
not
-     * define any values for the configured {@link #field}
+     * Only for unit testing
      */
-    boolean includeEmpty;
-
-    private NamespacePrefixService nsPrefixService;
+    protected FieldValueFilter(NamespacePrefixProvider nsPrefixProvider, 
String field, Object filterConfig){
+        this.nsPrefixProvider = nsPrefixProvider;
+        this.field = getUri(field);
+        parseFilterConfig(filterConfig);
+    }
     
     @Override
     public Representation process(Representation source) {
-        if(includeEmpty && values.isEmpty()){ //no filter set
-            return source;
+        if(includeAll && exclude.isEmpty()){
+            return source; //filter inactive
         }
         Iterator<Reference> refs = source.getReferences(field);
-        if(includeEmpty && !refs.hasNext()){ //no values and includeNull
-            return source;
+        if(!refs.hasNext()){ //no values and includeNull
+            return (includeAll && !exclude.contains("")) || //include and 
empty not excluded
+                    (!includeAll && included.contains("")) ? //empty is 
included
+                            source : null;
         }
         while(refs.hasNext()){
-            //NOTE: if !includeEmpty values may be NULL (any value accepted)
-            if(values == null || values.contains(refs.next().getReference())){
-                return source;
+            String value = refs.next().getReference();
+            if((includeAll && !exclude.contains(value)) || //include and empty 
not excluded
+                    (!includeAll && included.contains(value))){ //empty is 
included
+               return source; 
             }
         }
         //not found -> filter
@@ -95,64 +111,96 @@ public class FieldValueFilter implements
     @Override
     public void setConfiguration(Map<String,Object> config) {
         IndexingConfig indexingConfig = 
(IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
-        nsPrefixService = indexingConfig.getNamespacePrefixService();
+        nsPrefixProvider = indexingConfig.getNamespacePrefixService();
         Object value = config.get(PARAM_FIELD);
         if(value == null || value.toString().isEmpty()){
-            this.field = 
NamespaceMappingUtils.getConfiguredUri(nsPrefixService, DEFAULT_FIELD);
+            this.field = getUri(DEFAULT_FIELD);
             log.info("Using default Field {}",field);
         } else {
-            this.field = 
NamespaceMappingUtils.getConfiguredUri(nsPrefixService, value.toString());
+            this.field = getUri(value.toString());
             log.info("configured Field: {}",field);
         }
         value = config.get(PARAM_VALUES);
+        parseFilterConfig(value);
+    }
+
+    /**
+     * @param value
+     */
+    private void parseFilterConfig(Object value) {
+        Collection<String> values; 
         if(value instanceof String){
-            String stringValue = value.toString().trim();
-            if(stringValue.equals("*")){ // * -> deactivate Filtering
-                this.values = Collections.emptySet();
-                this.includeEmpty = true;
-            } else {
-                Set<String> values = new HashSet<String>();
-                for(String fieldValue : stringValue.split(";")){
-                    if(fieldValue != null){
-                        if(fieldValue.isEmpty() || 
fieldValue.equalsIgnoreCase("null")){
-                            this.includeEmpty = true;
-                        } else {
-                            
values.add(NamespaceMappingUtils.getConfiguredUri(nsPrefixService, fieldValue));
-                        }
-                    } 
+            values = Arrays.asList(value.toString().split(";"));
+        } else if (value instanceof String[]){
+            values = Arrays.asList((String[])value);
+        } else if(value == null){ // no values (accept all entities with any 
value)
+            values = Collections.emptySet();
+        } else if(value instanceof Collection<?>){
+            values = (Collection<String>)value;
+        } else {
+            throw new IllegalArgumentException("Parameter '" + PARAM_VALUES 
+                + "' must be of type String, String[] or Collection<String> 
(present: "
+                + value.getClass()+")!");
+        }
+        if(values.isEmpty()){
+            includeAll = true;
+            this.included = values;
+            this.exclude = Collections.emptySet();
+        } else {
+            this.included = new HashSet<String>();
+            this.exclude = new HashSet<String>();
+            for(String entry : values) {
+                if(entry == null){ //NULL is a valid option, but we use "" 
instead
+                    entry = "";
                 }
-                if(values.isEmpty() && !includeEmpty){
-                    throw new IllegalArgumentException("Parameter 
"+PARAM_VALUES+'='+value+" does not contain a valid field value!");
-                } else {
-                    this.values = values;
+                entry = entry.trim();
+                if(entry.equalsIgnoreCase("null")){
+                    entry = "";
                 }
-            }
-        } else if (value instanceof String[]){
-            String[] typeArray = (String[])value;
-            if(typeArray.length == 0 || //if an empty array or
-                    typeArray.length == 1 && typeArray[0].equals("*")){ //only 
a * is parsed
-                this.values = Collections.emptySet(); // than deactivate 
filtering
-                this.includeEmpty = true;
-            } else {
-                Set<String> values = new HashSet<String>();
-                for(String filterString : typeArray){
-                    if(filterString != null){
-                        if(filterString.isEmpty() || 
filterString.equalsIgnoreCase("null")){
-                            this.includeEmpty = true;
-                        } else {
-                            
values.add(NamespaceMappingUtils.getConfiguredUri(nsPrefixService, 
filterString));
-                        }
+                if(!includeAll && entry.equals("*")){
+                    includeAll = true;
+                    continue;
+                }
+                boolean exclude = !entry.isEmpty() && entry.charAt(0) == '!';
+                if(exclude){
+                    entry = entry.substring(1);
+                    if(entry.equalsIgnoreCase("null")){
+                        entry = "";
+                    }
+                    if(entry.equals("*")){
+                        throw new IllegalArgumentException("'!*' is not 
allowed in the config ("
+                            + "it is the default if '*' is not present)!");
                     }
                 }
-                if(values.isEmpty() && !this.includeEmpty){
-                    throw new IllegalArgumentException("Parameter 
"+PARAM_VALUES+'='+value+" does not contain a valid field value!");
-                } else {
-                    this.values = values;
+                String uri = getUri(entry);
+                if((exclude ? this.included : this.exclude).contains(uri)){
+                    throw new IllegalArgumentException("'"+entry+"' both 
included and excluded by the"
+                        + "parsed configuration!");
                 }
+                //if exclude add to this.exclude otherwise to this.values
+                (exclude ? this.exclude : this.included).add(uri);
             }
-        } else {// no values (accept all entities with any value)
-            values = Collections.emptySet();
         }
     }
 
+    /**
+     * @param entry
+     * @return
+     */
+    private String getUri(String entry) {
+        String uri; 
+        String nsPrefix = NamespaceMappingUtils.getPrefix(entry);
+        if(nsPrefix != null){
+            String ns = nsPrefixProvider.getNamespace(nsPrefix);
+            if(ns == null){
+                throw new IllegalArgumentException("Unable to resolve 
namesoace prefix used by '"
+                        +entry+"' by using the NamespacePrefixService!");
+            }
+            uri = new StringBuilder(ns).append(entry,nsPrefix.length()+1, 
entry.length()).toString();
+        } else {
+            uri = entry;
+        }
+        return uri;
+    }
+
 }

Added: 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java?rev=1467873&view=auto
==============================================================================
--- 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
 (added)
+++ 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
 Mon Apr 15 07:09:12 2013
@@ -0,0 +1,114 @@
+package org.apache.stanbol.entityhub.indexing.core.processor;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixProvider;
+import 
org.apache.stanbol.commons.namespaceprefix.impl.NamespacePrefixProviderImpl;
+import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
+import org.apache.stanbol.entityhub.indexing.core.EntityProcessor;
+import org.apache.stanbol.entityhub.servicesapi.defaults.NamespaceEnum;
+import org.apache.stanbol.entityhub.servicesapi.model.Representation;
+import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class FieldValueFilterTest {
+    private static final String FB = "http://rdf.freebase.com/ns/";;
+
+    private static final String TEST_CONFIG = "prefix.config";
+    
+    private static ValueFactory vf = InMemoryValueFactory.getInstance();
+    
+    private static NamespacePrefixProvider nsPrefixProvider;
+
+    private static final Map<String,String> nsMappings = new 
HashMap<String,String>();
+    static {
+        nsMappings.put("fb", FB);
+        nsMappings.put("rdf", NamespaceEnum.rdf.getNamespace());
+        nsMappings.put("rdfs", NamespaceEnum.rdfs.getNamespace());
+        nsMappings.put("skos", NamespaceEnum.skos.getNamespace());
+        nsMappings.put("foaf", NamespaceEnum.foaf.getNamespace());
+    }
+    
+    
+    
+    @BeforeClass
+    public static void init() throws IOException{
+        nsPrefixProvider = new NamespacePrefixProviderImpl(nsMappings);
+    }
+    
+    @Test(expected=IllegalArgumentException.class)
+    public void testIncludeExcludeConfig1(){
+        new 
FieldValueFilter(nsPrefixProvider,"rdf:type","foaf:Person;skos:Concept;!skos:Concept");
+    }
+    
+    @Test(expected=IllegalArgumentException.class)
+    public void testIncludeExcludeConfig2(){
+        new 
FieldValueFilter(nsPrefixProvider,"rdf:type","foaf:Person;!skos:Concept;skos:Concept");
+    }
+
+    
+    
+    @Test
+    public void testIncludeConfig(){
+        EntityProcessor filter = new 
FieldValueFilter(nsPrefixProvider,"rdf:type","foaf:Person");
+        
+        Representation r = getRepresentation(NamespaceEnum.foaf+"Person");
+        Assert.assertNotNull(filter.process(r));
+        
+        r = getRepresentation(NamespaceEnum.skos+"Concept");
+        Assert.assertNull(filter.process(r));
+        
+        r = getRepresentation(NamespaceEnum.skos+"Concept", 
NamespaceEnum.foaf+"Person");
+        Assert.assertNotNull(filter.process(r));
+        
+        //test empty value
+        filter = new FieldValueFilter(nsPrefixProvider,"skos:releated","");
+        Assert.assertNotNull(filter.process(r));
+        
+        filter = new FieldValueFilter(nsPrefixProvider,"skos:releated","null");
+        Assert.assertNotNull(filter.process(r));
+
+        filter = new FieldValueFilter(nsPrefixProvider,"skos:releated", null);
+        Assert.assertNotNull(filter.process(r));
+    }
+    
+    @Test
+    public void testExcludeConfig(){
+        EntityProcessor filter = new 
FieldValueFilter(nsPrefixProvider,"rdf:type","*;!foaf:Person");
+        
+        Representation r = getRepresentation(NamespaceEnum.foaf+"Person");
+        Assert.assertNull(filter.process(r));
+        
+        r = getRepresentation(NamespaceEnum.skos+"Concept");
+        Assert.assertNotNull(filter.process(r));
+        
+        r = getRepresentation(NamespaceEnum.skos+"Concept", 
NamespaceEnum.foaf+"Person");
+        Assert.assertNotNull(filter.process(r));
+        
+        //test empty value
+        filter = new 
FieldValueFilter(nsPrefixProvider,"skos:releated","*;!null");
+        Assert.assertNull(filter.process(r));
+        
+        filter = new FieldValueFilter(nsPrefixProvider,"skos:releated","*;!");
+        Assert.assertNull(filter.process(r));
+
+        filter = new FieldValueFilter(nsPrefixProvider,"skos:releated", 
"*;!;!foaf:Person");
+        Assert.assertNull(filter.process(r));
+    }
+
+    private Representation getRepresentation(String...types){
+        Representation r = vf.createRepresentation("urn:test");
+        for(String type : types){
+            r.add(NamespaceEnum.rdf+"type", vf.createReference(type));
+        }
+        return r;
+    }
+
+
+}
+

Propchange: 
stanbol/trunk/entityhub/indexing/core/src/test/java/org/apache/stanbol/entityhub/indexing/core/processor/FieldValueFilterTest.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain


Reply via email to