Author: rwesten
Date: Wed Nov 28 10:18:08 2012
New Revision: 1414628

URL: http://svn.apache.org/viewvc?rev=1414628&view=rev
Log:
Fixes STANBOL-821: (1) Tokens with low POS tag probabilities now correctly 
check MinTokenSize (because of that, the '´' of 'Poor´s' will no longer be a 
matchable token). (2) Backwards label matches now correctly set the 
lastProcessableFoundIndex if it was not yet set by the forward label 
match process. (3) The search for additional matchable tokens now correctly 
prefers tokens after the current position. NOTE that (3) is not related to 
STANBOL-821 but was noticed and fixed during that work.

Modified:
    
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/EntityLinker.java
    
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java

Modified: 
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/EntityLinker.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/EntityLinker.java?rev=1414628&r1=1414627&r2=1414628&view=diff
==============================================================================
--- 
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/EntityLinker.java
 (original)
+++ 
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/EntityLinker.java
 Wed Nov 28 10:18:08 2012
@@ -157,7 +157,9 @@ public class EntityLinker {
                     (prevIndex > minIncludeIndex || pastIndex < 
maxIndcludeIndex));
             //we might have an additional element in the list
             if(searchStrings.size() > linkerConfig.getMaxSearchTokens()){
-                searchStrings = searchStrings.subList(0, 
linkerConfig.getMaxSearchTokens());
+                searchStrings = searchStrings.subList( //the last part of the 
list
+                    searchStrings.size()-linkerConfig.getMaxSearchTokens(), 
+                    searchStrings.size());
             }
             log.debug("  >> searchStrings {}",searchStrings);
             //search for Entities
@@ -591,6 +593,9 @@ public class EntityLinker {
                 if(found){ //found
                     if(currentToken.isMatchable){
                         foundProcessableTokens++; //only count processable 
Tokens
+                        if(lastProcessableFoundIndex < 0){ //if last is not 
yet set
+                            lastProcessableFoundIndex = currentIndex;
+                        }
                         firstProcessableFoundIndex = currentIndex;
                         foundTokensWithinCoveredProcessableTokens++;
                         if(matchedTokensNotWithinProcessableTokenSpan > 0){

Modified: 
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java
URL: 
http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java?rev=1414628&r1=1414627&r2=1414628&view=diff
==============================================================================
--- 
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java
 (original)
+++ 
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java
 Wed Nov 28 10:18:08 2012
@@ -510,7 +510,7 @@ public class ProcessingState {
                 }
                 if(!matchedPosTag) { //not matched against a POS Tag ...
                     // ... fall back to the token length
-                    isProcessable = token.getSpan().length() != 
elc.getMinSearchTokenLength();
+                    isProcessable = token.getSpan().length() >= 
elc.getMinSearchTokenLength();
                 }
             }
             


Reply via email to