Author: rwesten
Date: Wed Nov 28 10:18:08 2012
New Revision: 1414628
URL: http://svn.apache.org/viewvc?rev=1414628&view=rev
Log:
Fixes STANBOL-821: (1) Tokens with low POS tag probabilities now correctly
check the MinTokenSize (because of that the '´' or 'Poor´s' will no longer be a
matchable token). (2) Backwards label matches now correctly set the
lastProcessableFoundIndex if it was not yet set by the forward label
match process. (3) The search for additional matchable tokens now correctly
prefers tokens after the current position. NOTE that (3) is not related to
STANBOL-821 but was noticed and fixed during that work.
Modified:
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/EntityLinker.java
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java
Modified:
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/EntityLinker.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/EntityLinker.java?rev=1414628&r1=1414627&r2=1414628&view=diff
==============================================================================
---
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/EntityLinker.java
(original)
+++
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/EntityLinker.java
Wed Nov 28 10:18:08 2012
@@ -157,7 +157,9 @@ public class EntityLinker {
(prevIndex > minIncludeIndex || pastIndex <
maxIndcludeIndex));
//we might have an additional element in the list
if(searchStrings.size() > linkerConfig.getMaxSearchTokens()){
- searchStrings = searchStrings.subList(0,
linkerConfig.getMaxSearchTokens());
+ searchStrings = searchStrings.subList( //the last part of the
list
+ searchStrings.size()-linkerConfig.getMaxSearchTokens(),
+ searchStrings.size());
}
log.debug(" >> searchStrings {}",searchStrings);
//search for Entities
@@ -591,6 +593,9 @@ public class EntityLinker {
if(found){ //found
if(currentToken.isMatchable){
foundProcessableTokens++; //only count processable
Tokens
+ if(lastProcessableFoundIndex < 0){ //if last is not
yet set
+ lastProcessableFoundIndex = currentIndex;
+ }
firstProcessableFoundIndex = currentIndex;
foundTokensWithinCoveredProcessableTokens++;
if(matchedTokensNotWithinProcessableTokenSpan > 0){
Modified:
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java?rev=1414628&r1=1414627&r2=1414628&view=diff
==============================================================================
---
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java
(original)
+++
stanbol/trunk/enhancer/engines/entitylinking/src/main/java/org/apache/stanbol/enhancer/engines/entitylinking/impl/ProcessingState.java
Wed Nov 28 10:18:08 2012
@@ -510,7 +510,7 @@ public class ProcessingState {
}
if(!matchedPosTag) { //not matched against a POS Tag ...
// ... fall back to the token length
- isProcessable = token.getSpan().length() !=
elc.getMinSearchTokenLength();
+ isProcessable = token.getSpan().length() >=
elc.getMinSearchTokenLength();
}
}