Author: rwesten
Date: Tue Jan 7 07:54:39 2014
New Revision: 1556135
URL: http://svn.apache.org/r1556135
Log:
implementation for STANBOL-1249; also added debug level logging for the used FST
models
Modified:
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java
Modified:
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java?rev=1556135&r1=1556134&r2=1556135&view=diff
==============================================================================
---
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java
(original)
+++
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/IndexConfiguration.java
Tue Jan 7 07:54:39 2014
@@ -290,9 +290,22 @@ public class IndexConfiguration {
this.rankingField = rankingField == null ? null :
FieldEncodingEnum.encodeFloat(rankingField, fieldEncoding);
}
-
+ /**
+ * Returns the CorpusInfo for the parsed language. If the language has an
+ * extension (e.g. en-US) it first tries to load the corpus for the exact
+ * match and falls back to the main language (en) if such a corpus does not
+ * exist.
+ * @param language the language
+ * @return the corpus information or <code>null</code> if not present
+ */
public CorpusInfo getCorpus(String language) {
- return corpusInfos.get(language);
+ CorpusInfo langCorpusInfo = corpusInfos.get(language);
+ if(langCorpusInfo == null && language.indexOf('-') > 0){
+ String rootLang = language.substring(0,language.indexOf('-'));
+ log.debug(" - no FST corpus for {}. Fallback to {}",
language,rootLang);
+ langCorpusInfo = corpusInfos.get(rootLang);
+ }
+ return langCorpusInfo;
}
/**
* Getter for the languages of all configured FST corpora
Modified:
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java
URL:
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java?rev=1556135&r1=1556134&r2=1556135&view=diff
==============================================================================
---
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java
(original)
+++
stanbol/branches/release-0.12/enhancement-engines/lucenefstlinking/src/main/java/org/apache/stanbol/enhancer/engines/lucenefstlinking/TaggingSession.java
Tue Jan 7 07:54:39 2014
@@ -131,7 +131,9 @@ public class TaggingSession implements C
//get the corpusInfo
CorpusInfo langCorpusInfo = config.getCorpus(language);
+ log.debug("> language Corpus: {}", langCorpusInfo);
CorpusInfo defaultCorpusInfo = config.getDefaultCorpus();
+ log.debug("> default Corpus: {}", defaultCorpusInfo);
//obtain the Solr Document Id field
SchemaField idSchemaField =
config.getIndex().getLatestSchema().getUniqueKeyField();