Author: rwesten
Date: Thu Mar 28 07:56:37 2013
New Revision: 1461969
URL: http://svn.apache.org/r1461969
Log:
minor: improved logging for failed language detections. Now the first 200 chars
of the processed text are included in the Exception message
Modified:
stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
Modified:
stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
URL:
http://svn.apache.org/viewvc/stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java?rev=1461969&r1=1461968&r2=1461969&view=diff
==============================================================================
---
stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
(original)
+++
stanbol/trunk/enhancement-engines/langdetect/src/main/java/org/apache/stanbol/enhancer/engines/langdetect/LanguageDetectionEnhancementEngine.java
Thu Mar 28 07:56:37 2013
@@ -215,7 +215,8 @@ public class LanguageDetectionEnhancemen
} catch (IOException e) {
throw new InvalidContentException(this, ci, e);
}
- if (text.trim().length() == 0) {
+ //do not call trim() on long texts to check if the text is empty
+ if (text.length() < 50 && text.trim().length() == 0) {
log.info("No text contained in ContentPart {} of ContentItem {}",
contentPart.getKey(),ci.getUri());
return;
@@ -230,10 +231,14 @@ public class LanguageDetectionEnhancemen
try {
languages = languageIdentifier.getLanguages(text);
log.debug("language identified: {}",languages);
- }
- catch (LangDetectException e) {
- log.warn("Could not identify language", e);
- throw new EngineException(this, ci, "Could not identify language", e);
+ } catch (LangDetectException e) {
+ StringBuilder msg = new StringBuilder("Could not identify language of text: ");
+ if(text.length() < 200){
+ msg.append(text);
+ } else {
+ msg.append(text.subSequence(0, 199)).append("...");
+ }
+ throw new EngineException(this, ci, msg.toString(), e);
}
// add language to metadata