Author: catholicon
Date: Fri Feb 9 16:05:17 2018
New Revision: 1823671
URL: http://svn.apache.org/viewvc?rev=1823671&view=rev
Log:
OAK-7251: BinaryTextExtractor should not ignore parse exceptions - they should
at least be logged at DEBUG in all cases
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractor.java
Modified:
jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java?rev=1823671&r1=1823670&r2=1823671&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java Fri Feb 9 16:05:17 2018
@@ -162,10 +162,17 @@ public class BinaryTextExtractor {
stream.close();
}
} catch (LinkageError e) {
- // Capture and ignore errors caused by extraction libraries
+ // Capture errors caused by extraction libraries
// not being present. This is equivalent to disabling
// selected media types in configuration, so we can simply
// ignore these errors.
+ log.debug(
+ "[{}] Failed to extract text from a binary property: {}."
+ + " This often happens when some media types are disabled by configuration."
+ + " The stack trace is included to flag some 'unintended' failures",
+ getIndexName(), path, e);
+ extractedTextCache.put(v, ExtractedText.ERROR);
+ return TEXT_EXTRACTION_ERROR;
} catch (TimeoutException t) {
log.warn(
"[{}] Failed to extract text from a binary property due to timeout: {}.",
@@ -185,6 +192,8 @@ public class BinaryTextExtractor {
getIndexName(), path, t);
extractedTextCache.put(v, ExtractedText.ERROR);
return TEXT_EXTRACTION_ERROR;
+ } else {
+ log.debug("Extracted text size exceeded configured limit({})", definition.getMaxExtractLength());
}
}
String result = handler.toString();
Modified:
jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractor.java
URL:
http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractor.java?rev=1823671&r1=1823670&r2=1823671&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractor.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractor.java Fri Feb 9 16:05:17 2018
@@ -253,10 +253,16 @@ class TextExtractor implements Closeable
stream.close();
}
} catch (LinkageError e) {
- // Capture and ignore errors caused by extraction libraries
+ // Capture errors caused by extraction libraries
// not being present. This is equivalent to disabling
// selected media types in configuration, so we can simply
// ignore these errors.
+ log.debug("Failed to extract text from a binary property: {}."
+ + " This often happens when some media types are disabled by configuration."
+ + " The stack trace is included to flag some 'unintended' failures",
+ path, e);
+ parserErrorCount.incrementAndGet();
+ return ERROR_TEXT;
} catch (Throwable t) {
// Capture and report any other full text extraction problems.
// The special STOP exception is used for normal termination.
@@ -268,6 +274,8 @@ class TextExtractor implements Closeable
+ " worry about. The stack trace is included to"
+ " help improve the text extraction feature.", t);
return ERROR_TEXT;
+ } else {
+ parserError.debug("Extracted text size exceeded configured limit({})", maxExtractedLength);
}
}
String result = handler.toString();