Author: catholicon
Date: Fri Feb  9 16:05:17 2018
New Revision: 1823671

URL: http://svn.apache.org/viewvc?rev=1823671&view=rev
Log:
OAK-7251: BinaryTextExtractor should not ignore parse exceptions - they should 
at least be logged at DEBUG in all cases

Modified:
    jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java
    jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractor.java

Modified: jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java?rev=1823671&r1=1823670&r2=1823671&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java (original)
+++ jackrabbit/oak/trunk/oak-lucene/src/main/java/org/apache/jackrabbit/oak/plugins/index/lucene/binary/BinaryTextExtractor.java Fri Feb  9 16:05:17 2018
@@ -162,10 +162,17 @@ public class BinaryTextExtractor {
                 stream.close();
             }
         } catch (LinkageError e) {
-            // Capture and ignore errors caused by extraction libraries
+            // Capture errors caused by extraction libraries
             // not being present. This is equivalent to disabling
             // selected media types in configuration, so we can simply
             // ignore these errors.
+            log.debug(
+                    "[{}] Failed to extract text from a binary property: {}."
+                            + " This often happens when some media types are disabled by configuration."
+                            + " The stack trace is included to flag some 'unintended' failures",
+                    getIndexName(), path, e);
+            extractedTextCache.put(v, ExtractedText.ERROR);
+            return TEXT_EXTRACTION_ERROR;
         } catch (TimeoutException t) {
             log.warn(
                     "[{}] Failed to extract text from a binary property due to timeout: {}.",
@@ -185,6 +192,8 @@ public class BinaryTextExtractor {
                         getIndexName(), path, t);
                 extractedTextCache.put(v, ExtractedText.ERROR);
                 return TEXT_EXTRACTION_ERROR;
+            } else {
+                log.debug("Extracted text size exceeded configured limit({})", definition.getMaxExtractLength());
             }
         }
         String result = handler.toString();

Modified: jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractor.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractor.java?rev=1823671&r1=1823670&r2=1823671&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractor.java (original)
+++ jackrabbit/oak/trunk/oak-run/src/main/java/org/apache/jackrabbit/oak/plugins/tika/TextExtractor.java Fri Feb  9 16:05:17 2018
@@ -253,10 +253,16 @@ class TextExtractor implements Closeable
                 stream.close();
             }
         } catch (LinkageError e) {
-            // Capture and ignore errors caused by extraction libraries
+            // Capture errors caused by extraction libraries
             // not being present. This is equivalent to disabling
             // selected media types in configuration, so we can simply
             // ignore these errors.
+            log.debug("Failed to extract text from a binary property: {}."
+                            + " This often happens when some media types are disabled by configuration."
+                            + " The stack trace is included to flag some 'unintended' failures",
+                    path, e);
+            parserErrorCount.incrementAndGet();
+            return ERROR_TEXT;
         } catch (Throwable t) {
             // Capture and report any other full text extraction problems.
             // The special STOP exception is used for normal termination.
@@ -268,6 +274,8 @@ class TextExtractor implements Closeable
                         + " worry about. The stack trace is included to"
                         + " help improve the text extraction feature.", t);
                 return ERROR_TEXT;
+            } else {
+                parserError.debug("Extracted text size exceeded configured limit({})", maxExtractedLength);
             }
         }
         String result = handler.toString();


Reply via email to