Author: grossws
Date: Mon Sep 21 17:19:26 2015
New Revision: 1704368

URL: http://svn.apache.org/viewvc?rev=1704368&view=rev
Log:
Fix license headers and reformat in tika-example

Apache License v2.0 headers are changed to the recommended form [1] (as used
in o.a.tika.Tika).
Reformat from tabs to 4 spaces.
Added paragraphs in javadoc.
Trivial migrations to Java 7 (such as the diamond operator).
Removed some extra lines to have consistent formatting in examples.

[1]: http://www.apache.org/legal/src-headers.html#headers

Modified:
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
    
tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -24,32 +27,30 @@ import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MimeTypesFactory;
 
 public class AdvancedTypeDetector {
-
-       public static String detectWithCustomConfig(String name) throws 
Exception {
-               String config = "/org/apache/tika/mime/tika-mimetypes.xml";
-               Tika tika = new Tika(MimeTypesFactory.create(config));
-               return tika.detect(name);
-       }
-
-       public static String detectWithCustomDetector(String name) throws 
Exception {
-               String config = "/org/apache/tika/mime/tika-mimetypes.xml";
-               Detector detector = MimeTypesFactory.create(config);
-
-               Detector custom = new Detector() {
-                       private static final long serialVersionUID = 
-5420638839201540749L;
-
-                       public MediaType detect(InputStream input, Metadata 
metadata) {
-                               String type = 
metadata.get("my-custom-type-override");
-                               if (type != null) {
-                                       return MediaType.parse(type);
-                               } else {
-                                       return MediaType.OCTET_STREAM;
-                               }
-                       }
-               };
-
-               Tika tika = new Tika(new CompositeDetector(custom, detector));
-               return tika.detect(name);
-       }
-
+    public static String detectWithCustomConfig(String name) throws Exception {
+        String config = "/org/apache/tika/mime/tika-mimetypes.xml";
+        Tika tika = new Tika(MimeTypesFactory.create(config));
+        return tika.detect(name);
+    }
+
+    public static String detectWithCustomDetector(String name) throws 
Exception {
+        String config = "/org/apache/tika/mime/tika-mimetypes.xml";
+        Detector detector = MimeTypesFactory.create(config);
+
+        Detector custom = new Detector() {
+            private static final long serialVersionUID = -5420638839201540749L;
+
+            public MediaType detect(InputStream input, Metadata metadata) {
+                String type = metadata.get("my-custom-type-override");
+                if (type != null) {
+                    return MediaType.parse(type);
+                } else {
+                    return MediaType.OCTET_STREAM;
+                }
+            }
+        };
+
+        Tika tika = new Tika(new CompositeDetector(custom, detector));
+        return tika.detect(name);
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java
 Mon Sep 21 17:19:26 2015
@@ -14,6 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 package org.apache.tika.example;
 
 import java.io.IOException;
@@ -36,7 +37,7 @@ import org.xml.sax.SAXException;
 
 /**
  * Examples of using different Content Handlers to
- *  get different parts of the file's contents 
+ * get different parts of the file's contents
  */
 public class ContentHandlerExample {
     /**
@@ -67,10 +68,10 @@ public class ContentHandlerExample {
             return handler.toString();
         }
     }
-    
+
     /**
      * Example of extracting just the body as HTML, without the
-     *  head part, as a string
+     * head part, as a string
      */
     public String parseBodyToHTML() throws IOException, SAXException, 
TikaException {
         ContentHandler handler = new BodyContentHandler(
@@ -83,16 +84,15 @@ public class ContentHandlerExample {
             return handler.toString();
         }
     }
-    
+
     /**
      * Example of extracting just one part of the document's body,
-     *  as HTML as a string, excluding the rest
+     * as HTML as a string, excluding the rest
      */
     public String parseOnePartToHTML() throws IOException, SAXException, 
TikaException {
         // Only get things under html -> body -> div (class=header)
         XPathParser xhtmlParser = new XPathParser("xhtml", 
XHTMLContentHandler.XHTML);
-        Matcher divContentMatcher = xhtmlParser.parse(
-                "/xhtml:html/xhtml:body/xhtml:div/descendant::node()");        
+        Matcher divContentMatcher = 
xhtmlParser.parse("/xhtml:html/xhtml:body/xhtml:div/descendant::node()");
         ContentHandler handler = new MatchingContentHandler(
                 new ToXMLContentHandler(), divContentMatcher);
 
@@ -103,25 +103,26 @@ public class ContentHandlerExample {
             return handler.toString();
         }
     }
-    
+
     protected final int MAXIMUM_TEXT_CHUNK_SIZE = 40;
+
     /**
      * Example of extracting the plain text in chunks, with each chunk
-     *  of no more than a certain maximum size
+     * of no more than a certain maximum size
      */
     public List<String> parseToPlainTextChunks() throws IOException, 
SAXException, TikaException {
-        final List<String> chunks = new ArrayList<String>();
+        final List<String> chunks = new ArrayList<>();
         chunks.add("");
         ContentHandlerDecorator handler = new ContentHandlerDecorator() {
             @Override
             public void characters(char[] ch, int start, int length) {
-                String lastChunk = chunks.get(chunks.size()-1);
+                String lastChunk = chunks.get(chunks.size() - 1);
                 String thisStr = new String(ch, start, length);
-                
-                if (lastChunk.length()+length > MAXIMUM_TEXT_CHUNK_SIZE) {
+
+                if (lastChunk.length() + length > MAXIMUM_TEXT_CHUNK_SIZE) {
                     chunks.add(thisStr);
                 } else {
-                    chunks.set(chunks.size()-1, lastChunk+thisStr);
+                    chunks.set(chunks.size() - 1, lastChunk + thisStr);
                 }
             }
         };

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -22,28 +25,25 @@ import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.mime.MimeTypesFactory;
 
 public class CustomMimeInfo {
-
-       public static String customMimeInfo() throws Exception {
-               String path = "file:///path/to/prescription-type.xml";
-               MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
-               Tika tika = new Tika(typeDatabase);
-               String type = tika.detect("/path/to/prescription.xpd");
-               return type;
-       }
-
-       public static String customCompositeDetector() throws Exception {
-               String path = "file:///path/to/prescription-type.xml";
-               MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
-               Tika tika = new Tika(new CompositeDetector(typeDatabase,
-                               new EncryptedPrescriptionDetector()));
-               String type = tika.detect("/path/to/tmp/prescription.xpd");
-               return type;
-       }
-
-       public static void main(String[] args) throws Exception {
-               System.out.println("customMimeInfo=" + customMimeInfo());
-               System.out.println("customCompositeDetector="
-                               + customCompositeDetector());
-       }
-
+    public static String customMimeInfo() throws Exception {
+        String path = "file:///path/to/prescription-type.xml";
+        MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
+        Tika tika = new Tika(typeDatabase);
+        String type = tika.detect("/path/to/prescription.xpd");
+        return type;
+    }
+
+    public static String customCompositeDetector() throws Exception {
+        String path = "file:///path/to/prescription-type.xml";
+        MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
+        Tika tika = new Tika(new CompositeDetector(typeDatabase,
+                new EncryptedPrescriptionDetector()));
+        String type = tika.detect("/path/to/tmp/prescription.xpd");
+        return type;
+    }
+
+    public static void main(String[] args) throws Exception {
+        System.out.println("customMimeInfo=" + customMimeInfo());
+        System.out.println("customCompositeDetector=" + 
customCompositeDetector());
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -17,14 +20,10 @@ package org.apache.tika.example;
 import org.apache.tika.cli.TikaCLI;
 
 /**
- *
  * Print the supported Tika Metadata models and their fields.
- *
  */
 public class DescribeMetadata {
-
-       public static void main(String[] args) throws Exception {
-               TikaCLI.main(new String[] { "--list-met-models" });
-       }
-
+    public static void main(String[] args) throws Exception {
+        TikaCLI.main(new String[]{"--list-met-models"});
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -16,7 +19,7 @@ package org.apache.tika.example;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -40,101 +43,101 @@ import static java.nio.charset.StandardC
  */
 public class DirListParser implements Parser {
 
-       private static final long serialVersionUID = 2717930544410610735L;
+    private static final long serialVersionUID = 2717930544410610735L;
 
-       private static Set<MediaType> SUPPORTED_TYPES = new HashSet<MediaType>(
-                       Arrays.asList(MediaType.TEXT_PLAIN));
+    private static Set<MediaType> SUPPORTED_TYPES = new HashSet<>(
+            Collections.singletonList(MediaType.TEXT_PLAIN));
 
-       /*
-        * (non-Javadoc)
-        * 
-        * @see org.apache.tika.parser.Parser#getSupportedTypes(
-        * org.apache.tika.parser.ParseContext)
-        */
-       public Set<MediaType> getSupportedTypes(ParseContext context) {
-               return SUPPORTED_TYPES;
-       }
-
-       /*
-        * (non-Javadoc)
-        * 
-        * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
-        * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
-        */
-       public void parse(InputStream is, ContentHandler handler, Metadata 
metadata)
-                       throws IOException, SAXException, TikaException {
-               this.parse(is, handler, metadata, new ParseContext());
-       }
-
-       /*
-        * (non-Javadoc)
-        * 
-        * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
-        * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
-        * org.apache.tika.parser.ParseContext)
-        */
-       public void parse(InputStream is, ContentHandler handler,
-                       Metadata metadata, ParseContext context) throws 
IOException,
-                       SAXException, TikaException {
-
-               List<String> lines = 
FileUtils.readLines(TikaInputStream.get(is).getFile(), UTF_8);
-               for (String line : lines) {
-                       String[] fileToks = line.split("\\s+");
-                       if (fileToks.length < 8)
-                               continue;
-                       String filePermissions = fileToks[0];
-                       String numHardLinks = fileToks[1];
-                       String fileOwner = fileToks[2];
-                       String fileOwnerGroup = fileToks[3];
-                       String fileSize = fileToks[4];
-                       StringBuffer lastModDate = new StringBuffer();
-                       lastModDate.append(fileToks[5]);
-                       lastModDate.append(" ");
-                       lastModDate.append(fileToks[6]);
-                       lastModDate.append(" ");
-                       lastModDate.append(fileToks[7]);
-                       StringBuffer fileName = new StringBuffer();
-                       for (int i = 8; i < fileToks.length; i++) {
-                               fileName.append(fileToks[i]);
-                               fileName.append(" ");
-                       }
-                       fileName.deleteCharAt(fileName.length() - 1);
-                       this.addMetadata(metadata, filePermissions, 
numHardLinks,
-                                       fileOwner, fileOwnerGroup, fileSize,
-                                       lastModDate.toString(), 
fileName.toString());
-               }
-       }
-
-       public static void main(String[] args) throws IOException, SAXException,
-                       TikaException {
-               DirListParser parser = new DirListParser();
-               Metadata met = new Metadata();
-               parser.parse(System.in, new BodyContentHandler(), met);
-
-               System.out.println("Num files: " + 
met.getValues("Filename").length);
-               System.out.println("Num executables: " + 
met.get("NumExecutables"));
-       }
-
-       private void addMetadata(Metadata metadata, String filePerms,
-                       String numHardLinks, String fileOwner, String 
fileOwnerGroup,
-                       String fileSize, String lastModDate, String fileName) {
-               metadata.add("FilePermissions", filePerms);
-               metadata.add("NumHardLinks", numHardLinks);
-               metadata.add("FileOwner", fileOwner);
-               metadata.add("FileOwnerGroup", fileOwnerGroup);
-               metadata.add("FileSize", fileSize);
-               metadata.add("LastModifiedDate", lastModDate);
-               metadata.add("Filename", fileName);
-
-               if (filePerms.indexOf("x") != -1 && filePerms.indexOf("d") == 
-1) {
-                       if (metadata.get("NumExecutables") != null) {
-                               int numExecs = 
Integer.valueOf(metadata.get("NumExecutables"));
-                               numExecs++;
-                               metadata.set("NumExecutables", 
String.valueOf(numExecs));
-                       } else {
-                               metadata.set("NumExecutables", "1");
-                       }
-               }
-       }
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.tika.parser.Parser#getSupportedTypes(
+     * org.apache.tika.parser.ParseContext)
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+     * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
+     */
+    public void parse(InputStream is, ContentHandler handler, Metadata 
metadata)
+            throws IOException, SAXException, TikaException {
+        this.parse(is, handler, metadata, new ParseContext());
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+     * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+     * org.apache.tika.parser.ParseContext)
+     */
+    public void parse(InputStream is, ContentHandler handler,
+                      Metadata metadata, ParseContext context) throws 
IOException,
+            SAXException, TikaException {
+
+        List<String> lines = 
FileUtils.readLines(TikaInputStream.get(is).getFile(), UTF_8);
+        for (String line : lines) {
+            String[] fileToks = line.split("\\s+");
+            if (fileToks.length < 8)
+                continue;
+            String filePermissions = fileToks[0];
+            String numHardLinks = fileToks[1];
+            String fileOwner = fileToks[2];
+            String fileOwnerGroup = fileToks[3];
+            String fileSize = fileToks[4];
+            StringBuilder lastModDate = new StringBuilder();
+            lastModDate.append(fileToks[5]);
+            lastModDate.append(" ");
+            lastModDate.append(fileToks[6]);
+            lastModDate.append(" ");
+            lastModDate.append(fileToks[7]);
+            StringBuilder fileName = new StringBuilder();
+            for (int i = 8; i < fileToks.length; i++) {
+                fileName.append(fileToks[i]);
+                fileName.append(" ");
+            }
+            fileName.deleteCharAt(fileName.length() - 1);
+            this.addMetadata(metadata, filePermissions, numHardLinks,
+                    fileOwner, fileOwnerGroup, fileSize,
+                    lastModDate.toString(), fileName.toString());
+        }
+    }
+
+    public static void main(String[] args) throws IOException, SAXException,
+            TikaException {
+        DirListParser parser = new DirListParser();
+        Metadata met = new Metadata();
+        parser.parse(System.in, new BodyContentHandler(), met);
+
+        System.out.println("Num files: " + met.getValues("Filename").length);
+        System.out.println("Num executables: " + met.get("NumExecutables"));
+    }
+
+    private void addMetadata(Metadata metadata, String filePerms,
+                             String numHardLinks, String fileOwner, String 
fileOwnerGroup,
+                             String fileSize, String lastModDate, String 
fileName) {
+        metadata.add("FilePermissions", filePerms);
+        metadata.add("NumHardLinks", numHardLinks);
+        metadata.add("FileOwner", fileOwner);
+        metadata.add("FileOwnerGroup", fileOwnerGroup);
+        metadata.add("FileSize", fileSize);
+        metadata.add("LastModifiedDate", lastModDate);
+        metadata.add("Filename", fileName);
+
+        if (filePerms.indexOf("x") != -1 && filePerms.indexOf("d") == -1) {
+            if (metadata.get("NumExecutables") != null) {
+                int numExecs = Integer.valueOf(metadata.get("NumExecutables"));
+                numExecs++;
+                metadata.set("NumExecutables", String.valueOf(numExecs));
+            } else {
+                metadata.set("NumExecutables", "1");
+            }
+        }
+    }
 
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -28,19 +31,16 @@ import org.xml.sax.SAXException;
  * Grabs a PDF file from a URL and prints its {@link Metadata}
  */
 public class DisplayMetInstance {
-
-       public static Metadata getMet(URL url) throws IOException, SAXException,
-                       TikaException {
-               Metadata met = new Metadata();
-               PDFParser parser = new PDFParser();
-               parser.parse(url.openStream(), new BodyContentHandler(), met,
-                               new ParseContext());
-               return met;
-       }
-
-       public static void main(String[] args) throws Exception {
-               Metadata met = DisplayMetInstance.getMet(new URL(args[0]));
-               System.out.println(met);
-       }
-
+    public static Metadata getMet(URL url) throws IOException, SAXException,
+            TikaException {
+        Metadata met = new Metadata();
+        PDFParser parser = new PDFParser();
+        parser.parse(url.openStream(), new BodyContentHandler(), met, new 
ParseContext());
+        return met;
+    }
+
+    public static void main(String[] args) throws Exception {
+        Metadata met = DisplayMetInstance.getMet(new URL(args[0]));
+        System.out.println(met);
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
 Mon Sep 21 17:19:26 2015
@@ -1,4 +1,3 @@
-package org.apache.tika.example;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,13 +15,7 @@ package org.apache.tika.example;
  * limitations under the License.
  */
 
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
+package org.apache.tika.example;
 
 import java.io.File;
 import java.io.FileOutputStream;
@@ -36,6 +29,13 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.DefaultDetector;
@@ -65,9 +65,7 @@ import static java.nio.charset.StandardC
  * for your custom mime types.
  */
 public class DumpTikaConfigExample {
-
     /**
-     *
      * @param config config file to dump
      * @param writer writer to which to write
      * @throws Exception
@@ -104,8 +102,7 @@ public class DumpTikaConfigExample {
         Translator translator = config.getTranslator();
         if (translator instanceof DefaultTranslator) {
             Node mimeComment = doc.createComment(
-                    "for example: "+
-                            "<translator class=\"org.apache.tika.language.translate.GoogleTranslator\"/>");
+                    "for example: <translator class=\"org.apache.tika.language.translate.GoogleTranslator\"/>");
             rootElement.appendChild(mimeComment);
         } else {
             Element translatorElement = doc.createElement("translator");
@@ -125,7 +122,7 @@ public class DumpTikaConfigExample {
         Element detectorsElement = doc.createElement("detectors");
 
         if (detector instanceof DefaultDetector) {
-            List<Detector> children = ((DefaultDetector)detector).getDetectors();
+            List<Detector> children = ((DefaultDetector) detector).getDetectors();
             for (Detector d : children) {
                 Element detectorElement = doc.createElement("detector");
                 detectorElement.setAttribute("class", d.getClass().getCanonicalName());
@@ -147,9 +144,9 @@ public class DumpTikaConfigExample {
             Parser child = e.getValue();
             String className = e.getKey();
             parserElement.setAttribute("class", className);
-            Set<MediaType> types = new TreeSet<MediaType>();
+            Set<MediaType> types = new TreeSet<>();
             types.addAll(child.getSupportedTypes(context));
-            for (MediaType type : types){
+            for (MediaType type : types) {
                 Element mimeElement = doc.createElement("mime");
                 mimeElement.appendChild(doc.createTextNode(type.toString()));
                 parserElement.appendChild(mimeElement);
@@ -160,10 +157,10 @@ public class DumpTikaConfigExample {
 
     }
 
-    private Map<String, Parser> getConcreteParsers(Parser parentParser)throws TikaException, IOException  {
-        Map<String, Parser> parsers = new TreeMap<String, Parser>();
+    private Map<String, Parser> getConcreteParsers(Parser parentParser) throws TikaException, IOException {
+        Map<String, Parser> parsers = new TreeMap<>();
         if (parentParser instanceof CompositeParser) {
-            addParsers((CompositeParser)parentParser, parsers);
+            addParsers((CompositeParser) parentParser, parsers);
         } else {
             addParser(parentParser, parsers);
         }
@@ -173,7 +170,7 @@ public class DumpTikaConfigExample {
     private void addParsers(CompositeParser p, Map<String, Parser> parsers) {
         for (Parser child : p.getParsers().values()) {
             if (child instanceof CompositeParser) {
-                addParsers((CompositeParser)child, parsers);
+                addParsers((CompositeParser) child, parsers);
             } else {
                 addParser(child, parsers);
             }
@@ -185,12 +182,10 @@ public class DumpTikaConfigExample {
     }
 
     /**
-     *
      * @param args outputFile, outputEncoding, if args is empty, this prints to console
      * @throws Exception
      */
     public static void main(String[] args) throws Exception {
-
         Charset encoding = UTF_8;
         Writer writer = null;
         if (args.length > 0) {

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -18,7 +21,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.security.GeneralSecurityException;
 import java.security.Key;
-
 import javax.crypto.Cipher;
 import javax.crypto.CipherInputStream;
 import javax.xml.namespace.QName;
@@ -30,30 +32,28 @@ import org.apache.tika.metadata.Metadata
 import org.apache.tika.mime.MediaType;
 
 public class EncryptedPrescriptionDetector implements Detector {
+    private static final long serialVersionUID = -1709652690773421147L;
 
-       private static final long serialVersionUID = -1709652690773421147L;
-
-       public MediaType detect(InputStream stream, Metadata metadata)
-                       throws IOException {
-               Key key = Pharmacy.getKey();
-               MediaType type = MediaType.OCTET_STREAM;
-
-               try (InputStream lookahead = new LookaheadInputStream(stream, 1024)) {
-                       Cipher cipher = Cipher.getInstance("RSA");
-
-                       cipher.init(Cipher.DECRYPT_MODE, key);
-                       InputStream decrypted = new CipherInputStream(lookahead, cipher);
-
-                       QName name = new XmlRootExtractor().extractRootElement(decrypted);
-                       if (name != null
-                                       && "http://example.com/xpd".equals(name.getNamespaceURI())
-                                       && "prescription".equals(name.getLocalPart())) {
-                               type = MediaType.application("x-prescription");
-                       }
-               } catch (GeneralSecurityException e) {
-                       // unable to decrypt, fall through
-               }
-               return type;
-       }
-
+    public MediaType detect(InputStream stream, Metadata metadata)
+            throws IOException {
+        Key key = Pharmacy.getKey();
+        MediaType type = MediaType.OCTET_STREAM;
+
+        try (InputStream lookahead = new LookaheadInputStream(stream, 1024)) {
+            Cipher cipher = Cipher.getInstance("RSA");
+
+            cipher.init(Cipher.DECRYPT_MODE, key);
+            InputStream decrypted = new CipherInputStream(lookahead, cipher);
+
+            QName name = new XmlRootExtractor().extractRootElement(decrypted);
+            if (name != null
+                    && "http://example.com/xpd".equals(name.getNamespaceURI())
+                    && "prescription".equals(name.getLocalPart())) {
+                type = MediaType.application("x-prescription");
+            }
+        } catch (GeneralSecurityException e) {
+            // unable to decrypt, fall through
+        }
+        return type;
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,7 +23,6 @@ import java.security.GeneralSecurityExce
 import java.security.Key;
 import java.util.Collections;
 import java.util.Set;
-
 import javax.crypto.Cipher;
 import javax.crypto.CipherInputStream;
 
@@ -33,28 +35,26 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 public class EncryptedPrescriptionParser extends AbstractParser {
+    private static final long serialVersionUID = -7816987249611278541L;
 
-       private static final long serialVersionUID = -7816987249611278541L;
-
-       public void parse(InputStream stream, ContentHandler handler,
-                       Metadata metadata, ParseContext context) throws IOException,
-                       SAXException, TikaException {
-               try {
-                       Key key = Pharmacy.getKey();
-                       Cipher cipher = Cipher.getInstance("RSA");
-                       cipher.init(Cipher.DECRYPT_MODE, key);
-                       InputStream decrypted = new CipherInputStream(stream, cipher);
-
-                       new PrescriptionParser().parse(decrypted, handler, metadata,
-                                       context);
-               } catch (GeneralSecurityException e) {
-                       throw new TikaException("Unable to decrypt a digital prescription",
-                                       e);
-               }
-       }
-
-       public Set<MediaType> getSupportedTypes(ParseContext context) {
-               return Collections.singleton(MediaType.application("x-prescription"));
-       }
-
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+        try {
+            Key key = Pharmacy.getKey();
+            Cipher cipher = Cipher.getInstance("RSA");
+            cipher.init(Cipher.DECRYPT_MODE, key);
+            InputStream decrypted = new CipherInputStream(stream, cipher);
+
+            new PrescriptionParser().parse(decrypted, handler, metadata,
+                    context);
+        } catch (GeneralSecurityException e) {
+            throw new TikaException("Unable to decrypt a digital prescription",
+                    e);
+        }
+    }
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return Collections.singleton(MediaType.application("x-prescription"));
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +17,6 @@
 
 package org.apache.tika.example;
 
-
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.file.Files;
@@ -37,9 +39,8 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 public class ExtractEmbeddedFiles {
-
     private Parser parser = new AutoDetectParser();
-    private Detector detector = ((AutoDetectParser)parser).getDetector();
+    private Detector detector = ((AutoDetectParser) parser).getDetector();
     private TikaConfig config = TikaConfig.getDefaultConfig();
 
     public void extract(InputStream is, Path outputDir) throws SAXException, TikaException, IOException {
@@ -87,7 +88,7 @@ public class ExtractEmbeddedFiles {
             //now try to figure out the right extension for the embedded file
             MediaType contentType = detector.detect(stream, metadata);
 
-            if (name.indexOf('.')==-1 && contentType!=null) {
+            if (name.indexOf('.') == -1 && contentType != null) {
                 try {
                     name += config.getMimeRepository().forName(
                             contentType.toString()).getExtension();

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
 Mon Sep 21 17:19:26 2015
@@ -1,10 +1,12 @@
-package org.apache.tika.example;
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -12,6 +14,14 @@ package org.apache.tika.example;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.HashSet;
+
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
@@ -19,15 +29,10 @@ import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.PhoneExtractingContentHandler;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStream;
-import java.util.HashSet;
-
 /**
  * Class to demonstrate how to use the {@link org.apache.tika.sax.PhoneExtractingContentHandler}
  * to get a list of all of the phone numbers from every file in a directory.
- *
+ * <p>
  * You can run this main method by running
  * <code>
  *     mvn exec:java -Dexec.mainClass="org.apache.tika.example.GrabPhoneNumbersExample" -Dexec.args="/path/to/directory"
@@ -38,7 +43,7 @@ public class GrabPhoneNumbersExample {
     private static HashSet<String> phoneNumbers = new HashSet<String>();
     private static int failedFiles, successfulFiles = 0;
 
-    public static void main(String[] args){
+    public static void main(String[] args) {
         if (args.length != 1) {
             System.err.println("Usage `java GrabPhoneNumbers [corpus]");
             return;

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,7 +23,6 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Date;
-
 import javax.jcr.Item;
 
 import org.apache.jackrabbit.server.io.DefaultIOListener;
@@ -38,205 +40,196 @@ import org.slf4j.LoggerFactory;
  * <code>ImportContextImpl</code>...
  */
 public class ImportContextImpl implements ImportContext {
+    private static Logger log = LoggerFactory.getLogger(ImportContextImpl.class);
 
-       private static Logger log = LoggerFactory
-                       .getLogger(ImportContextImpl.class);
-
-       private final IOListener ioListener;
-       private final Item importRoot;
-       private final String systemId;
-       private final File inputFile;
-
-       private InputContext inputCtx;
-       private boolean completed;
-
-       private final Detector detector;
-
-       private final MediaType type;
-
-       /**
-        * Creates a new item import context. The specified InputStream is written
-        * to a temporary file in order to avoid problems with multiple IOHandlers
-        * that try to run the import but fail. The temporary file is deleted as
-        * soon as this context is informed that the import has been completed and
-        * it will not be used any more.
-        *
-        * @param importRoot
-        * @param systemId
-        * @param ctx
-        *            input context, or <code>null</code>
-        * @param stream
-        *            document input stream, or <code>null</code>
-        * @param ioListener
-        * @param detector
-        *            content type detector
-        * @throws IOException
-        * @see ImportContext#informCompleted(boolean)
-        */
-       public ImportContextImpl(Item importRoot, String systemId,
-                       InputContext ctx, InputStream stream, IOListener ioListener,
-                       Detector detector) throws IOException {
-               this.importRoot = importRoot;
-               this.systemId = systemId;
-               this.inputCtx = ctx;
-               this.ioListener = (ioListener != null) ? ioListener
-                               : new DefaultIOListener(log);
-               this.detector = detector;
-
-               Metadata metadata = new Metadata();
-               if (ctx != null && ctx.getContentType() != null) {
-                       metadata.set(Metadata.CONTENT_TYPE, ctx.getContentType());
-               }
-               if (systemId != null) {
-                       metadata.set(Metadata.RESOURCE_NAME_KEY, systemId);
-               }
-               if (stream != null && !stream.markSupported()) {
-                       stream = new BufferedInputStream(stream);
-               }
-               type = detector.detect(stream, metadata);
-
-               this.inputFile = IOUtil.getTempFile(stream);
-       }
-
-       /**
-        * @see ImportContext#getIOListener()
-        */
-       public IOListener getIOListener() {
-               return ioListener;
-       }
-
-       /**
-        * @see ImportContext#getImportRoot()
-        */
-       public Item getImportRoot() {
-               return importRoot;
-       }
-
-       /**
-        * @see ImportContext#getDetector()
-        */
-       public Detector getDetector() {
-               return detector;
-       }
-
-       /**
-        * @see ImportContext#hasStream()
-        */
-       public boolean hasStream() {
-               return inputFile != null;
-       }
-
-       /**
-        * Returns a new <code>InputStream</code> to the temporary file created
-        * during instanciation or <code>null</code>, if this context does not
-        * provide a stream.
-        *
-        * @see ImportContext#getInputStream()
-        * @see #hasStream()
-        */
-       public InputStream getInputStream() {
-               checkCompleted();
-               InputStream in = null;
-               if (inputFile != null) {
-                       try {
-                               in = new FileInputStream(inputFile);
-                       } catch (IOException e) {
-                               // unexpected error... ignore and return null
-                       }
-               }
-               return in;
-       }
-
-       /**
-        * @see ImportContext#getSystemId()
-        */
-       public String getSystemId() {
-               return systemId;
-       }
-
-       /**
-        * @see ImportContext#getModificationTime()
-        */
-       public long getModificationTime() {
-               return (inputCtx != null) ? inputCtx.getModificationTime() : new Date()
-                               .getTime();
-       }
-
-       /**
-        * @see ImportContext#getContentLanguage()
-        */
-       public String getContentLanguage() {
-               return (inputCtx != null) ? inputCtx.getContentLanguage() : null;
-       }
-
-       /**
-        * @see ImportContext#getContentLength()
-        */
-       public long getContentLength() {
-               long length = IOUtil.UNDEFINED_LENGTH;
-               if (inputCtx != null) {
-                       length = inputCtx.getContentLength();
-               }
-               if (length < 0 && inputFile != null) {
-                       length = inputFile.length();
-               }
-               if (length < 0) {
-                       log.debug("Unable to determine content length -> default value = "
-                                       + IOUtil.UNDEFINED_LENGTH);
-               }
-               return length;
-       }
-
-       /**
-        * @see ImportContext#getMimeType()
-        */
-       public String getMimeType() {
-               return IOUtil.getMimeType(type.toString());
-       }
-
-       /**
-        * @see ImportContext#getEncoding()
-        */
-       public String getEncoding() {
-               return IOUtil.getEncoding(type.toString());
-       }
-
-       /**
-        * @see ImportContext#getProperty(Object)
-        */
-       public Object getProperty(Object propertyName) {
-               return (inputCtx != null) ? inputCtx.getProperty(propertyName
-                               .toString()) : null;
-       }
-
-       /**
-        * @see ImportContext#informCompleted(boolean)
-        */
-       public void informCompleted(boolean success) {
-               checkCompleted();
-               completed = true;
-               if (inputFile != null) {
-                       inputFile.delete();
-               }
-       }
-
-       /**
-        * @see ImportContext#isCompleted()
-        */
-       public boolean isCompleted() {
-               return completed;
-       }
-
-       /**
-        * @throws IllegalStateException
-        *             if the context is already completed.
-        * @see #isCompleted()
-        * @see #informCompleted(boolean)
-        */
-       private void checkCompleted() {
-               if (completed) {
-                       throw new IllegalStateException(
-                                       "ImportContext has already been consumed.");
-               }
-       }
+    private final IOListener ioListener;
+    private final Item importRoot;
+    private final String systemId;
+    private final File inputFile;
+
+    private InputContext inputCtx;
+    private boolean completed;
+
+    private final Detector detector;
+
+    private final MediaType type;
+
+    /**
+     * Creates a new item import context. The specified InputStream is written
+     * to a temporary file in order to avoid problems with multiple IOHandlers
+     * that try to run the import but fail. The temporary file is deleted as
+     * soon as this context is informed that the import has been completed and
+     * it will not be used any more.
+     *
+     * @param importRoot
+     * @param systemId
+     * @param ctx        input context, or <code>null</code>
+     * @param stream     document input stream, or <code>null</code>
+     * @param ioListener
+     * @param detector   content type detector
+     * @throws IOException
+     * @see ImportContext#informCompleted(boolean)
+     */
+    public ImportContextImpl(Item importRoot, String systemId,
+                             InputContext ctx, InputStream stream, IOListener ioListener,
+                             Detector detector) throws IOException {
+        this.importRoot = importRoot;
+        this.systemId = systemId;
+        this.inputCtx = ctx;
+        this.ioListener = (ioListener != null) ? ioListener
+                : new DefaultIOListener(log);
+        this.detector = detector;
+
+        Metadata metadata = new Metadata();
+        if (ctx != null && ctx.getContentType() != null) {
+            metadata.set(Metadata.CONTENT_TYPE, ctx.getContentType());
+        }
+        if (systemId != null) {
+            metadata.set(Metadata.RESOURCE_NAME_KEY, systemId);
+        }
+        if (stream != null && !stream.markSupported()) {
+            stream = new BufferedInputStream(stream);
+        }
+        type = detector.detect(stream, metadata);
+
+        this.inputFile = IOUtil.getTempFile(stream);
+    }
+
+    /**
+     * @see ImportContext#getIOListener()
+     */
+    public IOListener getIOListener() {
+        return ioListener;
+    }
+
+    /**
+     * @see ImportContext#getImportRoot()
+     */
+    public Item getImportRoot() {
+        return importRoot;
+    }
+
+    /**
+     * @see ImportContext#getDetector()
+     */
+    public Detector getDetector() {
+        return detector;
+    }
+
+    /**
+     * @see ImportContext#hasStream()
+     */
+    public boolean hasStream() {
+        return inputFile != null;
+    }
+
+    /**
+     * Returns a new <code>InputStream</code> to the temporary file created
+     * during instanciation or <code>null</code>, if this context does not
+     * provide a stream.
+     *
+     * @see ImportContext#getInputStream()
+     * @see #hasStream()
+     */
+    public InputStream getInputStream() {
+        checkCompleted();
+        InputStream in = null;
+        if (inputFile != null) {
+            try {
+                in = new FileInputStream(inputFile);
+            } catch (IOException e) {
+                // unexpected error... ignore and return null
+            }
+        }
+        return in;
+    }
+
+    /**
+     * @see ImportContext#getSystemId()
+     */
+    public String getSystemId() {
+        return systemId;
+    }
+
+    /**
+     * @see ImportContext#getModificationTime()
+     */
+    public long getModificationTime() {
+        return (inputCtx != null) ? inputCtx.getModificationTime() : new Date().getTime();
+    }
+
+    /**
+     * @see ImportContext#getContentLanguage()
+     */
+    public String getContentLanguage() {
+        return (inputCtx != null) ? inputCtx.getContentLanguage() : null;
+    }
+
+    /**
+     * @see ImportContext#getContentLength()
+     */
+    public long getContentLength() {
+        long length = IOUtil.UNDEFINED_LENGTH;
+        if (inputCtx != null) {
+            length = inputCtx.getContentLength();
+        }
+        if (length < 0 && inputFile != null) {
+            length = inputFile.length();
+        }
+        if (length < 0) {
+            log.debug("Unable to determine content length -> default value = "
+                    + IOUtil.UNDEFINED_LENGTH);
+        }
+        return length;
+    }
+
+    /**
+     * @see ImportContext#getMimeType()
+     */
+    public String getMimeType() {
+        return IOUtil.getMimeType(type.toString());
+    }
+
+    /**
+     * @see ImportContext#getEncoding()
+     */
+    public String getEncoding() {
+        return IOUtil.getEncoding(type.toString());
+    }
+
+    /**
+     * @see ImportContext#getProperty(Object)
+     */
+    public Object getProperty(Object propertyName) {
+        return (inputCtx != null) ? inputCtx.getProperty(propertyName.toString()) : null;
+    }
+
+    /**
+     * @see ImportContext#informCompleted(boolean)
+     */
+    public void informCompleted(boolean success) {
+        checkCompleted();
+        completed = true;
+        if (inputFile != null) {
+            inputFile.delete();
+        }
+    }
+
+    /**
+     * @see ImportContext#isCompleted()
+     */
+    public boolean isCompleted() {
+        return completed;
+    }
+
+    /**
+     * @throws IllegalStateException if the context is already completed.
+     * @see #isCompleted()
+     * @see #informCompleted(boolean)
+     */
+    private void checkCompleted() {
+        if (completed) {
+            throw new IllegalStateException("ImportContext has already been consumed.");
+        }
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java 
(original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java 
Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -25,35 +28,31 @@ import org.apache.tika.parser.AutoDetect
 import org.apache.tika.parser.ParseContext;
 
 public class Language {
-
-       public static void languageDetection() throws IOException {
-               LanguageProfile profile = new LanguageProfile(
-                               "Alla människor är födda fria och"
-                                               + " lika i värde och rättigheter.");
-
-               LanguageIdentifier identifier = new LanguageIdentifier(profile);
-               System.out.println(identifier.getLanguage());
-       }
-
-       public static void languageDetectionWithWriter() throws IOException {
-               ProfilingWriter writer = new ProfilingWriter();
-               writer.append("Minden emberi lény");
-               writer.append(" szabadon születik és");
-               writer.append(" egyenlő méltósága és");
-               writer.append(" joga van.");
-
-               LanguageIdentifier identifier = writer.getLanguage();
-               System.out.println(identifier.getLanguage());
-               writer.close();
-
-       }
-
-       public static void languageDetectionWithHandler() throws Exception {
-               ProfilingHandler handler = new ProfilingHandler();
-               new AutoDetectParser().parse(System.in, handler, new Metadata(),
-                               new ParseContext());
-
-               LanguageIdentifier identifier = handler.getLanguage();
-               System.out.println(identifier.getLanguage());
-       }
+    public static void languageDetection() throws IOException {
+        LanguageProfile profile = new LanguageProfile(
+                "Alla människor är födda fria och lika i värde och rättigheter.");
+
+        LanguageIdentifier identifier = new LanguageIdentifier(profile);
+        System.out.println(identifier.getLanguage());
+    }
+
+    public static void languageDetectionWithWriter() throws IOException {
+        ProfilingWriter writer = new ProfilingWriter();
+        writer.append("Minden emberi lény");
+        writer.append(" szabadon születik és");
+        writer.append(" egyenlő méltósága és");
+        writer.append(" joga van.");
+
+        LanguageIdentifier identifier = writer.getLanguage();
+        System.out.println(identifier.getLanguage());
+        writer.close();
+    }
+
+    public static void languageDetectionWithHandler() throws Exception {
+        ProfilingHandler handler = new ProfilingHandler();
+        new AutoDetectParser().parse(System.in, handler, new Metadata(), new ParseContext());
+
+        LanguageIdentifier identifier = handler.getLanguage();
+        System.out.println(identifier.getLanguage());
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -29,21 +32,19 @@ import org.xml.sax.SAXException;
 
 @SuppressWarnings("deprecation")
 public class LanguageDetectingParser extends DelegatingParser {
+    private static final long serialVersionUID = 4291320409396502774L;
 
-       private static final long serialVersionUID = 4291320409396502774L;
-
-       public void parse(InputStream stream, ContentHandler handler,
-                       final Metadata metadata, ParseContext context) throws SAXException,
-                       IOException, TikaException {
-               ProfilingHandler profiler = new ProfilingHandler();
-               ContentHandler tee = new TeeContentHandler(handler, profiler);
-
-               super.parse(stream, tee, metadata, context);
-
-               LanguageIdentifier identifier = profiler.getLanguage();
-               if (identifier.isReasonablyCertain()) {
-                       metadata.set(Metadata.LANGUAGE, identifier.getLanguage());
-               }
-       }
-
+    public void parse(InputStream stream, ContentHandler handler,
+                      final Metadata metadata, ParseContext context) throws SAXException,
+            IOException, TikaException {
+        ProfilingHandler profiler = new ProfilingHandler();
+        ContentHandler tee = new TeeContentHandler(handler, profiler);
+
+        super.parse(stream, tee, metadata, context);
+
+        LanguageIdentifier identifier = profiler.getLanguage();
+        if (identifier.isReasonablyCertain()) {
+            metadata.set(Metadata.LANGUAGE, identifier.getLanguage());
+        }
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -44,171 +47,164 @@ import org.xml.sax.helpers.DefaultHandle
  */
 @SuppressWarnings("serial")
 public class LazyTextExtractorField extends AbstractField {
-
-       /**
-        * The logger instance for this class.
-        */
-       private static final Logger log = LoggerFactory
-                       .getLogger(LazyTextExtractorField.class);
-
-       /**
-        * The exception used to forcibly terminate the extraction process when the
-        * maximum field length is reached.
-        */
-       private static final SAXException STOP = new SAXException(
-                       "max field length reached");
-
-       /**
-        * The extracted text content of the given binary value. Set to non-null
-        * when the text extraction task finishes.
-        */
-       private volatile String extract = null;
-
-       /**
-        * Creates a new <code>LazyTextExtractorField</code> with the given
-        * <code>name</code>.
-        *
-        * @param name
-        *            the name of the field.
-        * @param reader
-        *            the reader where to obtain the string from.
-        * @param highlighting
-        *            set to <code>true</code> to enable result highlighting support
-        */
-       public LazyTextExtractorField(Parser parser, InternalValue value,
-                       Metadata metadata, Executor executor, boolean highlighting,
-                       int maxFieldLength) {
-               super(FieldNames.FULLTEXT, highlighting ? Store.YES : Store.NO,
-                               Field.Index.ANALYZED, highlighting ? TermVector.WITH_OFFSETS
-                                               : TermVector.NO);
-               executor.execute(new ParsingTask(parser, value, metadata,
-                               maxFieldLength));
-       }
-
-       /**
-        * Returns the extracted text. This method blocks until the text extraction
-        * task has been completed.
-        *
-        * @return the string value of this field
-        */
-       public synchronized String stringValue() {
-               try {
-                       while (!isExtractorFinished()) {
-                               wait();
-                       }
-                       return extract;
-               } catch (InterruptedException e) {
-                       log.error("Text extraction thread was interrupted", e);
-                       return "";
-               }
-       }
-
-       /**
-        * @return always <code>null</code>
-        */
-       public Reader readerValue() {
-               return null;
-       }
-
-       /**
-        * @return always <code>null</code>
-        */
-       public byte[] binaryValue() {
-               return null;
-       }
-
-       /**
-        * @return always <code>null</code>
-        */
-       public TokenStream tokenStreamValue() {
-               return null;
-       }
-
-       /**
-        * Checks whether the text extraction task has finished.
-        *
-        * @return <code>true</code> if the extracted text is available
-        */
-       public boolean isExtractorFinished() {
-               return extract != null;
-       }
-
-       private synchronized void setExtractedText(String value) {
-               extract = value;
-               notify();
-       }
-
-       /**
-        * Releases all resources associated with this field.
-        */
-       public void dispose() {
-               // TODO: Cause the ContentHandler below to throw an exception
-       }
-
-       /**
-        * The background task for extracting text from a binary value.
-        */
-       private class ParsingTask extends DefaultHandler implements Runnable {
-
-               private final Parser parser;
-
-               private final InternalValue value;
-
-               private final Metadata metadata;
-
-               private final int maxFieldLength;
-
-               private final StringBuilder builder = new StringBuilder();
-
-               private final ParseContext context = new ParseContext();
-
-               // NOTE: not a part of Jackrabbit code, made
-               private final ContentHandler handler = new DefaultHandler();
-
-               public ParsingTask(Parser parser, InternalValue value,
-                               Metadata metadata, int maxFieldLength) {
-                       this.parser = parser;
-                       this.value = value;
-                       this.metadata = metadata;
-                       this.maxFieldLength = maxFieldLength;
-               }
-
-               public void run() {
-                       try {
-                               try (InputStream stream = value.getStream()) {
-                                       parser.parse(stream, handler, metadata, context);
-                               }
-                       } catch (LinkageError e) {
-                               // Capture and ignore
-                       } catch (Throwable t) {
-                               if (t != STOP) {
-                                       log.debug("Failed to extract text.", t);
-                                       setExtractedText("TextExtractionError");
-                                       return;
-                               }
-                       } finally {
-                               value.discard();
-                       }
-                       setExtractedText(handler.toString());
-
-               }
-
-               @Override
-               public void characters(char[] ch, int start, int length)
-                               throws SAXException {
-                       builder.append(ch, start,
-                                       Math.min(length, maxFieldLength - builder.length()));
-                       if (builder.length() >= maxFieldLength) {
-                               throw STOP;
-                       }
-               }
-
-               @Override
-               public void ignorableWhitespace(char[] ch, int start, int length)
-                               throws SAXException {
-                       characters(ch, start, length);
-               }
-
-       }
-
+    /**
+     * The logger instance for this class.
+     */
+    private static final Logger log = LoggerFactory.getLogger(LazyTextExtractorField.class);
+
+    /**
+     * The exception used to forcibly terminate the extraction process when the
+     * maximum field length is reached.
+     * <p>
+     * Such exceptions shouldn't be used in logging since its stack trace is meaningless.
+     */
+    private static final SAXException STOP = new SAXException("max field length reached");
+
+    /**
+     * The extracted text content of the given binary value. Set to non-null
+     * when the text extraction task finishes.
+     */
+    private volatile String extract = null;
+
+    /**
+     * Creates a new <code>LazyTextExtractorField</code> with the given
+     * <code>name</code>.
+     *
+     * @param name         the name of the field.
+     * @param reader       the reader where to obtain the string from.
+     * @param highlighting set to <code>true</code> to enable result highlighting support
+     */
+    public LazyTextExtractorField(Parser parser, InternalValue value,
+                                  Metadata metadata, Executor executor, boolean highlighting,
+                                  int maxFieldLength) {
+        super(FieldNames.FULLTEXT, highlighting ? Store.YES : Store.NO,
+                Field.Index.ANALYZED, highlighting ? TermVector.WITH_OFFSETS
+                        : TermVector.NO);
+        executor.execute(new ParsingTask(parser, value, metadata,
+                maxFieldLength));
+    }
+
+    /**
+     * Returns the extracted text. This method blocks until the text extraction
+     * task has been completed.
+     *
+     * @return the string value of this field
+     */
+    public synchronized String stringValue() {
+        try {
+            while (!isExtractorFinished()) {
+                wait();
+            }
+            return extract;
+        } catch (InterruptedException e) {
+            log.error("Text extraction thread was interrupted", e);
+            return "";
+        }
+    }
+
+    /**
+     * @return always <code>null</code>
+     */
+    public Reader readerValue() {
+        return null;
+    }
+
+    /**
+     * @return always <code>null</code>
+     */
+    public byte[] binaryValue() {
+        return null;
+    }
+
+    /**
+     * @return always <code>null</code>
+     */
+    public TokenStream tokenStreamValue() {
+        return null;
+    }
+
+    /**
+     * Checks whether the text extraction task has finished.
+     *
+     * @return <code>true</code> if the extracted text is available
+     */
+    public boolean isExtractorFinished() {
+        return extract != null;
+    }
+
+    private synchronized void setExtractedText(String value) {
+        extract = value;
+        notify();
+    }
+
+    /**
+     * Releases all resources associated with this field.
+     */
+    public void dispose() {
+        // TODO: Cause the ContentHandler below to throw an exception
+    }
+
+    /**
+     * The background task for extracting text from a binary value.
+     */
+    private class ParsingTask extends DefaultHandler implements Runnable {
+        private final Parser parser;
+
+        private final InternalValue value;
+
+        private final Metadata metadata;
+
+        private final int maxFieldLength;
+
+        private final StringBuilder builder = new StringBuilder();
+
+        private final ParseContext context = new ParseContext();
+
+        // NOTE: not a part of Jackrabbit code, made
+        private final ContentHandler handler = new DefaultHandler();
+
+        public ParsingTask(Parser parser, InternalValue value,
+                           Metadata metadata, int maxFieldLength) {
+            this.parser = parser;
+            this.value = value;
+            this.metadata = metadata;
+            this.maxFieldLength = maxFieldLength;
+        }
+
+        public void run() {
+            try {
+                try (InputStream stream = value.getStream()) {
+                    parser.parse(stream, handler, metadata, context);
+                }
+            } catch (LinkageError e) {
+                // Capture and ignore
+            } catch (Throwable t) {
+                if (t != STOP) {
+                    log.debug("Failed to extract text.", t);
+                    setExtractedText("TextExtractionError");
+                    return;
+                }
+            } finally {
+                value.discard();
+            }
+            setExtractedText(handler.toString());
+
+        }
+
+        @Override
+        public void characters(char[] ch, int start, int length)
+                throws SAXException {
+            builder.append(ch, start,
+                    Math.min(length, maxFieldLength - builder.length()));
+            if (builder.length() >= maxFieldLength) {
+                throw STOP;
+            }
+        }
+
+        @Override
+        public void ignorableWhitespace(char[] ch, int start, int length)
+                throws SAXException {
+            characters(ch, start, length);
+        }
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,8 +17,8 @@
 
 package org.apache.tika.example;
 
-/* */
 import java.io.File;
+
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Index;
@@ -24,23 +27,19 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.tika.Tika;
 
 public class LuceneIndexer {
+    private final Tika tika;
 
-       private final Tika tika;
-
-       private final IndexWriter writer;
-
-       public LuceneIndexer(Tika tika, IndexWriter writer) {
-               this.tika = tika;
-               this.writer = writer;
-       }
-
-       public void indexDocument(File file) throws Exception {
-               Document document = new Document();
-               document.add(new Field("filename", file.getName(), Store.YES,
-                               Index.ANALYZED));
-               document.add(new Field("fulltext", tika.parseToString(file), Store.NO,
-                               Index.ANALYZED));
-               writer.addDocument(document);
-       }
+    private final IndexWriter writer;
 
+    public LuceneIndexer(Tika tika, IndexWriter writer) {
+        this.tika = tika;
+        this.writer = writer;
+    }
+
+    public void indexDocument(File file) throws Exception {
+        Document document = new Document();
+        document.add(new Field("filename", file.getName(), Store.YES, Index.ANALYZED));
+        document.add(new Field("fulltext", tika.parseToString(file), Store.NO, Index.ANALYZED));
+        writer.addDocument(document);
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -30,35 +33,33 @@ import org.apache.tika.Tika;
 
 @SuppressWarnings("deprecation")
 public class LuceneIndexerExtended {
+    private final Tika tika;
 
-       private final IndexWriter writer;
-
-       private final Tika tika;
-
-       public LuceneIndexerExtended(IndexWriter writer, Tika tika) {
-               this.writer = writer;
-               this.tika = tika;
-       }
-
-       public static void main(String[] args) throws Exception {
-               try (IndexWriter writer = new IndexWriter(new SimpleFSDirectory(new File(
-                               args[0])), new StandardAnalyzer(Version.LUCENE_30),
-                               MaxFieldLength.UNLIMITED)) {
-                       LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer);
-                       for (int i = 1; i < args.length; i++) {
-                               indexer.indexDocument(new File(args[i]));
-                       }
-               }
-       }
-
-       public void indexDocument(File file) throws Exception {
-               try (Reader fulltext = tika.parse(file)) {
-                       Document document = new Document();
-                       document.add(new Field("filename", file.getName(), Store.YES,
-                                       Index.ANALYZED));
-                       document.add(new Field("fulltext", fulltext));
-                       writer.addDocument(document);
-               }
-       }
+    private final IndexWriter writer;
 
+    public LuceneIndexerExtended(IndexWriter writer, Tika tika) {
+        this.writer = writer;
+        this.tika = tika;
+    }
+
+    public static void main(String[] args) throws Exception {
+        try (IndexWriter writer = new IndexWriter(
+                new SimpleFSDirectory(new File(args[0])),
+                new StandardAnalyzer(Version.LUCENE_30),
+                MaxFieldLength.UNLIMITED)) {
+            LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer);
+            for (int i = 1; i < args.length; i++) {
+                indexer.indexDocument(new File(args[i]));
+            }
+        }
+    }
+
+    public void indexDocument(File file) throws Exception {
+        try (Reader fulltext = tika.parse(file)) {
+            Document document = new Document();
+            document.add(new Field("filename", file.getName(), Store.YES, Index.ANALYZED));
+            document.add(new Field("fulltext", fulltext));
+            writer.addDocument(document);
+        }
+    }
 }

Modified: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
 (original)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
 Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -21,38 +24,35 @@ import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
 
 public class MediaTypeExample {
+    public static void describeMediaType() {
+        MediaType type = MediaType.parse("text/plain; charset=UTF-8");
 
-       public static void describeMediaType() {
-
-               MediaType type = MediaType.parse("text/plain; charset=UTF-8");
-
-               System.out.println("type:    " + type.getType());
-               System.out.println("subtype: " + type.getSubtype());
-
-               Map<String, String> parameters = type.getParameters();
-               System.out.println("parameters:");
-               for (String name : parameters.keySet()) {
-                       System.out.println("  " + name + "=" + parameters.get(name));
-               }
-       }
-
-       public static void listAllTypes() {
-               MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
-
-               for (MediaType type : registry.getTypes()) {
-                       Set<MediaType> aliases = registry.getAliases(type);
-                       System.out.println(type + ", also known as " + aliases);
-               }
-       }
-
-       public static void main(String[] args) throws Exception {
-               MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
-
-               MediaType type = MediaType.parse("image/svg+xml");
-               while (type != null) {
-                       System.out.println(type);
-                       type = registry.getSupertype(type);
-               }
-       }
+        System.out.println("type:    " + type.getType());
+        System.out.println("subtype: " + type.getSubtype());
 
+        Map<String, String> parameters = type.getParameters();
+        System.out.println("parameters:");
+        for (String name : parameters.keySet()) {
+            System.out.println("  " + name + "=" + parameters.get(name));
+        }
+    }
+
+    public static void listAllTypes() {
+        MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
+
+        for (MediaType type : registry.getTypes()) {
+            Set<MediaType> aliases = registry.getAliases(type);
+            System.out.println(type + ", also known as " + aliases);
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
+
+        MediaType type = MediaType.parse("image/svg+xml");
+        while (type != null) {
+            System.out.println(type);
+            type = registry.getSupertype(type);
+        }
+    }
 }


Reply via email to