Author: mattmann
Date: Mon May  4 21:52:53 2015
New Revision: 1677694

URL: http://svn.apache.org/r1677694
Log:
TIKA-1562: Add examples from the Tika in Action book

Added:
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java 
  (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java  
 (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java 
  (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java  
 (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
   (with props)
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java 
  (with props)
    
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/spring.xml   
(with props)
    
tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java
   (with props)
    
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java
   (with props)
    
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java
   (with props)
Modified:
    tika/trunk/tika-example/pom.xml

Modified: tika/trunk/tika-example/pom.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/pom.xml?rev=1677694&r1=1677693&r2=1677694&view=diff
==============================================================================
--- tika/trunk/tika-example/pom.xml (original)
+++ tika/trunk/tika-example/pom.xml Mon May  4 21:52:53 2015
@@ -56,6 +56,17 @@
   <!-- List of dependencies that we depend on for the examples. See the full 
list of Tika
        modules and how to use them at 
http://mvnrepository.com/artifact/org.apache.tika.-->
   <dependencies>
+       <dependency>
+           <groupId>org.apache.tika</groupId>
+           <artifactId>tika-app</artifactId>
+           <version>${project.version}</version>
+           <exclusions>
+             <exclusion>
+               <artifactId>tika-parsers</artifactId>
+               <groupId>org.apache.tika</groupId>
+             </exclusion>
+           </exclusions>
+       </dependency>  
     <dependency>
       <groupId>org.apache.tika</groupId>
       <artifactId>tika-parsers</artifactId>
@@ -78,6 +89,36 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+       <dependency>
+           <groupId>javax.jcr</groupId>
+           <artifactId>jcr</artifactId>
+           <version>2.0</version>
+       </dependency>
+       <dependency>
+           <groupId>org.apache.jackrabbit</groupId>
+           <artifactId>jackrabbit-jcr-server</artifactId>
+           <version>2.3.6</version>
+       </dependency>
+    <dependency>
+        <groupId>org.apache.jackrabbit</groupId>
+        <artifactId>jackrabbit-core</artifactId>
+        <version>2.3.6</version>
+    </dependency>      
+       <dependency>
+           <groupId>org.apache.lucene</groupId>
+           <artifactId>lucene-core</artifactId>
+           <version>3.5.0</version>
+       </dependency>   
+       <dependency>
+           <groupId>commons-io</groupId>
+           <artifactId>commons-io</artifactId>
+           <version>2.4</version>
+       </dependency>
+       <dependency>
+           <groupId>org.springframework</groupId>
+           <artifactId>spring-context</artifactId>
+           <version>3.0.2.RELEASE</version>
+       </dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,55 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.InputStream;
+
+import org.apache.tika.Tika;
+import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypesFactory;
+
+/**
+ * Shows two ways of customising type detection: handing the {@link Tika}
+ * facade an explicitly loaded type registry, and combining that registry
+ * with a hand-written {@link Detector}.
+ */
+public class AdvancedTypeDetector {
+
+       /** Registry description passed to {@link MimeTypesFactory#create(String)} by both examples. */
+       private static final String MIME_TYPES_RESOURCE = "/org/apache/tika/mime/tika-mimetypes.xml";
+
+       /**
+        * Detects a type using a facade backed only by the explicitly loaded
+        * type registry.
+        *
+        * @param name the value handed to {@link Tika#detect(String)}
+        * @return the detected type as a string
+        * @throws Exception if the registry cannot be created
+        */
+       public static String detectWithCustomConfig(String name) throws Exception {
+               return new Tika(MimeTypesFactory.create(MIME_TYPES_RESOURCE)).detect(name);
+       }
+
+       /**
+        * Detects a type using a composite of a custom detector and the type
+        * registry. The custom detector only looks at the
+        * {@code my-custom-type-override} metadata entry.
+        *
+        * @param name the value handed to {@link Tika#detect(String)}
+        * @return the detected type as a string
+        * @throws Exception if the registry cannot be created
+        */
+       public static String detectWithCustomDetector(String name) throws Exception {
+               Detector registry = MimeTypesFactory.create(MIME_TYPES_RESOURCE);
+
+               Detector override = new Detector() {
+                       private static final long serialVersionUID = -5420638839201540749L;
+
+                       public MediaType detect(InputStream input, Metadata metadata) {
+                               // An explicit override wins; otherwise report the generic type.
+                               String declared = metadata.get("my-custom-type-override");
+                               return declared == null ? MediaType.OCTET_STREAM : MediaType.parse(declared);
+                       }
+               };
+
+               return new Tika(new CompositeDetector(override, registry)).detect(name);
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,49 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.net.URL;
+
+import org.apache.tika.Tika;
+import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.mime.MimeTypesFactory;
+
+/**
+ * Detection examples that load a custom type description file. The paths are
+ * placeholders and must be adapted before the examples can run.
+ */
+public class CustomMimeInfo {
+
+       /** Builds the type database from the custom type description file. */
+       private static MimeTypes loadPrescriptionTypes() throws Exception {
+               return MimeTypesFactory.create(new URL("file:///path/to/prescription-type.xml"));
+       }
+
+       /**
+        * Detects the type of a prescription file using only the custom type
+        * database.
+        *
+        * @return the detected type as a string
+        * @throws Exception if the type database cannot be loaded or detection fails
+        */
+       public static String customMimeInfo() throws Exception {
+               Tika facade = new Tika(loadPrescriptionTypes());
+               return facade.detect("/path/to/prescription.xpd");
+       }
+
+       /**
+        * Detects the type of a prescription file using the custom type database
+        * combined with an {@link EncryptedPrescriptionDetector}.
+        *
+        * @return the detected type as a string
+        * @throws Exception if the type database cannot be loaded or detection fails
+        */
+       public static String customCompositeDetector() throws Exception {
+               MimeTypes prescriptionTypes = loadPrescriptionTypes();
+               CompositeDetector combined = new CompositeDetector(prescriptionTypes,
+                               new EncryptedPrescriptionDetector());
+               return new Tika(combined).detect("/path/to/tmp/prescription.xpd");
+       }
+
+       /** Runs both examples and prints their results. */
+       public static void main(String[] args) throws Exception {
+               String plain = customMimeInfo();
+               System.out.println("customMimeInfo=" + plain);
+               String composite = customCompositeDetector();
+               System.out.println("customCompositeDetector=" + composite);
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,30 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import org.apache.tika.cli.TikaCLI;
+
+/**
+ *
+ * Print the supported Tika Metadata models and their fields.
+ *
+ */
+public class DescribeMetadata {
+
+       public static void main(String[] args) throws Exception {
+               TikaCLI.main(new String[] { "--list-met-models" });
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,139 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parses the output of /bin/ls and counts the number of files and the number of
+ * executables using Tika.
+ * <p>
+ * Every input line is split on whitespace. A line is used only when it has at
+ * least nine tokens: permissions, hard-link count, owner, group, size, three
+ * date/time tokens and one or more file-name tokens (presumably the layout of
+ * {@code ls -l}). Shorter lines are skipped. All values are recorded as
+ * {@link Metadata} entries; nothing is written to the {@link ContentHandler}.
+ */
+public class DirListParser implements Parser {
+
+       private static final long serialVersionUID = 2717930544410610735L;
+
+       /** Index of the first file-name token; a usable line has at least one token at or past it. */
+       private static final int FIRST_NAME_TOKEN = 8;
+
+       private static final Set<MediaType> SUPPORTED_TYPES = new HashSet<MediaType>(
+                       Arrays.asList(MediaType.TEXT_PLAIN));
+
+       /**
+        * Returns the media types handled by this parser (plain text only).
+        *
+        * @param context the parse context (not consulted)
+        * @return the set of supported media types
+        */
+       public Set<MediaType> getSupportedTypes(ParseContext context) {
+               return SUPPORTED_TYPES;
+       }
+
+       /**
+        * Convenience overload that parses with a fresh, empty {@link ParseContext}.
+        *
+        * @param is       the directory listing to read
+        * @param handler  the content handler (receives no events)
+        * @param metadata receives one set of entries per listed file
+        */
+       public void parse(InputStream is, ContentHandler handler, Metadata metadata)
+                       throws IOException, SAXException, TikaException {
+               this.parse(is, handler, metadata, new ParseContext());
+       }
+
+       /**
+        * Reads the listing line by line and records the columns of every
+        * well-formed line in {@code metadata}.
+        *
+        * @param is       the directory listing to read; it is not closed here
+        * @param handler  the content handler (receives no events)
+        * @param metadata receives one set of entries per listed file
+        * @param context  the parse context (not consulted)
+        * @throws IOException if the listing cannot be read
+        */
+       public void parse(InputStream is, ContentHandler handler,
+                       Metadata metadata, ParseContext context) throws IOException,
+                       SAXException, TikaException {
+
+               // Read the caller's stream directly instead of spooling it to a
+               // temporary file that would never be cleaned up. The reader is
+               // intentionally left open: closing it would close the caller's
+               // stream. The platform default charset is used, as before.
+               java.io.BufferedReader reader = new java.io.BufferedReader(
+                               new java.io.InputStreamReader(is));
+               String line;
+               while ((line = reader.readLine()) != null) {
+                       String[] fileToks = line.split("\\s+");
+                       // Lines without a file-name token (e.g. a "total N" summary)
+                       // are skipped; accepting them used to fail with a
+                       // StringIndexOutOfBoundsException when exactly 8 tokens were present.
+                       if (fileToks.length <= FIRST_NAME_TOKEN) {
+                               continue;
+                       }
+
+                       String lastModDate = fileToks[5] + " " + fileToks[6] + " " + fileToks[7];
+
+                       // File names containing whitespace were split above; rejoin
+                       // their tokens with single spaces.
+                       StringBuilder fileName = new StringBuilder(fileToks[FIRST_NAME_TOKEN]);
+                       for (int i = FIRST_NAME_TOKEN + 1; i < fileToks.length; i++) {
+                               fileName.append(' ').append(fileToks[i]);
+                       }
+
+                       addMetadata(metadata, fileToks[0], fileToks[1], fileToks[2],
+                                       fileToks[3], fileToks[4], lastModDate,
+                                       fileName.toString());
+               }
+       }
+
+       /**
+        * Parses a listing from standard input and prints the number of files and
+        * the number of executables found.
+        */
+       public static void main(String[] args) throws IOException, SAXException,
+                       TikaException {
+               DirListParser parser = new DirListParser();
+               Metadata met = new Metadata();
+               parser.parse(System.in, new BodyContentHandler(), met);
+
+               System.out.println("Num files: " + met.getValues("Filename").length);
+               System.out.println("Num executables: " + met.get("NumExecutables"));
+       }
+
+       /**
+        * Adds the columns of one listing line to the metadata and bumps the
+        * {@code NumExecutables} counter when the permissions contain an
+        * {@code x} but no {@code d}.
+        */
+       private void addMetadata(Metadata metadata, String filePerms,
+                       String numHardLinks, String fileOwner, String fileOwnerGroup,
+                       String fileSize, String lastModDate, String fileName) {
+               metadata.add("FilePermissions", filePerms);
+               metadata.add("NumHardLinks", numHardLinks);
+               metadata.add("FileOwner", fileOwner);
+               metadata.add("FileOwnerGroup", fileOwnerGroup);
+               metadata.add("FileSize", fileSize);
+               metadata.add("LastModifiedDate", lastModDate);
+               metadata.add("Filename", fileName);
+
+               boolean executableFile = filePerms.indexOf("x") != -1
+                               && filePerms.indexOf("d") == -1;
+               if (executableFile) {
+                       String previous = metadata.get("NumExecutables");
+                       int numExecs = previous == null ? 0 : Integer.parseInt(previous);
+                       metadata.set("NumExecutables", String.valueOf(numExecs + 1));
+               }
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,46 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.net.URL;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.pdf.PDFParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Grabs a PDF file from a URL and prints its {@link Metadata}
+ */
+public class DisplayMetInstance {
+
+       /**
+        * Parses the document behind {@code url} with a {@link PDFParser} and
+        * returns the metadata the parser collected.
+        *
+        * @param url location of the PDF document
+        * @return the metadata populated by the parser
+        * @throws IOException   if the URL cannot be opened or read
+        * @throws SAXException  if the content handler rejects the parsed content
+        * @throws TikaException if the document cannot be parsed
+        */
+       public static Metadata getMet(URL url) throws IOException, SAXException,
+                       TikaException {
+               Metadata met = new Metadata();
+               PDFParser parser = new PDFParser();
+               // The stream is opened here, so it must be closed here as well,
+               // whether or not parsing succeeds.
+               java.io.InputStream stream = url.openStream();
+               try {
+                       parser.parse(stream, new BodyContentHandler(), met,
+                                       new ParseContext());
+               } finally {
+                       stream.close();
+               }
+               return met;
+       }
+
+       /**
+        * Prints the metadata of the PDF whose URL is given as the first
+        * argument.
+        */
+       public static void main(String[] args) throws Exception {
+               if (args.length < 1) {
+                       System.err.println("Usage: DisplayMetInstance <url-of-pdf>");
+                       return;
+               }
+               Metadata met = DisplayMetInstance.getMet(new URL(args[0]));
+               System.out.println(met);
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,62 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.GeneralSecurityException;
+import java.security.Key;
+
+import javax.crypto.Cipher;
+import javax.crypto.CipherInputStream;
+import javax.xml.namespace.QName;
+
+import org.apache.tika.detect.Detector;
+import org.apache.tika.detect.XmlRootExtractor;
+import org.apache.tika.io.LookaheadInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+/**
+ * Detector that decrypts the first part of a stream with the key obtained
+ * from {@link Pharmacy} and reports {@code application/x-prescription} when
+ * the decrypted data is an XML document with a {@code prescription} root
+ * element in the {@code http://example.com/xpd} namespace.
+ */
+public class EncryptedPrescriptionDetector implements Detector {
+
+       private static final long serialVersionUID = -1709652690773421147L;
+
+       /**
+        * Inspects at most the first 1024 bytes of the stream.
+        *
+        * @param stream   the document to inspect
+        * @param metadata document metadata (not consulted)
+        * @return the prescription type on a match, otherwise
+        *         {@link MediaType#OCTET_STREAM}
+        * @throws IOException if the stream cannot be read
+        */
+       public MediaType detect(InputStream stream, Metadata metadata)
+                       throws IOException {
+               Key decryptionKey = Pharmacy.getKey();
+               MediaType detected = MediaType.OCTET_STREAM;
+
+               InputStream head = new LookaheadInputStream(stream, 1024);
+               try {
+                       Cipher rsa = Cipher.getInstance("RSA");
+                       rsa.init(Cipher.DECRYPT_MODE, decryptionKey);
+
+                       QName root = new XmlRootExtractor()
+                                       .extractRootElement(new CipherInputStream(head, rsa));
+                       if (isPrescriptionRoot(root)) {
+                               detected = MediaType.application("x-prescription");
+                       }
+               } catch (GeneralSecurityException e) {
+                       // unable to decrypt, fall through and report the generic type
+               } finally {
+                       head.close();
+               }
+               return detected;
+       }
+
+       /** Tells whether the given root element name identifies a prescription document. */
+       private static boolean isPrescriptionRoot(QName root) {
+               if (root == null) {
+                       return false;
+               }
+               return "http://example.com/xpd".equals(root.getNamespaceURI())
+                               && "prescription".equals(root.getLocalPart());
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,60 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.GeneralSecurityException;
+import java.security.Key;
+import java.util.Collections;
+import java.util.Set;
+
+import javax.crypto.Cipher;
+import javax.crypto.CipherInputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for {@code application/x-prescription} documents: decrypts the
+ * stream with the key obtained from {@link Pharmacy} and hands the decrypted
+ * data to a {@link PrescriptionParser}.
+ */
+public class EncryptedPrescriptionParser extends AbstractParser {
+
+       private static final long serialVersionUID = -7816987249611278541L;
+
+       /**
+        * Returns the single media type handled by this parser.
+        *
+        * @param context the parse context (not consulted)
+        * @return a singleton set holding {@code application/x-prescription}
+        */
+       public Set<MediaType> getSupportedTypes(ParseContext context) {
+               MediaType prescription = MediaType.application("x-prescription");
+               return Collections.singleton(prescription);
+       }
+
+       /**
+        * Decrypts the stream and delegates to {@link PrescriptionParser}.
+        *
+        * @param stream   the encrypted document
+        * @param handler  receives the events produced by the delegate parser
+        * @param metadata passed through to the delegate parser
+        * @param context  passed through to the delegate parser
+        * @throws TikaException if the cipher cannot be set up for decryption
+        */
+       public void parse(InputStream stream, ContentHandler handler,
+                       Metadata metadata, ParseContext context) throws IOException,
+                       SAXException, TikaException {
+               try {
+                       Key decryptionKey = Pharmacy.getKey();
+                       Cipher rsa = Cipher.getInstance("RSA");
+                       rsa.init(Cipher.DECRYPT_MODE, decryptionKey);
+
+                       PrescriptionParser delegate = new PrescriptionParser();
+                       delegate.parse(new CipherInputStream(stream, rsa), handler,
+                                       metadata, context);
+               } catch (GeneralSecurityException e) {
+                       throw new TikaException("Unable to decrypt a digital prescription",
+                                       e);
+               }
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,242 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Date;
+
+import javax.jcr.Item;
+
+import org.apache.jackrabbit.server.io.DefaultIOListener;
+import org.apache.jackrabbit.server.io.IOListener;
+import org.apache.jackrabbit.server.io.IOUtil;
+import org.apache.jackrabbit.server.io.ImportContext;
+import org.apache.jackrabbit.webdav.io.InputContext;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link ImportContext} implementation that asks a Tika {@link Detector} for
+ * the media type of the imported document and keeps the document content in
+ * a temporary file until the import has been completed.
+ */
+public class ImportContextImpl implements ImportContext {
+
+       private static final Logger log = LoggerFactory
+                       .getLogger(ImportContextImpl.class);
+
+       private final IOListener ioListener;
+       private final Item importRoot;
+       private final String systemId;
+       private final File inputFile;
+
+       private InputContext inputCtx;
+       private boolean completed;
+
+       private final Detector detector;
+
+       private final MediaType type;
+
+       /**
+        * Creates a new item import context. The specified InputStream is
+        * written to a temporary file in order to avoid problems with multiple
+        * IOHandlers that try to run the import but fail. The temporary file is
+        * deleted as soon as this context is informed that the import has been
+        * completed and it will not be used any more.
+        *
+        * @param importRoot
+        *            the item below which the import takes place
+        * @param systemId
+        *            resource name of the document; also passed to the detector
+        *            as a hint when not <code>null</code>
+        * @param ctx
+        *            input context, or <code>null</code>
+        * @param stream
+        *            document input stream, or <code>null</code>
+        * @param ioListener
+        *            listener to report to; a {@link DefaultIOListener} is used
+        *            when <code>null</code>
+        * @param detector
+        *            content type detector
+        * @throws IOException
+        *             if type detection or spooling of the stream fails
+        * @see ImportContext#informCompleted(boolean)
+        */
+       public ImportContextImpl(Item importRoot, String systemId,
+                       InputContext ctx, InputStream stream, IOListener ioListener,
+                       Detector detector) throws IOException {
+               this.importRoot = importRoot;
+               this.systemId = systemId;
+               this.inputCtx = ctx;
+               this.ioListener = (ioListener != null) ? ioListener
+                               : new DefaultIOListener(log);
+               this.detector = detector;
+
+               // Collect the hints that are available for type detection
+               Metadata metadata = new Metadata();
+               if (ctx != null && ctx.getContentType() != null) {
+                       metadata.set(Metadata.CONTENT_TYPE, ctx.getContentType());
+               }
+               if (systemId != null) {
+                       metadata.set(Metadata.RESOURCE_NAME_KEY, systemId);
+               }
+               // Make sure the stream handed to the detector supports mark/reset
+               if (stream != null && !stream.markSupported()) {
+                       stream = new BufferedInputStream(stream);
+               }
+               type = detector.detect(stream, metadata);
+
+               this.inputFile = IOUtil.getTempFile(stream);
+       }
+
+       /**
+        * @see ImportContext#getIOListener()
+        */
+       public IOListener getIOListener() {
+               return ioListener;
+       }
+
+       /**
+        * @see ImportContext#getImportRoot()
+        */
+       public Item getImportRoot() {
+               return importRoot;
+       }
+
+       /**
+        * @see ImportContext#getDetector()
+        */
+       public Detector getDetector() {
+               return detector;
+       }
+
+       /**
+        * @see ImportContext#hasStream()
+        */
+       public boolean hasStream() {
+               return inputFile != null;
+       }
+
+       /**
+        * Returns a new <code>InputStream</code> to the temporary file created
+        * during instantiation or <code>null</code>, if this context does not
+        * provide a stream or the temporary file can not be opened.
+        *
+        * @throws IllegalStateException
+        *             if the context is already completed
+        * @see ImportContext#getInputStream()
+        * @see #hasStream()
+        */
+       public InputStream getInputStream() {
+               checkCompleted();
+               if (inputFile == null) {
+                       return null;
+               }
+               try {
+                       return new FileInputStream(inputFile);
+               } catch (IOException e) {
+                       // Keep returning null as before, but leave a trace instead
+                       // of silently hiding the failure.
+                       log.warn("Unable to open temporary import file " + inputFile,
+                                       e);
+                       return null;
+               }
+       }
+
+       /**
+        * @see ImportContext#getSystemId()
+        */
+       public String getSystemId() {
+               return systemId;
+       }
+
+       /**
+        * Returns the modification time of the input context, or the current
+        * time if there is no input context.
+        *
+        * @see ImportContext#getModificationTime()
+        */
+       public long getModificationTime() {
+               return (inputCtx != null) ? inputCtx.getModificationTime()
+                               : new Date().getTime();
+       }
+
+       /**
+        * @see ImportContext#getContentLanguage()
+        */
+       public String getContentLanguage() {
+               return (inputCtx != null) ? inputCtx.getContentLanguage() : null;
+       }
+
+       /**
+        * Returns the content length reported by the input context, falling
+        * back to the size of the temporary file when the context reports a
+        * negative length or is missing.
+        *
+        * @see ImportContext#getContentLength()
+        */
+       public long getContentLength() {
+               long length = IOUtil.UNDEFINED_LENGTH;
+               if (inputCtx != null) {
+                       length = inputCtx.getContentLength();
+               }
+               if (length < 0 && inputFile != null) {
+                       length = inputFile.length();
+               }
+               if (length < 0) {
+                       log.debug("Unable to determine content length -> default value = "
+                                       + IOUtil.UNDEFINED_LENGTH);
+               }
+               return length;
+       }
+
+       /**
+        * @see ImportContext#getMimeType()
+        */
+       public String getMimeType() {
+               return IOUtil.getMimeType(type.toString());
+       }
+
+       /**
+        * @see ImportContext#getEncoding()
+        */
+       public String getEncoding() {
+               return IOUtil.getEncoding(type.toString());
+       }
+
+       /**
+        * Looks up a property of the input context.
+        *
+        * @return the property value, or <code>null</code> if there is no input
+        *         context or the property name is <code>null</code>
+        * @see ImportContext#getProperty(Object)
+        */
+       public Object getProperty(Object propertyName) {
+               if (inputCtx == null || propertyName == null) {
+                       return null;
+               }
+               return inputCtx.getProperty(propertyName.toString());
+       }
+
+       /**
+        * Marks this context as completed and removes the temporary file.
+        *
+        * @throws IllegalStateException
+        *             if the context is already completed
+        * @see ImportContext#informCompleted(boolean)
+        */
+       public void informCompleted(boolean success) {
+               checkCompleted();
+               completed = true;
+               if (inputFile != null && !inputFile.delete()
+                               && inputFile.exists()) {
+                       log.warn("Unable to delete temporary import file " + inputFile);
+               }
+       }
+
+       /**
+        * @see ImportContext#isCompleted()
+        */
+       public boolean isCompleted() {
+               return completed;
+       }
+
+       /**
+        * @throws IllegalStateException
+        *             if the context is already completed.
+        * @see #isCompleted()
+        * @see #informCompleted(boolean)
+        */
+       private void checkCompleted() {
+               if (completed) {
+                       throw new IllegalStateException(
+                                       "ImportContext has already been consumed.");
+               }
+       }
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java 
(added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java 
Mon May  4 21:52:53 2015
@@ -0,0 +1,59 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.LanguageProfile;
+import org.apache.tika.language.ProfilingHandler;
+import org.apache.tika.language.ProfilingWriter;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+
+/**
+ * Examples of language detection with Tika's language profiling classes.
+ * Every example prints the detected language to standard output.
+ */
+public class Language {
+
+       /**
+        * Detects the language of a fixed text sample through a
+        * {@link LanguageProfile}.
+        */
+       public static void languageDetection() throws IOException {
+               LanguageProfile profile = new LanguageProfile(
+                               "Alla människor är födda fria och"
+                                               + " lika i värde och rättigheter.");
+
+               LanguageIdentifier identifier = new LanguageIdentifier(profile);
+               System.out.println(identifier.getLanguage());
+       }
+
+       /**
+        * Detects the language of text that is appended piece by piece to a
+        * {@link ProfilingWriter}.
+        */
+       public static void languageDetectionWithWriter() throws IOException {
+               ProfilingWriter writer = new ProfilingWriter();
+               try {
+                       writer.append("Minden emberi lény");
+                       writer.append(" szabadon születik és");
+                       writer.append(" egyenlő méltósága és");
+                       writer.append(" joga van.");
+
+                       LanguageIdentifier identifier = writer.getLanguage();
+                       System.out.println(identifier.getLanguage());
+               } finally {
+                       // Close the writer even when appending or detection fails
+                       writer.close();
+               }
+       }
+
+       /**
+        * Detects the language of the document read from standard input by
+        * parsing it into a {@link ProfilingHandler}.
+        */
+       public static void languageDetectionWithHandler() throws Exception {
+               ProfilingHandler handler = new ProfilingHandler();
+               new AutoDetectParser().parse(System.in, handler, new Metadata(),
+                               new ParseContext());
+
+               LanguageIdentifier identifier = handler.getLanguage();
+               System.out.println(identifier.getLanguage());
+       }
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,49 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.ProfilingHandler;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.DelegatingParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.TeeContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Delegating parser that profiles the parsed text and records the detected
+ * language in the document metadata.
+ */
+@SuppressWarnings("deprecation")
+public class LanguageDetectingParser extends DelegatingParser {
+
+       private static final long serialVersionUID = 4291320409396502774L;
+
+       /**
+        * Parses the document with the delegate parser while feeding a copy of
+        * the SAX events to a language profiler. The {@link Metadata#LANGUAGE}
+        * entry is only set when the identifier is reasonably certain.
+        */
+       public void parse(InputStream stream, ContentHandler handler,
+                       final Metadata metadata, ParseContext context)
+                       throws SAXException, IOException, TikaException {
+               ProfilingHandler languageProfiler = new ProfilingHandler();
+               super.parse(stream,
+                               new TeeContentHandler(handler, languageProfiler),
+                               metadata, context);
+
+               LanguageIdentifier detected = languageProfiler.getLanguage();
+               if (!detected.isReasonablyCertain()) {
+                       return;
+               }
+               metadata.set(Metadata.LANGUAGE, detected.getLanguage());
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,217 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.concurrent.Executor;
+
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.AbstractField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * <code>LazyTextExtractorField</code> implements a Lucene field with a String
+ * value that is lazily initialized by parsing a binary value in a background
+ * task. In addition this class provides a method to find out whether the
+ * extraction process is already finished.
+ *
+ * @see #isExtractorFinished()
+ */
+@SuppressWarnings("serial")
+public class LazyTextExtractorField extends AbstractField {
+
+       /**
+        * The logger instance for this class.
+        */
+       private static final Logger log = LoggerFactory
+                       .getLogger(LazyTextExtractorField.class);
+
+       /**
+        * The exception used to forcibly terminate the extraction process when
+        * the maximum field length is reached.
+        */
+       private static final SAXException STOP = new SAXException(
+                       "max field length reached");
+
+       /**
+        * The extracted text content of the given binary value. Set to non-null
+        * when the text extraction task finishes.
+        */
+       private volatile String extract = null;
+
+       /**
+        * Creates a new <code>LazyTextExtractorField</code> and submits the text
+        * extraction task to the given executor.
+        *
+        * @param parser
+        *            the parser used to extract the text
+        * @param value
+        *            the binary value to extract the text from
+        * @param metadata
+        *            the metadata passed to the parser
+        * @param executor
+        *            the executor that runs the extraction task
+        * @param highlighting
+        *            set to <code>true</code> to enable result highlighting
+        *            support
+        * @param maxFieldLength
+        *            the maximum number of characters to extract
+        */
+       public LazyTextExtractorField(Parser parser, InternalValue value,
+                       Metadata metadata, Executor executor, boolean highlighting,
+                       int maxFieldLength) {
+               super(FieldNames.FULLTEXT, highlighting ? Store.YES : Store.NO,
+                               Field.Index.ANALYZED,
+                               highlighting ? TermVector.WITH_OFFSETS : TermVector.NO);
+               executor.execute(new ParsingTask(parser, value, metadata,
+                               maxFieldLength));
+       }
+
+       /**
+        * Returns the extracted text. This method blocks until the text
+        * extraction task has been completed.
+        *
+        * @return the string value of this field, or an empty string if the
+        *         waiting thread is interrupted
+        */
+       public synchronized String stringValue() {
+               try {
+                       while (!isExtractorFinished()) {
+                               wait();
+                       }
+                       return extract;
+               } catch (InterruptedException e) {
+                       // Restore the interrupt status so callers can still see it
+                       Thread.currentThread().interrupt();
+                       log.error("Text extraction thread was interrupted", e);
+                       return "";
+               }
+       }
+
+       /**
+        * @return always <code>null</code>
+        */
+       public Reader readerValue() {
+               return null;
+       }
+
+       /**
+        * @return always <code>null</code>
+        */
+       public byte[] binaryValue() {
+               return null;
+       }
+
+       /**
+        * @return always <code>null</code>
+        */
+       public TokenStream tokenStreamValue() {
+               return null;
+       }
+
+       /**
+        * Checks whether the text extraction task has finished.
+        *
+        * @return <code>true</code> if the extracted text is available
+        */
+       public boolean isExtractorFinished() {
+               return extract != null;
+       }
+
+       /**
+        * Publishes the extraction result and wakes up every thread that is
+        * blocked in {@link #stringValue()}.
+        */
+       private synchronized void setExtractedText(String value) {
+               extract = value;
+               // More than one thread may be waiting for the result
+               notifyAll();
+       }
+
+       /**
+        * Releases all resources associated with this field.
+        */
+       public void dispose() {
+               // TODO: Cause the ContentHandler below to throw an exception
+       }
+
+       /**
+        * The background task for extracting text from a binary value. The task
+        * is its own SAX content handler: character events are collected in a
+        * buffer until the maximum field length is reached.
+        */
+       private class ParsingTask extends DefaultHandler implements Runnable {
+
+               private final Parser parser;
+
+               private final InternalValue value;
+
+               private final Metadata metadata;
+
+               private final int maxFieldLength;
+
+               private final StringBuilder builder = new StringBuilder();
+
+               private final ParseContext context = new ParseContext();
+
+               public ParsingTask(Parser parser, InternalValue value,
+                               Metadata metadata, int maxFieldLength) {
+                       this.parser = parser;
+                       this.value = value;
+                       this.metadata = metadata;
+                       this.maxFieldLength = maxFieldLength;
+               }
+
+               /**
+                * Parses the binary value and publishes the collected text. Any
+                * failure other than the length limit or a linkage error
+                * publishes the marker text <code>TextExtractionError</code>.
+                */
+               public void run() {
+                       try {
+                               InputStream stream = value.getStream();
+                               try {
+                                       // Parse into this task so that characters()
+                                       // receives the extracted text
+                                       parser.parse(stream, this, metadata, context);
+                               } finally {
+                                       stream.close();
+                               }
+                       } catch (LinkageError e) {
+                               // Capture and ignore
+                       } catch (Throwable t) {
+                               // STOP only signals that the length limit was hit
+                               if (t != STOP) {
+                                       log.debug("Failed to extract text.", t);
+                                       setExtractedText("TextExtractionError");
+                                       return;
+                               }
+                       } finally {
+                               value.discard();
+                       }
+                       setExtractedText(builder.toString());
+               }
+
+               /**
+                * Appends the characters to the buffer, never exceeding the
+                * maximum field length.
+                *
+                * @throws SAXException
+                *             the shared stop exception once the maximum field
+                *             length is reached
+                */
+               @Override
+               public void characters(char[] ch, int start, int length)
+                               throws SAXException {
+                       int remaining = maxFieldLength - builder.length();
+                       if (remaining > 0) {
+                               builder.append(ch, start, Math.min(length, remaining));
+                       }
+                       if (builder.length() >= maxFieldLength) {
+                               throw STOP;
+                       }
+               }
+
+               @Override
+               public void ignorableWhitespace(char[] ch, int start, int length)
+                               throws SAXException {
+                       characters(ch, start, length);
+               }
+
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,46 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+/* */
+import java.io.File;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.tika.Tika;
+
+/**
+ * Minimal Lucene indexer that uses the {@link Tika} facade to extract the
+ * full text of a file.
+ */
+public class LuceneIndexer {
+
+       private final Tika tika;
+
+       private final IndexWriter writer;
+
+       public LuceneIndexer(Tika tika, IndexWriter writer) {
+               this.tika = tika;
+               this.writer = writer;
+       }
+
+       /**
+        * Adds one document to the index: the file name as a stored, analyzed
+        * <code>filename</code> field and the extracted text as an analyzed but
+        * not stored <code>fulltext</code> field.
+        *
+        * @param file
+        *            the file to index
+        */
+       public void indexDocument(File file) throws Exception {
+               Field nameField = new Field("filename", file.getName(),
+                               Store.YES, Index.ANALYZED);
+               Field textField = new Field("fulltext", tika.parseToString(file),
+                               Store.NO, Index.ANALYZED);
+
+               Document doc = new Document();
+               doc.add(nameField);
+               doc.add(textField);
+               writer.addDocument(doc);
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,70 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.SimpleFSDirectory;
+import org.apache.lucene.util.Version;
+import org.apache.tika.Tika;
+
+/**
+ * Lucene indexer that hands the extracted text to Lucene as a
+ * {@link Reader} instead of a String.
+ */
+@SuppressWarnings("deprecation")
+public class LuceneIndexerExtended {
+
+       private final IndexWriter writer;
+
+       private final Tika tika;
+
+       public LuceneIndexerExtended(IndexWriter writer, Tika tika) {
+               this.writer = writer;
+               this.tika = tika;
+       }
+
+       /**
+        * Indexes the given files. The first argument is the index directory,
+        * all following arguments are the files to index.
+        */
+       public static void main(String[] args) throws Exception {
+               SimpleFSDirectory indexDir = new SimpleFSDirectory(
+                               new File(args[0]));
+               StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
+               IndexWriter indexWriter = new IndexWriter(indexDir, analyzer,
+                               MaxFieldLength.UNLIMITED);
+               try {
+                       // NOTE(review): this delegates to LuceneIndexer rather than
+                       // LuceneIndexerExtended - confirm that this is intended.
+                       LuceneIndexer indexer = new LuceneIndexer(new Tika(),
+                                       indexWriter);
+                       int position = 1;
+                       while (position < args.length) {
+                               indexer.indexDocument(new File(args[position]));
+                               position++;
+                       }
+               } finally {
+                       indexWriter.close();
+               }
+       }
+
+       /**
+        * Adds one document to the index: the file name as a stored, analyzed
+        * <code>filename</code> field and the text reader as the
+        * <code>fulltext</code> field. The reader is closed afterwards.
+        *
+        * @param file
+        *            the file to index
+        */
+       public void indexDocument(File file) throws Exception {
+               Reader textReader = tika.parse(file);
+               try {
+                       Field nameField = new Field("filename", file.getName(),
+                                       Store.YES, Index.ANALYZED);
+                       Field textField = new Field("fulltext", textReader);
+
+                       Document doc = new Document();
+                       doc.add(nameField);
+                       doc.add(textField);
+                       writer.addDocument(doc);
+               } finally {
+                       textReader.close();
+               }
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,58 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
+
+/**
+ * Examples for working with {@link MediaType} and
+ * {@link MediaTypeRegistry}. All results are printed to standard output.
+ */
+public class MediaTypeExample {
+
+       /**
+        * Prints the type, subtype and parameters of a parsed media type.
+        */
+       public static void describeMediaType() {
+
+               MediaType parsed = MediaType.parse("text/plain; charset=UTF-8");
+
+               System.out.println("type:    " + parsed.getType());
+               System.out.println("subtype: " + parsed.getSubtype());
+
+               Map<String, String> params = parsed.getParameters();
+               System.out.println("parameters:");
+               for (Map.Entry<String, String> param : params.entrySet()) {
+                       System.out.println("  " + param.getKey() + "="
+                                       + param.getValue());
+               }
+       }
+
+       /**
+        * Prints every type of the default registry with its aliases.
+        */
+       public static void listAllTypes() {
+               MediaTypeRegistry defaults = MediaTypeRegistry.getDefaultRegistry();
+
+               for (MediaType known : defaults.getTypes()) {
+                       Set<MediaType> otherNames = defaults.getAliases(known);
+                       System.out.println(known + ", also known as " + otherNames);
+               }
+       }
+
+       /**
+        * Prints <code>image/svg+xml</code> followed by its chain of
+        * supertypes in the default registry.
+        */
+       public static void main(String[] args) throws Exception {
+               MediaTypeRegistry defaults = MediaTypeRegistry.getDefaultRegistry();
+
+               for (MediaType current = MediaType.parse("image/svg+xml");
+                               current != null;
+                               current = defaults.getSupertype(current)) {
+                       System.out.println(current);
+               }
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,93 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.Date;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.tika.Tika;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+
+/**
+ * Builds on the LuceneIndexer from Chapter 5 and adds indexing of Metadata.
+ * Each indexed file produces exactly one Lucene document whose fields mirror
+ * the metadata names and values collected for that file.
+ */
+@SuppressWarnings("deprecation")
+public class MetadataAwareLuceneIndexer {
+
+       private Tika tika;
+
+       private IndexWriter writer;
+
+       /**
+        * @param writer index writer that receives the generated documents
+        * @param tika facade used for type detection and parsing
+        */
+       public MetadataAwareLuceneIndexer(IndexWriter writer, Tika tika) {
+               this.writer = writer;
+               this.tika = tika;
+       }
+
+       /**
+        * Indexes only the metadata that parsing the file yields.
+        *
+        * @param file the file to parse and index
+        * @throws Exception if the file cannot be read, parsed or indexed
+        */
+       public void indexContentSpecificMet(File file) throws Exception {
+               indexMetadata(file, new Metadata());
+       }
+
+       /**
+        * Indexes the file with a set of explicitly assigned metadata values in
+        * addition to whatever parsing the file contributes.
+        *
+        * @param file the file to parse and index
+        * @throws Exception if the file cannot be read, parsed or indexed
+        */
+       public void indexWithDublinCore(File file) throws Exception {
+               Metadata met = new Metadata();
+               met.add(Metadata.CREATOR, "Manning");
+               met.add(Metadata.CREATOR, "Tika in Action");
+               met.set(Metadata.DATE, new Date());
+               met.set(Metadata.FORMAT, tika.detect(file));
+               met.set(DublinCore.SOURCE, file.toURI().toURL().toString());
+               met.add(Metadata.SUBJECT, "File");
+               met.add(Metadata.SUBJECT, "Indexing");
+               met.add(Metadata.SUBJECT, "Metadata");
+               met.set(Property.externalClosedChoise(Metadata.RIGHTS, "public",
+                               "private"), "public");
+               indexMetadata(file, met);
+       }
+
+       /**
+        * Parses the file into the given metadata object, copies every
+        * name/value pair into one document and adds that document once.
+        */
+       private void indexMetadata(File file, Metadata met) throws Exception {
+               InputStream is = new FileInputStream(file);
+               try {
+                       tika.parse(is, met);
+                       Document document = new Document();
+                       for (String key : met.names()) {
+                               for (String val : met.getValues(key)) {
+                                       document.add(new Field(key, val, Store.YES, Index.ANALYZED));
+                               }
+                       }
+                       // Add after all fields are collected; adding inside the key
+                       // loop would index the same document once per metadata name.
+                       writer.addDocument(document);
+               } finally {
+                       is.close();
+               }
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java 
(added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java 
Mon May  4 21:52:53 2015
@@ -0,0 +1,79 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.LanguageProfile;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+
+/**
+ * Demonstrates how to call the different components within Tika: its
+ * {@link Detector} framework (aka MIME identification and repository), its
+ * {@link Parser} interface, its {@link LanguageIdentifier} and other goodies.
+ */
+
+@SuppressWarnings("deprecation")
+public class MyFirstTika {
+
+       /**
+        * Reports the MIME type, language, parsed metadata and parsed text of
+        * one file.
+        *
+        * @param args args[0] is the path of the file to examine
+        * @throws Exception if the file cannot be read, detected or parsed
+        */
+       public static void main(String[] args) throws Exception {
+               if (args.length < 1) {
+                       System.err.println("Usage: MyFirstTika <file>");
+                       return;
+               }
+               String filename = args[0];
+               File file = new File(filename);
+               // One configuration object serves both the MIME lookup and the parser lookup
+               TikaConfig config = TikaConfig.getDefaultConfig();
+               MimeTypes mimeRegistry = config.getMimeRepository();
+
+               System.out.println("Examining: [" + filename + "]");
+
+               System.out.println("The MIME type (based on filename) is: ["
+                               + mimeRegistry.getMimeType(filename) + "]");
+
+               System.out.println("The MIME type (based on MAGIC) is: ["
+                               + mimeRegistry.getMimeType(file) + "]");
+
+               Detector mimeDetector = (Detector) mimeRegistry;
+               // Close the stream once detection is done instead of leaking it
+               java.io.InputStream detectStream = file.toURI().toURL().openStream();
+               try {
+                       System.out.println("The MIME type (based on the Detector interface) is: ["
+                                       + mimeDetector.detect(detectStream, new Metadata()) + "]");
+               } finally {
+                       detectStream.close();
+               }
+
+               LanguageIdentifier lang = new LanguageIdentifier(new LanguageProfile(
+                               FileUtils.readFileToString(file)));
+
+               System.out.println("The language of this content is: ["
+                               + lang.getLanguage() + "]");
+
+               Parser parser = config.getParser(
+                               MediaType.parse(mimeRegistry.getMimeType(filename).getName()));
+               Metadata parsedMet = new Metadata();
+               ContentHandler handler = new BodyContentHandler();
+               java.io.InputStream parseStream = file.toURI().toURL().openStream();
+               try {
+                       parser.parse(parseStream, handler, parsedMet, new ParseContext());
+               } finally {
+                       parseStream.close();
+               }
+
+               System.out.println("Parsed Metadata: ");
+               System.out.println(parsedMet);
+               System.out.println("Parsed Text: ");
+               System.out.println(handler.toString());
+
+       }
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java 
(added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java 
Mon May  4 21:52:53 2015
@@ -0,0 +1,31 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.security.Key;
+
+/**
+ * Static holder for a single {@link Key}. The stored value is whatever was
+ * last handed to {@link #setKey(Key)}, or null if nothing has been set.
+ */
+public class Pharmacy {
+
+       // Reference fields start out null, so no explicit initializer is needed
+       private static Key current;
+
+       /**
+        * Replaces the stored key.
+        *
+        * @param key the key to store; may be null
+        */
+       public static void setKey(Key key) {
+               current = key;
+       }
+
+       /**
+        * @return the stored key, or null if none has been set
+        */
+       public static Key getKey() {
+               return current;
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,52 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.xml.ElementMetadataHandler;
+import org.apache.tika.parser.xml.XMLParser;
+import org.apache.tika.sax.TeeContentHandler;
+import org.xml.sax.ContentHandler;
+
+/**
+ * XML parser for the application/x-prescription+xml type that, besides the
+ * regular XML handling, copies the text of the doctor and patient elements
+ * into the metadata under the xpd:doctor and xpd:patient names.
+ */
+public class PrescriptionParser extends XMLParser {
+
+       private static final long serialVersionUID = 7690682277511967388L;
+
+       /** Namespace URI of the elements captured into the metadata. */
+       private static final String XPD_NAMESPACE = "http://example.com/2011/xpd";
+
+       /**
+        * Wraps the parent's content handler so that SAX events are also fed
+        * to the two element-to-metadata handlers.
+        */
+       @Override
+       protected ContentHandler getContentHandler(ContentHandler handler,
+                       Metadata metadata, ParseContext context) {
+               ContentHandler doctor = new ElementMetadataHandler(XPD_NAMESPACE,
+                               "doctor", metadata, "xpd:doctor");
+               ContentHandler patient = new ElementMetadataHandler(XPD_NAMESPACE,
+                               "patient", metadata, "xpd:patient");
+
+               return new TeeContentHandler(
+                               super.getContentHandler(handler, metadata, context),
+                               doctor, patient);
+       }
+
+       /**
+        * @return a singleton set containing application/x-prescription+xml
+        */
+       @Override
+       public Set<MediaType> getSupportedTypes(ParseContext context) {
+               return Collections.singleton(MediaType
+                               .application("x-prescription+xml"));
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java 
(added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java 
Mon May  4 21:52:53 2015
@@ -0,0 +1,148 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.Locale;
+import java.util.TimeZone;
+
+import org.apache.jackrabbit.util.ISO8601;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.store.SimpleFSDirectory;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Metadata;
+
+/**
+ *
+ * Builds on top of the LuceneIndexer and the Metadata discussions in Chapter 6
+ * to output an RSS feed of the indexed files whose date field falls within
+ * the last five minutes.
+ */
+@SuppressWarnings("deprecation")
+public class RecentFiles {
+
+       private IndexReader reader;
+
+       private SimpleDateFormat rssDateFormat = new SimpleDateFormat(
+                       "E, dd MMM yyyy HH:mm:ss z", Locale.getDefault());
+
+       /**
+        * Generates the feed for the index stored in the given directory.
+        *
+        * @param indexFile directory holding the Lucene index
+        * @return the complete RSS document as a string
+        * @throws CorruptIndexException if the index is corrupt
+        * @throws IOException if the index cannot be read
+        */
+       public String generateRSS(File indexFile) throws CorruptIndexException,
+                       IOException {
+               StringBuffer output = new StringBuffer();
+               output.append(getRSSHeaders());
+               IndexSearcher searcher = null;
+               try {
+                       reader = IndexReader.open(new SimpleFSDirectory(indexFile));
+                       searcher = new IndexSearcher(reader);
+                       GregorianCalendar gc = new java.util.GregorianCalendar(
+                                       TimeZone.getDefault(), Locale.getDefault());
+                       gc.setTime(new Date());
+                       String nowDateTime = ISO8601.format(gc);
+                       gc.add(java.util.GregorianCalendar.MINUTE, -5);
+                       String fiveMinsAgo = ISO8601.format(gc);
+                       TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(),
+                                       fiveMinsAgo, nowDateTime, true, true);
+                       TopScoreDocCollector collector = TopScoreDocCollector.create(20,
+                                       true);
+                       searcher.search(query, collector);
+                       ScoreDoc[] hits = collector.topDocs().scoreDocs;
+                       for (int i = 0; i < hits.length; i++) {
+                               Document doc = searcher.doc(hits[i].doc);
+                               output.append(getRSSItem(doc));
+                       }
+
+               } finally {
+                       // Close the searcher before the reader it depends on, and make
+                       // sure the reader is released even if closing the searcher fails.
+                       try {
+                               if (searcher != null) {
+                                       searcher.close();
+                               }
+                       } finally {
+                               IndexReader toClose = reader;
+                               reader = null;
+                               if (toClose != null) {
+                                       toClose.close();
+                               }
+                       }
+               }
+
+               output.append(getRSSFooters());
+               return output.toString();
+       }
+
+       /**
+        * Renders one indexed document as an RSS item element.
+        *
+        * @param doc the Lucene document to render
+        * @return the item markup
+        */
+       public String getRSSItem(Document doc) {
+               StringBuffer output = new StringBuffer();
+               output.append("<item>");
+               // RSS 2.0 spells the guid attribute "isPermaLink"
+               output.append(emitTag("guid", doc.get(DublinCore.SOURCE.getName()),
+                               "isPermaLink", "true"));
+               output.append(emitTag("title", doc.get(Metadata.TITLE), null, null));
+               output.append(emitTag("link", doc.get(DublinCore.SOURCE.getName()),
+                               null, null));
+               output.append(emitTag("author", doc.get(Metadata.CREATOR), null, null));
+               for (String topic : doc.getValues(Metadata.SUBJECT)) {
+                       output.append(emitTag("category", topic, null, null));
+               }
+               // The date formatter accepts a Date, not the Calendar returned by
+               // ISO8601.parse, so convert explicitly; skip the tag when the
+               // stored date is missing or unparseable.
+               String storedDate = doc.get(Metadata.DATE.toString());
+               java.util.Calendar published = storedDate == null
+                               ? null : ISO8601.parse(storedDate);
+               if (published != null) {
+                       output.append(emitTag("pubDate",
+                                       rssDateFormat.format(published.getTime()), null, null));
+               }
+               output.append(emitTag("description", doc.get(Metadata.TITLE), null,
+                               null));
+               output.append("</item>");
+               return output.toString();
+       }
+
+       /**
+        * @return the XML declaration, the opening rss and channel elements
+        *         and the channel-level tags
+        */
+       public String getRSSHeaders() {
+               StringBuffer output = new StringBuffer();
+               output.append("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
+               output.append("<rss version=\"2.0\">");
+               output.append("  <channel>");
+               output.append("     <title>Tika in Action: Recent Files Feed."
+                               + "</title>");
+               output.append("     <description>Chapter 6 Examples demonstrating "
+                               + "use of Tika Metadata for RSS.</description>");
+               output.append("     <link>tikainaction.rss</link>");
+               output.append("     <lastBuildDate>" + rssDateFormat.format(new Date())
+                               + "</lastBuildDate>");
+               output.append("     <generator>Manning Publications: Tika in Action"
+                               + "</generator>");
+               output.append("     <copyright>All Rights Reserved</copyright>");
+               return output.toString();
+       }
+
+       /**
+        * @return the closing tags for the elements opened by
+        *         {@link #getRSSHeaders()}
+        */
+       public String getRSSFooters() {
+               StringBuffer output = new StringBuffer();
+               output.append("   </channel>");
+               output.append("</rss>");
+               return output.toString();
+       }
+
+       /**
+        * Builds one element with an optional single attribute. The value and
+        * the attribute value are XML-escaped; a null value yields an empty
+        * element.
+        */
+       private String emitTag(String tagName, String value, String attributeName,
+                       String attributeValue) {
+               StringBuffer output = new StringBuffer();
+               output.append("<");
+               output.append(tagName);
+               if (attributeName != null) {
+                       output.append(" ");
+                       output.append(attributeName);
+                       output.append("=\"");
+                       output.append(escapeXml(attributeValue));
+                       output.append("\"");
+               }
+               output.append(">");
+               output.append(escapeXml(value));
+               output.append("</");
+               output.append(tagName);
+               output.append(">");
+               return output.toString();
+       }
+
+       /**
+        * Replaces the characters that are significant in XML markup with
+        * their entity references; null becomes the empty string.
+        */
+       private static String escapeXml(String text) {
+               if (text == null) {
+                       return "";
+               }
+               StringBuffer escaped = new StringBuffer(text.length());
+               for (int i = 0; i < text.length(); i++) {
+                       char c = text.charAt(i);
+                       switch (c) {
+                       case '&':
+                               escaped.append("&amp;");
+                               break;
+                       case '<':
+                               escaped.append("&lt;");
+                               break;
+                       case '>':
+                               escaped.append("&gt;");
+                               break;
+                       case '"':
+                               escaped.append("&quot;");
+                               break;
+                       default:
+                               escaped.append(c);
+                       }
+               }
+               return escaped.toString();
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,141 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.Link;
+import org.apache.tika.sax.LinkContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Demonstrates Tika and its ability to sense symlinks.
+ */
+@SuppressWarnings("deprecation")
+public class RollbackSoftware {
+
+       /** Encoding used on both ends when the path travels through a stream. */
+       private static final String PATH_ENCODING = "UTF-8";
+
+       public static void main(String[] args) throws Exception {
+               RollbackSoftware r = new RollbackSoftware();
+               r.rollback(new File(args[0]));
+       }
+
+       /**
+        * Lists the versions found in the deployment area and reports the
+        * second-highest one (by name order) as the rollback target.
+        *
+        * @param deployArea directory containing one entry per installed version
+        * @throws IOException if the area cannot be listed or holds fewer than
+        *         two versions
+        * @throws SAXException if the link handler rejects an event
+        * @throws TikaException declared by the parser interface
+        */
+       public void rollback(File deployArea) throws IOException, SAXException,
+                       TikaException {
+               LinkContentHandler handler = new LinkContentHandler();
+               Metadata met = new Metadata();
+               DeploymentAreaParser parser = new DeploymentAreaParser();
+               parser.parse(IOUtils.toInputStream(deployArea.getAbsolutePath(),
+                               PATH_ENCODING), handler, met);
+               List<Link> links = handler.getLinks();
+               if (links.size() < 2) {
+                       throw new IOException("Must have installed at least 2 versions!");
+               }
+               Collections.sort(links, new Comparator<Link>() {
+                       public int compare(Link o1, Link o2) {
+                               return o1.getText().compareTo(o2.getText());
+                       }
+               });
+
+               this.updateVersion(links.get(links.size() - 2).getText());
+
+       }
+
+       private void updateVersion(String version) {
+               System.out.println("Rolling back to version: [" + version + "]");
+       }
+
+       /**
+        * Parser whose input stream carries a directory path rather than file
+        * content; it emits one anchor element per non-symlink entry of that
+        * directory whose name does not start with "current".
+        */
+       class DeploymentAreaParser implements Parser {
+
+               private static final long serialVersionUID = -2356647405087933468L;
+
+               /*
+                * (non-Javadoc)
+                * 
+                * @see org.apache.tika.parser.Parser#getSupportedTypes(
+                * org.apache.tika.parser.ParseContext)
+                */
+               public Set<MediaType> getSupportedTypes(ParseContext context) {
+                       return Collections.unmodifiableSet(new HashSet<MediaType>(Arrays
+                                       .asList(MediaType.TEXT_PLAIN)));
+               }
+
+               /*
+                * (non-Javadoc)
+                * 
+                * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+                * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
+                */
+               public void parse(InputStream is, ContentHandler handler,
+                               Metadata metadata) throws IOException, SAXException,
+                               TikaException {
+                       parse(is, handler, metadata, new ParseContext());
+               }
+
+               /*
+                * (non-Javadoc)
+                * 
+                * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+                * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+                * org.apache.tika.parser.ParseContext)
+                */
+
+               public void parse(InputStream is, ContentHandler handler,
+                               Metadata metadata, ParseContext context) throws IOException,
+                               SAXException, TikaException {
+
+                       File deployArea = new File(IOUtils.toString(is, PATH_ENCODING));
+                       File[] versions = deployArea.listFiles(new FileFilter() {
+
+                               public boolean accept(File pathname) {
+                                       return !pathname.getName().startsWith("current");
+                               }
+                       });
+                       // listFiles returns null when the path is not a readable directory
+                       if (versions == null) {
+                               throw new IOException("Cannot list deployment area: ["
+                                               + deployArea + "]");
+                       }
+
+                       XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
+                                       metadata);
+                       xhtml.startDocument();
+                       for (File v : versions) {
+                               if (isSymlink(v)) {
+                                       continue;
+                               }
+                               xhtml.startElement("a", "href", v.toURI().toURL().toExternalForm());
+                               xhtml.characters(v.getName());
+                               xhtml.endElement("a");
+                       }
+                       // Balance startDocument so downstream handlers see a complete document
+                       xhtml.endDocument();
+
+               }
+
+       }
+
+       /**
+        * Treats a file as a symlink when its absolute path differs from its
+        * canonical path.
+        */
+       private boolean isSymlink(File f) throws IOException {
+               return !f.getAbsolutePath().equals(f.getCanonicalPath());
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,34 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import org.apache.tika.Tika;
+
+/**
+ * Command-line example that prints the text Tika extracts from each file
+ * named on the command line.
+ */
+public class SimpleTextExtractor {
+
+       public static void main(String[] args) throws Exception {
+               // A single facade with the default configuration handles every input
+               Tika extractor = new Tika();
+
+               // Extract and print the text of each argument in order
+               for (int i = 0; i < args.length; i++) {
+                       File input = new File(args[i]);
+                       System.out.print(extractor.parseToString(input));
+               }
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,32 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+
+import org.apache.tika.Tika;
+
+/**
+ * Command-line example that prints the media type Tika detects for each
+ * file named on the command line.
+ */
+public class SimpleTypeDetector {
+
+       public static void main(String[] args) throws Exception {
+               Tika detector = new Tika();
+
+               // One output line per argument: "<path>: <detected type>"
+               for (int i = 0; i < args.length; i++) {
+                       String path = args[i];
+                       String detected = detector.detect(new File(path));
+                       System.out.println(path + ": " + detected);
+               }
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,38 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.ByteArrayInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.WriteOutContentHandler;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.support.ClassPathXmlApplicationContext;
+
+import com.google.common.base.Charsets;
+
+/**
+ * Obtains the parser bean named "tika" from a Spring application context
+ * and uses it to parse a short in-memory text, writing the result to
+ * standard output.
+ */
+public class SpringExample {
+
+       public static void main(String[] args) throws Exception {
+               String[] configLocations = { "org/apache/tika/example/spring.xml" };
+               ApplicationContext springContext =
+                               new ClassPathXmlApplicationContext(configLocations);
+               Parser tikaParser = springContext.getBean("tika", Parser.class);
+
+               // Feed the greeting to the parser as a UTF-8 byte stream
+               byte[] greeting = "Hello, World!".getBytes(Charsets.UTF_8);
+               ByteArrayInputStream input = new ByteArrayInputStream(greeting);
+               WriteOutContentHandler stdoutHandler =
+                               new WriteOutContentHandler(System.out);
+               tikaParser.parse(input, stdoutHandler, new Metadata(),
+                               new ParseContext());
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
------------------------------------------------------------------------------
    svn:executable = *


Reply via email to