Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,218 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.Reader;
+import java.net.URL;
+import java.nio.CharBuffer;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+import java.util.zip.GZIPInputStream;
+
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.CompositeParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.html.HtmlMapper;
+import org.apache.tika.parser.html.HtmlParser;
+import org.apache.tika.parser.html.IdentityHtmlMapper;
+import org.apache.tika.parser.txt.TXTParser;
+import org.apache.tika.parser.xml.XMLParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.apache.tika.sax.LinkContentHandler;
+import org.apache.tika.sax.TeeContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class TIAParsingExample {
+
+       /**
+        * Extracts the text of <code>example.doc</code> with the {@link Tika}
+        * facade, prints it to standard output and returns it.
+        *
+        * @return the extracted text
+        * @throws Exception if the document cannot be read or parsed
+        */
+       public static String parseToStringExample() throws Exception {
+               String text = new Tika().parseToString(new File("example.doc"));
+               System.out.print(text);
+               return text;
+       }
+
+       /**
+        * Obtains a {@link Reader} over the text of <code>example.doc</code> and
+        * copies it to standard output in chunks of up to 1000 characters. The
+        * reader is closed whether or not the copy succeeds.
+        *
+        * @throws Exception if the document cannot be read or parsed
+        */
+       public static void parseToReaderExample() throws Exception {
+               Reader text = new Tika().parse(new File("example.doc"));
+               try {
+                       char[] chunk = new char[1000];
+                       // read() reports -1 once the reader is exhausted
+                       for (int len = text.read(chunk); len != -1; len = text.read(chunk)) {
+                               System.out.append(CharBuffer.wrap(chunk, 0, len));
+                       }
+               } finally {
+                       text.close();
+               }
+       }
+
+       /**
+        * Parses the named file through a plain {@link FileInputStream}. All SAX
+        * events go to a no-op {@link DefaultHandler}, so only the parse call
+        * itself is demonstrated. The stream is closed whether or not parsing
+        * succeeds.
+        *
+        * @param filename path of the file to parse
+        * @throws Exception if the file cannot be opened or parsed
+        */
+       public static void parseFileInputStream(String filename) throws Exception {
+               Parser autoDetect = new AutoDetectParser();
+               InputStream input = new FileInputStream(new File(filename));
+               try {
+                       autoDetect.parse(input, new DefaultHandler(),
+                                       new Metadata(), new ParseContext());
+               } finally {
+                       input.close();
+               }
+       }
+
+       /**
+        * Opens the gzip-compressed resource at the given URL and parses the
+        * decompressed content, sending all SAX events to a no-op
+        * {@link DefaultHandler}.
+        *
+        * @param address URL of a gzip-compressed document
+        * @throws Exception if the URL cannot be opened, the data is not valid
+        *                   gzip, or the content cannot be parsed
+        */
+       public static void parseURLStream(String address) throws Exception {
+               Parser parser = new AutoDetectParser();
+               ContentHandler handler = new DefaultHandler();
+               Metadata metadata = new Metadata();
+               ParseContext context = new ParseContext();
+               InputStream raw = new URL(address).openStream();
+               try {
+                       // The GZIPInputStream constructor reads the gzip header and
+                       // throws if it is invalid; the outer finally releases the
+                       // network stream in that case too.
+                       InputStream stream = new GZIPInputStream(raw);
+                       try {
+                               parser.parse(stream, handler, metadata, context);
+                       } finally {
+                               stream.close();
+                       }
+               } finally {
+                       raw.close();
+               }
+       }
+
+       /**
+        * Parses the named file through a stream obtained from
+        * <code>TikaInputStream.get(File)</code>, sending all SAX events to a
+        * no-op {@link DefaultHandler}. The stream is closed whether or not
+        * parsing succeeds.
+        *
+        * @param filename path of the file to parse
+        * @throws Exception if the file cannot be opened or parsed
+        */
+       public static void parseTikaInputStream(String filename) throws Exception {
+               Parser autoDetect = new AutoDetectParser();
+               InputStream input = TikaInputStream.get(new File(filename));
+               try {
+                       autoDetect.parse(input, new DefaultHandler(),
+                                       new Metadata(), new ParseContext());
+               } finally {
+                       input.close();
+               }
+       }
+
+       /**
+        * Opens the named file as a stream via <code>TikaInputStream.get(File)</code>,
+        * passes that stream through <code>TikaInputStream.get(InputStream)</code>
+        * and returns the result of <code>getFile()</code>.
+        *
+        * NOTE(review): the stream is closed before the caller receives the
+        * File; presumably getFile() hands back the original file for a
+        * file-backed stream -- confirm against the TikaInputStream docs.
+        *
+        * @param filename path of the file to open
+        * @return the file reported by the TikaInputStream
+        * @throws Exception if the file cannot be opened
+        */
+       public static File tikaInputStreamGetFile(String filename) throws Exception {
+               InputStream source = TikaInputStream.get(new File(filename));
+               try {
+                       return TikaInputStream.get(source).getFile();
+               } finally {
+                       source.close();
+               }
+       }
+
+       /**
+        * Invokes an {@link HtmlParser} directly on an empty in-memory stream,
+        * sending all SAX events to a no-op {@link DefaultHandler}.
+        *
+        * @throws Exception if parsing fails
+        */
+       public static void useHtmlParser() throws Exception {
+               Parser htmlParser = new HtmlParser();
+               InputStream empty = new ByteArrayInputStream(new byte[0]);
+               htmlParser.parse(empty, new DefaultHandler(),
+                               new Metadata(), new ParseContext());
+       }
+
+       /**
+        * Builds a {@link CompositeParser} that maps <code>text/html</code> to an
+        * {@link HtmlParser} and <code>application/xml</code> to an
+        * {@link XMLParser}, with a {@link TXTParser} as fallback, then parses an
+        * empty stream whose metadata declares the content type
+        * <code>text/html</code>.
+        *
+        * @throws Exception if parsing fails
+        */
+       public static void useCompositeParser() throws Exception {
+               Map<MediaType, Parser> byType = new HashMap<MediaType, Parser>();
+               byType.put(MediaType.parse("text/html"), new HtmlParser());
+               byType.put(MediaType.parse("application/xml"), new XMLParser());
+
+               CompositeParser composite = new CompositeParser();
+               composite.setParsers(byType);
+               composite.setFallback(new TXTParser());
+
+               Metadata declaredType = new Metadata();
+               declaredType.set(Metadata.CONTENT_TYPE, "text/html");
+
+               InputStream empty = new ByteArrayInputStream(new byte[0]);
+               composite.parse(empty, new DefaultHandler(), declaredType,
+                               new ParseContext());
+       }
+
+       /**
+        * Invokes an {@link AutoDetectParser} on an empty in-memory stream with
+        * fresh metadata and parse context, sending all SAX events to a no-op
+        * {@link DefaultHandler}.
+        *
+        * @throws Exception if parsing fails
+        */
+       public static void useAutoDetectParser() throws Exception {
+               InputStream empty = new ByteArrayInputStream(new byte[0]);
+               Parser autoDetect = new AutoDetectParser();
+               autoDetect.parse(empty, new DefaultHandler(),
+                               new Metadata(), new ParseContext());
+       }
+
+       /**
+        * Parses an empty stream with a {@link TeeContentHandler} that feeds two
+        * handlers at once: a {@link BodyContentHandler} writing to the named
+        * file and a {@link LinkContentHandler}. The output file is closed
+        * whether or not parsing succeeds.
+        *
+        * @param filename path of the file that receives the body content
+        * @throws Exception if the file cannot be opened or parsing fails
+        */
+       public static void testTeeContentHandler(String filename) throws Exception {
+               Parser autoDetect = new AutoDetectParser();
+               LinkContentHandler links = new LinkContentHandler();
+               InputStream empty = new ByteArrayInputStream(new byte[0]);
+               OutputStream bodyOut = new FileOutputStream(new File(filename));
+               try {
+                       ContentHandler bodyWriter = new BodyContentHandler(bodyOut);
+                       ContentHandler tee = new TeeContentHandler(bodyWriter, links);
+                       autoDetect.parse(empty, tee, new Metadata(), new ParseContext());
+               } finally {
+                       bodyOut.close();
+               }
+       }
+
+       /**
+        * Parses an empty stream with a {@link ParseContext} in which
+        * {@link Locale#ENGLISH} is registered under <code>Locale.class</code>.
+        *
+        * @throws Exception if parsing fails
+        */
+       public static void testLocale() throws Exception {
+               InputStream empty = new ByteArrayInputStream(new byte[0]);
+               Parser autoDetect = new AutoDetectParser();
+
+               ParseContext english = new ParseContext();
+               english.set(Locale.class, Locale.ENGLISH);
+
+               autoDetect.parse(empty, new DefaultHandler(), new Metadata(), english);
+       }
+
+       /**
+        * Parses an empty stream with a {@link ParseContext} in which an
+        * {@link IdentityHtmlMapper} is registered under
+        * <code>HtmlMapper.class</code>.
+        *
+        * @throws Exception if parsing fails
+        */
+       public static void testHtmlMapper() throws Exception {
+               InputStream empty = new ByteArrayInputStream(new byte[0]);
+               Parser autoDetect = new AutoDetectParser();
+
+               ParseContext withMapper = new ParseContext();
+               withMapper.set(HtmlMapper.class, new IdentityHtmlMapper());
+
+               autoDetect.parse(empty, new DefaultHandler(), new Metadata(), withMapper);
+       }
+
+       /**
+        * Parses an empty stream with a {@link ParseContext} in which a custom
+        * {@link ParserDecorator} is registered under <code>Parser.class</code>.
+        * The decorator wraps the outer {@link AutoDetectParser} and overrides
+        * <code>parse</code> with an empty body; per its inline comment, that is
+        * where custom handling of component documents would go.
+        *
+        * @throws Exception if parsing fails
+        */
+       public static void testCompositeDocument() throws Exception {
+               InputStream empty = new ByteArrayInputStream(new byte[0]);
+               Parser outer = new AutoDetectParser();
+
+               Parser componentParser = new ParserDecorator(outer) {
+                       private static final long serialVersionUID = 4424210691523343833L;
+
+                       @Override
+                       public void parse(InputStream stream, ContentHandler handler,
+                                       Metadata metadata, ParseContext context)
+                                       throws IOException, SAXException, TikaException {
+                               // custom processing of the component document
+                       }
+               };
+
+               ParseContext context = new ParseContext();
+               context.set(Parser.class, componentParser);
+               outer.parse(empty, new DefaultHandler(), new Metadata(), context);
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,111 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Date;
+
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * 
+ * Generates document summaries for corpus analysis in the Open Relevance
+ * project.
+ * 
+ */
+@SuppressWarnings("deprecation")
+public class TrecDocumentGenerator {
+
+       /**
+        * Parses the given file and packages its extracted text, resource name
+        * and date metadata as a {@link TrecDocument}.
+        *
+        * @param file the document to summarize
+        * @return a document built from the resource name, the extracted text
+        *         and the value of the date metadata field
+        * @throws FileNotFoundException if the file cannot be opened
+        * @throws IOException if the file cannot be read
+        * @throws TikaException if the document cannot be parsed
+        */
+       public TrecDocument summarize(File file) throws FileNotFoundException,
+                       IOException, TikaException {
+               Tika tika = new Tika();
+               Metadata met = new Metadata();
+               // A bare FileInputStream carries no file name, so record it here;
+               // otherwise the resource-name lookup below has nothing to return.
+               met.set(Metadata.RESOURCE_NAME_KEY, file.getName());
+
+               // NOTE(review): relies on Tika.parseToString(InputStream, Metadata)
+               // closing the stream it is given -- confirm against the Tika javadoc.
+               String contents = tika.parseToString(new FileInputStream(file), met);
+               return new TrecDocument(met.get(Metadata.RESOURCE_NAME_KEY), contents,
+                               met.getDate(Metadata.DATE));
+       }
+
+       // copied from
+       // http://svn.apache.org/repos/asf/lucene/openrelevance/trunk/src/java/org/
+       // apache/orp/util/TrecDocument.java
+       // since the ORP jars aren't published anywhere
+       class TrecDocument {
+               private CharSequence docname;
+               private CharSequence body;
+               private Date date;
+
+               /** Creates a document whose name, body and date are all unset. */
+               public TrecDocument() {
+               }
+
+               /**
+                * Creates a fully populated document.
+                *
+                * @param docname the document name
+                * @param body the document text
+                * @param date the document date
+                */
+               public TrecDocument(CharSequence docname, CharSequence body, Date date) {
+                       this.docname = docname;
+                       this.body = body;
+                       this.date = date;
+               }
+
+               /** @return the document name */
+               public CharSequence getDocname() {
+                       return docname;
+               }
+
+               /** @param docname the new document name */
+               public void setDocname(CharSequence docname) {
+                       this.docname = docname;
+               }
+
+               /** @return the document text */
+               public CharSequence getBody() {
+                       return body;
+               }
+
+               /** @param body the new document text */
+               public void setBody(CharSequence body) {
+                       this.body = body;
+               }
+
+               /** @return the document date */
+               public Date getDate() {
+                       return date;
+               }
+
+               /** @param date the new document date */
+               public void setDate(Date date) {
+                       this.date = date;
+               }
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java 
(added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java 
Mon May  4 21:52:53 2015
@@ -0,0 +1,47 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+//JDK imports
+import java.io.IOException;
+import java.util.Collections;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+/**
+ * 
+ *
+ * Example code listing from Chapter 1. Lists a zip file's entries using JDK's
+ * standard APIs.
+ *
+ */
+public class ZipListFiles {
+       /**
+        * Prints the entry names of every zip file named on the command line,
+        * each list preceded by a header line naming the file.
+        *
+        * @param args paths of the zip files to list; nothing is printed when
+        *             the array is empty
+        * @throws Exception if a file cannot be opened or read as a zip archive
+        */
+       public static void main(String[] args) throws Exception {
+               for (String file : args) {
+                       System.out.println("Files in " + file + " file:");
+                       listZipEntries(file);
+               }
+       }
+
+       /**
+        * Prints the name of each entry of the zip file at the given path to
+        * standard output, one per line.
+        *
+        * @param path path of the zip file to list
+        * @throws IOException if the file cannot be opened or read as a zip archive
+        */
+       public static void listZipEntries(String path) throws IOException {
+               ZipFile zip = new ZipFile(path);
+               try {
+                       for (ZipEntry entry : Collections.list(zip.entries())) {
+                               System.out.println(entry.getName());
+                       }
+               } finally {
+                       // release the underlying file handle even if listing fails
+                       zip.close();
+               }
+       }
+
+}
\ No newline at end of file

Propchange: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/spring.xml
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/resources/org/apache/tika/example/spring.xml?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/spring.xml 
(added)
+++ 
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/spring.xml 
Mon May  4 21:52:53 2015
@@ -0,0 +1,36 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+ -->
+<beans xmlns="http://www.springframework.org/schema/beans"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xsi:schemaLocation="http://www.springframework.org/schema/beans
+                           http://www.springframework.org/schema/beans/spring-beans-3.0.xsd">
+
+<!-- Defines an AutoDetectParser bean constructed from an explicit list of
+     parser beans (plain text and PDF), each declared below. -->
+<!--<start id="spring"/>-->
+  <bean id="tika" class="org.apache.tika.parser.AutoDetectParser">
+    <constructor-arg>
+        <list>
+           <ref bean="txt"/>
+           <ref bean="pdf"/>
+        </list>
+    </constructor-arg>
+  </bean>
+
+  <bean id="txt" class="org.apache.tika.parser.txt.TXTParser"/>
+  <bean id="pdf" class="org.apache.tika.parser.pdf.PDFParser"/>
+<!--<end id="spring"/>-->
+
+</beans>
\ No newline at end of file

Propchange: 
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/spring.xml
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java
 (added)
+++ 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,30 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+@SuppressWarnings("deprecation")
+public class AdvancedTypeDetectorTest {
+
+       /**
+        * Asserts that
+        * <code>AdvancedTypeDetector.detectWithCustomConfig("pom.xml")</code>
+        * yields <code>application/xml</code>.
+        */
+       @Test
+       public void testDetectWithCustomConfig() throws Exception {
+               Assert.assertEquals("application/xml",
+                               
AdvancedTypeDetector.detectWithCustomConfig("pom.xml"));
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java
 (added)
+++ 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,52 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
+
+import junit.framework.Assert;
+
+import org.apache.commons.io.FileUtils;
+import org.junit.Test;
+
+import com.google.common.base.Charsets;
+
+@SuppressWarnings("deprecation")
+public class SimpleTextExtractorTest {
+
+    /**
+     * Writes a short text file under <code>target</code>, runs
+     * <code>SimpleTextExtractor.main</code> on it with standard output
+     * redirected to a buffer, and asserts that the trimmed captured output
+     * equals the text that was written.
+     */
+    @Test
+    public void testSimpleTextExtractor() throws Exception {
+        String message =
+            "Hello, World! This is simple UTF-8 text content written"
+            + " in English to test autodetection of the character"
+            + " encoding of the input stream.";
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+        // Create the fixture before touching System.out so a failed write
+        // cannot leave the redirect in place.
+        File file = new File("target", "test.txt");
+        FileUtils.writeStringToFile(file, message);
+
+        PrintStream out = System.out;
+        System.setOut(new PrintStream(buffer, true, Charsets.UTF_8.name()));
+        try {
+            SimpleTextExtractor.main(new String[] { file.getPath() });
+        } finally {
+            // Always undo the JVM-wide redirect and remove the fixture, even
+            // when the extractor throws; otherwise later tests would write
+            // into this test's buffer.
+            System.setOut(out);
+            file.delete();
+        }
+
+        Assert.assertEquals(message,
+            buffer.toString(Charsets.UTF_8.name()).trim());
+    }
+
+}

Propchange: 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java
------------------------------------------------------------------------------
    svn:executable = *

Added: 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java?rev=1677694&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java
 (added)
+++ 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java
 Mon May  4 21:52:53 2015
@@ -0,0 +1,45 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.ByteArrayOutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintStream;
+
+import junit.framework.Assert;
+
+import org.junit.Test;
+
+import com.google.common.base.Charsets;
+
+@SuppressWarnings("deprecation")
+public class SimpleTypeDetectorTest {
+
+       /**
+        * Runs <code>SimpleTypeDetector.main</code> on <code>pom.xml</code> with
+        * standard output redirected to a buffer, and asserts that the trimmed
+        * captured output is <code>pom.xml: application/xml</code>.
+        */
+       @Test
+       public void testSimpleTypeDetector() throws Exception {
+               ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+
+               PrintStream out = System.out;
+               System.setOut(new PrintStream(buffer, true, Charsets.UTF_8.name()));
+               try {
+                       SimpleTypeDetector.main(new String[] { "pom.xml" });
+               } finally {
+                       // Always undo the JVM-wide redirect, even when the detector
+                       // throws; otherwise later tests would write into this buffer.
+                       System.setOut(out);
+               }
+
+               Assert.assertEquals("pom.xml: application/xml",
+                               buffer.toString(Charsets.UTF_8.name()).trim());
+       }
+
+}

Propchange: 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java
------------------------------------------------------------------------------
    svn:executable = *


Reply via email to