Author: tpalsulich
Date: Wed Aug 13 22:00:02 2014
New Revision: 1617846

URL: http://svn.apache.org/r1617846
Log:
Second initial commit for TIKA-1391, parsing examples.

Added:
    
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
    tika/trunk/tika-example/src/main/resources/org/
    tika/trunk/tika-example/src/main/resources/org/apache/
    tika/trunk/tika-example/src/main/resources/org/apache/tika/
    tika/trunk/tika-example/src/main/resources/org/apache/tika/example/
    tika/trunk/tika-example/src/main/resources/org/apache/tika/example/test.doc 
  (with props)
    
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java

Added: 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java?rev=1617846&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
 (added)
+++ 
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
 Wed Aug 13 22:00:02 2014
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.example;
+
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public class ParsingExample {
+
+    /**
+     * Example of how to use Tika's parseToString method to parse the content of a file
+     * (here the "test.doc" resource loaded through this class) and return any text found.
+     *
+     * @return The content of a file.
+     */
+    public String parseToStringExample() throws IOException, SAXException, 
TikaException {
+        InputStream stream = 
ParsingExample.class.getResourceAsStream("test.doc");
+        Tika tika = new Tika();
+        try {
+            return tika.parseToString(stream);
+        } finally {
+            stream.close(); // runs even if parsing throws; NOTE(review): NPE here if the resource was not found (null stream)
+        }
+    }
+
+    /**
+     * Example of how to use Tika to parse a file when you do not know its file type
+     * ahead of time.
+     *
+     * <p>AutoDetectParser attempts to discover the file's type automatically, then calls
+     * the exact Parser built for that file type.
+     *
+     * <p>The stream is the input to be parsed by the Parser. In this case, we get a file from
+     * the resources folder of this project.
+     *
+     * <p>Handlers are used to get the exact information you want out of the host of
+     * information gathered by Parsers. The body content handler, intuitively, extracts
+     * everything that would go between HTML body tags.
+     *
+     * <p>The Metadata object will be filled by the Parser with Metadata discovered about
+     * the file being parsed. This example does not return it; only the handler's text is returned.
+     *
+     * @return The content of a file.
+     */
+    public String parseExample() throws IOException, SAXException, 
TikaException {
+        InputStream stream = 
ParsingExample.class.getResourceAsStream("test.doc");
+        AutoDetectParser parser = new AutoDetectParser();
+        BodyContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+        try {
+            parser.parse(stream, handler, metadata);
+            return handler.toString(); // the text the handler collected during parse()
+        } finally {
+            stream.close(); // runs even if parsing throws; NOTE(review): NPE here if the resource was not found (null stream)
+        }
+    }
+}

Added: 
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/test.doc
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/resources/org/apache/tika/example/test.doc?rev=1617846&view=auto
==============================================================================
Binary file - no diff available.

Propchange: 
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/test.doc
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java?rev=1617846&view=auto
==============================================================================
--- 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
 (added)
+++ 
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
 Wed Aug 13 22:00:02 2014
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import org.apache.tika.exception.TikaException;
+import org.junit.Before;
+import org.junit.Test;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestParsingExample { // checks that both ParsingExample methods extract the text "test"
+    ParsingExample parsingExample; // example under test; assigned in setUp()
+    @Before
+    public void setUp() {
+        parsingExample = new ParsingExample();
+    }
+
+    @Test
+    public void testParseToStringExample() throws IOException, SAXException, 
TikaException {
+        String result = parsingExample.parseToStringExample().trim(); // trim so surrounding whitespace does not affect the comparison
+        assertEquals("Expected 'test', but got '" + result + "'", "test", 
result);
+    }
+
+    @Test
+    public void testParseExample() throws IOException, SAXException, 
TikaException {
+        String result = parsingExample.parseExample().trim(); // trim so surrounding whitespace does not affect the comparison
+        assertEquals("Expected 'test', but got '" + result + "'", "test", 
result);
+    }
+
+}


Reply via email to