Author: tpalsulich
Date: Wed Aug 13 22:00:02 2014
New Revision: 1617846
URL: http://svn.apache.org/r1617846
Log:
Second initial commit for TIKA-1391, parsing examples.
Added:
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
tika/trunk/tika-example/src/main/resources/org/
tika/trunk/tika-example/src/main/resources/org/apache/
tika/trunk/tika-example/src/main/resources/org/apache/tika/
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/test.doc
(with props)
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
Added:
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java?rev=1617846&view=auto
==============================================================================
---
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
(added)
+++
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
Wed Aug 13 22:00:02 2014
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.example;
+
+import org.apache.tika.Tika;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+public class ParsingExample {
+
+    /** Name of the sample document, resolved on the classpath relative to this class. */
+    private static final String TEST_DOCUMENT = "test.doc";
+
+    /**
+     * Example of how to use Tika's parseToString method to parse the content of a file,
+     * and return any text found.
+     *
+     * @return The content of a file.
+     * @throws IOException if the sample document is missing from the classpath or
+     *         cannot be read
+     * @throws SAXException part of the declared signature; kept so existing callers
+     *         continue to compile unchanged
+     * @throws TikaException if Tika fails to parse the document
+     */
+    public String parseToStringExample() throws IOException, SAXException, TikaException {
+        // Build the facade before opening the stream so that nothing can fail
+        // between acquiring the stream and entering the try/finally that closes it.
+        Tika tika = new Tika();
+        InputStream stream = openTestDocument();
+        try {
+            return tika.parseToString(stream);
+        } finally {
+            stream.close();
+        }
+    }
+
+    /**
+     * Example of how to use Tika to parse a file when you do not know its file type
+     * ahead of time.
+     *
+     * AutoDetectParser attempts to discover the file's type automatically, then calls
+     * the exact Parser built for that file type. Three objects are handed to it:
+     *
+     * The stream is the input to be parsed by the Parser. In this case, we get a file
+     * from the resources folder of this project.
+     *
+     * Handlers are used to get the exact information you want out of the host of
+     * information gathered by Parsers. The body content handler, intuitively, extracts
+     * everything that would go between HTML body tags.
+     *
+     * The Metadata object will be filled by the Parser with Metadata discovered about
+     * the file being parsed.
+     *
+     * @return The content of a file.
+     * @throws IOException if the sample document is missing from the classpath or
+     *         cannot be read
+     * @throws SAXException if the content handler fails while receiving parse events
+     * @throws TikaException if Tika fails to parse the document
+     */
+    public String parseExample() throws IOException, SAXException, TikaException {
+        // Collaborators are created first; see parseToStringExample() for the reason.
+        AutoDetectParser parser = new AutoDetectParser();
+        BodyContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+        InputStream stream = openTestDocument();
+        try {
+            parser.parse(stream, handler, metadata);
+            return handler.toString();
+        } finally {
+            stream.close();
+        }
+    }
+
+    /**
+     * Opens the sample document from the classpath.
+     *
+     * getResourceAsStream signals a missing resource by returning null instead of
+     * throwing. Without this check the callers would fail with a NullPointerException,
+     * and the close() in their finally blocks would throw a second one that hides
+     * the real cause.
+     *
+     * @return an open stream over the sample document; the caller must close it
+     * @throws IOException if the resource cannot be found on the classpath
+     */
+    private static InputStream openTestDocument() throws IOException {
+        InputStream stream = ParsingExample.class.getResourceAsStream(TEST_DOCUMENT);
+        if (stream == null) {
+            throw new IOException("Resource not found on classpath: " + TEST_DOCUMENT);
+        }
+        return stream;
+    }
+}
Added:
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/test.doc
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/resources/org/apache/tika/example/test.doc?rev=1617846&view=auto
==============================================================================
Binary file - no diff available.
Propchange:
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/test.doc
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
Added:
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java?rev=1617846&view=auto
==============================================================================
---
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
(added)
+++
tika/trunk/tika-example/src/test/java/org/apache/tika/example/TestParsingExample.java
Wed Aug 13 22:00:02 2014
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import org.apache.tika.exception.TikaException;
+import org.junit.Before;
+import org.junit.Test;
+import org.xml.sax.SAXException;
+
+import java.io.IOException;
+
+import static org.junit.Assert.assertEquals;
+
+public class TestParsingExample {
+    /** Example under test; replaced with a fresh instance before every test method. */
+    ParsingExample parsingExample;
+
+    /** Creates the ParsingExample instance that the test methods call. */
+    @Before
+    public void setUp() {
+        this.parsingExample = new ParsingExample();
+    }
+
+    /** parseToStringExample() must yield "test" once surrounding whitespace is trimmed. */
+    @Test
+    public void testParseToStringExample() throws IOException, SAXException, TikaException {
+        String extracted = parsingExample.parseToStringExample();
+        assertIsTestText(extracted.trim());
+    }
+
+    /** parseExample() must yield "test" once surrounding whitespace is trimmed. */
+    @Test
+    public void testParseExample() throws IOException, SAXException, TikaException {
+        String extracted = parsingExample.parseExample();
+        assertIsTestText(extracted.trim());
+    }
+
+    /**
+     * Asserts that the already-trimmed text equals "test", reporting the actual
+     * text in the failure message.
+     */
+    private static void assertIsTestText(String trimmed) {
+        String failureMessage = "Expected 'test', but got '" + trimmed + "'";
+        assertEquals(failureMessage, "test", trimmed);
+    }
+
+}