Author: mattmann
Date: Mon Oct 19 05:26:14 2015
New Revision: 1709349
URL: http://svn.apache.org/viewvc?rev=1709349&view=rev
Log:
Fix for TIKA-1745 Add methods accepting java.nio.file.Path to
org.apache.tika.Tika and org.apache.tika.parser.ParsingReader contributed by
Yaniv Kunda.
Modified:
tika/trunk/CHANGES.txt
tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java
Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1709349&r1=1709348&r2=1709349&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon Oct 19 05:26:14 2015
@@ -1,5 +1,8 @@
Release 1.11 - Current Development
+ * Java7 API support for allowing java.nio.file.Path as method arguments
+ was added to Tika and to ParsingReader (TIKA-1745).
+
* MIME support was added for WebVTT: The Web Video Text Tracks Format
files (TIKA-1772).
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java?rev=1709349&r1=1709348&r2=1709349&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java Mon Oct 19 05:26:14 2015
@@ -22,6 +22,7 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URL;
+import java.nio.file.Path;
import java.util.Properties;
import org.apache.tika.config.TikaConfig;
@@ -250,6 +251,25 @@ public class Tika {
}
/**
+ * Detects the media type of the file at the given path. The type
+ * detection is based on the document content and a potential known
+ * file extension.
+ * <p>
+ * Use the {@link #detect(String)} method when you want to detect the
+ * type of the document without actually accessing the file.
+ *
+ * @param path the path of the file
+ * @return detected media type
+ * @throws IOException if the file can not be read
+ */
+ public String detect(Path path) throws IOException {
+ Metadata metadata = new Metadata();
+ try (InputStream stream = TikaInputStream.get(path, metadata)) {
+ return detect(stream, metadata);
+ }
+ }
+
+ /**
* Detects the media type of the given file. The type detection is
* based on the document content and a potential known file extension.
* <p>
@@ -259,6 +279,7 @@ public class Tika {
* @param file the file
* @return detected media type
* @throws IOException if the file can not be read
+ * @see #detect(Path)
*/
public String detect(File file) throws IOException {
Metadata metadata = new Metadata();
@@ -405,11 +426,25 @@ public class Tika {
}
/**
+ * Parses the file at the given path and returns the extracted text content.
+ *
+ * @param path the path of the file to be parsed
+ * @return extracted text content
+ * @throws IOException if the file can not be read or parsed
+ */
+ public Reader parse(Path path) throws IOException {
+ Metadata metadata = new Metadata();
+ InputStream stream = TikaInputStream.get(path, metadata);
+ return parse(stream, metadata);
+ }
+
+ /**
* Parses the given file and returns the extracted text content.
*
* @param file the file to be parsed
* @return extracted text content
* @throws IOException if the file can not be read or parsed
+ * @see #parse(Path)
*/
public Reader parse(File file) throws IOException {
Metadata metadata = new Metadata();
@@ -537,6 +572,25 @@ public class Tika {
}
/**
+ * Parses the file at the given path and returns the extracted text content.
+ * <p>
+ * To avoid unpredictable excess memory use, the returned string contains
+ * only up to {@link #getMaxStringLength()} first characters extracted
+ * from the input document. Use the {@link #setMaxStringLength(int)}
+ * method to adjust this limitation.
+ *
+ * @param path the path of the file to be parsed
+ * @return extracted text content
+ * @throws IOException if the file can not be read
+ * @throws TikaException if the file can not be parsed
+ */
+ public String parseToString(Path path) throws IOException, TikaException {
+ Metadata metadata = new Metadata();
+ InputStream stream = TikaInputStream.get(path, metadata);
+ return parseToString(stream, metadata);
+ }
+
+ /**
* Parses the given file and returns the extracted text content.
* <p>
* To avoid unpredictable excess memory use, the returned string contains
@@ -548,6 +602,7 @@ public class Tika {
* @return extracted text content
* @throws IOException if the file can not be read
* @throws TikaException if the file can not be parsed
+ * @see #parseToString(Path)
*/
public String parseToString(File file) throws IOException, TikaException {
Metadata metadata = new Metadata();
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java?rev=1709349&r1=1709348&r2=1709349&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java Mon Oct 19 05:26:14 2015
@@ -26,6 +26,8 @@ import java.io.PipedReader;
import java.io.PipedWriter;
import java.io.Reader;
import java.io.Writer;
+import java.nio.file.Files;
+import java.nio.file.Path;
import java.util.concurrent.Executor;
import org.apache.tika.metadata.Metadata;
@@ -119,11 +121,23 @@ public class ParsingReader extends Reade
}
/**
+ * Creates a reader for the text content of the file at the given path.
+ *
+ * @param path path
+ * @throws FileNotFoundException if the given file does not exist
+ * @throws IOException if the document can not be parsed
+ */
+ public ParsingReader(Path path) throws IOException {
+ this(Files.newInputStream(path), path.getFileName().toString());
+ }
+
+ /**
* Creates a reader for the text content of the given file.
*
* @param file file
* @throws FileNotFoundException if the given file does not exist
* @throws IOException if the document can not be parsed
+ * @see #ParsingReader(Path)
*/
public ParsingReader(File file) throws FileNotFoundException, IOException {
this(new FileInputStream(file), file.getName());