ParsingReader.java

mattmann Sun, 18 Oct 2015 22:27:07 -0700

Author: mattmann
Date: Mon Oct 19 05:26:14 2015
New Revision: 1709349

URL: http://svn.apache.org/viewvc?rev=1709349&view=rev
Log:
Fix for TIKA-1745 Add methods accepting java.nio.file.Path to 
org.apache.tika.Tika and org.apache.tika.parser.ParsingReader contributed by  
Yaniv Kunda.


Modified:
    tika/trunk/CHANGES.txt
    tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java
    tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java

Modified: tika/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/tika/trunk/CHANGES.txt?rev=1709349&r1=1709348&r2=1709349&view=diff
==============================================================================
--- tika/trunk/CHANGES.txt (original)
+++ tika/trunk/CHANGES.txt Mon Oct 19 05:26:14 2015
@@ -1,5 +1,8 @@
 Release 1.11 - Current Development
 
+  * Java7 API support for allowing java.nio.file.Path as method arguments
+    was added to Tika and to ParsingReader (TIKA-1745).
+
   * MIME support was added for WebVTT: The Web Video Text Tracks Format
     files (TIKA-1772).
 

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java?rev=1709349&r1=1709348&r2=1709349&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/Tika.java Mon Oct 19 05:26:14 2015
@@ -22,6 +22,7 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.Reader;
 import java.net.URL;
+import java.nio.file.Path;
 import java.util.Properties;
 
 import org.apache.tika.config.TikaConfig;
@@ -250,6 +251,25 @@ public class Tika {
     }
 
     /**
+     * Detects the media type of the file at the given path. The type
+     * detection is based on the document content and a potential known
+     * file extension.
+     * <p>
+     * Use the {@link #detect(String)} method when you want to detect the
+     * type of the document without actually accessing the file.
+     *
+     * @param path the path of the file
+     * @return detected media type
+     * @throws IOException if the file can not be read
+     */
+    public String detect(Path path) throws IOException {
+        Metadata metadata = new Metadata(); // fresh metadata, shared by TikaInputStream.get and the stream-based detect
+        try (InputStream stream = TikaInputStream.get(path, metadata)) { // try-with-resources closes the stream on exit
+            return detect(stream, metadata); // delegates to the (InputStream, Metadata) overload
+        }
+    }
+
+    /**
      * Detects the media type of the given file. The type detection is
      * based on the document content and a potential known file extension.
      * <p>
@@ -259,6 +279,7 @@ public class Tika {
      * @param file the file
      * @return detected media type
      * @throws IOException if the file can not be read
+     * @see #detect(Path)
      */
     public String detect(File file) throws IOException {
         Metadata metadata = new Metadata();
@@ -405,11 +426,25 @@ public class Tika {
     }
 
     /**
+     * Parses the file at the given path and returns the extracted text content.
+     *
+     * @param path the path of the file to be parsed
+     * @return extracted text content
+     * @throws IOException if the file can not be read or parsed
+     */
+    public Reader parse(Path path) throws IOException {
+        Metadata metadata = new Metadata(); // fresh metadata, shared by TikaInputStream.get and the stream-based parse
+        InputStream stream = TikaInputStream.get(path, metadata); // not closed here; NOTE(review): presumably the returned Reader owns the stream - confirm
+        return parse(stream, metadata); // delegates to the (InputStream, Metadata) overload
+    }
+
+    /**
      * Parses the given file and returns the extracted text content.
      *
      * @param file the file to be parsed
      * @return extracted text content
      * @throws IOException if the file can not be read or parsed
+     * @see #parse(Path)
      */
     public Reader parse(File file) throws IOException {
         Metadata metadata = new Metadata();
@@ -537,6 +572,25 @@ public class Tika {
     }
 
     /**
+     * Parses the file at the given path and returns the extracted text content.
+     * <p>
+     * To avoid unpredictable excess memory use, the returned string contains
+     * only up to {@link #getMaxStringLength()} first characters extracted
+     * from the input document. Use the {@link #setMaxStringLength(int)}
+     * method to adjust this limitation.
+     *
+     * @param path the path of the file to be parsed
+     * @return extracted text content
+     * @throws IOException if the file can not be read
+     * @throws TikaException if the file can not be parsed
+     */
+    public String parseToString(Path path) throws IOException, TikaException {
+        Metadata metadata = new Metadata(); // fresh metadata, shared by TikaInputStream.get and the stream-based parseToString
+        InputStream stream = TikaInputStream.get(path, metadata); // not closed in this method; NOTE(review): presumably the callee closes it - confirm
+        return parseToString(stream, metadata); // delegates to the (InputStream, Metadata) overload
+    }
+
+    /**
      * Parses the given file and returns the extracted text content.
      * <p>
      * To avoid unpredictable excess memory use, the returned string contains
@@ -548,6 +602,7 @@ public class Tika {
      * @return extracted text content
      * @throws IOException if the file can not be read
      * @throws TikaException if the file can not be parsed
+     * @see #parseToString(Path)
      */
     public String parseToString(File file) throws IOException, TikaException {
         Metadata metadata = new Metadata();

Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java?rev=1709349&r1=1709348&r2=1709349&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java Mon Oct 19 05:26:14 2015
@@ -26,6 +26,8 @@ import java.io.PipedReader;
 import java.io.PipedWriter;
 import java.io.Reader;
 import java.io.Writer;
+import java.nio.file.Files;
+import java.nio.file.Path;
 import java.util.concurrent.Executor;
 
 import org.apache.tika.metadata.Metadata;
@@ -119,11 +121,23 @@ public class ParsingReader extends Reade
     }
 
     /**
+     * Creates a reader for the text content of the file at the given path.
+     *
+     * @param path path
+     * @throws java.nio.file.NoSuchFileException if the given file does not exist
+     * @throws IOException if the document can not be parsed
+     */
+    public ParsingReader(Path path) throws IOException {
+        this(Files.newInputStream(path), path.getFileName().toString()); // opens the file and passes its last name element on; NOTE(review): Path.getFileName() is null for a path with zero elements (e.g. a root), which would NPE here
+    }
+
+    /**
      * Creates a reader for the text content of the given file.
      *
      * @param file file
      * @throws FileNotFoundException if the given file does not exist
      * @throws IOException if the document can not be parsed
+     * @see #ParsingReader(Path)
      */
     public ParsingReader(File file) throws FileNotFoundException, IOException {
         this(new FileInputStream(file), file.getName());

svn commit: r1709349 - in /tika/trunk: CHANGES.txt tika-core/src/main/java/org/apache/tika/Tika.java tika-core/src/main/java/org/apache/tika/parser/ParsingReader.java

Reply via email to