Can we just remove (rather than deprecate) TemporaryFiles? (We are not at the 1.0 release yet.)
Mike McCandless http://blog.mikemccandless.com On Thu, Sep 1, 2011 at 5:38 AM, <[email protected]> wrote: > Author: jukka > Date: Thu Sep 1 09:38:04 2011 > New Revision: 1163970 > > URL: http://svn.apache.org/viewvc?rev=1163970&view=rev > Log: > TIKA-701: Fix problems with TemporaryFiles > > Add a more generic TemporaryResources class that can handle any kinds of > Closeable resources. > > Use the new TemporaryResources class in TikaInputStream to better track all > the resources being used. > > Update all client classes to use the TemporaryResources class instead of > TemporaryFiles. > > Fix some problemns in how TikaInputStreams were being used. > > Added: > > tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java > Modified: > > tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java > > tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java > tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java > tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java > > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java > > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java > > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java > > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java > > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java > > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java > > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java > > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java > > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java > > Modified: > 
tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java > (original) > +++ > tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParserContainerExtractor.java > Thu Sep 1 09:38:04 2011 > @@ -25,7 +25,7 @@ import org.apache.tika.config.TikaConfig > import org.apache.tika.detect.DefaultDetector; > import org.apache.tika.detect.Detector; > import org.apache.tika.exception.TikaException; > -import org.apache.tika.io.TemporaryFiles; > +import org.apache.tika.io.TemporaryResources; > import org.apache.tika.io.TikaInputStream; > import org.apache.tika.metadata.Metadata; > import org.apache.tika.mime.MediaType; > @@ -106,7 +106,7 @@ public class ParserContainerExtractor im > InputStream stream, ContentHandler ignored, > Metadata metadata, ParseContext context) > throws IOException, SAXException, TikaException { > - TemporaryFiles tmp = new TemporaryFiles(); > + TemporaryResources tmp = new TemporaryResources(); > try { > TikaInputStream tis = TikaInputStream.get(stream, tmp); > > @@ -121,8 +121,13 @@ public class ParserContainerExtractor im > // Use a temporary file to process the stream twice > File file = tis.getFile(); > > - // Let the handler process the embedded resource > - handler.handle(filename, type, > TikaInputStream.get(file)); > + // Let the handler process the embedded resource > + InputStream input = TikaInputStream.get(file); > + try { > + handler.handle(filename, type, input); > + } finally { > + input.close(); > + } > > // Recurse > extractor.extract(tis, extractor, handler); > > Modified: > 
tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java > (original) > +++ > tika/trunk/tika-core/src/main/java/org/apache/tika/extractor/ParsingEmbeddedDocumentExtractor.java > Thu Sep 1 09:38:04 2011 > @@ -25,7 +25,7 @@ import java.io.InputStream; > > import org.apache.tika.exception.TikaException; > import org.apache.tika.io.CloseShieldInputStream; > -import org.apache.tika.io.TemporaryFiles; > +import org.apache.tika.io.TemporaryResources; > import org.apache.tika.io.TikaInputStream; > import org.apache.tika.metadata.Metadata; > import org.apache.tika.parser.DelegatingParser; > @@ -90,7 +90,7 @@ public class ParsingEmbeddedDocumentExtr > } > > // Use the delegate parser to parse this entry > - TemporaryFiles tmp = new TemporaryFiles(); > + TemporaryResources tmp = new TemporaryResources(); > try { > DELEGATING_PARSER.parse( > TikaInputStream.get(new CloseShieldInputStream(stream), > tmp), > @@ -99,7 +99,7 @@ public class ParsingEmbeddedDocumentExtr > } catch (TikaException e) { > // Could not parse the entry, just skip the content > } finally { > - tmp.dispose(); > + tmp.close(); > } > > if(outputHtml) { > > Modified: > tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java > (original) > +++ 
tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryFiles.java > Thu Sep 1 09:38:04 2011 > @@ -16,24 +16,19 @@ > */ > package org.apache.tika.io; > > -import java.io.File; > import java.io.IOException; > -import java.util.ArrayList; > -import java.util.List; > > -public class TemporaryFiles { > - > - private final List<File> files = new ArrayList<File>(); > - > - public File createTemporaryFile() throws IOException { > - File file = File.createTempFile("apache-tika-", ".tmp"); > - files.add(file); > - return file; > - } > +/** > + * @deprecated Use the {@link TemporaryResources} class instead > + */ > +public class TemporaryFiles extends TemporaryResources { > > + @Override > public void dispose() { > - for (File file : files) { > - file.delete(); > + try { > + close(); > + } catch (IOException e) { > + throw new RuntimeException(e); > } > } > > > Added: > tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java?rev=1163970&view=auto > ============================================================================== > --- > tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java > (added) > +++ > tika/trunk/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java > Thu Sep 1 09:38:04 2011 > @@ -0,0 +1,156 @@ > +/* > + * Licensed to the Apache Software Foundation (ASF) under one or more > + * contributor license agreements. See the NOTICE file distributed with > + * this work for additional information regarding copyright ownership. > + * The ASF licenses this file to You under the Apache License, Version 2.0 > + * (the "License"); you may not use this file except in compliance with > + * the License. 
You may obtain a copy of the License at > + * > + * http://www.apache.org/licenses/LICENSE-2.0 > + * > + * Unless required by applicable law or agreed to in writing, software > + * distributed under the License is distributed on an "AS IS" BASIS, > + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > + * See the License for the specific language governing permissions and > + * limitations under the License. > + */ > +package org.apache.tika.io; > + > +import java.io.Closeable; > +import java.io.File; > +import java.io.IOException; > +import java.util.LinkedList; > +import java.util.List; > + > +import org.apache.tika.exception.TikaException; > + > +/** > + * Utility class for tracking and ultimately closing or otherwise disposing > + * a collection of temporary resources. > + * <p> > + * Note that this class is not thread-safe. > + * > + * @since Apache Tika 1.0 > + */ > +public class TemporaryResources implements Closeable { > + > + /** > + * Tracked resources in LIFO order. > + */ > + private final LinkedList<Closeable> resources = new > LinkedList<Closeable>(); > + > + /** > + * Directory for temporary files, <code>null</code> for the system > default. > + */ > + private File tmp = null; > + > + /** > + * Sets the directory to be used for the temporary files created by > + * the {@link #createTemporaryFile()} method. > + * > + * @param tmp temporary file directory, > + * or <code>null</code> for the system default > + */ > + public void setTemporaryFileDirectory(File tmp) { > + this.tmp = tmp; > + } > + > + /** > + * Creates and returns a temporary file that will automatically be > + * deleted when the {@link #close()} method is called. 
> + * > + * @return > + * @throws IOException > + */ > + public File createTemporaryFile() throws IOException { > + final File file = File.createTempFile("apache-tika-", ".tmp", tmp); > + addResource(new Closeable() { > + public void close() throws IOException { > + if (!file.delete()) { > + throw new IOException( > + "Could not delete temporary file " > + + file.getPath()); > + } > + } > + }); > + return file; > + } > + > + /** > + * Adds a new resource to the set of tracked resources that will all be > + * closed when the {@link #close()} method is called. > + * > + * @param resource resource to be tracked > + */ > + public void addResource(Closeable resource) { > + resources.addFirst(resource); > + } > + > + /** > + * Returns the latest of the tracked resources that implements or > + * extends the given interface or class. > + * > + * @param klass interface or class > + * @return matching resource, or <code>null</code> if not found > + */ > + @SuppressWarnings("unchecked") > + public <T extends Closeable> T getResource(Class<T> klass) { > + for (Closeable resource : resources) { > + if (klass.isAssignableFrom(resource.getClass())) { > + return (T) resource; > + } > + } > + return null; > + } > + > + /** > + * Closes all tracked resources. The resources are closed in reverse > order > + * from how they were added. > + * <p> > + * Any thrown exceptions from managed resources are collected and > + * then re-thrown only once all the resources have been closed. 
> + * > + * @throws IOException if one or more of the tracked resources > + * could not be closed > + */ > + public void close() throws IOException { > + // Release all resources and keep track of any exceptions > + List<IOException> exceptions = new LinkedList<IOException>(); > + for (Closeable resource : resources) { > + try { > + resource.close(); > + } catch (IOException e) { > + exceptions.add(e); > + } > + } > + resources.clear(); > + > + // Throw any exceptions that were captured from above > + if (!exceptions.isEmpty()) { > + if (exceptions.size() == 1) { > + throw exceptions.get(0); > + } else { > + throw new IOExceptionWithCause( > + "Multiple IOExceptions" + exceptions, > + exceptions.get(0)); > + } > + } > + } > + > + /** > + * Calls the {@link #close()} method and wraps the potential > + * {@link IOException} into a {@link TikaException} for convenience > + * when used within Tika. > + * > + * @throws TikaException if one or more of the tracked resources > + * could not be closed > + */ > + public void dispose() throws TikaException { > + try { > + close(); > + } catch (IOException e) { > + throw new TikaException("Failed to close temporary resources", > e); > + } > + } > + > +} > > Modified: > tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java > (original) > +++ > tika/trunk/tika-core/src/main/java/org/apache/tika/io/TikaInputStream.java > Thu Sep 1 09:38:04 2011 > @@ -30,6 +30,7 @@ import java.net.URI; > import java.net.URISyntaxException; > import java.net.URL; > import java.net.URLConnection; > +import java.nio.channels.FileChannel; > import java.sql.Blob; > import java.sql.SQLException; > > @@ -84,34 +85,54 
@@ public class TikaInputStream extends Tag > * when you <em>don't</em> explicitly close the returned stream. The > * recommended access pattern is: > * <pre> > - * TemporaryFiles tmp = new TemporaryFiles(); > + * TemporaryResources tmp = new TemporaryResources(); > * try { > * TikaInputStream stream = TikaInputStream.get(..., tmp); > * // process stream but don't close it > * } finally { > - * tmp.dispose(); > + * tmp.close(); > * } > * </pre> > + * <p> > + * The given stream instance will <em>not</em> be closed when the > + * {@link TemporaryResources#close()} method is called. The caller > + * is expected to explicitly close the original stream when it's no > + * longer used. > * > * @param stream normal input stream > * @return a TikaInputStream instance > */ > - public static TikaInputStream get(InputStream stream, TemporaryFiles > tmp) { > + public static TikaInputStream get( > + InputStream stream, TemporaryResources tmp) { > if (stream instanceof TikaInputStream) { > return (TikaInputStream) stream; > } else { > + // Make sure that the stream is buffered and that it > + // (properly) supports the mark feature > + if (!(stream instanceof BufferedInputStream) > + && !(stream instanceof ByteArrayInputStream)) { > + stream = new BufferedInputStream(stream); > + } > return new TikaInputStream(stream, tmp, -1); > } > } > > /** > + * @deprecated Use the {@link #get(InputStream, TemporaryResources)} > instead > + */ > + public static TikaInputStream get(InputStream stream, TemporaryFiles > tmp) { > + return get(stream, (TemporaryResources) tmp); > + } > + > + /** > * Casts or wraps the given stream to a TikaInputStream instance. > * This method can be used to access the functionality of this class > * even when given just a normal input stream instance. > * <p> > - * Use this method instead of the {@link #get(InputStream, > TemporaryFiles)} > - * alternative when you <em>do</em> explicitly close the returned stream. 
> - * The recommended access pattern is: > + * Use this method instead of the > + * {@link #get(InputStream, TemporaryResources)} alternative when you > + * <em>do</em> explicitly close the returned stream. The recommended > + * access pattern is: > * <pre> > * TikaInputStream stream = TikaInputStream.get(...); > * try { > @@ -120,12 +141,16 @@ public class TikaInputStream extends Tag > * stream.close(); > * } > * </pre> > + * <p> > + * The given stream instance will be closed along with any other > resources > + * associated with the returned TikaInputStream instance when the > + * {@link #close()} method is called. > * > * @param stream normal input stream > * @return a TikaInputStream instance > */ > public static TikaInputStream get(InputStream stream) { > - return get(stream, new TemporaryFiles()); > + return get(stream, new TemporaryResources()); > } > > /** > @@ -156,7 +181,8 @@ public class TikaInputStream extends Tag > public static TikaInputStream get(byte[] data, Metadata metadata) { > metadata.set(Metadata.CONTENT_LENGTH, Integer.toString(data.length)); > return new TikaInputStream( > - new ByteArrayInputStream(data), new TemporaryFiles(), > data.length); > + new ByteArrayInputStream(data), > + new TemporaryResources(), data.length); > } > > /** > @@ -247,7 +273,7 @@ public class TikaInputStream extends Tag > } else { > return new TikaInputStream( > new BufferedInputStream(blob.getBinaryStream()), > - null, length); > + new TemporaryResources(), length); > } > } > > @@ -355,25 +381,7 @@ public class TikaInputStream extends Tag > > return new TikaInputStream( > new BufferedInputStream(connection.getInputStream()), > - new TemporaryFiles(), length); > - } > - > - /** > - * Makes sure that a stream is buffered and correctly supports the > - * mark feature by wrapping the given stream to a > - * {@link BufferedInputStream} if needed. 
> - * > - * @param stream original stream > - * @return buffered stream that supports the mark feature > - */ > - private static InputStream withBufferingAndMarkSupport(InputStream > stream) { > - if (stream instanceof ByteArrayInputStream) { > - return stream; > - } else if (stream instanceof BufferedInputStream) { > - return stream; > - } else { > - return new BufferedInputStream(stream); > - } > + new TemporaryResources(), length); > } > > /** > @@ -386,9 +394,9 @@ public class TikaInputStream extends Tag > private File file; > > /** > - * Temporary file provider. > + * Tracker of temporary resources. > */ > - private final TemporaryFiles tmp; > + private final TemporaryResources tmp; > > /** > * Total length of the stream, or -1 if unknown. > @@ -422,20 +430,25 @@ public class TikaInputStream extends Tag > private TikaInputStream(File file) throws FileNotFoundException { > super(new BufferedInputStream(new FileInputStream(file))); > this.file = file; > - this.tmp = new TemporaryFiles(); > + this.tmp = new TemporaryResources(); > this.length = file.length(); > } > > /** > * Creates a TikaInputStream instance. This private constructor is used > * by the static factory methods based on the available information. > + * <p> > + * The given stream needs to be included in the given temporary resource > + * collection if the caller wants it also to get closed when the > + * {@link #close()} method is invoked. 
> * > * @param stream <em>buffered</em> stream (must support the mark feature) > + * @param tmp tracker for temporary resources associated with this stream > * @param length total length of the stream, or -1 if unknown > */ > private TikaInputStream( > - InputStream stream, TemporaryFiles tmp, long length) { > - super(withBufferingAndMarkSupport(stream)); > + InputStream stream, TemporaryResources tmp, long length) { > + super(stream); > this.file = null; > this.tmp = tmp; > this.length = length; > @@ -489,6 +502,9 @@ public class TikaInputStream extends Tag > */ > public void setOpenContainer(Object container) { > openContainer = container; > + if (container instanceof Closeable) { > + tmp.addResource((Closeable) container); > + } > } > > public boolean hasFile() { > @@ -497,11 +513,10 @@ public class TikaInputStream extends Tag > > public File getFile() throws IOException { > if (file == null) { > - if (in == null) { > - throw new IOException("Stream has already been read"); > - } else if (position > 0) { > + if (position > 0) { > throw new IOException("Stream is already being read"); > } else { > + // Spool the entire stream into a temporary file > file = tmp.createTemporaryFile(); > OutputStream out = new FileOutputStream(file); > try { > @@ -509,15 +524,37 @@ public class TikaInputStream extends Tag > } finally { > out.close(); > } > - in.close(); > - // Re-point the stream at the file now we have it > - in = new BufferedInputStream(new FileInputStream(file)); > + > + // Create a new input stream and make sure it'll get closed > + FileInputStream newStream = new FileInputStream(file); > + tmp.addResource(newStream); > + > + // Replace the spooled stream with the new stream in a way > + // that still ends up closing the old stream if or when the > + // close() method is called. The closing of the new stream > + // is already being handled as noted above. 
> + final InputStream oldStream = in; > + in = new BufferedInputStream(newStream) { > + @Override > + public void close() throws IOException { > + oldStream.close(); > + } > + }; > + > length = file.length(); > } > } > return file; > } > > + public FileChannel getFileChannel() throws IOException { > + FileInputStream fis = new FileInputStream(getFile()); > + tmp.addResource(fis); > + FileChannel channel = fis.getChannel(); > + tmp.addResource(channel); > + return channel; > + } > + > public boolean hasLength() { > return length != -1; > } > @@ -549,46 +586,10 @@ public class TikaInputStream extends Tag > } > > @Override > - public int available() throws IOException { > - if (in == null) { > - return 0; > - } else { > - return super.available(); > - } > - } > - > - @Override > public long skip(long ln) throws IOException { > - if (in == null) { > - return 0; > - } else { > - long n = super.skip(ln); > - position += n; > - return n; > - } > - } > - > - @Override > - public int read() throws IOException { > - if (in == null) { > - return -1; > - } else { > - return super.read(); > - } > - } > - > - @Override > - public int read(byte[] bts, int off, int len) throws IOException { > - if (in == null) { > - return -1; > - } else { > - return super.read(bts, off, len); > - } > - } > - > - @Override > - public int read(byte[] bts) throws IOException { > - return read(bts, 0, bts.length); > + long n = super.skip(ln); > + position += n; > + return n; > } > > @Override > @@ -611,33 +612,22 @@ public class TikaInputStream extends Tag > > @Override > public void close() throws IOException { > - if (in != null) { > - in.close(); > - in = null; > - } > - if (openContainer != null) { > - if (openContainer instanceof Closeable) { > - ((Closeable)openContainer).close(); > - } > - openContainer = null; > - } > file = null; > - tmp.dispose(); > - } > + mark = -1; > > - @Override > - protected void beforeRead(int n) throws IOException { > - if (in == null) { > - throw new 
IOException("End of the stream reached"); > - } > + // The close method was explicitly called, so we indeed > + // are expected to close the input stream. Handle that > + // by adding that stream as a resource to be tracked before > + // closing all of them. This way also possible exceptions from > + // the close() calls get managed properly. > + tmp.addResource(in); > + tmp.close(); > } > > @Override > - protected void afterRead(int n) throws IOException { > + protected void afterRead(int n) { > if (n != -1) { > position += n; > - } else if (mark == -1) { > - close(); > } > } > > > Modified: > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java > (original) > +++ > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java > Thu Sep 1 09:38:04 2011 > @@ -23,7 +23,7 @@ import org.apache.tika.config.TikaConfig > import org.apache.tika.detect.DefaultDetector; > import org.apache.tika.detect.Detector; > import org.apache.tika.exception.TikaException; > -import org.apache.tika.io.TemporaryFiles; > +import org.apache.tika.io.TemporaryResources; > import org.apache.tika.io.TikaInputStream; > import org.apache.tika.metadata.Metadata; > import org.apache.tika.mime.MediaType; > @@ -114,7 +114,7 @@ public class AutoDetectParser extends Co > InputStream stream, ContentHandler handler, > Metadata metadata, ParseContext context) > throws IOException, SAXException, TikaException { > - TemporaryFiles tmp = new TemporaryFiles(); > + TemporaryResources tmp = new TemporaryResources(); > try { > TikaInputStream tis = TikaInputStream.get(stream, tmp); > > > Modified: > 
tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java > (original) > +++ > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/CompositeParser.java > Thu Sep 1 09:38:04 2011 > @@ -27,7 +27,7 @@ import java.util.Map; > import java.util.Set; > > import org.apache.tika.exception.TikaException; > -import org.apache.tika.io.TemporaryFiles; > +import org.apache.tika.io.TemporaryResources; > import org.apache.tika.io.TikaInputStream; > import org.apache.tika.metadata.Metadata; > import org.apache.tika.mime.MediaType; > @@ -234,7 +234,7 @@ public class CompositeParser extends Abs > Metadata metadata, ParseContext context) > throws IOException, SAXException, TikaException { > Parser parser = getParser(metadata); > - TemporaryFiles tmp = new TemporaryFiles(); > + TemporaryResources tmp = new TemporaryResources(); > try { > TikaInputStream taggedStream = TikaInputStream.get(stream, tmp); > TaggedContentHandler taggedHandler = new > TaggedContentHandler(handler); > > Modified: > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java > (original) > +++ > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/NetworkParser.java > Thu Sep 1 09:38:04 2011 > @@ -30,7 +30,7 @@ import java.util.Set; > import org.apache.tika.exception.TikaException; > import 
org.apache.tika.io.CloseShieldInputStream; > import org.apache.tika.io.IOUtils; > -import org.apache.tika.io.TemporaryFiles; > +import org.apache.tika.io.TemporaryResources; > import org.apache.tika.io.TikaInputStream; > import org.apache.tika.metadata.Metadata; > import org.apache.tika.mime.MediaType; > @@ -64,7 +64,7 @@ public class NetworkParser extends Abstr > InputStream stream, ContentHandler handler, > Metadata metadata, ParseContext context) > throws IOException, SAXException, TikaException { > - TemporaryFiles tmp = new TemporaryFiles(); > + TemporaryResources tmp = new TemporaryResources(); > try { > TikaInputStream tis = TikaInputStream.get(stream, tmp); > parse(tis, handler, metadata, context); > > Modified: > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java > (original) > +++ > tika/trunk/tika-core/src/main/java/org/apache/tika/parser/external/ExternalParser.java > Thu Sep 1 09:38:04 2011 > @@ -34,7 +34,7 @@ import java.util.regex.Pattern; > import org.apache.tika.exception.TikaException; > import org.apache.tika.io.IOUtils; > import org.apache.tika.io.NullOutputStream; > -import org.apache.tika.io.TemporaryFiles; > +import org.apache.tika.io.TemporaryResources; > import org.apache.tika.io.TikaInputStream; > import org.apache.tika.metadata.Metadata; > import org.apache.tika.mime.MediaType; > @@ -80,9 +80,8 @@ public class ExternalParser extends Abst > * @see Runtime#exec(String[]) > */ > private String[] command = new String[] { "cat" }; > - > - private TemporaryFiles tmp = new TemporaryFiles(); > - > + > + private TemporaryResources tmp = new TemporaryResources(); > > public 
Set<MediaType> getSupportedTypes(ParseContext context) { > return getSupportedTypes(); > > Modified: > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java > (original) > +++ > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java > Thu Sep 1 09:38:04 2011 > @@ -16,14 +16,13 @@ > */ > package org.apache.tika.parser.jpeg; > > -import java.io.FilterInputStream; > import java.io.IOException; > import java.io.InputStream; > import java.util.Collections; > import java.util.Set; > > import org.apache.tika.exception.TikaException; > -import org.apache.tika.io.TemporaryFiles; > +import org.apache.tika.io.TemporaryResources; > import org.apache.tika.io.TikaInputStream; > import org.apache.tika.metadata.Metadata; > import org.apache.tika.mime.MediaType; > @@ -48,7 +47,7 @@ public class JpegParser extends Abstract > InputStream stream, ContentHandler handler, > Metadata metadata, ParseContext context) > throws IOException, SAXException, TikaException { > - TemporaryFiles tmp = new TemporaryFiles(); > + TemporaryResources tmp = new TemporaryResources(); > try { > TikaInputStream tis = TikaInputStream.get(stream, tmp); > new ImageMetadataExtractor(metadata).parseJpeg(tis.getFile()); > > Modified: > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java > (original) > +++ > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java > Thu Sep 1 09:38:04 2011 > @@ -165,13 +165,13 @@ public class OfficeParser extends Abstra > NPOIFSFileSystem filesystem; > if(stream instanceof TikaInputStream) { > TikaInputStream tstream = (TikaInputStream)stream; > - if(tstream.getOpenContainer() != null) { > - filesystem = > (NPOIFSFileSystem)tstream.getOpenContainer(); > - } else if(tstream.hasFile()) { > - filesystem = new NPOIFSFileSystem(tstream.getFile()); > - } else { > - filesystem = new NPOIFSFileSystem(tstream); > - } > + if(tstream.getOpenContainer() != null) { > + filesystem = (NPOIFSFileSystem)tstream.getOpenContainer(); > + } else if(tstream.hasFile()) { > + filesystem = new NPOIFSFileSystem(tstream.getFileChannel()); > + } else { > + filesystem = new NPOIFSFileSystem(tstream); > + } > } else { > filesystem = new NPOIFSFileSystem(stream); > } > > Modified: > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java > (original) > +++ > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java > Thu Sep 1 09:38:04 2011 > @@ -21,6 +21,7 @@ import static org.apache.tika.mime.Media > import java.io.File; > import java.io.IOException; > import java.io.InputStream; > +import java.nio.channels.FileChannel; > import java.util.Collections; > import java.util.HashSet; > import java.util.Set; > @@ -28,7 +29,7 @@ import java.util.Set; > import 
org.apache.poi.poifs.filesystem.Entry; > import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; > import org.apache.tika.detect.Detector; > -import org.apache.tika.io.TemporaryFiles; > +import org.apache.tika.io.TemporaryResources; > import org.apache.tika.io.TikaInputStream; > import org.apache.tika.metadata.Metadata; > import org.apache.tika.mime.MediaType; > @@ -92,44 +93,41 @@ public class POIFSContainerDetector impl > > // We can only detect the exact type when given a TikaInputStream > if (TikaInputStream.isTikaInputStream(input)) { > - TemporaryFiles tmp = new TemporaryFiles(); > - try { > - // Look for known top level entry names to detect the > document type > - Set<String> names = > - getTopLevelNames(TikaInputStream.get(input, tmp)); > - if (names.contains("Workbook")) { > - return XLS; > - } else if (names.contains("EncryptedPackage")) { > - return OLE; > - } else if (names.contains("WordDocument")) { > - return DOC; > - } else if (names.contains("Quill")) { > - return PUB; > - } else if (names.contains("PowerPoint Document")) { > - return PPT; > - } else if (names.contains("VisioDocument")) { > - return VSD; > - } else if (names.contains("CONTENTS")) { > - return WPS; > - } else if (names.contains("\u0001Ole10Native")) { > - return OLE; > - } else if (names.contains("PerfectOffice_MAIN")) { > - if (names.contains("SlideShow")) { > - return > MediaType.application("x-corelpresentations"); // .shw > - } else if (names.contains("PerfectOffice_OBJECTS")) { > - return MediaType.application("x-quattro-pro"); // > .wb? 
> - } > - } else if (names.contains("NativeContent_MAIN")) { > - return MediaType.application("x-quattro-pro"); // .qpw > - } else { > - for (String name : names) { > - if (name.startsWith("__substg1.0_")) { > - return MSG; > - } > + // No TemporaryResources as this is for sure a TikaInputStream > + TikaInputStream tis = TikaInputStream.get(input); > + > + // Look for known top level entry names to detect the document > type > + Set<String> names = getTopLevelNames(tis); > + if (names.contains("Workbook")) { > + return XLS; > + } else if (names.contains("EncryptedPackage")) { > + return OLE; > + } else if (names.contains("WordDocument")) { > + return DOC; > + } else if (names.contains("Quill")) { > + return PUB; > + } else if (names.contains("PowerPoint Document")) { > + return PPT; > + } else if (names.contains("VisioDocument")) { > + return VSD; > + } else if (names.contains("CONTENTS")) { > + return WPS; > + } else if (names.contains("\u0001Ole10Native")) { > + return OLE; > + } else if (names.contains("PerfectOffice_MAIN")) { > + if (names.contains("SlideShow")) { > + return MediaType.application("x-corelpresentations"); // > .shw > + } else if (names.contains("PerfectOffice_OBJECTS")) { > + return MediaType.application("x-quattro-pro"); // .wb? 
> + } > + } else if (names.contains("NativeContent_MAIN")) { > + return MediaType.application("x-quattro-pro"); // .qpw > + } else { > + for (String name : names) { > + if (name.startsWith("__substg1.0_")) { > + return MSG; > } > } > - } finally { > - tmp.dispose(); > } > } > > @@ -141,10 +139,10 @@ public class POIFSContainerDetector impl > throws IOException { > // Force the document stream to a (possibly temporary) file > // so we don't modify the current position of the stream > - File file = stream.getFile(); > + FileChannel channel = stream.getFileChannel(); > > try { > - NPOIFSFileSystem fs = new NPOIFSFileSystem(file); > + NPOIFSFileSystem fs = new NPOIFSFileSystem(channel); > > // Optimize a possible later parsing process by keeping > // a reference to the already opened POI file system > > Modified: > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java > (original) > +++ > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java > Thu Sep 1 09:38:04 2011 > @@ -30,7 +30,7 @@ import org.apache.poi.openxml4j.opc.Pack > import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; > import org.apache.tika.detect.Detector; > import org.apache.tika.io.IOUtils; > -import org.apache.tika.io.TemporaryFiles; > +import org.apache.tika.io.TemporaryResources; > import org.apache.tika.io.TikaInputStream; > import org.apache.tika.metadata.Metadata; > import org.apache.tika.mime.MediaType; > @@ -70,7 +70,7 @@ public class ZipContainerDetector implem > return MediaType.APPLICATION_ZIP; > } > > - TemporaryFiles tmp = new TemporaryFiles(); > + 
TemporaryResources tmp = new TemporaryResources(); > ZipFile zip = null; > try { > File file = TikaInputStream.get(input, tmp).getFile(); > @@ -99,7 +99,7 @@ public class ZipContainerDetector implem > } catch (IOException e) { > } > } > - tmp.dispose(); > + tmp.close(); > } > } > > > Modified: > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java > URL: > http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java?rev=1163970&r1=1163969&r2=1163970&view=diff > ============================================================================== > --- > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java > (original) > +++ > tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java > Thu Sep 1 09:38:04 2011 > @@ -16,9 +16,32 @@ > */ > package org.apache.tika.parser.rtf; > > +import java.io.BufferedOutputStream; > +import java.io.ByteArrayOutputStream; > +import java.io.File; > +import java.io.FileOutputStream; > +import java.io.IOException; > +import java.io.InputStream; > +import java.io.OutputStream; > +import java.io.UnsupportedEncodingException; > +import java.util.Collections; > +import java.util.HashMap; > +import java.util.LinkedList; > +import java.util.Map; > +import java.util.Set; > +import java.util.regex.Matcher; > +import java.util.regex.Pattern; > + > +import javax.swing.text.AttributeSet; > +import javax.swing.text.BadLocationException; > +import javax.swing.text.DefaultStyledDocument; > +import javax.swing.text.Document; > +import javax.swing.text.StyleContext; > +import javax.swing.text.rtf.RTFEditorKit; > + > import org.apache.tika.exception.TikaException; > import org.apache.tika.io.TaggedInputStream; > -import org.apache.tika.io.TemporaryFiles; > +import org.apache.tika.io.TemporaryResources; > import org.apache.tika.io.TikaInputStream; > import org.apache.tika.metadata.Metadata; > import org.apache.tika.mime.MediaType; > @@ 
-28,13 +51,6 @@ import org.apache.tika.sax.XHTMLContentH > import org.xml.sax.ContentHandler; > import org.xml.sax.SAXException; > > -import javax.swing.text.*; > -import javax.swing.text.rtf.RTFEditorKit; > -import java.io.*; > -import java.util.*; > -import java.util.regex.Matcher; > -import java.util.regex.Pattern; > - > /** > * RTF parser > */ > @@ -106,7 +122,7 @@ public class RTFParser extends AbstractP > Metadata metadata, ParseContext context) > throws IOException, SAXException, TikaException { > TaggedInputStream tagged = new TaggedInputStream(stream); > - TemporaryFiles tmp = new TemporaryFiles(); > + TemporaryResources tmp = new TemporaryResources(); > try { > File tempFile = tmp.createTemporaryFile(); > createUnicodeRtfTempFile(tempFile, stream); > > >
