Author: tboehme
Date: Fri Apr  6 14:25:10 2012
New Revision: 1310338

URL: http://svn.apache.org/viewvc?rev=1310338&view=rev
Log:
as announced on PDFBOX-1199 this adds new input stream class with buffering and 
seek functionality;
PushBackInputStream is extended to support seek operation if the underlying 
stream implements RandomAccessRead

Added:
    
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
   (with props)
Modified:
    
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java?rev=1310338&r1=1310337&r2=1310338&view=diff
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java 
(original)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/PushBackInputStream.java 
Fri Apr  6 14:25:10 2012
@@ -33,6 +33,10 @@ public class PushBackInputStream extends
      */
     private long offset = 0;
     
+    /** In case provided input stream implements {@link RandomAccessRead} we 
hold
+     *  a typed reference to it in order to support seek operations. */
+    private final RandomAccessRead raInput;
+    
     /**
      * Constructor.
      *
@@ -48,6 +52,9 @@ public class PushBackInputStream extends
         {
             throw new IOException( "Error: input was null" );
         }
+        
+        raInput = ( input instanceof RandomAccessRead ) ?
+                                                                               
(RandomAccessRead) input : null;
     }
 
     /**
@@ -198,4 +205,33 @@ public class PushBackInputStream extends
         return data;
     }
 
+    /** Allows to seek to another position within stream in case the underlying
+     *  stream implements {@link RandomAccessRead}. Otherwise an {@link 
IOException}
+     *  is thrown.
+     *  
+     *  Pushback buffer is cleared before seek operation by skipping over all 
bytes
+     *  of buffer.
+     *  
+     *  @param newOffset  new position within stream from which to read next
+     *  
+     *  @throws IOException if underlying stream does not implement {@link 
RandomAccessRead}
+     *                      or seek operation on underlying stream was not 
successful
+     */
+    public void seek( long newOffset ) throws IOException
+    {
+       if ( raInput == null )
+                       throw new IOException( "Provided stream of type " + 
in.getClass().getSimpleName() +
+                                                                               
                                 " is not seekable." );
+       
+       // clear unread buffer by skipping over all bytes of buffer
+       int unreadLength = buf.length - pos;
+       if ( unreadLength > 0 )
+       {
+                       skip( unreadLength );
+       }
+       
+       raInput.seek( newOffset );
+       offset = newOffset;
+    }
+
 }

Added: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java?rev=1310338&view=auto
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
 (added)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
 Fri Apr  6 14:25:10 2012
@@ -0,0 +1,227 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.io;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.RandomAccessFile;
+import java.util.LinkedHashMap;
+
+/**
+ * Provides {@link InputStream} access to portions of a file combined with
+ * buffered reading of content. Start of next bytes to read can be set via 
seek method.
+ * 
+ * File is accessed via {@link RandomAccessFile} and is read in byte chunks 
which are
+ * cached.
+ * 
+ * @author Timo Boehme (timo.boehme at ontochem com)
+ */
+public class RandomAccessBufferedFileInputStream extends InputStream 
implements RandomAccessRead
+{
+
+               private int  pageSizeShift  = 12;
+               private int  pageSize       = 1 << pageSizeShift;
+               private long pageOffsetMask = -1L << pageSizeShift;
+               private int  maxCachedPages = 1000;
+       
+               private byte[] lastRemovedCachePage = null;
+       
+               /** Create a LRU page cache. */
+               private final LinkedHashMap<Long,byte[]> pageCache = new 
LinkedHashMap<Long, byte[]>( maxCachedPages, 0.75f, true ) 
+               {
+                               private static final long serialVersionUID = 
-6302488539257741101L;
+
+                               @Override
+                               protected boolean removeEldestEntry( 
java.util.Map.Entry<Long, byte[]> _eldest )
+                               {
+                                               final boolean doRemove = size() 
> maxCachedPages;
+                                               if ( doRemove )
+                                                       lastRemovedCachePage = 
_eldest.getValue();
+                                               return doRemove;
+                               }
+               };
+       
+               private long   curPageOffset    = -1;
+               private byte[] curPage          = new byte[ pageSize ];
+               private int    offsetWithinPage = 0;
+       
+               private final RandomAccessFile raFile;
+               private final long             fileLength;
+               private long                   fileOffset = 0;
+       
+               // 
------------------------------------------------------------------------
+               /** Create input stream instance for given file. */
+               public RandomAccessBufferedFileInputStream( File _file )
+               throws FileNotFoundException, IOException
+               {
+                               raFile     = new RandomAccessFile( _file, "r" );
+                               fileLength = _file.length();
+               
+                               seek( 0 );
+               }
+       
+               // 
------------------------------------------------------------------------
+               /** Returns offset in file at which next byte would be read. */
+               public final long getFilePointer()
+               {
+                               return fileOffset;
+               }
+       
+               // 
------------------------------------------------------------------------
+               /** Seeks to new position. If new position is outside of 
current page
+                *  the new page is either taken from cache or read from file 
and added to cache. */
+               public final void seek( final long newOffset ) throws 
IOException
+               {
+                               final long newPageOffset = newOffset & 
pageOffsetMask;
+                               if ( newPageOffset != curPageOffset )
+                               {
+                                               byte[] newPage = pageCache.get( 
newPageOffset );
+                                               if ( newPage == null )
+                                               {
+                                                               raFile.seek( 
newPageOffset );
+                                                               newPage = 
readPage();
+                                                               pageCache.put( 
newPageOffset, newPage );
+                                               }
+                                               curPageOffset = newPageOffset;
+                                               curPage       = newPage;
+                               }
+               
+                               offsetWithinPage = (int) (newOffset - 
curPageOffset);
+                               fileOffset       = newOffset;
+               }
+       
+               // 
------------------------------------------------------------------------
+               /** Reads a page with data from current file position. If we 
have a previously
+                *  removed page from cache the buffer of this page is reused. 
Otherwise a new
+                *  byte buffer is created. */
+               private final byte[] readPage() throws IOException
+               {
+                               byte[] page;
+               
+                               if ( lastRemovedCachePage != null )
+                               {
+                                               page = lastRemovedCachePage;
+                                               lastRemovedCachePage = null;
+                               } else
+                                       page = new byte[ pageSize ];
+               
+                               int readBytes = 0;
+                               while ( readBytes < pageSize )
+                               {
+                                               int curBytesRead = raFile.read( 
page, readBytes, pageSize - readBytes );
+                                               if ( curBytesRead < 0 )
+                                                               // EOF
+                                                               break;
+                                               readBytes += curBytesRead;
+                               }
+               
+                               return page;
+               }
+       
+       // 
------------------------------------------------------------------------
+       @Override
+       public int read() throws IOException
+       {
+               if ( fileOffset >= fileLength )
+               {
+                               return -1;
+               }
+               
+               if ( offsetWithinPage == pageSize )
+               {
+                               seek( fileOffset );
+               }
+
+               fileOffset++;
+               return curPage[ offsetWithinPage++ ] & 0xff;
+       }
+
+       // 
------------------------------------------------------------------------
+       @Override
+       public int read( byte[] b, int off, int len ) throws IOException
+       {       
+                       if ( fileOffset >= fileLength )
+                       {
+                                       return -1;
+                       }
+                       
+                       if ( offsetWithinPage == pageSize ) 
+                       {
+                                       seek( fileOffset );
+                       }
+       
+                       int commonLen = Math.min( pageSize - offsetWithinPage, 
len );
+                       if ( ( fileLength - fileOffset ) < pageSize )
+                                       commonLen = Math.min( commonLen, (int) 
( fileLength - fileOffset ) );
+                       
+                       System.arraycopy( curPage, offsetWithinPage, b, off, 
commonLen );
+                       
+                       offsetWithinPage += commonLen;
+                       fileOffset       += commonLen;
+                       
+                       return commonLen;
+       }
+       
+       // 
------------------------------------------------------------------------
+       @Override
+       public int available() throws IOException
+       {
+                       return (int) Math.min( fileLength - fileOffset, 
Integer.MAX_VALUE );
+       }
+       
+       // 
------------------------------------------------------------------------
+       @Override
+       public long skip( long n ) throws IOException
+       {       
+                       // test if we have to reduce skip count because of EOF
+                       long toSkip = n;
+                       
+                       if ( fileLength - fileOffset < toSkip )
+                                       toSkip = fileLength - fileOffset;
+                       
+                       if ( ( toSkip < pageSize ) && ( ( offsetWithinPage + 
toSkip ) <= pageSize ) )
+                       {
+                                       // we can skip within current page
+                                       offsetWithinPage += toSkip;
+                                 fileOffset       += toSkip;
+                       }
+                       else
+                       {
+                                       // seek to the page we will get after 
skipping
+                                       seek( fileOffset + toSkip );
+                       }
+                       
+                       return toSkip;
+       }
+       
+       // 
------------------------------------------------------------------------
+       @Override
+       public long length() throws IOException 
+       {
+                       return fileLength;
+       }
+       
+       // 
------------------------------------------------------------------------
+       @Override
+       public void close() throws IOException
+       {
+                       raFile.close();
+                       pageCache.clear();
+       }
+}

Propchange: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/RandomAccessBufferedFileInputStream.java
------------------------------------------------------------------------------
    svn:mime-type = text/plain


Reply via email to