Author: tboehme Date: Sun Jul 19 15:43:53 2015 New Revision: 1691833 URL: http://svn.apache.org/r1691833 Log: PDFBOX-2883: added support for different ScratchFile modes using new MemoryUsageSetting object; all PDDocument.load methods having a useScratchFile parameter were duplicated with memoryUsage parameter; COSDocument and PDFParser now support providing ScratchFile object directly; using the boolean 'useScratchFile' parameter works like before
Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java (with props) Modified: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/acroforms/FillFormField.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java Modified: pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/acroforms/FillFormField.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/acroforms/FillFormField.java?rev=1691833&r1=1691832&r2=1691833&view=diff ============================================================================== --- pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/acroforms/FillFormField.java (original) +++ pdfbox/trunk/examples/src/main/java/org/apache/pdfbox/examples/acroforms/FillFormField.java Sun Jul 19 15:43:53 2015 @@ -37,7 +37,7 @@ public class FillFormField // load the document PDDocument pdfDocument = PDDocument - .load(new File(formTemplate),null); + .load(new File(formTemplate),(String)null); // get the document catalog PDAcroForm acroForm = pdfDocument.getDocumentCatalog().getAcroForm(); Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=1691833&r1=1691832&r2=1691833&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java Sun Jul 19 15:43:53 2015 @@ -86,7 +86,7 @@ public class COSDocument extends COSBase */ public COSDocument(boolean useScratchFiles) { - this(null, useScratchFiles); + this((File)null, useScratchFiles); } /** @@ -115,6 +115,18 @@ public class COSDocument extends COSBase } /** + * Constructor that will use the provide memory handler for storage of the + * PDF streams. + * + * @param scratchFiles memory handler for storage of PDF streams + * + */ + public COSDocument(ScratchFile scratchFile) + { + this.scratchFile = scratchFile; + } + + /** * Constructor. Uses memory to store stream. */ public COSDocument() Added: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java?rev=1691833&view=auto ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java (added) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java Sun Jul 19 15:43:53 2015 @@ -0,0 +1,257 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.io; + +import java.io.File; + +/** + * Controls how memory/temporary files are used for + * buffering streams etc. + */ +public class MemoryUsageSetting +{ + private final boolean useMainMemory; + private final boolean useTempFile; + + /** maximum number of main-memory bytes allowed to be used; + * <code>-1</code> means 'unrestricted' */ + private final long maxMainMemoryBytes; + + /** maximum number of bytes allowed for storage at all (main-memory+file); + * <code>-1</code> means 'unrestricted' */ + private final long maxStorageBytes; + + /** directory to be used for scratch file */ + private File tempDir; + + /** + * Private constructor for setup buffering memory usage called by one of the setup methods. + * + * @param useMainMemory if <code>true</code> main memory usage is enabled; in case of + * <code>false</code> and <code>useTempFile</code> is <code>false</code> too + * we set this to <code>true</code> + * @param useTempFile if <code>true</code> using of temporary file(s) is enabled + * @param maxMainMemoryBytes maximum number of main-memory to be used; + * if <code>-1</code> means 'unrestricted'; + * if <code>0</code> we only use temporary file if <code>useTempFile</code> + * is <code>true</code> otherwise main-memory usage will have restriction + * defined by maxStorageBytes + * @param maxStorageBytes maximum size the main-memory and temporary file(s) may have all together; + * <code>0</code> or less will be ignored; if it is less than + * maxMainMemoryBytes we use maxMainMemoryBytes value instead + */ + private MemoryUsageSetting(boolean useMainMemory, boolean useTempFile, + long maxMainMemoryBytes, long maxStorageBytes) + { + // do some checks; adjust values as needed to get consistent setting + boolean locUseMainMemory = useTempFile ? useMainMemory : true; + long locMaxMainMemoryBytes = useMainMemory ? maxMainMemoryBytes : -1; + long locMaxStorageBytes = maxStorageBytes > 0 ? maxStorageBytes : -1; + + if (locMaxMainMemoryBytes < -1) + { + locMaxMainMemoryBytes = -1; + } + + if (locUseMainMemory && (locMaxMainMemoryBytes == 0)) + { + if (useTempFile) { + locUseMainMemory = false; + } + else + { + locMaxMainMemoryBytes = locMaxStorageBytes; + } + } + + if (locUseMainMemory && (locMaxStorageBytes > -1) && + ((locMaxMainMemoryBytes == -1) || (locMaxMainMemoryBytes > locMaxStorageBytes))) + { + locMaxStorageBytes = locMaxMainMemoryBytes; + } + + + this.useMainMemory = locUseMainMemory; + this.useTempFile = useTempFile; + this.maxMainMemoryBytes = locMaxMainMemoryBytes; + this.maxStorageBytes = locMaxStorageBytes; + } + + /** + * Setups buffering memory usage to only use main-memory (no temporary file) + * which is not restricted in size. + */ + public static MemoryUsageSetting setupMainMemoryOnly() + { + return setupMainMemoryOnly(-1); + } + + /** + * Setups buffering memory usage to only use main-memory with the defined maximum. + * + * @param maxMainMemoryBytes maximum number of main-memory to be used; + * <code>-1</code> for no restriction; + * <code>0</code> will also be interpreted here as no restriction + */ + public static MemoryUsageSetting setupMainMemoryOnly(long maxMainMemoryBytes) + { + return new MemoryUsageSetting(true, false, maxMainMemoryBytes, maxMainMemoryBytes); + } + + /** + * Setups buffering memory usage to only use temporary file(s) (no main-memory) + * with not restricted size. + */ + public static MemoryUsageSetting setupTempFileOnly() + { + return setupTempFileOnly(-1); + } + + /** + * Setups buffering memory usage to only use temporary file(s) (no main-memory) + * with the specified maximum size. + * + * @param maxStorageBytes maximum size the temporary file(s) may have all together; + * <code>-1</code> for no restriction; + * <code>0</code> will also be interpreted here as no restriction + */ + public static MemoryUsageSetting setupTempFileOnly(long maxStorageBytes) + { + return new MemoryUsageSetting(false, true, 0, maxStorageBytes); + } + + /** + * Setups buffering memory usage to use a portion of main-memory and additionally + * temporary file(s) in case the specified portion is exceeded. + * + * @param maxMainMemoryBytes maximum number of main-memory to be used; + * if <code>-1</code> this is the same as {@link #setupMainMemoryOnly()}; + * if <code>0</code> this is the same as {@link #setupTempFileOnly()} + */ + public static MemoryUsageSetting setupMixed(long maxMainMemoryBytes) + { + return setupMixed(maxMainMemoryBytes, -1); + } + + /** + * Setups buffering memory usage to use a portion of main-memory and additionally + * temporary file(s) in case the specified portion is exceeded. + * + * @param maxMainMemoryBytes maximum number of main-memory to be used; + * if <code>-1</code> this is the same as {@link #setupMainMemoryOnly()}; + * if <code>0</code> this is the same as {@link #setupTempFileOnly()} + * @param maxStorageBytes maximum size the main-memory and temporary file(s) may have all together; + * <code>0</code> or less will be ignored; if it is less than + * maxMainMemoryBytes we use maxMainMemoryBytes value instead + */ + public static MemoryUsageSetting setupMixed(long maxMainMemoryBytes, long maxStorageBytes) + { + return new MemoryUsageSetting(true, true, maxMainMemoryBytes, maxStorageBytes); + } + + /** + * Sets directory to be used for temporary files. + * + * @param tempDir directory for temporary files + * + * @return this instance + */ + public MemoryUsageSetting setTempDir(File tempDir) + { + this.tempDir = tempDir; + return this; + } + + /** + * Returns <code>true</code> if main-memory is to be used. + * + * <p>If this returns <code>false</code> it is ensured {@link #useTempFile()} + * returns <code>true</code>.</p> + */ + public boolean useMainMemory() + { + return useMainMemory; + } + + /** + * Returns <code>true</code> if temporary file is to be used. + * + * <p>If this returns <code>false</code> it is ensured {@link #useMainMemory} + * returns <code>true</code>.</p> + */ + public boolean useTempFile() + { + return useTempFile; + } + + /** + * Returns <code>true</code> if maximum main memory is restricted to a specific + * number of bytes. + */ + public boolean isMainMemoryRestricted() + { + return maxMainMemoryBytes >= 0; + } + + /** + * Returns <code>true</code> if maximum amount of storage is restricted to a specific + * number of bytes. + */ + public boolean isStorageRestricted() + { + return maxStorageBytes > 0; + } + + /** + * Returns maximum size of main-memory in bytes to be used. + */ + public long getMaxMainMemoryBytes() + { + return maxMainMemoryBytes; + } + + /** + * Returns maximum size of storage bytes to be used + * (main-memory in temporary files all together). + */ + public long getMaxStorageBytes() + { + return maxStorageBytes; + } + + /** + * Returns directory to be used for temporary files or <code>null</code> + * if it was not set. + */ + public File getTempDir() + { + return tempDir; + } + + @Override + public String toString() + { + return useMainMemory ? + (useTempFile ? "Mixed mode with max. of " + maxMainMemoryBytes + " main memory bytes" + + (isStorageRestricted() ? " and max. of " + maxStorageBytes + " storage bytes" : + " and unrestricted scratch file size") : + (isMainMemoryRestricted() ? "Main memory only with max. of " + maxMainMemoryBytes + " bytes" : + "Main memory only with no size restriction")): + (isStorageRestricted() ? "Scratch file only with max. of " + maxStorageBytes + " bytes" : + "Scratch file only with no size restriction"); + } +} Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java ------------------------------------------------------------------------------ svn:eol-style = native Propchange: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java ------------------------------------------------------------------------------ svn:mime-type = text/plain Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java?rev=1691833&r1=1691832&r2=1691833&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java Sun Jul 19 15:43:53 2015 @@ -19,6 +19,7 @@ package org.apache.pdfbox.io; import java.io.Closeable; import java.io.File; import java.io.IOException; +import java.io.InputStream; import java.util.BitSet; import org.apache.commons.logging.Log; @@ -53,6 +54,9 @@ public class ScratchFile implements Clos /** number of pages by which we enlarge the scratch file (reduce I/O-operations) */ private static final int ENLARGE_PAGE_COUNT = 16; + /** in case of unrestricted main memory usage this is the initial number of pages + * {@link #inMemoryPages} is setup for */ + private static final int INIT_UNRESTRICTED_MAINMEM_PAGECOUNT = 100000; private static final int PAGE_SIZE = 4096; private final Object ioLock = new Object(); @@ -63,8 +67,14 @@ public class ScratchFile implements Clos private java.io.RandomAccessFile raf; private volatile int pageCount = 0; private final BitSet freePages = new BitSet(); - private final byte[][] inMemoryPages; + /** holds pointers to in-memory page content; will be initialized once in case of restricted + * main memory, otherwise it is enlarged as needed and first initialized to a size of + * {@link #INIT_UNRESTRICTED_MAINMEM_PAGECOUNT} */ + private volatile byte[][] inMemoryPages; private final int inMemoryMaxPageCount; + private final int maxPageCount; + private final boolean useScratchFile; + private final boolean maxMainMemoryIsRestricted; private volatile boolean isClosed = false; @@ -81,7 +91,7 @@ public class ScratchFile implements Clos */ public ScratchFile(File scratchFileDirectory) throws IOException { - this(scratchFileDirectory, 0); + this(MemoryUsageSetting.setupTempFileOnly().setTempDir(scratchFileDirectory)); } /** @@ -98,19 +108,29 @@ public class ScratchFile implements Clos * * @throws IOException If scratch file directory was given but don't exist. */ - public ScratchFile(File scratchFileDirectory, long maxInMemoryByteSize) throws IOException + public ScratchFile(MemoryUsageSetting memUsageSetting) throws IOException { - this.scratchFileDirectory = scratchFileDirectory; + maxMainMemoryIsRestricted = (!memUsageSetting.useMainMemory()) || memUsageSetting.isMainMemoryRestricted(); + useScratchFile = maxMainMemoryIsRestricted ? memUsageSetting.useTempFile() : false; + scratchFileDirectory = useScratchFile ? memUsageSetting.getTempDir() : null; - if ((this.scratchFileDirectory != null) && (!this.scratchFileDirectory.isDirectory())) + if ((scratchFileDirectory != null) && (!scratchFileDirectory.isDirectory())) { throw new IOException("Scratch file directory does not exist: " + this.scratchFileDirectory); } - inMemoryMaxPageCount = (int) Math.min(Integer.MAX_VALUE, Math.max(0, maxInMemoryByteSize) / PAGE_SIZE); - inMemoryPages = new byte[inMemoryMaxPageCount][]; + maxPageCount = memUsageSetting.isStorageRestricted() ? + (int) Math.min(Integer.MAX_VALUE, memUsageSetting.getMaxStorageBytes() / PAGE_SIZE) : + Integer.MAX_VALUE; + + inMemoryMaxPageCount = memUsageSetting.useMainMemory() ? + (memUsageSetting.isMainMemoryRestricted() ? + (int) Math.min(Integer.MAX_VALUE, memUsageSetting.getMaxMainMemoryBytes() / PAGE_SIZE) : + Integer.MAX_VALUE) : + 0; + inMemoryPages = new byte[maxMainMemoryIsRestricted ? inMemoryMaxPageCount : INIT_UNRESTRICTED_MAINMEM_PAGECOUNT][]; - freePages.set(0, inMemoryMaxPageCount); + freePages.set(0, inMemoryPages.length); } /** @@ -132,7 +152,7 @@ public class ScratchFile implements Clos idx = freePages.nextSetBit( 0 ); if (idx < 0) { - throw new IOException("Expected free page but did not found one."); + throw new IOException("Maximum allowed scratch file memory exceeded."); } } @@ -148,9 +168,16 @@ public class ScratchFile implements Clos } /** - * Enlarges the scratch file by a number of pages defined by - * {@link #ENLARGE_PAGE_COUNT}. This will create the scratch - * file if it does not exist already. + * This will provide new free pages by either enlarging the scratch file + * by a number of pages defined by {@link #ENLARGE_PAGE_COUNT} - in case + * scratch file usage is allowed - or increase the {@link #inMemoryPages} + * array in case main memory was not restricted. If neither of both is + * allowed/the case than free pages count won't be changed. The same is true + * if no new pages could be added because we reached the maximum of + * {@link Integer#MAX_VALUE} pages. + * + * <p>If scratch file uage is allowed and scratch file does not exist already + * it will be created.</p> * * <p>Only to be called under synchronization on {@link #freePages}.</p> */ @@ -159,38 +186,64 @@ public class ScratchFile implements Clos synchronized (ioLock) { checkClosed(); - - // create scratch file is needed - if ( raf == null ) + + if (pageCount >= maxPageCount) { - file = File.createTempFile("PDFBox", ".tmp", scratchFileDirectory); - try - { - raf = new java.io.RandomAccessFile(file, "rw"); - } - catch (IOException e) + return; + } + + if (useScratchFile) + { + // create scratch file is needed + if ( raf == null ) { - if (!file.delete()) + file = File.createTempFile("PDFBox", ".tmp", scratchFileDirectory); + try + { + raf = new java.io.RandomAccessFile(file, "rw"); + } + catch (IOException e) { - LOG.warn("Error deleting scratch file: " + file.getAbsolutePath()); + if (!file.delete()) + { + LOG.warn("Error deleting scratch file: " + file.getAbsolutePath()); + } + throw e; } - throw e; + } + + long fileLen = raf.length(); + long expectedFileLen = ((long)pageCount - inMemoryMaxPageCount) * PAGE_SIZE; + + if (expectedFileLen != fileLen) + { + throw new IOException("Expected scratch file size of " + expectedFileLen + " but found " + fileLen); + } + + // enlarge if we do not overflow + if (pageCount + ENLARGE_PAGE_COUNT > pageCount) + { + fileLen += ENLARGE_PAGE_COUNT * PAGE_SIZE; + + raf.setLength(fileLen); + + freePages.set(pageCount, pageCount + ENLARGE_PAGE_COUNT); } } - - long fileLen = raf.length(); - long expectedFileLen = ((long)pageCount - inMemoryMaxPageCount) * PAGE_SIZE; - - if (expectedFileLen != fileLen) + else if (!maxMainMemoryIsRestricted) { - throw new IOException("Expected scratch file size of " + expectedFileLen + " but found " + fileLen); + // increase number of in-memory pages + int oldSize = inMemoryPages.length; + int newSize = (int) Math.min( ((long)oldSize) * 2, Integer.MAX_VALUE); // this handles integer overflow + if (newSize > oldSize) + { + byte[][] newInMemoryPages = new byte[newSize][]; + System.arraycopy(inMemoryPages, 0, newInMemoryPages, 0, oldSize); + inMemoryPages = newInMemoryPages; + + freePages.set(oldSize, newSize); + } } - - fileLen += ENLARGE_PAGE_COUNT * PAGE_SIZE; - - raf.setLength(fileLen); - - freePages.set(pageCount, pageCount + ENLARGE_PAGE_COUNT); } } @@ -233,7 +286,7 @@ public class ScratchFile implements Clos throw new IOException("Requested page with index " + pageIdx + " was not written before."); } - return inMemoryPages[pageIdx]; + return page; } synchronized (ioLock) @@ -280,14 +333,21 @@ public class ScratchFile implements Clos if (pageIdx < inMemoryMaxPageCount) { - inMemoryPages[pageIdx] = page; - - // in case we were closed in between remove page and throw exception - if (isClosed) + if (maxMainMemoryIsRestricted) { - inMemoryPages[pageIdx] = null; - checkClosed(); + inMemoryPages[pageIdx] = page; } + else + { + // need synchronization since inMemoryPages may change + synchronized (ioLock) + { + inMemoryPages[pageIdx] = page; + } + } + + // in case we were closed in between throw exception + checkClosed(); } else { @@ -327,6 +387,30 @@ public class ScratchFile implements Clos } /** + * Creates a new buffer using this page handler and initializes it with the + * data read from provided input stream (input stream is copied to buffer). + * The buffer data pointer is reset to point to first byte. + * + * @return A new buffer containing data read from input stream. + * + * @throws IOException If an error occurred. + */ + public RandomAccess createBuffer(InputStream input) throws IOException + { + ScratchFileBuffer buf = new ScratchFileBuffer(this); + + byte[] byteBuffer = new byte[8192]; + int bytesRead = 0; + while ((bytesRead = input.read(byteBuffer)) > -1) + { + buf.write(byteBuffer, 0, bytesRead); + } + buf.seek(0); + + return buf; + } + + /** * Allows a buffer which is cleared/closed to release its pages to be re-used. * * @param pageIndexes pages indexes of pages to release @@ -344,7 +428,8 @@ public class ScratchFile implements Clos freePages.set(pageIdx); if (pageIdx < inMemoryMaxPageCount) { - inMemoryPages[pageIdx] = null; + inMemoryPages[pageIdx] = null; // remark: not in ioLock synchronization since behavior won't + // change even in case of parallel called 'enlarge' method } } @@ -362,6 +447,11 @@ public class ScratchFile implements Clos @Override public void close() throws IOException { + if (isClosed) + { + return; + } + isClosed = true; IOException ioexc = null; Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java?rev=1691833&r1=1691832&r2=1691833&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java Sun Jul 19 15:43:53 2015 @@ -30,6 +30,7 @@ import org.apache.pdfbox.cos.COSNull; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessRead; +import org.apache.pdfbox.io.ScratchFile; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.encryption.AccessPermission; import org.apache.pdfbox.pdmodel.encryption.DecryptionMaterial; @@ -136,6 +137,47 @@ public class PDFParser extends COSParser init(useScratchFiles); } + /** + * Constructor. + * + * @param source input representing the pdf. + * @param decryptionPassword password to be used for decryption. + * @param keyStore key store to be used for decryption when using public key security + * @param alias alias to be used for decryption when using public key security + * @param scratchFile buffer handler for temporary storage; it will be closed on + * {@link COSDocument#close()} + * + * @throws IOException If something went wrong. + */ + public PDFParser(RandomAccessRead source, String decryptionPassword, InputStream keyStore, + String alias, ScratchFile scratchFile) throws IOException + { + pdfSource = source; + fileLen = source.length(); + password = decryptionPassword; + keyStoreInputStream = keyStore; + keyAlias = alias; + init(scratchFile); + } + + private void init(ScratchFile scratchFile) throws IOException + { + String eofLookupRangeStr = System.getProperty(SYSPROP_EOFLOOKUPRANGE); + if (eofLookupRangeStr != null) + { + try + { + setEOFLookupRange(Integer.parseInt(eofLookupRangeStr)); + } + catch (NumberFormatException nfe) + { + LOG.warn("System property " + SYSPROP_EOFLOOKUPRANGE + + " does not contain an integer value, but: '" + eofLookupRangeStr + "'"); + } + } + document = new COSDocument(scratchFile); + } + private void init(boolean useScratchFiles) throws IOException { String eofLookupRangeStr = System.getProperty(SYSPROP_EOFLOOKUPRANGE); Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java?rev=1691833&r1=1691832&r2=1691833&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java Sun Jul 19 15:43:53 2015 @@ -39,10 +39,12 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.io.MemoryUsageSetting; import org.apache.pdfbox.io.RandomAccessBuffer; import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; import org.apache.pdfbox.io.RandomAccessReadInputStream; import org.apache.pdfbox.io.RandomAccessRead; +import org.apache.pdfbox.io.ScratchFile; import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdfwriter.COSWriter; import org.apache.pdfbox.pdmodel.common.COSArrayList; @@ -123,7 +125,52 @@ public class PDDocument implements Close */ public PDDocument(boolean useScratchFiles) { - document = new COSDocument(useScratchFiles); + this(useScratchFiles, null); + } + + /** + * Creates an empty PDF document. + * You need to add at least one page for the document to be valid. + * + * @param memUsageSetting defines how memory is used for buffering PDF streams + */ + public PDDocument(MemoryUsageSetting memUsageSetting) + { + this(true, memUsageSetting); + } + + /** + * Internal constructor which support setting scratch file usage + * via boolean parameter or directly (new). This will be only needed + * as long as the new ScratchFile handling is tested. + * + * <p>You need to add at least one page for the document to be valid.</p> + * + * @param useScratchFiles enables the usage of a scratch file if set to true + * @param memUsageSetting defines how memory is used for buffering PDF streams + */ + private PDDocument(boolean useScratchFiles, MemoryUsageSetting memUsageSetting) + { + ScratchFile scratchFile = null; + if (memUsageSetting != null) + { + try + { + scratchFile = new ScratchFile(memUsageSetting); + } + catch (IOException ioe) + { + LOG.warn("Error initializing scratch file: " + ioe.getMessage() + ". Fall back to main memory usage only."); + try + { + scratchFile = new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly()); + } catch ( IOException ioe2 ) {} + } + } + + + document = scratchFile != null ? new COSDocument(scratchFile) : + new COSDocument(useScratchFiles); pdfSource = null; // First we need a trailer @@ -783,6 +830,21 @@ public class PDDocument implements Close * Parses a PDF. * * @param file file to be loaded + * @param memUsageSetting defines how memory is used for buffering PDF streams + * + * @return loaded document + * + * @throws IOException in case of a file reading or parsing error + */ + public static PDDocument load(File file, MemoryUsageSetting memUsageSetting) throws IOException + { + return load(file, "", null, null, memUsageSetting); + } + + /** + * Parses a PDF. + * + * @param file file to be loaded * @param password password to be used for decryption * * @return loaded document @@ -815,6 +877,22 @@ public class PDDocument implements Close * * @param file file to be loaded * @param password password to be used for decryption + * @param memUsageSetting defines how memory is used for buffering PDF streams + * + * @return loaded document + * + * @throws IOException in case of a file reading or parsing error + */ + public static PDDocument load(File file, String password, MemoryUsageSetting memUsageSetting) throws IOException + { + return load(file, password, null, null, memUsageSetting); + } + + /** + * Parses a PDF. + * + * @param file file to be loaded + * @param password password to be used for decryption * @param keyStore key store to be used for decryption when using public key security * @param alias alias to be used for decryption when using public key security * @@ -851,6 +929,28 @@ public class PDDocument implements Close } /** + * Parses a PDF. + * + * @param file file to be loaded + * @param password password to be used for decryption + * @param keyStore key store to be used for decryption when using public key security + * @param alias alias to be used for decryption when using public key security + * @param memUsageSetting defines how memory is used for buffering PDF streams + * + * @return loaded document + * + * @throws IOException in case of a file reading or parsing error + */ + public static PDDocument load(File file, String password, InputStream keyStore, String alias, + MemoryUsageSetting memUsageSetting) throws IOException + { + RandomAccessBufferedFileInputStream raFile = new RandomAccessBufferedFileInputStream(file); + PDFParser parser = new PDFParser(raFile, password, keyStore, alias, new ScratchFile(memUsageSetting)); + parser.parse(); + return parser.getPDDocument(); + } + + /** * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf. * * @param input stream that contains the document. @@ -882,6 +982,23 @@ public class PDDocument implements Close } /** + * Parses a PDF. Depending on the parameter useScratchFiles the given input + * stream is either copied to the memory or to a temporary file to enable + * random access to the pdf. + * + * @param input stream that contains the document. + * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams + * + * @return loaded document + * + * @throws IOException in case of a file reading or parsing error + */ + public static PDDocument load(InputStream input, MemoryUsageSetting memUsageSetting) throws IOException + { + return load(input, "", null, null, memUsageSetting); + } + + /** * Parses a PDF. The given input stream is copied to the memory to enable random access to the pdf. * * @param input stream that contains the document. @@ -930,18 +1047,25 @@ public class PDDocument implements Close */ public static PDDocument load(InputStream input, String password, boolean useScratchFiles) throws IOException { - RandomAccessRead source; - if (useScratchFiles) - { - source = new RandomAccessBufferedFileInputStream(input); - } - else - { - source = new RandomAccessBuffer(input); - } - PDFParser parser = new PDFParser(source, password, null, null, useScratchFiles); - parser.parse(); - return parser.getPDDocument(); + return load(input, password, null, null, useScratchFiles); + } + + /** + * Parses a PDF. Depending on the parameter useScratchFiles the given input + * stream is either copied to the memory or to a temporary file to enable + * random access to the pdf. + * + * @param input stream that contains the document. + * @param password password to be used for decryption + * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams + * + * @return loaded document + * + * @throws IOException in case of a file reading or parsing error + */ + public static PDDocument load(InputStream input, String password, MemoryUsageSetting memUsageSetting) throws IOException + { + return load(input, password, null, null, memUsageSetting); } /** @@ -975,6 +1099,31 @@ public class PDDocument implements Close parser.parse(); return parser.getPDDocument(); } + + /** + * Parses a PDF. Depending on the parameter useScratchFiles the given input + * stream is either copied to the memory or to a temporary file to enable + * random access to the pdf. + * + * @param input stream that contains the document. + * @param password password to be used for decryption + * @param keyStore key store to be used for decryption when using public key security + * @param alias alias to be used for decryption when using public key security + * @param memUsageSetting defines how memory is used for buffering input stream and PDF streams + * + * @return loaded document + * + * @throws IOException in case of a file reading or parsing error + */ + public static PDDocument load(InputStream input, String password, InputStream keyStore, + String alias, MemoryUsageSetting memUsageSetting) throws IOException + { + ScratchFile scratchFile = new ScratchFile(memUsageSetting); + RandomAccessRead source = scratchFile.createBuffer(input); + PDFParser parser = new PDFParser(source, password, keyStore, alias, scratchFile); + parser.parse(); + return parser.getPDDocument(); + } /** * Parses a PDF.