svn commit: r1705737 - /pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java
Author: lehmi Date: Mon Sep 28 18:05:41 2015 New Revision: 1705737 URL: http://svn.apache.org/viewvc?rev=1705737=rev Log: PDFBOX-2905: add print menu entry Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java?rev=1705737=1705736=1705737=diff == --- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java (original) +++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java Mon Sep 28 18:05:41 2015 @@ -29,6 +29,8 @@ import java.awt.event.ActionListener; import java.awt.event.InputEvent; import java.awt.event.KeyEvent; import java.awt.event.WindowEvent; +import java.awt.print.PrinterException; +import java.awt.print.PrinterJob; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FilenameFilter; @@ -40,6 +42,7 @@ import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; + import javax.swing.AbstractAction; import javax.swing.Action; import javax.swing.JComponent; @@ -70,6 +73,7 @@ import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.printing.PDFPageable; import org.apache.pdfbox.tools.gui.ArrayEntry; import org.apache.pdfbox.tools.gui.DocumentEntry; import org.apache.pdfbox.tools.gui.ErrorDialog; @@ -139,6 +143,7 @@ public class PDFDebugger extends JFrame private JMenuItem saveAsMenuItem; private JMenuItem saveMenuItem; private JMenu recentFilesMenu; +private JMenuItem printMenuItem; // edit > find menu private JMenu findMenu; @@ -341,6 +346,23 @@ public class PDFDebugger extends JFrame addRecentFileItems(); fileMenu.add(recentFilesMenu); +printMenuItem = new JMenuItem("Print"); 
+printMenuItem.setEnabled(false); +printMenuItem.addActionListener(new ActionListener() +{ +@Override +public void actionPerformed(ActionEvent evt) +{ +printMenuItemActionPerformed(evt); +} +}); + +if (!IS_MAC_OS) +{ +fileMenu.addSeparator(); +fileMenu.add(printMenuItem); +} + JMenuItem exitMenuItem = new JMenuItem("Exit"); exitMenuItem.setAccelerator(KeyStroke.getKeyStroke("alt F4")); exitMenuItem.addActionListener(new ActionListener() @@ -997,6 +1019,26 @@ public class PDFDebugger extends JFrame System.exit(0); } +private void printMenuItemActionPerformed(ActionEvent evt) +{ +if( document != null ) +{ +try +{ +PrinterJob job = PrinterJob.getPrinterJob(); +job.setPageable(new PDFPageable(document)); +if (job.printDialog()) +{ +job.print(); +} +} +catch (PrinterException e) +{ +throw new RuntimeException(e); +} +} +} + /** * Exit the Application. */ @@ -1172,6 +1214,7 @@ public class PDFDebugger extends JFrame private void parseDocument( File file, String password )throws IOException { document = PDDocument.load(file, password); +printMenuItem.setEnabled(true); } private void addRecentFileItems()
svn commit: r1705755 - /pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java
Author: tilman Date: Mon Sep 28 19:52:58 2015 New Revision: 1705755 URL: http://svn.apache.org/viewvc?rev=1705755=rev Log: PDFBOX-2852: typo Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java?rev=1705755=1705754=1705755=diff == --- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java (original) +++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java Mon Sep 28 19:52:58 2015 @@ -1258,7 +1258,7 @@ public class PDFDebugger extends JFrame private static void usage() { String message = "Usage: java -jar pdfbox-app-x.y.z.jar PDFDebugger [options] \n" -+ "\nOptons:\n" ++ "\nOptions:\n" + " -password : Password to decrypt the document\n" + " -viewpages : activate the page mode on startup\n" + ": The PDF document to be loaded\n";
svn commit: r1705738 - /pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java
Author: lehmi Date: Mon Sep 28 18:08:08 2015 New Revision: 1705738 URL: http://svn.apache.org/viewvc?rev=1705738=rev Log: PDFBOX-2905: add new command line parameter to usage message Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java?rev=1705738=1705737=1705738=diff == --- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java (original) +++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java Mon Sep 28 18:08:08 2015 @@ -1260,6 +1260,7 @@ public class PDFDebugger extends JFrame String message = "Usage: java -jar pdfbox-app-x.y.z.jar PDFDebugger [options] \n" + "\nOptons:\n" + " -password : Password to decrypt the document\n" ++ " -viewpages : activate the page mode on startup\n" + ": The PDF document to be loaded\n"; System.err.println(message);
svn commit: r1705734 - /pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java
Author: lehmi Date: Mon Sep 28 17:46:53 2015 New Revision: 1705734 URL: http://svn.apache.org/viewvc?rev=1705734=rev Log: PDFBOX-2905: use PDFDebugger in "View Pages" mode instead of PDFReader Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java?rev=1705734=1705733=1705734=diff == --- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java (original) +++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFBox.java Mon Sep 28 17:46:53 2015 @@ -43,62 +43,60 @@ public final class PDFBox if (args.length > 0) { String command = args[0]; -String[] arguments = new String[args.length - 1]; -System.arraycopy(args, 1, arguments, 0, arguments.length); boolean exitAfterCallingMain = true; if (command.equals("Decrypt")) { -Decrypt.main(arguments); +Decrypt.main(copyCommandlineArgs(args)); } else if (command.equals("Encrypt")) { -Encrypt.main(arguments); +Encrypt.main(copyCommandlineArgs(args)); } else if (command.equals("ExtractText")) { -ExtractText.main(arguments); +ExtractText.main(copyCommandlineArgs(args)); } else if (command.equals("ExtractImages")) { -ExtractImages.main(arguments); +ExtractImages.main(copyCommandlineArgs(args)); } else if (command.equals("OverlayPDF")) { -OverlayPDF.main(arguments); +OverlayPDF.main(copyCommandlineArgs(args)); } else if (command.equals("PrintPDF")) { -PrintPDF.main(arguments); +PrintPDF.main(copyCommandlineArgs(args)); } else if (command.equals("PDFDebugger")) { -PDFDebugger.main(arguments); +PDFDebugger.main(copyCommandlineArgs(args)); exitAfterCallingMain = false; } else if (command.equals("PDFMerger")) { -PDFMerger.main(arguments); +PDFMerger.main(copyCommandlineArgs(args)); } else if (command.equals("PDFReader")) { -PDFReader.main(arguments); +PDFDebugger.main(copyCommandlineArgs(args, "-viewpages")); 
exitAfterCallingMain = false; } else if (command.equals("PDFSplit")) { -PDFSplit.main(arguments); +PDFSplit.main(copyCommandlineArgs(args)); } else if (command.equals("PDFToImage")) { -PDFToImage.main(arguments); +PDFToImage.main(copyCommandlineArgs(args)); } else if (command.equals("TextToPDF")) { -TextToPDF.main(arguments); +TextToPDF.main(copyCommandlineArgs(args)); } else if (command.equals("WriteDecodedDoc")) { -WriteDecodedDoc.main(arguments); +WriteDecodedDoc.main(copyCommandlineArgs(args)); } else { @@ -115,6 +113,23 @@ public final class PDFBox } } +private static String[] copyCommandlineArgs(String[] args) +{ +// copy all arguments but the first +String[] arguments = new String[args.length - 1]; +System.arraycopy(args, 1, arguments, 0, arguments.length); +return arguments; +} + +private static String[] copyCommandlineArgs(String[] args, String arg0) +{ +// copy all arguments, replace the first with the given new one +String[] arguments = new String[args.length]; +System.arraycopy(args, 0, arguments, 0, arguments.length); +arguments[0] = arg0; +return arguments; +} + private static void showMessageAndExit() { String message = "PDFBox version: \""+ Version.getVersion()+ "\""
svn commit: r1705729 - /pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java
Author: lehmi Date: Mon Sep 28 17:39:17 2015 New Revision: 1705729 URL: http://svn.apache.org/viewvc?rev=1705729=rev Log: PDFBOX-2905: add a new comandline parameter to activate "View Pages" mode on startup Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java?rev=1705729=1705728=1705729=diff == --- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java (original) +++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java Mon Sep 28 17:39:17 2015 @@ -113,6 +113,7 @@ public class PDFDebugger extends JFrame COSName.CALRGB, COSName.LAB)); private static final String PASSWORD = "-password"; +private static final String VIEWPAGES = "-viewpages"; private static final int SHORCUT_KEY_MASK = Toolkit.getDefaultToolkit().getMenuShortcutKeyMask(); @@ -153,6 +154,15 @@ public class PDFDebugger extends JFrame */ public PDFDebugger() { +this(false); +} + +/** + * Constructor. 
+ */ +public PDFDebugger(boolean viewPages) +{ +isPageMode = viewPages; initComponents(); } @@ -379,7 +389,14 @@ public class PDFDebugger extends JFrame private JMenu createViewMenu() { JMenu viewMenu = new JMenu("View"); -viewModeItem = new JMenuItem("Show Pages"); +if (isPageMode) +{ +viewModeItem = new JMenuItem("Show Internal Structure"); +} +else +{ +viewModeItem = new JMenuItem("Show Pages"); +} viewModeItem.addActionListener(new ActionListener() { @Override @@ -1025,11 +1042,10 @@ public class PDFDebugger extends JFrame } }); -final PDFDebugger viewer = new PDFDebugger(); - // open file, if any String filename = null; String password = ""; +boolean viewPages = false; for( int i = 0; i < args.length; i++ ) { @@ -1042,11 +1058,17 @@ public class PDFDebugger extends JFrame } password = args[i]; } +else if( args[i].equals( VIEWPAGES ) ) +{ +viewPages = true; +} else { filename = args[i]; } } +final PDFDebugger viewer = new PDFDebugger(viewPages); + if (filename != null) {
svn commit: r1705766 - /pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java
Author: jahewson Date: Mon Sep 28 20:38:11 2015 New Revision: 1705766 URL: http://svn.apache.org/viewvc?rev=1705766=rev Log: PDFBOX-2905: Make page view the default Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java?rev=1705766=1705765=1705766=diff == --- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java (original) +++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/tools/PDFDebugger.java Mon Sep 28 20:38:11 2015 @@ -117,7 +117,7 @@ public class PDFDebugger extends JFrame COSName.CALRGB, COSName.LAB)); private static final String PASSWORD = "-password"; -private static final String VIEWPAGES = "-viewpages"; +private static final String VIEW_STRUCTURE = "-viewstructure"; private static final int SHORCUT_KEY_MASK = Toolkit.getDefaultToolkit().getMenuShortcutKeyMask(); @@ -1087,7 +1087,7 @@ public class PDFDebugger extends JFrame // open file, if any String filename = null; String password = ""; -boolean viewPages = false; +boolean viewPages = true; for( int i = 0; i < args.length; i++ ) { @@ -1100,9 +1100,9 @@ public class PDFDebugger extends JFrame } password = args[i]; } -else if( args[i].equals( VIEWPAGES ) ) +else if( args[i].equals(VIEW_STRUCTURE) ) { -viewPages = true; +viewPages = false; } else { @@ -1260,7 +1260,7 @@ public class PDFDebugger extends JFrame String message = "Usage: java -jar pdfbox-app-x.y.z.jar PDFDebugger [options] \n" + "\nOptions:\n" + " -password : Password to decrypt the document\n" -+ " -viewpages : activate the page mode on startup\n" ++ " -viewstructure : activate structure mode on startup\n" + ": The PDF document to be loaded\n"; System.err.println(message);
svn commit: r1705782 - in /pdfbox/trunk: pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/ tools/src/main/java/org/apache/pdfbox/tools/
Author: jahewson Date: Mon Sep 28 21:36:47 2015 New Revision: 1705782 URL: http://svn.apache.org/viewvc?rev=1705782=rev Log: PDFBOX-2893: Remove getStream() from PDImage Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImage.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDInlineImage.java pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImage.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImage.java?rev=1705782=1705781=1705782=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImage.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDImage.java Mon Sep 28 21:36:47 2015 @@ -23,7 +23,6 @@ import java.io.InputStream; import java.util.List; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.pdmodel.common.COSObjectable; -import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; /** @@ -49,13 +48,7 @@ public interface PDImage extends COSObje * @throws IllegalStateException if the image is not a stencil. */ BufferedImage getStencilImage(Paint paint) throws IOException; - -/** - * Returns a stream containing this image's data. Null for inline images. - * @throws IOException if the stream could not be read. - */ -PDStream getStream() throws IOException; - + /** * Returns an InputStream containing the image data, irrespective of whether this is an * inline image or an image XObject. 
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDInlineImage.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDInlineImage.java?rev=1705782=1705781=1705782=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDInlineImage.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/graphics/image/PDInlineImage.java Mon Sep 28 21:36:47 2015 @@ -32,7 +32,6 @@ import org.apache.pdfbox.filter.Filter; import org.apache.pdfbox.filter.FilterFactory; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.COSArrayList; -import org.apache.pdfbox.pdmodel.common.PDStream; import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; @@ -252,15 +251,6 @@ public final class PDInlineImage impleme parameters.setBoolean(COSName.IM, isStencil); } -/** - * Always null, use {@link #createInputStream()} instead. - */ -@Override -public PDStream getStream() throws IOException -{ -return null; -} - @Override public InputStream createInputStream() throws IOException { Modified: pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java?rev=1705782=1705781=1705782=diff == --- pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java (original) +++ pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/ExtractImages.java Mon Sep 28 21:36:47 2015 @@ -322,7 +322,7 @@ public final class ExtractImages PDDeviceRGB.INSTANCE.getName().equals(colorSpaceName)) { // RGB or Gray colorspace: get and write the unmodifiedJPEG stream -InputStream data = pdImage.getStream().createInputStream(JPEG); +InputStream data = pdImage.createInputStream(JPEG); IOUtils.copy(data, out); IOUtils.closeQuietly(data); }
svn commit: r1705696 - /pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/utils/TestCOSUtils.java
Author: tboehme Date: Mon Sep 28 14:12:16 2015 New Revision: 1705696 URL: http://svn.apache.org/viewvc?rev=1705696=rev Log: PDFBOX-2883: missed change for preflight test class after changes on COSDocument Modified: pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/utils/TestCOSUtils.java Modified: pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/utils/TestCOSUtils.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/utils/TestCOSUtils.java?rev=1705696=1705695=1705696=diff == --- pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/utils/TestCOSUtils.java (original) +++ pdfbox/trunk/preflight/src/test/java/org/apache/pdfbox/preflight/utils/TestCOSUtils.java Mon Sep 28 14:12:16 2015 @@ -39,6 +39,7 @@ import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.cos.COSObjectKey; +import org.apache.pdfbox.io.ScratchFile; import org.junit.Test; public class TestCOSUtils @@ -221,7 +222,7 @@ public class TestCOSUtils IOCOSDocument(File scratchDir) throws IOException { -super(scratchDir, true); +super(new ScratchFile(scratchDir)); } @Override
svn commit: r1705707 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
Author: tboehme Date: Mon Sep 28 15:26:38 2015 New Revision: 1705707 URL: http://svn.apache.org/viewvc?rev=1705707=rev Log: PDFBOX-2883: use ScratchFile in COSStream in every case Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java?rev=1705707=1705706=1705707=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java Mon Sep 28 15:26:38 2015 @@ -24,11 +24,11 @@ import java.io.InputStream; import java.io.OutputStream; import java.util.ArrayList; import java.util.List; + import org.apache.pdfbox.filter.Filter; import org.apache.pdfbox.filter.FilterFactory; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccess; -import org.apache.pdfbox.io.RandomAccessBuffer; import org.apache.pdfbox.io.RandomAccessInputStream; import org.apache.pdfbox.io.RandomAccessOutputStream; import org.apache.pdfbox.io.ScratchFile; @@ -49,8 +49,7 @@ public class COSStream extends COSDictio */ public COSStream() { -this.randomAccess = new RandomAccessBuffer(); -this.scratchFile = null; +this(ScratchFile.getMainMemoryOnlyInstance()); } /** @@ -62,7 +61,7 @@ public class COSStream extends COSDictio { super(); this.randomAccess = createRandomAccess(scratchFile); -this.scratchFile = scratchFile; +this.scratchFile = scratchFile != null ? 
scratchFile : ScratchFile.getMainMemoryOnlyInstance(); } /** @@ -70,21 +69,14 @@ public class COSStream extends COSDictio */ private RandomAccess createRandomAccess(ScratchFile scratchFile) { -if (scratchFile != null) +try { -try -{ -return scratchFile.createBuffer(); -} -catch (IOException e) -{ -// user can't recover from this exception anyway -throw new RuntimeException(e); -} +return scratchFile.createBuffer(); } -else +catch (IOException e) { -return new RandomAccessBuffer(); +// user can't recover from this exception anyway +throw new RuntimeException(e); } }
svn commit: r1705711 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java
Author: tboehme Date: Mon Sep 28 15:54:22 2015 New Revision: 1705711 URL: http://svn.apache.org/viewvc?rev=1705711=rev Log: PDFBOX-2883: ensure ScratchFileBuffer is closed (using finalize) in order to cope with sloppy usage in COSStream and to ensure buffers are freed if no needed anymore Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java?rev=1705711=1705710=1705711=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFileBuffer.java Mon Sep 28 15:54:22 2015 @@ -19,6 +19,10 @@ package org.apache.pdfbox.io; import java.io.EOFException; import java.io.IOException; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.pdfbox.cos.COSStream; + /** * Implementation of {@link RandomAccess} as sequence of multiple fixed size pages handled * by {@link ScratchFile}. @@ -60,6 +64,8 @@ class ScratchFileBuffer implements Rando /** number of pages held by this buffer */ private int pageCount = 0; +private static final Log LOG = LogFactory.getLog(ScratchFileBuffer.class); + /** * Creates a new buffer using pages handled by provided {@link ScratchFile}. * @@ -489,4 +495,24 @@ class ScratchFileBuffer implements Rando size = 0; } } + +/** + * While calling finalize is normally discouraged we will have to + * use it here as long as closing a scratch file buffer is not + * done in every case. Currently {@link COSStream} creates new + * buffers without closing the old one - which might still be + * used. + * + * Enabling debugging one will see if there are still cases + * where the buffer is not closed. 
+ */ +@Override +protected void finalize() throws Throwable +{ +if ((pageHandler != null) && LOG.isDebugEnabled()) +{ +LOG.debug("ScratchFileBuffer not closed!"); +} +close(); +} }
svn commit: r1705611 - in /pdfbox/trunk/pdfbox/src/main: java/org/apache/pdfbox/text/PDFTextStripper.java resources/org/apache/pdfbox/resources/text/ resources/org/apache/pdfbox/resources/text/BidiMir
Author: msahyoun Date: Mon Sep 28 08:30:06 2015 New Revision: 1705611 URL: http://svn.apache.org/viewvc?rev=1705611=rev Log: PDFBOX-2252: initial support for mixed language directions (bidi) text Added: pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/text/ pdfbox/trunk/pdfbox/src/main/resources/org/apache/pdfbox/resources/text/BidiMirroring.txt (with props) Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java?rev=1705611=1705610=1705611=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/text/PDFTextStripper.java Mon Sep 28 08:30:06 2015 @@ -17,8 +17,12 @@ package org.apache.pdfbox.text; import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.LineNumberReader; import java.io.StringWriter; import java.io.Writer; +import java.text.Bidi; import java.text.Normalizer; import java.util.ArrayList; import java.util.Collections; @@ -29,10 +33,14 @@ import java.util.List; import java.util.Map; import java.util.SortedMap; import java.util.SortedSet; +import java.util.StringTokenizer; import java.util.TreeMap; import java.util.TreeSet; import java.util.Vector; import java.util.regex.Pattern; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.pdmodel.PDPage; import org.apache.pdfbox.pdmodel.PDPageTree; @@ -57,6 +65,8 @@ public class PDFTextStripper extends PDF private static float defaultDropThreshold = 2.5f; private static final boolean useCustomQuickSort; +private static final Log LOG = LogFactory.getLog(PDFTextStripper.class); + // enable the ability to set the 
default indent/drop thresholds // with -D system properties: // pdftextstripper.indent @@ -1729,27 +1739,156 @@ public class PDFTextStripper extends PDF List normalized = new LinkedList(); StringBuilder lineBuilder = new StringBuilder(); List wordPositions = new ArrayList(); -// concatenate the pieces of text in opposite order if RTL is dominant -if (isRtlDominant) + +for (LineItem item : line) { -int numberOfPositions = line.size(); -for (int i = numberOfPositions - 1; i >= 0; i--) -{ -lineBuilder = normalizeAdd(normalized, lineBuilder, wordPositions, line.get(i)); -} +lineBuilder = normalizeAdd(normalized, lineBuilder, wordPositions, item); } -else + +if (lineBuilder.length() > 0) +{ +normalized.add(createWord(lineBuilder.toString(), wordPositions)); +} +return normalized; +} + +/** + * Handles the LTR and RTL direction of the given words. The whole implementation stands and falls with the given + * word. If the word is a full line, the results will be the best. If the word contains of single words or + * characters, the order of the characters in a word or words in a line may wrong, due to RTL and LTR marks and + * characters! 
+ * + * Based on http://www.nesterovsky-bros.com/weblog/2013/07/28/VisualToLogicalConversionInJava.aspx + * + * @param word The word that shall be processed + * @return new word with the correct direction of the containing characters + */ +private String handleDirection(String word) +{ +Bidi bidi = new Bidi(word, Bidi.DIRECTION_DEFAULT_LEFT_TO_RIGHT); + +// if there is pure LTR text no need to process further +if (!bidi.isMixed() && bidi.getBaseLevel() == Bidi.DIRECTION_LEFT_TO_RIGHT) { -for (LineItem item : line) +return word; +} + +// collect individual bidi information +int runCount = bidi.getRunCount(); +byte[] levels = new byte[runCount]; +Integer[] runs = new Integer[runCount]; + +for (int i = 0; i < runCount; i++) +{ + levels[i] = (byte)bidi.getRunLevel(i); + runs[i] = i; +} + +// reorder individual parts based on their levels +Bidi.reorderVisually(levels, 0, runs, 0, runCount); + +// collect the parts based on the direction within the run +StringBuilder result = new StringBuilder(); + +for (int i = 0; i < runCount; i++) +{ + int index = runs[i]; + int start = bidi.getRunStart(index); + int end = bidi.getRunLimit(index); + +int level = levels[index]; + +
svn commit: r1705610 - in /pdfbox/trunk/pdfbox/src/test/resources: input/ org/apache/pdfbox/text/
Author: msahyoun Date: Mon Sep 28 08:28:24 2015 New Revision: 1705610 URL: http://svn.apache.org/viewvc?rev=1705610=rev Log: PDFBOX-2252: remove old test files; add new test files with Bidi text Added: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/ pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf (with props) pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf-sorted.txt (with props) pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf.txt (with props) Removed: pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf-sorted.txt pdfbox/trunk/pdfbox/src/test/resources/input/allah2.pdf.txt Added: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf?rev=1705610=auto == Binary file - no diff available. Propchange: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf -- svn:mime-type = application/pdf Added: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf-sorted.txt URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf-sorted.txt?rev=1705610=auto == --- pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf-sorted.txt (added) +++ pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf-sorted.txt Mon Sep 28 08:28:24 2015 @@ -0,0 +1,8 @@ +اÙعربÙØ© +اÙعربÙØ© +test اÙختبار +14 april 1434 ١٤ إبرÙ٠١٤٣٤ +14-april-1434 ١٤-إبرÙÙ-١٤٣٤ +(january) (ÙÙاÙر) +«self test» «إختبار ذاتÙ» +A line with an Arabic اÙعربÙØ© word embedded. 
Propchange: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf-sorted.txt -- svn:eol-style = native Propchange: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf-sorted.txt -- svn:mime-type = text/plain Added: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf.txt URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf.txt?rev=1705610=auto == --- pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf.txt (added) +++ pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf.txt Mon Sep 28 08:28:24 2015 @@ -0,0 +1,8 @@ +اÙعربÙØ© +اÙعربÙØ© +test اÙختبار +14 april 1434 ١٤ إبرÙ٠١٤٣٤ +14-april-1434 ١٤-إبرÙÙ-١٤٣٤ +(january) (ÙÙاÙر) +«self test» «إختبار ذاتÙ» +A line with an Arabic اÙعربÙØ© word embedded. Propchange: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf.txt -- svn:eol-style = native Propchange: pdfbox/trunk/pdfbox/src/test/resources/org/apache/pdfbox/text/BidiSample.pdf.txt -- svn:mime-type = text/plain
svn commit: r1705657 - in /pdfbox/trunk: pdfbox/src/main/java/org/apache/pdfbox/io/ pdfbox/src/main/java/org/apache/pdfbox/multipdf/ pdfbox/src/main/java/org/apache/pdfbox/pdfparser/ pdfbox/src/main/j
Author: tboehme Date: Mon Sep 28 11:31:47 2015 New Revision: 1705657 URL: http://svn.apache.org/viewvc?rev=1705657=rev Log: PDFBOX-2883: remove PDDocument constructor/methods using boolean 'useScratchFiles' parameter and ensure to have an equivalent constructor/method using MemoryUsageSetting object; default is using main memory only (as before) Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFParser.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/encryption/TestPublicKeyEncryption.java pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/multipdf/PDFMergerUtilityTest.java pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestPDFParser.java pdfbox/trunk/tools/src/main/java/org/apache/pdfbox/tools/PDFMerger.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java?rev=1705657=1705656=1705657=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/MemoryUsageSetting.java Mon Sep 28 11:31:47 2015 @@ -162,6 +162,30 @@ public final class MemoryUsageSetting { return new MemoryUsageSetting(true, true, maxMainMemoryBytes, maxStorageBytes); } + +/** + * Returns a copy of this instance with the maximum memory/storage restriction + * divided by the provided number of parallel uses. 
+ * + * @param parallelUseCount specifies the number of parallel usages for the setting to + * be returned + * + * @return a copy from this instance with the maximum memory/storage restrictions + * adjusted to the multiple usage + */ +public MemoryUsageSetting getPartitionedCopy(int parallelUseCount) +{ +long newMaxMainMemoryBytes = maxMainMemoryBytes <= 0 ? maxMainMemoryBytes : + maxMainMemoryBytes / parallelUseCount; +long newMaxStorageBytes = maxStorageBytes <= 0 ? maxStorageBytes : + maxStorageBytes / parallelUseCount; + +MemoryUsageSetting copy = new MemoryUsageSetting( useMainMemory, useTempFile, + newMaxMainMemoryBytes, newMaxStorageBytes ); +copy.tempDir = tempDir; + +return copy; +} /** * Sets directory to be used for temporary files. Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java?rev=1705657=1705656=1705657=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java Mon Sep 28 11:31:47 2015 @@ -131,6 +131,19 @@ public class ScratchFile implements Clos } /** + * Getter for an instance using only unrestricted main memory for buffering + * (same as new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly())). + * + * @return instance configured to only use main memory with no size restriction + * + * @throws IOException + */ +public static ScratchFile getMainMemoryOnlyInstance() throws IOException +{ +return new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly()); +} + +/** * Returns a new free page, either from free page pool * or by enlarging scratch file (may be created). 
* Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java?rev=1705657=1705656=1705657=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java Mon Sep 28 11:31:47 2015 @@ -36,6 +36,7 @@ import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.cos.COSString; +import
svn commit: r1705659 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java
Author: tboehme Date: Mon Sep 28 11:54:49 2015 New Revision: 1705659 URL: http://svn.apache.org/viewvc?rev=1705659&view=rev Log: PDFBOX-2882: move 'isClosed' test in ScratchFile.close() under ioLock synchronization Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java?rev=1705659&r1=1705658&r2=1705659&view=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java Mon Sep 28 11:54:49 2015 @@ -457,17 +457,17 @@ public class ScratchFile implements Clos @Override public void close() throws IOException { -if (isClosed) -{ -return; -} - -isClosed = true; - IOException ioexc = null; synchronized (ioLock) { +if (isClosed) +{ +return; +} + +isClosed = true; + if (raf != null) { try
svn commit: r1705619 - /pdfbox/trunk/pdfbox/pom.xml
Author: msahyoun Date: Mon Sep 28 08:52:40 2015 New Revision: 1705619 URL: http://svn.apache.org/viewvc?rev=1705619&view=rev Log: PDFBOX-2252: add BidiMirroring.txt to rat excludes Modified: pdfbox/trunk/pdfbox/pom.xml Modified: pdfbox/trunk/pdfbox/pom.xml URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/pom.xml?rev=1705619&r1=1705618&r2=1705619&view=diff == --- pdfbox/trunk/pdfbox/pom.xml (original) +++ pdfbox/trunk/pdfbox/pom.xml Mon Sep 28 08:52:40 2015 @@ -129,6 +129,7 @@ src/main/resources/org/apache/pdfbox/resources/icc/* src/main/resources/org/apache/pdfbox/resources/glyphlist/glyphlist.txt src/main/resources/org/apache/pdfbox/resources/glyphlist/zapfdingbats.txt + src/main/resources/org/apache/pdfbox/resources/text/BidiMirroring.txt src/main/resources/META-INF/services/* src/test/resources/input/rendering/*.ai src/test/resources/output/*
svn commit: r1705654 - /pdfbox/trunk/pdfbox/src/main/appended-resources/META-INF/NOTICE
Author: msahyoun Date: Mon Sep 28 10:49:33 2015 New Revision: 1705654 URL: http://svn.apache.org/viewvc?rev=1705654&view=rev Log: PDFBOX-2252: add copyright information for BidiMirroring.txt Modified: pdfbox/trunk/pdfbox/src/main/appended-resources/META-INF/NOTICE Modified: pdfbox/trunk/pdfbox/src/main/appended-resources/META-INF/NOTICE URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/appended-resources/META-INF/NOTICE?rev=1705654&r1=1705653&r2=1705654&view=diff == --- pdfbox/trunk/pdfbox/src/main/appended-resources/META-INF/NOTICE (original) +++ pdfbox/trunk/pdfbox/src/main/appended-resources/META-INF/NOTICE Mon Sep 28 10:49:33 2015 @@ -6,3 +6,6 @@ Copyright 1997, 1998, 2002, 2007, 2010 A Includes the Zapf Dingbats Glyph List Copyright 2002, 2010 Adobe Systems Incorporated. + +Includes the Bidi Mirroring Glyph Property (BidiMirroring-8.0.0.txt) +Copyright 1991-2015 Unicode, Inc.
svn commit: r1705636 - /pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java
Author: msahyoun Date: Mon Sep 28 09:41:44 2015 New Revision: 1705636 URL: http://svn.apache.org/viewvc?rev=1705636=rev Log: PDFBOX-2252: add unit test Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java (with props) Added: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java?rev=1705636=auto == --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java (added) +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java Mon Sep 28 09:41:44 2015 @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.text; + +import static org.junit.Assert.assertEquals; + +import java.io.BufferedReader; +import java.io.File; +import java.io.FileReader; +import java.io.IOException; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; + +/** + * Test for the PDButton class. 
+ * + */ +public class BidiTest +{ +private static final File IN_DIR = new File("src/test/resources/org/apache/pdfbox/text/"); +private static final String NAME_OF_PDF = "BidiSample.pdf"; +private static final String NAME_OF_TXT = "BidiSample.pdf.txt"; +private static final String NAME_OF_TXT_SORTED = "BidiSample.pdf-sorted.txt"; + +private PDDocument document; +private PDFTextStripper stripper; + +@Before +public void setUp() throws IOException +{ +document = PDDocument.load(new File(IN_DIR, NAME_OF_PDF)); +stripper = new PDFTextStripper(); +stripper.setLineSeparator("\n"); +} + +@Test +public void testSorted() throws IOException +{ +stripper.setSortByPosition(true); +String extractedText = stripper.getText(document); + +FileReader compareTextReader = new FileReader(new File(IN_DIR, NAME_OF_TXT_SORTED)); +BufferedReader bufferedCompareTextReader = new BufferedReader(compareTextReader); + +StringBuilder compareTextBuilder = new StringBuilder(); + +String line = bufferedCompareTextReader.readLine(); + +while (line != null) +{ +compareTextBuilder.append(line); +compareTextBuilder.append('\n'); +line = bufferedCompareTextReader.readLine(); +} + +bufferedCompareTextReader.close(); + +assertEquals(extractedText, compareTextBuilder.toString()); + +} + +@Test +public void testNotSorted() throws IOException +{ +stripper.setSortByPosition(false); +String extractedText = stripper.getText(document); + +FileReader compareTextReader = new FileReader(new File(IN_DIR, NAME_OF_TXT)); +BufferedReader bufferedCompareTextReader = new BufferedReader(compareTextReader); + +StringBuilder compareTextBuilder = new StringBuilder(); +String line = bufferedCompareTextReader.readLine(); + +while (line != null) +{ +compareTextBuilder.append(line); +compareTextBuilder.append('\n'); +line = bufferedCompareTextReader.readLine(); +} + +bufferedCompareTextReader.close(); + +assertEquals(extractedText, compareTextBuilder.toString()); + +} + +@After +public void tearDown() throws IOException +{ 
+document.close(); +} + +} Propchange: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java -- svn:eol-style = native Propchange: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java -- svn:mime-type = text/plain
svn commit: r1705645 - /pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java
Author: msahyoun Date: Mon Sep 28 10:13:44 2015 New Revision: 1705645 URL: http://svn.apache.org/viewvc?rev=1705645=rev Log: PDFBOX-2252: define encoding for Reader Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java Modified: pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java?rev=1705645=1705644=1705645=diff == --- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java (original) +++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/text/BidiTest.java Mon Sep 28 10:13:44 2015 @@ -21,8 +21,10 @@ import static org.junit.Assert.assertEqu import java.io.BufferedReader; import java.io.File; -import java.io.FileReader; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStreamReader; +import java.io.Reader; import org.apache.pdfbox.pdmodel.PDDocument; import org.junit.After; @@ -39,6 +41,8 @@ public class BidiTest private static final String NAME_OF_PDF = "BidiSample.pdf"; private static final String NAME_OF_TXT = "BidiSample.pdf.txt"; private static final String NAME_OF_TXT_SORTED = "BidiSample.pdf-sorted.txt"; + +private static final String ENCODING = "UTF-8"; private PDDocument document; private PDFTextStripper stripper; @@ -57,7 +61,7 @@ public class BidiTest stripper.setSortByPosition(true); String extractedText = stripper.getText(document); -FileReader compareTextReader = new FileReader(new File(IN_DIR, NAME_OF_TXT_SORTED)); +Reader compareTextReader = new InputStreamReader(new FileInputStream(new File(IN_DIR, NAME_OF_TXT_SORTED)), ENCODING);; BufferedReader bufferedCompareTextReader = new BufferedReader(compareTextReader); StringBuilder compareTextBuilder = new StringBuilder(); @@ -83,7 +87,7 @@ public class BidiTest stripper.setSortByPosition(false); String extractedText = stripper.getText(document); -FileReader compareTextReader = new FileReader(new File(IN_DIR, 
NAME_OF_TXT)); +Reader compareTextReader = new InputStreamReader(new FileInputStream(new File(IN_DIR, NAME_OF_TXT)), ENCODING);; BufferedReader bufferedCompareTextReader = new BufferedReader(compareTextReader); StringBuilder compareTextBuilder = new StringBuilder();
svn commit: r1705786 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font: PDType3CharProc.java PDType3Font.java
Author: jahewson Date: Mon Sep 28 22:04:13 2015 New Revision: 1705786 URL: http://svn.apache.org/viewvc?rev=1705786=rev Log: PDFBOX-2943: move type 3 char proc parsing to PDType3CharProc Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3CharProc.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3CharProc.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3CharProc.java?rev=1705786=1705785=1705786=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3CharProc.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3CharProc.java Mon Sep 28 22:04:13 2015 @@ -19,8 +19,15 @@ package org.apache.pdfbox.pdmodel.font; import java.io.IOException; import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; import org.apache.pdfbox.contentstream.PDContentStream; +import org.apache.pdfbox.contentstream.operator.Operator; +import org.apache.pdfbox.cos.COSBase; +import org.apache.pdfbox.cos.COSNumber; +import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; +import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.COSObjectable; import org.apache.pdfbox.pdmodel.common.PDRectangle; @@ -83,5 +90,57 @@ public final class PDType3CharProc imple return font.getFontMatrix(); } -// todo: add methods for getting the character's width from the stream +/** + * todo. 
+ * + * @return + * @throws IOException + */ +public float getWidth() throws IOException +{ +List arguments = new ArrayList(); +PDFStreamParser parser = new PDFStreamParser(this); +Object token = parser.parseNextToken(); +while (token != null) +{ +if (token instanceof COSObject) +{ +arguments.add(((COSObject) token).getObject()); +} +else if (token instanceof Operator) +{ +return parseWidth((Operator) token, arguments); +} +else +{ +arguments.add((COSBase) token); +} +token = parser.parseNextToken(); +} +throw new IOException("Unexpected end of stream"); +} + +private float parseWidth(Operator operator, List arguments) throws IOException +{ +if (operator.getName().equals("d0") || operator.getName().equals("d1")) +{ +Object obj = arguments.get(0); +if (obj instanceof Number) +{ +return ((Number) obj).floatValue(); +} +else if (obj instanceof COSNumber) +{ +return ((COSNumber) obj).floatValue(); +} +else +{ +throw new IOException("Unexpected argument type: " + obj.getClass().getName()); +} +} +else +{ +throw new IOException("First operator must be d0 or d1"); +} +} } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java?rev=1705786=1705785=1705786=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/font/PDType3Font.java Mon Sep 28 22:04:13 2015 @@ -19,22 +19,12 @@ package org.apache.pdfbox.pdmodel.font; import java.awt.geom.GeneralPath; import java.io.IOException; import java.io.InputStream; -import java.util.ArrayList; -import java.util.List; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.fontbox.FontBoxFont; import org.apache.fontbox.util.BoundingBox; -import org.apache.pdfbox.contentstream.operator.Operator; import 
org.apache.pdfbox.cos.COSArray; -import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSName; -import org.apache.pdfbox.cos.COSNumber; -import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSStream; -import org.apache.pdfbox.pdfparser.PDFStreamParser; import org.apache.pdfbox.pdmodel.PDResources; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.font.encoding.DictionaryEncoding; @@ -50,11 +40,6 @@ import org.apache.pdfbox.util.Vector; */ public class PDType3Font extends PDSimpleFont { -private static final Log
svn commit: r1705785 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java
Author: tboehme Date: Mon Sep 28 22:01:29 2015 New Revision: 1705785 URL: http://svn.apache.org/viewvc?rev=1705785=rev Log: PDFBOX-2999: only create random access buffer if needed; added debug log if input stream is requested without an output stream being created before for adding data Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java?rev=1705785=1705784=1705785=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSStream.java Mon Sep 28 22:01:29 2015 @@ -25,6 +25,8 @@ import java.io.OutputStream; import java.util.ArrayList; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.filter.Filter; import org.apache.pdfbox.filter.FilterFactory; import org.apache.pdfbox.io.IOUtils; @@ -44,6 +46,8 @@ public class COSStream extends COSDictio private final ScratchFile scratchFile; // used as a temp buffer during decoding private boolean isWriting; // true if there's an open OutputStream +private static final Log LOG = LogFactory.getLog(COSStream.class); + /** * Creates a new stream with an empty dictionary. */ @@ -60,33 +64,16 @@ public class COSStream extends COSDictio public COSStream(ScratchFile scratchFile) { super(); -this.randomAccess = createRandomAccess(scratchFile); this.scratchFile = scratchFile != null ? scratchFile : ScratchFile.getMainMemoryOnlyInstance(); } /** - * Creates a buffer for writing stream data, either in-memory or on-disk. 
- */ -private RandomAccess createRandomAccess(ScratchFile scratchFile) -{ -try -{ -return scratchFile.createBuffer(); -} -catch (IOException e) -{ -// user can't recover from this exception anyway -throw new RuntimeException(e); -} -} - -/** * Throws if the random access backing store has been closed. Helpful for catching cases where * a user tries to use a COSStream which has outlived its COSDocument. */ private void checkClosed() throws IOException { -if (randomAccess.isClosed()) +if ((randomAccess != null) && randomAccess.isClosed()) { throw new IOException("COSStream has been closed and cannot be read. " + "Perhaps its enclosing PDDocument has been closed?"); @@ -105,6 +92,28 @@ public class COSStream extends COSDictio { return createRawInputStream(); } + +/** + * Ensures {@link #randomAccess} is not null by creating a + * buffer from {@link #scratchFile} if needed. + * + * @param forInputStream if true and {@link #randomAccess} is null + *a debug message is logged - input stream should be retrieved after + *data being written to stream + * @throws IOException + */ +private void ensureRandomAccessExists(boolean forInputStream) throws IOException +{ +if (randomAccess == null) +{ +if (forInputStream && LOG.isDebugEnabled()) +{ +// no data written to stream - maybe this should be an exception +LOG.debug("Create InputStream called without data being written before to stream."); +} +randomAccess = scratchFile.createBuffer(); +} +} /** * Returns a new InputStream which reads the encoded PDF stream data. Experts only! 
@@ -119,6 +128,7 @@ public class COSStream extends COSDictio { throw new IllegalStateException("Cannot read while there is an open stream writer"); } +ensureRandomAccessExists(true); return new RandomAccessInputStream(randomAccess); } @@ -148,6 +158,7 @@ public class COSStream extends COSDictio { throw new IllegalStateException("Cannot read while there is an open stream writer"); } +ensureRandomAccessExists(true); InputStream input = new RandomAccessInputStream(randomAccess); return COSInputStream.create(getFilterList(), this, input, scratchFile); } @@ -195,7 +206,7 @@ public class COSStream extends COSDictio { setItem(COSName.FILTER, filters); } -randomAccess = createRandomAccess(scratchFile); // discards old data +randomAccess = scratchFile.createBuffer(); // discards old data - TODO: close existing buffer? OutputStream randomOut = new
svn commit: r1705792 - /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java
Author: jahewson Date: Tue Sep 29 00:34:50 2015 New Revision: 1705792 URL: http://svn.apache.org/viewvc?rev=1705792=rev Log: PDFBOX-2370: Don't cache PDImageXObject or PDPattern Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java?rev=1705792=1705791=1705792=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java Tue Sep 29 00:34:50 2015 @@ -27,6 +27,7 @@ import org.apache.pdfbox.pdmodel.common. import org.apache.pdfbox.pdmodel.documentinterchange.markedcontent.PDPropertyList; import org.apache.pdfbox.pdmodel.font.PDFont; import org.apache.pdfbox.pdmodel.font.PDFontFactory; +import org.apache.pdfbox.pdmodel.graphics.color.PDPattern; import org.apache.pdfbox.pdmodel.graphics.form.PDFormXObject; import org.apache.pdfbox.pdmodel.graphics.optionalcontent.PDOptionalContentGroup; import org.apache.pdfbox.pdmodel.graphics.state.PDExtendedGraphicsState; @@ -142,8 +143,7 @@ public final class PDResources implement PDColorSpace cached = cache.getColorSpace(indirect); if (cached != null) { -// cache disabled due to resource dependency, see PDFBOX-2370 -//return cached +return cached; } } @@ -159,7 +159,8 @@ public final class PDResources implement colorSpace = PDColorSpace.create(name, this); } -if (cache != null) +// we can't cache PDPattern, because it holds page resources, see PDFBOX-2370 +if (cache != null && !(colorSpace instanceof PDPattern)) { cache.put(indirect, colorSpace); } @@ -321,8 +322,7 @@ public final class PDResources implement PDXObject cached = cache.getXObject(indirect); if (cached != null) { -// cache disabled due to resource dependency, see PDFBOX-2370 -//return cached +return cached; } } @@ -342,7 
+342,8 @@ public final class PDResources implement xobject = PDXObject.createXObject(value, this); } -if (cache != null) +// we can't cache PDImageXObject, because it holds page resources, see PDFBOX-2370 +if (cache != null && !(xobject instanceof PDImageXObject)) { cache.put(indirect, xobject); }
svn commit: r1705688 - in /pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox: cos/COSDocument.java io/ScratchFile.java pdfparser/FDFParser.java
Author: tboehme Date: Mon Sep 28 13:23:52 2015 New Revision: 1705688 URL: http://svn.apache.org/viewvc?rev=1705688=rev Log: PDFBOX-2883: remove COSDocument constructors using boolean 'useScratchFiles' parameter and ensure to have an equivalent constructor using MemoryUsageSetting object; default is using main memory only (as before) Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/FDFParser.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java?rev=1705688=1705687=1705688=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDocument.java Mon Sep 28 13:23:52 2015 @@ -17,12 +17,12 @@ package org.apache.pdfbox.cos; import java.io.Closeable; -import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.io.ScratchFile; @@ -79,46 +79,18 @@ public class COSDocument extends COSBase private ScratchFile scratchFile; /** - * Constructor. - * - * @param useScratchFiles enables the usage of a scratch file if set to true - * - */ -public COSDocument(boolean useScratchFiles) -{ -this((File)null, useScratchFiles); -} - -/** - * Constructor that will use a temporary file in the given directory - * for storage of the PDF streams. The temporary file is automatically - * removed when this document gets closed. 
- * - * @param scratchDir directory for the temporary file, - * or null to use the system default - * @param useScratchFiles enables the usage of a scratch file if set to true - * + * Constructor. Uses main memory to buffer PDF streams. */ -public COSDocument(File scratchDir, boolean useScratchFiles) +public COSDocument() { -if (useScratchFiles) -{ -try -{ -scratchFile = new ScratchFile(scratchDir); -} -catch (IOException e) -{ -LOG.error("Can't create temp file, using memory buffer instead", e); -} -} +this(ScratchFile.getMainMemoryOnlyInstance()); } /** * Constructor that will use the provide memory handler for storage of the * PDF streams. * - * @param scratchFile memory handler for storage of PDF streams + * @param scratchFile memory handler for buffering of PDF streams * */ public COSDocument(ScratchFile scratchFile) @@ -127,14 +99,6 @@ public class COSDocument extends COSBase } /** - * Constructor. Uses memory to store stream. - */ -public COSDocument() -{ -this(false); -} - -/** * Creates a new COSStream using the current configuration for scratch files. * * @return the new COSStream @@ -169,7 +133,7 @@ public class COSDocument extends COSBase * @return This will return an object with the specified type. 
* @throws IOException If there is an error getting the object */ -public COSObject getObjectByType( COSName type ) throws IOException +public COSObject getObjectByType(COSName type) throws IOException { for( COSObject object : objectPool.values() ) { Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java?rev=1705688=1705687=1705688=diff == --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/io/ScratchFile.java Mon Sep 28 13:23:52 2015 @@ -135,12 +135,19 @@ public class ScratchFile implements Clos * (same as new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly())). * * @return instance configured to only use main memory with no size restriction - * - * @throws IOException */ -public static ScratchFile getMainMemoryOnlyInstance() throws IOException +public static ScratchFile getMainMemoryOnlyInstance() { -return new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly()); +try +{ +return new ScratchFile(MemoryUsageSetting.setupMainMemoryOnly()); +} +catch