Author: jahewson Date: Wed Aug 10 18:22:59 2016 New Revision: 1755794 URL: http://svn.apache.org/viewvc?rev=1755794&view=rev Log: PDFBOX-2941: show text bounds as an overlay
Added: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPDFRenderer.java (with props) pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPageDrawer.java (with props) pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugTextOverlay.java (with props) Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/PDFDebugger.java pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/PDFDebugger.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/PDFDebugger.java?rev=1755794&r1=1755793&r2=1755794&view=diff ============================================================================== --- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/PDFDebugger.java (original) +++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/PDFDebugger.java Wed Aug 10 18:22:59 2016 @@ -16,6 +16,53 @@ */ package org.apache.pdfbox.debugger; +import java.awt.BorderLayout; +import java.awt.Component; +import java.awt.Dimension; +import java.awt.FileDialog; +import java.awt.Toolkit; +import java.awt.datatransfer.DataFlavor; +import java.awt.datatransfer.Transferable; +import java.awt.datatransfer.UnsupportedFlavorException; +import java.awt.event.ActionEvent; +import java.awt.event.ActionListener; +import java.awt.event.InputEvent; +import java.awt.event.KeyEvent; +import java.awt.event.WindowEvent; +import java.awt.print.PrinterException; +import java.awt.print.PrinterJob; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FilenameFilter; +import java.io.IOException; +import java.io.InputStream; +import java.lang.reflect.Method; +import java.net.URL; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import javax.swing.AbstractAction; +import javax.swing.Action; +import javax.swing.JCheckBoxMenuItem; +import javax.swing.JComponent; +import javax.swing.JFrame; +import javax.swing.JLabel; +import javax.swing.JMenu; +import javax.swing.JMenuBar; +import javax.swing.JMenuItem; +import javax.swing.JOptionPane; +import javax.swing.JPanel; +import javax.swing.JPasswordField; +import javax.swing.JScrollPane; +import javax.swing.KeyStroke; +import javax.swing.TransferHandler; +import javax.swing.UIManager; +import javax.swing.border.BevelBorder; +import javax.swing.event.TreeSelectionEvent; +import javax.swing.event.TreeSelectionListener; +import javax.swing.filechooser.FileFilter; +import javax.swing.tree.TreePath; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSBoolean; @@ -57,53 +104,6 @@ import org.apache.pdfbox.pdmodel.PDDocum import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException; import org.apache.pdfbox.printing.PDFPageable; -import javax.swing.AbstractAction; -import javax.swing.Action; -import javax.swing.JComponent; -import javax.swing.JFrame; -import javax.swing.JLabel; -import javax.swing.JMenu; -import javax.swing.JMenuBar; -import javax.swing.JMenuItem; -import javax.swing.JOptionPane; -import javax.swing.JPanel; -import javax.swing.JPasswordField; -import javax.swing.JScrollPane; -import javax.swing.KeyStroke; -import javax.swing.TransferHandler; -import javax.swing.UIManager; -import javax.swing.border.BevelBorder; -import javax.swing.event.TreeSelectionEvent; -import javax.swing.event.TreeSelectionListener; -import javax.swing.filechooser.FileFilter; -import javax.swing.tree.TreePath; -import java.awt.BorderLayout; -import java.awt.Component; -import java.awt.Dimension; -import java.awt.FileDialog; -import java.awt.Toolkit; -import java.awt.datatransfer.DataFlavor; -import java.awt.datatransfer.Transferable; -import java.awt.datatransfer.UnsupportedFlavorException; -import java.awt.event.ActionEvent; -import java.awt.event.ActionListener; -import java.awt.event.InputEvent; -import java.awt.event.KeyEvent; -import java.awt.event.WindowEvent; -import java.awt.print.PrinterException; -import java.awt.print.PrinterJob; -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FilenameFilter; -import java.io.IOException; -import java.io.InputStream; -import java.lang.reflect.Method; -import java.net.URL; -import java.util.Arrays; -import java.util.HashSet; -import java.util.List; -import java.util.Set; - /** * PDF Debugger. * @@ -153,6 +153,9 @@ public class PDFDebugger extends JFrame // view menu private JMenuItem viewModeItem; + public static JCheckBoxMenuItem showTextStripper; + public static JCheckBoxMenuItem showFontBBox; + public static JCheckBoxMenuItem showGlyphBounds; /** * Constructor. @@ -534,6 +537,20 @@ public class PDFDebugger extends JFrame RotationMenu rotationMenu = RotationMenu.getInstance(); rotationMenu.setEnableMenu(false); viewMenu.add(rotationMenu.getMenu()); + + viewMenu.addSeparator(); + + showTextStripper = new JCheckBoxMenuItem("Show TextStripper Bounds"); + showTextStripper.setEnabled(false); + viewMenu.add(showTextStripper); + + showFontBBox = new JCheckBoxMenuItem("Show Approximate Text Bounds"); + showFontBBox.setEnabled(false); + viewMenu.add(showFontBBox); + + showGlyphBounds = new JCheckBoxMenuItem("Show Glyph Bounds"); + showGlyphBounds.setEnabled(false); + viewMenu.add(showGlyphBounds); return viewMenu; } Added: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPDFRenderer.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPDFRenderer.java?rev=1755794&view=auto ============================================================================== --- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPDFRenderer.java (added) +++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPDFRenderer.java Wed Aug 10 18:22:59 2016 @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.debugger.pagepane; + +import java.io.IOException; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.rendering.PDFRenderer; +import org.apache.pdfbox.rendering.PageDrawer; +import org.apache.pdfbox.rendering.PageDrawerParameters; + +/** + * A custom PDFRenderer which creates instances of DebugPageDrawer. + * + * @author John Hewson + */ +final class DebugPDFRenderer extends PDFRenderer +{ + private final boolean showGlyphBounds; + + DebugPDFRenderer(PDDocument document, boolean showGlyphBounds) + { + super(document); + this.showGlyphBounds = showGlyphBounds; + } + + @Override + protected PageDrawer createPageDrawer(PageDrawerParameters parameters) throws IOException + { + return new DebugPageDrawer(parameters, this.showGlyphBounds); + } +} Propchange: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPDFRenderer.java ------------------------------------------------------------------------------ svn:eol-style = native Added: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPageDrawer.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPageDrawer.java?rev=1755794&view=auto ============================================================================== --- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPageDrawer.java (added) +++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPageDrawer.java Wed Aug 10 18:22:59 2016 @@ -0,0 +1,119 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.debugger.pagepane; + +import java.awt.BasicStroke; +import java.awt.Color; +import java.awt.Graphics2D; +import java.awt.Shape; +import java.awt.Stroke; +import java.awt.geom.AffineTransform; +import java.awt.geom.GeneralPath; +import java.io.IOException; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType3Font; +import org.apache.pdfbox.pdmodel.font.PDVectorFont; +import org.apache.pdfbox.rendering.PageDrawer; +import org.apache.pdfbox.rendering.PageDrawerParameters; +import org.apache.pdfbox.util.Matrix; +import org.apache.pdfbox.util.Vector; + +/** + * A custom PageDrawer with debugging overlays. + * + * @author John Hewson + */ +final class DebugPageDrawer extends PageDrawer +{ + private final boolean showGlyphBounds; + + DebugPageDrawer(PageDrawerParameters parameters, boolean showGlyphBounds) throws IOException + { + super(parameters); + this.showGlyphBounds = showGlyphBounds; + } + + /** + * Glyph bounding boxes. + */ + @Override + protected void showGlyph(Matrix textRenderingMatrix, PDFont font, int code, String unicode, + Vector displacement) throws IOException + { + // draw glyph + super.showGlyph(textRenderingMatrix, font, code, unicode, displacement); + + if (showGlyphBounds) + { + Shape bbox; + + // compute visual bounds + if (font instanceof PDType3Font) + { + // todo: implement me + return; + } + else + { + AffineTransform at = textRenderingMatrix.createAffineTransform(); + at.concatenate(font.getFontMatrix().createAffineTransform()); + + // get the path + PDVectorFont vectorFont = (PDVectorFont) font; + GeneralPath path = vectorFont.getNormalizedPath(code); + + if (path == null) + { + return; + } + + // stretch non-embedded glyph if it does not match the width contained in the PDF + if (!font.isEmbedded()) + { + float fontWidth = font.getWidthFromFont(code); + if (fontWidth > 0 && // ignore spaces + Math.abs(fontWidth - displacement.getX() * 1000) > 0.0001) + { + float pdfWidth = displacement.getX() * 1000; + at.scale(pdfWidth / fontWidth, 1); + } + } + + Shape glyph = at.createTransformedShape(path); + bbox = glyph.getBounds2D(); + } + + // save + Graphics2D graphics = getGraphics(); + Color color = graphics.getColor(); + Stroke stroke = graphics.getStroke(); + Shape clip = graphics.getClip(); + + // draw + graphics.setClip(graphics.getDeviceConfiguration().getBounds()); + graphics.setColor(new Color(88, 152, 82)); // green + graphics.setStroke(new BasicStroke(1f)); + graphics.draw(bbox); + + // restore + graphics.setStroke(stroke); + graphics.setColor(color); + graphics.setClip(clip); + } + } +} Propchange: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugPageDrawer.java ------------------------------------------------------------------------------ svn:eol-style = native Added: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugTextOverlay.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugTextOverlay.java?rev=1755794&view=auto ============================================================================== --- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugTextOverlay.java (added) +++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugTextOverlay.java Wed Aug 10 18:22:59 2016 @@ -0,0 +1,179 @@ +/* + * Copyright 2015 The Apache Software Foundation. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.pdfbox.debugger.pagepane; + +import java.awt.Color; +import java.awt.Graphics2D; +import java.awt.geom.AffineTransform; +import java.awt.geom.Rectangle2D; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; +import java.io.Writer; +import java.util.List; +import org.apache.fontbox.util.BoundingBox; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.common.PDRectangle; +import org.apache.pdfbox.pdmodel.font.PDFont; +import org.apache.pdfbox.pdmodel.font.PDType3Font; +import org.apache.pdfbox.text.PDFTextStripper; +import org.apache.pdfbox.text.TextPosition; + +/** + * Draws an overlay showing the locations of text found by PDFTextStripper and another heuristic. + * + * @author Ben Litchfield + * @author Tilman Hausherr + * @author John Hewson + */ +final class DebugTextOverlay +{ + private PDDocument document; + private int pageIndex; + private float scale; + private boolean showTextStripper; + private boolean showFontBBox; + + private class DebugTextStripper extends PDFTextStripper + { + private Graphics2D graphics; + private AffineTransform flip; + + public DebugTextStripper(Graphics2D graphics) throws IOException + { + this.graphics = graphics; + } + + public void stripPage(PDDocument document, PDPage page, int pageIndex, float scale) throws IOException + { + // flip y-axis + PDRectangle cropBox = page.getCropBox(); + this.flip = new AffineTransform(); + flip.translate(0, cropBox.getHeight()); + flip.scale(1, -1); + + // scale and rotate + transform(graphics, page, scale); + + setStartPage(pageIndex + 1); + setEndPage(pageIndex + 1); + + Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream()); + writeText(document, dummy); + } + + // scale rotate translate + private void transform(Graphics2D graphics, PDPage page, float scale) + { + graphics.scale(scale, scale); + + int rotationAngle = page.getRotation(); + PDRectangle cropBox = page.getCropBox(); + + if (rotationAngle != 0) + { + float translateX = 0; + float translateY = 0; + switch (rotationAngle) + { + case 90: + translateX = cropBox.getHeight(); + break; + case 270: + translateY = cropBox.getWidth(); + break; + case 180: + translateX = cropBox.getWidth(); + translateY = cropBox.getHeight(); + break; + } + graphics.translate(translateX, translateY); + graphics.rotate((float) Math.toRadians(rotationAngle)); + } + } + + @Override + protected void writeString(String string, List<TextPosition> textPositions) throws IOException + { + for (TextPosition text : textPositions) + { + if (DebugTextOverlay.this.showTextStripper) + { + // in red: + // show rectangles with the "height" (not a real height, but used for text extraction + // heuristics, it is 1/2 of the bounding box height and starts at y=0) + Rectangle2D.Float rect = new Rectangle2D.Float( + text.getXDirAdj(), + (text.getYDirAdj() - text.getHeightDir()), + text.getWidthDirAdj(), + text.getHeightDir()); + graphics.setColor(Color.red); + graphics.draw(rect); + } + + if (DebugTextOverlay.this.showFontBBox) + { + // in blue: + // show rectangle with the real vertical bounds, based on the font bounding box y values + // usually, the height is identical to what you see when marking text in Adobe Reader + PDFont font = text.getFont(); + BoundingBox bbox = font.getBoundingBox(); + + // advance width, bbox height (glyph space) + float xadvance = font.getWidth(text.getCharacterCodes()[0]); // todo: should iterate all chars + Rectangle2D rect = new Rectangle2D.Float(0, bbox.getLowerLeftY(), xadvance, bbox.getHeight()); + + // glyph space -> user space + // note: text.getTextMatrix() is *not* the Text Matrix, it's the Text Rendering Matrix + AffineTransform at = (AffineTransform) flip.clone(); + at.concatenate(text.getTextMatrix().createAffineTransform()); + + if (font instanceof PDType3Font) + { + // bbox and font matrix are unscaled + at.concatenate(font.getFontMatrix().createAffineTransform()); + } + else + { + // bbox and font matrix are already scaled to 1000 + at.scale(1 / 1000f, 1 / 1000f); + } + + graphics.setColor(Color.blue); + graphics.draw(at.createTransformedShape(rect)); + } + } + } + } + + public DebugTextOverlay(PDDocument document, int pageIndex, float scale, + boolean showTextStripper, boolean showFontBBox) + { + this.document = document; + this.pageIndex = pageIndex; + this.scale = scale; + this.showTextStripper = showTextStripper; + this.showFontBBox = showFontBBox; + } + + public void renderTo(Graphics2D graphics) throws IOException + { + DebugTextStripper stripper = new DebugTextStripper(graphics); + stripper.stripPage(this.document, this.document.getPage(pageIndex), this.pageIndex, this.scale); + } +} Propchange: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/DebugTextOverlay.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java?rev=1755794&r1=1755793&r2=1755794&view=diff ============================================================================== --- pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java (original) +++ pdfbox/trunk/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java Wed Aug 10 18:22:59 2016 @@ -16,7 +16,9 @@ package org.apache.pdfbox.debugger.pagepane; +import java.awt.Graphics2D; import org.apache.pdfbox.cos.COSDictionary; +import org.apache.pdfbox.debugger.PDFDebugger; import org.apache.pdfbox.debugger.ui.ImageUtil; import org.apache.pdfbox.debugger.ui.RotationMenu; import org.apache.pdfbox.debugger.ui.ZoomMenu; @@ -94,7 +96,7 @@ public class PagePane implements ActionL zoomMenu.changeZoomSelection(zoomMenu.getPageZoomScale()); // render in a background thread: rendering is read-only, so this should be ok, despite // the fact that PDDocument is not officially thread safe - new RenderWorker(zoomMenu.getPageZoomScale(), 0).execute(); + new RenderWorker(zoomMenu.getPageZoomScale(), 0, false, false, false).execute(); } /** @@ -111,9 +113,18 @@ public class PagePane implements ActionL public void actionPerformed(ActionEvent actionEvent) { String actionCommand = actionEvent.getActionCommand(); - if (ZoomMenu.isZoomMenu(actionCommand) || RotationMenu.isRotationMenu(actionCommand)) + if (ZoomMenu.isZoomMenu(actionCommand) || + RotationMenu.isRotationMenu(actionCommand) || + actionEvent.getSource() == PDFDebugger.showTextStripper || + actionEvent.getSource() == PDFDebugger.showFontBBox || + actionEvent.getSource() == PDFDebugger.showGlyphBounds) { - new RenderWorker(ZoomMenu.getZoomScale(), RotationMenu.getRotationDegrees()).execute(); + new RenderWorker(ZoomMenu.getZoomScale(), + RotationMenu.getRotationDegrees(), + PDFDebugger.showTextStripper.isSelected(), + PDFDebugger.showFontBBox.isSelected(), + PDFDebugger.showGlyphBounds.isSelected() + ).execute(); zoomMenu.setPageZoomScale(ZoomMenu.getZoomScale()); } } @@ -128,6 +139,15 @@ public class PagePane implements ActionL rotationMenu.addMenuListeners(this); rotationMenu.setRotationSelection(RotationMenu.ROTATE_0_DEGREES); rotationMenu.setEnableMenu(true); + + PDFDebugger.showTextStripper.setEnabled(true); + PDFDebugger.showTextStripper.addActionListener(this); + + PDFDebugger.showFontBBox.setEnabled(true); + PDFDebugger.showFontBBox.addActionListener(this); + + PDFDebugger.showGlyphBounds.setEnabled(true); + PDFDebugger.showGlyphBounds.addActionListener(this); } @Override @@ -183,7 +203,7 @@ public class PagePane implements ActionL y1 = (int) (height - y + offsetY); break; } - statuslabel.setText(x1 + "," + y1); + statuslabel.setText("x: " + x1 + ", y: " + y1); } @Override @@ -219,11 +239,18 @@ public class PagePane implements ActionL { private final float scale; private final int rotation; - - private RenderWorker(float scale, int rotation) + private boolean showTextStripper; + private boolean showFontBBox; + private boolean showGlyphBounds; + + private RenderWorker(float scale, int rotation, boolean showTextStripper, + boolean showFontBBox, boolean showGlyphBounds) { this.scale = scale; this.rotation = rotation; + this.showTextStripper = showTextStripper; + this.showFontBBox = showFontBBox; + this.showGlyphBounds = showGlyphBounds; } @Override @@ -233,15 +260,23 @@ public class PagePane implements ActionL label.setText("Rendering..."); statuslabel.setText("Rendering..."); - PDFRenderer renderer = new PDFRenderer(document); + PDFRenderer renderer = new DebugPDFRenderer(document, this.showGlyphBounds); long t0 = System.nanoTime(); - BufferedImage bim = renderer.renderImage(pageIndex, scale); + BufferedImage image = renderer.renderImage(pageIndex, scale); long t1 = System.nanoTime(); long ms = TimeUnit.MILLISECONDS.convert(t1 - t0, TimeUnit.NANOSECONDS); statuslabel.setText("Rendered in " + ms + " ms"); - return ImageUtil.getRotatedImage(bim, rotation); + + // debug overlays + DebugTextOverlay debugText = new DebugTextOverlay(document, pageIndex, scale, + showTextStripper, showFontBBox); + Graphics2D g = image.createGraphics(); + debugText.renderTo(g); + g.dispose(); + + return ImageUtil.getRotatedImage(image, rotation); } @Override