Author: tilman Date: Tue Jul 29 14:25:07 2025 New Revision: 1927520 Log: PDFBOX-6047: add text extraction options submenu
Added: pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/TextStripperMenu.java (contents, props changed) Modified: pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java Modified: pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java ============================================================================== --- pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java Tue Jul 29 12:29:58 2025 (r1927519) +++ pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/pagepane/PagePane.java Tue Jul 29 14:25:07 2025 (r1927520) @@ -68,6 +68,7 @@ import org.apache.pdfbox.debugger.ui.Hig import org.apache.pdfbox.debugger.ui.ImageTypeMenu; import org.apache.pdfbox.debugger.ui.RenderDestinationMenu; import org.apache.pdfbox.debugger.ui.TextDialog; +import org.apache.pdfbox.debugger.ui.TextStripperMenu; import org.apache.pdfbox.pdmodel.common.PDRectangle; import org.apache.pdfbox.pdmodel.fixup.AcroFormDefaultFixup; import org.apache.pdfbox.pdmodel.fixup.PDDocumentFixup; @@ -309,6 +310,8 @@ public class PagePane implements ActionL PDFTextStripper stripper = new PDFTextStripper(); stripper.setStartPage(pageIndex + 1); stripper.setEndPage(pageIndex + 1); + stripper.setSortByPosition(TextStripperMenu.isSorted()); + stripper.setIgnoreContentStreamSpaceGlyphs(TextStripperMenu.isIgnoreSpaces()); textDialog.setText(stripper.getText(document)); } catch (IOException ex) Added: pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/TextStripperMenu.java ============================================================================== --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/TextStripperMenu.java Tue Jul 29 14:25:07 2025 (r1927520) @@ -0,0 
+1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.pdfbox.debugger.ui; + +import javax.swing.JCheckBoxMenuItem; +import javax.swing.JMenu; + +/** + * Singleton wrapper around the text extraction options submenu and its two check box items (sort, ignore spaces). The items live in static fields that are only assigned by the private constructor, so getInstance() must have been called before isSorted() or isIgnoreSpaces() is used; until then both fields are null and the accessors throw a NullPointerException. + * @author Tilman Hausherr + */ +public class TextStripperMenu extends MenuBase +{ + private static TextStripperMenu instance; + private static JCheckBoxMenuItem sortOptionMenuItem; + private static JCheckBoxMenuItem ignoreSpacesOptionMenuItem; + + /** + * Constructor. Builds the submenu, creates the two check box items, stores them in the static fields and hands the menu to setMenu. + */ + private TextStripperMenu() + { + JMenu menu = new JMenu("Text extraction options"); + + sortOptionMenuItem = new JCheckBoxMenuItem("sort"); + menu.add(sortOptionMenuItem); + + ignoreSpacesOptionMenuItem = new JCheckBoxMenuItem("ignore spaces"); + menu.add(ignoreSpacesOptionMenuItem); + + setMenu(menu); + } + + /** + * Provides the TextStripperMenu instance, creating it on the first call. + * + * @return TextStripperMenu instance. 
+ */ + public static TextStripperMenu getInstance() + { + if (instance == null) + { + instance = new TextStripperMenu(); + } + return instance; + } + /** Tells whether the sort check box is selected; PagePane passes this value to setSortByPosition. Requires a prior getInstance() call. */ + public static boolean isSorted() + { + return sortOptionMenuItem.isSelected(); + } + /** Tells whether the ignore spaces check box is selected; PagePane passes this value to setIgnoreContentStreamSpaceGlyphs. Requires a prior getInstance() call. */ + public static boolean isIgnoreSpaces() + { + return ignoreSpacesOptionMenuItem.isSelected(); + } +} Modified: pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java ============================================================================== --- pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java Tue Jul 29 12:29:58 2025 (r1927519) +++ pdfbox/branches/3.0/debugger/src/main/java/org/apache/pdfbox/debugger/ui/ViewMenu.java Tue Jul 29 14:25:07 2025 (r1927520) @@ -229,6 +229,10 @@ public class ViewMenu extends MenuBase extractTextMenuItem.setEnabled(false); viewMenu.add(extractTextMenuItem); + TextStripperMenu textStripperMenu = TextStripperMenu.getInstance(); + textStripperMenu.setEnableMenu(false); + viewMenu.add(textStripperMenu.getMenu()); + viewMenu.addSeparator(); repairAcroFormMenuItem = new JCheckBoxMenuItem(REPAIR_ACROFORM);